diff --git a/Source/Core/Core/PowerPC/GDBStub.cpp b/Source/Core/Core/PowerPC/GDBStub.cpp index 35f0ddbe23..5ab058ef82 100644 --- a/Source/Core/Core/PowerPC/GDBStub.cpp +++ b/Source/Core/Core/PowerPC/GDBStub.cpp @@ -761,6 +761,7 @@ static void WriteRegister() break; case 131: ppc_state.spr[SPR_MMCR0] = re32hex(bufptr); + PowerPC::MMCRUpdated(ppc_state); break; case 132: ppc_state.spr[SPR_PMC1] = re32hex(bufptr); @@ -773,6 +774,7 @@ static void WriteRegister() break; case 135: ppc_state.spr[SPR_MMCR1] = re32hex(bufptr); + PowerPC::MMCRUpdated(ppc_state); break; case 136: ppc_state.spr[SPR_PMC3] = re32hex(bufptr); diff --git a/Source/Core/Core/PowerPC/Gekko.h b/Source/Core/Core/PowerPC/Gekko.h index 1acf6632f2..62a1743b45 100644 --- a/Source/Core/Core/PowerPC/Gekko.h +++ b/Source/Core/Core/PowerPC/Gekko.h @@ -930,6 +930,7 @@ enum CPUEmuFeatureFlags : u32 { FEATURE_FLAG_MSR_DR = 1 << 0, FEATURE_FLAG_MSR_IR = 1 << 1, + FEATURE_FLAG_PERFMON = 1 << 2, }; constexpr s32 SignExt16(s16 x) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp index f4afeb6b96..8e8025cec9 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp @@ -491,6 +491,11 @@ void Interpreter::mtspr(Interpreter& interpreter, UGeckoInstruction inst) } break; + case SPR_MMCR0: + case SPR_MMCR1: + MMCRUpdated(ppc_state); + break; + case SPR_THRM1: case SPR_THRM2: case SPR_THRM3: diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index 1b97ae1851..a6881c1172 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -337,7 +337,7 @@ void Jit64::FallBackToInterpreter(UGeckoInstruction inst) gpr.Flush(); fpr.Flush(); - if (js.op->opinfo->flags & FL_ENDBLOCK) + if (js.op->canEndBlock) { MOV(32, PPCSTATE(pc), Imm32(js.compilerPC)); MOV(32, PPCSTATE(npc), Imm32(js.compilerPC + 4)); @@ -353,7 +353,7 @@ void Jit64::FallBackToInterpreter(UGeckoInstruction inst) gpr.Reset(js.op->regsOut); fpr.Reset(js.op->GetFregsOut()); - if (js.op->opinfo->flags & FL_ENDBLOCK) + if (js.op->canEndBlock) { if (js.isLastInstruction) { @@ -445,7 +445,6 @@ bool Jit64::Cleanup() did_something = true; } - // SPEED HACK: MMCR0/MMCR1 should be checked at run-time, not at compile time. if (MMCR0(m_ppc_state).Hex || MMCR1(m_ppc_state).Hex) { ABI_PushRegistersAndAdjustStack({}, 0); diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 4f973f5c06..9474805972 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -187,7 +187,7 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst) gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); - if (js.op->opinfo->flags & FL_ENDBLOCK) + if (js.op->canEndBlock) { // also flush the program counter ARM64Reg WA = gpr.GetReg(); @@ -207,7 +207,7 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst) fpr.ResetRegisters(js.op->GetFregsOut()); gpr.ResetCRRegisters(js.op->crOut); - if (js.op->opinfo->flags & FL_ENDBLOCK) + if (js.op->canEndBlock) { if (js.isLastInstruction) { @@ -276,7 +276,6 @@ void JitArm64::Cleanup() SetJumpTarget(exit); } - // SPEED HACK: MMCR0/MMCR1 should be checked at run-time, not at compile time. if (MMCR0(m_ppc_state).Hex || MMCR1(m_ppc_state).Hex) { ABI_CallFunction(&PowerPC::UpdatePerformanceMonitor, js.downcountAmount, js.numLoadStoreInst, diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.h b/Source/Core/Core/PowerPC/JitCommon/JitCache.h index 8c327c202e..feb872ee7b 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.h @@ -130,8 +130,8 @@ class JitBaseBlockCache { public: // The size of the fast map is determined like this: - // ((4 GiB guest memory space) / (4-byte alignment) * sizeof(JitBlock*)) << (2 feature flag bits) - static constexpr u64 FAST_BLOCK_MAP_SIZE = 0x8'0000'0000; + // ((4 GiB guest memory space) / (4-byte alignment) * sizeof(JitBlock*)) << (3 feature flag bits) + static constexpr u64 FAST_BLOCK_MAP_SIZE = 0x10'0000'0000; static constexpr u32 FAST_BLOCK_MAP_FALLBACK_ELEMENTS = 0x10000; static constexpr u32 FAST_BLOCK_MAP_FALLBACK_MASK = FAST_BLOCK_MAP_FALLBACK_ELEMENTS - 1; diff --git a/Source/Core/Core/PowerPC/PPCAnalyst.cpp b/Source/Core/Core/PowerPC/PPCAnalyst.cpp index f093863492..04d15b971e 100644 --- a/Source/Core/Core/PowerPC/PPCAnalyst.cpp +++ b/Source/Core/Core/PowerPC/PPCAnalyst.cpp @@ -202,6 +202,23 @@ static void AnalyzeFunction2(Common::Symbol* func) func->flags = flags; } +static bool IsMtspr(UGeckoInstruction inst) +{ + return inst.OPCD == 31 && inst.SUBOP10 == 467; +} + +static bool IsSprInstructionUsingMmcr(UGeckoInstruction inst) +{ + const u32 index = (inst.SPRU << 5) | (inst.SPRL & 0x1F); + return index == SPR_MMCR0 || index == SPR_MMCR1; +} + +static bool InstructionCanEndBlock(const CodeOp& op) +{ + return (op.opinfo->flags & FL_ENDBLOCK) && + (!IsMtspr(op.inst) || IsSprInstructionUsingMmcr(op.inst)); +} + bool PPCAnalyzer::CanSwapAdjacentOps(const CodeOp& a, const CodeOp& b) const { const GekkoOPInfo* a_info = a.opinfo; @@ -222,9 +239,11 @@ bool PPCAnalyzer::CanSwapAdjacentOps(const CodeOp& a, const CodeOp& b) const // [1] https://bugs.dolphin-emu.org/issues/5864#note-7 if (a.canCauseException || b.canCauseException) return false; - if (a_flags & (FL_ENDBLOCK | FL_TIMER | FL_NO_REORDER | FL_SET_OE)) + if (a.canEndBlock || b.canEndBlock) return false; - if (b_flags & (FL_ENDBLOCK | FL_TIMER | FL_NO_REORDER | FL_SET_OE)) + if (a_flags & (FL_TIMER | FL_NO_REORDER | FL_SET_OE)) + return false; + if (b_flags & (FL_TIMER | FL_NO_REORDER | FL_SET_OE)) return false; if ((a_flags & (FL_SET_CA | FL_READ_CA)) && (b_flags & (FL_SET_CA | FL_READ_CA))) return false; @@ -597,7 +616,7 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock* block, CodeOp* code, code->wantsFPRF = (opinfo->flags & FL_READ_FPRF) != 0; code->outputFPRF = (opinfo->flags & FL_SET_FPRF) != 0; - code->canEndBlock = (opinfo->flags & FL_ENDBLOCK) != 0; + code->canEndBlock = InstructionCanEndBlock(*code); code->canCauseException = first_fpu_instruction || (opinfo->flags & (FL_LOADSTORE | FL_PROGRAMEXCEPTION)) != 0 || @@ -935,7 +954,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, { // Just pick the next instruction address += 4; - if (!conditional_continue && opinfo->flags & FL_ENDBLOCK) // right now we stop early + if (!conditional_continue && InstructionCanEndBlock(code[i])) // right now we stop early { found_exit = true; break; diff --git a/Source/Core/Core/PowerPC/PPCTables.cpp b/Source/Core/Core/PowerPC/PPCTables.cpp index 9d70fb662b..8eeb5da076 100644 --- a/Source/Core/Core/PowerPC/PPCTables.cpp +++ b/Source/Core/Core/PowerPC/PPCTables.cpp @@ -374,7 +374,7 @@ constexpr std::array s_table31{{ {210, "mtsr", OpType::System, 1, FL_IN_S | FL_PROGRAMEXCEPTION}, {242, "mtsrin", OpType::System, 1, FL_IN_SB | FL_PROGRAMEXCEPTION}, {339, "mfspr", OpType::SPR, 1, FL_OUT_D | FL_PROGRAMEXCEPTION}, - {467, "mtspr", OpType::SPR, 2, FL_IN_S | FL_PROGRAMEXCEPTION}, + {467, "mtspr", OpType::SPR, 2, FL_IN_S | FL_ENDBLOCK | FL_PROGRAMEXCEPTION}, {371, "mftb", OpType::System, 1, FL_OUT_D | FL_TIMER | FL_PROGRAMEXCEPTION}, {512, "mcrxr", OpType::System, 1, FL_SET_CRn | FL_READ_CA | FL_SET_CA}, {595, "mfsr", OpType::System, 3, FL_OUT_D | FL_PROGRAMEXCEPTION}, diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp index f83df655e5..b994b00f6e 100644 --- a/Source/Core/Core/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/PowerPC/PowerPC.cpp @@ -137,7 +137,7 @@ void PowerPCManager::DoState(PointerWrap& p) } RoundingModeUpdated(m_ppc_state); - MSRUpdated(m_ppc_state); + RecalculateAllFeatureFlags(m_ppc_state); auto& mmu = m_system.GetMMU(); mmu.IBATUpdated(); @@ -209,7 +209,7 @@ void PowerPCManager::ResetRegisters() SystemTimers::DecrementerSet(); RoundingModeUpdated(m_ppc_state); - MSRUpdated(m_ppc_state); + RecalculateAllFeatureFlags(m_ppc_state); } void PowerPCManager::InitializeCPUCore(CPUCore cpu_core) @@ -710,7 +710,27 @@ void MSRUpdated(PowerPCState& ppc_state) static_assert(FEATURE_FLAG_MSR_DR == 1 << 0); static_assert(FEATURE_FLAG_MSR_IR == 1 << 1); - ppc_state.feature_flags = static_cast((ppc_state.msr.Hex >> 4) & 0x3); + ppc_state.feature_flags = static_cast( + (ppc_state.feature_flags & FEATURE_FLAG_PERFMON) | ((ppc_state.msr.Hex >> 4) & 0x3)); +} + +void MMCRUpdated(PowerPCState& ppc_state) +{ + const bool perfmon = ppc_state.spr[SPR_MMCR0] || ppc_state.spr[SPR_MMCR1]; + ppc_state.feature_flags = static_cast( + (ppc_state.feature_flags & ~FEATURE_FLAG_PERFMON) | (perfmon ? FEATURE_FLAG_PERFMON : 0)); +} + +void RecalculateAllFeatureFlags(PowerPCState& ppc_state) +{ + static_assert(UReg_MSR{}.DR.StartBit() == 4); + static_assert(UReg_MSR{}.IR.StartBit() == 5); + static_assert(FEATURE_FLAG_MSR_DR == 1 << 0); + static_assert(FEATURE_FLAG_MSR_IR == 1 << 1); + + const bool perfmon = ppc_state.spr[SPR_MMCR0] || ppc_state.spr[SPR_MMCR1]; + ppc_state.feature_flags = static_cast(((ppc_state.msr.Hex >> 4) & 0x3) | + (perfmon ? FEATURE_FLAG_PERFMON : 0)); } void CheckExceptionsFromJIT(PowerPCManager& power_pc) diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h index 8a1701bb10..afcc0ac8bf 100644 --- a/Source/Core/Core/PowerPC/PowerPC.h +++ b/Source/Core/Core/PowerPC/PowerPC.h @@ -349,5 +349,7 @@ void CheckBreakPointsFromJIT(PowerPCManager& power_pc); void RoundingModeUpdated(PowerPCState& ppc_state); void MSRUpdated(PowerPCState& ppc_state); +void MMCRUpdated(PowerPCState& ppc_state); +void RecalculateAllFeatureFlags(PowerPCState& ppc_state); } // namespace PowerPC