From 0659827485afd86d15673b53bc55c36e52fabca8 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 24 Dec 2022 12:59:02 +0100 Subject: [PATCH 1/2] Jit64: Check for breakpoint without disabling block link Small optimization. By not calling WriteExit, the block linking system never finds out about the exit we're doing, saving us from having to disable block linking. --- Source/Core/Core/PowerPC/Jit64/Jit.cpp | 13 ++++++------- Source/Core/Core/PowerPC/Jit64/JitAsm.cpp | 2 ++ Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h | 1 + 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index 4b94c5c076..3bd14acba6 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -799,11 +799,10 @@ void Jit64::Jit(u32 em_address, bool clear_cache_and_retry_on_failure) if (m_enable_debugging) { - // We can link blocks as long as we are not single stepping and there are no breakpoints here + // We can link blocks as long as we are not single stepping EnableBlockLink(); EnableOptimization(); - // Comment out the following to disable breakpoints (speed-up) if (!jo.profile_blocks) { if (CPU::IsStepping()) @@ -1099,10 +1098,6 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) if (m_enable_debugging && breakpoints.IsAddressBreakPoint(op.address) && !CPU::IsStepping()) { - // Turn off block linking if there are breakpoints so that the Step Over command does not - // link this block. - jo.enableBlocklink = false; - gpr.Flush(); fpr.Flush(); @@ -1114,7 +1109,11 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) TEST(32, MatR(RSCRATCH), Imm32(0xFFFFFFFF)); FixupBranch noBreakpoint = J_CC(CC_Z); - WriteExit(op.address); + Cleanup(); + MOV(32, PPCSTATE(npc), Imm32(op.address)); + SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); + JMP(asm_routines.dispatcher_exit, true); + SetJumpTarget(noBreakpoint); } diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp index a162caebe5..3dc98a317e 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp @@ -205,8 +205,10 @@ void Jit64AsmRoutineManager::Generate() J_CC(CC_Z, outerLoop); // Landing pad for drec space + dispatcher_exit = GetCodePtr(); if (enable_debugging) SetJumpTarget(dbg_exit); + ResetStack(*this); if (m_stack_top) { diff --git a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h index d975408ed3..4fd58bc897 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h @@ -19,6 +19,7 @@ struct CommonAsmRoutinesBase const u8* dispatcher; const u8* dispatcher_no_timing_check; const u8* dispatcher_no_check; + const u8* dispatcher_exit; const u8* do_timing; From c744ff4934fb28da1a0a0b545c903b4e4e336faa Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 24 Dec 2022 13:00:45 +0100 Subject: [PATCH 2/2] JitArm64: Implement breakpoints Plus two miscellaneous debugger features that I found along the way when reading Jit64's code for comparison: bJITNoBlockLinking and tracing. Fixes https://bugs.dolphin-emu.org/issues/13127. --- Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 101 +++++++++++++++++-- Source/Core/Core/PowerPC/JitArm64/Jit.h | 5 + Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp | 22 +++- 3 files changed, 117 insertions(+), 11 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 79fde63a93..e34a5583a6 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -17,12 +17,14 @@ #include "Core/Core.h" #include "Core/CoreTiming.h" #include "Core/HLE/HLE.h" +#include "Core/HW/CPU.h" #include "Core/HW/GPFifo.h" #include "Core/HW/Memmap.h" #include "Core/HW/ProcessorInterface.h" #include "Core/PatchEngine.h" #include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" #include "Core/PowerPC/JitInterface.h" +#include "Core/PowerPC/PowerPC.h" #include "Core/PowerPC/Profiler.h" #include "Core/System.h" @@ -57,9 +59,10 @@ void JitArm64::Init() auto& memory = system.GetMemory(); jo.fastmem_arena = m_fastmem_enabled && memory.InitFastmemArena(); - jo.enableBlocklink = true; jo.optimizeGatherPipe = true; UpdateMemoryAndExceptionOptions(); + SetBlockLinkingEnabled(true); + SetOptimizationEnabled(true); gpr.Init(this); fpr.Init(this); blocks.Init(); @@ -67,9 +70,6 @@ void JitArm64::Init() code_block.m_stats = &js.st; code_block.m_gpa = &js.gpa; code_block.m_fpa = &js.fpa; - analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE); - analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CARRY_MERGE); - analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_FOLLOW); m_enable_blr_optimization = jo.enableBlocklink && m_fastmem_enabled && !m_enable_debugging; m_cleanup_after_stackfault = false; @@ -80,6 +80,27 @@ void JitArm64::Init() ResetFreeMemoryRanges(); } +void JitArm64::SetBlockLinkingEnabled(bool enabled) +{ + jo.enableBlocklink = enabled && !SConfig::GetInstance().bJITNoBlockLinking; +} + +void JitArm64::SetOptimizationEnabled(bool enabled) +{ + if (enabled) + { + analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE); + analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CARRY_MERGE); + analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_FOLLOW); + } + else + { + analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE); + analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_CARRY_MERGE); + analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_FOLLOW); + } +} + bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx) { // Ifdef this since the exception handler runs on a separate thread on macOS (ARM) @@ -661,6 +682,31 @@ void JitArm64::SingleStep() pExecAddr(); } +void JitArm64::Trace() +{ + std::string regs; + std::string fregs; + +#ifdef JIT_LOG_GPR + for (size_t i = 0; i < std::size(PowerPC::ppcState.gpr); i++) + { + regs += fmt::format("r{:02d}: {:08x} ", i, PowerPC::ppcState.gpr[i]); + } +#endif + +#ifdef JIT_LOG_FPR + for (size_t i = 0; i < std::size(PowerPC::ppcState.ps); i++) + { + fregs += fmt::format("f{:02d}: {:016x} ", i, PowerPC::ppcState.ps[i].PS0AsU64()); + } +#endif + + DEBUG_LOG_FMT(DYNA_REC, + "JitArm64 PC: {:08x} SRR0: {:08x} SRR1: {:08x} FPSCR: {:08x} " + "MSR: {:08x} LR: {:08x} {} {}", + PC, SRR0, SRR1, FPSCR.Hex, MSR.Hex, PowerPC::ppcState.spr[8], regs, fregs); +} + void JitArm64::Jit(u32 em_address) { Jit(em_address, true); @@ -706,8 +752,22 @@ void JitArm64::Jit(u32 em_address, bool clear_cache_and_retry_on_failure) if (m_enable_debugging) { - // Comment out the following to disable breakpoints (speed-up) - block_size = 1; + // We can link blocks as long as we are not single stepping + SetBlockLinkingEnabled(true); + SetOptimizationEnabled(true); + + if (!jo.profile_blocks) + { + if (CPU::IsStepping()) + { + block_size = 1; + + // Do not link this block to other blocks while single stepping + SetBlockLinkingEnabled(false); + SetOptimizationEnabled(false); + } + Trace(); + } } // Analyze the block, collect all instructions it is made of (including inlining, @@ -1006,11 +1066,38 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) js.firstFPInstructionFound = true; } - if (bJITRegisterCacheOff) + if (m_enable_debugging && PowerPC::breakpoints.IsAddressBreakPoint(op.address) && + !CPU::IsStepping()) { + FlushCarry(); gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); + + static_assert(PPCSTATE_OFF(pc) <= 252); + static_assert(PPCSTATE_OFF(pc) + 4 == PPCSTATE_OFF(npc)); + + MOVI2R(DISPATCHER_PC, op.address); + STP(IndexType::Signed, DISPATCHER_PC, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc)); + MOVP2R(ARM64Reg::X0, &PowerPC::CheckBreakPoints); + BLR(ARM64Reg::X0); + + LDR(IndexType::Unsigned, ARM64Reg::W0, ARM64Reg::X0, + MOVPage2R(ARM64Reg::X0, CPU::GetStatePtr())); + FixupBranch no_breakpoint = CBZ(ARM64Reg::W0); + + Cleanup(); + EndTimeProfile(js.curBlock); + DoDownCount(); + B(dispatcher_exit); + + SetJumpTarget(no_breakpoint); + } + + if (bJITRegisterCacheOff) + { FlushCarry(); + gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); + fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); } CompileInstruction(op); diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index ae87d815c8..00f761ace9 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -190,6 +190,9 @@ protected: const u8* slowmem_code; }; + void SetBlockLinkingEnabled(bool enabled); + void SetOptimizationEnabled(bool enabled); + void CompileInstruction(PPCAnalyst::CodeOp& op); bool HandleFunctionHooking(u32 address); @@ -276,6 +279,8 @@ protected: bool DoJit(u32 em_address, JitBlock* b, u32 nextPC); + void Trace(); + // Finds a free memory region and sets the near and far code emitters to point at that region. // Returns false if no free memory region can be found for either of the two. bool SetEmitterStateToFreeCodeRegion(); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp index dde07de9a4..7d4a25f7f8 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp @@ -8,10 +8,12 @@ #include "Common/Arm64Emitter.h" #include "Common/BitUtils.h" #include "Common/CommonTypes.h" +#include "Common/Config/Config.h" #include "Common/FloatUtils.h" #include "Common/JitRegister.h" #include "Common/MathUtil.h" +#include "Core/Config/MainSettings.h" #include "Core/CoreTiming.h" #include "Core/HW/CPU.h" #include "Core/HW/Memmap.h" @@ -28,6 +30,8 @@ void JitArm64::GenerateAsm() { const Common::ScopedJITPageWriteAndNoExecute enable_jit_page_writes; + const bool enable_debugging = Config::Get(Config::MAIN_ENABLE_DEBUGGING); + // This value is all of the callee saved registers that we are required to save. // According to the AACPS64 we need to save R19 ~ R30 and Q8 ~ Q15. const u32 ALL_CALLEE_SAVED = 0x7FF80000; @@ -85,6 +89,15 @@ void JitArm64::GenerateAsm() FixupBranch bail = B(CC_LE); dispatcher_no_timing_check = GetCodePtr(); + + FixupBranch debug_exit; + if (enable_debugging) + { + LDR(IndexType::Unsigned, ARM64Reg::W0, ARM64Reg::X0, + MOVPage2R(ARM64Reg::X0, CPU::GetStatePtr())); + debug_exit = CBNZ(ARM64Reg::W0); + } + dispatcher_no_check = GetCodePtr(); bool assembly_dispatcher = true; @@ -174,9 +187,7 @@ void JitArm64::GenerateAsm() // Check the state pointer to see if we are exiting // Gets checked on at the end of every slice LDR(IndexType::Unsigned, ARM64Reg::W0, ARM64Reg::X0, MOVPage2R(ARM64Reg::X0, CPU::GetStatePtr())); - - CMP(ARM64Reg::W0, 0); - FixupBranch Exit = B(CC_NEQ); + FixupBranch exit = CBNZ(ARM64Reg::W0); SetJumpTarget(to_start_of_timing_slice); MOVP2R(ARM64Reg::X8, &CoreTiming::GlobalAdvance); @@ -188,7 +199,10 @@ void JitArm64::GenerateAsm() // We can safely assume that downcount >= 1 B(dispatcher_no_check); - SetJumpTarget(Exit); + dispatcher_exit = GetCodePtr(); + SetJumpTarget(exit); + if (enable_debugging) + SetJumpTarget(debug_exit); // Reset the stack pointer, as the BLR optimization have touched it. LDR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1,