diff --git a/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/NativeLibrary.java b/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/NativeLibrary.java index a3d02e31ee..8abec6927b 100644 --- a/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/NativeLibrary.java +++ b/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/NativeLibrary.java @@ -385,16 +385,9 @@ public final class NativeLibrary public static native boolean IsRunningAndUnpaused(); /** - * Enables or disables CPU block profiling - * - * @param enable + * Writes out the JitBlock Cache log dump */ - public static native void SetProfiling(boolean enable); - - /** - * Writes out the block profile results - */ - public static native void WriteProfileResults(); + public static native void WriteJitBlockLogDump(); /** * Native EGL functions not exposed by Java bindings diff --git a/Source/Android/jni/MainAndroid.cpp b/Source/Android/jni/MainAndroid.cpp index e07b1bd275..389bb815fc 100644 --- a/Source/Android/jni/MainAndroid.cpp +++ b/Source/Android/jni/MainAndroid.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -22,6 +23,7 @@ #include "Common/Event.h" #include "Common/FileUtil.h" #include "Common/Flag.h" +#include "Common/IOFile.h" #include "Common/IniFile.h" #include "Common/Logging/LogManager.h" #include "Common/MsgHandler.h" @@ -42,7 +44,6 @@ #include "Core/Host.h" #include "Core/PowerPC/JitInterface.h" #include "Core/PowerPC/PowerPC.h" -#include "Core/PowerPC/Profiler.h" #include "Core/State.h" #include "Core/System.h" @@ -404,26 +405,34 @@ JNIEXPORT jint JNICALL Java_org_dolphinemu_dolphinemu_NativeLibrary_GetMaxLogLev return static_cast(Common::Log::MAX_LOGLEVEL); } -JNIEXPORT void JNICALL Java_org_dolphinemu_dolphinemu_NativeLibrary_SetProfiling(JNIEnv*, jclass, - jboolean enable) +JNIEXPORT void JNICALL Java_org_dolphinemu_dolphinemu_NativeLibrary_WriteJitBlockLogDump( + JNIEnv* env, jclass native_library_class) { HostThreadLock guard; auto& system = Core::System::GetInstance(); auto& jit_interface = system.GetJitInterface(); - const Core::CPUThreadGuard cpu_guard(system); - jit_interface.ClearCache(cpu_guard); - jit_interface.SetProfilingState(enable ? JitInterface::ProfilingState::Enabled : - JitInterface::ProfilingState::Disabled); -} - -JNIEXPORT void JNICALL Java_org_dolphinemu_dolphinemu_NativeLibrary_WriteProfileResults(JNIEnv*, - jclass) -{ - HostThreadLock guard; - std::string filename = File::GetUserPath(D_DUMP_IDX) + "Debug/profiler.txt"; - File::CreateFullPath(filename); - auto& jit_interface = Core::System::GetInstance().GetJitInterface(); - jit_interface.WriteProfileResults(filename); + if (jit_interface.GetCore() == nullptr) + { + env->CallStaticVoidMethod(native_library_class, IDCache::GetDisplayToastMsg(), + ToJString(env, Common::GetStringT("JIT is not active")), + static_cast(false)); + return; + } + const std::string filename = fmt::format("{}{}.txt", File::GetUserPath(D_DUMPDEBUG_JITBLOCKS_IDX), + SConfig::GetInstance().GetGameID()); + File::IOFile f(filename, "w"); + if (!f) + { + env->CallStaticVoidMethod( + native_library_class, IDCache::GetDisplayToastMsg(), + ToJString(env, Common::FmtFormatT("Failed to open \"{0}\" for writing.", filename)), + static_cast(false)); + return; + } + jit_interface.JitBlockLogDump(Core::CPUThreadGuard{system}, f.GetHandle()); + env->CallStaticVoidMethod(native_library_class, IDCache::GetDisplayToastMsg(), + ToJString(env, Common::FmtFormatT("Wrote to \"{0}\".", filename)), + static_cast(false)); } // Surface Handling diff --git a/Source/Core/Common/CommonPaths.h b/Source/Core/Common/CommonPaths.h index 1dce5c47f5..bd0f946991 100644 --- a/Source/Core/Common/CommonPaths.h +++ b/Source/Core/Common/CommonPaths.h @@ -77,6 +77,7 @@ #define DUMP_SSL_DIR "SSL" #define DUMP_DEBUG_DIR "Debug" #define DUMP_DEBUG_BRANCHWATCH_DIR "BranchWatch" +#define DUMP_DEBUG_JITBLOCKS_DIR "JitBlocks" #define LOGS_DIR "Logs" #define MAIL_LOGS_DIR "Mail" #define SHADERS_DIR "Shaders" diff --git a/Source/Core/Common/FileUtil.cpp b/Source/Core/Common/FileUtil.cpp index 20c57c2b5a..a93db638df 100644 --- a/Source/Core/Common/FileUtil.cpp +++ b/Source/Core/Common/FileUtil.cpp @@ -859,6 +859,8 @@ static void RebuildUserDirectories(unsigned int dir_index) s_user_paths[D_DUMPDEBUG_IDX] = s_user_paths[D_DUMP_IDX] + DUMP_DEBUG_DIR DIR_SEP; s_user_paths[D_DUMPDEBUG_BRANCHWATCH_IDX] = s_user_paths[D_DUMPDEBUG_IDX] + DUMP_DEBUG_BRANCHWATCH_DIR DIR_SEP; + s_user_paths[D_DUMPDEBUG_JITBLOCKS_IDX] = + s_user_paths[D_DUMPDEBUG_IDX] + DUMP_DEBUG_JITBLOCKS_DIR DIR_SEP; s_user_paths[D_LOGS_IDX] = s_user_paths[D_USER_IDX] + LOGS_DIR DIR_SEP; s_user_paths[D_MAILLOGS_IDX] = s_user_paths[D_LOGS_IDX] + MAIL_LOGS_DIR DIR_SEP; s_user_paths[D_THEMES_IDX] = s_user_paths[D_USER_IDX] + THEMES_DIR DIR_SEP; @@ -938,6 +940,8 @@ static void RebuildUserDirectories(unsigned int dir_index) s_user_paths[D_DUMPDEBUG_IDX] = s_user_paths[D_DUMP_IDX] + DUMP_DEBUG_DIR DIR_SEP; s_user_paths[D_DUMPDEBUG_BRANCHWATCH_IDX] = s_user_paths[D_DUMPDEBUG_IDX] + DUMP_DEBUG_BRANCHWATCH_DIR DIR_SEP; + s_user_paths[D_DUMPDEBUG_JITBLOCKS_IDX] = + s_user_paths[D_DUMPDEBUG_IDX] + DUMP_DEBUG_JITBLOCKS_DIR DIR_SEP; s_user_paths[F_MEM1DUMP_IDX] = s_user_paths[D_DUMP_IDX] + MEM1_DUMP; s_user_paths[F_MEM2DUMP_IDX] = s_user_paths[D_DUMP_IDX] + MEM2_DUMP; s_user_paths[F_ARAMDUMP_IDX] = s_user_paths[D_DUMP_IDX] + ARAM_DUMP; diff --git a/Source/Core/Common/FileUtil.h b/Source/Core/Common/FileUtil.h index 975ab55256..7d2d5e737a 100644 --- a/Source/Core/Common/FileUtil.h +++ b/Source/Core/Common/FileUtil.h @@ -54,6 +54,7 @@ enum D_DUMPSSL_IDX, D_DUMPDEBUG_IDX, D_DUMPDEBUG_BRANCHWATCH_IDX, + D_DUMPDEBUG_JITBLOCKS_IDX, D_LOAD_IDX, D_LOGS_IDX, D_MAILLOGS_IDX, diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt index fb2c0c18fa..9d477b50a5 100644 --- a/Source/Core/Core/CMakeLists.txt +++ b/Source/Core/Core/CMakeLists.txt @@ -523,7 +523,6 @@ add_library(core PowerPC/PPCSymbolDB.h PowerPC/PPCTables.cpp PowerPC/PPCTables.h - PowerPC/Profiler.h PowerPC/SignatureDB/CSVSignatureDB.cpp PowerPC/SignatureDB/CSVSignatureDB.h PowerPC/SignatureDB/DSYSignatureDB.cpp diff --git a/Source/Core/Core/Config/MainSettings.cpp b/Source/Core/Core/Config/MainSettings.cpp index 432abbeb24..0add7e42bf 100644 --- a/Source/Core/Core/Config/MainSettings.cpp +++ b/Source/Core/Core/Config/MainSettings.cpp @@ -509,6 +509,8 @@ const Info MAIN_DEBUG_JIT_SYSTEM_REGISTERS_OFF{ const Info MAIN_DEBUG_JIT_BRANCH_OFF{{System::Main, "Debug", "JitBranchOff"}, false}; const Info MAIN_DEBUG_JIT_REGISTER_CACHE_OFF{{System::Main, "Debug", "JitRegisterCacheOff"}, false}; +const Info MAIN_DEBUG_JIT_ENABLE_PROFILING{{System::Main, "Debug", "JitEnableProfiling"}, + false}; // Main.BluetoothPassthrough diff --git a/Source/Core/Core/Config/MainSettings.h b/Source/Core/Core/Config/MainSettings.h index 5c028d9b36..d7e78bf6e9 100644 --- a/Source/Core/Core/Config/MainSettings.h +++ b/Source/Core/Core/Config/MainSettings.h @@ -335,6 +335,7 @@ extern const Info MAIN_DEBUG_JIT_PAIRED_OFF; extern const Info MAIN_DEBUG_JIT_SYSTEM_REGISTERS_OFF; extern const Info MAIN_DEBUG_JIT_BRANCH_OFF; extern const Info MAIN_DEBUG_JIT_REGISTER_CACHE_OFF; +extern const Info MAIN_DEBUG_JIT_ENABLE_PROFILING; // Main.BluetoothPassthrough diff --git a/Source/Core/Core/PowerPC/CachedInterpreter/CachedInterpreter.cpp b/Source/Core/Core/PowerPC/CachedInterpreter/CachedInterpreter.cpp index b9d78514d5..fd7d175b76 100644 --- a/Source/Core/Core/PowerPC/CachedInterpreter/CachedInterpreter.cpp +++ b/Source/Core/Core/PowerPC/CachedInterpreter/CachedInterpreter.cpp @@ -316,7 +316,7 @@ void CachedInterpreter::Jit(u32 address) js.numFloatingPointInst = 0; js.curBlock = b; - b->normalEntry = GetCodePtr(); + b->normalEntry = b->near_begin = GetCodePtr(); for (u32 i = 0; i < code_block.m_num_instructions; i++) { @@ -378,6 +378,10 @@ void CachedInterpreter::Jit(u32 address) } m_code.emplace_back(); + b->near_end = GetCodePtr(); + b->far_begin = nullptr; + b->far_end = nullptr; + b->codeSize = static_cast(GetCodePtr() - b->normalEntry); b->originalSize = code_block.m_num_instructions; diff --git a/Source/Core/Core/PowerPC/Gekko.h b/Source/Core/Core/PowerPC/Gekko.h index 62a1743b45..86c6dd8632 100644 --- a/Source/Core/Core/PowerPC/Gekko.h +++ b/Source/Core/Core/PowerPC/Gekko.h @@ -931,6 +931,7 @@ enum CPUEmuFeatureFlags : u32 FEATURE_FLAG_MSR_DR = 1 << 0, FEATURE_FLAG_MSR_IR = 1 << 1, FEATURE_FLAG_PERFMON = 1 << 2, + FEATURE_FLAG_END_OF_ENUMERATION, }; constexpr s32 SignExt16(s16 x) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index ce1dc906d3..826a7ac8d6 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -43,7 +43,6 @@ #include "Core/PowerPC/MMU.h" #include "Core/PowerPC/PPCAnalyst.h" #include "Core/PowerPC/PowerPC.h" -#include "Core/PowerPC/Profiler.h" #include "Core/System.h" using namespace Gen; @@ -454,20 +453,11 @@ bool Jit64::Cleanup() did_something = true; } - if (jo.profile_blocks) + if (IsProfilingEnabled()) { ABI_PushRegistersAndAdjustStack({}, 0); - // get end tic - MOV(64, R(ABI_PARAM1), ImmPtr(&js.curBlock->profile_data.ticStop)); - ABI_CallFunction(QueryPerformanceCounter); - // tic counter += (end tic - start tic) - MOV(64, R(RSCRATCH2), ImmPtr(&js.curBlock->profile_data)); - MOV(64, R(RSCRATCH), MDisp(RSCRATCH2, offsetof(JitBlock::ProfileData, ticStop))); - SUB(64, R(RSCRATCH), MDisp(RSCRATCH2, offsetof(JitBlock::ProfileData, ticStart))); - ADD(64, R(RSCRATCH), MDisp(RSCRATCH2, offsetof(JitBlock::ProfileData, ticCounter))); - ADD(64, MDisp(RSCRATCH2, offsetof(JitBlock::ProfileData, downcountCounter)), - Imm32(js.downcountAmount)); - MOV(64, MDisp(RSCRATCH2, offsetof(JitBlock::ProfileData, ticCounter)), R(RSCRATCH)); + ABI_CallFunctionPC(&JitBlock::ProfileData::EndProfiling, js.curBlock->profile_data.get(), + js.downcountAmount); ABI_PopRegistersAndAdjustStack({}, 0); did_something = true; } @@ -773,7 +763,7 @@ void Jit64::Jit(u32 em_address, bool clear_cache_and_retry_on_failure) EnableBlockLink(); EnableOptimization(); - if (!jo.profile_blocks) + if (!IsProfilingEnabled()) { if (m_system.GetCPU().IsStepping()) { @@ -899,15 +889,9 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) } // Conditionally add profiling code. - if (jo.profile_blocks) - { - // get start tic - MOV(64, R(ABI_PARAM1), ImmPtr(&b->profile_data.ticStart)); - int offset = static_cast(offsetof(JitBlock::ProfileData, runCount)) - - static_cast(offsetof(JitBlock::ProfileData, ticStart)); - ADD(64, MDisp(ABI_PARAM1, offset), Imm8(1)); - ABI_CallFunction(QueryPerformanceCounter); - } + if (IsProfilingEnabled()) + ABI_CallFunctionP(&JitBlock::ProfileData::BeginProfiling, b->profile_data.get()); + #if defined(_DEBUG) || defined(DEBUGFAST) || defined(NAN_CHECK) // should help logged stack-traces become more accurate MOV(32, PPCSTATE(pc), Imm32(js.blockStart)); diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index b8fada1dcb..8e08d2b21d 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -26,7 +26,6 @@ #include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" #include "Core/PowerPC/JitInterface.h" #include "Core/PowerPC/PowerPC.h" -#include "Core/PowerPC/Profiler.h" #include "Core/System.h" using namespace Arm64Gen; @@ -408,7 +407,11 @@ void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return ARM64Reg exit_address_after_return_reg) { Cleanup(); - EndTimeProfile(js.curBlock); + if (IsProfilingEnabled()) + { + ABI_CallFunction(&JitBlock::ProfileData::EndProfiling, js.curBlock->profile_data.get(), + js.downcountAmount); + } DoDownCount(); LK &= m_enable_blr_optimization; @@ -509,7 +512,11 @@ void JitArm64::WriteExit(Arm64Gen::ARM64Reg dest, bool LK, u32 exit_address_afte MOV(DISPATCHER_PC, dest); Cleanup(); - EndTimeProfile(js.curBlock); + if (IsProfilingEnabled()) + { + ABI_CallFunction(&JitBlock::ProfileData::EndProfiling, js.curBlock->profile_data.get(), + js.downcountAmount); + } DoDownCount(); LK &= m_enable_blr_optimization; @@ -672,7 +679,11 @@ void JitArm64::WriteBLRExit(Arm64Gen::ARM64Reg dest) MOV(DISPATCHER_PC, dest); Cleanup(); - EndTimeProfile(js.curBlock); + if (IsProfilingEnabled()) + { + ABI_CallFunction(&JitBlock::ProfileData::EndProfiling, js.curBlock->profile_data.get(), + js.downcountAmount); + } // Check if {PPC_PC, feature_flags} matches the current state, then RET to ARM_PC. LDP(IndexType::Post, ARM64Reg::X2, ARM64Reg::X1, ARM64Reg::SP, 16); @@ -736,7 +747,11 @@ void JitArm64::WriteExceptionExit(ARM64Reg dest, bool only_external, bool always if (!always_exception) SetJumpTarget(no_exceptions); - EndTimeProfile(js.curBlock); + if (IsProfilingEnabled()) + { + ABI_CallFunction(&JitBlock::ProfileData::EndProfiling, js.curBlock->profile_data.get(), + js.downcountAmount); + } DoDownCount(); B(dispatcher); @@ -804,44 +819,6 @@ void JitArm64::DumpCode(const u8* start, const u8* end) WARN_LOG_FMT(DYNA_REC, "Code dump from {} to {}:\n{}", fmt::ptr(start), fmt::ptr(end), output); } -void JitArm64::BeginTimeProfile(JitBlock* b) -{ - MOVP2R(ARM64Reg::X0, &b->profile_data); - LDR(IndexType::Unsigned, ARM64Reg::X1, ARM64Reg::X0, offsetof(JitBlock::ProfileData, runCount)); - ADD(ARM64Reg::X1, ARM64Reg::X1, 1); - - // Fetch the current counter register - CNTVCT(ARM64Reg::X2); - - // stores runCount and ticStart - STP(IndexType::Signed, ARM64Reg::X1, ARM64Reg::X2, ARM64Reg::X0, - offsetof(JitBlock::ProfileData, runCount)); -} - -void JitArm64::EndTimeProfile(JitBlock* b) -{ - if (!jo.profile_blocks) - return; - - // Fetch the current counter register - CNTVCT(ARM64Reg::X1); - - MOVP2R(ARM64Reg::X0, &b->profile_data); - - LDR(IndexType::Unsigned, ARM64Reg::X2, ARM64Reg::X0, offsetof(JitBlock::ProfileData, ticStart)); - SUB(ARM64Reg::X1, ARM64Reg::X1, ARM64Reg::X2); - - // loads ticCounter and downcountCounter - LDP(IndexType::Signed, ARM64Reg::X2, ARM64Reg::X3, ARM64Reg::X0, - offsetof(JitBlock::ProfileData, ticCounter)); - ADD(ARM64Reg::X2, ARM64Reg::X2, ARM64Reg::X1); - ADDI2R(ARM64Reg::X3, ARM64Reg::X3, js.downcountAmount, ARM64Reg::X1); - - // stores ticCounter and downcountCounter - STP(IndexType::Signed, ARM64Reg::X2, ARM64Reg::X3, ARM64Reg::X0, - offsetof(JitBlock::ProfileData, ticCounter)); -} - void JitArm64::Run() { ProtectStack(); @@ -933,7 +910,7 @@ void JitArm64::Jit(u32 em_address, bool clear_cache_and_retry_on_failure) SetBlockLinkingEnabled(true); SetOptimizationEnabled(true); - if (!jo.profile_blocks) + if (!IsProfilingEnabled()) { if (cpu.IsStepping()) { @@ -1052,11 +1029,8 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) b->normalEntry = GetWritableCodePtr(); // Conditionally add profiling code. - if (jo.profile_blocks) - { - // get start tic - BeginTimeProfile(b); - } + if (IsProfilingEnabled()) + ABI_CallFunction(&JitBlock::ProfileData::BeginProfiling, b->profile_data.get()); if (code_block.m_gqr_used.Count() == 1 && js.pairedQuantizeAddresses.find(js.blockStart) == js.pairedQuantizeAddresses.end()) @@ -1246,7 +1220,11 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) FixupBranch no_breakpoint = CBZ(ARM64Reg::W0); Cleanup(); - EndTimeProfile(js.curBlock); + if (IsProfilingEnabled()) + { + ABI_CallFunction(&JitBlock::ProfileData::EndProfiling, b->profile_data.get(), + js.downcountAmount); + } DoDownCount(); B(dispatcher_exit); diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 6d0ded667a..a572b2e38f 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -307,10 +307,6 @@ protected: void GenerateQuantizedLoads(); void GenerateQuantizedStores(); - // Profiling - void BeginTimeProfile(JitBlock* b); - void EndTimeProfile(JitBlock* b); - void EmitUpdateMembase(); void MSRUpdated(u32 msr); void MSRUpdated(Arm64Gen::ARM64Reg msr); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp index c999388760..5ec9af3967 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp @@ -57,7 +57,7 @@ // After resetting the stack to the top, we call _resetstkoflw() to restore // the guard page at the 256kb mark. -const std::array*>, 22> JitBase::JIT_SETTINGS{{ +const std::array*>, 23> JitBase::JIT_SETTINGS{{ {&JitBase::bJITOff, &Config::MAIN_DEBUG_JIT_OFF}, {&JitBase::bJITLoadStoreOff, &Config::MAIN_DEBUG_JIT_LOAD_STORE_OFF}, {&JitBase::bJITLoadStorelXzOff, &Config::MAIN_DEBUG_JIT_LOAD_STORE_LXZ_OFF}, @@ -71,6 +71,7 @@ const std::array*>, 22> JitB {&JitBase::bJITSystemRegistersOff, &Config::MAIN_DEBUG_JIT_SYSTEM_REGISTERS_OFF}, {&JitBase::bJITBranchOff, &Config::MAIN_DEBUG_JIT_BRANCH_OFF}, {&JitBase::bJITRegisterCacheOff, &Config::MAIN_DEBUG_JIT_REGISTER_CACHE_OFF}, + {&JitBase::m_enable_profiling, &Config::MAIN_DEBUG_JIT_ENABLE_PROFILING}, {&JitBase::m_enable_debugging, &Config::MAIN_ENABLE_DEBUGGING}, {&JitBase::m_enable_branch_following, &Config::MAIN_JIT_FOLLOW_BRANCH}, {&JitBase::m_enable_float_exceptions, &Config::MAIN_FLOAT_EXCEPTIONS}, diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h index d3579368f4..cf0ce78dbc 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h @@ -86,7 +86,6 @@ protected: bool memcheck; bool fp_exceptions; bool div_by_zero_exceptions; - bool profile_blocks; }; struct JitState { @@ -149,6 +148,7 @@ protected: bool bJITSystemRegistersOff = false; bool bJITBranchOff = false; bool bJITRegisterCacheOff = false; + bool m_enable_profiling = false; bool m_enable_debugging = false; bool m_enable_branch_following = false; bool m_enable_float_exceptions = false; @@ -163,7 +163,7 @@ protected: bool m_cleanup_after_stackfault = false; u8* m_stack_guard = nullptr; - static const std::array*>, 22> JIT_SETTINGS; + static const std::array*>, 23> JIT_SETTINGS; bool DoesConfigNeedRefresh(); void RefreshConfig(); @@ -187,6 +187,7 @@ public: JitBase& operator=(JitBase&&) = delete; ~JitBase() override; + bool IsProfilingEnabled() const { return m_enable_profiling; } bool IsDebuggingEnabled() const { return m_enable_debugging; } static const u8* Dispatch(JitBase& jit); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp index d8d6e8d13b..8029d06c2d 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp @@ -32,6 +32,18 @@ bool JitBlock::OverlapsPhysicalRange(u32 address, u32 length) const physical_addresses.lower_bound(address + length); } +void JitBlock::ProfileData::BeginProfiling(ProfileData* data) +{ + data->run_count += 1; + data->time_start = Clock::now(); +} + +void JitBlock::ProfileData::EndProfiling(ProfileData* data, int downcount_amount) +{ + data->cycles_spent += downcount_amount; + data->time_spent += Clock::now() - data->time_start; +} + JitBaseBlockCache::JitBaseBlockCache(JitBase& jit) : m_jit{jit} { } @@ -98,7 +110,8 @@ JitBlock** JitBaseBlockCache::GetFastBlockMapFallback() return m_fast_block_map_fallback.data(); } -void JitBaseBlockCache::RunOnBlocks(std::function f) +void JitBaseBlockCache::RunOnBlocks(const Core::CPUThreadGuard&, + std::function f) const { for (const auto& e : block_map) f(e.second); @@ -107,7 +120,7 @@ void JitBaseBlockCache::RunOnBlocks(std::function f) JitBlock* JitBaseBlockCache::AllocateBlock(u32 em_address) { const u32 physical_address = m_jit.m_mmu.JitCache_TranslateAddress(em_address).address; - JitBlock& b = block_map.emplace(physical_address, JitBlock())->second; + JitBlock& b = block_map.emplace(physical_address, m_jit.IsProfilingEnabled())->second; b.effectiveAddress = em_address; b.physicalAddress = physical_address; b.feature_flags = m_jit.m_ppc_state.feature_flags; diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.h b/Source/Core/Core/PowerPC/JitCommon/JitCache.h index feb872ee7b..d44cc097d3 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.h @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -64,6 +65,27 @@ static_assert(std::is_standard_layout_v, "JitBlockData must have a // address. struct JitBlock : public JitBlockData { + // Software profiling data for JIT block. + struct ProfileData + { + using Clock = std::chrono::steady_clock; + + static void BeginProfiling(ProfileData* data); + static void EndProfiling(ProfileData* data, int downcount_amount); + + std::size_t run_count = 0; + u64 cycles_spent = 0; + Clock::duration time_spent = {}; + + private: + Clock::time_point time_start; + }; + + explicit JitBlock(bool profiling_enabled) + : profile_data(profiling_enabled ? std::make_unique() : nullptr) + { + } + bool OverlapsPhysicalRange(u32 address, u32 length) const; // Information about exits to a known address from this block. @@ -83,15 +105,7 @@ struct JitBlock : public JitBlockData // This set stores all physical addresses of all occupied instructions. std::set physical_addresses; - // Block profiling data, structure is inlined in Jit.cpp - struct ProfileData - { - u64 ticCounter; - u64 downcountCounter; - u64 runCount; - u64 ticStart; - u64 ticStop; - } profile_data = {}; + std::unique_ptr profile_data; }; typedef void (*CompiledCode)(); @@ -146,7 +160,7 @@ public: // Code Cache u8** GetEntryPoints(); JitBlock** GetFastBlockMapFallback(); - void RunOnBlocks(std::function f); + void RunOnBlocks(const Core::CPUThreadGuard& guard, std::function f) const; JitBlock* AllocateBlock(u32 em_address); void FinalizeBlock(JitBlock& block, bool block_link, const std::set& physical_addresses); diff --git a/Source/Core/Core/PowerPC/JitInterface.cpp b/Source/Core/Core/PowerPC/JitInterface.cpp index 8372bf302a..7c01b1fa83 100644 --- a/Source/Core/Core/PowerPC/JitInterface.cpp +++ b/Source/Core/Core/PowerPC/JitInterface.cpp @@ -4,7 +4,6 @@ #include "Core/PowerPC/JitInterface.h" #include -#include #include #include @@ -19,7 +18,6 @@ #include "Common/Assert.h" #include "Common/ChunkFile.h" #include "Common/CommonTypes.h" -#include "Common/IOFile.h" #include "Common/MsgHandler.h" #include "Core/Core.h" @@ -29,7 +27,6 @@ #include "Core/PowerPC/MMU.h" #include "Core/PowerPC/PPCSymbolDB.h" #include "Core/PowerPC/PowerPC.h" -#include "Core/PowerPC/Profiler.h" #include "Core/System.h" #ifdef _M_X86_64 @@ -90,14 +87,6 @@ CPUCoreBase* JitInterface::GetCore() const return m_jit.get(); } -void JitInterface::SetProfilingState(ProfilingState state) -{ - if (!m_jit) - return; - - m_jit->jo.profile_blocks = state == ProfilingState::Enabled; -} - void JitInterface::UpdateMembase() { if (!m_jit) @@ -123,58 +112,80 @@ void JitInterface::UpdateMembase() } } -void JitInterface::WriteProfileResults(const std::string& filename) const +static std::string_view GetDescription(const CPUEmuFeatureFlags flags) { - Profiler::ProfileStats prof_stats; - GetProfileResults(&prof_stats); - - File::IOFile f(filename, "w"); - if (!f) - { - PanicAlertFmt("Failed to open {}", filename); - return; - } - f.WriteString("origAddr\tblkName\trunCount\tcost\ttimeCost\tpercent\ttimePercent\tOvAllinBlkTime(" - "ms)\tblkCodeSize\n"); - for (auto& stat : prof_stats.block_stats) - { - std::string name = m_system.GetPPCSymbolDB().GetDescription(stat.addr); - double percent = 100.0 * (double)stat.cost / (double)prof_stats.cost_sum; - double timePercent = 100.0 * (double)stat.tick_counter / (double)prof_stats.timecost_sum; - f.WriteString(fmt::format("{0:08x}\t{1}\t{2}\t{3}\t{4}\t{5:.2f}\t{6:.2f}\t{7:.2f}\t{8}\n", - stat.addr, name, stat.run_count, stat.cost, stat.tick_counter, - percent, timePercent, - static_cast(stat.tick_counter) * 1000.0 / - static_cast(prof_stats.countsPerSec), - stat.block_size)); - } + static constexpr std::array + descriptions = { + "", "DR", "IR", "DR|IR", "PERFMON", "DR|PERFMON", "IR|PERFMON", "DR|IR|PERFMON", + }; + return descriptions[flags]; } -void JitInterface::GetProfileResults(Profiler::ProfileStats* prof_stats) const +void JitInterface::JitBlockLogDump(const Core::CPUThreadGuard& guard, std::FILE* file) const { - // Can't really do this with no m_jit core available + std::fputs( + "ppcFeatureFlags\tppcAddress\tppcSize\thostNearSize\thostFarSize\trunCount\tcyclesSpent" + "\tcyclesAverage\tcyclesPercent\ttimeSpent(ns)\ttimeAverage(ns)\ttimePercent\tsymbol\n", + file); + if (!m_jit) return; - prof_stats->cost_sum = 0; - prof_stats->timecost_sum = 0; - prof_stats->block_stats.clear(); + if (m_jit->IsProfilingEnabled()) + { + u64 overall_cycles_spent = 0; + JitBlock::ProfileData::Clock::duration overall_time_spent = {}; + m_jit->GetBlockCache()->RunOnBlocks(guard, [&](const JitBlock& block) { + overall_cycles_spent += block.profile_data->cycles_spent; + overall_time_spent += block.profile_data->time_spent; + }); + m_jit->GetBlockCache()->RunOnBlocks(guard, [&](const JitBlock& block) { + const Common::Symbol* const symbol = + m_jit->m_ppc_symbol_db.GetSymbolFromAddr(block.effectiveAddress); + const JitBlock::ProfileData* const data = block.profile_data.get(); - const Core::CPUThreadGuard guard(m_system); - QueryPerformanceFrequency((LARGE_INTEGER*)&prof_stats->countsPerSec); - m_jit->GetBlockCache()->RunOnBlocks([&prof_stats](const JitBlock& block) { - const auto& data = block.profile_data; - u64 cost = data.downcountCounter; - u64 timecost = data.ticCounter; - // Todo: tweak. - if (data.runCount >= 1) - prof_stats->block_stats.emplace_back(block.effectiveAddress, cost, timecost, data.runCount, - block.codeSize); - prof_stats->cost_sum += cost; - prof_stats->timecost_sum += timecost; - }); + const double cycles_percent = + overall_cycles_spent == 0 ? double{} : 100.0 * data->cycles_spent / overall_cycles_spent; + const double time_percent = overall_time_spent == JitBlock::ProfileData::Clock::duration{} ? + double{} : + 100.0 * data->time_spent.count() / overall_time_spent.count(); + const double cycles_average = data->run_count == 0 ? + double{} : + static_cast(data->cycles_spent) / data->run_count; + const double time_average = + data->run_count == 0 ? + double{} : + std::chrono::duration_cast>(data->time_spent) + .count() / + data->run_count; - sort(prof_stats->block_stats.begin(), prof_stats->block_stats.end()); + const std::size_t host_near_code_size = block.near_end - block.near_begin; + const std::size_t host_far_code_size = block.far_end - block.far_begin; + + fmt::println( + file, "{}\t{:08x}\t{}\t{}\t{}\t{}\t{}\t{:.6f}\t{:.6f}\t{}\t{:.6f}\t{:.6f}\t\"{}\"", + GetDescription(block.feature_flags), block.effectiveAddress, + block.originalSize * sizeof(UGeckoInstruction), host_near_code_size, host_far_code_size, + data->run_count, data->cycles_spent, cycles_average, cycles_percent, + std::chrono::duration_cast(data->time_spent).count(), + time_average, time_percent, symbol ? std::string_view{symbol->name} : ""); + }); + } + else + { + m_jit->GetBlockCache()->RunOnBlocks(guard, [&](const JitBlock& block) { + const Common::Symbol* const symbol = + m_jit->m_ppc_symbol_db.GetSymbolFromAddr(block.effectiveAddress); + + const std::size_t host_near_code_size = block.near_end - block.near_begin; + const std::size_t host_far_code_size = block.far_end - block.far_begin; + + fmt::println(file, "{}\t{:08x}\t{}\t{}\t{}\t-\t-\t-\t-\t-\t-\t-\t\"{}\"", + GetDescription(block.feature_flags), block.effectiveAddress, + block.originalSize * sizeof(UGeckoInstruction), host_near_code_size, + host_far_code_size, symbol ? std::string_view{symbol->name} : ""); + }); + } } std::variant diff --git a/Source/Core/Core/PowerPC/JitInterface.h b/Source/Core/Core/PowerPC/JitInterface.h index 4cf7c3179c..17d0796bfd 100644 --- a/Source/Core/Core/PowerPC/JitInterface.h +++ b/Source/Core/Core/PowerPC/JitInterface.h @@ -3,6 +3,9 @@ #pragma once +#include +#include +#include #include #include #include @@ -24,11 +27,6 @@ namespace PowerPC enum class CPUCore; } -namespace Profiler -{ -struct ProfileStats; -} - class JitInterface { public: @@ -45,11 +43,6 @@ public: CPUCoreBase* GetCore() const; // Debugging - enum class ProfilingState - { - Enabled, - Disabled - }; enum class GetHostCodeError { NoJitActive, @@ -63,9 +56,7 @@ public: }; void UpdateMembase(); - void SetProfilingState(ProfilingState state); - void WriteProfileResults(const std::string& filename) const; - void GetProfileResults(Profiler::ProfileStats* prof_stats) const; + void JitBlockLogDump(const Core::CPUThreadGuard& guard, std::FILE* file) const; std::variant GetHostCode(u32 address) const; // Memory Utilities diff --git a/Source/Core/Core/PowerPC/Profiler.h b/Source/Core/Core/PowerPC/Profiler.h deleted file mode 100644 index 3c4b3eb33b..0000000000 --- a/Source/Core/Core/PowerPC/Profiler.h +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include -#include -#include - -#include "Common/CommonTypes.h" - -namespace Profiler -{ -struct BlockStat -{ - BlockStat(u32 _addr, u64 c, u64 ticks, u64 run, u32 size) - : addr(_addr), cost(c), tick_counter(ticks), run_count(run), block_size(size) - { - } - u32 addr; - u64 cost; - u64 tick_counter; - u64 run_count; - u32 block_size; - - bool operator<(const BlockStat& other) const { return cost > other.cost; } -}; -struct ProfileStats -{ - std::vector block_stats; - u64 cost_sum = 0; - u64 timecost_sum = 0; - u64 countsPerSec = 0; -}; - -} // namespace Profiler diff --git a/Source/Core/DolphinLib.props b/Source/Core/DolphinLib.props index 5622bef028..935efe89d2 100644 --- a/Source/Core/DolphinLib.props +++ b/Source/Core/DolphinLib.props @@ -447,7 +447,6 @@ - diff --git a/Source/Core/UICommon/UICommon.cpp b/Source/Core/UICommon/UICommon.cpp index 5ff9364dd0..0875b53383 100644 --- a/Source/Core/UICommon/UICommon.cpp +++ b/Source/Core/UICommon/UICommon.cpp @@ -76,6 +76,7 @@ static void CreateDumpPath(std::string path) File::CreateFullPath(File::GetUserPath(D_DUMPTEXTURES_IDX)); File::CreateFullPath(File::GetUserPath(D_DUMPDEBUG_IDX)); File::CreateFullPath(File::GetUserPath(D_DUMPDEBUG_BRANCHWATCH_IDX)); + File::CreateFullPath(File::GetUserPath(D_DUMPDEBUG_JITBLOCKS_IDX)); } static void CreateLoadPath(std::string path) @@ -257,6 +258,7 @@ void CreateDirectories() File::CreateFullPath(File::GetUserPath(D_DUMPTEXTURES_IDX)); File::CreateFullPath(File::GetUserPath(D_DUMPDEBUG_IDX)); File::CreateFullPath(File::GetUserPath(D_DUMPDEBUG_BRANCHWATCH_IDX)); + File::CreateFullPath(File::GetUserPath(D_DUMPDEBUG_JITBLOCKS_IDX)); File::CreateFullPath(File::GetUserPath(D_GAMESETTINGS_IDX)); File::CreateFullPath(File::GetUserPath(D_GCUSER_IDX)); File::CreateFullPath(File::GetUserPath(D_GCUSER_IDX) + USA_DIR DIR_SEP);