From d4d5dc99f307de3aaf9836c80edd0ec17220e8f9 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Thu, 19 Nov 2020 11:12:59 +0300 Subject: [PATCH] Fix perf_meter<> after adding thread pool. Print and reset sorted information after emulation stop/pause. Print useless average value. --- rpcs3/Emu/System.cpp | 5 ++++ rpcs3/Emu/perf_meter.cpp | 64 ++++++++++++++++++++++++++++++++++++++-- rpcs3/Emu/perf_meter.hpp | 33 +++++++++++---------- 3 files changed, 85 insertions(+), 17 deletions(-) diff --git a/rpcs3/Emu/System.cpp b/rpcs3/Emu/System.cpp index 7e0943e9e8..9bcef0628c 100644 --- a/rpcs3/Emu/System.cpp +++ b/rpcs3/Emu/System.cpp @@ -3,6 +3,7 @@ #include "Utilities/bin_patch.h" #include "Emu/Memory/vm.h" #include "Emu/System.h" +#include "Emu/perf_meter.hpp" #include "Emu/Cell/PPUThread.h" #include "Emu/Cell/PPUCallback.h" @@ -1797,6 +1798,8 @@ void Emulator::Resume() ppu_log.notice("[RESUME] Dumping instruction stats:%s", dump); } + perf_stat_base::report(); + // Try to resume if (!m_state.compare_and_swap_test(system_state::paused, system_state::running)) { @@ -1880,6 +1883,8 @@ void Emulator::Stop(bool restart) jit_runtime::finalize(); + perf_stat_base::report(); + static u64 aw_refs = 0; static u64 aw_colm = 0; static u64 aw_colc = 0; diff --git a/rpcs3/Emu/perf_meter.cpp b/rpcs3/Emu/perf_meter.cpp index ee35d077f5..ab6a5532bd 100644 --- a/rpcs3/Emu/perf_meter.cpp +++ b/rpcs3/Emu/perf_meter.cpp @@ -1,11 +1,19 @@ #include "stdafx.h" #include "perf_meter.hpp" +#include +#include + void perf_stat_base::push(u64 ns[66]) noexcept { + if (!ns[0]) + { + return; + } + for (u32 i = 0; i < 66; i++) { - m_log[i] += ns[i]; + m_log[i] += atomic_storage::exchange(ns[i], 0); } } @@ -13,7 +21,7 @@ void perf_stat_base::print(const char* name) noexcept { if (u64 num_total = m_log[0].load()) { - perf_log.notice("Perf stats for %s: total events: %u (total time %.4fs)", name, num_total, m_log[65].load() / 1000'000'000.); + perf_log.notice(u8"Perf stats for %s: total events: %u (total time %.4fs, avg %.4fµs)", name, num_total, m_log[65].load() / 1000'000'000., m_log[65].load() / 1000. / num_total); for (u32 i = 0; i < 13; i++) { @@ -56,3 +64,55 @@ void perf_stat_base::print(const char* name) noexcept } } } + +static shared_mutex s_perf_mutex; + +static std::map s_perf_acc; + +static std::multimap s_perf_sources; + +void perf_stat_base::add(u64 ns[66], const char* name) noexcept +{ + std::lock_guard lock(s_perf_mutex); + + s_perf_sources.emplace(name, ns); + s_perf_acc[name]; +} + +void perf_stat_base::remove(u64 ns[66], const char* name) noexcept +{ + std::lock_guard lock(s_perf_mutex); + + const auto found = s_perf_sources.equal_range(name); + + for (auto it = found.first; it != found.second; it++) + { + if (it->second == ns) + { + s_perf_acc[name].push(ns); + s_perf_sources.erase(it); + break; + } + } +} + +void perf_stat_base::report() noexcept +{ + std::lock_guard lock(s_perf_mutex); + + perf_log.notice("Performance report begin (%u src, %u acc):", s_perf_sources.size(), s_perf_acc.size()); + + for (auto& [name, ns] : s_perf_sources) + { + s_perf_acc[name].push(ns); + } + + for (auto& [name, data] : s_perf_acc) + { + data.print(name.c_str()); + } + + s_perf_acc.clear(); + + perf_log.notice("Performance report end."); +} diff --git a/rpcs3/Emu/perf_meter.hpp b/rpcs3/Emu/perf_meter.hpp index 93101f0a99..b1559b3146 100644 --- a/rpcs3/Emu/perf_meter.hpp +++ b/rpcs3/Emu/perf_meter.hpp @@ -31,6 +31,12 @@ protected: // Accumulate values from a thread void push(u64 ns[66]) noexcept; + // Register TLS storage for stats + static void add(u64 ns[66], const char* name) noexcept; + + // Unregister TLS storage and drain its data + static void remove(u64 ns[66], const char* name) noexcept; + public: perf_stat_base() noexcept = default; @@ -39,6 +45,9 @@ public: perf_stat_base& operator =(const perf_stat_base&) = delete; ~perf_stat_base() {} + + // Collect all data, report it, and clean + static void report() noexcept; }; // Object that prints event length stats at the end @@ -50,26 +59,20 @@ class perf_stat final : public perf_stat_base // Local non-atomic values for increments u64 m_log[66]{}; + perf_stat_local() noexcept + { + perf_stat_base::add(m_log, perf_name.data()); + } + ~perf_stat_local() { - // Update on thread exit - if (m_log[0]) - { - if (auto* pfs = g_fxo->get()) - { - pfs->perf_stat_base::push(m_log); - } - } + perf_stat_base::remove(m_log, perf_name.data()); } + } g_tls_perf_stat; public: - ~perf_stat() - { - perf_stat_base::print(perf_name.data()); - } - - void push(u64 ns) noexcept + static void push(u64 ns) noexcept { auto& data = g_tls_perf_stat.m_log; data[0] += ns != 0; @@ -178,7 +181,7 @@ public: const f64 diff = (end_time - m_timestamps[0]) * 1. / utils::get_tsc_freq(); // Register perf stat in nanoseconds - g_fxo->get>()->push(static_cast(diff * 1000'000'000.)); + perf_stat::push(static_cast(diff * 1000'000'000.)); // Print in microseconds if (static_cast(diff * 1000'000.) >= g_cfg.core.perf_report_threshold)