Fix perf_meter<> after adding thread pool.

Print and reset sorted information after emulation stop/pause.
Print useless average value.
This commit is contained in:
Nekotekina 2020-11-19 11:12:59 +03:00
parent 3e1344e4e4
commit d4d5dc99f3
3 changed files with 85 additions and 17 deletions

View File

@ -3,6 +3,7 @@
#include "Utilities/bin_patch.h"
#include "Emu/Memory/vm.h"
#include "Emu/System.h"
#include "Emu/perf_meter.hpp"
#include "Emu/Cell/PPUThread.h"
#include "Emu/Cell/PPUCallback.h"
@ -1797,6 +1798,8 @@ void Emulator::Resume()
ppu_log.notice("[RESUME] Dumping instruction stats:%s", dump);
}
perf_stat_base::report();
// Try to resume
if (!m_state.compare_and_swap_test(system_state::paused, system_state::running))
{
@ -1880,6 +1883,8 @@ void Emulator::Stop(bool restart)
jit_runtime::finalize();
perf_stat_base::report();
static u64 aw_refs = 0;
static u64 aw_colm = 0;
static u64 aw_colc = 0;

View File

@ -1,11 +1,19 @@
#include "stdafx.h"
#include "perf_meter.hpp"
#include <map>
#include <mutex>
void perf_stat_base::push(u64 ns[66]) noexcept
{
if (!ns[0])
{
return;
}
for (u32 i = 0; i < 66; i++)
{
m_log[i] += ns[i];
m_log[i] += atomic_storage<u64>::exchange(ns[i], 0);
}
}
@ -13,7 +21,7 @@ void perf_stat_base::print(const char* name) noexcept
{
if (u64 num_total = m_log[0].load())
{
perf_log.notice("Perf stats for %s: total events: %u (total time %.4fs)", name, num_total, m_log[65].load() / 1000'000'000.);
perf_log.notice(u8"Perf stats for %s: total events: %u (total time %.4fs, avg %.4fµs)", name, num_total, m_log[65].load() / 1000'000'000., m_log[65].load() / 1000. / num_total);
for (u32 i = 0; i < 13; i++)
{
@ -56,3 +64,55 @@ void perf_stat_base::print(const char* name) noexcept
}
}
}
static shared_mutex s_perf_mutex;
static std::map<std::string, perf_stat_base> s_perf_acc;
static std::multimap<std::string, u64*> s_perf_sources;
void perf_stat_base::add(u64 ns[66], const char* name) noexcept
{
std::lock_guard lock(s_perf_mutex);
s_perf_sources.emplace(name, ns);
s_perf_acc[name];
}
void perf_stat_base::remove(u64 ns[66], const char* name) noexcept
{
std::lock_guard lock(s_perf_mutex);
const auto found = s_perf_sources.equal_range(name);
for (auto it = found.first; it != found.second; it++)
{
if (it->second == ns)
{
s_perf_acc[name].push(ns);
s_perf_sources.erase(it);
break;
}
}
}
void perf_stat_base::report() noexcept
{
std::lock_guard lock(s_perf_mutex);
perf_log.notice("Performance report begin (%u src, %u acc):", s_perf_sources.size(), s_perf_acc.size());
for (auto& [name, ns] : s_perf_sources)
{
s_perf_acc[name].push(ns);
}
for (auto& [name, data] : s_perf_acc)
{
data.print(name.c_str());
}
s_perf_acc.clear();
perf_log.notice("Performance report end.");
}

View File

@ -31,6 +31,12 @@ protected:
// Accumulate values from a thread
void push(u64 ns[66]) noexcept;
// Register TLS storage for stats
static void add(u64 ns[66], const char* name) noexcept;
// Unregister TLS storage and drain its data
static void remove(u64 ns[66], const char* name) noexcept;
public:
perf_stat_base() noexcept = default;
@ -39,6 +45,9 @@ public:
perf_stat_base& operator =(const perf_stat_base&) = delete;
~perf_stat_base() {}
// Collect all data, report it, and clean
static void report() noexcept;
};
// Object that prints event length stats at the end
@ -50,26 +59,20 @@ class perf_stat final : public perf_stat_base
// Local non-atomic values for increments
u64 m_log[66]{};
perf_stat_local() noexcept
{
perf_stat_base::add(m_log, perf_name<ShortName>.data());
}
~perf_stat_local()
{
// Update on thread exit
if (m_log[0])
{
if (auto* pfs = g_fxo->get<perf_stat>())
{
pfs->perf_stat_base::push(m_log);
}
}
perf_stat_base::remove(m_log, perf_name<ShortName>.data());
}
} g_tls_perf_stat;
public:
~perf_stat()
{
perf_stat_base::print(perf_name<ShortName>.data());
}
void push(u64 ns) noexcept
static void push(u64 ns) noexcept
{
auto& data = g_tls_perf_stat.m_log;
data[0] += ns != 0;
@ -178,7 +181,7 @@ public:
const f64 diff = (end_time - m_timestamps[0]) * 1. / utils::get_tsc_freq();
// Register perf stat in nanoseconds
g_fxo->get<perf_stat<ShortName>>()->push(static_cast<u64>(diff * 1000'000'000.));
perf_stat<ShortName>::push(static_cast<u64>(diff * 1000'000'000.));
// Print in microseconds
if (static_cast<u64>(diff * 1000'000.) >= g_cfg.core.perf_report_threshold)