PPU debugger: Implement PPU calling history

This commit is contained in:
Eladash 2021-07-10 11:56:48 +03:00 committed by Megamouse
parent c2f0fbcd82
commit 8e2c34a003
8 changed files with 186 additions and 35 deletions

View File

@ -140,6 +140,22 @@ FORCE_INLINE auto ppu_feed_data(ppu_thread& ppu, u64 addr)
return value;
}
// Push called address to custom call history for debugging
inline u32 ppu_record_call(ppu_thread& ppu, u32 new_cia, ppu_opcode_t op)
{
if (!op.lk)
{
return new_cia;
}
if (auto& history = ppu.call_history; !history.data.empty())
{
history.data[history.index++ % ppu.call_history_max_size] = new_cia;
}
return new_cia;
}
// Compare 16 packed unsigned bytes (greater than)
inline __m128i sse_cmpgt_epu8(__m128i A, __m128i B)
{
@ -3085,6 +3101,10 @@ bool ppu_interpreter::BC(ppu_thread& ppu, ppu_opcode_t op)
if (ctr_ok && cond_ok)
{
// Provide additional information by using the origin of the call
// Because this is a fixed target branch there's no abiguity about it
ppu_record_call(ppu, ppu.cia, op);
ppu.cia = (op.aa ? 0 : ppu.cia) + op.bt14;
return false;
}
@ -3107,6 +3127,10 @@ bool ppu_interpreter::SC(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::B(ppu_thread& ppu, ppu_opcode_t op)
{
// Provide additional information by using the origin of the call
// Because this is a fixed target branch there's no abiguity about it
ppu_record_call(ppu, ppu.cia, op);
const u32 link = ppu.cia + 4;
ppu.cia = (op.aa ? 0 : ppu.cia) + op.bt24;
if (op.lk) ppu.lr = link;
@ -3137,7 +3161,7 @@ bool ppu_interpreter::BCLR(ppu_thread& ppu, ppu_opcode_t op)
if (ctr_ok && cond_ok)
{
ppu.cia = target;
ppu.cia = ppu_record_call(ppu, target, op);
return false;
}
else
@ -3206,7 +3230,7 @@ bool ppu_interpreter::BCCTR(ppu_thread& ppu, ppu_opcode_t op)
if (op.bo & 0x10 || ppu.cr[op.bi] == ((op.bo & 0x8) != 0))
{
ppu.cia = static_cast<u32>(ppu.ctr) & ~3;
ppu.cia = ppu_record_call(ppu, static_cast<u32>(ppu.ctr) & ~3, op);
return false;
}

View File

@ -118,6 +118,17 @@ void fmt_class_string<ppu_thread_status>::format(std::string& out, u64 arg)
});
}
template <>
void fmt_class_string<typename ppu_thread::call_history_t>::format(std::string& out, u64 arg)
{
const auto& history = get_object(arg);
for (u64 count = 0, idx = history.index - 1; idx != umax && count < ppu_thread::call_history_max_size; count++, idx--)
{
fmt::append(out, "\n(%u) 0x%08x", count, history.data[idx % ppu_thread::call_history_max_size]);
}
}
const ppu_decoder<ppu_interpreter_precise> g_ppu_interpreter_precise;
const ppu_decoder<ppu_interpreter_fast> g_ppu_interpreter_fast;
const ppu_decoder<ppu_itype> g_ppu_itype;
@ -885,6 +896,22 @@ std::string ppu_thread::dump_misc() const
return ret;
}
std::string ppu_thread::dump_all() const
{
std::string ret = cpu_thread::dump_all();
if (!call_history.data.empty())
{
ret +=
"\nCalling History:"
"\n================";
fmt::append(ret, "%s", call_history);
}
return ret;
}
extern thread_local std::string(*g_tls_log_prefix)();
void ppu_thread::cpu_task()
@ -1152,6 +1179,11 @@ ppu_thread::ppu_thread(const ppu_thread_params& param, std::string_view name, u3
{
state += cpu_flag::memory;
}
if (g_cfg.core.ppu_call_history)
{
call_history.data.resize(call_history_max_size);
}
}
ppu_thread::thread_name_t::operator std::string() const

View File

@ -134,6 +134,7 @@ public:
virtual std::string dump_callstack() const override;
virtual std::vector<std::pair<u32, u32>> dump_callstack_list() const override;
virtual std::string dump_misc() const override;
virtual std::string dump_all() const override;
virtual void cpu_task() override final;
virtual void cpu_sleep() override;
virtual void cpu_on_stop() override;
@ -296,6 +297,14 @@ public:
u32 dbg_step_pc = 0;
struct call_history_t
{
std::vector<u32> data;
u64 index = 0;
} call_history;
static constexpr u32 call_history_max_size = 4096;
// For named_thread ctor
const struct thread_name_t
{

View File

@ -1676,11 +1676,6 @@ void spu_thread::cleanup()
// Deallocate local storage
ensure(vm::dealloc(vm_offset(), vm::spu, &shm));
if (g_cfg.core.mfc_debug)
{
utils::memory_decommit(vm::g_stat_addr + vm_offset(), SPU_LS_SIZE);
}
// Deallocate RawSPU ID
if (get_type() >= spu_type::raw)
{
@ -1713,11 +1708,6 @@ spu_thread::spu_thread(lv2_spu_group* group, u32 index, std::string_view name, u
, shm(std::make_shared<utils::shm>(SPU_LS_SIZE))
, ls([&]()
{
if (g_cfg.core.mfc_debug)
{
utils::memory_commit(vm::g_stat_addr + vm_offset(), SPU_LS_SIZE);
}
if (!group)
{
ensure(vm::get(vm::spu)->falloc(vm_offset(), SPU_LS_SIZE, &shm, 0x200));
@ -1791,6 +1781,11 @@ spu_thread::spu_thread(lv2_spu_group* group, u32 index, std::string_view name, u
cpu_init();
}
if (g_cfg.core.mfc_debug)
{
mfc_history.resize(max_mfc_dump_idx);
}
range_lock = vm::alloc_range_lock();
}
@ -2124,7 +2119,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
{
if (g_cfg.core.mfc_debug && _this)
{
auto& dump = reinterpret_cast<mfc_cmd_dump*>(vm::g_stat_addr + _this->vm_offset())[_this->mfc_dump_idx++ % spu_thread::max_mfc_dump_idx];
auto& dump = _this->mfc_history[_this->mfc_dump_idx++ % spu_thread::max_mfc_dump_idx];
dump.cmd = args;
dump.cmd.eah = _this->pc;
std::memcpy(dump.data, is_get ? dst : src, std::min<u32>(args.size, 128));
@ -2322,7 +2317,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
if (g_cfg.core.mfc_debug && _this)
{
auto& dump = reinterpret_cast<mfc_cmd_dump*>(vm::g_stat_addr + _this->vm_offset())[_this->mfc_dump_idx++ % spu_thread::max_mfc_dump_idx];
auto& dump = _this->mfc_history[_this->mfc_dump_idx++ % spu_thread::max_mfc_dump_idx];
dump.cmd = args;
dump.cmd.eah = _this->pc;
std::memcpy(dump.data, is_get ? dst : src, std::min<u32>(args.size, 128));
@ -2469,7 +2464,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
if (g_cfg.core.mfc_debug && _this)
{
auto& dump = reinterpret_cast<mfc_cmd_dump*>(vm::g_stat_addr + _this->vm_offset())[_this->mfc_dump_idx++ % spu_thread::max_mfc_dump_idx];
auto& dump = _this->mfc_history[_this->mfc_dump_idx++ % spu_thread::max_mfc_dump_idx];
dump.cmd = args;
dump.cmd.eah = _this->pc;
std::memcpy(dump.data, is_get ? dst : src, std::min<u32>(args.size, 128));
@ -2538,7 +2533,7 @@ plain_access:
if (g_cfg.core.mfc_debug && _this)
{
auto& dump = reinterpret_cast<mfc_cmd_dump*>(vm::g_stat_addr + _this->vm_offset())[_this->mfc_dump_idx++ % spu_thread::max_mfc_dump_idx];
auto& dump = _this->mfc_history[_this->mfc_dump_idx++ % spu_thread::max_mfc_dump_idx];
dump.cmd = args;
dump.cmd.eah = _this->pc;
std::memcpy(dump.data, is_get ? dst : src, std::min<u32>(args.size, 128));
@ -3289,7 +3284,7 @@ bool spu_thread::process_mfc_cmd()
if (g_cfg.core.mfc_debug)
{
auto& dump = reinterpret_cast<mfc_cmd_dump*>(vm::g_stat_addr + vm_offset())[mfc_dump_idx++ % spu_thread::max_mfc_dump_idx];
auto& dump = mfc_history[mfc_dump_idx++ % spu_thread::max_mfc_dump_idx];
dump.cmd = ch_mfc_cmd;
dump.cmd.eah = pc;
std::memcpy(dump.data, rdata, 128);
@ -3314,7 +3309,7 @@ bool spu_thread::process_mfc_cmd()
if (dump)
{
auto& dump = reinterpret_cast<mfc_cmd_dump*>(vm::g_stat_addr + vm_offset())[mfc_dump_idx++ % spu_thread::max_mfc_dump_idx];
auto& dump = mfc_history[mfc_dump_idx++ % max_mfc_dump_idx];
dump.cmd = ch_mfc_cmd;
dump.cmd.eah = pc;
dump.cmd.tag = static_cast<u32>(ch_atomic_stat.get_value()); // Use tag as atomic status
@ -3327,7 +3322,7 @@ bool spu_thread::process_mfc_cmd()
{
if (g_cfg.core.mfc_debug)
{
auto& dump = reinterpret_cast<mfc_cmd_dump*>(vm::g_stat_addr + vm_offset())[mfc_dump_idx++ % spu_thread::max_mfc_dump_idx];
auto& dump = mfc_history[mfc_dump_idx++ % max_mfc_dump_idx];
dump.cmd = ch_mfc_cmd;
dump.cmd.eah = pc;
std::memcpy(dump.data, _ptr<u8>(ch_mfc_cmd.lsa & 0x3ff80), 128);
@ -3341,7 +3336,7 @@ bool spu_thread::process_mfc_cmd()
{
if (g_cfg.core.mfc_debug)
{
auto& dump = reinterpret_cast<mfc_cmd_dump*>(vm::g_stat_addr + vm_offset())[mfc_dump_idx++ % spu_thread::max_mfc_dump_idx];
auto& dump = mfc_history[mfc_dump_idx++ % max_mfc_dump_idx];
dump.cmd = ch_mfc_cmd;
dump.cmd.eah = pc;
std::memcpy(dump.data, _ptr<u8>(ch_mfc_cmd.lsa & 0x3ff80), 128);
@ -3426,7 +3421,7 @@ bool spu_thread::process_mfc_cmd()
//if (g_cfg.core.mfc_debug)
//{
// TODO: This needs a disambiguator with list elements dumping
// auto& dump = reinterpret_cast<mfc_cmd_dump*>(vm::g_stat_addr + vm_offset())[mfc_dump_idx++ % spu_thread::max_mfc_dump_idx];
// auto& dump = mfc_history[mfc_dump_idx++ % max_mfc_dump_idx];
// dump.cmd = ch_mfc_cmd;
// dump.cmd.eah = pc;
// std::memcpy(dump.data, _ptr<u8>(ch_mfc_cmd.eah & 0x3fff0), std::min<u32>(ch_mfc_cmd.size, 128));

View File

@ -770,8 +770,9 @@ public:
u64 last_fail = 0;
u64 last_succ = 0;
std::vector<mfc_cmd_dump> mfc_history;
u64 mfc_dump_idx = 0;
static constexpr u32 max_mfc_dump_idx = SPU_LS_SIZE / sizeof(mfc_cmd_dump);
static constexpr u32 max_mfc_dump_idx = 2048;
std::array<v128, 0x4000> stack_mirror; // Return address information

View File

@ -47,6 +47,13 @@ bool ppu_thread_exit(ppu_thread& ppu)
dct.used -= ppu.stack_size;
}
if (ppu.call_history.index)
{
std::string str = fmt::format("%s", ppu.call_history);
ppu.call_history.index = 0;
ppu_log.notice("Calling history: %s", str);
}
return false;
}

View File

@ -22,6 +22,7 @@ struct cfg_root : cfg::node
cfg::_enum<ppu_decoder_type> ppu_decoder{ this, "PPU Decoder", ppu_decoder_type::llvm };
cfg::_int<1, 8> ppu_threads{ this, "PPU Threads", 2 }; // Amount of PPU threads running simultaneously (must be 2)
cfg::_bool ppu_debug{ this, "PPU Debug" };
cfg::_bool ppu_call_history{ this, "PPU Calling History" }; // Enable PPU calling history recording
cfg::_bool llvm_logs{ this, "Save LLVM logs" };
cfg::string llvm_cpu{ this, "Use LLVM CPU" };
cfg::_int<0, 1024> llvm_threads{ this, "Max LLVM Compile Threads", 0 };

View File

@ -298,6 +298,7 @@ void debugger_frame::keyPressEvent(QKeyEvent* event)
"\nKeys Ctrl+B: Open breakpoints settings."
"\nKeys Alt+S: Capture SPU images of selected SPU."
"\nKey D: SPU MFC commands logger, MFC debug setting must be enabled."
"\nKey D: Also PPU calling history logger, interpreter and non-zero call history size must be used."
"\nKey E: Instruction Editor: click on the instruction you want to modify, then press E."
"\nKey F: Dedicated floating point mode switch for SPU threads."
"\nKey R: Registers Editor for selected thread."
@ -365,10 +366,9 @@ void debugger_frame::keyPressEvent(QKeyEvent* event)
if (event->isAutoRepeat())
return;
if (cpu->id_type() == 2 && g_cfg.core.mfc_debug && !cpu->is_stopped())
auto get_max_allowed = [&](QString title, QString description, u32 limit) -> u32
{
input_dialog dlg(4, "", tr("Max MFC cmds logged"),
tr("Decimal only, max allowed is 1820."), "0", this);
input_dialog dlg(4, "", title, description.arg(limit), QString::number(limit), this);
QFont mono = QFontDatabase::systemFont(QFontDatabase::FixedFont);
mono.setPointSize(8);
@ -383,7 +383,7 @@ void debugger_frame::keyPressEvent(QKeyEvent* event)
{
bool ok = false;
const u32 dummy = changed.toUInt(&ok, 10);
ok = ok && dummy && dummy <= spu_thread::max_mfc_dump_idx;
ok = ok && dummy && dummy <= limit;
dlg.set_button_enabled(QDialogButtonBox::StandardButton::Ok, ok);
if (ok)
@ -397,30 +397,71 @@ void debugger_frame::keyPressEvent(QKeyEvent* event)
max = 0;
}
const auto spu = static_cast<spu_thread*>(cpu);
return max;
};
const auto ptr = reinterpret_cast<const mfc_cmd_dump*>(vm::g_stat_addr + spu->vm_offset());
auto copy_overlapping_list = [&] <typename T> (u64& index, u64 max, const std::vector<T>& in, std::vector<T>& out, bool& emptied)
{
const u64 current_pos = index % in.size();
const u64 last_elements = std::min<u64>(current_pos, max);
const u64 overlapped_old_elements = std::min<u64>(index, max) - last_elements;
out.resize(overlapped_old_elements + last_elements);
// Save list contents (only the relavant parts)
std::copy(in.end() - overlapped_old_elements, in.end(), out.begin());
std::copy_n(in.begin() + current_pos - last_elements, last_elements, out.begin() + overlapped_old_elements);
// Check if max elements to log is larger/equal to current list size
if ((emptied = index && max >= index))
{
// Empty list when possible (further calls' history logging will not log any call before this)
index = 0;
}
};
if (cpu->id_type() == 2 && g_cfg.core.mfc_debug)
{
const u32 max = get_max_allowed(tr("Max MFC cmds logged"), tr("Decimal only, max allowed is %0."), spu_thread::max_mfc_dump_idx);
// Preallocate in order to save execution time when inside suspend_all.
std::vector<mfc_cmd_dump> copy(max);
bool emptied = false;
cpu_thread::suspend_all(nullptr, {}, [&]
{
const auto spu = static_cast<spu_thread*>(cpu);
copy_overlapping_list(spu->mfc_dump_idx, max, spu->mfc_history, copy, emptied);
});
std::string ret;
for (u64 count = 0, idx = spu->mfc_dump_idx - 1; idx != umax && count < max; count++, idx--)
u32 i = 0;
for (auto it = copy.rbegin(); it != copy.rend(); it++, i++)
{
auto dump = ptr[idx % spu_thread::max_mfc_dump_idx];
auto& dump = *it;
const u32 pc = std::exchange(dump.cmd.eah, 0);
fmt::append(ret, "\n(%d) PC 0x%05x: [%s]", count, pc, dump.cmd);
fmt::append(ret, "\n(%d) PC 0x%05x: [%s]", i, pc, dump.cmd);
if (dump.cmd.cmd == MFC_PUTLLC_CMD)
{
fmt::append(ret, " %s", dump.cmd.tag == MFC_PUTLLC_SUCCESS ? "(passed)" : "(failed)");
}
const auto data = reinterpret_cast<const be_t<u32>*>(dump.data);
auto load = [&](usz index)
{
be_t<u32> data{};
std::memcpy(&data, dump.data + index * sizeof(data), sizeof(data));
return data;
};
for (usz i = 0; i < utils::aligned_div(std::min<u32>(dump.cmd.size, 128), 4); i += 4)
{
fmt::append(ret, "\n[0x%02x] %08x %08x %08x %08x", i * sizeof(data[0])
, data[i + 0], data[i + 1], data[i + 2], data[i + 3]);
fmt::append(ret, "\n[0x%02x] %08x %08x %08x %08x", i * sizeof(be_t<u32>)
, load(i + 0), load(i + 1), load(i + 2), load(i + 3));
}
}
@ -429,8 +470,49 @@ void debugger_frame::keyPressEvent(QKeyEvent* event)
ret = "No MFC commands have been logged";
}
spu_log.warning("SPU MFC dump of '%s': %s", spu->get_name(), ret);
if (emptied)
{
ret += "\nPrevious MFC history has been emptied!";
}
spu_log.success("SPU MFC dump of '%s': %s", cpu->get_name(), ret);
}
else if (cpu->id_type() == 1 && g_cfg.core.ppu_call_history)
{
const u32 max = get_max_allowed(tr("Max PPU calls logged"), tr("Decimal only, max allowed is %0."), ppu_thread::call_history_max_size);
// Preallocate in order to save execution time when inside suspend_all.
std::vector<u32> copy(max);
bool emptied = false;
cpu_thread::suspend_all(nullptr, {}, [&]
{
auto& list = static_cast<ppu_thread*>(cpu)->call_history;
copy_overlapping_list(list.index, max, list.data, copy, emptied);
});
std::string ret;
u32 i = 0;
for (auto it = copy.rbegin(); it != copy.rend(); it++, i++)
{
fmt::append(ret, "\n(%u) 0x%08x", i, *it);
}
if (ret.empty())
{
ret = "No PPU calls have been logged";
}
if (emptied)
{
ret += "\nPrevious call history has been emptied!";
}
ppu_log.success("PPU calling history dump of '%s': %s", cpu->get_name(), ret);
}
return;
}
case Qt::Key_E: