mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-01-30 03:32:55 +00:00
SPU Recompiler fixed
This commit is contained in:
parent
ab1c05aa71
commit
7e01c81154
File diff suppressed because it is too large
Load Diff
@ -30,32 +30,28 @@ private:
|
||||
asmjit::X86GpVar* cpu;
|
||||
asmjit::X86GpVar* ls;
|
||||
|
||||
// output:
|
||||
asmjit::X86GpVar* pos;
|
||||
|
||||
// temporary:
|
||||
asmjit::X86GpVar* addr;
|
||||
asmjit::X86GpVar* qw0;
|
||||
asmjit::X86GpVar* qw1;
|
||||
asmjit::X86GpVar* qw2;
|
||||
std::array<asmjit::X86XmmVar*, 10> vec;
|
||||
std::array<asmjit::X86XmmVar*, 6> vec;
|
||||
|
||||
// labels:
|
||||
asmjit::Label* labels; // array[0x10000]
|
||||
asmjit::Label* jt; // jump table resolver
|
||||
asmjit::Label* jt; // jump table resolver (uses *addr)
|
||||
asmjit::Label* end; // function end (return *addr)
|
||||
|
||||
class XmmLink
|
||||
{
|
||||
friend class spu_recompiler;
|
||||
|
||||
asmjit::X86XmmVar*& m_alloc;
|
||||
asmjit::X86XmmVar* xmm_var;
|
||||
asmjit::X86XmmVar* const m_var;
|
||||
|
||||
XmmLink(asmjit::X86XmmVar*& xmm_var)
|
||||
: m_alloc(xmm_var)
|
||||
, xmm_var(xmm_var)
|
||||
: m_var(xmm_var)
|
||||
{
|
||||
m_alloc = nullptr;
|
||||
xmm_var = nullptr;
|
||||
}
|
||||
|
||||
public:
|
||||
@ -64,24 +60,13 @@ private:
|
||||
XmmLink(const XmmLink&) = delete;
|
||||
|
||||
XmmLink(XmmLink&& right)
|
||||
: m_alloc(right.m_alloc)
|
||||
, xmm_var(right.xmm_var)
|
||||
: m_var(right.m_var)
|
||||
{
|
||||
right.xmm_var = nullptr;
|
||||
}
|
||||
|
||||
XmmLink& operator =(const XmmLink&) = delete;
|
||||
|
||||
XmmLink& operator =(XmmLink&& right) = delete;
|
||||
|
||||
~XmmLink()
|
||||
{
|
||||
if (xmm_var) m_alloc = xmm_var;
|
||||
}
|
||||
|
||||
inline operator const asmjit::X86XmmVar&() const
|
||||
{
|
||||
return *xmm_var;
|
||||
return *m_var;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -7,6 +7,25 @@
|
||||
|
||||
const spu_opcode_table_t<spu_itype_t> g_spu_itype{ DEFINE_SPU_OPCODES(spu_itype::), spu_itype::UNK };
|
||||
|
||||
std::shared_ptr<spu_function_t> SPUDatabase::find(const be_t<u32>* data, u64 key, u32 max_size)
|
||||
{
|
||||
for (auto found = m_db.find(key); found != m_db.end(); found++)
|
||||
{
|
||||
if (found->second->size > max_size)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Compare binary data explicitly (TODO: optimize)
|
||||
if (std::equal(found->second->data.begin(), found->second->data.end(), data))
|
||||
{
|
||||
return found->second;
|
||||
}
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
SPUDatabase::SPUDatabase()
|
||||
{
|
||||
// TODO: load existing database associated with currently running executable
|
||||
@ -33,13 +52,9 @@ std::shared_ptr<spu_function_t> SPUDatabase::analyse(const be_t<u32>* ls, u32 en
|
||||
const u64 key = entry | u64{ ls[entry / 4] } << 32;
|
||||
|
||||
// Try to find existing function in the database
|
||||
for (auto found = m_db.find(key); found != m_db.end(); found++)
|
||||
if (auto func = find(ls + entry / 4, key, max_limit - entry))
|
||||
{
|
||||
// Compare binary data explicitly (TODO: optimize)
|
||||
if (std::equal(found->second->data.begin(), found->second->data.end(), ls + entry / 4))
|
||||
{
|
||||
return found->second;
|
||||
}
|
||||
return func;
|
||||
}
|
||||
|
||||
// Initialize block entries with the function entry point
|
||||
@ -51,6 +66,9 @@ std::shared_ptr<spu_function_t> SPUDatabase::analyse(const be_t<u32>* ls, u32 en
|
||||
// Set initial limit which will be narrowed later
|
||||
u32 limit = max_limit;
|
||||
|
||||
// Minimal position of ila $SP,* instruction
|
||||
u32 ila_sp_pos = max_limit;
|
||||
|
||||
// Find preliminary set of possible block entries (first pass), `start` is the current block address
|
||||
for (u32 start = entry, pos = entry; pos < limit; pos += 4)
|
||||
{
|
||||
@ -60,7 +78,15 @@ std::shared_ptr<spu_function_t> SPUDatabase::analyse(const be_t<u32>* ls, u32 en
|
||||
|
||||
using namespace spu_itype;
|
||||
|
||||
if (start == pos) // Additional analysis at the beginning of the block (questionable)
|
||||
// Find existing function
|
||||
if (pos != entry && find(ls + pos / 4, pos | u64{ op.opcode } << 32, limit - pos))
|
||||
{
|
||||
limit = pos;
|
||||
break;
|
||||
}
|
||||
|
||||
// Additional analysis at the beginning of the block
|
||||
if (start != entry && start == pos)
|
||||
{
|
||||
// Possible jump table
|
||||
std::vector<u32> jt_abs, jt_rel;
|
||||
@ -128,17 +154,19 @@ std::shared_ptr<spu_function_t> SPUDatabase::analyse(const be_t<u32>* ls, u32 en
|
||||
{
|
||||
// Discard current block and abort the operation
|
||||
limit = start;
|
||||
|
||||
break;
|
||||
}
|
||||
else if (op.opcode == 0) // Hack: special case (STOP 0)
|
||||
|
||||
if (op.opcode == 0) // Hack: special case (STOP 0)
|
||||
{
|
||||
limit = pos + 4;
|
||||
|
||||
break;
|
||||
}
|
||||
else if (type == BI) // Branch Indirect
|
||||
|
||||
if (type == BI || type == IRET) // Branch Indirect
|
||||
{
|
||||
if (type == IRET) LOG_ERROR(SPU, "[0x%05x] Interrupt Return", pos);
|
||||
|
||||
blocks.emplace(start); start = pos + 4;
|
||||
}
|
||||
else if (type == BR || type == BRA) // Branch Relative/Absolute
|
||||
@ -163,7 +191,7 @@ std::shared_ptr<spu_function_t> SPUDatabase::analyse(const be_t<u32>* ls, u32 en
|
||||
{
|
||||
// Branch to the next instruction and set link ("get next instruction address" idiom)
|
||||
|
||||
if (op.rt == 0) LOG_ERROR(SPU, "Suspicious instruction at [0x%05x]", pos);
|
||||
if (op.rt == 0) LOG_ERROR(SPU, "[0x%05x] Branch-to-next with $LR", pos);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -174,13 +202,15 @@ std::shared_ptr<spu_function_t> SPUDatabase::analyse(const be_t<u32>* ls, u32 en
|
||||
{
|
||||
limit = std::min<u32>(limit, target);
|
||||
}
|
||||
|
||||
if (op.rt != 0) LOG_ERROR(SPU, "[0x%05x] Function call without $LR", pos);
|
||||
}
|
||||
}
|
||||
else if (type == BISL) // Branch Indirect and Set Link
|
||||
else if (type == BISL || type == BISLED) // Branch Indirect and Set Link
|
||||
{
|
||||
// Nothing
|
||||
if (op.rt != 0) LOG_ERROR(SPU, "[0x%05x] Indirect function call without $LR", pos);
|
||||
}
|
||||
else if (type == BRNZ || type == BRZ || type == BRHNZ || type == BRHZ) // Branch Relative if (Not) Zero Word/Halfword
|
||||
else if (type == BRNZ || type == BRZ || type == BRHNZ || type == BRHZ) // Branch Relative if (Not) Zero (Half)word
|
||||
{
|
||||
const u32 target = spu_branch_target(pos, op.i16);
|
||||
|
||||
@ -192,6 +222,40 @@ std::shared_ptr<spu_function_t> SPUDatabase::analyse(const be_t<u32>* ls, u32 en
|
||||
blocks.emplace(target);
|
||||
}
|
||||
}
|
||||
else if (type == BINZ || type == BIZ || type == BIHNZ || type == BIHZ) // Branch Indirect if (Not) Zero (Half)word
|
||||
{
|
||||
}
|
||||
else if (type == HBR || type == HBRA || type == HBRR) // Hint for Branch
|
||||
{
|
||||
}
|
||||
else if (type == STQA || type == STQD || type == STQR || type == STQX || type == FSCRWR || type == MTSPR || type == WRCH) // Store
|
||||
{
|
||||
}
|
||||
else if (type == HEQ || type == HEQI || type == HGT || type == HGTI || type == HLGT || type == HLGTI) // Halt
|
||||
{
|
||||
}
|
||||
else if (type == STOP || type == STOPD || type == NOP || type == LNOP || type == SYNC || type == DSYNC) // Miscellaneous
|
||||
{
|
||||
}
|
||||
else // Other instructions (writing rt reg)
|
||||
{
|
||||
const u32 rt = type == SELB || type == SHUFB || type == MPYA || type == FNMS || type == FMA || type == FMS ? op.rc : op.rt;
|
||||
|
||||
// Analyse link register access
|
||||
if (rt == 0)
|
||||
{
|
||||
}
|
||||
|
||||
// Analyse stack pointer access
|
||||
if (rt == 1)
|
||||
{
|
||||
if (type == ILA && pos < ila_sp_pos)
|
||||
{
|
||||
// set minimal ila $SP,* instruction position
|
||||
ila_sp_pos = pos;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Find more function calls (second pass, questionable)
|
||||
@ -228,6 +292,9 @@ std::shared_ptr<spu_function_t> SPUDatabase::analyse(const be_t<u32>* ls, u32 en
|
||||
// Prepare new function (set addr and size)
|
||||
auto func = std::make_shared<spu_function_t>(entry, limit - entry);
|
||||
|
||||
// Copy function contents
|
||||
func->data = { ls + entry / 4, ls + limit / 4 };
|
||||
|
||||
// Fill function block info
|
||||
for (auto i = blocks.crbegin(); i != blocks.crend(); i++)
|
||||
{
|
||||
@ -255,8 +322,8 @@ std::shared_ptr<spu_function_t> SPUDatabase::analyse(const be_t<u32>* ls, u32 en
|
||||
}
|
||||
}
|
||||
|
||||
// Copy function contents
|
||||
func->data = { ls + entry / 4, ls + limit / 4 };
|
||||
// Set whether the function can reset stack
|
||||
func->does_reset_stack = ila_sp_pos < limit;
|
||||
|
||||
// Add function to the database
|
||||
m_db.emplace(key, func);
|
||||
|
@ -242,6 +242,9 @@ struct spu_function_t
|
||||
// jump table values (start addresses)
|
||||
std::set<u32> jtable;
|
||||
|
||||
// whether ila $SP,* instruction found
|
||||
bool does_reset_stack;
|
||||
|
||||
// pointer to the compiled function
|
||||
spu_jit_func_t compiled = nullptr;
|
||||
|
||||
@ -260,6 +263,9 @@ class SPUDatabase final
|
||||
// All registered functions (uses addr and first instruction as a key)
|
||||
std::unordered_multimap<u64, std::shared_ptr<spu_function_t>> m_db;
|
||||
|
||||
// For internal use
|
||||
std::shared_ptr<spu_function_t> find(const be_t<u32>* data, u64 key, u32 max_size);
|
||||
|
||||
public:
|
||||
SPUDatabase();
|
||||
~SPUDatabase();
|
||||
|
@ -298,7 +298,7 @@ void spu_interpreter::BIZ(SPUThread& spu, spu_opcode_t op)
|
||||
if (spu.gpr[op.rt]._u32[3] == 0)
|
||||
{
|
||||
set_interrupt_status(spu, op);
|
||||
spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3], 0) - 4;
|
||||
spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3]) - 4;
|
||||
}
|
||||
}
|
||||
|
||||
@ -307,7 +307,7 @@ void spu_interpreter::BINZ(SPUThread& spu, spu_opcode_t op)
|
||||
if (spu.gpr[op.rt]._u32[3] != 0)
|
||||
{
|
||||
set_interrupt_status(spu, op);
|
||||
spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3], 0) - 4;
|
||||
spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3]) - 4;
|
||||
}
|
||||
}
|
||||
|
||||
@ -316,7 +316,7 @@ void spu_interpreter::BIHZ(SPUThread& spu, spu_opcode_t op)
|
||||
if (spu.gpr[op.rt]._u16[6] == 0)
|
||||
{
|
||||
set_interrupt_status(spu, op);
|
||||
spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3], 0) - 4;
|
||||
spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3]) - 4;
|
||||
}
|
||||
}
|
||||
|
||||
@ -325,13 +325,13 @@ void spu_interpreter::BIHNZ(SPUThread& spu, spu_opcode_t op)
|
||||
if (spu.gpr[op.rt]._u16[6] != 0)
|
||||
{
|
||||
set_interrupt_status(spu, op);
|
||||
spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3], 0) - 4;
|
||||
spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3]) - 4;
|
||||
}
|
||||
}
|
||||
|
||||
void spu_interpreter::STOPD(SPUThread& spu, spu_opcode_t op)
|
||||
{
|
||||
throw EXCEPTION("Unexpected instruction");
|
||||
throw EXCEPTION("Unimplemented instruction");
|
||||
}
|
||||
|
||||
void spu_interpreter::STQX(SPUThread& spu, spu_opcode_t op)
|
||||
@ -342,25 +342,25 @@ void spu_interpreter::STQX(SPUThread& spu, spu_opcode_t op)
|
||||
void spu_interpreter::BI(SPUThread& spu, spu_opcode_t op)
|
||||
{
|
||||
set_interrupt_status(spu, op);
|
||||
spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3], 0) - 4;
|
||||
spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3]) - 4;
|
||||
}
|
||||
|
||||
void spu_interpreter::BISL(SPUThread& spu, spu_opcode_t op)
|
||||
{
|
||||
set_interrupt_status(spu, op);
|
||||
const u32 target = spu_branch_target(spu.gpr[op.ra]._u32[3], 0);
|
||||
spu.gpr[op.rt] = v128::from32r(spu.pc + 4);
|
||||
const u32 target = spu_branch_target(spu.gpr[op.ra]._u32[3]);
|
||||
spu.gpr[op.rt] = v128::from32r(spu_branch_target(spu.pc + 4));
|
||||
spu.pc = target - 4;
|
||||
}
|
||||
|
||||
void spu_interpreter::IRET(SPUThread& spu, spu_opcode_t op)
|
||||
{
|
||||
throw EXCEPTION("Unexpected instruction");
|
||||
throw EXCEPTION("Unimplemented instruction");
|
||||
}
|
||||
|
||||
void spu_interpreter::BISLED(SPUThread& spu, spu_opcode_t op)
|
||||
{
|
||||
throw EXCEPTION("Unexpected instruction");
|
||||
throw EXCEPTION("Unimplemented instruction");
|
||||
}
|
||||
|
||||
void spu_interpreter::HBR(SPUThread& spu, spu_opcode_t op)
|
||||
@ -1022,7 +1022,7 @@ void spu_interpreter::LQA(SPUThread& spu, spu_opcode_t op)
|
||||
void spu_interpreter::BRASL(SPUThread& spu, spu_opcode_t op)
|
||||
{
|
||||
const u32 target = spu_branch_target(0, op.i16);
|
||||
spu.gpr[op.rt] = v128::from32r(spu.pc + 4);
|
||||
spu.gpr[op.rt] = v128::from32r(spu_branch_target(spu.pc + 4));
|
||||
spu.pc = target - 4;
|
||||
}
|
||||
|
||||
@ -1039,7 +1039,7 @@ void spu_interpreter::FSMBI(SPUThread& spu, spu_opcode_t op)
|
||||
void spu_interpreter::BRSL(SPUThread& spu, spu_opcode_t op)
|
||||
{
|
||||
const u32 target = spu_branch_target(spu.pc, op.i16);
|
||||
spu.gpr[op.rt] = v128::from32r(spu.pc + 4);
|
||||
spu.gpr[op.rt] = v128::from32r(spu_branch_target(spu.pc + 4));
|
||||
spu.pc = target - 4;
|
||||
}
|
||||
|
||||
|
@ -316,7 +316,7 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
// Computes a branch target from `pc` and a signed word offset `imm`:
// `imm` is scaled by 4, added to `pc`, and the sum is masked with 0x3fffc,
// so the result is always a multiple of 4 below 0x40000.
// With `imm` omitted (default 0) the function only applies the mask to `pc`.
inline u32 spu_branch_target(u32 pc, s32 imm = 0)
{
	// Scale in unsigned arithmetic: left-shifting a negative signed value is
	// undefined before C++20. The wrapped unsigned result is the same bit pattern
	// two's-complement hardware produces, so the masked sum is unchanged.
	return (pc + (static_cast<u32>(imm) << 2)) & 0x3fffc;
}
|
||||
|
@ -23,22 +23,28 @@ u32 SPURecompilerDecoder::DecodeMemory(const u32 address)
|
||||
throw EXCEPTION("Invalid address or PC (address=0x%x, PC=0x%05x)", address, spu.pc);
|
||||
}
|
||||
|
||||
// get SPU LS pointer
|
||||
const auto _ls = vm::get_ptr<be_t<u32>>(spu.offset);
|
||||
|
||||
const u32 index = spu.pc / 4;
|
||||
// always validate (TODO)
|
||||
const auto func = db->analyse(_ls, spu.pc);
|
||||
|
||||
if (!m_entries.at(index) || true) // always validate (TODO)
|
||||
// reset callstack if necessary
|
||||
if (func->does_reset_stack && spu.recursion_level)
|
||||
{
|
||||
const auto func = db->analyse(_ls, spu.pc);
|
||||
spu.m_state |= CPU_STATE_RETURN;
|
||||
|
||||
if (!func->compiled) rec->compile(*func);
|
||||
|
||||
if (!func->compiled) throw EXCEPTION("Compilation failed");
|
||||
|
||||
m_entries[index] = func->compiled;
|
||||
return 0;
|
||||
}
|
||||
|
||||
const u32 res = m_entries[index](&spu, _ls);
|
||||
if (!func->compiled)
|
||||
{
|
||||
rec->compile(*func);
|
||||
|
||||
if (!func->compiled) throw EXCEPTION("Compilation failed");
|
||||
}
|
||||
|
||||
const u32 res = func->compiled(&spu, _ls);
|
||||
|
||||
if (const auto exception = spu.pending_exception)
|
||||
{
|
||||
|
@ -23,8 +23,6 @@ public:
|
||||
// SPU Decoder instance (created per SPU thread)
|
||||
class SPURecompilerDecoder final : public CPUDecoder
|
||||
{
|
||||
std::array<spu_jit_func_t, 0x10000> m_entries = {}; // currently useless
|
||||
|
||||
public:
|
||||
const std::shared_ptr<SPUDatabase> db; // associated SPU Analyser instance
|
||||
|
||||
|
@ -1361,7 +1361,7 @@ void SPUThread::stop_and_signal(u32 code)
|
||||
throw EXCEPTION("Invalid SPU Thread Group");
|
||||
}
|
||||
|
||||
for (auto thread : group->threads)
|
||||
for (auto& thread : group->threads)
|
||||
{
|
||||
if (thread && thread.get() != this)
|
||||
{
|
||||
|
@ -524,6 +524,7 @@ public:
|
||||
|
||||
class SPUThread : public CPUThread
|
||||
{
|
||||
friend class SPURecompilerDecoder;
|
||||
friend class spu_recompiler;
|
||||
|
||||
public:
|
||||
@ -658,6 +659,7 @@ public:
|
||||
|
||||
std::function<void(SPUThread&)> custom_task;
|
||||
std::exception_ptr pending_exception;
|
||||
u32 recursion_level = 0;
|
||||
|
||||
protected:
|
||||
SPUThread(CPUThreadType type, const std::string& name, std::function<std::string()> thread_name, u32 index, u32 offset);
|
||||
|
Loading…
x
Reference in New Issue
Block a user