SPU ASMJIT: simplify patchpoints

Remove SPU thread reference from spu_recompiler_base
Disable support for far jumps in patchpoints (they were rare and unsafe)
This commit is contained in:
Nekotekina 2018-05-03 15:55:45 +03:00
parent 1ca51a023c
commit 8f91917e8c
5 changed files with 43 additions and 91 deletions

View File

@ -25,9 +25,9 @@ const spu_decoder<spu_recompiler> s_spu_decoder;
extern u64 get_timebased_time();
std::unique_ptr<spu_recompiler_base> spu_recompiler_base::make_asmjit_recompiler(SPUThread& spu)
std::unique_ptr<spu_recompiler_base> spu_recompiler_base::make_asmjit_recompiler()
{
return std::make_unique<spu_recompiler>(spu);
return std::make_unique<spu_recompiler>();
}
spu_runtime::spu_runtime()
@ -44,8 +44,7 @@ spu_runtime::spu_runtime()
m_map[std::vector<u32>()] = &spu_recompiler_base::dispatch;
}
spu_recompiler::spu_recompiler(SPUThread& spu)
: spu_recompiler_base(spu)
spu_recompiler::spu_recompiler()
{
if (!g_cfg.core.spu_shared_runtime)
{
@ -1115,15 +1114,18 @@ void spu_recompiler::branch_fixed(u32 target)
c->cmp(SPU_OFF_32(state), 0);
c->jz(local->second);
c->mov(SPU_OFF_32(pc), target);
c->ret();
c->jmp(label_stop);
return;
}
if (g_cfg.core.spu_block_size == spu_block_size_type::giga)
c->mov(x86::rax, x86::qword_ptr(*cpu, offset32(&SPUThread::jit_dispatcher) + target * 2));
c->mov(SPU_OFF_32(pc), target);
c->cmp(SPU_OFF_32(state), 0);
c->jnz(label_stop);
if (false)
{
// Don't generate patch points in this mode
c->mov(x86::rax, x86::qword_ptr(*cpu, offset32(&SPUThread::jit_dispatcher) + target * 2));
c->mov(SPU_OFF_32(pc), target);
// Don't generate patch points (TODO)
c->xor_(qw0->r32(), qw0->r32());
c->jmp(x86::rax);
return;
@ -1132,40 +1134,17 @@ void spu_recompiler::branch_fixed(u32 target)
// Set patch address as a third argument and fallback to it
Label patch_point = c->newLabel();
c->lea(*qw0, x86::qword_ptr(patch_point));
c->mov(SPU_OFF_32(pc), target);
// Need to emit exactly one executable instruction within 8 bytes
c->align(kAlignCode, 8);
c->bind(patch_point);
//c->dq(0x841f0f);
c->jmp(imm_ptr(&spu_recompiler_base::branch));
const auto result = m_spurt->m_map.emplace(block(m_spu, target), nullptr);
if (result.second || !result.first->second)
{
if (result.first->first.size())
{
// Target block hasn't been compiled yet, record overwriting position
c->jmp(imm_ptr(&spu_recompiler_base::branch));
}
else
{
// SPURS Workload entry point or similar thing (emit 8-byte NOP)
c->dq(0x841f0f);
}
}
else
{
c->jmp(imm_ptr(result.first->second));
}
// Branch via dispatcher (occupies 16 bytes including padding)
// Fallback to the branch via dispatcher
c->align(kAlignCode, 8);
c->mov(x86::rax, x86::qword_ptr(*cpu, offset32(&SPUThread::jit_dispatcher) + target * 2));
c->xor_(qw0->r32(), qw0->r32());
c->jmp(x86::rax);
c->align(kAlignCode, 8);
c->dq(reinterpret_cast<u64>(&*result.first));
c->dq(reinterpret_cast<u64>(result.first->second));
}
void spu_recompiler::branch_indirect(spu_opcode_t op)

View File

@ -35,7 +35,7 @@ class spu_recompiler : public spu_recompiler_base
std::shared_ptr<spu_runtime> m_spurt;
public:
spu_recompiler(class SPUThread& spu);
spu_recompiler();
virtual spu_function_t get(u32 lsa) override;

View File

@ -17,11 +17,8 @@ extern u64 get_system_time();
const spu_decoder<spu_itype> s_spu_itype;
spu_recompiler_base::spu_recompiler_base(SPUThread& spu)
: m_spu(spu)
spu_recompiler_base::spu_recompiler_base()
{
// Initialize lookup table
spu.jit_dispatcher.fill(&dispatch);
}
spu_recompiler_base::~spu_recompiler_base()
@ -30,7 +27,7 @@ spu_recompiler_base::~spu_recompiler_base()
void spu_recompiler_base::dispatch(SPUThread& spu, void*, u8* rip)
{
// If check failed after direct branch, patch it with single NOP
// If code verification failed from a patched patchpoint, clear it with a single NOP
if (rip)
{
#ifdef _MSC_VER
@ -63,16 +60,9 @@ void spu_recompiler_base::dispatch(SPUThread& spu, void*, u8* rip)
void spu_recompiler_base::branch(SPUThread& spu, void*, u8* rip)
{
const auto pair = *reinterpret_cast<std::pair<const std::vector<u32>, spu_function_t>**>(rip + 24);
spu.pc = pair->first[0];
const auto func = pair->second ? pair->second : spu.jit->compile(pair->first);
verify(HERE), func, pair->second == func;
// Overwrite function address
reinterpret_cast<atomic_t<spu_function_t>*>(rip + 32)->store(func);
// Compile
const auto func = verify(HERE, spu.jit->compile(block(spu, spu.pc, &spu.jit->m_block_info)));
spu.jit_dispatcher[spu.pc / 4] = spu.jit->get(spu.pc);
// Overwrite jump to this function with jump to the compiled function
const s64 rel = reinterpret_cast<u64>(func) - reinterpret_cast<u64>(rip) - 5;
@ -98,14 +88,11 @@ void spu_recompiler_base::branch(SPUThread& spu, void*, u8* rip)
}
else
{
bytes[0] = 0xff; // jmp [rip+26]
bytes[1] = 0x25;
bytes[2] = 0x1a;
bytes[3] = 0x00;
bytes[4] = 0x00;
bytes[5] = 0x00;
bytes[6] = 0x90;
bytes[7] = 0x90;
// Far jumps: extremely rare and disabled due to implementation complexity
bytes[0] = 0x0f; // nop (8-byte form)
bytes[1] = 0x1f;
bytes[2] = 0x84;
std::memset(bytes + 3, 0x00, 5);
}
#ifdef _MSC_VER
@ -200,7 +187,7 @@ std::vector<u32> spu_recompiler_base::block(SPUThread& spu, u32 lsa, std::bitset
//case spu_itype::DFCMGT:
case spu_itype::DFTSV:
{
// Stop on invalid instructions (TODO)
// Stop before invalid instructions (TODO)
blocks[pos / 4] = true;
next_block();
continue;
@ -211,7 +198,7 @@ std::vector<u32> spu_recompiler_base::block(SPUThread& spu, u32 lsa, std::bitset
case spu_itype::STOP:
case spu_itype::STOPD:
{
if (data == 0)
if (data == 0 || data == 0x80)
{
// Stop before null data
blocks[pos / 4] = true;
@ -751,8 +738,8 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
}
public:
spu_llvm_recompiler(class SPUThread& spu)
: spu_recompiler_base(spu)
spu_llvm_recompiler()
: spu_recompiler_base()
, cpu_translator(nullptr, false)
{
if (g_cfg.core.spu_shared_runtime)
@ -1035,21 +1022,6 @@ public:
}
}
if (g_cfg.core.spu_debug)
{
log += '\n';
for (u32 i = 0; i < 128; i++)
{
if (m_gpr[i].first)
{
fmt::append(log, "$% -3u = %s\n", i, m_spu.gpr[i]);
}
}
log += '\n';
}
// Make fallthrough if necessary
if (!m_ir->GetInsertBlock()->getTerminator())
{
@ -2827,9 +2799,9 @@ public:
static const spu_decoder<spu_llvm_recompiler> g_decoder;
};
std::unique_ptr<spu_recompiler_base> spu_recompiler_base::make_llvm_recompiler(SPUThread& spu)
std::unique_ptr<spu_recompiler_base> spu_recompiler_base::make_llvm_recompiler()
{
return std::make_unique<spu_llvm_recompiler>(spu);
return std::make_unique<spu_llvm_recompiler>();
}
DECLARE(spu_llvm_recompiler::g_decoder);

View File

@ -7,15 +7,13 @@
class spu_recompiler_base
{
protected:
SPUThread& m_spu;
u32 m_pos;
u32 m_size;
std::bitset<0x10000> m_block_info;
public:
spu_recompiler_base(SPUThread& spu);
spu_recompiler_base();
virtual ~spu_recompiler_base();
@ -25,18 +23,18 @@ public:
// Compile function
virtual spu_function_t compile(const std::vector<u32>& func) = 0;
// Default dispatch function fallback (second pointer is unused)
static void dispatch(SPUThread&, void*, u8*);
// Default dispatch function fallback (second arg is unused)
static void dispatch(SPUThread&, void*, u8* rip);
// Direct branch fallback for non-compiled destination
static void branch(SPUThread&, void*, u8*);
// Target for the unresolved patch point (second arg is unused)
static void branch(SPUThread&, void*, u8* rip);
// Get the block at specified address
static std::vector<u32> block(SPUThread&, u32 lsa, std::bitset<0x10000>* = nullptr);
// Create recompiler instance (ASMJIT)
static std::unique_ptr<spu_recompiler_base> make_asmjit_recompiler(SPUThread& spu);
static std::unique_ptr<spu_recompiler_base> make_asmjit_recompiler();
// Create recompiler instance (LLVM)
static std::unique_ptr<spu_recompiler_base> make_llvm_recompiler(SPUThread& spu);
static std::unique_ptr<spu_recompiler_base> make_llvm_recompiler();
};

View File

@ -531,13 +531,16 @@ SPUThread::SPUThread(const std::string& name, u32 index, lv2_spu_group* group)
{
if (g_cfg.core.spu_decoder == spu_decoder_type::asmjit)
{
jit = spu_recompiler_base::make_asmjit_recompiler(*this);
jit = spu_recompiler_base::make_asmjit_recompiler();
}
if (g_cfg.core.spu_decoder == spu_decoder_type::llvm)
{
jit = spu_recompiler_base::make_llvm_recompiler(*this);
jit = spu_recompiler_base::make_llvm_recompiler();
}
// Initialize lookup table
jit_dispatcher.fill(&spu_recompiler_base::dispatch);
}
void SPUThread::push_snr(u32 number, u32 value)