mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-01-27 03:35:24 +00:00
spu: Recompiler interrupt optimizations - Pigeonhole-optimize the branching pattern that is used to enable and disable interrupts in code; this should lower the number of blocks that are compiled and avoid falling out of a block - Recompiled the interrupt check in some cases to stay in the block instead of falling out to the dispatcher
This commit is contained in:
parent
ad97780c4f
commit
8b476b5bfa
@ -288,6 +288,31 @@ inline asmjit::X86Mem spu_recompiler::XmmConst(__m128i data)
|
||||
return XmmConst(v128::fromV(data));
|
||||
}
|
||||
|
||||
// Emits the interrupt-state handling for an instruction whose opcode carries the d/e bits.
//  - op.d set: emits a locked BTR on bit 0 of interrupts_enabled and nothing else.
//  - op.e set (op.d clear): emits a locked BTS on bit 0 of interrupts_enabled, then a test of
//    ch_event_stat & ch_event_mask & SPU_EVENT_INTR_TEST. If the result is non-zero, the generated
//    code clears bit 0 again, stores *addr into srr0, writes 0 to pc, goes through FunctionCall()
//    and finally reloads *addr from srr0.
//  - neither bit set: emits nothing.
void spu_recompiler::CheckInterruptStatus(spu_opcode_t op)
{
	if (op.d)
	{
		c->lock().btr(SPU_OFF_8(interrupts_enabled), 0);
		return;
	}

	if (!op.e)
	{
		return;
	}

	c->lock().bts(SPU_OFF_8(interrupts_enabled), 0);

	// qw0 = ch_event_stat & ch_event_mask & SPU_EVENT_INTR_TEST
	c->mov(*qw0, SPU_OFF_32(ch_event_stat));
	c->and_(*qw0, SPU_OFF_32(ch_event_mask));
	c->and_(*qw0, SPU_EVENT_INTR_TEST);
	c->cmp(*qw0, 0);

	// Zero result: jump straight past the interrupt path
	asmjit::Label skip_interrupt = c->newLabel();
	c->je(skip_interrupt);

	// Interrupt path: clear the enable bit, keep the branch target in srr0 and set pc to 0
	c->lock().btr(SPU_OFF_8(interrupts_enabled), 0);
	c->mov(SPU_OFF_32(srr0), *addr);
	c->mov(SPU_OFF_32(pc), 0);

	FunctionCall();

	// Reload the branch target from srr0 once the call has been emitted
	c->mov(*addr, SPU_OFF_32(srr0));

	c->bind(skip_interrupt);
	c->unuse(*qw0);
}
|
||||
|
||||
void spu_recompiler::InterpreterCall(spu_opcode_t op)
|
||||
{
|
||||
auto gate = [](SPUThread* _spu, u32 opcode, spu_inter_func_t _func) noexcept -> u32
|
||||
@ -1013,7 +1038,7 @@ void spu_recompiler::BI(spu_opcode_t op)
|
||||
{
|
||||
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
|
||||
c->and_(*addr, 0x3fffc);
|
||||
if (op.d || op.e) c->or_(*addr, op.e << 26 | op.d << 27); // interrupt flags neutralize jump table
|
||||
CheckInterruptStatus(op);
|
||||
c->jmp(*jt);
|
||||
}
|
||||
|
||||
@ -1037,7 +1062,7 @@ void spu_recompiler::IRET(spu_opcode_t op)
|
||||
{
|
||||
c->mov(*addr, SPU_OFF_32(srr0));
|
||||
c->and_(*addr, 0x3fffc);
|
||||
if (op.d || op.e) c->or_(*addr, op.e << 26 | op.d << 27); // interrupt flags neutralize jump table
|
||||
CheckInterruptStatus(op);
|
||||
c->jmp(*jt);
|
||||
}
|
||||
|
||||
|
@ -78,6 +78,7 @@ private:
|
||||
asmjit::X86Mem XmmConst(__m128i data);
|
||||
|
||||
public:
|
||||
// Emits code that updates the interrupts_enabled flag according to the d/e bits of `op`
void CheckInterruptStatus(spu_opcode_t op);
|
||||
void InterpreterCall(spu_opcode_t op);
|
||||
void FunctionCall();
|
||||
|
||||
|
@ -78,6 +78,9 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
|
||||
// Minimal position of ila $SP,* instruction
|
||||
u32 ila_sp_pos = max_limit;
|
||||
|
||||
// Pigeonhole optimization: target address of the preceding `ila r2, addr` instruction, or 0 if the previous instruction was not one
|
||||
u32 ila_r2_addr = 0;
|
||||
|
||||
// Find preliminary set of possible block entries (first pass), `start` is the current block address
|
||||
for (u32 start = entry, pos = entry; pos < limit; pos += 4)
|
||||
{
|
||||
@ -173,11 +176,19 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
|
||||
limit = pos + 4;
|
||||
break;
|
||||
}
|
||||
|
||||
// if upcoming instruction is not BI, reset the pigeonhole optimization
|
||||
// todo: can constant propagation somewhere get rid of this check?
|
||||
if ((type != BI))
|
||||
ila_r2_addr = 0; // reset
|
||||
|
||||
if (type == BI || type == IRET) // Branch Indirect
|
||||
{
|
||||
blocks.emplace(start);
|
||||
start = pos + 4;
|
||||
|
||||
if (op.ra == 2 && ila_r2_addr > entry)
|
||||
blocks.emplace(ila_r2_addr);
|
||||
}
|
||||
else if (type == BR || type == BRA) // Branch Relative/Absolute
|
||||
{
|
||||
@ -233,6 +244,13 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
|
||||
blocks.emplace(target);
|
||||
}
|
||||
}
|
||||
else if (type == LNOP || type == NOP) {
|
||||
// there's a chance that there are some random lnops/nops after the end of a function
|
||||
// haven't found a definite pattern, but it is an easy optimization to check for: just push start down if an lnop/nop is tagged as a start
|
||||
// todo: remove the last added start pos, as it's probably unnecessary
|
||||
if (pos == start)
|
||||
start = pos + 4;
|
||||
}
|
||||
else // Other instructions (writing rt reg)
|
||||
{
|
||||
const u32 rt = type & spu_itype::_quadrop ? +op.rt4 : +op.rt;
|
||||
@ -241,9 +259,8 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
|
||||
if (rt == 0)
|
||||
{
|
||||
}
|
||||
|
||||
// Analyse stack pointer access
|
||||
if (rt == 1)
|
||||
else if (rt == 1)
|
||||
{
|
||||
if (type == ILA && pos < ila_sp_pos)
|
||||
{
|
||||
@ -251,6 +268,13 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
|
||||
ila_sp_pos = pos;
|
||||
}
|
||||
}
|
||||
// pigeonhole optimize
|
||||
// ila r2, addr
|
||||
// bi r2
|
||||
else if (rt == 2) {
|
||||
if (type == ILA)
|
||||
ila_r2_addr = spu_branch_target(op.i18);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user