spu: Recompiler Interrupt optimizations - Pigeonhole optimize for the branching pattern that is used to enable and disable interrupts in code; this should lower the number of blocks that are compiled and avoid falling out of a block - Recompiled interrupt check in some cases to stay in the block instead of falling out to the dispatcher

This commit is contained in:
Jake 2017-11-30 20:50:01 -06:00 committed by kd-11
parent ad97780c4f
commit 8b476b5bfa
3 changed files with 54 additions and 4 deletions

View File

@ -288,6 +288,31 @@ inline asmjit::X86Mem spu_recompiler::XmmConst(__m128i data)
return XmmConst(v128::fromV(data));
}
// Emits the code that applies the interrupt-control bits of a branch opcode.
//  - op.d set: bit 0 of `interrupts_enabled` is atomically cleared.
//  - op.e set (and op.d clear): bit 0 is atomically set, then
//    ch_event_stat & ch_event_mask & SPU_EVENT_INTR_TEST is evaluated at run
//    time; when it is non-zero the bit is cleared again, the branch target in
//    `addr` is stored to srr0, pc is set to 0 and FunctionCall() is emitted.
//  - neither set: nothing is emitted.
void spu_recompiler::CheckInterruptStatus(spu_opcode_t op)
{
	// The disable bit is tested first, so it wins when both bits are set.
	if (op.d)
	{
		c->lock().btr(SPU_OFF_8(interrupts_enabled), 0);
		return;
	}

	if (!op.e)
	{
		return;
	}

	c->lock().bts(SPU_OFF_8(interrupts_enabled), 0);

	// qw0 = ch_event_stat & ch_event_mask & SPU_EVENT_INTR_TEST
	c->mov(*qw0, SPU_OFF_32(ch_event_stat));
	c->and_(*qw0, SPU_OFF_32(ch_event_mask));
	c->and_(*qw0, SPU_EVENT_INTR_TEST);
	// NOTE(review): the preceding and_ presumably already sets ZF, which would
	// make this compare redundant - confirm before removing.
	c->cmp(*qw0, 0);

	// Nothing pending: jump straight past the interrupt entry sequence.
	asmjit::Label skip_interrupt = c->newLabel();
	c->je(skip_interrupt);

	// Interrupt entry: clear the enable bit, save the branch target in srr0
	// and redirect pc to 0 before the call.
	c->lock().btr(SPU_OFF_8(interrupts_enabled), 0);
	c->mov(SPU_OFF_32(srr0), *addr);
	c->mov(SPU_OFF_32(pc), 0);
	FunctionCall(); // presumably runs the code at pc = 0 - confirm against FunctionCall()

	// Reload the branch target from srr0 after the call.
	c->mov(*addr, SPU_OFF_32(srr0));

	c->bind(skip_interrupt);
	c->unuse(*qw0);
}
void spu_recompiler::InterpreterCall(spu_opcode_t op)
{
auto gate = [](SPUThread* _spu, u32 opcode, spu_inter_func_t _func) noexcept -> u32
@ -1013,7 +1038,7 @@ void spu_recompiler::BI(spu_opcode_t op)
{
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
c->and_(*addr, 0x3fffc);
if (op.d || op.e) c->or_(*addr, op.e << 26 | op.d << 27); // interrupt flags neutralize jump table
CheckInterruptStatus(op);
c->jmp(*jt);
}
@ -1037,7 +1062,7 @@ void spu_recompiler::IRET(spu_opcode_t op)
{
c->mov(*addr, SPU_OFF_32(srr0));
c->and_(*addr, 0x3fffc);
if (op.d || op.e) c->or_(*addr, op.e << 26 | op.d << 27); // interrupt flags neutralize jump table
CheckInterruptStatus(op);
c->jmp(*jt);
}

View File

@ -78,6 +78,7 @@ private:
asmjit::X86Mem XmmConst(__m128i data);
public:
void CheckInterruptStatus(spu_opcode_t op);
void InterpreterCall(spu_opcode_t op);
void FunctionCall();

View File

@ -78,6 +78,9 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
// Minimal position of ila $SP,* instruction
u32 ila_sp_pos = max_limit;
// Pigeonhole optimization: target address of the last `ila r2, addr`, or 0 if the previous instruction was not one
u32 ila_r2_addr = 0;
// Find preliminary set of possible block entries (first pass), `start` is the current block address
for (u32 start = entry, pos = entry; pos < limit; pos += 4)
{
@ -173,11 +176,19 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
limit = pos + 4;
break;
}
// if upcoming instruction is not BI, reset the pigeonhole optimization
// todo: can constant propagation somewhere get rid of this check?
if ((type != BI))
ila_r2_addr = 0; // reset
if (type == BI || type == IRET) // Branch Indirect
{
blocks.emplace(start);
start = pos + 4;
if (op.ra == 2 && ila_r2_addr > entry)
blocks.emplace(ila_r2_addr);
}
else if (type == BR || type == BRA) // Branch Relative/Absolute
{
@ -233,6 +244,13 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
blocks.emplace(target);
}
}
else if (type == LNOP || type == NOP) {
// there's a chance that there are some random lnops/nops after the end of a function
// haven't found a definite pattern, but it is an easy optimization to check for: just push start down if an lnop is tagged as a start
// todo: remove the last added start pos as it's probably unnecessary
if (pos == start)
start = pos + 4;
}
else // Other instructions (writing rt reg)
{
const u32 rt = type & spu_itype::_quadrop ? +op.rt4 : +op.rt;
@ -241,9 +259,8 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
if (rt == 0)
{
}
// Analyse stack pointer access
if (rt == 1)
else if (rt == 1)
{
if (type == ILA && pos < ila_sp_pos)
{
@ -251,6 +268,13 @@ spu_function_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, u32 max_lim
ila_sp_pos = pos;
}
}
// pigeonhole optimize
// ila r2, addr
// bi r2
else if (rt == 2) {
if (type == ILA)
ila_r2_addr = spu_branch_target(op.i18);
}
}
}