SPU LLVM: Initial precompilation of tail-calls

This commit is contained in:
Eladash 2023-09-01 19:38:06 +03:00 committed by Elad Ashkenazi
parent a9810ccb72
commit a626ccfcad
4 changed files with 124 additions and 15 deletions

View File

@ -14,12 +14,13 @@ struct spu_itype
static constexpr struct floating_tag{} floating{}; // Floating-Point Instructions
static constexpr struct quadrop_tag{} _quadrop{}; // 4-op Instructions
static constexpr struct xfloat_tag{} xfloat{}; // Instructions producing xfloat values
static constexpr struct zregmod_tag{} zregmod{}; // Instructions not modifying any GPR
enum type : unsigned char
{
UNK = 0,
HEQ,
HEQ, // zregmod_tag first
HEQI,
HGT,
HGTI,
@ -36,11 +37,21 @@ struct spu_itype
NOP,
SYNC,
DSYNC,
MFSPR,
MTSPR,
WRCH,
STQD, // memory_tag first
STQX,
STQA,
STQR, // zregmod_tag last
LQD,
LQX,
LQA,
LQR, // memory_tag last
MFSPR,
RDCH,
RCHCNT,
WRCH,
BR, // branch_tag first
BRA,
@ -59,15 +70,6 @@ struct spu_itype
BIHZ,
BIHNZ, // branch_tag last
LQD, // memory_tag first
LQX,
LQA,
LQR,
STQD,
STQX,
STQA,
STQR, // memory_tag last
ILH, // constant_tag_first
ILHU,
IL,
@ -267,7 +269,7 @@ struct spu_itype
// Test for memory instruction
// The memory range follows the enum order: STQD is marked "memory_tag first", LQR "memory_tag last"
friend constexpr bool operator &(type value, memory_tag)
{
	return value >= STQD && value <= LQR;
}
// Test for compare instruction
@ -293,6 +295,12 @@ struct spu_itype
{
return value >= ILH && value <= FSMBI;
}
// Test for non register-modifying instruction
// True when value lies inside the inclusive enum range [HEQ, STQR]
friend constexpr bool operator &(type value, zregmod_tag)
{
	const bool before_first = value < HEQ;
	const bool after_last = value > STQR;

	return !(before_first || after_last);
}
};
struct spu_iflag

View File

@ -851,6 +851,13 @@ public:
}
// Disassemble BR: prints "br" with the branch target built from op.i16.
// When the rt field is neither 0 nor 127 the mnemonic is printed as "br??" instead.
void BR(spu_opcode_t op)
{
	const bool rt_is_expected = !op.rt || op.rt == 127u;

	if (rt_is_expected)
	{
		DisAsm("br", DisAsmBranchTarget(op.i16));
	}
	else
	{
		// Valid but makes no sense
		DisAsm("br??", DisAsmBranchTarget(op.i16));
	}
}
void FSMBI(spu_opcode_t op)

View File

@ -2091,21 +2091,25 @@ void spu_recompiler_base::old_interpreter(spu_thread& spu, void* ls, u8* /*rip*/
std::vector<u32> spu_thread::discover_functions(u32 base_addr, std::span<const u8> ls, bool is_known_addr, u32 /*entry*/)
{
std::vector<u32> calls;
std::vector<u32> branches;
calls.reserve(100);
// Discover functions
// Use the most simple method: search for instructions that calls them
// And then filter invalid cases (does not detect tail calls)
// And then filter invalid cases
// TODO: Does not detect jumptables or fixed-addr indirect calls
const v128 brasl_mask = is_known_addr ? v128::from32p(0x62u << 23) : v128::from32p(umax);
for (u32 i = utils::align<u32>(base_addr, 0x10); i < std::min<u32>(base_addr + ls.size(), 0x3FFF0); i += 0x10)
{
// Search for BRSL LR and BRASL LR
// Search for BRSL LR and BRASL LR or BR
// TODO: BISL
const v128 inst = read_from_ptr<be_t<v128>>(ls.data(), i - base_addr);
const v128 cleared_i16 = gv_and32(inst, v128::from32p(utils::rol32(~0xffff, 7)));
const v128 eq_brsl = gv_eq32(cleared_i16, v128::from32p(0x66u << 23));
const v128 eq_brasl = gv_eq32(cleared_i16, brasl_mask);
const v128 eq_br = gv_eq32(cleared_i16, v128::from32p(0x64u << 23));
const v128 result = eq_brsl | eq_brasl;
if (!gv_testz(result))
@ -2118,6 +2122,17 @@ std::vector<u32> spu_thread::discover_functions(u32 base_addr, std::span<const u
}
}
}
if (!gv_testz(eq_br))
{
for (u32 j = 0; j < 4; j++)
{
if (eq_br.u32r[j])
{
branches.push_back(i + j * 4);
}
}
}
}
calls.erase(std::remove_if(calls.begin(), calls.end(), [&](u32 caller)
@ -2126,6 +2141,12 @@ std::vector<u32> spu_thread::discover_functions(u32 base_addr, std::span<const u
return !is_exec_code(caller, ls, base_addr) || !is_exec_code(caller + 4, ls, base_addr);
}), calls.end());
// Filter the collected BR candidates: each element is the address of the branch instruction itself
branches.erase(std::remove_if(branches.begin(), branches.end(), [&](u32 caller)
{
// Drop the candidate when the code at the branch's own address fails is_exec_code
// (unlike the call filter, the following instruction at caller + 4 is not checked here)
return !is_exec_code(caller, ls, base_addr);
}), branches.end());
std::vector<u32> addrs;
for (u32 addr : calls)
@ -2142,6 +2163,69 @@ std::vector<u32> spu_thread::discover_functions(u32 base_addr, std::span<const u
addrs.push_back(func);
}
// Walk every surviving BR candidate and decide whether it looks like a tail call;
// if so, record its branch target as a function entry in addrs
for (u32 addr : branches)
{
	const spu_opcode_t op{read_from_ptr<be_t<u32>>(ls, addr - base_addr)};

	const u32 func = op_branch_targets(addr, op)[0];

	// Skip when no target is reported (umax, presumably the "invalid" marker), when the branch
	// goes to the next instruction or to itself, and when addr is 0 (addr - 4 below would wrap)
	if (func == umax || addr + 4 == func || func == addr || !addr)
	{
		continue;
	}

	// Search for AI R1, +x shortly before the branch
	// Reasoning: AI R1, +x means stack pointer restoration, branch after that is likely a tail call
	// (The "OR R3/4, Rx, 0" argument heuristic is currently disabled, see the commented line below)
	//
	// 'it' is the look-back budget (at most 5 instructions) and is consumed on every step,
	// including 'continue'. 'back < addr' stops the scan if 'back -= 4' wraps around below
	// address 0, which 'back >= base_addr' alone cannot detect when base_addr is 0.
	for (u32 back = addr - 4, it = 5; it && back >= base_addr && back < addr; it--, back -= 4)
	{
		const spu_opcode_t test_op{read_from_ptr<be_t<u32>>(ls, back - base_addr)};
		const auto type = g_spu_itype.decode(test_op.opcode);

		if (type & spu_itype::branch)
		{
			// Another branch precedes this one: stop looking further back
			break;
		}

		bool is_tail = false;

		if (type == spu_itype::AI && test_op.rt == 1u && test_op.ra == 1u)
		{
			if (test_op.si10 <= 0)
			{
				// Non-positive immediate: not the "+x" restoration form
				break;
			}

			is_tail = true;
		}
		else if (!(type & spu_itype::zregmod))
		{
			// Destination register field differs for 4-operand instructions
			const u32 op_rt = type & spu_itype::_quadrop ? +test_op.rt4 : +test_op.rt;

			if (op_rt >= 80u && (type != spu_itype::LQD || test_op.ra != 1u))
			{
				// Modifying non-volatile registers, not a call (and not context restoration)
				break;
			}

			//is_tail = op_rt == 3u || op_rt == 4u;
		}

		if (!is_tail)
		{
			continue;
		}

		// Avoid duplicate entries; either way the scan for this branch is finished
		if (std::count(addrs.begin(), addrs.end(), func))
		{
			break;
		}

		addrs.push_back(func);
		break;
	}
}
std::sort(addrs.begin(), addrs.end());
return addrs;

View File

@ -4042,8 +4042,18 @@ bool spu_thread::is_exec_code(u32 addr, std::span<const u8> ls_ptr, u32 base_add
return false;
}
if (type == spu_itype::STOP && op.rb)
{
return false;
}
if (type & spu_itype::branch)
{
if (type == spu_itype::BR && op.rt && op.rt != 127u)
{
return false;
}
const auto results = op_branch_targets(addr, spu_opcode_t{op});
if (results[0] == umax)