mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-03-15 22:21:25 +00:00
Update SPU cache (v2)
Improve SPU analyser: filter unreachable fragments; stricter NOP/LNOP analysis; fill block predecessors info. ASMJIT: fix assertion and improve indirect branch.
This commit is contained in:
parent
84a4671a0e
commit
3c70645f0b
@ -1156,11 +1156,11 @@ void spu_recompiler::branch_fixed(u32 target)
|
||||
c->jmp(x86::rax);
|
||||
}
|
||||
|
||||
void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt)
|
||||
void spu_recompiler::branch_indirect(spu_opcode_t op, bool local)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
if (g_cfg.core.spu_block_size == spu_block_size_type::safe && !jt)
|
||||
if (g_cfg.core.spu_block_size == spu_block_size_type::safe && !local)
|
||||
{
|
||||
// Simply external call (return or indirect call)
|
||||
c->mov(x86::r10, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&SPUThread::jit_dispatcher)));
|
||||
@ -2660,13 +2660,13 @@ void spu_recompiler::BIZ(spu_opcode_t op)
|
||||
c->cmp(SPU_OFF_32(gpr, op.rt, &v128::_u32, 3), 0);
|
||||
c->je(branch_label);
|
||||
|
||||
after.emplace_back([=]
|
||||
after.emplace_back([=, jt = m_targets[m_pos].size() > 1]
|
||||
{
|
||||
c->align(asmjit::kAlignCode, 16);
|
||||
c->bind(branch_label);
|
||||
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
|
||||
c->and_(*addr, 0x3fffc);
|
||||
branch_indirect(op);
|
||||
branch_indirect(op, jt);
|
||||
});
|
||||
}
|
||||
|
||||
@ -2676,13 +2676,13 @@ void spu_recompiler::BINZ(spu_opcode_t op)
|
||||
c->cmp(SPU_OFF_32(gpr, op.rt, &v128::_u32, 3), 0);
|
||||
c->jne(branch_label);
|
||||
|
||||
after.emplace_back([=]
|
||||
after.emplace_back([=, jt = m_targets[m_pos].size() > 1]
|
||||
{
|
||||
c->align(asmjit::kAlignCode, 16);
|
||||
c->bind(branch_label);
|
||||
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
|
||||
c->and_(*addr, 0x3fffc);
|
||||
branch_indirect(op);
|
||||
branch_indirect(op, jt);
|
||||
});
|
||||
}
|
||||
|
||||
@ -2692,13 +2692,13 @@ void spu_recompiler::BIHZ(spu_opcode_t op)
|
||||
c->cmp(SPU_OFF_16(gpr, op.rt, &v128::_u16, 6), 0);
|
||||
c->je(branch_label);
|
||||
|
||||
after.emplace_back([=]
|
||||
after.emplace_back([=, jt = m_targets[m_pos].size() > 1]
|
||||
{
|
||||
c->align(asmjit::kAlignCode, 16);
|
||||
c->bind(branch_label);
|
||||
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
|
||||
c->and_(*addr, 0x3fffc);
|
||||
branch_indirect(op);
|
||||
branch_indirect(op, jt);
|
||||
});
|
||||
}
|
||||
|
||||
@ -2708,13 +2708,13 @@ void spu_recompiler::BIHNZ(spu_opcode_t op)
|
||||
c->cmp(SPU_OFF_16(gpr, op.rt, &v128::_u16, 6), 0);
|
||||
c->jne(branch_label);
|
||||
|
||||
after.emplace_back([=]
|
||||
after.emplace_back([=, jt = m_targets[m_pos].size() > 1]
|
||||
{
|
||||
c->align(asmjit::kAlignCode, 16);
|
||||
c->bind(branch_label);
|
||||
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
|
||||
c->and_(*addr, 0x3fffc);
|
||||
branch_indirect(op);
|
||||
branch_indirect(op, jt);
|
||||
});
|
||||
}
|
||||
|
||||
@ -2750,7 +2750,7 @@ void spu_recompiler::BI(spu_opcode_t op)
|
||||
{
|
||||
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
|
||||
c->and_(*addr, 0x3fffc);
|
||||
branch_indirect(op, verify(HERE, m_targets[m_pos].size()) > 2);
|
||||
branch_indirect(op, m_targets.find(m_pos) != m_targets.end());
|
||||
m_pos = -1;
|
||||
}
|
||||
|
||||
@ -2761,7 +2761,7 @@ void spu_recompiler::BISL(spu_opcode_t op)
|
||||
const XmmLink& vr = XmmAlloc();
|
||||
c->movdqa(vr, XmmConst(_mm_set_epi32(spu_branch_target(m_pos + 4), 0, 0, 0)));
|
||||
c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
|
||||
branch_indirect(op);
|
||||
branch_indirect(op, m_targets.find(m_pos) != m_targets.end());
|
||||
m_pos = -1;
|
||||
}
|
||||
|
||||
|
@ -107,7 +107,7 @@ private:
|
||||
asmjit::X86Mem XmmConst(__m128i data);
|
||||
|
||||
void branch_fixed(u32 target);
|
||||
void branch_indirect(spu_opcode_t op, bool jt = false);
|
||||
void branch_indirect(spu_opcode_t op, bool local = false);
|
||||
void fall(spu_opcode_t op);
|
||||
void save_rcx();
|
||||
void load_rcx();
|
||||
|
@ -89,7 +89,7 @@ void spu_cache::initialize()
|
||||
}
|
||||
|
||||
// SPU cache file (version + block size type)
|
||||
const std::string loc = _main->cache + u8"spu-§" + fmt::to_lower(g_cfg.core.spu_block_size.to_string()) + "-v1.dat";
|
||||
const std::string loc = _main->cache + u8"spu-§" + fmt::to_lower(g_cfg.core.spu_block_size.to_string()) + "-v2.dat";
|
||||
|
||||
auto cache = std::make_shared<spu_cache>(loc);
|
||||
|
||||
@ -140,6 +140,11 @@ void spu_cache::initialize()
|
||||
// Call analyser
|
||||
std::vector<u32> func2 = compiler->block(ls.data(), func[0]);
|
||||
|
||||
if (func2.size() != func.size())
|
||||
{
|
||||
LOG_ERROR(SPU, "[0x%05x] SPU Analyser failed, %u vs %u", func2[0], func2.size() - 1, func.size() - 1);
|
||||
}
|
||||
|
||||
compiler->compile(std::move(func));
|
||||
|
||||
// Clear fake LS
|
||||
@ -281,6 +286,7 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
|
||||
|
||||
m_regmod.fill(0xff);
|
||||
m_targets.clear();
|
||||
m_preds.clear();
|
||||
|
||||
// Value flags (TODO)
|
||||
enum class vf : u32
|
||||
@ -306,6 +312,8 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
|
||||
wi++;
|
||||
};
|
||||
|
||||
const u32 pos = wl[wi];
|
||||
|
||||
const auto add_block = [&](u32 target)
|
||||
{
|
||||
// Verify validity of the new target (TODO)
|
||||
@ -316,12 +324,21 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
|
||||
{
|
||||
m_block_info[target / 4] = true;
|
||||
wl.push_back(target);
|
||||
return;
|
||||
}
|
||||
|
||||
// Add predecessor (check if already exists)
|
||||
for (u32 pred : m_preds[target])
|
||||
{
|
||||
if (pred == pos)
|
||||
{
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
m_preds[target].push_back(pos);
|
||||
}
|
||||
};
|
||||
|
||||
const u32 pos = wl[wi];
|
||||
const u32 data = ls[pos / 4];
|
||||
const auto op = spu_opcode_t{data};
|
||||
|
||||
@ -779,8 +796,33 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
|
||||
}
|
||||
}
|
||||
|
||||
if (g_cfg.core.spu_block_size == spu_block_size_type::safe)
|
||||
while (g_cfg.core.spu_block_size == spu_block_size_type::safe)
|
||||
{
|
||||
const u32 initial_size = result.size();
|
||||
|
||||
// Check unreachable blocks in safe mode (TODO)
|
||||
u32 limit = lsa + result.size() * 4 - 4;
|
||||
|
||||
for (auto& pair : m_preds)
|
||||
{
|
||||
bool reachable = false;
|
||||
|
||||
for (u32 pred : pair.second)
|
||||
{
|
||||
if (pred >= lsa && pred < limit)
|
||||
{
|
||||
reachable = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!reachable && pair.first < limit)
|
||||
{
|
||||
limit = pair.first;
|
||||
}
|
||||
}
|
||||
|
||||
result.resize((limit - lsa) / 4 + 1);
|
||||
|
||||
// Check holes in safe mode (TODO)
|
||||
u32 valid_size = 0;
|
||||
|
||||
@ -790,13 +832,13 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
|
||||
{
|
||||
const u32 pos = lsa + (i - 1) * 4;
|
||||
const u32 data = ls[pos / 4];
|
||||
const auto type = s_spu_itype.decode(data);
|
||||
|
||||
// Allow only NOP or LNOP instructions in holes
|
||||
if (type == spu_itype::NOP || type == spu_itype::LNOP)
|
||||
if (data == 0x200000 || (data & 0xffffff80) == 0x40200000)
|
||||
{
|
||||
if (i + 1 < result.size())
|
||||
{
|
||||
result[i] = se_storage<u32>::swap(data);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@ -809,6 +851,12 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
|
||||
valid_size = i;
|
||||
}
|
||||
}
|
||||
|
||||
// Repeat if blocks were removed
|
||||
if (result.size() == initial_size)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (result.size() == 1)
|
||||
|
@ -45,6 +45,9 @@ protected:
|
||||
// List of possible targets for the instruction ({} = next instruction, {-1} = no targets)
|
||||
std::unordered_map<u32, std::basic_string<u32>, value_hash<u32, 2>> m_targets;
|
||||
|
||||
// List of block predecessors
|
||||
std::unordered_map<u32, std::basic_string<u32>, value_hash<u32, 2>> m_preds;
|
||||
|
||||
std::shared_ptr<spu_cache> m_cache;
|
||||
|
||||
public:
|
||||
|
Loading…
x
Reference in New Issue
Block a user