mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-03-16 16:21:02 +00:00
Update SPU cache (v2)
Improve SPU analyser: filter unreachable fragments. More strict NOP/LNOP analysis. Fill block predecessors info. ASMJIT: fix assertion and improve indirect branch.
This commit is contained in:
parent
84a4671a0e
commit
3c70645f0b
@ -1156,11 +1156,11 @@ void spu_recompiler::branch_fixed(u32 target)
|
|||||||
c->jmp(x86::rax);
|
c->jmp(x86::rax);
|
||||||
}
|
}
|
||||||
|
|
||||||
void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt)
|
void spu_recompiler::branch_indirect(spu_opcode_t op, bool local)
|
||||||
{
|
{
|
||||||
using namespace asmjit;
|
using namespace asmjit;
|
||||||
|
|
||||||
if (g_cfg.core.spu_block_size == spu_block_size_type::safe && !jt)
|
if (g_cfg.core.spu_block_size == spu_block_size_type::safe && !local)
|
||||||
{
|
{
|
||||||
// Simply external call (return or indirect call)
|
// Simply external call (return or indirect call)
|
||||||
c->mov(x86::r10, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&SPUThread::jit_dispatcher)));
|
c->mov(x86::r10, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&SPUThread::jit_dispatcher)));
|
||||||
@ -2660,13 +2660,13 @@ void spu_recompiler::BIZ(spu_opcode_t op)
|
|||||||
c->cmp(SPU_OFF_32(gpr, op.rt, &v128::_u32, 3), 0);
|
c->cmp(SPU_OFF_32(gpr, op.rt, &v128::_u32, 3), 0);
|
||||||
c->je(branch_label);
|
c->je(branch_label);
|
||||||
|
|
||||||
after.emplace_back([=]
|
after.emplace_back([=, jt = m_targets[m_pos].size() > 1]
|
||||||
{
|
{
|
||||||
c->align(asmjit::kAlignCode, 16);
|
c->align(asmjit::kAlignCode, 16);
|
||||||
c->bind(branch_label);
|
c->bind(branch_label);
|
||||||
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
|
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
|
||||||
c->and_(*addr, 0x3fffc);
|
c->and_(*addr, 0x3fffc);
|
||||||
branch_indirect(op);
|
branch_indirect(op, jt);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2676,13 +2676,13 @@ void spu_recompiler::BINZ(spu_opcode_t op)
|
|||||||
c->cmp(SPU_OFF_32(gpr, op.rt, &v128::_u32, 3), 0);
|
c->cmp(SPU_OFF_32(gpr, op.rt, &v128::_u32, 3), 0);
|
||||||
c->jne(branch_label);
|
c->jne(branch_label);
|
||||||
|
|
||||||
after.emplace_back([=]
|
after.emplace_back([=, jt = m_targets[m_pos].size() > 1]
|
||||||
{
|
{
|
||||||
c->align(asmjit::kAlignCode, 16);
|
c->align(asmjit::kAlignCode, 16);
|
||||||
c->bind(branch_label);
|
c->bind(branch_label);
|
||||||
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
|
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
|
||||||
c->and_(*addr, 0x3fffc);
|
c->and_(*addr, 0x3fffc);
|
||||||
branch_indirect(op);
|
branch_indirect(op, jt);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2692,13 +2692,13 @@ void spu_recompiler::BIHZ(spu_opcode_t op)
|
|||||||
c->cmp(SPU_OFF_16(gpr, op.rt, &v128::_u16, 6), 0);
|
c->cmp(SPU_OFF_16(gpr, op.rt, &v128::_u16, 6), 0);
|
||||||
c->je(branch_label);
|
c->je(branch_label);
|
||||||
|
|
||||||
after.emplace_back([=]
|
after.emplace_back([=, jt = m_targets[m_pos].size() > 1]
|
||||||
{
|
{
|
||||||
c->align(asmjit::kAlignCode, 16);
|
c->align(asmjit::kAlignCode, 16);
|
||||||
c->bind(branch_label);
|
c->bind(branch_label);
|
||||||
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
|
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
|
||||||
c->and_(*addr, 0x3fffc);
|
c->and_(*addr, 0x3fffc);
|
||||||
branch_indirect(op);
|
branch_indirect(op, jt);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2708,13 +2708,13 @@ void spu_recompiler::BIHNZ(spu_opcode_t op)
|
|||||||
c->cmp(SPU_OFF_16(gpr, op.rt, &v128::_u16, 6), 0);
|
c->cmp(SPU_OFF_16(gpr, op.rt, &v128::_u16, 6), 0);
|
||||||
c->jne(branch_label);
|
c->jne(branch_label);
|
||||||
|
|
||||||
after.emplace_back([=]
|
after.emplace_back([=, jt = m_targets[m_pos].size() > 1]
|
||||||
{
|
{
|
||||||
c->align(asmjit::kAlignCode, 16);
|
c->align(asmjit::kAlignCode, 16);
|
||||||
c->bind(branch_label);
|
c->bind(branch_label);
|
||||||
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
|
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
|
||||||
c->and_(*addr, 0x3fffc);
|
c->and_(*addr, 0x3fffc);
|
||||||
branch_indirect(op);
|
branch_indirect(op, jt);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2750,7 +2750,7 @@ void spu_recompiler::BI(spu_opcode_t op)
|
|||||||
{
|
{
|
||||||
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
|
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
|
||||||
c->and_(*addr, 0x3fffc);
|
c->and_(*addr, 0x3fffc);
|
||||||
branch_indirect(op, verify(HERE, m_targets[m_pos].size()) > 2);
|
branch_indirect(op, m_targets.find(m_pos) != m_targets.end());
|
||||||
m_pos = -1;
|
m_pos = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2761,7 +2761,7 @@ void spu_recompiler::BISL(spu_opcode_t op)
|
|||||||
const XmmLink& vr = XmmAlloc();
|
const XmmLink& vr = XmmAlloc();
|
||||||
c->movdqa(vr, XmmConst(_mm_set_epi32(spu_branch_target(m_pos + 4), 0, 0, 0)));
|
c->movdqa(vr, XmmConst(_mm_set_epi32(spu_branch_target(m_pos + 4), 0, 0, 0)));
|
||||||
c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
|
c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
|
||||||
branch_indirect(op);
|
branch_indirect(op, m_targets.find(m_pos) != m_targets.end());
|
||||||
m_pos = -1;
|
m_pos = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -107,7 +107,7 @@ private:
|
|||||||
asmjit::X86Mem XmmConst(__m128i data);
|
asmjit::X86Mem XmmConst(__m128i data);
|
||||||
|
|
||||||
void branch_fixed(u32 target);
|
void branch_fixed(u32 target);
|
||||||
void branch_indirect(spu_opcode_t op, bool jt = false);
|
void branch_indirect(spu_opcode_t op, bool local = false);
|
||||||
void fall(spu_opcode_t op);
|
void fall(spu_opcode_t op);
|
||||||
void save_rcx();
|
void save_rcx();
|
||||||
void load_rcx();
|
void load_rcx();
|
||||||
|
@ -89,7 +89,7 @@ void spu_cache::initialize()
|
|||||||
}
|
}
|
||||||
|
|
||||||
// SPU cache file (version + block size type)
|
// SPU cache file (version + block size type)
|
||||||
const std::string loc = _main->cache + u8"spu-§" + fmt::to_lower(g_cfg.core.spu_block_size.to_string()) + "-v1.dat";
|
const std::string loc = _main->cache + u8"spu-§" + fmt::to_lower(g_cfg.core.spu_block_size.to_string()) + "-v2.dat";
|
||||||
|
|
||||||
auto cache = std::make_shared<spu_cache>(loc);
|
auto cache = std::make_shared<spu_cache>(loc);
|
||||||
|
|
||||||
@ -140,6 +140,11 @@ void spu_cache::initialize()
|
|||||||
// Call analyser
|
// Call analyser
|
||||||
std::vector<u32> func2 = compiler->block(ls.data(), func[0]);
|
std::vector<u32> func2 = compiler->block(ls.data(), func[0]);
|
||||||
|
|
||||||
|
if (func2.size() != func.size())
|
||||||
|
{
|
||||||
|
LOG_ERROR(SPU, "[0x%05x] SPU Analyser failed, %u vs %u", func2[0], func2.size() - 1, func.size() - 1);
|
||||||
|
}
|
||||||
|
|
||||||
compiler->compile(std::move(func));
|
compiler->compile(std::move(func));
|
||||||
|
|
||||||
// Clear fake LS
|
// Clear fake LS
|
||||||
@ -281,6 +286,7 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
|
|||||||
|
|
||||||
m_regmod.fill(0xff);
|
m_regmod.fill(0xff);
|
||||||
m_targets.clear();
|
m_targets.clear();
|
||||||
|
m_preds.clear();
|
||||||
|
|
||||||
// Value flags (TODO)
|
// Value flags (TODO)
|
||||||
enum class vf : u32
|
enum class vf : u32
|
||||||
@ -306,6 +312,8 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
|
|||||||
wi++;
|
wi++;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const u32 pos = wl[wi];
|
||||||
|
|
||||||
const auto add_block = [&](u32 target)
|
const auto add_block = [&](u32 target)
|
||||||
{
|
{
|
||||||
// Verify validity of the new target (TODO)
|
// Verify validity of the new target (TODO)
|
||||||
@ -316,12 +324,21 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
|
|||||||
{
|
{
|
||||||
m_block_info[target / 4] = true;
|
m_block_info[target / 4] = true;
|
||||||
wl.push_back(target);
|
wl.push_back(target);
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Add predecessor (check if already exists)
|
||||||
|
for (u32 pred : m_preds[target])
|
||||||
|
{
|
||||||
|
if (pred == pos)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
m_preds[target].push_back(pos);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const u32 pos = wl[wi];
|
|
||||||
const u32 data = ls[pos / 4];
|
const u32 data = ls[pos / 4];
|
||||||
const auto op = spu_opcode_t{data};
|
const auto op = spu_opcode_t{data};
|
||||||
|
|
||||||
@ -779,8 +796,33 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (g_cfg.core.spu_block_size == spu_block_size_type::safe)
|
while (g_cfg.core.spu_block_size == spu_block_size_type::safe)
|
||||||
{
|
{
|
||||||
|
const u32 initial_size = result.size();
|
||||||
|
|
||||||
|
// Check unreachable blocks in safe mode (TODO)
|
||||||
|
u32 limit = lsa + result.size() * 4 - 4;
|
||||||
|
|
||||||
|
for (auto& pair : m_preds)
|
||||||
|
{
|
||||||
|
bool reachable = false;
|
||||||
|
|
||||||
|
for (u32 pred : pair.second)
|
||||||
|
{
|
||||||
|
if (pred >= lsa && pred < limit)
|
||||||
|
{
|
||||||
|
reachable = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!reachable && pair.first < limit)
|
||||||
|
{
|
||||||
|
limit = pair.first;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
result.resize((limit - lsa) / 4 + 1);
|
||||||
|
|
||||||
// Check holes in safe mode (TODO)
|
// Check holes in safe mode (TODO)
|
||||||
u32 valid_size = 0;
|
u32 valid_size = 0;
|
||||||
|
|
||||||
@ -790,13 +832,13 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
|
|||||||
{
|
{
|
||||||
const u32 pos = lsa + (i - 1) * 4;
|
const u32 pos = lsa + (i - 1) * 4;
|
||||||
const u32 data = ls[pos / 4];
|
const u32 data = ls[pos / 4];
|
||||||
const auto type = s_spu_itype.decode(data);
|
|
||||||
|
|
||||||
// Allow only NOP or LNOP instructions in holes
|
// Allow only NOP or LNOP instructions in holes
|
||||||
if (type == spu_itype::NOP || type == spu_itype::LNOP)
|
if (data == 0x200000 || (data & 0xffffff80) == 0x40200000)
|
||||||
{
|
{
|
||||||
if (i + 1 < result.size())
|
if (i + 1 < result.size())
|
||||||
{
|
{
|
||||||
|
result[i] = se_storage<u32>::swap(data);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -809,6 +851,12 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
|
|||||||
valid_size = i;
|
valid_size = i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Repeat if blocks were removed
|
||||||
|
if (result.size() == initial_size)
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (result.size() == 1)
|
if (result.size() == 1)
|
||||||
|
@ -45,6 +45,9 @@ protected:
|
|||||||
// List of possible targets for the instruction ({} = next instruction, {-1} = no targets)
|
// List of possible targets for the instruction ({} = next instruction, {-1} = no targets)
|
||||||
std::unordered_map<u32, std::basic_string<u32>, value_hash<u32, 2>> m_targets;
|
std::unordered_map<u32, std::basic_string<u32>, value_hash<u32, 2>> m_targets;
|
||||||
|
|
||||||
|
// List of block predecessors
|
||||||
|
std::unordered_map<u32, std::basic_string<u32>, value_hash<u32, 2>> m_preds;
|
||||||
|
|
||||||
std::shared_ptr<spu_cache> m_cache;
|
std::shared_ptr<spu_cache> m_cache;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user