Update SPU cache (v2)

Improve SPU analyser: filter unreachable fragments
Stricter NOP/LNOP analysis
Fill in block predecessor info
ASMJIT: fix assertion and improve indirect branch
This commit is contained in:
Nekotekina 2018-05-13 20:34:11 +03:00
parent 84a4671a0e
commit 3c70645f0b
4 changed files with 70 additions and 19 deletions

View File

@ -1156,11 +1156,11 @@ void spu_recompiler::branch_fixed(u32 target)
c->jmp(x86::rax);
}
void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt)
void spu_recompiler::branch_indirect(spu_opcode_t op, bool local)
{
using namespace asmjit;
if (g_cfg.core.spu_block_size == spu_block_size_type::safe && !jt)
if (g_cfg.core.spu_block_size == spu_block_size_type::safe && !local)
{
// Simply external call (return or indirect call)
c->mov(x86::r10, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&SPUThread::jit_dispatcher)));
@ -2660,13 +2660,13 @@ void spu_recompiler::BIZ(spu_opcode_t op)
c->cmp(SPU_OFF_32(gpr, op.rt, &v128::_u32, 3), 0);
c->je(branch_label);
after.emplace_back([=]
after.emplace_back([=, jt = m_targets[m_pos].size() > 1]
{
c->align(asmjit::kAlignCode, 16);
c->bind(branch_label);
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
c->and_(*addr, 0x3fffc);
branch_indirect(op);
branch_indirect(op, jt);
});
}
@ -2676,13 +2676,13 @@ void spu_recompiler::BINZ(spu_opcode_t op)
c->cmp(SPU_OFF_32(gpr, op.rt, &v128::_u32, 3), 0);
c->jne(branch_label);
after.emplace_back([=]
after.emplace_back([=, jt = m_targets[m_pos].size() > 1]
{
c->align(asmjit::kAlignCode, 16);
c->bind(branch_label);
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
c->and_(*addr, 0x3fffc);
branch_indirect(op);
branch_indirect(op, jt);
});
}
@ -2692,13 +2692,13 @@ void spu_recompiler::BIHZ(spu_opcode_t op)
c->cmp(SPU_OFF_16(gpr, op.rt, &v128::_u16, 6), 0);
c->je(branch_label);
after.emplace_back([=]
after.emplace_back([=, jt = m_targets[m_pos].size() > 1]
{
c->align(asmjit::kAlignCode, 16);
c->bind(branch_label);
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
c->and_(*addr, 0x3fffc);
branch_indirect(op);
branch_indirect(op, jt);
});
}
@ -2708,13 +2708,13 @@ void spu_recompiler::BIHNZ(spu_opcode_t op)
c->cmp(SPU_OFF_16(gpr, op.rt, &v128::_u16, 6), 0);
c->jne(branch_label);
after.emplace_back([=]
after.emplace_back([=, jt = m_targets[m_pos].size() > 1]
{
c->align(asmjit::kAlignCode, 16);
c->bind(branch_label);
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
c->and_(*addr, 0x3fffc);
branch_indirect(op);
branch_indirect(op, jt);
});
}
@ -2750,7 +2750,7 @@ void spu_recompiler::BI(spu_opcode_t op)
{
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
c->and_(*addr, 0x3fffc);
branch_indirect(op, verify(HERE, m_targets[m_pos].size()) > 2);
branch_indirect(op, m_targets.find(m_pos) != m_targets.end());
m_pos = -1;
}
@ -2761,7 +2761,7 @@ void spu_recompiler::BISL(spu_opcode_t op)
const XmmLink& vr = XmmAlloc();
c->movdqa(vr, XmmConst(_mm_set_epi32(spu_branch_target(m_pos + 4), 0, 0, 0)));
c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
branch_indirect(op);
branch_indirect(op, m_targets.find(m_pos) != m_targets.end());
m_pos = -1;
}

View File

@ -107,7 +107,7 @@ private:
asmjit::X86Mem XmmConst(__m128i data);
void branch_fixed(u32 target);
void branch_indirect(spu_opcode_t op, bool jt = false);
void branch_indirect(spu_opcode_t op, bool local = false);
void fall(spu_opcode_t op);
void save_rcx();
void load_rcx();

View File

@ -89,7 +89,7 @@ void spu_cache::initialize()
}
// SPU cache file (version + block size type)
const std::string loc = _main->cache + u8"spu-§" + fmt::to_lower(g_cfg.core.spu_block_size.to_string()) + "-v1.dat";
const std::string loc = _main->cache + u8"spu-§" + fmt::to_lower(g_cfg.core.spu_block_size.to_string()) + "-v2.dat";
auto cache = std::make_shared<spu_cache>(loc);
@ -140,6 +140,11 @@ void spu_cache::initialize()
// Call analyser
std::vector<u32> func2 = compiler->block(ls.data(), func[0]);
if (func2.size() != func.size())
{
LOG_ERROR(SPU, "[0x%05x] SPU Analyser failed, %u vs %u", func2[0], func2.size() - 1, func.size() - 1);
}
compiler->compile(std::move(func));
// Clear fake LS
@ -281,6 +286,7 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
m_regmod.fill(0xff);
m_targets.clear();
m_preds.clear();
// Value flags (TODO)
enum class vf : u32
@ -306,6 +312,8 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
wi++;
};
const u32 pos = wl[wi];
const auto add_block = [&](u32 target)
{
// Verify validity of the new target (TODO)
@ -316,12 +324,21 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
{
m_block_info[target / 4] = true;
wl.push_back(target);
return;
}
// Add predecessor (check if already exists)
for (u32 pred : m_preds[target])
{
if (pred == pos)
{
return;
}
}
m_preds[target].push_back(pos);
}
};
const u32 pos = wl[wi];
const u32 data = ls[pos / 4];
const auto op = spu_opcode_t{data};
@ -779,8 +796,33 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
}
}
if (g_cfg.core.spu_block_size == spu_block_size_type::safe)
while (g_cfg.core.spu_block_size == spu_block_size_type::safe)
{
const u32 initial_size = result.size();
// Check unreachable blocks in safe mode (TODO)
u32 limit = lsa + result.size() * 4 - 4;
for (auto& pair : m_preds)
{
bool reachable = false;
for (u32 pred : pair.second)
{
if (pred >= lsa && pred < limit)
{
reachable = true;
}
}
if (!reachable && pair.first < limit)
{
limit = pair.first;
}
}
result.resize((limit - lsa) / 4 + 1);
// Check holes in safe mode (TODO)
u32 valid_size = 0;
@ -790,13 +832,13 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
{
const u32 pos = lsa + (i - 1) * 4;
const u32 data = ls[pos / 4];
const auto type = s_spu_itype.decode(data);
// Allow only NOP or LNOP instructions in holes
if (type == spu_itype::NOP || type == spu_itype::LNOP)
if (data == 0x200000 || (data & 0xffffff80) == 0x40200000)
{
if (i + 1 < result.size())
{
result[i] = se_storage<u32>::swap(data);
continue;
}
}
@ -809,6 +851,12 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
valid_size = i;
}
}
// Repeat if blocks were removed
if (result.size() == initial_size)
{
break;
}
}
if (result.size() == 1)

View File

@ -45,6 +45,9 @@ protected:
// List of possible targets for the instruction ({} = next instruction, {-1} = no targets)
std::unordered_map<u32, std::basic_string<u32>, value_hash<u32, 2>> m_targets;
// List of block predecessors
std::unordered_map<u32, std::basic_string<u32>, value_hash<u32, 2>> m_preds;
std::shared_ptr<spu_cache> m_cache;
public: