diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp index c541bc424b..891862999b 100644 --- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp @@ -1156,11 +1156,11 @@ void spu_recompiler::branch_fixed(u32 target) c->jmp(x86::rax); } -void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt) +void spu_recompiler::branch_indirect(spu_opcode_t op, bool local) { using namespace asmjit; - if (g_cfg.core.spu_block_size == spu_block_size_type::safe && !jt) + if (g_cfg.core.spu_block_size == spu_block_size_type::safe && !local) { // Simply external call (return or indirect call) c->mov(x86::r10, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&SPUThread::jit_dispatcher))); @@ -2660,13 +2660,13 @@ void spu_recompiler::BIZ(spu_opcode_t op) c->cmp(SPU_OFF_32(gpr, op.rt, &v128::_u32, 3), 0); c->je(branch_label); - after.emplace_back([=] + after.emplace_back([=, jt = m_targets[m_pos].size() > 1] { c->align(asmjit::kAlignCode, 16); c->bind(branch_label); c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3)); c->and_(*addr, 0x3fffc); - branch_indirect(op); + branch_indirect(op, jt); }); } @@ -2676,13 +2676,13 @@ void spu_recompiler::BINZ(spu_opcode_t op) c->cmp(SPU_OFF_32(gpr, op.rt, &v128::_u32, 3), 0); c->jne(branch_label); - after.emplace_back([=] + after.emplace_back([=, jt = m_targets[m_pos].size() > 1] { c->align(asmjit::kAlignCode, 16); c->bind(branch_label); c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3)); c->and_(*addr, 0x3fffc); - branch_indirect(op); + branch_indirect(op, jt); }); } @@ -2692,13 +2692,13 @@ void spu_recompiler::BIHZ(spu_opcode_t op) c->cmp(SPU_OFF_16(gpr, op.rt, &v128::_u16, 6), 0); c->je(branch_label); - after.emplace_back([=] + after.emplace_back([=, jt = m_targets[m_pos].size() > 1] { c->align(asmjit::kAlignCode, 16); c->bind(branch_label); c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3)); c->and_(*addr, 0x3fffc); - branch_indirect(op); + branch_indirect(op, jt); }); } @@ -2708,13 +2708,13 @@ void spu_recompiler::BIHNZ(spu_opcode_t op) c->cmp(SPU_OFF_16(gpr, op.rt, &v128::_u16, 6), 0); c->jne(branch_label); - after.emplace_back([=] + after.emplace_back([=, jt = m_targets[m_pos].size() > 1] { c->align(asmjit::kAlignCode, 16); c->bind(branch_label); c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3)); c->and_(*addr, 0x3fffc); - branch_indirect(op); + branch_indirect(op, jt); }); } @@ -2750,7 +2750,7 @@ void spu_recompiler::BI(spu_opcode_t op) { c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3)); c->and_(*addr, 0x3fffc); - branch_indirect(op, verify(HERE, m_targets[m_pos].size()) > 2); + branch_indirect(op, m_targets.find(m_pos) != m_targets.end()); m_pos = -1; } @@ -2761,7 +2761,7 @@ void spu_recompiler::BISL(spu_opcode_t op) const XmmLink& vr = XmmAlloc(); c->movdqa(vr, XmmConst(_mm_set_epi32(spu_branch_target(m_pos + 4), 0, 0, 0))); c->movdqa(SPU_OFF_128(gpr, op.rt), vr); - branch_indirect(op); + branch_indirect(op, m_targets.find(m_pos) != m_targets.end()); m_pos = -1; } diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.h b/rpcs3/Emu/Cell/SPUASMJITRecompiler.h index 8cb66c287c..edac1675d0 100644 --- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.h +++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.h @@ -107,7 +107,7 @@ private: asmjit::X86Mem XmmConst(__m128i data); void branch_fixed(u32 target); - void branch_indirect(spu_opcode_t op, bool jt = false); + void branch_indirect(spu_opcode_t op, bool local = false); void fall(spu_opcode_t op); void save_rcx(); void load_rcx(); diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 0e95fc8c8f..b05a1142cf 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -89,7 +89,7 @@ void spu_cache::initialize() } // SPU cache file (version + block size type) - const std::string loc = _main->cache + u8"spu-§" + fmt::to_lower(g_cfg.core.spu_block_size.to_string()) + "-v1.dat"; + const std::string loc = _main->cache + u8"spu-§" + fmt::to_lower(g_cfg.core.spu_block_size.to_string()) + "-v2.dat"; auto cache = std::make_shared(loc); @@ -140,6 +140,11 @@ void spu_cache::initialize() // Call analyser std::vector func2 = compiler->block(ls.data(), func[0]); + if (func2.size() != func.size()) + { + LOG_ERROR(SPU, "[0x%05x] SPU Analyser failed, %u vs %u", func2[0], func2.size() - 1, func.size() - 1); + } + compiler->compile(std::move(func)); // Clear fake LS @@ -281,6 +286,7 @@ std::vector spu_recompiler_base::block(const be_t* ls, u32 lsa) m_regmod.fill(0xff); m_targets.clear(); + m_preds.clear(); // Value flags (TODO) enum class vf : u32 @@ -306,6 +312,8 @@ std::vector spu_recompiler_base::block(const be_t* ls, u32 lsa) wi++; }; + const u32 pos = wl[wi]; + const auto add_block = [&](u32 target) { // Verify validity of the new target (TODO) @@ -316,12 +324,21 @@ std::vector spu_recompiler_base::block(const be_t* ls, u32 lsa) { m_block_info[target / 4] = true; wl.push_back(target); - return; } + + // Add predecessor (check if already exists) + for (u32 pred : m_preds[target]) + { + if (pred == pos) + { + return; + } + } + + m_preds[target].push_back(pos); } }; - const u32 pos = wl[wi]; const u32 data = ls[pos / 4]; const auto op = spu_opcode_t{data}; @@ -779,8 +796,33 @@ std::vector spu_recompiler_base::block(const be_t* ls, u32 lsa) } } - if (g_cfg.core.spu_block_size == spu_block_size_type::safe) + while (g_cfg.core.spu_block_size == spu_block_size_type::safe) { + const u32 initial_size = result.size(); + + // Check unreachable blocks in safe mode (TODO) + u32 limit = lsa + result.size() * 4 - 4; + + for (auto& pair : m_preds) + { + bool reachable = false; + + for (u32 pred : pair.second) + { + if (pred >= lsa && pred < limit) + { + reachable = true; + } + } + + if (!reachable && pair.first < limit) + { + limit = pair.first; + } + } + + result.resize((limit - lsa) / 4 + 1); + // Check holes in safe mode (TODO) u32 valid_size = 0; @@ -790,13 +832,13 @@ std::vector spu_recompiler_base::block(const be_t* ls, u32 lsa) { const u32 pos = lsa + (i - 1) * 4; const u32 data = ls[pos / 4]; - const auto type = s_spu_itype.decode(data); // Allow only NOP or LNOP instructions in holes - if (type == spu_itype::NOP || type == spu_itype::LNOP) + if (data == 0x200000 || (data & 0xffffff80) == 0x40200000) { if (i + 1 < result.size()) { + result[i] = se_storage::swap(data); continue; } } @@ -809,6 +851,12 @@ std::vector spu_recompiler_base::block(const be_t* ls, u32 lsa) valid_size = i; } } + + // Repeat if blocks were removed + if (result.size() == initial_size) + { + break; + } } if (result.size() == 1) diff --git a/rpcs3/Emu/Cell/SPURecompiler.h b/rpcs3/Emu/Cell/SPURecompiler.h index cc8001485b..0c880a4c97 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.h +++ b/rpcs3/Emu/Cell/SPURecompiler.h @@ -45,6 +45,9 @@ protected: // List of possible targets for the instruction ({} = next instruction, {-1} = no targets) std::unordered_map, value_hash> m_targets; + // List of block predecessors + std::unordered_map, value_hash> m_preds; + std::shared_ptr m_cache; public: