diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index aead7e4c84..b46a69799d 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -24,6 +24,9 @@ const spu_decoder s_spu_itype; const spu_decoder s_spu_iname; const spu_decoder s_spu_iflag; +extern const spu_decoder g_spu_interpreter_precise; +extern const spu_decoder g_spu_interpreter_fast; + extern u64 get_timebased_time(); // Move 4 args for calling native function from a GHC calling convention function @@ -74,6 +77,18 @@ DECLARE(spu_runtime::tr_branch) = [] return reinterpret_cast(trptr); }(); +DECLARE(spu_runtime::tr_interpreter) = [] +{ + u8* const trptr = jit_runtime::alloc(32, 16); + u8* raw = move_args_ghc_to_native(trptr); + *raw++ = 0xff; // jmp [rip] + *raw++ = 0x25; + std::memset(raw, 0, 4); + const u64 target = reinterpret_cast(&spu_recompiler_base::old_interpreter); + std::memcpy(raw + 4, &target, 8); + return reinterpret_cast(trptr); +}(); + DECLARE(spu_runtime::g_dispatcher) = [] { const auto ptr = reinterpret_cast(jit_runtime::alloc(sizeof(spu_function_t), 8, false)); @@ -281,7 +296,8 @@ void spu_cache::add(const std::vector& func) void spu_cache::initialize() { - spu_runtime::g_interpreter = nullptr; + spu_runtime::g_interpreter = spu_runtime::g_gateway; + *spu_runtime::g_dispatcher = spu_runtime::tr_interpreter; const std::string ppu_cache = Emu.PPUCache(); @@ -1145,6 +1161,37 @@ void spu_recompiler_base::branch(spu_thread& spu, void*, u8* rip) atomic_storage::release(*reinterpret_cast(rip), result); } +void spu_recompiler_base::old_interpreter(spu_thread& spu, void* ls, u8* rip) try +{ + // Select opcode table + const auto& table = *( + g_cfg.core.spu_decoder == spu_decoder_type::precise ? &g_spu_interpreter_precise.get_table() : + g_cfg.core.spu_decoder == spu_decoder_type::fast ? &g_spu_interpreter_fast.get_table() : + (fmt::throw_exception("Invalid SPU decoder"), nullptr)); + + // LS pointer + const auto base = static_cast(ls); + + while (true) + { + if (UNLIKELY(spu.state)) + { + if (spu.check_state()) + break; + } + + const u32 op = *reinterpret_cast*>(base + spu.pc); + if (table[spu_decode(op)](spu, {op})) + spu.pc += 4; + } +} +catch (const std::exception& e) +{ + Emu.Pause(); + LOG_FATAL(GENERAL, "%s thrown: %s", typeid(e).name(), e.what()); + LOG_NOTICE(GENERAL, "\n%s", spu.dump()); +} + const std::vector& spu_recompiler_base::analyse(const be_t* ls, u32 entry_point) { // Result: addr + raw instruction data @@ -4713,8 +4760,6 @@ public: static void interp_check(spu_thread* _spu, bool after) { - static const spu_decoder s_dec; - static thread_local std::array s_gpr; if (!after) @@ -4724,7 +4769,7 @@ public: // Execute interpreter instruction const u32 op = *reinterpret_cast*>(_spu->_ptr(0) + _spu->pc); - if (!s_dec.decode(op)(*_spu, {op})) + if (!g_spu_interpreter_fast.decode(op)(*_spu, {op})) LOG_FATAL(SPU, "Bad instruction" HERE); // Swap state diff --git a/rpcs3/Emu/Cell/SPURecompiler.h b/rpcs3/Emu/Cell/SPURecompiler.h index cd9648ef5e..5bccab7c7b 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.h +++ b/rpcs3/Emu/Cell/SPURecompiler.h @@ -70,6 +70,9 @@ public: // Trampoline to spu_recompiler_base::branch static const spu_function_t tr_branch; + // Trampoline to legacy interpreter + static const spu_function_t tr_interpreter; + public: spu_runtime(); @@ -356,6 +359,9 @@ public: // Target for the unresolved patch point (second arg is unused) static void branch(spu_thread&, void*, u8* rip); + // Legacy interpreter loop + static void old_interpreter(spu_thread&, void* ls, u8*); + // Get the function data at specified address const std::vector& analyse(const be_t* ls, u32 lsa); diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 48d68d2f17..88af92d8ca 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -71,10 +71,6 @@ static FORCE_INLINE void mov_rdata(decltype(spu_thread::rdata)& dst, const declt extern u64 get_timebased_time(); extern u64 get_system_time(); -extern const spu_decoder g_spu_interpreter_precise; - -extern const spu_decoder g_spu_interpreter_fast; - extern thread_local u64 g_tls_fault_spu; template <> @@ -1156,12 +1152,11 @@ void spu_thread::cpu_task() // Print some stats LOG_NOTICE(SPU, "Stats: Block Weight: %u (Retreats: %u);", block_counter, block_failure); - cpu_stop(); - return; } - - if (spu_runtime::g_interpreter) + else { + ASSERT(spu_runtime::g_interpreter); + while (true) { if (UNLIKELY(state)) @@ -1172,31 +1167,6 @@ void spu_thread::cpu_task() spu_runtime::g_interpreter(*this, vm::_ptr(offset), nullptr); } - - cpu_stop(); - return; - } - - // Select opcode table - const auto& table = *( - g_cfg.core.spu_decoder == spu_decoder_type::precise ? &g_spu_interpreter_precise.get_table() : - g_cfg.core.spu_decoder == spu_decoder_type::fast ? &g_spu_interpreter_fast.get_table() : - (fmt::throw_exception("Invalid SPU decoder"), nullptr)); - - // LS pointer - const auto base = vm::_ptr(offset); - - while (true) - { - if (UNLIKELY(state)) - { - if (check_state()) - break; - } - - const u32 op = *reinterpret_cast*>(base + pc); - if (table[spu_decode(op)](*this, {op})) - pc += 4; } cpu_stop();