From bdf654557164479f7d2e8f6758bac8566891ad54 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Sun, 27 May 2018 23:37:01 +0300 Subject: [PATCH] SPU: rewrite spu_interpreter::SHUFB Use ASMJIT to generate SSSE3+ code at runtime Remove static SSSE3 code from spu_interpreter --- rpcs3/Emu/Cell/SPUInterpreter.cpp | 269 +++++++++++++++++------------- rpcs3/Emu/Cell/SPUInterpreter.h | 32 ++-- rpcs3/Emu/Cell/SPUThread.cpp | 59 +------ 3 files changed, 167 insertions(+), 193 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUInterpreter.cpp b/rpcs3/Emu/Cell/SPUInterpreter.cpp index ac56da16cc..e8fb3eccdb 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/SPUInterpreter.cpp @@ -1,6 +1,8 @@ #include "stdafx.h" #include "Emu/Memory/Memory.h" #include "Emu/System.h" +#include "Utilities/JIT.h" +#include "Utilities/sysinfo.h" #include "SPUThread.h" #include "SPUInterpreter.h" @@ -8,10 +10,6 @@ #include #include -#if !defined(_MSC_VER) && !defined(__SSSE3__) -#define _mm_shuffle_epi8 -#endif - // Compare 16 packed unsigned bytes (greater than) inline __m128i sse_cmpgt_epu8(__m128i A, __m128i B) { @@ -32,6 +30,59 @@ inline __m128i sse_cmpgt_epu32(__m128i A, __m128i B) return _mm_cmpgt_epi32(_mm_xor_si128(A, sign), _mm_xor_si128(B, sign)); } +namespace asmjit +{ + template + static void build_spu_gpr_load(X86Assembler& c, X86Xmm x, const bf_t& reg, bool store = false) + { + static_assert(N == 7, "Invalid bitfield"); + +#ifdef _WIN32 + const auto& spu = x86::rcx; + const auto& op = x86::edx; +#else + const auto& spu = x86::rdi; + const auto& op = x86::esi; +#endif + + c.mov(x86::eax, op); + + if (I >= 4) + { + c.shr(x86::eax, I - 4); + c.and_(x86::eax, 0x7f << 4); + } + else + { + c.and_(x86::eax, 0x7f); + c.shl(x86::eax, I + 4); + } + + const auto ptr = x86::oword_ptr(spu, x86::rax, 0, ::offset32(&SPUThread::gpr)); + + if (utils::has_avx()) + { + if (store) + c.vmovdqa(ptr, x); + else + c.vmovdqa(x, ptr); + } + else + { + if (store) + c.movdqa(ptr, x); + else + c.movdqa(x, ptr); + } + } + + template + static void build_spu_gpr_store(X86Assembler& c, X86Xmm x, const bf_t& reg, bool store = true) + { + build_spu_gpr_load(c, x, reg, store); + } +} + bool spu_interpreter::UNK(SPUThread& spu, spu_opcode_t op) { fmt::throw_exception("Unknown/Illegal instruction (0x%08x)" HERE, op.opcode); @@ -497,7 +548,7 @@ bool spu_interpreter::LQX(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter_precise::ROTQBYBI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTQBYBI(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; alignas(32) const __m128i buf[2]{a, a}; @@ -505,13 +556,7 @@ bool spu_interpreter_precise::ROTQBYBI(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter_fast::ROTQBYBI(SPUThread& spu, spu_opcode_t op) -{ - spu.gpr[op.rt].vi = _mm_shuffle_epi8(spu.gpr[op.ra].vi, g_spu_imm.rldq_pshufb[spu.gpr[op.rb]._u32[3] >> 3 & 0xf].vi); - return true; -} - -bool spu_interpreter_precise::ROTQMBYBI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTQMBYBI(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; alignas(64) const __m128i buf[3]{a, _mm_setzero_si128(), _mm_setzero_si128()}; @@ -519,13 +564,7 @@ bool spu_interpreter_precise::ROTQMBYBI(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter_fast::ROTQMBYBI(SPUThread& spu, spu_opcode_t op) -{ - spu.gpr[op.rt].vi = _mm_shuffle_epi8(spu.gpr[op.ra].vi, g_spu_imm.srdq_pshufb[spu.gpr[op.rb]._s32[3] >> 3 & 0x1f].vi); - return true; -} - -bool spu_interpreter_precise::SHLQBYBI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::SHLQBYBI(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; alignas(64) const __m128i buf[3]{_mm_setzero_si128(), _mm_setzero_si128(), a}; @@ -533,12 +572,6 @@ bool spu_interpreter_precise::SHLQBYBI(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter_fast::SHLQBYBI(SPUThread& spu, spu_opcode_t op) -{ - spu.gpr[op.rt].vi = _mm_shuffle_epi8(spu.gpr[op.ra].vi, g_spu_imm.sldq_pshufb[spu.gpr[op.rb]._u32[3] >> 3 & 0x1f].vi); - return true; -} - bool spu_interpreter::CBX(SPUThread& spu, spu_opcode_t op) { if (op.ra == 1 && (spu.gpr[1]._u32[3] & 0xF)) @@ -615,7 +648,7 @@ bool spu_interpreter::SHLQBI(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter_precise::ROTQBY(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTQBY(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; alignas(32) const __m128i buf[2]{a, a}; @@ -623,13 +656,7 @@ bool spu_interpreter_precise::ROTQBY(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter_fast::ROTQBY(SPUThread& spu, spu_opcode_t op) -{ - spu.gpr[op.rt].vi = _mm_shuffle_epi8(spu.gpr[op.ra].vi, g_spu_imm.rldq_pshufb[spu.gpr[op.rb]._u32[3] & 0xf].vi); - return true; -} - -bool spu_interpreter_precise::ROTQMBY(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTQMBY(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; alignas(64) const __m128i buf[3]{a, _mm_setzero_si128(), _mm_setzero_si128()}; @@ -637,13 +664,7 @@ bool spu_interpreter_precise::ROTQMBY(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter_fast::ROTQMBY(SPUThread& spu, spu_opcode_t op) -{ - spu.gpr[op.rt].vi = _mm_shuffle_epi8(spu.gpr[op.ra].vi, g_spu_imm.srdq_pshufb[spu.gpr[op.rb]._s32[3] & 0x1f].vi); - return true; -} - -bool spu_interpreter_precise::SHLQBY(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::SHLQBY(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; alignas(64) const __m128i buf[3]{_mm_setzero_si128(), _mm_setzero_si128(), a}; @@ -651,12 +672,6 @@ bool spu_interpreter_precise::SHLQBY(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter_fast::SHLQBY(SPUThread& spu, spu_opcode_t op) -{ - spu.gpr[op.rt].vi = _mm_shuffle_epi8(spu.gpr[op.ra].vi, g_spu_imm.sldq_pshufb[spu.gpr[op.rb]._u32[3] & 0x1f].vi); - return true; -} - bool spu_interpreter::ORX(SPUThread& spu, spu_opcode_t op) { spu.gpr[op.rt] = v128::from32r(spu.gpr[op.ra]._u32[0] | spu.gpr[op.ra]._u32[1] | spu.gpr[op.ra]._u32[2] | spu.gpr[op.ra]._u32[3]); @@ -739,7 +754,7 @@ bool spu_interpreter::SHLQBII(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter_precise::ROTQBYI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTQBYI(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; alignas(32) const __m128i buf[2]{a, a}; @@ -747,13 +762,7 @@ bool spu_interpreter_precise::ROTQBYI(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter_fast::ROTQBYI(SPUThread& spu, spu_opcode_t op) -{ - spu.gpr[op.rt].vi = _mm_shuffle_epi8(spu.gpr[op.ra].vi, g_spu_imm.rldq_pshufb[op.i7 & 0xf].vi); - return true; -} - -bool spu_interpreter_precise::ROTQMBYI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTQMBYI(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; alignas(64) const __m128i buf[3]{a, _mm_setzero_si128(), _mm_setzero_si128()}; @@ -761,13 +770,7 @@ bool spu_interpreter_precise::ROTQMBYI(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter_fast::ROTQMBYI(SPUThread& spu, spu_opcode_t op) -{ - spu.gpr[op.rt].vi = _mm_shuffle_epi8(spu.gpr[op.ra].vi, g_spu_imm.srdq_pshufb[op.i7 & 0x1f].vi); - return true; -} - -bool spu_interpreter_precise::SHLQBYI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::SHLQBYI(SPUThread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; alignas(64) const __m128i buf[3]{_mm_setzero_si128(), _mm_setzero_si128(), a}; @@ -775,12 +778,6 @@ bool spu_interpreter_precise::SHLQBYI(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter_fast::SHLQBYI(SPUThread& spu, spu_opcode_t op) -{ - spu.gpr[op.rt].vi = _mm_shuffle_epi8(spu.gpr[op.ra].vi, g_spu_imm.sldq_pshufb[op.i7 & 0x1f].vi); - return true; -} - bool spu_interpreter::NOP(SPUThread& spu, spu_opcode_t op) { return true; @@ -1637,64 +1634,100 @@ bool spu_interpreter::SELB(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter_precise::SHUFB(SPUThread& spu, spu_opcode_t op) +static bool SHUFB_(SPUThread& spu, spu_opcode_t op) { - alignas(16) static thread_local u8 s_lut[256] - { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - }; - - const auto _a = spu.gpr[op.ra].vi; - const auto _b = spu.gpr[op.rb].vi; - _mm_store_si128((__m128i*)(s_lut + 0x00), _a); - _mm_store_si128((__m128i*)(s_lut + 0x10), _b); - _mm_store_si128((__m128i*)(s_lut + 0x20), _a); - _mm_store_si128((__m128i*)(s_lut + 0x30), _b); - _mm_store_si128((__m128i*)(s_lut + 0x40), _a); - _mm_store_si128((__m128i*)(s_lut + 0x50), _b); - _mm_store_si128((__m128i*)(s_lut + 0x60), _a); - _mm_store_si128((__m128i*)(s_lut + 0x70), _b); - v128 mask = v128::fromV(_mm_xor_si128(spu.gpr[op.rc].vi, _mm_set1_epi8(0xf))); - auto& t = spu.gpr[op.rt4]; + __m128i ab[2]{spu.gpr[op.rb].vi, spu.gpr[op.ra].vi}; + v128 c = spu.gpr[op.rc]; + v128 x = v128::fromV(_mm_andnot_si128(c.vi, _mm_set1_epi8(0x1f))); + v128 res; + // Select bytes for (int i = 0; i < 16; i++) { - t._u8[i] = s_lut[mask._u8[i]]; + res._u8[i] = ((u8*)+ab)[x._u8[i]]; } + + // Select special values + const auto xc0 = _mm_set1_epi8(0xc0); + const auto xe0 = _mm_set1_epi8(0xe0); + const auto cmp0 = _mm_cmpgt_epi8(_mm_setzero_si128(), c.vi); + const auto cmp1 = _mm_cmpeq_epi8(_mm_and_si128(c.vi, xc0), xc0); + const auto cmp2 = _mm_cmpeq_epi8(_mm_and_si128(c.vi, xe0), xc0); + spu.gpr[op.rt4].vi = _mm_or_si128(_mm_andnot_si128(cmp0, res.vi), _mm_avg_epu8(cmp1, cmp2)); return true; } -bool spu_interpreter_fast::SHUFB(SPUThread& spu, spu_opcode_t op) +const spu_inter_func_t spu_interpreter::SHUFB = !utils::has_ssse3() ? &SHUFB_ : build_function_asm([](asmjit::X86Assembler& c, auto& args) { - const auto index = _mm_xor_si128(spu.gpr[op.rc].vi, _mm_set1_epi32(0x0f0f0f0f)); - const auto res1 = _mm_shuffle_epi8(spu.gpr[op.ra].vi, index); - const auto bit4 = _mm_set1_epi32(0x10101010); - const auto k1 = _mm_cmpeq_epi8(_mm_and_si128(index, bit4), bit4); - const auto res2 = _mm_or_si128(_mm_and_si128(k1, _mm_shuffle_epi8(spu.gpr[op.rb].vi, index)), _mm_andnot_si128(k1, res1)); - const auto bit67 = _mm_set1_epi32(0xc0c0c0c0); - const auto k2 = _mm_cmpeq_epi8(_mm_and_si128(index, bit67), bit67); - const auto res3 = _mm_or_si128(res2, k2); - const auto bit567 = _mm_set1_epi32(0xe0e0e0e0); - const auto k3 = _mm_cmpeq_epi8(_mm_and_si128(index, bit567), bit567); - spu.gpr[op.rt4].vi = _mm_sub_epi8(res3, _mm_and_si128(k3, _mm_set1_epi32(0x7f7f7f7f))); - return true; -} + using namespace asmjit; + + const auto& va = x86::xmm0; + const auto& vb = x86::xmm1; + const auto& vc = x86::xmm2; + const auto& vt = x86::xmm3; + const auto& vm = x86::xmm4; + const auto& v5 = x86::xmm5; + + Label xc0 = c.newLabel(); + Label xe0 = c.newLabel(); + Label x0f = c.newLabel(); + + build_spu_gpr_load(c, va, decltype(spu_opcode_t::ra)()); + build_spu_gpr_load(c, vb, decltype(spu_opcode_t::rb)()); + build_spu_gpr_load(c, vc, decltype(spu_opcode_t::rc)()); + + if (utils::has_avx()) + { + c.vpand(v5, vc, x86::oword_ptr(xe0)); + c.vpxor(vc, vc, x86::oword_ptr(x0f)); + c.vpshufb(va, va, vc); + c.vpslld(vt, vc, 3); + c.vmovdqa(vm, x86::oword_ptr(xc0)); + c.vpcmpeqb(v5, v5, vm); + c.vpshufb(vb, vb, vc); + c.vpand(vc, vc, vm); + c.vpblendvb(vb, va, vb, vt); + c.vpcmpeqb(vt, vc, vm); + c.vpavgb(vt, vt, v5); + c.vpor(vt, vt, vb); + } + else + { + c.movdqa(v5, vc); + c.pand(v5, x86::oword_ptr(xe0)); + c.movdqa(vt, vc); + c.movdqa(vm, x86::oword_ptr(xc0)); + c.pand(vt, vm); + c.pxor(vc, x86::oword_ptr(x0f)); + c.pshufb(va, vc); + c.pshufb(vb, vc); + c.pslld(vc, 3); + c.pcmpeqb(v5, vm); + c.pcmpeqb(vt, vm); + c.pcmpeqb(vm, vm); + c.pcmpgtb(vc, vm); + c.pand(va, vc); + c.pandn(vc, vb); + c.por(vc, va); + c.pavgb(vt, v5); + c.por(vt, vc); + } + + build_spu_gpr_store(c, vt, decltype(spu_opcode_t::rt4)()); + c.mov(x86::eax, 1); + c.ret(); + + c.align(kAlignData, 16); + c.bind(xc0); + c.dq(0xc0c0c0c0c0c0c0c0); + c.dq(0xc0c0c0c0c0c0c0c0); + c.bind(xe0); + c.dq(0xe0e0e0e0e0e0e0e0); + c.dq(0xe0e0e0e0e0e0e0e0); + c.bind(x0f); + c.dq(0x0f0f0f0f0f0f0f0f); + c.dq(0x0f0f0f0f0f0f0f0f); +}); bool spu_interpreter::MPYA(SPUThread& spu, spu_opcode_t op) { @@ -2551,3 +2584,7 @@ bool spu_interpreter_precise::FNMS(SPUThread& spu, spu_opcode_t op) { ::FMA(spu, bool spu_interpreter_precise::FMA(SPUThread& spu, spu_opcode_t op) { ::FMA(spu, op, false, false); return true; } bool spu_interpreter_precise::FMS(SPUThread& spu, spu_opcode_t op) { ::FMA(spu, op, false, true); return true; } + +extern const spu_decoder g_spu_interpreter_precise{}; + +extern const spu_decoder g_spu_interpreter_fast{}; diff --git a/rpcs3/Emu/Cell/SPUInterpreter.h b/rpcs3/Emu/Cell/SPUInterpreter.h index 06ab662d70..19cdb2cd50 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.h +++ b/rpcs3/Emu/Cell/SPUInterpreter.h @@ -66,6 +66,9 @@ struct spu_interpreter static bool FSMH(SPUThread&, spu_opcode_t); static bool FSMB(SPUThread&, spu_opcode_t); static bool LQX(SPUThread&, spu_opcode_t); + static bool ROTQBYBI(SPUThread&, spu_opcode_t); + static bool ROTQMBYBI(SPUThread&, spu_opcode_t); + static bool SHLQBYBI(SPUThread&, spu_opcode_t); static bool CBX(SPUThread&, spu_opcode_t); static bool CHX(SPUThread&, spu_opcode_t); static bool CWX(SPUThread&, spu_opcode_t); @@ -73,6 +76,9 @@ struct spu_interpreter static bool ROTQBI(SPUThread&, spu_opcode_t); static bool ROTQMBI(SPUThread&, spu_opcode_t); static bool SHLQBI(SPUThread&, spu_opcode_t); + static bool ROTQBY(SPUThread&, spu_opcode_t); + static bool ROTQMBY(SPUThread&, spu_opcode_t); + static bool SHLQBY(SPUThread&, spu_opcode_t); static bool ORX(SPUThread&, spu_opcode_t); static bool CBD(SPUThread&, spu_opcode_t); static bool CHD(SPUThread&, spu_opcode_t); @@ -81,6 +87,9 @@ struct spu_interpreter static bool ROTQBII(SPUThread&, spu_opcode_t); static bool ROTQMBII(SPUThread&, spu_opcode_t); static bool SHLQBII(SPUThread&, spu_opcode_t); + static bool ROTQBYI(SPUThread&, spu_opcode_t); + static bool ROTQMBYI(SPUThread&, spu_opcode_t); + static bool SHLQBYI(SPUThread&, spu_opcode_t); static bool NOP(SPUThread&, spu_opcode_t); static bool CGT(SPUThread&, spu_opcode_t); static bool XOR(SPUThread&, spu_opcode_t); @@ -166,6 +175,7 @@ struct spu_interpreter static bool HBRR(SPUThread&, spu_opcode_t); static bool ILA(SPUThread&, spu_opcode_t); static bool SELB(SPUThread&, spu_opcode_t); + static const spu_inter_func_t SHUFB; static bool MPYA(SPUThread&, spu_opcode_t); static bool DFCGT(SPUThread&, spu_opcode_t); static bool DFCMGT(SPUThread&, spu_opcode_t); @@ -176,17 +186,6 @@ struct spu_interpreter struct spu_interpreter_fast final : spu_interpreter { - static bool ROTQBYBI(SPUThread&, spu_opcode_t); - static bool ROTQMBYBI(SPUThread&, spu_opcode_t); - static bool SHLQBYBI(SPUThread&, spu_opcode_t); - static bool ROTQBY(SPUThread&, spu_opcode_t); - static bool ROTQMBY(SPUThread&, spu_opcode_t); - static bool SHLQBY(SPUThread&, spu_opcode_t); - static bool ROTQBYI(SPUThread&, spu_opcode_t); - static bool ROTQMBYI(SPUThread&, spu_opcode_t); - static bool SHLQBYI(SPUThread&, spu_opcode_t); - static bool SHUFB(SPUThread&, spu_opcode_t); - static bool FREST(SPUThread&, spu_opcode_t); static bool FRSQEST(SPUThread&, spu_opcode_t); static bool FCGT(SPUThread&, spu_opcode_t); @@ -219,17 +218,6 @@ struct spu_interpreter_fast final : spu_interpreter struct spu_interpreter_precise final : spu_interpreter { - static bool ROTQBYBI(SPUThread&, spu_opcode_t); - static bool ROTQMBYBI(SPUThread&, spu_opcode_t); - static bool SHLQBYBI(SPUThread&, spu_opcode_t); - static bool ROTQBY(SPUThread&, spu_opcode_t); - static bool ROTQMBY(SPUThread&, spu_opcode_t); - static bool SHLQBY(SPUThread&, spu_opcode_t); - static bool ROTQBYI(SPUThread&, spu_opcode_t); - static bool ROTQMBYI(SPUThread&, spu_opcode_t); - static bool SHLQBYI(SPUThread&, spu_opcode_t); - static bool SHUFB(SPUThread&, spu_opcode_t); - static bool FREST(SPUThread&, spu_opcode_t); static bool FRSQEST(SPUThread&, spu_opcode_t); static bool FCGT(SPUThread&, spu_opcode_t); diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index c6a5c13034..2ce973a394 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -45,63 +45,12 @@ bool operator ==(const u128& lhs, const u128& rhs) extern u64 get_timebased_time(); extern u64 get_system_time(); +extern const spu_decoder g_spu_interpreter_precise; + +extern const spu_decoder g_spu_interpreter_fast; + extern thread_local u64 g_tls_fault_spu; -// Table of identical interpreter functions when precise contains SSE2 version, and fast contains SSSE3 functions -const std::pair s_spu_dispatch_table[] -{ -#define FUNC(x) {&spu_interpreter_precise::x, &spu_interpreter_fast::x} - FUNC(ROTQBYBI), - FUNC(ROTQMBYBI), - FUNC(SHLQBYBI), - FUNC(ROTQBY), - FUNC(ROTQMBY), - FUNC(SHLQBY), - FUNC(ROTQBYI), - FUNC(ROTQMBYI), - FUNC(SHLQBYI), - FUNC(SHUFB), -#undef FUNC -}; - -extern const spu_decoder g_spu_interpreter_precise([](auto& table) -{ - if (s_use_ssse3) - { - for (auto& func : table) - { - for (const auto& pair : s_spu_dispatch_table) - { - if (pair.first == func) - { - func = pair.second; - break; - } - } - } - } -}); - -extern const spu_decoder g_spu_interpreter_fast([](auto& table) -{ - if (!s_use_ssse3) - { - for (auto& func : table) - { - for (const auto& pair : s_spu_dispatch_table) - { - if (pair.second == func) - { - func = pair.first; - break; - } - } - } - } -}); - -std::atomic g_num_spu_threads{0ull}; - template <> void fmt_class_string::format(std::string& out, u64 arg) {