mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-03-10 16:14:29 +00:00
SPU: rewrite spu_interpreter::SHUFB
Use ASMJIT to generate SSSE3+ code at runtime. Remove static SSSE3 code from spu_interpreter.
This commit is contained in:
parent
bebb1bdeda
commit
bdf6545571
@ -1,6 +1,8 @@
|
||||
#include "stdafx.h"
|
||||
#include "Emu/Memory/Memory.h"
|
||||
#include "Emu/System.h"
|
||||
#include "Utilities/JIT.h"
|
||||
#include "Utilities/sysinfo.h"
|
||||
|
||||
#include "SPUThread.h"
|
||||
#include "SPUInterpreter.h"
|
||||
@ -8,10 +10,6 @@
|
||||
#include <cmath>
|
||||
#include <cfenv>
|
||||
|
||||
#if !defined(_MSC_VER) && !defined(__SSSE3__)
|
||||
#define _mm_shuffle_epi8
|
||||
#endif
|
||||
|
||||
// Compare 16 packed unsigned bytes (greater than)
|
||||
inline __m128i sse_cmpgt_epu8(__m128i A, __m128i B)
|
||||
{
|
||||
@ -32,6 +30,59 @@ inline __m128i sse_cmpgt_epu32(__m128i A, __m128i B)
|
||||
return _mm_cmpgt_epi32(_mm_xor_si128(A, sign), _mm_xor_si128(B, sign));
|
||||
}
|
||||
|
||||
namespace asmjit
|
||||
{
|
||||
template <uint I, uint N>
|
||||
static void build_spu_gpr_load(X86Assembler& c, X86Xmm x, const bf_t<u32, I, N>& reg, bool store = false)
|
||||
{
|
||||
static_assert(N == 7, "Invalid bitfield");
|
||||
|
||||
#ifdef _WIN32
|
||||
const auto& spu = x86::rcx;
|
||||
const auto& op = x86::edx;
|
||||
#else
|
||||
const auto& spu = x86::rdi;
|
||||
const auto& op = x86::esi;
|
||||
#endif
|
||||
|
||||
c.mov(x86::eax, op);
|
||||
|
||||
if (I >= 4)
|
||||
{
|
||||
c.shr(x86::eax, I - 4);
|
||||
c.and_(x86::eax, 0x7f << 4);
|
||||
}
|
||||
else
|
||||
{
|
||||
c.and_(x86::eax, 0x7f);
|
||||
c.shl(x86::eax, I + 4);
|
||||
}
|
||||
|
||||
const auto ptr = x86::oword_ptr(spu, x86::rax, 0, ::offset32(&SPUThread::gpr));
|
||||
|
||||
if (utils::has_avx())
|
||||
{
|
||||
if (store)
|
||||
c.vmovdqa(ptr, x);
|
||||
else
|
||||
c.vmovdqa(x, ptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (store)
|
||||
c.movdqa(ptr, x);
|
||||
else
|
||||
c.movdqa(x, ptr);
|
||||
}
|
||||
}
|
||||
|
||||
template <uint I, uint N>
|
||||
static void build_spu_gpr_store(X86Assembler& c, X86Xmm x, const bf_t<u32, I, N>& reg, bool store = true)
|
||||
{
|
||||
build_spu_gpr_load(c, x, reg, store);
|
||||
}
|
||||
}
|
||||
|
||||
bool spu_interpreter::UNK(SPUThread& spu, spu_opcode_t op)
|
||||
{
|
||||
fmt::throw_exception("Unknown/Illegal instruction (0x%08x)" HERE, op.opcode);
|
||||
@ -497,7 +548,7 @@ bool spu_interpreter::LQX(SPUThread& spu, spu_opcode_t op)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool spu_interpreter_precise::ROTQBYBI(SPUThread& spu, spu_opcode_t op)
|
||||
bool spu_interpreter::ROTQBYBI(SPUThread& spu, spu_opcode_t op)
|
||||
{
|
||||
const auto a = spu.gpr[op.ra].vi;
|
||||
alignas(32) const __m128i buf[2]{a, a};
|
||||
@ -505,13 +556,7 @@ bool spu_interpreter_precise::ROTQBYBI(SPUThread& spu, spu_opcode_t op)
|
||||
return true;
|
||||
}
|
||||
|
||||
// Byte-shuffle gpr[ra] into gpr[rt] with the mask g_spu_imm.rldq_pshufb[n],
// where n is bits 3..6 of word element 3 of gpr[rb]. Always returns true.
bool spu_interpreter_fast::ROTQBYBI(SPUThread& spu, spu_opcode_t op)
{
	const auto entry = spu.gpr[op.rb]._u32[3] >> 3 & 0xf;
	const __m128i control = g_spu_imm.rldq_pshufb[entry].vi;

	spu.gpr[op.rt].vi = _mm_shuffle_epi8(spu.gpr[op.ra].vi, control);
	return true;
}
|
||||
|
||||
bool spu_interpreter_precise::ROTQMBYBI(SPUThread& spu, spu_opcode_t op)
|
||||
bool spu_interpreter::ROTQMBYBI(SPUThread& spu, spu_opcode_t op)
|
||||
{
|
||||
const auto a = spu.gpr[op.ra].vi;
|
||||
alignas(64) const __m128i buf[3]{a, _mm_setzero_si128(), _mm_setzero_si128()};
|
||||
@ -519,13 +564,7 @@ bool spu_interpreter_precise::ROTQMBYBI(SPUThread& spu, spu_opcode_t op)
|
||||
return true;
|
||||
}
|
||||
|
||||
// Byte-shuffle gpr[ra] into gpr[rt] with the mask g_spu_imm.srdq_pshufb[n],
// where n is bits 3..7 of signed word element 3 of gpr[rb]. Always returns true.
bool spu_interpreter_fast::ROTQMBYBI(SPUThread& spu, spu_opcode_t op)
{
	const auto entry = spu.gpr[op.rb]._s32[3] >> 3 & 0x1f;
	const __m128i control = g_spu_imm.srdq_pshufb[entry].vi;

	spu.gpr[op.rt].vi = _mm_shuffle_epi8(spu.gpr[op.ra].vi, control);
	return true;
}
|
||||
|
||||
bool spu_interpreter_precise::SHLQBYBI(SPUThread& spu, spu_opcode_t op)
|
||||
bool spu_interpreter::SHLQBYBI(SPUThread& spu, spu_opcode_t op)
|
||||
{
|
||||
const auto a = spu.gpr[op.ra].vi;
|
||||
alignas(64) const __m128i buf[3]{_mm_setzero_si128(), _mm_setzero_si128(), a};
|
||||
@ -533,12 +572,6 @@ bool spu_interpreter_precise::SHLQBYBI(SPUThread& spu, spu_opcode_t op)
|
||||
return true;
|
||||
}
|
||||
|
||||
// Byte-shuffle gpr[ra] into gpr[rt] with the mask g_spu_imm.sldq_pshufb[n],
// where n is bits 3..7 of word element 3 of gpr[rb]. Always returns true.
bool spu_interpreter_fast::SHLQBYBI(SPUThread& spu, spu_opcode_t op)
{
	const auto entry = spu.gpr[op.rb]._u32[3] >> 3 & 0x1f;
	const __m128i control = g_spu_imm.sldq_pshufb[entry].vi;

	spu.gpr[op.rt].vi = _mm_shuffle_epi8(spu.gpr[op.ra].vi, control);
	return true;
}
|
||||
|
||||
bool spu_interpreter::CBX(SPUThread& spu, spu_opcode_t op)
|
||||
{
|
||||
if (op.ra == 1 && (spu.gpr[1]._u32[3] & 0xF))
|
||||
@ -615,7 +648,7 @@ bool spu_interpreter::SHLQBI(SPUThread& spu, spu_opcode_t op)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool spu_interpreter_precise::ROTQBY(SPUThread& spu, spu_opcode_t op)
|
||||
bool spu_interpreter::ROTQBY(SPUThread& spu, spu_opcode_t op)
|
||||
{
|
||||
const auto a = spu.gpr[op.ra].vi;
|
||||
alignas(32) const __m128i buf[2]{a, a};
|
||||
@ -623,13 +656,7 @@ bool spu_interpreter_precise::ROTQBY(SPUThread& spu, spu_opcode_t op)
|
||||
return true;
|
||||
}
|
||||
|
||||
// Byte-shuffle gpr[ra] into gpr[rt] with the mask g_spu_imm.rldq_pshufb[n],
// where n is the low 4 bits of word element 3 of gpr[rb]. Always returns true.
bool spu_interpreter_fast::ROTQBY(SPUThread& spu, spu_opcode_t op)
{
	const auto entry = spu.gpr[op.rb]._u32[3] & 0xf;
	const __m128i control = g_spu_imm.rldq_pshufb[entry].vi;

	spu.gpr[op.rt].vi = _mm_shuffle_epi8(spu.gpr[op.ra].vi, control);
	return true;
}
|
||||
|
||||
bool spu_interpreter_precise::ROTQMBY(SPUThread& spu, spu_opcode_t op)
|
||||
bool spu_interpreter::ROTQMBY(SPUThread& spu, spu_opcode_t op)
|
||||
{
|
||||
const auto a = spu.gpr[op.ra].vi;
|
||||
alignas(64) const __m128i buf[3]{a, _mm_setzero_si128(), _mm_setzero_si128()};
|
||||
@ -637,13 +664,7 @@ bool spu_interpreter_precise::ROTQMBY(SPUThread& spu, spu_opcode_t op)
|
||||
return true;
|
||||
}
|
||||
|
||||
// Byte-shuffle gpr[ra] into gpr[rt] with the mask g_spu_imm.srdq_pshufb[n],
// where n is the low 5 bits of signed word element 3 of gpr[rb]. Always returns true.
bool spu_interpreter_fast::ROTQMBY(SPUThread& spu, spu_opcode_t op)
{
	const auto entry = spu.gpr[op.rb]._s32[3] & 0x1f;
	const __m128i control = g_spu_imm.srdq_pshufb[entry].vi;

	spu.gpr[op.rt].vi = _mm_shuffle_epi8(spu.gpr[op.ra].vi, control);
	return true;
}
|
||||
|
||||
bool spu_interpreter_precise::SHLQBY(SPUThread& spu, spu_opcode_t op)
|
||||
bool spu_interpreter::SHLQBY(SPUThread& spu, spu_opcode_t op)
|
||||
{
|
||||
const auto a = spu.gpr[op.ra].vi;
|
||||
alignas(64) const __m128i buf[3]{_mm_setzero_si128(), _mm_setzero_si128(), a};
|
||||
@ -651,12 +672,6 @@ bool spu_interpreter_precise::SHLQBY(SPUThread& spu, spu_opcode_t op)
|
||||
return true;
|
||||
}
|
||||
|
||||
// Byte-shuffle gpr[ra] into gpr[rt] with the mask g_spu_imm.sldq_pshufb[n],
// where n is the low 5 bits of word element 3 of gpr[rb]. Always returns true.
bool spu_interpreter_fast::SHLQBY(SPUThread& spu, spu_opcode_t op)
{
	const auto entry = spu.gpr[op.rb]._u32[3] & 0x1f;
	const __m128i control = g_spu_imm.sldq_pshufb[entry].vi;

	spu.gpr[op.rt].vi = _mm_shuffle_epi8(spu.gpr[op.ra].vi, control);
	return true;
}
|
||||
|
||||
bool spu_interpreter::ORX(SPUThread& spu, spu_opcode_t op)
|
||||
{
|
||||
spu.gpr[op.rt] = v128::from32r(spu.gpr[op.ra]._u32[0] | spu.gpr[op.ra]._u32[1] | spu.gpr[op.ra]._u32[2] | spu.gpr[op.ra]._u32[3]);
|
||||
@ -739,7 +754,7 @@ bool spu_interpreter::SHLQBII(SPUThread& spu, spu_opcode_t op)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool spu_interpreter_precise::ROTQBYI(SPUThread& spu, spu_opcode_t op)
|
||||
bool spu_interpreter::ROTQBYI(SPUThread& spu, spu_opcode_t op)
|
||||
{
|
||||
const auto a = spu.gpr[op.ra].vi;
|
||||
alignas(32) const __m128i buf[2]{a, a};
|
||||
@ -747,13 +762,7 @@ bool spu_interpreter_precise::ROTQBYI(SPUThread& spu, spu_opcode_t op)
|
||||
return true;
|
||||
}
|
||||
|
||||
// Byte-shuffle gpr[ra] into gpr[rt] with the mask g_spu_imm.rldq_pshufb[n],
// where n is the low 4 bits of the opcode's i7 field. Always returns true.
bool spu_interpreter_fast::ROTQBYI(SPUThread& spu, spu_opcode_t op)
{
	const auto entry = op.i7 & 0xf;
	const __m128i control = g_spu_imm.rldq_pshufb[entry].vi;

	spu.gpr[op.rt].vi = _mm_shuffle_epi8(spu.gpr[op.ra].vi, control);
	return true;
}
|
||||
|
||||
bool spu_interpreter_precise::ROTQMBYI(SPUThread& spu, spu_opcode_t op)
|
||||
bool spu_interpreter::ROTQMBYI(SPUThread& spu, spu_opcode_t op)
|
||||
{
|
||||
const auto a = spu.gpr[op.ra].vi;
|
||||
alignas(64) const __m128i buf[3]{a, _mm_setzero_si128(), _mm_setzero_si128()};
|
||||
@ -761,13 +770,7 @@ bool spu_interpreter_precise::ROTQMBYI(SPUThread& spu, spu_opcode_t op)
|
||||
return true;
|
||||
}
|
||||
|
||||
// Byte-shuffle gpr[ra] into gpr[rt] with the mask g_spu_imm.srdq_pshufb[n],
// where n is the low 5 bits of the opcode's i7 field. Always returns true.
bool spu_interpreter_fast::ROTQMBYI(SPUThread& spu, spu_opcode_t op)
{
	const auto entry = op.i7 & 0x1f;
	const __m128i control = g_spu_imm.srdq_pshufb[entry].vi;

	spu.gpr[op.rt].vi = _mm_shuffle_epi8(spu.gpr[op.ra].vi, control);
	return true;
}
|
||||
|
||||
bool spu_interpreter_precise::SHLQBYI(SPUThread& spu, spu_opcode_t op)
|
||||
bool spu_interpreter::SHLQBYI(SPUThread& spu, spu_opcode_t op)
|
||||
{
|
||||
const auto a = spu.gpr[op.ra].vi;
|
||||
alignas(64) const __m128i buf[3]{_mm_setzero_si128(), _mm_setzero_si128(), a};
|
||||
@ -775,12 +778,6 @@ bool spu_interpreter_precise::SHLQBYI(SPUThread& spu, spu_opcode_t op)
|
||||
return true;
|
||||
}
|
||||
|
||||
// Byte-shuffle gpr[ra] into gpr[rt] with the mask g_spu_imm.sldq_pshufb[n],
// where n is the low 5 bits of the opcode's i7 field. Always returns true.
bool spu_interpreter_fast::SHLQBYI(SPUThread& spu, spu_opcode_t op)
{
	const auto entry = op.i7 & 0x1f;
	const __m128i control = g_spu_imm.sldq_pshufb[entry].vi;

	spu.gpr[op.rt].vi = _mm_shuffle_epi8(spu.gpr[op.ra].vi, control);
	return true;
}
|
||||
|
||||
bool spu_interpreter::NOP(SPUThread& spu, spu_opcode_t op)
|
||||
{
|
||||
return true;
|
||||
@ -1637,64 +1634,100 @@ bool spu_interpreter::SELB(SPUThread& spu, spu_opcode_t op)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool spu_interpreter_precise::SHUFB(SPUThread& spu, spu_opcode_t op)
|
||||
static bool SHUFB_(SPUThread& spu, spu_opcode_t op)
|
||||
{
|
||||
alignas(16) static thread_local u8 s_lut[256]
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
|
||||
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
|
||||
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
|
||||
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
|
||||
};
|
||||
|
||||
const auto _a = spu.gpr[op.ra].vi;
|
||||
const auto _b = spu.gpr[op.rb].vi;
|
||||
_mm_store_si128((__m128i*)(s_lut + 0x00), _a);
|
||||
_mm_store_si128((__m128i*)(s_lut + 0x10), _b);
|
||||
_mm_store_si128((__m128i*)(s_lut + 0x20), _a);
|
||||
_mm_store_si128((__m128i*)(s_lut + 0x30), _b);
|
||||
_mm_store_si128((__m128i*)(s_lut + 0x40), _a);
|
||||
_mm_store_si128((__m128i*)(s_lut + 0x50), _b);
|
||||
_mm_store_si128((__m128i*)(s_lut + 0x60), _a);
|
||||
_mm_store_si128((__m128i*)(s_lut + 0x70), _b);
|
||||
v128 mask = v128::fromV(_mm_xor_si128(spu.gpr[op.rc].vi, _mm_set1_epi8(0xf)));
|
||||
auto& t = spu.gpr[op.rt4];
|
||||
__m128i ab[2]{spu.gpr[op.rb].vi, spu.gpr[op.ra].vi};
|
||||
v128 c = spu.gpr[op.rc];
|
||||
v128 x = v128::fromV(_mm_andnot_si128(c.vi, _mm_set1_epi8(0x1f)));
|
||||
v128 res;
|
||||
|
||||
// Select bytes
|
||||
for (int i = 0; i < 16; i++)
|
||||
{
|
||||
t._u8[i] = s_lut[mask._u8[i]];
|
||||
res._u8[i] = ((u8*)+ab)[x._u8[i]];
|
||||
}
|
||||
|
||||
// Select special values
|
||||
const auto xc0 = _mm_set1_epi8(0xc0);
|
||||
const auto xe0 = _mm_set1_epi8(0xe0);
|
||||
const auto cmp0 = _mm_cmpgt_epi8(_mm_setzero_si128(), c.vi);
|
||||
const auto cmp1 = _mm_cmpeq_epi8(_mm_and_si128(c.vi, xc0), xc0);
|
||||
const auto cmp2 = _mm_cmpeq_epi8(_mm_and_si128(c.vi, xe0), xc0);
|
||||
spu.gpr[op.rt4].vi = _mm_or_si128(_mm_andnot_si128(cmp0, res.vi), _mm_avg_epu8(cmp1, cmp2));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool spu_interpreter_fast::SHUFB(SPUThread& spu, spu_opcode_t op)
|
||||
const spu_inter_func_t spu_interpreter::SHUFB = !utils::has_ssse3() ? &SHUFB_ : build_function_asm<spu_inter_func_t>([](asmjit::X86Assembler& c, auto& args)
|
||||
{
|
||||
const auto index = _mm_xor_si128(spu.gpr[op.rc].vi, _mm_set1_epi32(0x0f0f0f0f));
|
||||
const auto res1 = _mm_shuffle_epi8(spu.gpr[op.ra].vi, index);
|
||||
const auto bit4 = _mm_set1_epi32(0x10101010);
|
||||
const auto k1 = _mm_cmpeq_epi8(_mm_and_si128(index, bit4), bit4);
|
||||
const auto res2 = _mm_or_si128(_mm_and_si128(k1, _mm_shuffle_epi8(spu.gpr[op.rb].vi, index)), _mm_andnot_si128(k1, res1));
|
||||
const auto bit67 = _mm_set1_epi32(0xc0c0c0c0);
|
||||
const auto k2 = _mm_cmpeq_epi8(_mm_and_si128(index, bit67), bit67);
|
||||
const auto res3 = _mm_or_si128(res2, k2);
|
||||
const auto bit567 = _mm_set1_epi32(0xe0e0e0e0);
|
||||
const auto k3 = _mm_cmpeq_epi8(_mm_and_si128(index, bit567), bit567);
|
||||
spu.gpr[op.rt4].vi = _mm_sub_epi8(res3, _mm_and_si128(k3, _mm_set1_epi32(0x7f7f7f7f)));
|
||||
return true;
|
||||
}
|
||||
using namespace asmjit;
|
||||
|
||||
const auto& va = x86::xmm0;
|
||||
const auto& vb = x86::xmm1;
|
||||
const auto& vc = x86::xmm2;
|
||||
const auto& vt = x86::xmm3;
|
||||
const auto& vm = x86::xmm4;
|
||||
const auto& v5 = x86::xmm5;
|
||||
|
||||
Label xc0 = c.newLabel();
|
||||
Label xe0 = c.newLabel();
|
||||
Label x0f = c.newLabel();
|
||||
|
||||
build_spu_gpr_load(c, va, decltype(spu_opcode_t::ra)());
|
||||
build_spu_gpr_load(c, vb, decltype(spu_opcode_t::rb)());
|
||||
build_spu_gpr_load(c, vc, decltype(spu_opcode_t::rc)());
|
||||
|
||||
if (utils::has_avx())
|
||||
{
|
||||
c.vpand(v5, vc, x86::oword_ptr(xe0));
|
||||
c.vpxor(vc, vc, x86::oword_ptr(x0f));
|
||||
c.vpshufb(va, va, vc);
|
||||
c.vpslld(vt, vc, 3);
|
||||
c.vmovdqa(vm, x86::oword_ptr(xc0));
|
||||
c.vpcmpeqb(v5, v5, vm);
|
||||
c.vpshufb(vb, vb, vc);
|
||||
c.vpand(vc, vc, vm);
|
||||
c.vpblendvb(vb, va, vb, vt);
|
||||
c.vpcmpeqb(vt, vc, vm);
|
||||
c.vpavgb(vt, vt, v5);
|
||||
c.vpor(vt, vt, vb);
|
||||
}
|
||||
else
|
||||
{
|
||||
c.movdqa(v5, vc);
|
||||
c.pand(v5, x86::oword_ptr(xe0));
|
||||
c.movdqa(vt, vc);
|
||||
c.movdqa(vm, x86::oword_ptr(xc0));
|
||||
c.pand(vt, vm);
|
||||
c.pxor(vc, x86::oword_ptr(x0f));
|
||||
c.pshufb(va, vc);
|
||||
c.pshufb(vb, vc);
|
||||
c.pslld(vc, 3);
|
||||
c.pcmpeqb(v5, vm);
|
||||
c.pcmpeqb(vt, vm);
|
||||
c.pcmpeqb(vm, vm);
|
||||
c.pcmpgtb(vc, vm);
|
||||
c.pand(va, vc);
|
||||
c.pandn(vc, vb);
|
||||
c.por(vc, va);
|
||||
c.pavgb(vt, v5);
|
||||
c.por(vt, vc);
|
||||
}
|
||||
|
||||
build_spu_gpr_store(c, vt, decltype(spu_opcode_t::rt4)());
|
||||
c.mov(x86::eax, 1);
|
||||
c.ret();
|
||||
|
||||
c.align(kAlignData, 16);
|
||||
c.bind(xc0);
|
||||
c.dq(0xc0c0c0c0c0c0c0c0);
|
||||
c.dq(0xc0c0c0c0c0c0c0c0);
|
||||
c.bind(xe0);
|
||||
c.dq(0xe0e0e0e0e0e0e0e0);
|
||||
c.dq(0xe0e0e0e0e0e0e0e0);
|
||||
c.bind(x0f);
|
||||
c.dq(0x0f0f0f0f0f0f0f0f);
|
||||
c.dq(0x0f0f0f0f0f0f0f0f);
|
||||
});
|
||||
|
||||
bool spu_interpreter::MPYA(SPUThread& spu, spu_opcode_t op)
|
||||
{
|
||||
@ -2551,3 +2584,7 @@ bool spu_interpreter_precise::FNMS(SPUThread& spu, spu_opcode_t op) { ::FMA(spu,
|
||||
// Forwards to the file-level ::FMA helper with both trailing flags set to false; always returns true.
bool spu_interpreter_precise::FMA(SPUThread& spu, spu_opcode_t op)
{
	::FMA(spu, op, false, false);
	return true;
}
|
||||
|
||||
// Forwards to the file-level ::FMA helper with the trailing flags (false, true); always returns true.
bool spu_interpreter_precise::FMS(SPUThread& spu, spu_opcode_t op)
{
	::FMA(spu, op, false, true);
	return true;
}
|
||||
|
||||
extern const spu_decoder<spu_interpreter_precise> g_spu_interpreter_precise{};
|
||||
|
||||
extern const spu_decoder<spu_interpreter_fast> g_spu_interpreter_fast{};
|
||||
|
@ -66,6 +66,9 @@ struct spu_interpreter
|
||||
static bool FSMH(SPUThread&, spu_opcode_t);
|
||||
static bool FSMB(SPUThread&, spu_opcode_t);
|
||||
static bool LQX(SPUThread&, spu_opcode_t);
|
||||
static bool ROTQBYBI(SPUThread&, spu_opcode_t);
|
||||
static bool ROTQMBYBI(SPUThread&, spu_opcode_t);
|
||||
static bool SHLQBYBI(SPUThread&, spu_opcode_t);
|
||||
static bool CBX(SPUThread&, spu_opcode_t);
|
||||
static bool CHX(SPUThread&, spu_opcode_t);
|
||||
static bool CWX(SPUThread&, spu_opcode_t);
|
||||
@ -73,6 +76,9 @@ struct spu_interpreter
|
||||
static bool ROTQBI(SPUThread&, spu_opcode_t);
|
||||
static bool ROTQMBI(SPUThread&, spu_opcode_t);
|
||||
static bool SHLQBI(SPUThread&, spu_opcode_t);
|
||||
static bool ROTQBY(SPUThread&, spu_opcode_t);
|
||||
static bool ROTQMBY(SPUThread&, spu_opcode_t);
|
||||
static bool SHLQBY(SPUThread&, spu_opcode_t);
|
||||
static bool ORX(SPUThread&, spu_opcode_t);
|
||||
static bool CBD(SPUThread&, spu_opcode_t);
|
||||
static bool CHD(SPUThread&, spu_opcode_t);
|
||||
@ -81,6 +87,9 @@ struct spu_interpreter
|
||||
static bool ROTQBII(SPUThread&, spu_opcode_t);
|
||||
static bool ROTQMBII(SPUThread&, spu_opcode_t);
|
||||
static bool SHLQBII(SPUThread&, spu_opcode_t);
|
||||
static bool ROTQBYI(SPUThread&, spu_opcode_t);
|
||||
static bool ROTQMBYI(SPUThread&, spu_opcode_t);
|
||||
static bool SHLQBYI(SPUThread&, spu_opcode_t);
|
||||
static bool NOP(SPUThread&, spu_opcode_t);
|
||||
static bool CGT(SPUThread&, spu_opcode_t);
|
||||
static bool XOR(SPUThread&, spu_opcode_t);
|
||||
@ -166,6 +175,7 @@ struct spu_interpreter
|
||||
static bool HBRR(SPUThread&, spu_opcode_t);
|
||||
static bool ILA(SPUThread&, spu_opcode_t);
|
||||
static bool SELB(SPUThread&, spu_opcode_t);
|
||||
static const spu_inter_func_t SHUFB;
|
||||
static bool MPYA(SPUThread&, spu_opcode_t);
|
||||
static bool DFCGT(SPUThread&, spu_opcode_t);
|
||||
static bool DFCMGT(SPUThread&, spu_opcode_t);
|
||||
@ -176,17 +186,6 @@ struct spu_interpreter
|
||||
|
||||
struct spu_interpreter_fast final : spu_interpreter
|
||||
{
|
||||
static bool ROTQBYBI(SPUThread&, spu_opcode_t);
|
||||
static bool ROTQMBYBI(SPUThread&, spu_opcode_t);
|
||||
static bool SHLQBYBI(SPUThread&, spu_opcode_t);
|
||||
static bool ROTQBY(SPUThread&, spu_opcode_t);
|
||||
static bool ROTQMBY(SPUThread&, spu_opcode_t);
|
||||
static bool SHLQBY(SPUThread&, spu_opcode_t);
|
||||
static bool ROTQBYI(SPUThread&, spu_opcode_t);
|
||||
static bool ROTQMBYI(SPUThread&, spu_opcode_t);
|
||||
static bool SHLQBYI(SPUThread&, spu_opcode_t);
|
||||
static bool SHUFB(SPUThread&, spu_opcode_t);
|
||||
|
||||
static bool FREST(SPUThread&, spu_opcode_t);
|
||||
static bool FRSQEST(SPUThread&, spu_opcode_t);
|
||||
static bool FCGT(SPUThread&, spu_opcode_t);
|
||||
@ -219,17 +218,6 @@ struct spu_interpreter_fast final : spu_interpreter
|
||||
|
||||
struct spu_interpreter_precise final : spu_interpreter
|
||||
{
|
||||
static bool ROTQBYBI(SPUThread&, spu_opcode_t);
|
||||
static bool ROTQMBYBI(SPUThread&, spu_opcode_t);
|
||||
static bool SHLQBYBI(SPUThread&, spu_opcode_t);
|
||||
static bool ROTQBY(SPUThread&, spu_opcode_t);
|
||||
static bool ROTQMBY(SPUThread&, spu_opcode_t);
|
||||
static bool SHLQBY(SPUThread&, spu_opcode_t);
|
||||
static bool ROTQBYI(SPUThread&, spu_opcode_t);
|
||||
static bool ROTQMBYI(SPUThread&, spu_opcode_t);
|
||||
static bool SHLQBYI(SPUThread&, spu_opcode_t);
|
||||
static bool SHUFB(SPUThread&, spu_opcode_t);
|
||||
|
||||
static bool FREST(SPUThread&, spu_opcode_t);
|
||||
static bool FRSQEST(SPUThread&, spu_opcode_t);
|
||||
static bool FCGT(SPUThread&, spu_opcode_t);
|
||||
|
@ -45,63 +45,12 @@ bool operator ==(const u128& lhs, const u128& rhs)
|
||||
extern u64 get_timebased_time();
|
||||
extern u64 get_system_time();
|
||||
|
||||
extern const spu_decoder<spu_interpreter_precise> g_spu_interpreter_precise;
|
||||
|
||||
extern const spu_decoder<spu_interpreter_fast> g_spu_interpreter_fast;
|
||||
|
||||
extern thread_local u64 g_tls_fault_spu;
|
||||
|
||||
// Table of identical interpreter functions when precise contains SSE2 version, and fast contains SSSE3 functions
|
||||
const std::pair<spu_inter_func_t, spu_inter_func_t> s_spu_dispatch_table[]
|
||||
{
|
||||
#define FUNC(x) {&spu_interpreter_precise::x, &spu_interpreter_fast::x}
|
||||
FUNC(ROTQBYBI),
|
||||
FUNC(ROTQMBYBI),
|
||||
FUNC(SHLQBYBI),
|
||||
FUNC(ROTQBY),
|
||||
FUNC(ROTQMBY),
|
||||
FUNC(SHLQBY),
|
||||
FUNC(ROTQBYI),
|
||||
FUNC(ROTQMBYI),
|
||||
FUNC(SHLQBYI),
|
||||
FUNC(SHUFB),
|
||||
#undef FUNC
|
||||
};
|
||||
|
||||
// Precise-interpreter decoder. When s_use_ssse3 is set, every table entry equal to the
// first member of an s_spu_dispatch_table pair is replaced by that pair's second member.
extern const spu_decoder<spu_interpreter_precise> g_spu_interpreter_precise([](auto& table)
{
	if (!s_use_ssse3)
	{
		// No substitution requested
		return;
	}

	for (auto& handler : table)
	{
		for (const auto& variants : s_spu_dispatch_table)
		{
			if (variants.first == handler)
			{
				// Only the first matching pair is applied
				handler = variants.second;
				break;
			}
		}
	}
});
|
||||
|
||||
// Fast-interpreter decoder. When s_use_ssse3 is NOT set, every table entry equal to the
// second member of an s_spu_dispatch_table pair is replaced by that pair's first member.
extern const spu_decoder<spu_interpreter_fast> g_spu_interpreter_fast([](auto& table)
{
	if (s_use_ssse3)
	{
		// No substitution requested
		return;
	}

	for (auto& handler : table)
	{
		for (const auto& variants : s_spu_dispatch_table)
		{
			if (variants.second == handler)
			{
				// Only the first matching pair is applied
				handler = variants.first;
				break;
			}
		}
	}
});
|
||||
|
||||
std::atomic<u64> g_num_spu_threads{0ull};
|
||||
|
||||
template <>
|
||||
void fmt_class_string<spu_decoder_type>::format(std::string& out, u64 arg)
|
||||
{
|
||||
|
Loading…
x
Reference in New Issue
Block a user