Finalize making ppu_decoder<> constexpr

Move SSSE3 checks to runtime in PPUInterpreter.cpp
This commit is contained in:
Nekotekina 2020-03-24 20:07:35 +03:00
parent fd3522436a
commit 471db3219d
4 changed files with 29 additions and 143 deletions

View File

@ -5,6 +5,7 @@
#include "Emu/system_config.h"
#include "PPUThread.h"
#include "Utilities/asm.h"
#include "Utilities/sysinfo.h"
#include "Emu/Cell/Common.h"
#include <cmath>
@ -21,6 +22,8 @@
#define SSSE3_FUNC __attribute__((__target__("ssse3")))
#endif
const bool s_use_ssse3 = utils::has_ssse3();
inline u64 dup32(u32 x) { return x | static_cast<u64>(x) << 32; }
// Write values to CR field
@ -123,7 +126,7 @@ extern __m128 sse_log2_ps(__m128 A)
return _mm_add_ps(_mm_mul_ps(_mm_mul_ps(_mm_mul_ps(_mm_mul_ps(x5, x6), x7), x4), _c), _mm_add_ps(_mm_mul_ps(x4, _c), x8));
}
extern __m128i sse_pshufb(__m128i data, __m128i index)
extern SAFE_BUFFERS __m128i sse_pshufb(__m128i data, __m128i index)
{
v128 m = v128::fromV(_mm_and_si128(index, _mm_set1_epi8(0xf)));
v128 a = v128::fromV(data);
@ -146,7 +149,7 @@ extern SSSE3_FUNC __m128i sse_altivec_vperm(__m128i A, __m128i B, __m128i C)
return _mm_or_si128(_mm_and_si128(mask, sa), _mm_andnot_si128(mask, sb));
}
extern __m128i sse_altivec_vperm_v0(__m128i A, __m128i B, __m128i C)
extern SAFE_BUFFERS __m128i sse_altivec_vperm_v0(__m128i A, __m128i B, __m128i C)
{
__m128i ab[2]{B, A};
v128 index = v128::fromV(_mm_andnot_si128(C, _mm_set1_epi8(0x1f)));
@ -1427,15 +1430,11 @@ bool ppu_interpreter::VOR(ppu_thread& ppu, ppu_opcode_t op)
return true;
}
bool ppu_interpreter_precise::VPERM(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::VPERM(ppu_thread& ppu, ppu_opcode_t op)
{
ppu.vr[op.vd].vi = sse_altivec_vperm_v0(ppu.vr[op.va].vi, ppu.vr[op.vb].vi, ppu.vr[op.vc].vi);
return true;
}
bool ppu_interpreter_fast::VPERM(ppu_thread& ppu, ppu_opcode_t op)
{
ppu.vr[op.vd].vi = sse_altivec_vperm(ppu.vr[op.va].vi, ppu.vr[op.vb].vi, ppu.vr[op.vc].vi);
ppu.vr[op.vd].vi = s_use_ssse3
? sse_altivec_vperm(ppu.vr[op.va].vi, ppu.vr[op.vb].vi, ppu.vr[op.vc].vi)
: sse_altivec_vperm_v0(ppu.vr[op.va].vi, ppu.vr[op.vb].vi, ppu.vr[op.vc].vi);
return true;
}
@ -3959,17 +3958,10 @@ bool ppu_interpreter::DIVW(ppu_thread& ppu, ppu_opcode_t op)
return true;
}
bool ppu_interpreter_precise::LVLX(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::LVLX(ppu_thread& ppu, ppu_opcode_t op)
{
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
ppu.vr[op.vd].vi = sse_cellbe_lvlx_v0(addr);
return true;
}
bool ppu_interpreter_fast::LVLX(ppu_thread& ppu, ppu_opcode_t op)
{
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
ppu.vr[op.vd].vi = sse_cellbe_lvlx(addr);
ppu.vr[op.vd].vi = s_use_ssse3 ? sse_cellbe_lvlx(addr) : sse_cellbe_lvlx_v0(addr);
return true;
}
@ -4030,17 +4022,10 @@ bool ppu_interpreter::SRD(ppu_thread& ppu, ppu_opcode_t op)
return true;
}
bool ppu_interpreter_precise::LVRX(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::LVRX(ppu_thread& ppu, ppu_opcode_t op)
{
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
ppu.vr[op.vd].vi = sse_cellbe_lvrx_v0(addr);
return true;
}
bool ppu_interpreter_fast::LVRX(ppu_thread& ppu, ppu_opcode_t op)
{
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
ppu.vr[op.vd].vi = sse_cellbe_lvrx(addr);
ppu.vr[op.vd].vi = s_use_ssse3 ? sse_cellbe_lvrx(addr) : sse_cellbe_lvrx_v0(addr);
return true;
}
@ -4105,17 +4090,10 @@ bool ppu_interpreter::LFDUX(ppu_thread& ppu, ppu_opcode_t op)
return true;
}
bool ppu_interpreter_precise::STVLX(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::STVLX(ppu_thread& ppu, ppu_opcode_t op)
{
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
sse_cellbe_stvlx_v0(addr, ppu.vr[op.vs].vi);
return true;
}
bool ppu_interpreter_fast::STVLX(ppu_thread& ppu, ppu_opcode_t op)
{
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
sse_cellbe_stvlx(addr, ppu.vr[op.vs].vi);
s_use_ssse3 ? sse_cellbe_stvlx(addr, ppu.vr[op.vs].vi) : sse_cellbe_stvlx_v0(addr, ppu.vr[op.vs].vi);
return true;
}
@ -4160,17 +4138,10 @@ bool ppu_interpreter::STFSX(ppu_thread& ppu, ppu_opcode_t op)
return true;
}
bool ppu_interpreter_precise::STVRX(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::STVRX(ppu_thread& ppu, ppu_opcode_t op)
{
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
sse_cellbe_stvrx_v0(addr, ppu.vr[op.vs].vi);
return true;
}
bool ppu_interpreter_fast::STVRX(ppu_thread& ppu, ppu_opcode_t op)
{
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
sse_cellbe_stvrx(addr, ppu.vr[op.vs].vi);
s_use_ssse3 ? sse_cellbe_stvrx(addr, ppu.vr[op.vs].vi) : sse_cellbe_stvrx_v0(addr, ppu.vr[op.vs].vi);
return true;
}
@ -4227,12 +4198,7 @@ bool ppu_interpreter::STFDUX(ppu_thread& ppu, ppu_opcode_t op)
return true;
}
bool ppu_interpreter_precise::LVLXL(ppu_thread& ppu, ppu_opcode_t op)
{
return LVLX(ppu, op);
}
bool ppu_interpreter_fast::LVLXL(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::LVLXL(ppu_thread& ppu, ppu_opcode_t op)
{
return LVLX(ppu, op);
}
@ -4282,12 +4248,7 @@ bool ppu_interpreter::SRAD(ppu_thread& ppu, ppu_opcode_t op)
return true;
}
bool ppu_interpreter_precise::LVRXL(ppu_thread& ppu, ppu_opcode_t op)
{
return LVRX(ppu, op);
}
bool ppu_interpreter_fast::LVRXL(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::LVRXL(ppu_thread& ppu, ppu_opcode_t op)
{
return LVRX(ppu, op);
}
@ -4324,12 +4285,7 @@ bool ppu_interpreter::EIEIO(ppu_thread& ppu, ppu_opcode_t op)
return true;
}
bool ppu_interpreter_precise::STVLXL(ppu_thread& ppu, ppu_opcode_t op)
{
return STVLX(ppu, op);
}
bool ppu_interpreter_fast::STVLXL(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::STVLXL(ppu_thread& ppu, ppu_opcode_t op)
{
return STVLX(ppu, op);
}
@ -4348,12 +4304,7 @@ bool ppu_interpreter::EXTSH(ppu_thread& ppu, ppu_opcode_t op)
return true;
}
bool ppu_interpreter_precise::STVRXL(ppu_thread& ppu, ppu_opcode_t op)
{
return STVRX(ppu, op);
}
bool ppu_interpreter_fast::STVRXL(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::STVRXL(ppu_thread& ppu, ppu_opcode_t op)
{
return STVRX(ppu, op);
}

View File

@ -76,6 +76,7 @@ struct ppu_interpreter
static bool VNMSUBFP(ppu_thread&, ppu_opcode_t);
static bool VNOR(ppu_thread&, ppu_opcode_t);
static bool VOR(ppu_thread&, ppu_opcode_t);
static bool VPERM(ppu_thread&, ppu_opcode_t);
static bool VPKPX(ppu_thread&, ppu_opcode_t);
static bool VPKUHUM(ppu_thread&, ppu_opcode_t);
static bool VPKUWUM(ppu_thread&, ppu_opcode_t);
@ -328,12 +329,6 @@ struct ppu_interpreter
static bool FCTIDZ(ppu_thread&, ppu_opcode_t);
static bool FCFID(ppu_thread&, ppu_opcode_t);
static bool UNK(ppu_thread&, ppu_opcode_t);
};
struct ppu_interpreter_precise final : ppu_interpreter
{
static bool VPERM(ppu_thread&, ppu_opcode_t);
static bool LVLX(ppu_thread&, ppu_opcode_t);
static bool LVLXL(ppu_thread&, ppu_opcode_t);
static bool LVRX(ppu_thread&, ppu_opcode_t);
@ -343,6 +338,11 @@ struct ppu_interpreter_precise final : ppu_interpreter
static bool STVRX(ppu_thread&, ppu_opcode_t);
static bool STVRXL(ppu_thread&, ppu_opcode_t);
static bool UNK(ppu_thread&, ppu_opcode_t);
};
struct ppu_interpreter_precise final : ppu_interpreter
{
static bool VPKSHSS(ppu_thread&, ppu_opcode_t);
static bool VPKSHUS(ppu_thread&, ppu_opcode_t);
static bool VPKSWSS(ppu_thread&, ppu_opcode_t);
@ -400,16 +400,6 @@ struct ppu_interpreter_precise final : ppu_interpreter
struct ppu_interpreter_fast final : ppu_interpreter
{
static bool VPERM(ppu_thread&, ppu_opcode_t);
static bool LVLX(ppu_thread&, ppu_opcode_t);
static bool LVLXL(ppu_thread&, ppu_opcode_t);
static bool LVRX(ppu_thread&, ppu_opcode_t);
static bool LVRXL(ppu_thread&, ppu_opcode_t);
static bool STVLX(ppu_thread&, ppu_opcode_t);
static bool STVLXL(ppu_thread&, ppu_opcode_t);
static bool STVRX(ppu_thread&, ppu_opcode_t);
static bool STVRXL(ppu_thread&, ppu_opcode_t);
static bool VPKSHSS(ppu_thread&, ppu_opcode_t);
static bool VPKSHUS(ppu_thread&, ppu_opcode_t);
static bool VPKSWSS(ppu_thread&, ppu_opcode_t);

View File

@ -574,12 +574,6 @@ public:
});
}
template <typename F>
ppu_decoder(F&& init) : ppu_decoder()
{
init(m_table);
}
const std::array<T, 0x20000>& get_table() const
{
return m_table;

View File

@ -101,57 +101,8 @@ void fmt_class_string<ppu_decoder_type>::format(std::string& out, u64 arg)
});
}
// Table of identical interpreter functions when precise contains SSE2 version, and fast contains SSSE3 functions
const std::pair<ppu_inter_func_t, ppu_inter_func_t> s_ppu_dispatch_table[]
{
#define FUNC(x) {&ppu_interpreter_precise::x, &ppu_interpreter_fast::x}
FUNC(VPERM),
FUNC(LVLX),
FUNC(LVLXL),
FUNC(LVRX),
FUNC(LVRXL),
FUNC(STVLX),
FUNC(STVLXL),
FUNC(STVRX),
FUNC(STVRXL),
#undef FUNC
};
static const ppu_decoder<ppu_interpreter_precise> g_ppu_interpreter_precise([](auto& table)
{
if (s_use_ssse3)
{
for (auto& func : table)
{
for (const auto& pair : s_ppu_dispatch_table)
{
if (pair.first == func)
{
func = pair.second;
break;
}
}
}
}
});
static const ppu_decoder<ppu_interpreter_fast> g_ppu_interpreter_fast([](auto& table)
{
if (!s_use_ssse3)
{
for (auto& func : table)
{
for (const auto& pair : s_ppu_dispatch_table)
{
if (pair.second == func)
{
func = pair.first;
break;
}
}
}
}
});
constexpr ppu_decoder<ppu_interpreter_precise> g_ppu_interpreter_precise;
constexpr ppu_decoder<ppu_interpreter_fast> g_ppu_interpreter_fast;
extern void ppu_initialize();
extern void ppu_initialize(const ppu_module& info);