SPU: use XOP instructions

This commit is contained in:
Nekotekina 2018-01-16 14:32:57 +03:00
parent 84103b69cf
commit e2439e962c
3 changed files with 140 additions and 0 deletions

View File

@ -39,6 +39,12 @@ bool utils::has_512()
return g_value;
}
bool utils::has_xop()
{
static const bool g_value = has_avx() && get_cpuid(0x80000001, 0)[2] & 0x800;
return g_value;
}
std::string utils::get_system_info()
{
std::string result;
@ -92,6 +98,11 @@ std::string utils::get_system_info()
{
result += '+';
}
if (has_xop())
{
result += 'x';
}
}
if (has_rtm())

View File

@ -26,6 +26,8 @@ namespace utils
bool has_512();
bool has_xop();
inline bool transaction_enter()
{
while (true)

View File

@ -615,6 +615,16 @@ void spu_recompiler::ROT(spu_opcode_t op)
return;
}
if (utils::has_xop())
{
const XmmLink& va = XmmGet(op.ra, XmmType::Int);
const XmmLink& vb = XmmGet(op.rb, XmmType::Int);
const XmmLink& vt = XmmAlloc();
c->vprotd(vt, va, vb);
c->movdqa(SPU_OFF_128(gpr, op.rt), vt);
return;
}
auto body = [](u32* t, const u32* a, const s32* b) noexcept
{
for (u32 i = 0; i < 4; i++)
@ -654,6 +664,22 @@ void spu_recompiler::ROTM(spu_opcode_t op)
return;
}
if (utils::has_xop())
{
const XmmLink& va = XmmGet(op.ra, XmmType::Int);
const XmmLink& vb = XmmGet(op.rb, XmmType::Int);
const XmmLink& vt = XmmAlloc();
c->psubd(vb, XmmConst(_mm_set1_epi32(1)));
c->pandn(vb, XmmConst(_mm_set1_epi32(0x3f)));
c->pxor(vt, vt);
c->psubd(vt, vb);
c->pcmpgtd(vb, XmmConst(_mm_set1_epi32(31)));
c->vpshld(vt, va, vt);
c->vpandn(vt, vb, vt);
c->movdqa(SPU_OFF_128(gpr, op.rt), vt);
return;
}
auto body = [](u32* t, const u32* a, const u32* b) noexcept
{
for (u32 i = 0; i < 4; i++)
@ -694,6 +720,21 @@ void spu_recompiler::ROTMA(spu_opcode_t op)
return;
}
if (utils::has_xop())
{
const XmmLink& va = XmmGet(op.ra, XmmType::Int);
const XmmLink& vb = XmmGet(op.rb, XmmType::Int);
const XmmLink& vt = XmmAlloc();
c->psubd(vb, XmmConst(_mm_set1_epi32(1)));
c->pandn(vb, XmmConst(_mm_set1_epi32(0x3f)));
c->pxor(vt, vt);
c->pminud(vb, XmmConst(_mm_set1_epi32(31)));
c->psubd(vt, vb);
c->vpshad(vt, va, vt);
c->movdqa(SPU_OFF_128(gpr, op.rt), vt);
return;
}
auto body = [](s32* t, const s32* a, const u32* b) noexcept
{
for (u32 i = 0; i < 4; i++)
@ -733,6 +774,19 @@ void spu_recompiler::SHL(spu_opcode_t op)
return;
}
if (utils::has_xop())
{
const XmmLink& va = XmmGet(op.ra, XmmType::Int);
const XmmLink& vb = XmmGet(op.rb, XmmType::Int);
const XmmLink& vt = XmmAlloc();
c->pand(vb, XmmConst(_mm_set1_epi32(0x3f)));
c->vpcmpgtd(vt, vb, XmmConst(_mm_set1_epi32(31)));
c->vpshld(vb, va, vb);
c->pandn(vt, vb);
c->movdqa(SPU_OFF_128(gpr, op.rt), vt);
return;
}
auto body = [](u32* t, const u32* a, const u32* b) noexcept
{
for (u32 i = 0; i < 4; i++)
@ -777,6 +831,16 @@ void spu_recompiler::ROTH(spu_opcode_t op) //nf
return;
}
if (utils::has_xop())
{
const XmmLink& va = XmmGet(op.ra, XmmType::Int);
const XmmLink& vb = XmmGet(op.rb, XmmType::Int);
const XmmLink& vt = XmmAlloc();
c->vprotw(vt, va, vb);
c->movdqa(SPU_OFF_128(gpr, op.rt), vt);
return;
}
auto body = [](u16* t, const u16* a, const u16* b) noexcept
{
for (u32 i = 0; i < 8; i++)
@ -836,6 +900,22 @@ void spu_recompiler::ROTHM(spu_opcode_t op)
return;
}
if (utils::has_xop())
{
const XmmLink& va = XmmGet(op.ra, XmmType::Int);
const XmmLink& vb = XmmGet(op.rb, XmmType::Int);
const XmmLink& vt = XmmAlloc();
c->psubw(vb, XmmConst(_mm_set1_epi16(1)));
c->pandn(vb, XmmConst(_mm_set1_epi16(0x1f)));
c->pxor(vt, vt);
c->psubw(vt, vb);
c->pcmpgtw(vb, XmmConst(_mm_set1_epi16(15)));
c->vpshlw(vt, va, vt);
c->vpandn(vt, vb, vt);
c->movdqa(SPU_OFF_128(gpr, op.rt), vt);
return;
}
auto body = [](u16* t, const u16* a, const u16* b) noexcept
{
for (u32 i = 0; i < 8; i++)
@ -898,6 +978,21 @@ void spu_recompiler::ROTMAH(spu_opcode_t op)
return;
}
if (utils::has_xop())
{
const XmmLink& va = XmmGet(op.ra, XmmType::Int);
const XmmLink& vb = XmmGet(op.rb, XmmType::Int);
const XmmLink& vt = XmmAlloc();
c->psubw(vb, XmmConst(_mm_set1_epi16(1)));
c->pandn(vb, XmmConst(_mm_set1_epi16(0x1f)));
c->pxor(vt, vt);
c->pminuw(vb, XmmConst(_mm_set1_epi16(15)));
c->psubw(vt, vb);
c->vpshaw(vt, va, vt);
c->movdqa(SPU_OFF_128(gpr, op.rt), vt);
return;
}
auto body = [](s16* t, const s16* a, const u16* b) noexcept
{
for (u32 i = 0; i < 8; i++)
@ -956,6 +1051,19 @@ void spu_recompiler::SHLH(spu_opcode_t op)
return;
}
if (utils::has_xop())
{
const XmmLink& va = XmmGet(op.ra, XmmType::Int);
const XmmLink& vb = XmmGet(op.rb, XmmType::Int);
const XmmLink& vt = XmmAlloc();
c->pand(vb, XmmConst(_mm_set1_epi16(0x1f)));
c->vpcmpgtw(vt, vb, XmmConst(_mm_set1_epi16(15)));
c->vpshlw(vb, va, vb);
c->pandn(vt, vb);
c->movdqa(SPU_OFF_128(gpr, op.rt), vt);
return;
}
auto body = [](u16* t, const u16* a, const u16* b) noexcept
{
for (u32 i = 0; i < 8; i++)
@ -994,6 +1102,14 @@ void spu_recompiler::ROTI(spu_opcode_t op)
return;
}
if (utils::has_xop())
{
const XmmLink& va = XmmGet(op.ra, XmmType::Int);
c->vprotd(va, va, s);
c->movdqa(SPU_OFF_128(gpr, op.rt), va);
return;
}
const XmmLink& va = XmmGet(op.ra, XmmType::Int);
const XmmLink& v1 = XmmAlloc();
c->movdqa(v1, va);
@ -3351,6 +3467,13 @@ void spu_recompiler::SELB(spu_opcode_t op)
return;
}
if (utils::has_xop())
{
c->vpcmov(vc, vb, SPU_OFF_128(gpr, op.ra), vc);
c->movdqa(SPU_OFF_128(gpr, op.rt4), vc);
return;
}
c->pand(vb, vc);
c->pandn(vc, SPU_OFF_128(gpr, op.ra));
c->por(vb, vc);
@ -3475,6 +3598,10 @@ void spu_recompiler::SHUFB(spu_opcode_t op)
{
c->vpternlogd(vc, va, vb, 0xca /* A?B:C */);
}
else if (utils::has_xop())
{
c->vpcmov(vc, va, vb, vc);
}
else
{
c->pand(va, vc);