SPU: rewrite FSM/FSMH/FSMB instructions

Remove lookup tables
This commit is contained in:
Nekotekina 2018-05-28 20:02:02 +03:00
parent 55e9d437a9
commit 759370ea1b
4 changed files with 53 additions and 54 deletions

View File

@ -2815,35 +2815,47 @@ void spu_recompiler::GBB(spu_opcode_t op)
// FSM: expand bits 0..3 of the 32-bit lane 3 of ra into four 32-bit lanes of rt;
// lane j of rt becomes all ones when bit j is set and zero otherwise.
// Computed with SSE2 only; the former g_spu_imm.fsm lookup path is dropped because it
// stored to rt before ra was fetched and duplicated the work done below.
void spu_recompiler::FSM(spu_opcode_t op)
{
	// NOTE(review): XmmGet is assumed to return a scratch register holding ra - confirm.
	const XmmLink& va = XmmGet(op.ra, XmmType::Int);
	const XmmLink& vm = XmmAlloc();

	// Broadcast lane 3 of ra to every 32-bit lane.
	c->pshufd(va, va, 0xff);

	// Lane j of the mask holds (1 << j); _mm_set_epi32 lists lanes from highest to lowest.
	c->movdqa(vm, XmmConst(_mm_set_epi32(8, 4, 2, 1)));

	// Isolate one bit per lane, then turn "bit is set" into an all-ones lane.
	c->pand(va, vm);
	c->pcmpeqd(va, vm);

	c->movdqa(SPU_OFF_128(gpr, op.rt), va);
}
// FSMH: expand bits 0..7 of the 32-bit lane 3 of ra into eight 16-bit lanes of rt;
// lane j of rt becomes 0xffff when bit j is set and zero otherwise.
// Computed with SSE2 only; the former g_spu_imm.fsmh lookup path is dropped because it
// stored to rt before ra was fetched and duplicated the work done below.
void spu_recompiler::FSMH(spu_opcode_t op)
{
	// NOTE(review): XmmGet is assumed to return a scratch register holding ra - confirm.
	const XmmLink& va = XmmGet(op.ra, XmmType::Int);
	const XmmLink& vm = XmmAlloc();

	// Duplicate each of the upper four 16-bit lanes, so dword 2 holds two copies of lane 6
	// (the low half of 32-bit lane 3) ...
	c->punpckhwd(va, va);

	// ... then broadcast that dword, leaving the same 16-bit value in all eight lanes.
	c->pshufd(va, va, 0xaa);

	// Lane j of the mask holds (1 << j); _mm_set_epi16 lists lanes from highest to lowest.
	c->movdqa(vm, XmmConst(_mm_set_epi16(128, 64, 32, 16, 8, 4, 2, 1)));

	// Isolate one bit per lane, then turn "bit is set" into an all-ones lane.
	c->pand(va, vm);
	c->pcmpeqw(va, vm);

	c->movdqa(SPU_OFF_128(gpr, op.rt), va);
}
// FSMB: expand bits 0..15 of the 32-bit lane 3 of ra into sixteen bytes of rt;
// byte j of rt becomes 0xff when bit j is set and zero otherwise.
// The former g_spu_imm.fsmb lookup path (65536 entries) is dropped because it stored to rt
// before ra was fetched and duplicated the work done below.
void spu_recompiler::FSMB(spu_opcode_t op)
{
	// NOTE(review): XmmGet is assumed to return a scratch register holding ra - confirm.
	const XmmLink& va = XmmGet(op.ra, XmmType::Int);
	const XmmLink& vm = XmmAlloc();

	// Goal of both paths: bytes 0..7 = source byte 12, bytes 8..15 = source byte 13
	// (the two low bytes of 32-bit lane 3).
	if (utils::has_ssse3())
	{
		// Single byte shuffle; _mm_set_epi8 lists bytes from highest to lowest.
		c->pshufb(va, XmmConst(_mm_set_epi8(13, 13, 13, 13, 13, 13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12)));
	}
	else
	{
		// SSE2 fallback: duplicate the upper eight bytes, so word 4 = byte 12 twice and word 5 = byte 13 twice.
		c->punpckhbw(va, va);

		// Upper words become {w4, w4, w5, w5}.
		c->pshufhw(va, va, 0x50);

		// Dwords become {d2, d2, d3, d3}.
		c->pshufd(va, va, 0xfa);
	}

	// Byte j of the mask holds (1 << (j % 8)); the pattern repeats for each source byte.
	c->movdqa(vm, XmmConst(_mm_set_epi8(128, 64, 32, 16, 8, 4, 2, 1, 128, 64, 32, 16, 8, 4, 2, 1)));

	// Isolate one bit per byte, then turn "bit is set" into 0xff.
	c->pand(va, vm);
	c->pcmpeqb(va, vm);

	c->movdqa(SPU_OFF_128(gpr, op.rt), va);
}
void spu_recompiler::FREST(spu_opcode_t op)
@ -4268,8 +4280,12 @@ void spu_recompiler::BR(spu_opcode_t op)
// FSMBI: byte j of rt becomes 0xff when bit j of the immediate op.i16 is set and zero otherwise.
// The immediate is known when the code is generated, so the 16-byte result is built here
// and emitted as a constant. The former load from g_spu_imm.fsmb was dead: it was
// overwritten by the load of the computed constant right after it.
void spu_recompiler::FSMBI(spu_opcode_t op)
{
	v128 data;

	for (u32 i = 0; i < 16; i++)
	{
		data._u8[i] = (op.i16 & (1u << i)) ? 0xff : 0;
	}

	const XmmLink& vr = XmmAlloc();
	c->movdqa(vr, XmmConst(data));
	c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
}

View File

@ -513,19 +513,27 @@ bool spu_interpreter::GBB(SPUThread& spu, spu_opcode_t op)
// FSM: expand bits 0..3 of the 32-bit lane 3 of ra into four 32-bit lanes of rt;
// lane j of rt becomes all ones when bit j is set and zero otherwise.
// The former g_spu_imm.fsm table assignment is dropped: it wrote gpr[rt] before gpr[ra]
// was read, which corrupts the source when rt == ra. Always returns true.
bool spu_interpreter::FSM(SPUThread& spu, spu_opcode_t op)
{
	// Broadcast lane 3 of ra to every 32-bit lane.
	const auto bits = _mm_shuffle_epi32(spu.gpr[op.ra].vi, 0xff);

	// Lane j of the mask holds (1 << j); _mm_set_epi32 lists lanes from highest to lowest.
	const auto mask = _mm_set_epi32(8, 4, 2, 1);

	// A lane compares equal to its mask exactly when the corresponding bit is set.
	spu.gpr[op.rt].vi = _mm_cmpeq_epi32(_mm_and_si128(bits, mask), mask);
	return true;
}
// FSMH: expand bits 0..7 of the 32-bit lane 3 of ra into eight 16-bit lanes of rt;
// lane j of rt becomes 0xffff when bit j is set and zero otherwise.
// The former g_spu_imm.fsmh table assignment is dropped: it wrote gpr[rt] before gpr[ra]
// was read, which corrupts the source when rt == ra. Always returns true.
bool spu_interpreter::FSMH(SPUThread& spu, spu_opcode_t op)
{
	const auto vsrc = spu.gpr[op.ra].vi;

	// Duplicate the upper four 16-bit lanes, then broadcast dword 2, which holds two copies
	// of lane 6 (the low half of 32-bit lane 3).
	const auto bits = _mm_shuffle_epi32(_mm_unpackhi_epi16(vsrc, vsrc), 0xaa);

	// Lane j of the mask holds (1 << j); _mm_set_epi16 lists lanes from highest to lowest.
	const auto mask = _mm_set_epi16(128, 64, 32, 16, 8, 4, 2, 1);

	// A lane compares equal to its mask exactly when the corresponding bit is set.
	spu.gpr[op.rt].vi = _mm_cmpeq_epi16(_mm_and_si128(bits, mask), mask);
	return true;
}
// FSMB: expand bits 0..15 of the 32-bit lane 3 of ra into sixteen bytes of rt;
// byte j of rt becomes 0xff when bit j is set and zero otherwise.
// The former g_spu_imm.fsmb table assignment is dropped: it wrote gpr[rt] before gpr[ra]
// was read, which corrupts the source when rt == ra. Always returns true.
bool spu_interpreter::FSMB(SPUThread& spu, spu_opcode_t op)
{
	const auto vsrc = spu.gpr[op.ra].vi;

	// Spread source byte 12 over bytes 0..7 and source byte 13 over bytes 8..15:
	// duplicate the upper eight bytes, make the upper words {w4, w4, w5, w5},
	// then make the dwords {d2, d2, d3, d3}.
	const auto bits = _mm_shuffle_epi32(_mm_shufflehi_epi16(_mm_unpackhi_epi8(vsrc, vsrc), 0x50), 0xfa);

	// Byte j of the mask holds (1 << (j % 8)); _mm_set_epi8 lists bytes from highest to lowest.
	const auto mask = _mm_set_epi8(128, 64, 32, 16, 8, 4, 2, 1, 128, 64, 32, 16, 8, 4, 2, 1);

	// A byte compares equal to its mask exactly when the corresponding bit is set.
	spu.gpr[op.rt].vi = _mm_cmpeq_epi8(_mm_and_si128(bits, mask), mask);
	return true;
}
@ -1382,7 +1390,10 @@ bool spu_interpreter::BR(SPUThread& spu, spu_opcode_t op)
// FSMBI: byte j of rt becomes 0xff when bit j of the immediate op.i16 is set and zero otherwise.
// The former g_spu_imm.fsmb table assignment is dropped: its result was overwritten by the
// computed value below. Always returns true.
bool spu_interpreter::FSMBI(SPUThread& spu, spu_opcode_t op)
{
	// Place the immediate in the lowest 32-bit lane.
	const auto vsrc = _mm_set_epi32(0, 0, 0, op.i16);

	// Spread source byte 0 over bytes 0..7 and source byte 1 over bytes 8..15:
	// duplicate the lower eight bytes, make the lower words {w0, w0, w1, w1},
	// then make the dwords {d0, d0, d1, d1}.
	const auto bits = _mm_shuffle_epi32(_mm_shufflelo_epi16(_mm_unpacklo_epi8(vsrc, vsrc), 0x50), 0x50);

	// Byte j of the mask holds (1 << (j % 8)); _mm_set_epi8 lists bytes from highest to lowest.
	const auto mask = _mm_set_epi8(128, 64, 32, 16, 8, 4, 2, 1, 128, 64, 32, 16, 8, 4, 2, 1);

	// A byte compares equal to its mask exactly when the corresponding bit is set.
	spu.gpr[op.rt].vi = _mm_cmpeq_epi8(_mm_and_si128(bits, mask), mask);
	return true;
}

View File

@ -361,30 +361,6 @@ spu_imm_table_t::scale_table_t::scale_table_t()
spu_imm_table_t::spu_imm_table_t()
{
for (u32 i = 0; i < sizeof(fsm) / sizeof(fsm[0]); i++)
{
for (u32 j = 0; j < 4; j++)
{
fsm[i]._u32[j] = (i & (1 << j)) ? 0xffffffff : 0;
}
}
for (u32 i = 0; i < sizeof(fsmh) / sizeof(fsmh[0]); i++)
{
for (u32 j = 0; j < 8; j++)
{
fsmh[i]._u16[j] = (i & (1 << j)) ? 0xffff : 0;
}
}
for (u32 i = 0; i < sizeof(fsmb) / sizeof(fsmb[0]); i++)
{
for (u32 j = 0; j < 16; j++)
{
fsmb[i]._u8[j] = (i & (1 << j)) ? 0xff : 0;
}
}
for (u32 i = 0; i < sizeof(sldq_pshufb) / sizeof(sldq_pshufb[0]); i++)
{
for (u32 j = 0; j < 16; j++)

View File

@ -378,10 +378,6 @@ struct spu_int_ctrl_t
struct spu_imm_table_t
{
v128 fsmb[65536]; // table for FSMB, FSMBI instructions
v128 fsmh[256]; // table for FSMH instruction
v128 fsm[16]; // table for FSM instruction
v128 sldq_pshufb[32]; // table for SHLQBYBI, SHLQBY, SHLQBYI instructions
v128 srdq_pshufb[32]; // table for ROTQMBYBI, ROTQMBY, ROTQMBYI instructions
v128 rldq_pshufb[16]; // table for ROTQBYBI, ROTQBY, ROTQBYI instructions