mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-12-28 00:18:23 +00:00
SPU: rewrite FSM/FSMH/FSMB instructions
Remove lookup tables
This commit is contained in:
parent
55e9d437a9
commit
759370ea1b
@ -2815,35 +2815,47 @@ void spu_recompiler::GBB(spu_opcode_t op)
|
||||
|
||||
// FSM, recompiler path: emits x86 code that writes gpr[op.rt] so that 32-bit lane j
// (j = 0..3) is all-ones when bit j of 32-bit element 3 of gpr[op.ra] is set, else zero.
// NOTE(review): this listing is a flattened diff. The first instruction group (using `vr` and
// g_spu_imm.fsm) appears to be the removed table-lookup version and the second group
// (using `va`/`vm`) its replacement; the hunk header's 35 -> 47 line counts match that reading.
// The `|` / `||||` rows appear to be diff-table residue rather than code.
void spu_recompiler::FSM(spu_opcode_t op)
|
||||
{
|
||||
// --- table-lookup variant (presumably the removed side of the diff) ---
const XmmLink& vr = XmmAlloc();
|
||||
c->mov(*qw0, asmjit::imm_ptr((void*)g_spu_imm.fsm));
|
||||
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
|
||||
// Keep the low 4 bits as the table index.
c->and_(*addr, 0xf);
|
||||
// index * 16: byte offset of a 16-byte table entry.
c->shl(*addr, 4);
|
||||
c->movdqa(vr, asmjit::x86::oword_ptr(*qw0, addr->r64()));
|
||||
c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
|
||||
// --- shuffle/mask/compare variant (presumably the added side of the diff) ---
const XmmLink& va = XmmGet(op.ra, XmmType::Int);
|
||||
const XmmLink& vm = XmmAlloc();
|
||||
// Broadcast 32-bit element 3 into all four lanes.
c->pshufd(va, va, 0xff);
|
||||
// Lane j holds the single-bit constant 1 << j (_mm_set_epi32 lists the highest lane first).
c->movdqa(vm, XmmConst(_mm_set_epi32(8, 4, 2, 1)));
|
||||
c->pand(va, vm);
|
||||
// A lane compares equal (all-ones) only when its selected bit survived the AND.
c->pcmpeqd(va, vm);
|
||||
c->movdqa(SPU_OFF_128(gpr, op.rt), va);
|
||||
}
|
||||
|
||||
// FSMH, recompiler path: emits x86 code that writes gpr[op.rt] so that 16-bit lane j
// (j = 0..7) is all-ones when bit j of the low byte of 32-bit element 3 of gpr[op.ra] is set,
// else zero.
// NOTE(review): this listing is a flattened diff. The first instruction group (using `vr` and
// g_spu_imm.fsmh) appears to be the removed table-lookup version and the second group
// (using `va`/`vm`) its replacement. The `|` / `||||` rows appear to be diff-table residue.
void spu_recompiler::FSMH(spu_opcode_t op)
|
||||
{
|
||||
// --- table-lookup variant (presumably the removed side of the diff) ---
const XmmLink& vr = XmmAlloc();
|
||||
c->mov(*qw0, asmjit::imm_ptr((void*)g_spu_imm.fsmh));
|
||||
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
|
||||
// Keep the low 8 bits as the table index.
c->and_(*addr, 0xff);
|
||||
// index * 16: byte offset of a 16-byte table entry.
c->shl(*addr, 4);
|
||||
c->movdqa(vr, asmjit::x86::oword_ptr(*qw0, addr->r64()));
|
||||
c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
|
||||
// --- shuffle/mask/compare variant (presumably the added side of the diff) ---
const XmmLink& va = XmmGet(op.ra, XmmType::Int);
|
||||
const XmmLink& vm = XmmAlloc();
|
||||
// Duplicate each of the upper four 16-bit words: w4,w4,w5,w5,w6,w6,w7,w7.
c->punpckhwd(va, va);
|
||||
// Broadcast dword 2 of that result (w6,w6), i.e. the low 16 bits of the original element 3.
c->pshufd(va, va, 0xaa);
|
||||
// 16-bit lane j holds the single-bit constant 1 << j.
c->movdqa(vm, XmmConst(_mm_set_epi16(128, 64, 32, 16, 8, 4, 2, 1)));
|
||||
c->pand(va, vm);
|
||||
// A lane compares equal (all-ones) only when its selected bit survived the AND.
c->pcmpeqw(va, vm);
|
||||
c->movdqa(SPU_OFF_128(gpr, op.rt), va);
|
||||
}
|
||||
|
||||
// FSMB, recompiler path: emits x86 code that writes gpr[op.rt] so that byte j (j = 0..15) is
// 0xff when bit j of the low 16 bits of 32-bit element 3 of gpr[op.ra] is set, else zero.
// NOTE(review): this listing is a flattened diff. The first instruction group (using `vr` and
// g_spu_imm.fsmb) appears to be the removed table-lookup version and the rest (using
// `va`/`vm`) its replacement. The `|` / `||||` rows appear to be diff-table residue.
void spu_recompiler::FSMB(spu_opcode_t op)
|
||||
{
|
||||
// --- table-lookup variant (presumably the removed side of the diff) ---
const XmmLink& vr = XmmAlloc();
|
||||
c->mov(*qw0, asmjit::imm_ptr((void*)g_spu_imm.fsmb));
|
||||
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
|
||||
// Keep the low 16 bits as the table index.
c->and_(*addr, 0xffff);
|
||||
// index * 16: byte offset of a 16-byte table entry.
c->shl(*addr, 4);
|
||||
c->movdqa(vr, asmjit::x86::oword_ptr(*qw0, addr->r64()));
|
||||
c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
|
||||
// --- shuffle/mask/compare variant (presumably the added side of the diff) ---
const XmmLink& va = XmmGet(op.ra, XmmType::Int);
|
||||
const XmmLink& vm = XmmAlloc();
|
||||
|
||||
// Goal of both branches: bytes 0..7 = source byte 12, bytes 8..15 = source byte 13.
// pshufb is an SSSE3 instruction, hence the feature check and the SSE2-only fallback.
if (utils::has_ssse3())
|
||||
{
|
||||
c->pshufb(va, XmmConst(_mm_set_epi8(13, 13, 13, 13, 13, 13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12)));
|
||||
}
|
||||
else
|
||||
{
|
||||
// Duplicate each of the upper eight bytes: b8,b8,b9,b9,...,b15,b15.
c->punpckhbw(va, va);
|
||||
// High four words become (b12,b12),(b12,b12),(b13,b13),(b13,b13).
c->pshufhw(va, va, 0x50);
|
||||
// Dwords 2,2,3,3: the low half is all b12, the high half is all b13.
c->pshufd(va, va, 0xfa);
|
||||
}
|
||||
|
||||
// Byte j holds the single-bit constant 1 << (j % 8).
c->movdqa(vm, XmmConst(_mm_set_epi8(128, 64, 32, 16, 8, 4, 2, 1, 128, 64, 32, 16, 8, 4, 2, 1)));
|
||||
c->pand(va, vm);
|
||||
// A byte compares equal (0xff) only when its selected bit survived the AND.
c->pcmpeqb(va, vm);
|
||||
c->movdqa(SPU_OFF_128(gpr, op.rt), va);
|
||||
}
|
||||
|
||||
void spu_recompiler::FREST(spu_opcode_t op)
|
||||
@ -4268,8 +4280,12 @@ void spu_recompiler::BR(spu_opcode_t op)
|
||||
|
||||
// FSMBI, recompiler path: the mask comes from the immediate op.i16, so the 16-byte result is
// known when the code is emitted; the generated code only loads that constant and stores it
// to gpr[op.rt].
// NOTE(review): this listing is a flattened diff. The load from g_spu_imm.fsmb[op.i16] appears
// to be the removed line and the locally built `data` constant its replacement (hunk header
// 8 -> 12 lines). The `|` / `||||` rows appear to be diff-table residue.
void spu_recompiler::FSMBI(spu_opcode_t op)
|
||||
{
|
||||
v128 data;
|
||||
// Byte i of the constant is 0xff when bit i of the immediate is set, else 0.
for (u32 i = 0; i < 16; i++)
|
||||
data._u8[i] = op.i16 & (1u << i) ? 0xff : 0;
|
||||
|
||||
const XmmLink& vr = XmmAlloc();
|
||||
// Table-based constant (presumably the removed side of the diff).
c->movdqa(vr, XmmConst(g_spu_imm.fsmb[op.i16]));
|
||||
// Constant built above (presumably the added side of the diff).
c->movdqa(vr, XmmConst(data));
|
||||
c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
|
||||
}
|
||||
|
||||
|
@ -513,19 +513,27 @@ bool spu_interpreter::GBB(SPUThread& spu, spu_opcode_t op)
|
||||
|
||||
// FSM, interpreter path: sets 32-bit lane j (j = 0..3) of gpr[op.rt] to all-ones when bit j of
// 32-bit element 3 of gpr[op.ra] is set, else zero. Always returns true.
// NOTE(review): this listing is a flattened diff. The g_spu_imm.fsm assignment appears to be
// the removed line and the three intrinsic statements its replacement (hunk header 19 -> 27
// lines). The `|` / `||||` rows appear to be diff-table residue.
bool spu_interpreter::FSM(SPUThread& spu, spu_opcode_t op)
|
||||
{
|
||||
// Table lookup indexed by the low 4 bits (presumably the removed side of the diff).
spu.gpr[op.rt] = g_spu_imm.fsm[spu.gpr[op.ra]._u32[3] & 0xf];
|
||||
// Broadcast 32-bit element 3 into all four lanes.
const auto bits = _mm_shuffle_epi32(spu.gpr[op.ra].vi, 0xff);
|
||||
// Lane j holds 1 << j (_mm_set_epi32 lists the highest lane first).
const auto mask = _mm_set_epi32(8, 4, 2, 1);
|
||||
// A lane is all-ones only when its selected bit is present in `bits`.
spu.gpr[op.rt].vi = _mm_cmpeq_epi32(_mm_and_si128(bits, mask), mask);
|
||||
return true;
|
||||
}
|
||||
|
||||
// FSMH, interpreter path: sets 16-bit lane j (j = 0..7) of gpr[op.rt] to all-ones when bit j of
// the low byte of 32-bit element 3 of gpr[op.ra] is set, else zero. Always returns true.
// NOTE(review): this listing is a flattened diff. The g_spu_imm.fsmh assignment appears to be
// the removed line and the intrinsic statements its replacement. The `|` / `||||` rows appear
// to be diff-table residue.
bool spu_interpreter::FSMH(SPUThread& spu, spu_opcode_t op)
|
||||
{
|
||||
// Table lookup indexed by the low 8 bits (presumably the removed side of the diff).
spu.gpr[op.rt] = g_spu_imm.fsmh[spu.gpr[op.ra]._u32[3] & 0xff];
|
||||
const auto vsrc = spu.gpr[op.ra].vi;
|
||||
// unpackhi duplicates words 4..7; selecting dword 2 broadcasts word 6 (the low 16 bits of
// element 3) into every 16-bit lane.
const auto bits = _mm_shuffle_epi32(_mm_unpackhi_epi16(vsrc, vsrc), 0xaa);
|
||||
// 16-bit lane j holds 1 << j.
const auto mask = _mm_set_epi16(128, 64, 32, 16, 8, 4, 2, 1);
|
||||
// A lane is all-ones only when its selected bit is present in `bits`.
spu.gpr[op.rt].vi = _mm_cmpeq_epi16(_mm_and_si128(bits, mask), mask);
|
||||
return true;
|
||||
}
|
||||
|
||||
// FSMB, interpreter path: sets byte j (j = 0..15) of gpr[op.rt] to 0xff when bit j of the low
// 16 bits of 32-bit element 3 of gpr[op.ra] is set, else zero. Always returns true.
// NOTE(review): this listing is a flattened diff. The g_spu_imm.fsmb assignment appears to be
// the removed line and the intrinsic statements its replacement. The `|` / `||||` rows appear
// to be diff-table residue.
bool spu_interpreter::FSMB(SPUThread& spu, spu_opcode_t op)
|
||||
{
|
||||
// Table lookup indexed by the low 16 bits (presumably the removed side of the diff).
spu.gpr[op.rt] = g_spu_imm.fsmb[spu.gpr[op.ra]._u32[3] & 0xffff];
|
||||
const auto vsrc = spu.gpr[op.ra].vi;
|
||||
// unpackhi_epi8 duplicates bytes 8..15; shufflehi 0x50 then shuffle 0xfa leave source byte 12
// in bytes 0..7 and source byte 13 in bytes 8..15.
const auto bits = _mm_shuffle_epi32(_mm_shufflehi_epi16(_mm_unpackhi_epi8(vsrc, vsrc), 0x50), 0xfa);
|
||||
// Byte j holds 1 << (j % 8).
const auto mask = _mm_set_epi8(128, 64, 32, 16, 8, 4, 2, 1, 128, 64, 32, 16, 8, 4, 2, 1);
|
||||
// A byte is 0xff only when its selected bit is present in `bits`.
spu.gpr[op.rt].vi = _mm_cmpeq_epi8(_mm_and_si128(bits, mask), mask);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1382,7 +1390,10 @@ bool spu_interpreter::BR(SPUThread& spu, spu_opcode_t op)
|
||||
|
||||
// FSMBI, interpreter path: same byte-mask expansion as FSMB, but the mask bits come from the
// immediate op.i16 instead of a register. Always returns true.
// NOTE(review): this listing is a flattened diff. The g_spu_imm.fsmb assignment appears to be
// the removed line and the intrinsic statements its replacement (hunk header 7 -> 10 lines).
// The `|` / `||||` rows appear to be diff-table residue.
bool spu_interpreter::FSMBI(SPUThread& spu, spu_opcode_t op)
|
||||
{
|
||||
// Table lookup indexed by the immediate (presumably the removed side of the diff).
spu.gpr[op.rt] = g_spu_imm.fsmb[op.i16];
|
||||
// Place the immediate in 32-bit lane 0; the other lanes are zero.
const auto vsrc = _mm_set_epi32(0, 0, 0, op.i16);
|
||||
// unpacklo_epi8 duplicates bytes 0..7; shufflelo 0x50 then shuffle 0x50 leave immediate byte 0
// in bytes 0..7 and immediate byte 1 in bytes 8..15.
const auto bits = _mm_shuffle_epi32(_mm_shufflelo_epi16(_mm_unpacklo_epi8(vsrc, vsrc), 0x50), 0x50);
|
||||
// Byte j holds 1 << (j % 8).
const auto mask = _mm_set_epi8(128, 64, 32, 16, 8, 4, 2, 1, 128, 64, 32, 16, 8, 4, 2, 1);
|
||||
// A byte is 0xff only when its selected bit is present in `bits`.
spu.gpr[op.rt].vi = _mm_cmpeq_epi8(_mm_and_si128(bits, mask), mask);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -361,30 +361,6 @@ spu_imm_table_t::scale_table_t::scale_table_t()
|
||||
|
||||
spu_imm_table_t::spu_imm_table_t()
|
||||
{
|
||||
for (u32 i = 0; i < sizeof(fsm) / sizeof(fsm[0]); i++)
|
||||
{
|
||||
for (u32 j = 0; j < 4; j++)
|
||||
{
|
||||
fsm[i]._u32[j] = (i & (1 << j)) ? 0xffffffff : 0;
|
||||
}
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < sizeof(fsmh) / sizeof(fsmh[0]); i++)
|
||||
{
|
||||
for (u32 j = 0; j < 8; j++)
|
||||
{
|
||||
fsmh[i]._u16[j] = (i & (1 << j)) ? 0xffff : 0;
|
||||
}
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < sizeof(fsmb) / sizeof(fsmb[0]); i++)
|
||||
{
|
||||
for (u32 j = 0; j < 16; j++)
|
||||
{
|
||||
fsmb[i]._u8[j] = (i & (1 << j)) ? 0xff : 0;
|
||||
}
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < sizeof(sldq_pshufb) / sizeof(sldq_pshufb[0]); i++)
|
||||
{
|
||||
for (u32 j = 0; j < 16; j++)
|
||||
|
@ -378,10 +378,6 @@ struct spu_int_ctrl_t
|
||||
|
||||
struct spu_imm_table_t
|
||||
{
|
||||
v128 fsmb[65536]; // table for FSMB, FSMBI instructions
|
||||
v128 fsmh[256]; // table for FSMH instruction
|
||||
v128 fsm[16]; // table for FSM instruction
|
||||
|
||||
v128 sldq_pshufb[32]; // table for SHLQBYBI, SHLQBY, SHLQBYI instructions
|
||||
v128 srdq_pshufb[32]; // table for ROTQMBYBI, ROTQMBY, ROTQMBYI instructions
|
||||
v128 rldq_pshufb[16]; // table for ROTQBYBI, ROTQBY, ROTQBYI instructions
|
||||
|
Loading…
Reference in New Issue
Block a user