diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp index 540f5c391a..3918eda2a4 100644 --- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp @@ -2815,35 +2815,47 @@ void spu_recompiler::GBB(spu_opcode_t op) void spu_recompiler::FSM(spu_opcode_t op) { - const XmmLink& vr = XmmAlloc(); - c->mov(*qw0, asmjit::imm_ptr((void*)g_spu_imm.fsm)); - c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3)); - c->and_(*addr, 0xf); - c->shl(*addr, 4); - c->movdqa(vr, asmjit::x86::oword_ptr(*qw0, addr->r64())); - c->movdqa(SPU_OFF_128(gpr, op.rt), vr); + const XmmLink& va = XmmGet(op.ra, XmmType::Int); + const XmmLink& vm = XmmAlloc(); + c->pshufd(va, va, 0xff); + c->movdqa(vm, XmmConst(_mm_set_epi32(8, 4, 2, 1))); + c->pand(va, vm); + c->pcmpeqd(va, vm); + c->movdqa(SPU_OFF_128(gpr, op.rt), va); } void spu_recompiler::FSMH(spu_opcode_t op) { - const XmmLink& vr = XmmAlloc(); - c->mov(*qw0, asmjit::imm_ptr((void*)g_spu_imm.fsmh)); - c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3)); - c->and_(*addr, 0xff); - c->shl(*addr, 4); - c->movdqa(vr, asmjit::x86::oword_ptr(*qw0, addr->r64())); - c->movdqa(SPU_OFF_128(gpr, op.rt), vr); + const XmmLink& va = XmmGet(op.ra, XmmType::Int); + const XmmLink& vm = XmmAlloc(); + c->punpckhwd(va, va); + c->pshufd(va, va, 0xaa); + c->movdqa(vm, XmmConst(_mm_set_epi16(128, 64, 32, 16, 8, 4, 2, 1))); + c->pand(va, vm); + c->pcmpeqw(va, vm); + c->movdqa(SPU_OFF_128(gpr, op.rt), va); } void spu_recompiler::FSMB(spu_opcode_t op) { - const XmmLink& vr = XmmAlloc(); - c->mov(*qw0, asmjit::imm_ptr((void*)g_spu_imm.fsmb)); - c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3)); - c->and_(*addr, 0xffff); - c->shl(*addr, 4); - c->movdqa(vr, asmjit::x86::oword_ptr(*qw0, addr->r64())); - c->movdqa(SPU_OFF_128(gpr, op.rt), vr); + const XmmLink& va = XmmGet(op.ra, XmmType::Int); + const XmmLink& vm = XmmAlloc(); + + if (utils::has_ssse3()) + { + c->pshufb(va, XmmConst(_mm_set_epi8(13, 13, 13, 13, 13, 13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12))); + } + else + { + c->punpckhbw(va, va); + c->pshufhw(va, va, 0x50); + c->pshufd(va, va, 0xfa); + } + + c->movdqa(vm, XmmConst(_mm_set_epi8(128, 64, 32, 16, 8, 4, 2, 1, 128, 64, 32, 16, 8, 4, 2, 1))); + c->pand(va, vm); + c->pcmpeqb(va, vm); + c->movdqa(SPU_OFF_128(gpr, op.rt), va); } void spu_recompiler::FREST(spu_opcode_t op) @@ -4268,8 +4280,12 @@ void spu_recompiler::BR(spu_opcode_t op) void spu_recompiler::FSMBI(spu_opcode_t op) { + v128 data; + for (u32 i = 0; i < 16; i++) + data._u8[i] = op.i16 & (1u << i) ? 0xff : 0; + const XmmLink& vr = XmmAlloc(); - c->movdqa(vr, XmmConst(g_spu_imm.fsmb[op.i16])); + c->movdqa(vr, XmmConst(data)); c->movdqa(SPU_OFF_128(gpr, op.rt), vr); } diff --git a/rpcs3/Emu/Cell/SPUInterpreter.cpp b/rpcs3/Emu/Cell/SPUInterpreter.cpp index e8fb3eccdb..a0911f8a77 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/SPUInterpreter.cpp @@ -513,19 +513,27 @@ bool spu_interpreter::GBB(SPUThread& spu, spu_opcode_t op) bool spu_interpreter::FSM(SPUThread& spu, spu_opcode_t op) { - spu.gpr[op.rt] = g_spu_imm.fsm[spu.gpr[op.ra]._u32[3] & 0xf]; + const auto bits = _mm_shuffle_epi32(spu.gpr[op.ra].vi, 0xff); + const auto mask = _mm_set_epi32(8, 4, 2, 1); + spu.gpr[op.rt].vi = _mm_cmpeq_epi32(_mm_and_si128(bits, mask), mask); return true; } bool spu_interpreter::FSMH(SPUThread& spu, spu_opcode_t op) { - spu.gpr[op.rt] = g_spu_imm.fsmh[spu.gpr[op.ra]._u32[3] & 0xff]; + const auto vsrc = spu.gpr[op.ra].vi; + const auto bits = _mm_shuffle_epi32(_mm_unpackhi_epi16(vsrc, vsrc), 0xaa); + const auto mask = _mm_set_epi16(128, 64, 32, 16, 8, 4, 2, 1); + spu.gpr[op.rt].vi = _mm_cmpeq_epi16(_mm_and_si128(bits, mask), mask); return true; } bool spu_interpreter::FSMB(SPUThread& spu, spu_opcode_t op) { - spu.gpr[op.rt] = g_spu_imm.fsmb[spu.gpr[op.ra]._u32[3] & 0xffff]; + const auto vsrc = spu.gpr[op.ra].vi; + const auto bits = _mm_shuffle_epi32(_mm_shufflehi_epi16(_mm_unpackhi_epi8(vsrc, vsrc), 0x50), 0xfa); + const auto mask = _mm_set_epi8(128, 64, 32, 16, 8, 4, 2, 1, 128, 64, 32, 16, 8, 4, 2, 1); + spu.gpr[op.rt].vi = _mm_cmpeq_epi8(_mm_and_si128(bits, mask), mask); return true; } @@ -1382,7 +1390,10 @@ bool spu_interpreter::BR(SPUThread& spu, spu_opcode_t op) bool spu_interpreter::FSMBI(SPUThread& spu, spu_opcode_t op) { - spu.gpr[op.rt] = g_spu_imm.fsmb[op.i16]; + const auto vsrc = _mm_set_epi32(0, 0, 0, op.i16); + const auto bits = _mm_shuffle_epi32(_mm_shufflelo_epi16(_mm_unpacklo_epi8(vsrc, vsrc), 0x50), 0x50); + const auto mask = _mm_set_epi8(128, 64, 32, 16, 8, 4, 2, 1, 128, 64, 32, 16, 8, 4, 2, 1); + spu.gpr[op.rt].vi = _mm_cmpeq_epi8(_mm_and_si128(bits, mask), mask); return true; } diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 96f99f8691..fdaf6a3f68 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -361,30 +361,6 @@ spu_imm_table_t::scale_table_t::scale_table_t() spu_imm_table_t::spu_imm_table_t() { - for (u32 i = 0; i < sizeof(fsm) / sizeof(fsm[0]); i++) - { - for (u32 j = 0; j < 4; j++) - { - fsm[i]._u32[j] = (i & (1 << j)) ? 0xffffffff : 0; - } - } - - for (u32 i = 0; i < sizeof(fsmh) / sizeof(fsmh[0]); i++) - { - for (u32 j = 0; j < 8; j++) - { - fsmh[i]._u16[j] = (i & (1 << j)) ? 0xffff : 0; - } - } - - for (u32 i = 0; i < sizeof(fsmb) / sizeof(fsmb[0]); i++) - { - for (u32 j = 0; j < 16; j++) - { - fsmb[i]._u8[j] = (i & (1 << j)) ? 0xff : 0; - } - } - for (u32 i = 0; i < sizeof(sldq_pshufb) / sizeof(sldq_pshufb[0]); i++) { for (u32 j = 0; j < 16; j++) diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index d84e8e6d68..061adee884 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -378,10 +378,6 @@ struct spu_int_ctrl_t struct spu_imm_table_t { - v128 fsmb[65536]; // table for FSMB, FSMBI instructions - v128 fsmh[256]; // table for FSMH instruction - v128 fsm[16]; // table for FSM instruction - v128 sldq_pshufb[32]; // table for SHLQBYBI, SHLQBY, SHLQBYI instructions v128 srdq_pshufb[32]; // table for ROTQMBYBI, ROTQMBY, ROTQMBYI instructions v128 rldq_pshufb[16]; // table for ROTQBYBI, ROTQBY, ROTQBYI instructions