Some stuff

This commit is contained in:
Nekotekina 2014-04-20 23:36:53 +04:00
parent e1bbedd4bf
commit 525084e7cc
3 changed files with 366 additions and 210 deletions

View File

@ -181,7 +181,7 @@ private:
CPU.VSCR.VSCR = CPU.VPR[vb]._u32[0];
CPU.VSCR.X = CPU.VSCR.Y = 0;
}
void VADDCUW(u32 vd, u32 va, u32 vb)
void VADDCUW(u32 vd, u32 va, u32 vb) //nf
{
for (uint w = 0; w < 4; w++)
{
@ -195,7 +195,7 @@ private:
CPU.VPR[vd]._f[w] = CPU.VPR[va]._f[w] + CPU.VPR[vb]._f[w];
}
}
void VADDSBS(u32 vd, u32 va, u32 vb)
void VADDSBS(u32 vd, u32 va, u32 vb) //nf
{
for(u32 b=0; b<16; ++b)
{
@ -235,7 +235,7 @@ private:
CPU.VPR[vd]._s16[h] = result;
}
}
void VADDSWS(u32 vd, u32 va, u32 vb)
void VADDSWS(u32 vd, u32 va, u32 vb) //nf
{
for (uint w = 0; w < 4; w++)
{
@ -335,21 +335,21 @@ private:
CPU.VPR[vd]._u32[w] = CPU.VPR[va]._u32[w] & (~CPU.VPR[vb]._u32[w]);
}
}
void VAVGSB(u32 vd, u32 va, u32 vb)
void VAVGSB(u32 vd, u32 va, u32 vb) //nf
{
for (uint b = 0; b < 16; b++)
{
CPU.VPR[vd]._s8[b] = (CPU.VPR[va]._s8[b] + CPU.VPR[vb]._s8[b] + 1) >> 1;
}
}
void VAVGSH(u32 vd, u32 va, u32 vb)
void VAVGSH(u32 vd, u32 va, u32 vb) //nf
{
for (uint h = 0; h < 8; h++)
{
CPU.VPR[vd]._s16[h] = (CPU.VPR[va]._s16[h] + CPU.VPR[vb]._s16[h] + 1) >> 1;
}
}
void VAVGSW(u32 vd, u32 va, u32 vb)
void VAVGSW(u32 vd, u32 va, u32 vb) //nf
{
for (uint w = 0; w < 4; w++)
{
@ -361,14 +361,14 @@ private:
for (uint b = 0; b < 16; b++)
CPU.VPR[vd]._u8[b] = (CPU.VPR[va]._u8[b] + CPU.VPR[vb]._u8[b] + 1) >> 1;
}
void VAVGUH(u32 vd, u32 va, u32 vb)
void VAVGUH(u32 vd, u32 va, u32 vb) //nf
{
for (uint h = 0; h < 8; h++)
{
CPU.VPR[vd]._u16[h] = (CPU.VPR[va]._u16[h] + CPU.VPR[vb]._u16[h] + 1) >> 1;
}
}
void VAVGUW(u32 vd, u32 va, u32 vb)
void VAVGUW(u32 vd, u32 va, u32 vb) //nf
{
for (uint w = 0; w < 4; w++)
{
@ -487,14 +487,14 @@ private:
CPU.CR.cr6 = all_equal | none_equal;
}
void VCMPEQUH(u32 vd, u32 va, u32 vb)
void VCMPEQUH(u32 vd, u32 va, u32 vb) //nf
{
for (uint h = 0; h < 8; h++)
{
CPU.VPR[vd]._u16[h] = CPU.VPR[va]._u16[h] == CPU.VPR[vb]._u16[h] ? 0xffff : 0;
}
}
void VCMPEQUH_(u32 vd, u32 va, u32 vb)
void VCMPEQUH_(u32 vd, u32 va, u32 vb) //nf
{
int all_equal = 0x8;
int none_equal = 0x2;
@ -599,7 +599,7 @@ private:
CPU.CR.cr6 = all_ge | none_ge;
}
void VCMPGTSB(u32 vd, u32 va, u32 vb)
void VCMPGTSB(u32 vd, u32 va, u32 vb) //nf
{
for (uint b = 0; b < 16; b++)
{
@ -833,7 +833,7 @@ private:
CPU.VPR[vd]._f[w] = max(CPU.VPR[va]._f[w], CPU.VPR[vb]._f[w]);
}
}
void VMAXSB(u32 vd, u32 va, u32 vb)
void VMAXSB(u32 vd, u32 va, u32 vb) //nf
{
for (uint b = 0; b < 16; b++)
CPU.VPR[vd]._s8[b] = max(CPU.VPR[va]._s8[b], CPU.VPR[vb]._s8[b]);
@ -918,7 +918,7 @@ private:
CPU.VPR[vd]._f[w] = min(CPU.VPR[va]._f[w], CPU.VPR[vb]._f[w]);
}
}
void VMINSB(u32 vd, u32 va, u32 vb)
void VMINSB(u32 vd, u32 va, u32 vb) //nf
{
for (uint b = 0; b < 16; b++)
{
@ -1021,7 +1021,7 @@ private:
CPU.VPR[vd]._u32[3 - d*2 - 1] = CPU.VPR[vb]._u32[1 - d];
}
}
void VMSUMMBM(u32 vd, u32 va, u32 vb, u32 vc)
void VMSUMMBM(u32 vd, u32 va, u32 vb, u32 vc) //nf
{
for (uint w = 0; w < 4; w++)
{
@ -1036,7 +1036,7 @@ private:
CPU.VPR[vd]._s32[w] = result;
}
}
void VMSUMSHM(u32 vd, u32 va, u32 vb, u32 vc)
void VMSUMSHM(u32 vd, u32 va, u32 vb, u32 vc) //nf
{
for (uint w = 0; w < 4; w++)
{
@ -1051,7 +1051,7 @@ private:
CPU.VPR[vd]._s32[w] = result;
}
}
void VMSUMSHS(u32 vd, u32 va, u32 vb, u32 vc)
void VMSUMSHS(u32 vd, u32 va, u32 vb, u32 vc) //nf
{
for (uint w = 0; w < 4; w++)
{
@ -1096,7 +1096,7 @@ private:
CPU.VPR[vd]._u32[w] = result;
}
}
void VMSUMUHM(u32 vd, u32 va, u32 vb, u32 vc)
void VMSUMUHM(u32 vd, u32 va, u32 vb, u32 vc) //nf
{
for (uint w = 0; w < 4; w++)
{
@ -1111,7 +1111,7 @@ private:
CPU.VPR[vd]._u32[w] = result;
}
}
void VMSUMUHS(u32 vd, u32 va, u32 vb, u32 vc)
void VMSUMUHS(u32 vd, u32 va, u32 vb, u32 vc) //nf
{
for (uint w = 0; w < 4; w++)
{
@ -1136,7 +1136,7 @@ private:
CPU.VPR[vd]._u32[w] = saturated;
}
}
void VMULESB(u32 vd, u32 va, u32 vb)
void VMULESB(u32 vd, u32 va, u32 vb) //nf
{
for (uint h = 0; h < 8; h++)
{
@ -1164,7 +1164,7 @@ private:
CPU.VPR[vd]._u32[w] = (u32)CPU.VPR[va]._u16[w*2+1] * (u32)CPU.VPR[vb]._u16[w*2+1];
}
}
void VMULOSB(u32 vd, u32 va, u32 vb)
void VMULOSB(u32 vd, u32 va, u32 vb) //nf
{
for (uint h = 0; h < 8; h++)
{
@ -1243,7 +1243,7 @@ private:
CPU.VPR[vd]._u16[4 + (3 - h)] = (ab7 << 15) | (ab8 << 10) | (ab16 << 5) | ab24;
}
}
void VPKSHSS(u32 vd, u32 va, u32 vb)
void VPKSHSS(u32 vd, u32 va, u32 vb) //nf
{
for (uint b = 0; b < 8; b++)
{
@ -1348,7 +1348,7 @@ private:
CPU.VPR[vd]._s16[h] = result;
}
}
void VPKSWUS(u32 vd, u32 va, u32 vb)
void VPKSWUS(u32 vd, u32 va, u32 vb) //nf
{
for (uint h = 0; h < 4; h++)
{
@ -1383,7 +1383,7 @@ private:
CPU.VPR[vd]._u16[h] = result;
}
}
void VPKUHUM(u32 vd, u32 va, u32 vb)
void VPKUHUM(u32 vd, u32 va, u32 vb) //nf
{
for (uint b = 0; b < 8; b++)
{
@ -1424,7 +1424,7 @@ private:
CPU.VPR[vd]._u16[h ] = CPU.VPR[vb]._u16[h*2];
}
}
void VPKUWUS(u32 vd, u32 va, u32 vb)
void VPKUWUS(u32 vd, u32 va, u32 vb) //nf
{
for (uint h = 0; h < 4; h++)
{
@ -1486,7 +1486,7 @@ private:
CPU.VPR[vd]._f[w] = f;
}
}
void VRLB(u32 vd, u32 va, u32 vb)
void VRLB(u32 vd, u32 va, u32 vb) //nf
{
for (uint b = 0; b < 16; b++)
{
@ -1495,7 +1495,7 @@ private:
CPU.VPR[vd]._u8[b] = (CPU.VPR[va]._u8[b] << nRot) | (CPU.VPR[va]._u8[b] >> (8 - nRot));
}
}
void VRLH(u32 vd, u32 va, u32 vb)
void VRLH(u32 vd, u32 va, u32 vb) //nf
{
for (uint h = 0; h < 8; h++)
{
@ -1524,7 +1524,7 @@ private:
CPU.VPR[vd]._u8[b] = (CPU.VPR[vb]._u8[b] & CPU.VPR[vc]._u8[b]) | (CPU.VPR[va]._u8[b] & (~CPU.VPR[vc]._u8[b]));
}
}
void VSL(u32 vd, u32 va, u32 vb)
void VSL(u32 vd, u32 va, u32 vb) //nf
{
u8 sh = CPU.VPR[vb]._u8[0] & 0x7;
@ -1648,7 +1648,7 @@ private:
CPU.VPR[vd]._u32[w] = word;
}
}
void VSR(u32 vd, u32 va, u32 vb)
void VSR(u32 vd, u32 va, u32 vb) //nf
{
u8 sh = CPU.VPR[vb]._u8[0] & 0x7;
u32 t = 1;
@ -1676,7 +1676,7 @@ private:
CPU.VPR[vd]._u32[3] = 0xCDCDCDCD;
}
}
void VSRAB(u32 vd, u32 va, u32 vb)
void VSRAB(u32 vd, u32 va, u32 vb) //nf
{
for (uint b = 0; b < 16; b++)
{
@ -1729,7 +1729,7 @@ private:
CPU.VPR[vd]._u32[w] = CPU.VPR[va]._u32[w] >> (CPU.VPR[vb]._u8[w*4] & 0x1f);
}
}
void VSUBCUW(u32 vd, u32 va, u32 vb)
void VSUBCUW(u32 vd, u32 va, u32 vb) //nf
{
for (uint w = 0; w < 4; w++)
{
@ -1743,7 +1743,7 @@ private:
CPU.VPR[vd]._f[w] = CPU.VPR[va]._f[w] - CPU.VPR[vb]._f[w];
}
}
void VSUBSBS(u32 vd, u32 va, u32 vb)
void VSUBSBS(u32 vd, u32 va, u32 vb) //nf
{
for (uint b = 0; b < 16; b++)
{
@ -1832,7 +1832,7 @@ private:
CPU.VPR[vd]._u16[h] = CPU.VPR[va]._u16[h] - CPU.VPR[vb]._u16[h];
}
}
void VSUBUHS(u32 vd, u32 va, u32 vb)
void VSUBUHS(u32 vd, u32 va, u32 vb) //nf
{
for (uint h = 0; h < 8; h++)
{
@ -1915,7 +1915,7 @@ private:
CPU.VPR[vd]._s32[1] = 0;
CPU.VPR[vd]._s32[3] = 0;
}
void VSUM4SBS(u32 vd, u32 va, u32 vb)
void VSUM4SBS(u32 vd, u32 va, u32 vb) //nf
{
for (uint w = 0; w < 4; w++)
{
@ -2019,7 +2019,7 @@ private:
CPU.VPR[vd]._u8[(3 - w)*4 + 0] = CPU.VPR[vb]._u8[8 + w*2 + 1] & 0x1f;
}
}
void VUPKLSB(u32 vd, u32 vb)
void VUPKLSB(u32 vd, u32 vb) //nf
{
for (uint h = 0; h < 8; h++)
{

View File

@ -14,6 +14,58 @@ using namespace asmjit::host;
#define UNIMPLEMENTED() UNK(__FUNCTION__)
// Global lookup tables for the SPU recompiler. Everything here is precomputed
// once at static-init time so the generated JIT code can fetch a ready-made
// 128-bit constant with a single indexed load instead of building it at runtime.
// NOTE(review): the .m128i_u8/.m128i_u16/.m128i_u32 element accessors used in
// the constructor are an MSVC-specific extension of __m128i; this initializer
// is not portable to GCC/Clang as written — confirm the supported toolchains.
struct g_imm_table_struct
{
// cntb_table[i]: per-byte population count of the 16-bit index i,
// packed as (popcount(high byte) << 8) | popcount(low byte).
u16 cntb_table[65536];
// fsmb_table[i]: bit j of i expanded to byte j (0xff if set, 0x00 if clear).
__m128i fsmb_table[65536];
// fsmh_table[i]: bit j of i expanded to halfword j (0xffff / 0x0000).
__m128i fsmh_table[256];
// fsm_table[i]: bit j of i expanded to 32-bit word j (~0 / 0).
__m128i fsm_table[16];
// sldq_pshufb[i]: PSHUFB control mask that shifts a quadword left by i bytes.
__m128i sldq_pshufb[32];
// srdq_pshufb[i]: PSHUFB control mask that shifts a quadword right by i bytes.
__m128i srdq_pshufb[32];
// rldq_pshufb[i]: PSHUFB control mask that rotates a quadword left by i bytes.
__m128i rldq_pshufb[16];
g_imm_table_struct()
{
// The JIT addresses cntb_table relative to the struct base pointer
// (see the cpu_word/g_imm_xmm macros), so it must stay at offset 0.
static_assert(offsetof(g_imm_table_struct, cntb_table) == 0, "offsetof(cntb_table) != 0");
// Popcount table: count set bits separately in the low and high byte of i.
for (u32 i = 0; i < sizeof(cntb_table) / sizeof(cntb_table[0]); i++)
{
u32 cnt_low = 0, cnt_high = 0;
for (u32 j = 0; j < 8; j++)
{
cnt_low += (i >> j) & 1;
cnt_high += (i >> (j + 8)) & 1;
}
cntb_table[i] = (cnt_high << 8) | cnt_low;
}
// Form-select-mask tables: each selected lane becomes all-ones, others zero.
for (u32 i = 0; i < sizeof(fsm_table) / sizeof(fsm_table[0]); i++)
{
for (u32 j = 0; j < 4; j++) fsm_table[i].m128i_u32[j] = (i & (1 << j)) ? ~0 : 0;
}
for (u32 i = 0; i < sizeof(fsmh_table) / sizeof(fsmh_table[0]); i++)
{
for (u32 j = 0; j < 8; j++) fsmh_table[i].m128i_u16[j] = (i & (1 << j)) ? ~0 : 0;
}
for (u32 i = 0; i < sizeof(fsmb_table) / sizeof(fsmb_table[0]); i++)
{
for (u32 j = 0; j < 16; j++) fsmb_table[i].m128i_u8[j] = (i & (1 << j)) ? ~0 : 0;
}
// Left shift by i bytes: source index j - i underflows past 0x0f for the
// vacated bytes; PSHUFB treats a set high bit in the control byte as
// "write zero", so those lanes are cleared automatically.
for (u32 i = 0; i < sizeof(sldq_pshufb) / sizeof(sldq_pshufb[0]); i++)
{
for (u32 j = 0; j < 16; j++) sldq_pshufb[i].m128i_u8[j] = (u8)(j - i);
}
// Right shift by i bytes: out-of-range source indices are forced to 0xff
// explicitly, again relying on PSHUFB's zero-on-high-bit behavior.
for (u32 i = 0; i < sizeof(srdq_pshufb) / sizeof(srdq_pshufb[0]); i++)
{
for (u32 j = 0; j < 16; j++) srdq_pshufb[i].m128i_u8[j] = (j + i > 15) ? 0xff : (u8)(j + i);
}
// Byte rotate: same as the left shift but the source index wraps mod 16,
// so no lane is ever zeroed.
for (u32 i = 0; i < sizeof(rldq_pshufb) / sizeof(rldq_pshufb[0]); i++)
{
for (u32 j = 0; j < 16; j++) rldq_pshufb[i].m128i_u8[j] = (u8)(j - i) & 0xf;
}
}
};
class SPURecompiler;
class SPURecompilerCore : public CPUDecoder
@ -57,6 +109,9 @@ public:
#define cpu_word(x) word_ptr(*cpu_var, (sizeof((*(SPUThread*)nullptr).x) == 2) ? offsetof(SPUThread, x) : throw "sizeof("#x") != 2")
#define cpu_byte(x) byte_ptr(*cpu_var, (sizeof((*(SPUThread*)nullptr).x) == 1) ? offsetof(SPUThread, x) : throw "sizeof("#x") != 1")
#define g_imm_xmm(x) oword_ptr(*g_imm_var, offsetof(g_imm_table_struct, x))
#define g_imm2_xmm(x, y) oword_ptr(*g_imm_var, y, 0, offsetof(g_imm_table_struct, x))
#define LOG_OPCODE(...) //ConLog.Write("Compiled "__FUNCTION__"(): "__VA_ARGS__)
#define LOG3_OPCODE(...) //ConLog.Write("Linked "__FUNCTION__"(): "__VA_ARGS__)
@ -97,12 +152,14 @@ public:
GpVar* cpu_var;
GpVar* ls_var;
GpVar* imm_var;
// (input) output:
GpVar* g_imm_var;
// output:
GpVar* pos_var;
// temporary:
GpVar* addr;
GpVar* qw0;
GpVar* qw1;
GpVar* qw2;
struct XmmLink
{
@ -578,30 +635,41 @@ private:
}
void ROT(u32 rt, u32 ra, u32 rb)
{
WRAPPER_BEGIN(rt, ra, rb, zz);
CPU.GPR[rt]._u32[0] = (CPU.GPR[ra]._u32[0] << (CPU.GPR[rb]._u32[0] & 0x1f)) | (CPU.GPR[ra]._u32[0] >> (32 - (CPU.GPR[rb]._u32[0] & 0x1f)));
CPU.GPR[rt]._u32[1] = (CPU.GPR[ra]._u32[1] << (CPU.GPR[rb]._u32[1] & 0x1f)) | (CPU.GPR[ra]._u32[1] >> (32 - (CPU.GPR[rb]._u32[1] & 0x1f)));
CPU.GPR[rt]._u32[2] = (CPU.GPR[ra]._u32[2] << (CPU.GPR[rb]._u32[2] & 0x1f)) | (CPU.GPR[ra]._u32[2] >> (32 - (CPU.GPR[rb]._u32[2] & 0x1f)));
CPU.GPR[rt]._u32[3] = (CPU.GPR[ra]._u32[3] << (CPU.GPR[rb]._u32[3] & 0x1f)) | (CPU.GPR[ra]._u32[3] >> (32 - (CPU.GPR[rb]._u32[3] & 0x1f)));
WRAPPER_END(rt, ra, rb, 0);
XmmInvalidate(rt);
for (u32 i = 0; i < 4; i++)
{
c.mov(qw0->r32(), cpu_dword(GPR[ra]._u32[i]));
c.mov(*addr, cpu_dword(GPR[rb]._u32[i]));
c.rol(qw0->r32(), *addr);
c.mov(cpu_dword(GPR[rt]._u32[i]), qw0->r32());
}
LOG_OPCODE();
}
void ROTM(u32 rt, u32 ra, u32 rb)
{
WRAPPER_BEGIN(rt, ra, rb, zz);
CPU.GPR[rt]._u32[0] = ((0 - CPU.GPR[rb]._u32[0]) & 0x3f) < 32 ? CPU.GPR[ra]._u32[0] >> ((0 - CPU.GPR[rb]._u32[0]) & 0x3f) : 0;
CPU.GPR[rt]._u32[1] = ((0 - CPU.GPR[rb]._u32[1]) & 0x3f) < 32 ? CPU.GPR[ra]._u32[1] >> ((0 - CPU.GPR[rb]._u32[1]) & 0x3f) : 0;
CPU.GPR[rt]._u32[2] = ((0 - CPU.GPR[rb]._u32[2]) & 0x3f) < 32 ? CPU.GPR[ra]._u32[2] >> ((0 - CPU.GPR[rb]._u32[2]) & 0x3f) : 0;
CPU.GPR[rt]._u32[3] = ((0 - CPU.GPR[rb]._u32[3]) & 0x3f) < 32 ? CPU.GPR[ra]._u32[3] >> ((0 - CPU.GPR[rb]._u32[3]) & 0x3f) : 0;
WRAPPER_END(rt, ra, rb, 0);
XmmInvalidate(rt);
for (u32 i = 0; i < 4; i++)
{
c.mov(qw0->r32(), cpu_dword(GPR[ra]._u32[i]));
c.mov(*addr, cpu_dword(GPR[rb]._u32[i]));
c.neg(*addr);
c.shr(*qw0, *addr);
c.mov(cpu_dword(GPR[rt]._u32[i]), qw0->r32());
}
LOG_OPCODE();
}
void ROTMA(u32 rt, u32 ra, u32 rb)
{
WRAPPER_BEGIN(rt, ra, rb, zz);
CPU.GPR[rt]._i32[0] = ((0 - CPU.GPR[rb]._u32[0]) & 0x3f) < 32 ? CPU.GPR[ra]._i32[0] >> ((0 - CPU.GPR[rb]._u32[0]) & 0x3f) : CPU.GPR[ra]._i32[0] >> 31;
CPU.GPR[rt]._i32[1] = ((0 - CPU.GPR[rb]._u32[1]) & 0x3f) < 32 ? CPU.GPR[ra]._i32[1] >> ((0 - CPU.GPR[rb]._u32[1]) & 0x3f) : CPU.GPR[ra]._i32[1] >> 31;
CPU.GPR[rt]._i32[2] = ((0 - CPU.GPR[rb]._u32[2]) & 0x3f) < 32 ? CPU.GPR[ra]._i32[2] >> ((0 - CPU.GPR[rb]._u32[2]) & 0x3f) : CPU.GPR[ra]._i32[2] >> 31;
CPU.GPR[rt]._i32[3] = ((0 - CPU.GPR[rb]._u32[3]) & 0x3f) < 32 ? CPU.GPR[ra]._i32[3] >> ((0 - CPU.GPR[rb]._u32[3]) & 0x3f) : CPU.GPR[ra]._i32[3] >> 31;
WRAPPER_END(rt, ra, rb, 0);
XmmInvalidate(rt);
for (u32 i = 0; i < 4; i++)
{
c.movsxd(*qw0, cpu_dword(GPR[ra]._u32[i]));
c.mov(*addr, cpu_dword(GPR[rb]._u32[i]));
c.neg(*addr);
c.sar(*qw0, *addr);
c.mov(cpu_dword(GPR[rt]._u32[i]), qw0->r32());
}
LOG_OPCODE();
}
void SHL(u32 rt, u32 ra, u32 rb)
{
@ -617,31 +685,53 @@ private:
}
void ROTH(u32 rt, u32 ra, u32 rb)
{
WRAPPER_BEGIN(rt, ra, rb, zz);
for (int h = 0; h < 8; h++)
CPU.GPR[rt]._u16[h] = (CPU.GPR[ra]._u16[h] << (CPU.GPR[rb]._u16[h] & 0xf)) | (CPU.GPR[ra]._u16[h] >> (16 - (CPU.GPR[rb]._u16[h] & 0xf)));
WRAPPER_END(rt, ra, rb, 0);
XmmInvalidate(rt);
for (u32 i = 0; i < 8; i++)
{
c.movzx(qw0->r32(), cpu_word(GPR[ra]._u16[i]));
c.movzx(*addr, cpu_word(GPR[rb]._u16[i]));
c.rol(qw0->r16(), *addr);
c.mov(cpu_word(GPR[rt]._u16[i]), qw0->r16());
}
LOG_OPCODE();
}
void ROTHM(u32 rt, u32 ra, u32 rb)
{
WRAPPER_BEGIN(rt, ra, rb, zz);
for (int h = 0; h < 8; h++)
CPU.GPR[rt]._u16[h] = ((0 - CPU.GPR[rb]._u16[h]) & 0x1f) < 16 ? CPU.GPR[ra]._u16[h] >> ((0 - CPU.GPR[rb]._u16[h]) & 0x1f) : 0;
WRAPPER_END(rt, ra, rb, 0);
XmmInvalidate(rt);
for (u32 i = 0; i < 8; i++)
{
c.movzx(qw0->r32(), cpu_word(GPR[ra]._u16[i]));
c.movzx(*addr, cpu_word(GPR[rb]._u16[i]));
c.neg(*addr);
c.shr(qw0->r32(), *addr);
c.mov(cpu_word(GPR[rt]._u16[i]), qw0->r16());
}
LOG_OPCODE();
}
void ROTMAH(u32 rt, u32 ra, u32 rb)
{
WRAPPER_BEGIN(rt, ra, rb, zz);
for (int h = 0; h < 8; h++)
CPU.GPR[rt]._i16[h] = ((0 - CPU.GPR[rb]._u16[h]) & 0x1f) < 16 ? CPU.GPR[ra]._i16[h] >> ((0 - CPU.GPR[rb]._u16[h]) & 0x1f) : CPU.GPR[ra]._i16[h] >> 15;
WRAPPER_END(rt, ra, rb, 0);
XmmInvalidate(rt);
for (u32 i = 0; i < 8; i++)
{
c.movsx(qw0->r32(), cpu_word(GPR[ra]._u16[i]));
c.movzx(*addr, cpu_word(GPR[rb]._u16[i]));
c.neg(*addr);
c.sar(qw0->r32(), *addr);
c.mov(cpu_word(GPR[rt]._u16[i]), qw0->r16());
}
LOG_OPCODE();
}
void SHLH(u32 rt, u32 ra, u32 rb)
{
WRAPPER_BEGIN(rt, ra, rb, zz);
for (int h = 0; h < 8; h++)
CPU.GPR[rt]._u16[h] = (CPU.GPR[rb]._u16[h] & 0x1f) > 15 ? 0 : CPU.GPR[ra]._u16[h] << (CPU.GPR[rb]._u16[h] & 0x1f);
WRAPPER_END(rt, ra, rb, 0);
XmmInvalidate(rt);
for (u32 i = 0; i < 8; i++)
{
c.movzx(qw0->r32(), cpu_word(GPR[ra]._u16[i]));
c.movzx(*addr, cpu_word(GPR[rb]._u16[i]));
c.shl(qw0->r32(), *addr);
c.mov(cpu_word(GPR[rt]._u16[i]), qw0->r16());
}
LOG_OPCODE();
}
void ROTI(u32 rt, u32 ra, s32 i7)
{
@ -1186,27 +1276,33 @@ private:
}
void FSM(u32 rt, u32 ra)
{
WRAPPER_BEGIN(rt, ra, yy, zz);
const u32 pref = CPU.GPR[ra]._u32[3];
for (int w = 0; w < 4; w++)
CPU.GPR[rt]._u32[w] = (pref & (1 << w)) ? ~0 : 0;
WRAPPER_END(rt, ra, 0, 0);
const XmmLink& vr = XmmAlloc(rt);
c.mov(*addr, cpu_dword(GPR[ra]._u32[3]));
c.and_(*addr, 0xf);
c.shl(*addr, 4);
c.movdqa(vr.get(), g_imm2_xmm(fsm_table[0], *addr));
XmmFinalize(vr, rt);
LOG_OPCODE();
}
void FSMH(u32 rt, u32 ra)
{
WRAPPER_BEGIN(rt, ra, yy, zz);
const u32 pref = CPU.GPR[ra]._u32[3];
for (int h = 0; h < 8; h++)
CPU.GPR[rt]._u16[h] = (pref & (1 << h)) ? ~0 : 0;
WRAPPER_END(rt, ra, 0, 0);
const XmmLink& vr = XmmAlloc(rt);
c.mov(*addr, cpu_dword(GPR[ra]._u32[3]));
c.and_(*addr, 0xff);
c.shl(*addr, 4);
c.movdqa(vr.get(), g_imm2_xmm(fsmh_table[0], *addr));
XmmFinalize(vr, rt);
LOG_OPCODE();
}
void FSMB(u32 rt, u32 ra)
{
WRAPPER_BEGIN(rt, ra, yy, zz);
const u32 pref = CPU.GPR[ra]._u32[3];
for (int b = 0; b < 16; b++)
CPU.GPR[rt]._u8[b] = (pref & (1 << b)) ? ~0 : 0;
WRAPPER_END(rt, ra, 0, 0);
const XmmLink& vr = XmmAlloc(rt);
c.mov(*addr, cpu_dword(GPR[ra]._u32[3]));
c.and_(*addr, 0xffff);
c.shl(*addr, 4);
c.movdqa(vr.get(), g_imm2_xmm(fsmb_table[0], *addr));
XmmFinalize(vr, rt);
LOG_OPCODE();
}
void FREST(u32 rt, u32 ra)
{
@ -1247,32 +1343,35 @@ private:
}
void ROTQBYBI(u32 rt, u32 ra, u32 rb)
{
WRAPPER_BEGIN(rt, ra, rb, zz);
const int s = (CPU.GPR[rb]._u32[3] >> 3) & 0xf;
const SPU_GPR_hdr temp = CPU.GPR[ra];
for (int b = 0; b < 16; b++)
CPU.GPR[rt]._u8[b] = temp._u8[(b - s) & 0xf];
WRAPPER_END(rt, ra, rb, 0);
const XmmLink& va = XmmGet(ra, rt);
c.mov(*addr, cpu_dword(GPR[rb]._u32[3]));
c.and_(*addr, 0xf << 3);
c.shl(*addr, 1);
c.pshufb(va.get(), g_imm2_xmm(rldq_pshufb[0], *addr));
XmmFinalize(va, rt);
LOG_OPCODE();
}
void ROTQMBYBI(u32 rt, u32 ra, u32 rb)
{
WRAPPER_BEGIN(rt, ra, rb, zz);
const int s = (0 - (CPU.GPR[rb]._u32[3] >> 3)) & 0x1f;
const SPU_GPR_hdr temp = CPU.GPR[ra];
CPU.GPR[rt].Reset();
for (int b = 0; b < 16 - s; b++)
CPU.GPR[rt]._u8[b] = temp._u8[(b + s) & 0xf];
WRAPPER_END(rt, ra, rb, 0);
const XmmLink& va = XmmGet(ra, rt);
c.mov(*addr, cpu_dword(GPR[rb]._u32[3]));
c.shr(*addr, 3);
c.neg(*addr);
c.and_(*addr, 0x1f);
c.shl(*addr, 4);
c.pshufb(va.get(), g_imm2_xmm(srdq_pshufb[0], *addr));
XmmFinalize(va, rt);
LOG_OPCODE();
}
void SHLQBYBI(u32 rt, u32 ra, u32 rb)
{
WRAPPER_BEGIN(rt, ra, rb, zz);
const int s = (CPU.GPR[rb]._u32[3] >> 3) & 0x1f;
const SPU_GPR_hdr temp = CPU.GPR[ra];
CPU.GPR[rt].Reset();
for (int b = s; b < 16; b++)
CPU.GPR[rt]._u8[b] = temp._u8[b - s];
WRAPPER_END(rt, ra, rb, 0);
const XmmLink& va = XmmGet(ra, rt);
c.mov(*addr, cpu_dword(GPR[rb]._u32[3]));
c.and_(*addr, 0x1f << 3);
c.shl(*addr, 1);
c.pshufb(va.get(), g_imm2_xmm(sldq_pshufb[0], *addr));
XmmFinalize(va, rt);
LOG_OPCODE();
}
void CBX(u32 rt, u32 ra, u32 rb)
{
@ -1361,73 +1460,89 @@ private:
}
void ROTQBI(u32 rt, u32 ra, u32 rb)
{
WRAPPER_BEGIN(rt, ra, rb, zz);
const int t = CPU.GPR[rb]._u32[3] & 0x7;
const SPU_GPR_hdr temp = CPU.GPR[ra];
CPU.GPR[rt]._u32[0] = (temp._u32[0] << t) | (temp._u32[3] >> (32 - t));
CPU.GPR[rt]._u32[1] = (temp._u32[1] << t) | (temp._u32[0] >> (32 - t));
CPU.GPR[rt]._u32[2] = (temp._u32[2] << t) | (temp._u32[1] >> (32 - t));
CPU.GPR[rt]._u32[3] = (temp._u32[3] << t) | (temp._u32[2] >> (32 - t));
WRAPPER_END(rt, ra, rb, 0);
XmmInvalidate(rt);
c.mov(*qw0, cpu_qword(GPR[ra]._u64[0]));
c.mov(*qw1, cpu_qword(GPR[ra]._u64[1]));
c.mov(*qw2, *qw0);
c.mov(*addr, cpu_dword(GPR[rb]._u32[3]));
c.and_(*addr, 7);
c.shld(*qw0, *qw1, *addr);
c.shld(*qw1, *qw2, *addr);
c.mov(cpu_qword(GPR[rt]._u64[0]), *qw0);
c.mov(cpu_qword(GPR[rt]._u64[1]), *qw1);
LOG_OPCODE();
}
void ROTQMBI(u32 rt, u32 ra, u32 rb)
{
WRAPPER_BEGIN(rt, ra, rb, zz);
const int t = (0 - CPU.GPR[rb]._u32[3]) & 0x7;
const SPU_GPR_hdr temp = CPU.GPR[ra];
CPU.GPR[rt]._u32[0] = (temp._u32[0] >> t) | (temp._u32[1] << (32 - t));
CPU.GPR[rt]._u32[1] = (temp._u32[1] >> t) | (temp._u32[2] << (32 - t));
CPU.GPR[rt]._u32[2] = (temp._u32[2] >> t) | (temp._u32[3] << (32 - t));
CPU.GPR[rt]._u32[3] = (temp._u32[3] >> t);
WRAPPER_END(rt, ra, rb, 0);
XmmInvalidate(rt);
c.mov(*qw0, cpu_qword(GPR[ra]._u64[0]));
c.mov(*qw1, cpu_qword(GPR[ra]._u64[1]));
c.mov(*addr, cpu_dword(GPR[rb]._u32[3]));
c.neg(*addr);
c.and_(*addr, 7);
c.shrd(*qw0, *qw1, *addr);
c.shr(*qw1, *addr);
c.mov(cpu_qword(GPR[rt]._u64[0]), *qw0);
c.mov(cpu_qword(GPR[rt]._u64[1]), *qw1);
LOG_OPCODE();
}
void SHLQBI(u32 rt, u32 ra, u32 rb)
{
WRAPPER_BEGIN(rt, ra, rb, zz);
const int t = CPU.GPR[rb]._u32[3] & 0x7;
const SPU_GPR_hdr temp = CPU.GPR[ra];
CPU.GPR[rt]._u32[0] = (temp._u32[0] << t);
CPU.GPR[rt]._u32[1] = (temp._u32[1] << t) | (temp._u32[0] >> (32 - t));
CPU.GPR[rt]._u32[2] = (temp._u32[2] << t) | (temp._u32[1] >> (32 - t));
CPU.GPR[rt]._u32[3] = (temp._u32[3] << t) | (temp._u32[2] >> (32 - t));
WRAPPER_END(rt, ra, rb, 0);
XmmInvalidate(rt);
c.mov(*qw0, cpu_qword(GPR[ra]._u64[0]));
c.mov(*qw1, cpu_qword(GPR[ra]._u64[1]));
c.mov(*addr, cpu_dword(GPR[rb]._u32[3]));
c.and_(*addr, 7);
c.shld(*qw1, *qw0, *addr);
c.shl(*qw0, *addr);
c.mov(cpu_qword(GPR[rt]._u64[0]), *qw0);
c.mov(cpu_qword(GPR[rt]._u64[1]), *qw1);
LOG_OPCODE();
}
void ROTQBY(u32 rt, u32 ra, u32 rb)
{
WRAPPER_BEGIN(rt, ra, rb, zz);
const int s = CPU.GPR[rb]._u32[3] & 0xf;
const SPU_GPR_hdr temp = CPU.GPR[ra];
for (int b = 0; b < 16; ++b)
CPU.GPR[rt]._u8[b] = temp._u8[(b - s) & 0xf];
WRAPPER_END(rt, ra, rb, 0);
const XmmLink& va = XmmGet(ra, rt);
c.mov(*addr, cpu_dword(GPR[rb]._u32[3]));
c.and_(*addr, 0xf);
c.shl(*addr, 4);
c.pshufb(va.get(), g_imm2_xmm(rldq_pshufb[0], *addr));
XmmFinalize(va, rt);
LOG_OPCODE();
}
void ROTQMBY(u32 rt, u32 ra, u32 rb)
{
WRAPPER_BEGIN(rt, ra, rb, zz);
const int s = (0 - CPU.GPR[rb]._u32[3]) & 0x1f;
const SPU_GPR_hdr temp = CPU.GPR[ra];
CPU.GPR[rt].Reset();
for (int b = 0; b < 16 - s; b++)
CPU.GPR[rt]._u8[b] = temp._u8[(b + s) & 0xf];
WRAPPER_END(rt, ra, rb, 0);
const XmmLink& va = XmmGet(ra, rt);
c.mov(*addr, cpu_dword(GPR[rb]._u32[3]));
c.neg(*addr);
c.and_(*addr, 0x1f);
c.shl(*addr, 4);
c.pshufb(va.get(), g_imm2_xmm(srdq_pshufb[0], *addr));
XmmFinalize(va, rt);
LOG_OPCODE();
}
void SHLQBY(u32 rt, u32 ra, u32 rb)
{
WRAPPER_BEGIN(rt, ra, rb, zz);
const int s = CPU.GPR[rb]._u32[3] & 0x1f;
const SPU_GPR_hdr temp = CPU.GPR[ra];
CPU.GPR[rt].Reset();
for (int b = s; b < 16; b++)
CPU.GPR[rt]._u8[b] = temp._u8[b - s];
WRAPPER_END(rt, ra, rb, 0);
const XmmLink& va = XmmGet(ra, rt);
c.mov(*addr, cpu_dword(GPR[rb]._u32[3]));
c.and_(*addr, 0x1f);
c.shl(*addr, 4);
c.pshufb(va.get(), g_imm2_xmm(sldq_pshufb[0], *addr));
XmmFinalize(va, rt);
LOG_OPCODE();
}
void ORX(u32 rt, u32 ra)
{
WRAPPER_BEGIN(rt, ra, yy, zz);
CPU.GPR[rt]._u32[3] = CPU.GPR[ra]._u32[0] | CPU.GPR[ra]._u32[1] | CPU.GPR[ra]._u32[2] | CPU.GPR[ra]._u32[3];
CPU.GPR[rt]._u32[2] = 0;
CPU.GPR[rt]._u64[0] = 0;
WRAPPER_END(rt, ra, 0, 0);
XmmInvalidate(rt);
c.mov(*addr, cpu_dword(GPR[ra]._u32[0]));
c.or_(*addr, cpu_dword(GPR[ra]._u32[1]));
c.or_(*addr, cpu_dword(GPR[ra]._u32[2]));
c.or_(*addr, cpu_dword(GPR[ra]._u32[3]));
c.mov(cpu_dword(GPR[rt]._u32[3]), *addr);
c.xor_(*addr, *addr);
c.mov(cpu_dword(GPR[rt]._u32[0]), *addr);
c.mov(cpu_dword(GPR[rt]._u32[1]), *addr);
c.mov(cpu_dword(GPR[rt]._u32[2]), *addr);
LOG_OPCODE();
}
void CBD(u32 rt, u32 ra, s32 i7)
{
@ -1488,36 +1603,37 @@ private:
}
void ROTQBII(u32 rt, u32 ra, s32 i7)
{
WRAPPER_BEGIN(rt, ra, i7, zz);
const int s = i7 & 0x7;
const SPU_GPR_hdr temp = CPU.GPR[ra];
CPU.GPR[rt]._u32[0] = (temp._u32[0] << s) | (temp._u32[3] >> (32 - s));
CPU.GPR[rt]._u32[1] = (temp._u32[1] << s) | (temp._u32[0] >> (32 - s));
CPU.GPR[rt]._u32[2] = (temp._u32[2] << s) | (temp._u32[1] >> (32 - s));
CPU.GPR[rt]._u32[3] = (temp._u32[3] << s) | (temp._u32[2] >> (32 - s));
WRAPPER_END(rt, ra, i7, 0);
XmmInvalidate(rt);
c.mov(*qw0, cpu_qword(GPR[ra]._u64[0]));
c.mov(*qw1, cpu_qword(GPR[ra]._u64[1]));
c.mov(*qw2, *qw0);
c.shld(*qw0, *qw1, i7 & 0x7);
c.shld(*qw1, *qw2, i7 & 0x7);
c.mov(cpu_qword(GPR[rt]._u64[0]), *qw0);
c.mov(cpu_qword(GPR[rt]._u64[1]), *qw1);
LOG_OPCODE();
}
void ROTQMBII(u32 rt, u32 ra, s32 i7)
{
WRAPPER_BEGIN(rt, ra, i7, zz);
const int s = (0 - (s32)i7) & 0x7;
const SPU_GPR_hdr temp = CPU.GPR[ra];
CPU.GPR[rt]._u32[0] = (temp._u32[0] >> s) | (temp._u32[1] << (32 - s));
CPU.GPR[rt]._u32[1] = (temp._u32[1] >> s) | (temp._u32[2] << (32 - s));
CPU.GPR[rt]._u32[2] = (temp._u32[2] >> s) | (temp._u32[3] << (32 - s));
CPU.GPR[rt]._u32[3] = (temp._u32[3] >> s);
WRAPPER_END(rt, ra, i7, 0);
XmmInvalidate(rt);
c.mov(*qw0, cpu_qword(GPR[ra]._u64[0]));
c.mov(*qw1, cpu_qword(GPR[ra]._u64[1]));
c.shrd(*qw0, *qw1, (0 - i7) & 0x7);
c.shr(*qw1, (0 - i7) & 0x7);
c.mov(cpu_qword(GPR[rt]._u64[0]), *qw0);
c.mov(cpu_qword(GPR[rt]._u64[1]), *qw1);
LOG_OPCODE();
}
void SHLQBII(u32 rt, u32 ra, s32 i7)
{
WRAPPER_BEGIN(rt, ra, i7, zz);
const int s = i7 & 0x7;
const SPU_GPR_hdr temp = CPU.GPR[ra];
CPU.GPR[rt]._u32[0] = (temp._u32[0] << s);
CPU.GPR[rt]._u32[1] = (temp._u32[1] << s) | (temp._u32[0] >> (32 - s));
CPU.GPR[rt]._u32[2] = (temp._u32[2] << s) | (temp._u32[1] >> (32 - s));
CPU.GPR[rt]._u32[3] = (temp._u32[3] << s) | (temp._u32[2] >> (32 - s));
WRAPPER_END(rt, ra, i7, 0);
XmmInvalidate(rt);
c.mov(*qw0, cpu_qword(GPR[ra]._u64[0]));
c.mov(*qw1, cpu_qword(GPR[ra]._u64[1]));
c.shld(*qw1, *qw0, i7 & 0x7);
c.shl(*qw0, i7 & 0x7);
c.mov(cpu_qword(GPR[rt]._u64[0]), *qw0);
c.mov(cpu_qword(GPR[rt]._u64[1]), *qw1);
LOG_OPCODE();
}
void ROTQBYI(u32 rt, u32 ra, s32 i7)
{
@ -1729,7 +1845,7 @@ private:
}
void SUMB(u32 rt, u32 ra, u32 rb)
{
WRAPPER_BEGIN(rt, ra, rb, zz);
/*WRAPPER_BEGIN(rt, ra, rb, zz);
const SPU_GPR_hdr _a = CPU.GPR[ra];
const SPU_GPR_hdr _b = CPU.GPR[rb];
for (int w = 0; w < 4; w++)
@ -1737,7 +1853,46 @@ private:
CPU.GPR[rt]._u16[w*2] = _a._u8[w*4] + _a._u8[w*4 + 1] + _a._u8[w*4 + 2] + _a._u8[w*4 + 3];
CPU.GPR[rt]._u16[w*2 + 1] = _b._u8[w*4] + _b._u8[w*4 + 1] + _b._u8[w*4 + 2] + _b._u8[w*4 + 3];
}
WRAPPER_END(rt, ra, rb, 0);
WRAPPER_END(rt, ra, rb, 0);*/
const XmmLink& va = XmmGet(ra);
const XmmLink& vb = (ra == rb) ? XmmCopy(va) : XmmGet(rb);
const XmmLink& v1 = XmmCopy(vb, rt);
const XmmLink& v2 = XmmCopy(vb);
const XmmLink& vFF = XmmAlloc();
c.movdqa(vFF.get(), XmmConst(_mm_set1_epi32(0xff)));
c.pand(v1.get(), vFF.get());
c.psrld(v2.get(), 8);
c.pand(v2.get(), vFF.get());
c.paddd(v1.get(), v2.get());
c.movdqa(v2.get(), vb.get());
c.psrld(v2.get(), 16);
c.pand(v2.get(), vFF.get());
c.paddd(v1.get(), v2.get());
c.movdqa(v2.get(), vb.get());
c.psrld(v2.get(), 24);
c.paddd(v1.get(), v2.get());
c.pslld(v1.get(), 16);
c.movdqa(v2.get(), va.get());
c.pand(v2.get(), vFF.get());
c.por(v1.get(), v2.get());
c.movdqa(v2.get(), va.get());
c.psrld(v2.get(), 8);
c.pand(v2.get(), vFF.get());
c.paddd(v1.get(), v2.get());
c.movdqa(v2.get(), va.get());
c.psrld(v2.get(), 16);
c.pand(v2.get(), vFF.get());
c.paddd(v1.get(), v2.get());
c.movdqa(v2.get(), va.get());
c.psrld(v2.get(), 24);
c.paddd(v1.get(), v2.get());
XmmFinalize(vb);
XmmFinalize(va);
XmmFinalize(v1, rt);
XmmFinalize(v2);
XmmFinalize(vFF);
LOG_OPCODE();
}
//HGT uses signed values. HLGT uses unsigned values
void HGT(u32 rt, s32 ra, s32 rb)
@ -1754,18 +1909,16 @@ private:
}
void CLZ(u32 rt, u32 ra)
{
WRAPPER_BEGIN(rt, ra, yy, zz);
for (int w = 0; w < 4; w++)
XmmInvalidate(rt);
for (u32 i = 0; i < 4; i++)
{
int nPos;
for (nPos = 0; nPos < 32; nPos++)
if (CPU.GPR[ra]._u32[w] & (1 << (31 - nPos)))
break;
CPU.GPR[rt]._u32[w] = nPos;
c.bsr(*addr, cpu_dword(GPR[ra]._u32[i]));
c.cmovz(*addr, dword_ptr(*g_imm_var, offsetof(g_imm_table_struct, fsmb_table[0xffff]))); // load 0xffffffff
c.neg(*addr);
c.add(*addr, 31);
c.mov(cpu_dword(GPR[rt]._u32[i]), *addr);
}
WRAPPER_END(rt, ra, 0, 0);
LOG_OPCODE();
}
void XSWD(u32 rt, u32 ra)
{
@ -1786,13 +1939,14 @@ private:
}
void CNTB(u32 rt, u32 ra)
{
WRAPPER_BEGIN(rt, ra, yy, zz);
const SPU_GPR_hdr temp = CPU.GPR[ra];
CPU.GPR[rt].Reset();
for (int b = 0; b < 16; b++)
for (int i = 0; i < 8; i++)
CPU.GPR[rt]._u8[b] += (temp._u8[b] & (1 << i)) ? 1 : 0;
WRAPPER_END(rt, ra, 0, 0);
XmmInvalidate(rt);
for (u32 i = 0; i < 8; i++)
{
c.movzx(*addr, cpu_word(GPR[ra]._u16[i]));
c.movzx(*addr, word_ptr(*g_imm_var, *addr, 1, offsetof(g_imm_table_struct, cntb_table[0])));
c.mov(cpu_word(GPR[rt]._u16[i]), addr->r16());
}
LOG_OPCODE();
}
void XSBH(u32 rt, u32 ra)
{
@ -2228,14 +2382,14 @@ private:
XmmFinalize(vt);
LOG_OPCODE();
}
void CGX(u32 rt, u32 ra, u32 rb)
void CGX(u32 rt, u32 ra, u32 rb) //nf
{
WRAPPER_BEGIN(rt, ra, rb, zz);
for (int w = 0; w < 4; w++)
CPU.GPR[rt]._u32[w] = ((u64)CPU.GPR[ra]._u32[w] + (u64)CPU.GPR[rb]._u32[w] + (u64)(CPU.GPR[rt]._u32[w] & 1)) >> 32;
WRAPPER_END(rt, ra, rb, 0);
}
void BGX(u32 rt, u32 ra, u32 rb)
void BGX(u32 rt, u32 ra, u32 rb) //nf
{
WRAPPER_BEGIN(rt, ra, rb, zz);
s64 nResult;
@ -2299,7 +2453,7 @@ private:
{
UNIMPLEMENTED();
}
void DFTSV(u32 rt, u32 ra, s32 i7)
void DFTSV(u32 rt, u32 ra, s32 i7) //nf
{
WRAPPER_BEGIN(rt, ra, i7, zz);
const u64 DoubleExpMask = 0x7ff0000000000000;
@ -2721,12 +2875,7 @@ private:
else
{
const XmmLink& vr = XmmAlloc(rt);
__m128i fsmbi_mask;
for (u32 j = 0; j < 16; j++)
{
fsmbi_mask.m128i_i8[j] = ((i16 >> j) & 0x1) ? 0xff : 0;
}
c.movdqa(vr.get(), XmmConst(fsmbi_mask));
c.movdqa(vr.get(), g_imm_xmm(fsmb_table[i16 & 0xffff]));
XmmFinalize(vr, rt);
}
LOG_OPCODE();

View File

@ -4,6 +4,8 @@
#include "SPUInterpreter.h"
#include "SPURecompiler.h"
static const g_imm_table_struct g_imm_table;
SPURecompilerCore::SPURecompilerCore(SPUThread& cpu)
: m_enc(new SPURecompiler(cpu, *this))
, inter(new SPUInterpreter(cpu))
@ -58,16 +60,21 @@ void SPURecompilerCore::Compile(u16 pos)
compiler.alloc(imm_var);
m_enc->imm_var = &imm_var;
GpVar pos_var(compiler, kVarTypeUInt32, "pos");
compiler.setArg(3, pos_var);
m_enc->pos_var = &pos_var;
GpVar g_imm_var(compiler, kVarTypeIntPtr, "g_imm");
compiler.setArg(3, g_imm_var);
compiler.alloc(g_imm_var);
m_enc->g_imm_var = &g_imm_var;
GpVar pos_var(compiler, kVarTypeUInt32, "pos");
m_enc->pos_var = &pos_var;
GpVar addr_var(compiler, kVarTypeUInt32, "addr");
m_enc->addr = &addr_var;
GpVar qw0_var(compiler, kVarTypeUInt64, "qw0");
m_enc->qw0 = &qw0_var;
GpVar qw1_var(compiler, kVarTypeUInt64, "qw1");
m_enc->qw1 = &qw1_var;
GpVar qw2_var(compiler, kVarTypeUInt64, "qw2");
m_enc->qw2 = &qw2_var;
for (u32 i = 0; i < 16; i++)
{
@ -198,7 +205,7 @@ u8 SPURecompilerCore::DecodeMemory(const u64 address)
return 0;
}
typedef u32(*Func)(void* _cpu, void* _ls, const void* _imm, u32 _pos);
typedef u32(*Func)(const void* _cpu, const void* _ls, const void* _imm, const void* _g_imm);
Func func = asmjit_cast<Func>(entry[pos].pointer);
@ -215,7 +222,7 @@ u8 SPURecompilerCore::DecodeMemory(const u64 address)
}
u32 res = pos;
res = func(cpu, &Memory[m_offset], imm_table.data(), res);
res = func(cpu, &Memory[m_offset], imm_table.data(), &g_imm_table);
if (res > 0xffff)
{