PPU: Implemented some instructions in the recompiler. Fixed some bugs in the interpreter.

This commit is contained in:
S Gopal Rajagopal 2014-11-29 01:39:59 +05:30
parent 205e1d88b3
commit 6ea50567b6
4 changed files with 241 additions and 85 deletions

View File

@@ -834,11 +834,11 @@ private:
} }
void VCTSXS(u32 vd, u32 uimm5, u32 vb) void VCTSXS(u32 vd, u32 uimm5, u32 vb)
{ {
int nScale = 1 << uimm5; u32 nScale = 1 << uimm5;
for (uint w = 0; w < 4; w++) for (uint w = 0; w < 4; w++)
{ {
float result = CPU.VPR[vb]._f[w] * nScale; double result = (double)CPU.VPR[vb]._f[w] * nScale;
if (result > 0x7fffffff) if (result > 0x7fffffff)
{ {
@@ -856,12 +856,12 @@ private:
} }
void VCTUXS(u32 vd, u32 uimm5, u32 vb) void VCTUXS(u32 vd, u32 uimm5, u32 vb)
{ {
int nScale = 1 << uimm5; u32 nScale = 1 << uimm5;
for (uint w = 0; w < 4; w++) for (uint w = 0; w < 4; w++)
{ {
// C rounding = Round towards 0 // C rounding = Round towards 0
float result = CPU.VPR[vb]._f[w] * nScale; double result = (double)CPU.VPR[vb]._f[w] * nScale;
if (result > 0xffffffffu) if (result > 0xffffffffu)
{ {
@@ -1078,26 +1078,32 @@ private:
} }
void VMRGLB(u32 vd, u32 va, u32 vb) void VMRGLB(u32 vd, u32 va, u32 vb)
{ {
u128 VA = CPU.VPR[va];
u128 VB = CPU.VPR[vb];
for (uint h = 0; h < 8; h++) for (uint h = 0; h < 8; h++)
{ {
CPU.VPR[vd]._u8[15 - h*2] = CPU.VPR[va]._u8[7 - h]; CPU.VPR[vd]._u8[15 - h*2] = VA._u8[7 - h];
CPU.VPR[vd]._u8[15 - h*2 - 1] = CPU.VPR[vb]._u8[7 - h]; CPU.VPR[vd]._u8[15 - h*2 - 1] = VB._u8[7 - h];
} }
} }
void VMRGLH(u32 vd, u32 va, u32 vb) void VMRGLH(u32 vd, u32 va, u32 vb)
{ {
u128 VA = CPU.VPR[va];
u128 VB = CPU.VPR[vb];
for (uint w = 0; w < 4; w++) for (uint w = 0; w < 4; w++)
{ {
CPU.VPR[vd]._u16[7 - w*2] = CPU.VPR[va]._u16[3 - w]; CPU.VPR[vd]._u16[7 - w*2] = VA._u16[3 - w];
CPU.VPR[vd]._u16[7 - w*2 - 1] = CPU.VPR[vb]._u16[3 - w]; CPU.VPR[vd]._u16[7 - w*2 - 1] = VB._u16[3 - w];
} }
} }
void VMRGLW(u32 vd, u32 va, u32 vb) void VMRGLW(u32 vd, u32 va, u32 vb)
{ {
u128 VA = CPU.VPR[va];
u128 VB = CPU.VPR[vb];
for (uint d = 0; d < 2; d++) for (uint d = 0; d < 2; d++)
{ {
CPU.VPR[vd]._u32[3 - d*2] = CPU.VPR[va]._u32[1 - d]; CPU.VPR[vd]._u32[3 - d*2] = VA._u32[1 - d];
CPU.VPR[vd]._u32[3 - d*2 - 1] = CPU.VPR[vb]._u32[1 - d]; CPU.VPR[vd]._u32[3 - d*2 - 1] = VB._u32[1 - d];
} }
} }
void VMSUMMBM(u32 vd, u32 va, u32 vb, u32 vc) //nf void VMSUMMBM(u32 vd, u32 va, u32 vb, u32 vc) //nf
@@ -1168,7 +1174,7 @@ private:
for (uint b = 0; b < 4; b++) for (uint b = 0; b < 4; b++)
{ {
result += CPU.VPR[va]._u8[w*4 + b] * CPU.VPR[vb]._u8[w*4 + b]; result += (u32)CPU.VPR[va]._u8[w*4 + b] * (u32)CPU.VPR[vb]._u8[w*4 + b];
} }
result += CPU.VPR[vc]._u32[w]; result += CPU.VPR[vc]._u32[w];
@@ -1183,7 +1189,7 @@ private:
for (uint h = 0; h < 2; h++) for (uint h = 0; h < 2; h++)
{ {
result += CPU.VPR[va]._u16[w*2 + h] * CPU.VPR[vb]._u16[w*2 + h]; result += (u32)CPU.VPR[va]._u16[w*2 + h] * (u32)CPU.VPR[vb]._u16[w*2 + h];
} }
result += CPU.VPR[vc]._u32[w]; result += CPU.VPR[vc]._u32[w];
@@ -1199,7 +1205,7 @@ private:
for (uint h = 0; h < 2; h++) for (uint h = 0; h < 2; h++)
{ {
result += CPU.VPR[va]._u16[w*2 + h] * CPU.VPR[vb]._u16[w*2 + h]; result += (u64)CPU.VPR[va]._u16[w*2 + h] * (u64)CPU.VPR[vb]._u16[w*2 + h];
} }
result += CPU.VPR[vc]._u32[w]; result += CPU.VPR[vc]._u32[w];
@@ -1307,16 +1313,18 @@ private:
} }
void VPKPX(u32 vd, u32 va, u32 vb) void VPKPX(u32 vd, u32 va, u32 vb)
{ {
u128 VA = CPU.VPR[va];
u128 VB = CPU.VPR[vb];
for (uint h = 0; h < 4; h++) for (uint h = 0; h < 4; h++)
{ {
u16 bb7 = CPU.VPR[vb]._u8[15 - (h*4 + 0)] & 0x1; u16 bb7 = VB._u8[15 - (h*4 + 0)] & 0x1;
u16 bb8 = CPU.VPR[vb]._u8[15 - (h*4 + 1)] >> 3; u16 bb8 = VB._u8[15 - (h*4 + 1)] >> 3;
u16 bb16 = CPU.VPR[vb]._u8[15 - (h*4 + 2)] >> 3; u16 bb16 = VB._u8[15 - (h*4 + 2)] >> 3;
u16 bb24 = CPU.VPR[vb]._u8[15 - (h*4 + 3)] >> 3; u16 bb24 = VB._u8[15 - (h*4 + 3)] >> 3;
u16 ab7 = CPU.VPR[va]._u8[15 - (h*4 + 0)] & 0x1; u16 ab7 = VA._u8[15 - (h*4 + 0)] & 0x1;
u16 ab8 = CPU.VPR[va]._u8[15 - (h*4 + 1)] >> 3; u16 ab8 = VA._u8[15 - (h*4 + 1)] >> 3;
u16 ab16 = CPU.VPR[va]._u8[15 - (h*4 + 2)] >> 3; u16 ab16 = VA._u8[15 - (h*4 + 2)] >> 3;
u16 ab24 = CPU.VPR[va]._u8[15 - (h*4 + 3)] >> 3; u16 ab24 = VA._u8[15 - (h*4 + 3)] >> 3;
CPU.VPR[vd]._u16[3 - h] = (bb7 << 15) | (bb8 << 10) | (bb16 << 5) | bb24; CPU.VPR[vd]._u16[3 - h] = (bb7 << 15) | (bb8 << 10) | (bb16 << 5) | bb24;
CPU.VPR[vd]._u16[4 + (3 - h)] = (ab7 << 15) | (ab8 << 10) | (ab16 << 5) | ab24; CPU.VPR[vd]._u16[4 + (3 - h)] = (ab7 << 15) | (ab8 << 10) | (ab16 << 5) | ab24;
@@ -1324,9 +1332,11 @@ private:
} }
void VPKSHSS(u32 vd, u32 va, u32 vb) //nf void VPKSHSS(u32 vd, u32 va, u32 vb) //nf
{ {
u128 VA = CPU.VPR[va];
u128 VB = CPU.VPR[vb];
for (uint b = 0; b < 8; b++) for (uint b = 0; b < 8; b++)
{ {
s16 result = CPU.VPR[va]._s16[b]; s16 result = VA._s16[b];
if (result > INT8_MAX) if (result > INT8_MAX)
{ {
@@ -1341,7 +1351,7 @@ private:
CPU.VPR[vd]._s8[b+8] = (s8)result; CPU.VPR[vd]._s8[b+8] = (s8)result;
result = CPU.VPR[vb]._s16[b]; result = VB._s16[b];
if (result > INT8_MAX) if (result > INT8_MAX)
{ {
@@ -1359,9 +1369,11 @@ private:
} }
void VPKSHUS(u32 vd, u32 va, u32 vb) void VPKSHUS(u32 vd, u32 va, u32 vb)
{ {
u128 VA = CPU.VPR[va];
u128 VB = CPU.VPR[vb];
for (uint b = 0; b < 8; b++) for (uint b = 0; b < 8; b++)
{ {
s16 result = CPU.VPR[va]._s16[b]; s16 result = VA._s16[b];
if (result > UINT8_MAX) if (result > UINT8_MAX)
{ {
@@ -1376,7 +1388,7 @@ private:
CPU.VPR[vd]._u8[b+8] = (u8)result; CPU.VPR[vd]._u8[b+8] = (u8)result;
result = CPU.VPR[vb]._s16[b]; result = VB._s16[b];
if (result > UINT8_MAX) if (result > UINT8_MAX)
{ {
@@ -1394,9 +1406,11 @@ private:
} }
void VPKSWSS(u32 vd, u32 va, u32 vb) void VPKSWSS(u32 vd, u32 va, u32 vb)
{ {
u128 VA = CPU.VPR[va];
u128 VB = CPU.VPR[vb];
for (uint h = 0; h < 4; h++) for (uint h = 0; h < 4; h++)
{ {
s32 result = CPU.VPR[va]._s32[h]; s32 result = VA._s32[h];
if (result > INT16_MAX) if (result > INT16_MAX)
{ {
@@ -1411,7 +1425,7 @@ private:
CPU.VPR[vd]._s16[h+4] = result; CPU.VPR[vd]._s16[h+4] = result;
result = CPU.VPR[vb]._s32[h]; result = VB._s32[h];
if (result > INT16_MAX) if (result > INT16_MAX)
{ {
@@ -1429,9 +1443,11 @@ private:
} }
void VPKSWUS(u32 vd, u32 va, u32 vb) //nf void VPKSWUS(u32 vd, u32 va, u32 vb) //nf
{ {
u128 VA = CPU.VPR[va];
u128 VB = CPU.VPR[vb];
for (uint h = 0; h < 4; h++) for (uint h = 0; h < 4; h++)
{ {
s32 result = CPU.VPR[va]._s32[h]; s32 result = VA._s32[h];
if (result > UINT16_MAX) if (result > UINT16_MAX)
{ {
@@ -1446,7 +1462,7 @@ private:
CPU.VPR[vd]._u16[h+4] = result; CPU.VPR[vd]._u16[h+4] = result;
result = CPU.VPR[vb]._s32[h]; result = VB._s32[h];
if (result > UINT16_MAX) if (result > UINT16_MAX)
{ {
@@ -1464,17 +1480,21 @@ private:
} }
void VPKUHUM(u32 vd, u32 va, u32 vb) //nf void VPKUHUM(u32 vd, u32 va, u32 vb) //nf
{ {
u128 VA = CPU.VPR[va];
u128 VB = CPU.VPR[vb];
for (uint b = 0; b < 8; b++) for (uint b = 0; b < 8; b++)
{ {
CPU.VPR[vd]._u8[b+8] = CPU.VPR[va]._u8[b*2]; CPU.VPR[vd]._u8[b+8] = VA._u8[b*2];
CPU.VPR[vd]._u8[b ] = CPU.VPR[vb]._u8[b*2]; CPU.VPR[vd]._u8[b ] = VB._u8[b*2];
} }
} }
void VPKUHUS(u32 vd, u32 va, u32 vb) void VPKUHUS(u32 vd, u32 va, u32 vb)
{ {
u128 VA = CPU.VPR[va];
u128 VB = CPU.VPR[vb];
for (uint b = 0; b < 8; b++) for (uint b = 0; b < 8; b++)
{ {
u16 result = CPU.VPR[va]._u16[b]; u16 result = VA._u16[b];
if (result > UINT8_MAX) if (result > UINT8_MAX)
{ {
@@ -1484,7 +1504,7 @@ private:
CPU.VPR[vd]._u8[b+8] = (u8)result; CPU.VPR[vd]._u8[b+8] = (u8)result;
result = CPU.VPR[vb]._u16[b]; result = VB._u16[b];
if (result > UINT8_MAX) if (result > UINT8_MAX)
{ {
@@ -1497,17 +1517,21 @@ private:
} }
void VPKUWUM(u32 vd, u32 va, u32 vb) void VPKUWUM(u32 vd, u32 va, u32 vb)
{ {
u128 VA = CPU.VPR[va];
u128 VB = CPU.VPR[vb];
for (uint h = 0; h < 4; h++) for (uint h = 0; h < 4; h++)
{ {
CPU.VPR[vd]._u16[h+4] = CPU.VPR[va]._u16[h*2]; CPU.VPR[vd]._u16[h+4] = VA._u16[h*2];
CPU.VPR[vd]._u16[h ] = CPU.VPR[vb]._u16[h*2]; CPU.VPR[vd]._u16[h ] = VB._u16[h*2];
} }
} }
void VPKUWUS(u32 vd, u32 va, u32 vb) //nf void VPKUWUS(u32 vd, u32 va, u32 vb) //nf
{ {
u128 VA = CPU.VPR[va];
u128 VB = CPU.VPR[vb];
for (uint h = 0; h < 4; h++) for (uint h = 0; h < 4; h++)
{ {
u32 result = CPU.VPR[va]._u32[h]; u32 result = VA._u32[h];
if (result > UINT16_MAX) if (result > UINT16_MAX)
{ {
@@ -1517,7 +1541,7 @@ private:
CPU.VPR[vd]._u16[h+4] = result; CPU.VPR[vd]._u16[h+4] = result;
result = CPU.VPR[vb]._u32[h]; result = VB._u32[h];
if (result > UINT16_MAX) if (result > UINT16_MAX)
{ {
@@ -1539,30 +1563,28 @@ private:
{ {
for (uint w = 0; w < 4; w++) for (uint w = 0; w < 4; w++)
{ {
CPU.VPR[vd]._f[w] = floor(CPU.VPR[vb]._f[w]); CPU.VPR[vd]._f[w] = floorf(CPU.VPR[vb]._f[w]);
} }
} }
void VRFIN(u32 vd, u32 vb) void VRFIN(u32 vd, u32 vb)
{ {
for (uint w = 0; w < 4; w++) for (uint w = 0; w < 4; w++)
{ {
CPU.VPR[vd]._f[w] = floor(CPU.VPR[vb]._f[w] + 0.5f); CPU.VPR[vd]._f[w] = nearbyintf(CPU.VPR[vb]._f[w]);
} }
} }
void VRFIP(u32 vd, u32 vb) void VRFIP(u32 vd, u32 vb)
{ {
for (uint w = 0; w < 4; w++) for (uint w = 0; w < 4; w++)
{ {
CPU.VPR[vd]._f[w] = ceil(CPU.VPR[vb]._f[w]); CPU.VPR[vd]._f[w] = ceilf(CPU.VPR[vb]._f[w]);
} }
} }
void VRFIZ(u32 vd, u32 vb) void VRFIZ(u32 vd, u32 vb)
{ {
for (uint w = 0; w < 4; w++) for (uint w = 0; w < 4; w++)
{ {
float f; CPU.VPR[vd]._f[w] = truncf(CPU.VPR[vb]._f[w]);
modff(CPU.VPR[vb]._f[w], &f);
CPU.VPR[vd]._f[w] = f;
} }
} }
void VRLB(u32 vd, u32 va, u32 vb) //nf void VRLB(u32 vd, u32 va, u32 vb) //nf
@@ -1605,12 +1627,13 @@ private:
} }
void VSL(u32 vd, u32 va, u32 vb) //nf void VSL(u32 vd, u32 va, u32 vb) //nf
{ {
u128 VA = CPU.VPR[va];
u8 sh = CPU.VPR[vb]._u8[0] & 0x7; u8 sh = CPU.VPR[vb]._u8[0] & 0x7;
CPU.VPR[vd]._u8[0] = CPU.VPR[va]._u8[0] << sh; CPU.VPR[vd]._u8[0] = VA._u8[0] << sh;
for (uint b = 1; b < 16; b++) for (uint b = 1; b < 16; b++)
{ {
CPU.VPR[vd]._u8[b] = (CPU.VPR[va]._u8[b] << sh) | (CPU.VPR[va]._u8[b-1] >> (8 - sh)); CPU.VPR[vd]._u8[b] = (VA._u8[b] << sh) | (VA._u8[b-1] >> (8 - sh));
} }
} }
void VSLB(u32 vd, u32 va, u32 vb) void VSLB(u32 vd, u32 va, u32 vb)
@@ -1635,18 +1658,19 @@ private:
{ {
for (uint h = 0; h < 8; h++) for (uint h = 0; h < 8; h++)
{ {
CPU.VPR[vd]._u16[h] = CPU.VPR[va]._u16[h] << (CPU.VPR[vb]._u8[h*2] & 0xf); CPU.VPR[vd]._u16[h] = CPU.VPR[va]._u16[h] << (CPU.VPR[vb]._u16[h] & 0xf);
} }
} }
void VSLO(u32 vd, u32 va, u32 vb) void VSLO(u32 vd, u32 va, u32 vb)
{ {
u128 VA = CPU.VPR[va];
u8 nShift = (CPU.VPR[vb]._u8[0] >> 3) & 0xf; u8 nShift = (CPU.VPR[vb]._u8[0] >> 3) & 0xf;
CPU.VPR[vd].clear(); CPU.VPR[vd].clear();
for (u8 b = 0; b < 16 - nShift; b++) for (u8 b = 0; b < 16 - nShift; b++)
{ {
CPU.VPR[vd]._u8[15 - b] = CPU.VPR[va]._u8[15 - (b + nShift)]; CPU.VPR[vd]._u8[15 - b] = VA._u8[15 - (b + nShift)];
} }
} }
void VSLW(u32 vd, u32 va, u32 vb) void VSLW(u32 vd, u32 va, u32 vb)
@@ -1710,12 +1734,13 @@ private:
} }
void VSR(u32 vd, u32 va, u32 vb) //nf void VSR(u32 vd, u32 va, u32 vb) //nf
{ {
u128 VA = CPU.VPR[va];
u8 sh = CPU.VPR[vb]._u8[0] & 0x7; u8 sh = CPU.VPR[vb]._u8[0] & 0x7;
CPU.VPR[vd]._u8[15] = CPU.VPR[va]._u8[15] >> sh; CPU.VPR[vd]._u8[15] = VA._u8[15] >> sh;
for (uint b = 14; ~b; b--) for (uint b = 14; ~b; b--)
{ {
CPU.VPR[vd]._u8[b] = (CPU.VPR[va]._u8[b] >> sh) | (CPU.VPR[va]._u8[b+1] << (8 - sh)); CPU.VPR[vd]._u8[b] = (VA._u8[b] >> sh) | (VA._u8[b+1] << (8 - sh));
} }
} }
void VSRAB(u32 vd, u32 va, u32 vb) //nf void VSRAB(u32 vd, u32 va, u32 vb) //nf
@@ -1729,14 +1754,14 @@ private:
{ {
for (uint h = 0; h < 8; h++) for (uint h = 0; h < 8; h++)
{ {
CPU.VPR[vd]._s16[h] = CPU.VPR[va]._s16[h] >> (CPU.VPR[vb]._u8[h*2] & 0xf); CPU.VPR[vd]._s16[h] = CPU.VPR[va]._s16[h] >> (CPU.VPR[vb]._u16[h] & 0xf);
} }
} }
void VSRAW(u32 vd, u32 va, u32 vb) void VSRAW(u32 vd, u32 va, u32 vb)
{ {
for (uint w = 0; w < 4; w++) for (uint w = 0; w < 4; w++)
{ {
CPU.VPR[vd]._s32[w] = CPU.VPR[va]._s32[w] >> (CPU.VPR[vb]._u8[w*4] & 0x1f); CPU.VPR[vd]._s32[w] = CPU.VPR[va]._s32[w] >> (CPU.VPR[vb]._u32[w] & 0x1f);
} }
} }
void VSRB(u32 vd, u32 va, u32 vb) void VSRB(u32 vd, u32 va, u32 vb)
@@ -1750,25 +1775,26 @@ private:
{ {
for (uint h = 0; h < 8; h++) for (uint h = 0; h < 8; h++)
{ {
CPU.VPR[vd]._u16[h] = CPU.VPR[va]._u16[h] >> (CPU.VPR[vb]._u8[h*2] & 0xf); CPU.VPR[vd]._u16[h] = CPU.VPR[va]._u16[h] >> (CPU.VPR[vb]._u16[h] & 0xf);
} }
} }
void VSRO(u32 vd, u32 va, u32 vb) void VSRO(u32 vd, u32 va, u32 vb)
{ {
u128 VA = CPU.VPR[va];
u8 nShift = (CPU.VPR[vb]._u8[0] >> 3) & 0xf; u8 nShift = (CPU.VPR[vb]._u8[0] >> 3) & 0xf;
CPU.VPR[vd].clear(); CPU.VPR[vd].clear();
for (u8 b = 0; b < 16 - nShift; b++) for (u8 b = 0; b < 16 - nShift; b++)
{ {
CPU.VPR[vd]._u8[b] = CPU.VPR[va]._u8[b + nShift]; CPU.VPR[vd]._u8[b] = VA._u8[b + nShift];
} }
} }
void VSRW(u32 vd, u32 va, u32 vb) void VSRW(u32 vd, u32 va, u32 vb)
{ {
for (uint w = 0; w < 4; w++) for (uint w = 0; w < 4; w++)
{ {
CPU.VPR[vd]._u32[w] = CPU.VPR[va]._u32[w] >> (CPU.VPR[vb]._u8[w*4] & 0x1f); CPU.VPR[vd]._u32[w] = CPU.VPR[va]._u32[w] >> (CPU.VPR[vb]._u32[w] & 0x1f);
} }
} }
void VSUBCUW(u32 vd, u32 va, u32 vb) //nf void VSUBCUW(u32 vd, u32 va, u32 vb) //nf
@@ -2029,50 +2055,56 @@ private:
} }
void VUPKHPX(u32 vd, u32 vb) void VUPKHPX(u32 vd, u32 vb)
{ {
u128 VB = CPU.VPR[vb];
for (uint w = 0; w < 4; w++) for (uint w = 0; w < 4; w++)
{ {
CPU.VPR[vd]._s8[(3 - w)*4 + 3] = CPU.VPR[vb]._s8[w*2 + 0] >> 7; // signed shift sign extends CPU.VPR[vd]._s8[w*4 + 3] = VB._s8[8 + w*2 + 1] >> 7; // signed shift sign extends
CPU.VPR[vd]._u8[(3 - w)*4 + 2] = (CPU.VPR[vb]._u8[w*2 + 0] >> 2) & 0x1f; CPU.VPR[vd]._u8[w*4 + 2] = (VB._u8[8 + w*2 + 1] >> 2) & 0x1f;
CPU.VPR[vd]._u8[(3 - w)*4 + 1] = ((CPU.VPR[vb]._u8[w*2 + 0] & 0x3) << 3) | ((CPU.VPR[vb]._u8[w*2 + 1] >> 5) & 0x7); CPU.VPR[vd]._u8[w*4 + 1] = ((VB._u8[8 + w*2 + 1] & 0x3) << 3) | ((VB._u8[8 + w*2 + 0] >> 5) & 0x7);
CPU.VPR[vd]._u8[(3 - w)*4 + 0] = CPU.VPR[vb]._u8[w*2 + 1] & 0x1f; CPU.VPR[vd]._u8[w*4 + 0] = VB._u8[8 + w*2 + 0] & 0x1f;
} }
} }
void VUPKHSB(u32 vd, u32 vb) void VUPKHSB(u32 vd, u32 vb)
{ {
u128 VB = CPU.VPR[vb];
for (uint h = 0; h < 8; h++) for (uint h = 0; h < 8; h++)
{ {
CPU.VPR[vd]._s16[h] = CPU.VPR[vb]._s8[h]; CPU.VPR[vd]._s16[h] = VB._s8[8 + h];
} }
} }
void VUPKHSH(u32 vd, u32 vb) void VUPKHSH(u32 vd, u32 vb)
{ {
u128 VB = CPU.VPR[vb];
for (uint w = 0; w < 4; w++) for (uint w = 0; w < 4; w++)
{ {
CPU.VPR[vd]._s32[w] = CPU.VPR[vb]._s16[w]; CPU.VPR[vd]._s32[w] = VB._s16[4 + w];
} }
} }
void VUPKLPX(u32 vd, u32 vb) void VUPKLPX(u32 vd, u32 vb)
{ {
u128 VB = CPU.VPR[vb];
for (uint w = 0; w < 4; w++) for (uint w = 0; w < 4; w++)
{ {
CPU.VPR[vd]._s8[(3 - w)*4 + 3] = CPU.VPR[vb]._s8[8 + w*2 + 0] >> 7; // signed shift sign extends CPU.VPR[vd]._s8[w*4 + 3] = VB._s8[w*2 + 1] >> 7; // signed shift sign extends
CPU.VPR[vd]._u8[(3 - w)*4 + 2] = (CPU.VPR[vb]._u8[8 + w*2 + 0] >> 2) & 0x1f; CPU.VPR[vd]._u8[w*4 + 2] = (VB._u8[w*2 + 1] >> 2) & 0x1f;
CPU.VPR[vd]._u8[(3 - w)*4 + 1] = ((CPU.VPR[vb]._u8[8 + w*2 + 0] & 0x3) << 3) | ((CPU.VPR[vb]._u8[8 + w*2 + 1] >> 5) & 0x7); CPU.VPR[vd]._u8[w*4 + 1] = ((VB._u8[w*2 + 1] & 0x3) << 3) | ((VB._u8[w*2 + 0] >> 5) & 0x7);
CPU.VPR[vd]._u8[(3 - w)*4 + 0] = CPU.VPR[vb]._u8[8 + w*2 + 1] & 0x1f; CPU.VPR[vd]._u8[w*4 + 0] = VB._u8[w*2 + 0] & 0x1f;
} }
} }
void VUPKLSB(u32 vd, u32 vb) //nf void VUPKLSB(u32 vd, u32 vb) //nf
{ {
u128 VB = CPU.VPR[vb];
for (uint h = 0; h < 8; h++) for (uint h = 0; h < 8; h++)
{ {
CPU.VPR[vd]._s16[h] = CPU.VPR[vb]._s8[8 + h]; CPU.VPR[vd]._s16[h] = VB._s8[h];
} }
} }
void VUPKLSH(u32 vd, u32 vb) void VUPKLSH(u32 vd, u32 vb)
{ {
u128 VB = CPU.VPR[vb];
for (uint w = 0; w < 4; w++) for (uint w = 0; w < 4; w++)
{ {
CPU.VPR[vd]._s32[w] = CPU.VPR[vb]._s16[4 + w]; CPU.VPR[vd]._s32[w] = VB._s16[w];
} }
} }
void VXOR(u32 vd, u32 va, u32 vb) void VXOR(u32 vd, u32 va, u32 vb)
@@ -2792,7 +2824,7 @@ private:
return; return;
} }
const u8 eb = (addr & 0xf) >> 1; const u8 eb = (addr & 0xf) >> 1;
vm::write16((u32)addr, CPU.VPR[vs]._u16[7 - eb]); vm::write16((u32)addr & 0xFFFFFFFE, CPU.VPR[vs]._u16[7 - eb]);
} }
void STDUX(u32 rs, u32 ra, u32 rb) void STDUX(u32 rs, u32 ra, u32 rb)
{ {
@@ -2828,7 +2860,7 @@ private:
return; return;
} }
const u8 eb = (addr & 0xf) >> 2; const u8 eb = (addr & 0xf) >> 2;
vm::write32((u32)addr, CPU.VPR[vs]._u32[3 - eb]); vm::write32((u32)addr & 0xFFFFFFFC, CPU.VPR[vs]._u32[3 - eb]);
} }
void ADDZE(u32 rd, u32 ra, u32 oe, bool rc) void ADDZE(u32 rd, u32 ra, u32 oe, bool rc)
{ {

View File

@@ -1016,7 +1016,26 @@ void Compiler::VMSUMSHM(u32 vd, u32 va, u32 vb, u32 vc) {
} }
void Compiler::VMSUMSHS(u32 vd, u32 va, u32 vb, u32 vc) { void Compiler::VMSUMSHS(u32 vd, u32 va, u32 vb, u32 vc) {
InterpreterCall("VMSUMSHS", &PPUInterpreter::VMSUMSHS, vd, va, vb, vc); auto va_v8i16 = GetVrAsIntVec(va, 16);
auto vb_v8i16 = GetVrAsIntVec(vb, 16);
auto vc_v4i32 = GetVrAsIntVec(vc, 32);
auto res_v4i32 = (Value *)m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmadd_wd), va_v8i16, vb_v8i16);
auto tmp1_v4i32 = m_ir_builder->CreateLShr(vc_v4i32, 31);
tmp1_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x7FFFFFFF)));
auto tmp1_v16i8 = m_ir_builder->CreateBitCast(tmp1_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16));
auto tmp2_v4i32 = m_ir_builder->CreateXor(vc_v4i32, res_v4i32);
tmp2_v4i32 = m_ir_builder->CreateNot(tmp2_v4i32);
auto sum_v4i32 = m_ir_builder->CreateAdd(vc_v4i32, res_v4i32);
auto sum_v16i8 = m_ir_builder->CreateBitCast(sum_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16));
auto tmp3_v4i32 = m_ir_builder->CreateXor(vc_v4i32, sum_v4i32);
tmp3_v4i32 = m_ir_builder->CreateAnd(tmp2_v4i32, tmp3_v4i32);
tmp3_v4i32 = m_ir_builder->CreateAShr(tmp3_v4i32, 31);
auto tmp3_v16i8 = m_ir_builder->CreateBitCast(tmp3_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16));
auto res_v16i8 = m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pblendvb), sum_v16i8, tmp1_v16i8, tmp3_v16i8);
SetVr(vd, res_v16i8);
// TODO: Set VSCR.SAT
} }
void Compiler::VMSUMUBM(u32 vd, u32 va, u32 vb, u32 vc) { void Compiler::VMSUMUBM(u32 vd, u32 va, u32 vb, u32 vc) {
@@ -1074,7 +1093,31 @@ void Compiler::VMSUMUHM(u32 vd, u32 va, u32 vb, u32 vc) {
} }
void Compiler::VMSUMUHS(u32 vd, u32 va, u32 vb, u32 vc) { void Compiler::VMSUMUHS(u32 vd, u32 va, u32 vb, u32 vc) {
InterpreterCall("VMSUMUHS", &PPUInterpreter::VMSUMUHS, vd, va, vb, vc); auto va_v8i16 = GetVrAsIntVec(va, 16);
auto vb_v8i16 = GetVrAsIntVec(vb, 16);
auto va_v8i32 = m_ir_builder->CreateZExt(va_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8));
auto vb_v8i32 = m_ir_builder->CreateZExt(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8));
auto tmp_v8i32 = m_ir_builder->CreateMul(va_v8i32, vb_v8i32);
auto undef_v8i32 = UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 8));
u32 mask1_v4i32[4] = {0, 2, 4, 6};
auto tmp1_v4i32 = m_ir_builder->CreateShuffleVector(tmp_v8i32, undef_v8i32, ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32));
u32 mask2_v4i32[4] = {1, 3, 5, 7};
auto tmp2_v4i32 = m_ir_builder->CreateShuffleVector(tmp_v8i32, undef_v8i32, ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32));
auto vc_v4i32 = GetVrAsIntVec(vc, 32);
auto res_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, tmp2_v4i32);
auto cmp_v4i1 = m_ir_builder->CreateICmpULT(res_v4i32, tmp1_v4i32);
auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4));
res_v4i32 = m_ir_builder->CreateOr(res_v4i32, cmp_v4i32);
res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, vc_v4i32);
cmp_v4i1 = m_ir_builder->CreateICmpULT(res_v4i32, vc_v4i32);
cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4));
res_v4i32 = m_ir_builder->CreateOr(res_v4i32, cmp_v4i32);
SetVr(vd, res_v4i32);
// TODO: Set VSCR.SAT
} }
void Compiler::VMULESB(u32 vd, u32 va, u32 vb) { void Compiler::VMULESB(u32 vd, u32 va, u32 vb) {
@@ -1204,7 +1247,37 @@ void Compiler::VPERM(u32 vd, u32 va, u32 vb, u32 vc) {
} }
void Compiler::VPKPX(u32 vd, u32 va, u32 vb) { void Compiler::VPKPX(u32 vd, u32 va, u32 vb) {
InterpreterCall("VPKPX", &PPUInterpreter::VPKPX, vd, va, vb); auto va_v4i32 = GetVrAsIntVec(va, 32);
auto vb_v4i32 = GetVrAsIntVec(vb, 32);
auto tmpa_v4i32 = m_ir_builder->CreateShl(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(7)));
tmpa_v4i32 = m_ir_builder->CreateAnd(tmpa_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFC000000)));
va_v4i32 = m_ir_builder->CreateShl(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(10)));
va_v4i32 = m_ir_builder->CreateAnd(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(~0xFC000000)));
tmpa_v4i32 = m_ir_builder->CreateOr(tmpa_v4i32, va_v4i32);
tmpa_v4i32 = m_ir_builder->CreateAnd(tmpa_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFFE00000)));
va_v4i32 = m_ir_builder->CreateShl(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(3)));
va_v4i32 = m_ir_builder->CreateAnd(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(~0xFFE00000)));
tmpa_v4i32 = m_ir_builder->CreateOr(tmpa_v4i32, va_v4i32);
auto tmpa_v8i16 = m_ir_builder->CreateBitCast(tmpa_v4i32, VectorType::get(m_ir_builder->getInt16Ty(), 8));
auto tmpb_v4i32 = m_ir_builder->CreateShl(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(7)));
tmpb_v4i32 = m_ir_builder->CreateAnd(tmpb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFC000000)));
vb_v4i32 = m_ir_builder->CreateShl(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(10)));
vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(~0xFC000000)));
tmpb_v4i32 = m_ir_builder->CreateOr(tmpb_v4i32, vb_v4i32);
tmpb_v4i32 = m_ir_builder->CreateAnd(tmpb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFFE00000)));
vb_v4i32 = m_ir_builder->CreateShl(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(3)));
vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(~0xFFE00000)));
tmpb_v4i32 = m_ir_builder->CreateOr(tmpb_v4i32, vb_v4i32);
auto tmpb_v8i16 = m_ir_builder->CreateBitCast(tmpb_v4i32, VectorType::get(m_ir_builder->getInt16Ty(), 8));
u32 mask_v8i32[8] = {1, 3, 5, 7, 9, 11, 13, 15};
auto res_v8i16 = m_ir_builder->CreateShuffleVector(tmpb_v8i16, tmpa_v8i16, ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32));
SetVr(vd, res_v8i16);
// TODO: Implement with pext on CPUs with BMI
} }
void Compiler::VPKSHSS(u32 vd, u32 va, u32 vb) { void Compiler::VPKSHSS(u32 vd, u32 va, u32 vb) {
@@ -1669,27 +1742,69 @@ void Compiler::VSUM4UBS(u32 vd, u32 va, u32 vb) {
} }
void Compiler::VUPKHPX(u32 vd, u32 vb) { void Compiler::VUPKHPX(u32 vd, u32 vb) {
InterpreterCall("VUPKHPX", &PPUInterpreter::VUPKHPX, vd, vb); auto vb_v8i16 = GetVrAsIntVec(vb, 16);
u32 mask_v8i32[8] = { 4, 4, 5, 5, 6, 6, 7, 7 };
vb_v8i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32));
auto vb_v4i32 = m_ir_builder->CreateBitCast(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 4));
vb_v4i32 = m_ir_builder->CreateAShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(10)));
auto tmp1_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(3)));
tmp1_v4i32 = m_ir_builder->CreateAnd(tmp1_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x00001F00)));
auto tmp2_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(6)));
tmp2_v4i32 = m_ir_builder->CreateAnd(tmp2_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x0000001F)));
auto res_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFF1F0000)));
res_v4i32 = m_ir_builder->CreateOr(res_v4i32, tmp1_v4i32);
res_v4i32 = m_ir_builder->CreateOr(res_v4i32, tmp2_v4i32);
SetVr(vd, res_v4i32);
} }
void Compiler::VUPKHSB(u32 vd, u32 vb) { void Compiler::VUPKHSB(u32 vd, u32 vb) {
InterpreterCall("VUPKHSB", &PPUInterpreter::VUPKHSB, vd, vb); auto vb_v16i8 = GetVrAsIntVec(vb, 8);
u32 mask_v8i32[8] = { 8, 9, 10, 11, 12, 13, 14, 15 };
auto vb_v8i8 = m_ir_builder->CreateShuffleVector(vb_v16i8, UndefValue::get(VectorType::get(m_ir_builder->getInt8Ty(), 16)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32));
auto res_v8i16 = m_ir_builder->CreateSExt(vb_v8i8, VectorType::get(m_ir_builder->getInt16Ty(), 8));
SetVr(vd, res_v8i16);
} }
void Compiler::VUPKHSH(u32 vd, u32 vb) { void Compiler::VUPKHSH(u32 vd, u32 vb) {
InterpreterCall("VUPKHSH", &PPUInterpreter::VUPKHSH, vd, vb); auto vb_v8i16 = GetVrAsIntVec(vb, 16);
u32 mask_v4i32[4] = { 4, 5, 6, 7 };
auto vb_v4i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v4i32));
auto res_v4i32 = m_ir_builder->CreateSExt(vb_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4));
SetVr(vd, res_v4i32);
} }
void Compiler::VUPKLPX(u32 vd, u32 vb) { void Compiler::VUPKLPX(u32 vd, u32 vb) {
InterpreterCall("VUPKLPX", &PPUInterpreter::VUPKLPX, vd, vb); auto vb_v8i16 = GetVrAsIntVec(vb, 16);
u32 mask_v8i32[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
vb_v8i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32));
auto vb_v4i32 = m_ir_builder->CreateBitCast(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 4));
vb_v4i32 = m_ir_builder->CreateAShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(10)));
auto tmp1_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(3)));
tmp1_v4i32 = m_ir_builder->CreateAnd(tmp1_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x00001F00)));
auto tmp2_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(6)));
tmp2_v4i32 = m_ir_builder->CreateAnd(tmp2_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x0000001F)));
auto res_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFF1F0000)));
res_v4i32 = m_ir_builder->CreateOr(res_v4i32, tmp1_v4i32);
res_v4i32 = m_ir_builder->CreateOr(res_v4i32, tmp2_v4i32);
SetVr(vd, res_v4i32);
} }
void Compiler::VUPKLSB(u32 vd, u32 vb) { void Compiler::VUPKLSB(u32 vd, u32 vb) {
InterpreterCall("VUPKLSB", &PPUInterpreter::VUPKLSB, vd, vb); auto vb_v16i8 = GetVrAsIntVec(vb, 8);
u32 mask_v8i32[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
auto vb_v8i8 = m_ir_builder->CreateShuffleVector(vb_v16i8, UndefValue::get(VectorType::get(m_ir_builder->getInt8Ty(), 16)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32));
auto res_v8i16 = m_ir_builder->CreateSExt(vb_v8i8, VectorType::get(m_ir_builder->getInt16Ty(), 8));
SetVr(vd, res_v8i16);
} }
void Compiler::VUPKLSH(u32 vd, u32 vb) { void Compiler::VUPKLSH(u32 vd, u32 vb) {
InterpreterCall("VUPKLSH", &PPUInterpreter::VUPKLSH, vd, vb); auto vb_v8i16 = GetVrAsIntVec(vb, 16);
u32 mask_v4i32[4] = { 0, 1, 2, 3 };
auto vb_v4i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v4i32));
auto res_v4i32 = m_ir_builder->CreateSExt(vb_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4));
SetVr(vd, res_v4i32);
} }
void Compiler::VXOR(u32 vd, u32 va, u32 vb) { void Compiler::VXOR(u32 vd, u32 va, u32 vb) {
@@ -5250,9 +5365,9 @@ std::shared_ptr<RecompilationEngine> RecompilationEngine::s_the_instance = nullp
RecompilationEngine::RecompilationEngine() RecompilationEngine::RecompilationEngine()
: ThreadBase("PPU Recompilation Engine") : ThreadBase("PPU Recompilation Engine")
, m_log(nullptr)
, m_next_ordinal(0) , m_next_ordinal(0)
, m_compiler(*this, ExecutionEngine::ExecuteFunction, ExecutionEngine::ExecuteTillReturn) , m_compiler(*this, ExecutionEngine::ExecuteFunction, ExecutionEngine::ExecuteTillReturn) {
, m_log(nullptr) {
m_compiler.RunAllTests(); m_compiler.RunAllTests();
} }

View File

@@ -1022,6 +1022,9 @@ namespace ppu_recompiler_llvm {
}; };
}; };
/// Log
llvm::raw_fd_ostream * m_log;
/// Lock for accessing m_pending_execution_traces. TODO: Eliminate this and use a lock-free queue. /// Lock for accessing m_pending_execution_traces. TODO: Eliminate this and use a lock-free queue.
std::mutex m_pending_execution_traces_lock; std::mutex m_pending_execution_traces_lock;
@@ -1047,9 +1050,6 @@ namespace ppu_recompiler_llvm {
/// PPU Compiler /// PPU Compiler
Compiler m_compiler; Compiler m_compiler;
/// Log
llvm::raw_fd_ostream * m_log;
/// Executable lookup table /// Executable lookup table
Executable m_executable_lookup[10000]; // TODO: Adjust size Executable m_executable_lookup[10000]; // TODO: Adjust size

View File

@@ -432,9 +432,10 @@ void Compiler::RunAllTests() {
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMRGLW, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMRGLW, 0, 5, 0, 1, 2);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMMBM, 0, 5, 0, 1, 2, 3); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMMBM, 0, 5, 0, 1, 2, 3);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMSHM, 0, 5, 0, 1, 2, 3); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMSHM, 0, 5, 0, 1, 2, 3);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMSHS, 0, 5, 0, 1, 2, 3);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMUBM, 0, 5, 0, 1, 2, 3); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMUBM, 0, 5, 0, 1, 2, 3);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMUHM, 0, 5, 0, 1, 2, 3); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMUHM, 0, 5, 0, 1, 2, 3);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VNMSUBFP, 0, 5, 0, 1, 2, 3); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMUHS, 0, 5, 0, 1, 2, 3);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULESB, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULESB, 0, 5, 0, 1, 2);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULESH, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULESH, 0, 5, 0, 1, 2);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULEUB, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULEUB, 0, 5, 0, 1, 2);
@@ -443,9 +444,11 @@ void Compiler::RunAllTests() {
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULOSH, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULOSH, 0, 5, 0, 1, 2);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULOUB, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULOUB, 0, 5, 0, 1, 2);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULOUH, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULOUH, 0, 5, 0, 1, 2);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VNMSUBFP, 0, 5, 0, 1, 2, 3);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VNOR, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VNOR, 0, 5, 0, 1, 2);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VOR, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VOR, 0, 5, 0, 1, 2);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPERM, 0, 5, 0, 1, 2, 3); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPERM, 0, 5, 0, 1, 2, 3);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKPX, 0, 5, 0, 1, 2);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKSHSS, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKSHSS, 0, 5, 0, 1, 2);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKSHUS, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKSHUS, 0, 5, 0, 1, 2);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKSWSS, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKSWSS, 0, 5, 0, 1, 2);
@@ -494,6 +497,12 @@ void Compiler::RunAllTests() {
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUHS, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUHS, 0, 5, 0, 1, 2);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUWM, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUWM, 0, 5, 0, 1, 2);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUWS, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUWS, 0, 5, 0, 1, 2);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKHPX, 0, 5, 0, 1);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKHSB, 0, 5, 0, 1);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKHSH, 0, 5, 0, 1);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKLPX, 0, 5, 0, 1);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKLSB, 0, 5, 0, 1);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKLSH, 0, 5, 0, 1);
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VXOR, 0, 5, 0, 1, 2); VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VXOR, 0, 5, 0, 1, 2);
// TODO: Rest of the vector instructions // TODO: Rest of the vector instructions