diff --git a/asmjit.vcxproj b/asmjit.vcxproj
index 80ffab87f4..98dbfb40b4 100644
--- a/asmjit.vcxproj
+++ b/asmjit.vcxproj
@@ -92,12 +92,12 @@
- .\libs\$(Configuration)\
+ .\libs\$(Configuration)_x86\
- .\libs\$(Configuration)\
+ .\libs\$(Configuration)_x86\
@@ -115,7 +115,7 @@
Level3
Disabled
- true
+ false
ASMJIT_STATIC;_MBCS;%(PreprocessorDefinitions)
@@ -139,7 +139,7 @@
MaxSpeed
true
true
- true
+ false
ASMJIT_STATIC;_UNICODE;UNICODE;%(PreprocessorDefinitions)
@@ -154,7 +154,7 @@
MaxSpeed
true
true
- true
+ false
ASMJIT_STATIC;_UNICODE;UNICODE;%(PreprocessorDefinitions)
diff --git a/rpcs3/Emu/Cell/PPCDecoder.h b/rpcs3/Emu/Cell/PPCDecoder.h
index 3185a441a9..fa54d4112c 100644
--- a/rpcs3/Emu/Cell/PPCDecoder.h
+++ b/rpcs3/Emu/Cell/PPCDecoder.h
@@ -12,19 +12,19 @@ public:
template
-static InstrList<1 << CodeField::size, TO>* new_list(const CodeField& func, InstrCaller* error_func = nullptr)
+static InstrList<(1 << (CodeField::size)), TO>* new_list(const CodeField& func, InstrCaller* error_func = nullptr)
{
- return new InstrList<1 << CodeField::size, TO>(func, error_func);
+ return new InstrList<(1 << (CodeField::size)), TO>(func, error_func);
}
template
-static InstrList<1 << CodeField::size, TO>* new_list(InstrList* parent, int opcode, const CodeField& func, InstrCaller* error_func = nullptr)
+static InstrList<(1 << (CodeField::size)), TO>* new_list(InstrList* parent, int opcode, const CodeField& func, InstrCaller* error_func = nullptr)
{
- return connect_list(parent, new InstrList<1 << CodeField::size, TO>(func, error_func), opcode);
+ return connect_list(parent, new InstrList<(1 << (CodeField::size)), TO>(func, error_func), opcode);
}
template
-static InstrList<1 << CodeField::size, TO>* new_list(InstrList* parent, const CodeField& func, InstrCaller* error_func = nullptr)
+static InstrList<(1 << (CodeField::size)), TO>* new_list(InstrList* parent, const CodeField& func, InstrCaller* error_func = nullptr)
{
- return connect_list(parent, new InstrList<1 << CodeField::size, TO>(func, error_func));
+ return connect_list(parent, new InstrList<(1 << (CodeField::size)), TO>(func, error_func));
}
\ No newline at end of file
diff --git a/rpcs3/Emu/Cell/SPUInterpreter.h b/rpcs3/Emu/Cell/SPUInterpreter.h
index dfca767c9c..fe55ed219e 100644
--- a/rpcs3/Emu/Cell/SPUInterpreter.h
+++ b/rpcs3/Emu/Cell/SPUInterpreter.h
@@ -32,121 +32,19 @@ private:
//0 - 10
void STOP(u32 code)
{
- CPU.SetExitStatus(code); // exit code (not status)
-
- switch (code)
- {
- case 0x110: /* ===== sys_spu_thread_receive_event ===== */
- {
- u32 spuq = 0;
- if (!CPU.SPU.Out_MBox.Pop(spuq))
- {
- ConLog.Error("sys_spu_thread_receive_event: cannot read Out_MBox");
- CPU.SPU.In_MBox.PushUncond(CELL_EINVAL); // ???
- return;
- }
-
- if (CPU.SPU.In_MBox.GetCount())
- {
- ConLog.Error("sys_spu_thread_receive_event(spuq=0x%x): In_MBox is not empty", spuq);
- CPU.SPU.In_MBox.PushUncond(CELL_EBUSY); // ???
- return;
- }
-
- if (Ini.HLELogging.GetValue())
- {
- ConLog.Write("sys_spu_thread_receive_event(spuq=0x%x)", spuq);
- }
-
- EventQueue* eq;
- if (!CPU.SPUQs.GetEventQueue(FIX_SPUQ(spuq), eq))
- {
- CPU.SPU.In_MBox.PushUncond(CELL_EINVAL); // TODO: check error value
- return;
- }
-
- u32 tid = GetCurrentSPUThread().GetId();
-
- eq->sq.push(tid); // add thread to sleep queue
-
- while (true)
- {
- switch (eq->owner.trylock(tid))
- {
- case SMR_OK:
- if (!eq->events.count())
- {
- eq->owner.unlock(tid);
- break;
- }
- else
- {
- u32 next = (eq->protocol == SYS_SYNC_FIFO) ? eq->sq.pop() : eq->sq.pop_prio();
- if (next != tid)
- {
- eq->owner.unlock(tid, next);
- break;
- }
- }
- case SMR_SIGNAL:
- {
- sys_event_data event;
- eq->events.pop(event);
- eq->owner.unlock(tid);
- CPU.SPU.In_MBox.PushUncond(CELL_OK);
- CPU.SPU.In_MBox.PushUncond(event.data1);
- CPU.SPU.In_MBox.PushUncond(event.data2);
- CPU.SPU.In_MBox.PushUncond(event.data3);
- return;
- }
- case SMR_FAILED: break;
- default: eq->sq.invalidate(tid); CPU.SPU.In_MBox.PushUncond(CELL_ECANCELED); return;
- }
-
- Sleep(1);
- if (Emu.IsStopped())
- {
- ConLog.Warning("sys_spu_thread_receive_event(spuq=0x%x) aborted", spuq);
- eq->sq.invalidate(tid);
- return;
- }
- }
- }
- break;
- case 0x102:
- if (!CPU.SPU.Out_MBox.GetCount())
- {
- ConLog.Error("sys_spu_thread_exit (no status, code 0x102)");
- }
- else if (Ini.HLELogging.GetValue())
- {
- // the real exit status
- ConLog.Write("sys_spu_thread_exit (status=0x%x)", CPU.SPU.Out_MBox.GetValue());
- }
- CPU.Stop();
- break;
- default:
- if (!CPU.SPU.Out_MBox.GetCount())
- {
- ConLog.Error("Unknown STOP code: 0x%x (no message)", code);
- }
- else
- {
- ConLog.Error("Unknown STOP code: 0x%x (message=0x%x)", code, CPU.SPU.Out_MBox.GetValue());
- }
- CPU.Stop();
- break;
- }
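+ // The STOP-code handling above (0x110 sys_spu_thread_receive_event, 0x102 thread exit, unknown codes) is delegated to SPUThread::DoStop().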
+ CPU.DoStop(code);
}
void LNOP()
{
}
void SYNC(u32 Cbit)
{
+ // This instruction must be used following a store instruction that modifies the instruction stream.
_mm_mfence();
}
void DSYNC()
{
+ // This instruction forces all earlier load, store, and channel instructions to complete before proceeding.
_mm_mfence();
}
void MFSPR(u32 rt, u32 sa)
@@ -389,6 +287,7 @@ private:
}
void STOPD(u32 rc, u32 ra, u32 rb)
{
+ UNIMPLEMENTED();
Emu.Pause();
}
void STQX(u32 rt, u32 ra, u32 rb)
diff --git a/rpcs3/Emu/Cell/SPURecompiler.h b/rpcs3/Emu/Cell/SPURecompiler.h
index f78cdf8339..478d5d3d16 100644
--- a/rpcs3/Emu/Cell/SPURecompiler.h
+++ b/rpcs3/Emu/Cell/SPURecompiler.h
@@ -16,8 +16,12 @@ using namespace asmjit::host;
struct SPUImmTable
{
- __m128i s19_to_s32[1 << 18];
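+ // one 128-bit entry per 19-bit signed immediate; the old size (1 << 18) covered only half the range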
+ __m128i s19_to_s32[1 << 19];
__m128i fsmbi_mask[1 << 16];
+ __m128 scale_to_float[256];
+ __m128 scale_to_int[256];
+ __m128i min_int;
+ __m128i max_int;
SPUImmTable()
{
@@ -38,6 +42,34 @@ struct SPUImmTable
fsmbi_mask[i].m128i_i8[j] = ((i >> j) & 0x1) ? 0xff : 0;
}
}
+ // scale table for (u)int -> float conversion
+ for (s32 i = 0; i < sizeof(scale_to_float) / sizeof(__m128); i++)
+ {
+ const float v = pow(2, i - 155);
+ scale_to_float[i].m128_f32[0] = v;
+ scale_to_float[i].m128_f32[1] = v;
+ scale_to_float[i].m128_f32[2] = v;
+ scale_to_float[i].m128_f32[3] = v;
+ }
+ // scale table for float -> (u)int conversion
+ for (s32 i = 0; i < sizeof(scale_to_int) / sizeof(__m128); i++)
+ {
+ const float v = pow(2, 173 - i);
+ scale_to_int[i].m128_f32[0] = v;
+ scale_to_int[i].m128_f32[1] = v;
+ scale_to_int[i].m128_f32[2] = v;
+ scale_to_int[i].m128_f32[3] = v;
+ }
+ // sign bit
+ min_int.m128i_u32[0] = 0x80000000;
+ min_int.m128i_u32[1] = 0x80000000;
+ min_int.m128i_u32[2] = 0x80000000;
+ min_int.m128i_u32[3] = 0x80000000;
+ // max signed int per lane; also used as an abs mask for floats (see FRSQEST)
+ max_int.m128i_u32[0] = 0x7fffffff;
+ max_int.m128i_u32[1] = 0x7fffffff;
+ max_int.m128i_u32[2] = 0x7fffffff;
+ max_int.m128i_u32[3] = 0x7fffffff;
}
};
@@ -46,10 +78,10 @@ class SPURecompiler;
class SPURecompilerCore : public CPUDecoder
{
SPURecompiler* m_enc;
- SPUInterpreter* m_inter;
SPUThread& CPU;
public:
+ SPUInterpreter* inter;
JitRuntime runtime;
Compiler compiler;
@@ -74,14 +106,29 @@ public:
virtual u8 DecodeMemory(const u64 address);
};
-#define cpu_xmm(x) oword_ptr(*cpu_var, offsetof(SPUThread, x))
-#define cpu_qword(x) qword_ptr(*cpu_var, offsetof(SPUThread, x))
-#define cpu_dword(x,...) dword_ptr(*cpu_var, __VA_ARGS__, offsetof(SPUThread, x))
-#define cpu_word(x) word_ptr(*cpu_var, offsetof(SPUThread, x))
-#define cpu_byte(x) byte_ptr(*cpu_var, offsetof(SPUThread, x))
+#define cpu_xmm(x) oword_ptr(*cpu_var, (sizeof((*(SPUThread*)nullptr).x) == 16) ? offsetof(SPUThread, x) : throw "sizeof("#x") != 16")
+#define cpu_qword(x) qword_ptr(*cpu_var, (sizeof((*(SPUThread*)nullptr).x) == 8) ? offsetof(SPUThread, x) : throw "sizeof("#x") != 8")
+#define cpu_dword(x) dword_ptr(*cpu_var, (sizeof((*(SPUThread*)nullptr).x) == 4) ? offsetof(SPUThread, x) : throw "sizeof("#x") != 4")
+#define cpu_word(x) word_ptr(*cpu_var, (sizeof((*(SPUThread*)nullptr).x) == 2) ? offsetof(SPUThread, x) : throw "sizeof("#x") != 2")
+#define cpu_byte(x) byte_ptr(*cpu_var, (sizeof((*(SPUThread*)nullptr).x) == 1) ? offsetof(SPUThread, x) : throw "sizeof("#x") != 1")
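+// the sizeof checks above turn a field/operand width mismatch into an exception at code-generation time instead of a silently wrong-sized access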
#define imm_xmm(x) oword_ptr(*imm_var, offsetof(SPUImmTable, x))
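+// WRAPPER_BEGIN/WRAPPER_END: interpreter fallback that wraps the handler body in a static function
+// and emits an asmjit call to it, passing the decoded operands as immediate arguments.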
+#define WRAPPER_BEGIN(a0, a1, a2, a3) struct opcode_wrapper \
+{ \
+ static void opcode(u32 a0, u32 a1, u32 a2, u32 a3) \
+{ \
+ SPUThread& CPU = *(SPUThread*)GetCurrentCPUThread();
+
+#define WRAPPER_END(a0, a1, a2, a3) } \
+}; \
+ X86X64CallNode* call = c.call(imm_ptr(&opcode_wrapper::opcode), kFuncConvHost, FuncBuilder4<void, u32, u32, u32, u32>()); \
+ call->setArg(0, imm_u(a0)); \
+ call->setArg(1, imm_u(a1)); \
+ call->setArg(2, imm_u(a2)); \
+ call->setArg(3, imm_u(a3));
+
+
class SPURecompiler : public SPUOpcodes
{
private:
@@ -94,6 +141,7 @@ public:
GpVar* cpu_var;
GpVar* ls_var;
GpVar* imm_var;
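+ // word index (PC >> 2) of the next instruction to execute, written before a block is finalized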
+ GpVar* pos_var;
SPURecompiler(SPUThread& cpu, SPURecompilerCore& rec) : CPU(cpu), rec(rec), c(rec.compiler)
{
@@ -103,19 +151,31 @@ private:
//0 - 10
void STOP(u32 code)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(code, xx, yy, zz);
+ CPU.DoStop(code);
+ WRAPPER_END(code, 0, 0, 0);
+ c.mov(*pos_var, (CPU.PC >> 2) + 1);
+ do_finalize = true;
+ ConLog.Write("STOP(code=%d)", code);
}
void LNOP()
{
- UNIMPLEMENTED();
+ /*c.mov(*pos_var, (CPU.PC >> 2) + 1);
+ do_finalize = true;
+ ConLog.Write("LNOP()");*/
}
void SYNC(u32 Cbit)
{
- UNIMPLEMENTED();
+ // This instruction must be used following a store instruction that modifies the instruction stream.
+ c.mfence();
+ c.mov(*pos_var, (CPU.PC >> 2) + 1);
+ do_finalize = true;
+ ConLog.Write("SYNC()");
}
void DSYNC()
{
- UNIMPLEMENTED();
+ // This instruction forces all earlier load, store, and channel instructions to complete before proceeding.
+ c.mfence();
}
void MFSPR(u32 rt, u32 sa)
{
@@ -134,233 +194,326 @@ private:
}
void RDCH(u32 rt, u32 ra)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, yy, zz);
CPU.ReadChannel(CPU.GPR[rt], ra);
+ WRAPPER_END(rt, ra, 0, 0);
+ // TODO
}
void RCHCNT(u32 rt, u32 ra)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, yy, zz);
CPU.GPR[rt].Reset();
CPU.GPR[rt]._u32[3] = CPU.GetChannelCount(ra);
+ WRAPPER_END(rt, ra, 0, 0);
+ // TODO
}
void SF(u32 rt, u32 ra, u32 rb)
{
- XmmVar v0(c);
+ WRAPPER_BEGIN(rt, ra, rb, zz);
+ CPU.GPR[rt]._u32[0] = CPU.GPR[rb]._u32[0] - CPU.GPR[ra]._u32[0];
+ CPU.GPR[rt]._u32[1] = CPU.GPR[rb]._u32[1] - CPU.GPR[ra]._u32[1];
+ CPU.GPR[rt]._u32[2] = CPU.GPR[rb]._u32[2] - CPU.GPR[ra]._u32[2];
+ CPU.GPR[rt]._u32[3] = CPU.GPR[rb]._u32[3] - CPU.GPR[ra]._u32[3];
+ WRAPPER_END(rt, ra, rb, 0);
+ /*XmmVar v0(c);
if (ra == rb)
{
// zero
c.xorps(v0, v0);
c.movaps(cpu_xmm(GPR[rt]), v0);
}
+ else
{
// sub from
c.movdqa(v0, cpu_xmm(GPR[rb]));
c.psubd(v0, cpu_xmm(GPR[ra]));
c.movdqa(cpu_xmm(GPR[rt]), v0);
- }
+ }*/
}
void OR(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
CPU.GPR[rt]._u32[0] = CPU.GPR[ra]._u32[0] | CPU.GPR[rb]._u32[0];
CPU.GPR[rt]._u32[1] = CPU.GPR[ra]._u32[1] | CPU.GPR[rb]._u32[1];
CPU.GPR[rt]._u32[2] = CPU.GPR[ra]._u32[2] | CPU.GPR[rb]._u32[2];
CPU.GPR[rt]._u32[3] = CPU.GPR[ra]._u32[3] | CPU.GPR[rb]._u32[3];
+ WRAPPER_END(rt, ra, rb, 0);
+ // TODO
}
void BG(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
CPU.GPR[rt]._u32[0] = CPU.GPR[ra]._u32[0] > CPU.GPR[rb]._u32[0] ? 0 : 1;
CPU.GPR[rt]._u32[1] = CPU.GPR[ra]._u32[1] > CPU.GPR[rb]._u32[1] ? 0 : 1;
CPU.GPR[rt]._u32[2] = CPU.GPR[ra]._u32[2] > CPU.GPR[rb]._u32[2] ? 0 : 1;
CPU.GPR[rt]._u32[3] = CPU.GPR[ra]._u32[3] > CPU.GPR[rb]._u32[3] ? 0 : 1;
+ WRAPPER_END(rt, ra, rb, 0);
+ /*XmmVar v0(c);
+ if (ra == rb)
+ {
+ // load {1,1,1,1}
+ c.movaps(v0, imm_xmm(s19_to_s32[1]));
+ c.movaps(cpu_xmm(GPR[rt]), v0);
+ }
+ else
+ {
+ // compare if-greater-than
+ c.movdqa(v0, cpu_xmm(GPR[rb]));
+ c.psubd(v0, cpu_xmm(GPR[ra]));
+ c.psrad(v0, 32);
+ c.paddd(v0, imm_xmm(s19_to_s32[1]));
+ c.movdqa(cpu_xmm(GPR[rt]), v0);
+ }*/
}
void SFH(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
for (int h = 0; h < 8; h++)
CPU.GPR[rt]._u16[h] = CPU.GPR[rb]._u16[h] - CPU.GPR[ra]._u16[h];
+ WRAPPER_END(rt, ra, rb, 0);
}
void NOR(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
CPU.GPR[rt]._u32[0] = ~(CPU.GPR[ra]._u32[0] | CPU.GPR[rb]._u32[0]);
CPU.GPR[rt]._u32[1] = ~(CPU.GPR[ra]._u32[1] | CPU.GPR[rb]._u32[1]);
CPU.GPR[rt]._u32[2] = ~(CPU.GPR[ra]._u32[2] | CPU.GPR[rb]._u32[2]);
CPU.GPR[rt]._u32[3] = ~(CPU.GPR[ra]._u32[3] | CPU.GPR[rb]._u32[3]);
+ WRAPPER_END(rt, ra, rb, 0);
+ // TODO
}
void ABSDB(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
for (int b = 0; b < 16; b++)
CPU.GPR[rt]._u8[b] = CPU.GPR[rb]._u8[b] > CPU.GPR[ra]._u8[b] ? CPU.GPR[rb]._u8[b] - CPU.GPR[ra]._u8[b] : CPU.GPR[ra]._u8[b] - CPU.GPR[rb]._u8[b];
+ WRAPPER_END(rt, ra, rb, 0);
}
void ROT(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
CPU.GPR[rt]._u32[0] = (CPU.GPR[ra]._u32[0] << (CPU.GPR[rb]._u32[0] & 0x1f)) | (CPU.GPR[ra]._u32[0] >> (32 - (CPU.GPR[rb]._u32[0] & 0x1f)));
CPU.GPR[rt]._u32[1] = (CPU.GPR[ra]._u32[1] << (CPU.GPR[rb]._u32[1] & 0x1f)) | (CPU.GPR[ra]._u32[1] >> (32 - (CPU.GPR[rb]._u32[1] & 0x1f)));
CPU.GPR[rt]._u32[2] = (CPU.GPR[ra]._u32[2] << (CPU.GPR[rb]._u32[2] & 0x1f)) | (CPU.GPR[ra]._u32[2] >> (32 - (CPU.GPR[rb]._u32[2] & 0x1f)));
CPU.GPR[rt]._u32[3] = (CPU.GPR[ra]._u32[3] << (CPU.GPR[rb]._u32[3] & 0x1f)) | (CPU.GPR[ra]._u32[3] >> (32 - (CPU.GPR[rb]._u32[3] & 0x1f)));
+ WRAPPER_END(rt, ra, rb, 0);
}
void ROTM(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
CPU.GPR[rt]._u32[0] = ((0 - CPU.GPR[rb]._u32[0]) % 64) < 32 ? CPU.GPR[ra]._u32[0] >> ((0 - CPU.GPR[rb]._u32[0]) % 64) : 0;
CPU.GPR[rt]._u32[1] = ((0 - CPU.GPR[rb]._u32[1]) % 64) < 32 ? CPU.GPR[ra]._u32[1] >> ((0 - CPU.GPR[rb]._u32[1]) % 64) : 0;
CPU.GPR[rt]._u32[2] = ((0 - CPU.GPR[rb]._u32[2]) % 64) < 32 ? CPU.GPR[ra]._u32[2] >> ((0 - CPU.GPR[rb]._u32[2]) % 64) : 0;
CPU.GPR[rt]._u32[3] = ((0 - CPU.GPR[rb]._u32[3]) % 64) < 32 ? CPU.GPR[ra]._u32[3] >> ((0 - CPU.GPR[rb]._u32[3]) % 64) : 0;
+ WRAPPER_END(rt, ra, rb, 0);
}
void ROTMA(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
CPU.GPR[rt]._i32[0] = ((0 - CPU.GPR[rb]._i32[0]) % 64) < 32 ? CPU.GPR[ra]._i32[0] >> ((0 - CPU.GPR[rb]._i32[0]) % 64) : CPU.GPR[ra]._i32[0] >> 31;
CPU.GPR[rt]._i32[1] = ((0 - CPU.GPR[rb]._i32[1]) % 64) < 32 ? CPU.GPR[ra]._i32[1] >> ((0 - CPU.GPR[rb]._i32[1]) % 64) : CPU.GPR[ra]._i32[1] >> 31;
CPU.GPR[rt]._i32[2] = ((0 - CPU.GPR[rb]._i32[2]) % 64) < 32 ? CPU.GPR[ra]._i32[2] >> ((0 - CPU.GPR[rb]._i32[2]) % 64) : CPU.GPR[ra]._i32[2] >> 31;
CPU.GPR[rt]._i32[3] = ((0 - CPU.GPR[rb]._i32[3]) % 64) < 32 ? CPU.GPR[ra]._i32[3] >> ((0 - CPU.GPR[rb]._i32[3]) % 64) : CPU.GPR[ra]._i32[3] >> 31;
+ WRAPPER_END(rt, ra, rb, 0);
}
void SHL(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
CPU.GPR[rt]._u32[0] = (CPU.GPR[rb]._u32[0] & 0x3f) > 31 ? 0 : CPU.GPR[ra]._u32[0] << (CPU.GPR[rb]._u32[0] & 0x3f);
CPU.GPR[rt]._u32[1] = (CPU.GPR[rb]._u32[1] & 0x3f) > 31 ? 0 : CPU.GPR[ra]._u32[1] << (CPU.GPR[rb]._u32[1] & 0x3f);
CPU.GPR[rt]._u32[2] = (CPU.GPR[rb]._u32[2] & 0x3f) > 31 ? 0 : CPU.GPR[ra]._u32[2] << (CPU.GPR[rb]._u32[2] & 0x3f);
CPU.GPR[rt]._u32[3] = (CPU.GPR[rb]._u32[3] & 0x3f) > 31 ? 0 : CPU.GPR[ra]._u32[3] << (CPU.GPR[rb]._u32[3] & 0x3f);
+ WRAPPER_END(rt, ra, rb, 0);
+ // AVX2: masking with 0x3f + VPSLLVD may be better
+ /*for (u32 i = 0; i < 4; i++)
+ {
+ GpVar v0(c, kVarTypeUInt32);
+ c.mov(v0, cpu_dword(GPR[ra]._u32[i]));
+ GpVar shift(c, kVarTypeUInt32);
+ c.mov(shift, cpu_dword(GPR[rb]._u32[i]));
+ GpVar z(c);
+ c.xor_(z, z);
+ c.test(shift, 0x20);
+ c.cmovnz(v0, z);
+ c.shl(v0, shift);
+ c.mov(cpu_dword(GPR[rt]._u32[i]), v0);
+ }*/
}
void ROTH(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
for (int h = 0; h < 8; h++)
CPU.GPR[rt]._u16[h] = (CPU.GPR[ra]._u16[h] << (CPU.GPR[rb]._u16[h] & 0xf)) | (CPU.GPR[ra]._u16[h] >> (16 - (CPU.GPR[rb]._u16[h] & 0xf)));
+ WRAPPER_END(rt, ra, rb, 0);
}
void ROTHM(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
for (int h = 0; h < 8; h++)
CPU.GPR[rt]._u16[h] = ((0 - CPU.GPR[rb]._u16[h]) % 32) < 16 ? CPU.GPR[ra]._u16[h] >> ((0 - CPU.GPR[rb]._u16[h]) % 32) : 0;
+ WRAPPER_END(rt, ra, rb, 0);
}
void ROTMAH(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
for (int h = 0; h < 8; h++)
CPU.GPR[rt]._i16[h] = ((0 - CPU.GPR[rb]._i16[h]) % 32) < 16 ? CPU.GPR[ra]._i16[h] >> ((0 - CPU.GPR[rb]._i16[h]) % 32) : CPU.GPR[ra]._i16[h] >> 15;
+ WRAPPER_END(rt, ra, rb, 0);
}
void SHLH(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
for (int h = 0; h < 8; h++)
CPU.GPR[rt]._u16[h] = (CPU.GPR[rb]._u16[h] & 0x1f) > 15 ? 0 : CPU.GPR[ra]._u16[h] << (CPU.GPR[rb]._u16[h] & 0x1f);
+ WRAPPER_END(rt, ra, rb, 0);
}
void ROTI(u32 rt, u32 ra, s32 i7)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i7, zz);
const int nRot = i7 & 0x1f;
CPU.GPR[rt]._u32[0] = (CPU.GPR[ra]._u32[0] << nRot) | (CPU.GPR[ra]._u32[0] >> (32 - nRot));
CPU.GPR[rt]._u32[1] = (CPU.GPR[ra]._u32[1] << nRot) | (CPU.GPR[ra]._u32[1] >> (32 - nRot));
CPU.GPR[rt]._u32[2] = (CPU.GPR[ra]._u32[2] << nRot) | (CPU.GPR[ra]._u32[2] >> (32 - nRot));
CPU.GPR[rt]._u32[3] = (CPU.GPR[ra]._u32[3] << nRot) | (CPU.GPR[ra]._u32[3] >> (32 - nRot));
+ WRAPPER_END(rt, ra, i7, 0);
}
void ROTMI(u32 rt, u32 ra, s32 i7)
{
- UNIMPLEMENTED();
- const int nRot = (0 - i7) % 64;
+ WRAPPER_BEGIN(rt, ra, i7, zz);
+ const int nRot = (0 - (s32)i7) % 64;
CPU.GPR[rt]._u32[0] = nRot < 32 ? CPU.GPR[ra]._u32[0] >> nRot : 0;
CPU.GPR[rt]._u32[1] = nRot < 32 ? CPU.GPR[ra]._u32[1] >> nRot : 0;
CPU.GPR[rt]._u32[2] = nRot < 32 ? CPU.GPR[ra]._u32[2] >> nRot : 0;
CPU.GPR[rt]._u32[3] = nRot < 32 ? CPU.GPR[ra]._u32[3] >> nRot : 0;
+ WRAPPER_END(rt, ra, i7, 0);
+ // TODO
}
void ROTMAI(u32 rt, u32 ra, s32 i7)
{
- UNIMPLEMENTED();
- const int nRot = (0 - i7) % 64;
+ WRAPPER_BEGIN(rt, ra, i7, zz);
+ const int nRot = (0 - (s32)i7) % 64;
CPU.GPR[rt]._i32[0] = nRot < 32 ? CPU.GPR[ra]._i32[0] >> nRot : CPU.GPR[ra]._i32[0] >> 31;
CPU.GPR[rt]._i32[1] = nRot < 32 ? CPU.GPR[ra]._i32[1] >> nRot : CPU.GPR[ra]._i32[1] >> 31;
CPU.GPR[rt]._i32[2] = nRot < 32 ? CPU.GPR[ra]._i32[2] >> nRot : CPU.GPR[ra]._i32[2] >> 31;
CPU.GPR[rt]._i32[3] = nRot < 32 ? CPU.GPR[ra]._i32[3] >> nRot : CPU.GPR[ra]._i32[3] >> 31;
+ WRAPPER_END(rt, ra, i7, 0);
}
void SHLI(u32 rt, u32 ra, s32 i7)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i7, zz);
const u32 s = i7 & 0x3f;
-
for (u32 j = 0; j < 4; ++j)
CPU.GPR[rt]._u32[j] = CPU.GPR[ra]._u32[j] << s;
+ WRAPPER_END(rt, ra, i7, 0);
+ // TODO
}
void ROTHI(u32 rt, u32 ra, s32 i7)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i7, zz);
const int nRot = i7 & 0xf;
-
for (int h = 0; h < 8; h++)
CPU.GPR[rt]._u16[h] = (CPU.GPR[ra]._u16[h] << nRot) | (CPU.GPR[ra]._u16[h] >> (16 - nRot));
+ WRAPPER_END(rt, ra, i7, 0);
}
void ROTHMI(u32 rt, u32 ra, s32 i7)
{
- UNIMPLEMENTED();
- const int nRot = (0 - i7) % 32;
-
+ WRAPPER_BEGIN(rt, ra, i7, zz);
+ const int nRot = (0 - (s32)i7) % 32;
for (int h = 0; h < 8; h++)
CPU.GPR[rt]._u16[h] = nRot < 16 ? CPU.GPR[ra]._u16[h] >> nRot : 0;
+ WRAPPER_END(rt, ra, i7, 0);
}
void ROTMAHI(u32 rt, u32 ra, s32 i7)
{
- UNIMPLEMENTED();
- const int nRot = (0 - i7) % 32;
-
+ WRAPPER_BEGIN(rt, ra, i7, zz);
+ const int nRot = (0 - (s32)i7) % 32;
for (int h = 0; h < 8; h++)
CPU.GPR[rt]._i16[h] = nRot < 16 ? CPU.GPR[ra]._i16[h] >> nRot : CPU.GPR[ra]._i16[h] >> 15;
+ WRAPPER_END(rt, ra, i7, 0);
}
void SHLHI(u32 rt, u32 ra, s32 i7)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i7, zz);
const int nRot = i7 & 0x1f;
-
for (int h = 0; h < 8; h++)
CPU.GPR[rt]._u16[h] = nRot > 15 ? 0 : CPU.GPR[ra]._u16[h] << nRot;
+ WRAPPER_END(rt, ra, i7, 0);
}
void A(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
CPU.GPR[rt]._u32[0] = CPU.GPR[ra]._u32[0] + CPU.GPR[rb]._u32[0];
CPU.GPR[rt]._u32[1] = CPU.GPR[ra]._u32[1] + CPU.GPR[rb]._u32[1];
CPU.GPR[rt]._u32[2] = CPU.GPR[ra]._u32[2] + CPU.GPR[rb]._u32[2];
CPU.GPR[rt]._u32[3] = CPU.GPR[ra]._u32[3] + CPU.GPR[rb]._u32[3];
+ WRAPPER_END(rt, ra, rb, 0);
+ /*XmmVar v0(c);
+ c.movdqa(v0, cpu_xmm(GPR[ra]));
+ c.paddd(v0, cpu_xmm(GPR[rb]));
+ c.movdqa(cpu_xmm(GPR[rt]), v0);*/
}
void AND(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
CPU.GPR[rt]._u32[0] = CPU.GPR[ra]._u32[0] & CPU.GPR[rb]._u32[0];
CPU.GPR[rt]._u32[1] = CPU.GPR[ra]._u32[1] & CPU.GPR[rb]._u32[1];
CPU.GPR[rt]._u32[2] = CPU.GPR[ra]._u32[2] & CPU.GPR[rb]._u32[2];
CPU.GPR[rt]._u32[3] = CPU.GPR[ra]._u32[3] & CPU.GPR[rb]._u32[3];
+ WRAPPER_END(rt, ra, rb, 0);
+ /*XmmVar v0(c);
+ if (ra == rb)
+ {
+ if (rt == ra)
+ {
+ // nop
+ }
+ else
+ {
+ // mov
+ c.movaps(v0, cpu_xmm(GPR[ra]));
+ c.movaps(cpu_xmm(GPR[rt]), v0);
+ }
+ }
+ else
+ {
+ // and
+ c.movaps(v0, cpu_xmm(GPR[ra]));
+ c.andps(v0, cpu_xmm(GPR[rb]));
+ c.movaps(cpu_xmm(GPR[rt]), v0);
+ }*/
}
void CG(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
CPU.GPR[rt]._u32[0] = ((CPU.GPR[ra]._u32[0] + CPU.GPR[rb]._u32[0]) < CPU.GPR[ra]._u32[0]) ? 1 : 0;
CPU.GPR[rt]._u32[1] = ((CPU.GPR[ra]._u32[1] + CPU.GPR[rb]._u32[1]) < CPU.GPR[ra]._u32[1]) ? 1 : 0;
CPU.GPR[rt]._u32[2] = ((CPU.GPR[ra]._u32[2] + CPU.GPR[rb]._u32[2]) < CPU.GPR[ra]._u32[2]) ? 1 : 0;
CPU.GPR[rt]._u32[3] = ((CPU.GPR[ra]._u32[3] + CPU.GPR[rb]._u32[3]) < CPU.GPR[ra]._u32[3]) ? 1 : 0;
+ WRAPPER_END(rt, ra, rb, 0);
+ // TODO
}
void AH(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
for (int h = 0; h < 8; h++)
CPU.GPR[rt]._u16[h] = CPU.GPR[ra]._u16[h] + CPU.GPR[rb]._u16[h];
+ WRAPPER_END(rt, ra, rb, 0);
}
void NAND(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
CPU.GPR[rt]._u32[0] = ~(CPU.GPR[ra]._u32[0] & CPU.GPR[rb]._u32[0]);
CPU.GPR[rt]._u32[1] = ~(CPU.GPR[ra]._u32[1] & CPU.GPR[rb]._u32[1]);
CPU.GPR[rt]._u32[2] = ~(CPU.GPR[ra]._u32[2] & CPU.GPR[rb]._u32[2]);
CPU.GPR[rt]._u32[3] = ~(CPU.GPR[ra]._u32[3] & CPU.GPR[rb]._u32[3]);
+ WRAPPER_END(rt, ra, rb, 0);
}
void AVGB(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
for (int b = 0; b < 16; b++)
CPU.GPR[rt]._u8[b] = (CPU.GPR[ra]._u8[b] + CPU.GPR[rb]._u8[b] + 1) >> 1;
+ WRAPPER_END(rt, ra, rb, 0);
}
void MTSPR(u32 rt, u32 sa)
{
@@ -373,8 +526,40 @@ private:
}
void WRCH(u32 ra, u32 rt)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(ra, rt, yy, zz);
CPU.WriteChannel(ra, CPU.GPR[rt]);
+ WRAPPER_END(ra, rt, 0, 0);
+ /*GpVar v(c, kVarTypeUInt32);
+ c.mov(v, cpu_dword(GPR[rt]._u32[3]));
+ switch (ra)
+ {
+ case MFC_LSA:
+ c.mov(cpu_dword(MFC1.LSA.m_value[0]), v);
+ break;
+
+ case MFC_EAH:
+ c.mov(cpu_dword(MFC1.EAH.m_value[0]), v);
+ break;
+
+ case MFC_EAL:
+ c.mov(cpu_dword(MFC1.EAL.m_value[0]), v);
+ break;
+
+ case MFC_Size:
+ c.mov(cpu_word(MFC1.Size_Tag.m_val16[1]), v);
+ break;
+
+ case MFC_TagID:
+ c.mov(cpu_word(MFC1.Size_Tag.m_val16[0]), v);
+ break;
+
+ default:
+ {
+ X86X64CallNode* call = c.call(imm_ptr(&WRCH_wrapper::WRCH), kFuncConvHost, FuncBuilder2<void, u32, u32>());
+ call->setArg(0, imm_u(ra));
+ call->setArg(1, v);
+ }
+ }*/
}
void BIZ(u32 rt, u32 ra)
{
@@ -407,7 +592,7 @@ private:
}
void STQX(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
u32 lsa = (CPU.GPR[ra]._u32[3] + CPU.GPR[rb]._u32[3]) & 0x3fff0;
if(!CPU.IsGoodLSA(lsa))
{
@@ -417,19 +602,27 @@ private:
}
CPU.WriteLS128(lsa, CPU.GPR[rt]._u128);
+ WRAPPER_END(rt, ra, rb, 0);
}
void BI(u32 ra)
{
- UNIMPLEMENTED();
- CPU.SetBranch(branchTarget(CPU.GPR[ra]._u32[3], 0));
+ do_finalize = true;
+ c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3]));
+ c.shr(*pos_var, 2);
+ //ConLog.Write("BI(ra=%d)", ra);
}
void BISL(u32 rt, u32 ra)
{
- UNIMPLEMENTED();
- const u32 NewPC = CPU.GPR[ra]._u32[3];
- CPU.GPR[rt].Reset();
- CPU.GPR[rt]._u32[3] = CPU.PC + 4;
- CPU.SetBranch(branchTarget(NewPC, 0));
+ do_finalize = true;
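+ // note: int3 below emits a software breakpoint (debug trap) before the link register is written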
+ c.int3();
+ c.xor_(*pos_var, *pos_var);
+ c.mov(cpu_dword(GPR[rt]._u32[0]), *pos_var);
+ c.mov(cpu_dword(GPR[rt]._u32[1]), *pos_var);
+ c.mov(cpu_dword(GPR[rt]._u32[2]), *pos_var);
+ c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3]));
+ c.mov(cpu_dword(GPR[rt]._u32[3]), (CPU.PC >> 2) + 1);
+ c.shr(*pos_var, 2);
+ ConLog.Write("BISL(rt=%d,ra=%d)", rt, ra);
}
void IRET(u32 ra)
{
@@ -442,77 +635,90 @@ private:
}
void HBR(u32 p, u32 ro, u32 ra)
{
- UNIMPLEMENTED();
}
void GB(u32 rt, u32 ra)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, yy, zz);
CPU.GPR[rt]._u32[3] = (CPU.GPR[ra]._u32[0] & 1) |
((CPU.GPR[ra]._u32[1] & 1) << 1) |
((CPU.GPR[ra]._u32[2] & 1) << 2) |
((CPU.GPR[ra]._u32[3] & 1) << 3);
CPU.GPR[rt]._u32[2] = 0;
CPU.GPR[rt]._u64[0] = 0;
+ WRAPPER_END(rt, ra, 0, 0);
+ // TODO
}
void GBH(u32 rt, u32 ra)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, yy, zz);
u32 temp = 0;
for (int h = 0; h < 8; h++)
temp |= (CPU.GPR[ra]._u16[h] & 1) << h;
CPU.GPR[rt]._u32[3] = temp;
CPU.GPR[rt]._u32[2] = 0;
CPU.GPR[rt]._u64[0] = 0;
+ WRAPPER_END(rt, ra, 0, 0);
}
void GBB(u32 rt, u32 ra)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, yy, zz);
u32 temp = 0;
for (int b = 0; b < 16; b++)
temp |= (CPU.GPR[ra]._u8[b] & 1) << b;
CPU.GPR[rt]._u32[3] = temp;
CPU.GPR[rt]._u32[2] = 0;
CPU.GPR[rt]._u64[0] = 0;
+ WRAPPER_END(rt, ra, 0, 0);
}
void FSM(u32 rt, u32 ra)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, yy, zz);
const u32 pref = CPU.GPR[ra]._u32[3];
for (int w = 0; w < 4; w++)
CPU.GPR[rt]._u32[w] = (pref & (1 << w)) ? ~0 : 0;
+ WRAPPER_END(rt, ra, 0, 0);
}
void FSMH(u32 rt, u32 ra)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, yy, zz);
const u32 pref = CPU.GPR[ra]._u32[3];
for (int h = 0; h < 8; h++)
CPU.GPR[rt]._u16[h] = (pref & (1 << h)) ? ~0 : 0;
+ WRAPPER_END(rt, ra, 0, 0);
}
void FSMB(u32 rt, u32 ra)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, yy, zz);
const u32 pref = CPU.GPR[ra]._u32[3];
for (int b = 0; b < 16; b++)
CPU.GPR[rt]._u8[b] = (pref & (1 << b)) ? ~0 : 0;
+ WRAPPER_END(rt, ra, 0, 0);
}
void FREST(u32 rt, u32 ra)
{
- UNIMPLEMENTED();
- //CPU.GPR[rt]._m128 = _mm_rcp_ps(CPU.GPR[ra]._m128);
+ WRAPPER_BEGIN(rt, ra, yy, zz);
for (int i = 0; i < 4; i++)
CPU.GPR[rt]._f[i] = 1 / CPU.GPR[ra]._f[i];
+ WRAPPER_END(rt, ra, 0, 0);
+ /*XmmVar v0(c);
+ c.rcpps(v0, cpu_xmm(GPR[ra]));
+ c.movaps(cpu_xmm(GPR[rt]), v0);*/
}
void FRSQEST(u32 rt, u32 ra)
{
- UNIMPLEMENTED();
- //const __u32x4 FloatAbsMask = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff};
- //CPU.GPR[rt]._m128 = _mm_rsqrt_ps(_mm_and_ps(CPU.GPR[ra]._m128, FloatAbsMask.m128));
+ WRAPPER_BEGIN(rt, ra, yy, zz);
for (int i = 0; i < 4; i++)
CPU.GPR[rt]._f[i] = 1 / sqrt(abs(CPU.GPR[ra]._f[i]));
+ WRAPPER_END(rt, ra, 0, 0);
+ /*XmmVar v0(c);
+ c.movaps(v0, cpu_xmm(GPR[ra]));
+ c.andps(v0, imm_xmm(max_int));
+ c.rsqrtps(v0, v0);
+ c.movaps(cpu_xmm(GPR[rt]), v0);*/
}
void LQX(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
u32 a = CPU.GPR[ra]._u32[3], b = CPU.GPR[rb]._u32[3];
u32 lsa = (a + b) & 0x3fff0;
@@ -525,218 +731,290 @@ private:
}
CPU.GPR[rt]._u128 = CPU.ReadLS128(lsa);
+ WRAPPER_END(rt, ra, rb, 0);
}
void ROTQBYBI(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
const int s = (CPU.GPR[rb]._u32[3] >> 3) & 0xf;
const SPU_GPR_hdr temp = CPU.GPR[ra];
for (int b = 0; b < 16; b++)
CPU.GPR[rt]._u8[b] = temp._u8[(b - s) & 0xf];
+ WRAPPER_END(rt, ra, rb, 0);
}
void ROTQMBYBI(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
const int s = (0 - (CPU.GPR[rb]._u32[3] >> 3)) & 0x1f;
const SPU_GPR_hdr temp = CPU.GPR[ra];
CPU.GPR[rt].Reset();
for (int b = 0; b < 16 - s; b++)
CPU.GPR[rt]._u8[b] = temp._u8[b + s];
+ WRAPPER_END(rt, ra, rb, 0);
}
void SHLQBYBI(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
const int s = (CPU.GPR[rb]._u32[3] >> 3) & 0x1f;
const SPU_GPR_hdr temp = CPU.GPR[ra];
CPU.GPR[rt].Reset();
for (int b = s; b < 16; b++)
CPU.GPR[rt]._u8[b] = temp._u8[b - s];
+ WRAPPER_END(rt, ra, rb, 0);
}
void CBX(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
const u32 t = (CPU.GPR[rb]._u32[3] + CPU.GPR[ra]._u32[3]) & 0xF;
CPU.GPR[rt]._u64[0] = (u64)0x18191A1B1C1D1E1F;
CPU.GPR[rt]._u64[1] = (u64)0x1011121314151617;
CPU.GPR[rt]._u8[15 - t] = 0x03;
+ WRAPPER_END(rt, ra, rb, 0);
}
void CHX(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
const u32 t = (CPU.GPR[rb]._u32[3] + CPU.GPR[ra]._u32[3]) & 0xE;
CPU.GPR[rt]._u64[0] = (u64)0x18191A1B1C1D1E1F;
CPU.GPR[rt]._u64[1] = (u64)0x1011121314151617;
CPU.GPR[rt]._u16[7 - (t >> 1)] = 0x0203;
+ WRAPPER_END(rt, ra, rb, 0);
}
void CWX(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
const u32 t = (CPU.GPR[ra]._u32[3] + CPU.GPR[rb]._u32[3]) & 0xC;
CPU.GPR[rt]._u64[0] = (u64)0x18191A1B1C1D1E1F;
CPU.GPR[rt]._u64[1] = (u64)0x1011121314151617;
CPU.GPR[rt]._u32[3 - (t >> 2)] = 0x00010203;
+ WRAPPER_END(rt, ra, rb, 0);
}
void CDX(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
const u32 t = (CPU.GPR[rb]._u32[3] + CPU.GPR[ra]._u32[3]) & 0x8;
CPU.GPR[rt]._u64[0] = (u64)0x18191A1B1C1D1E1F;
CPU.GPR[rt]._u64[1] = (u64)0x1011121314151617;
CPU.GPR[rt]._u64[1 - (t >> 3)] = (u64)0x0001020304050607;
+ WRAPPER_END(rt, ra, rb, 0);
}
void ROTQBI(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
const int t = CPU.GPR[rb]._u32[3] & 0x7;
const SPU_GPR_hdr temp = CPU.GPR[ra];
CPU.GPR[rt]._u32[0] = (temp._u32[0] << t) | (temp._u32[3] >> (32 - t));
CPU.GPR[rt]._u32[1] = (temp._u32[1] << t) | (temp._u32[0] >> (32 - t));
CPU.GPR[rt]._u32[2] = (temp._u32[2] << t) | (temp._u32[1] >> (32 - t));
CPU.GPR[rt]._u32[3] = (temp._u32[3] << t) | (temp._u32[2] >> (32 - t));
+ WRAPPER_END(rt, ra, rb, 0);
}
void ROTQMBI(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
const int t = (0 - CPU.GPR[rb]._u32[3]) & 0x7;
const SPU_GPR_hdr temp = CPU.GPR[ra];
CPU.GPR[rt]._u32[0] = (temp._u32[0] >> t) | (temp._u32[1] << (32 - t));
CPU.GPR[rt]._u32[1] = (temp._u32[1] >> t) | (temp._u32[2] << (32 - t));
CPU.GPR[rt]._u32[2] = (temp._u32[2] >> t) | (temp._u32[3] << (32 - t));
CPU.GPR[rt]._u32[3] = (temp._u32[3] >> t);
+ WRAPPER_END(rt, ra, rb, 0);
}
void SHLQBI(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
const int t = CPU.GPR[rb]._u32[3] & 0x7;
const SPU_GPR_hdr temp = CPU.GPR[ra];
CPU.GPR[rt]._u32[0] = (temp._u32[0] << t);
CPU.GPR[rt]._u32[1] = (temp._u32[1] << t) | (temp._u32[0] >> (32 - t));
CPU.GPR[rt]._u32[2] = (temp._u32[2] << t) | (temp._u32[1] >> (32 - t));
CPU.GPR[rt]._u32[3] = (temp._u32[3] << t) | (temp._u32[2] >> (32 - t));
+ WRAPPER_END(rt, ra, rb, 0);
}
void ROTQBY(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
const int s = CPU.GPR[rb]._u32[3] & 0xf;
const SPU_GPR_hdr temp = CPU.GPR[ra];
for (int b = 0; b < 16; ++b)
CPU.GPR[rt]._u8[b] = temp._u8[(b - s) & 0xf];
+ WRAPPER_END(rt, ra, rb, 0);
}
void ROTQMBY(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
const int s = (0 - CPU.GPR[rb]._u32[3]) & 0x1f;
const SPU_GPR_hdr temp = CPU.GPR[ra];
CPU.GPR[rt].Reset();
for (int b = 0; b < 16 - s; b++)
CPU.GPR[rt]._u8[b] = temp._u8[b + s];
+ WRAPPER_END(rt, ra, rb, 0);
}
void SHLQBY(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
const int s = CPU.GPR[rb]._u32[3] & 0x1f;
const SPU_GPR_hdr temp = CPU.GPR[ra];
CPU.GPR[rt].Reset();
for (int b = s; b < 16; b++)
CPU.GPR[rt]._u8[b] = temp._u8[b - s];
+ WRAPPER_END(rt, ra, rb, 0);
}
void ORX(u32 rt, u32 ra)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, yy, zz);
CPU.GPR[rt]._u32[3] = CPU.GPR[ra]._u32[0] | CPU.GPR[ra]._u32[1] | CPU.GPR[ra]._u32[2] | CPU.GPR[ra]._u32[3];
CPU.GPR[rt]._u32[2] = 0;
CPU.GPR[rt]._u64[0] = 0;
+ WRAPPER_END(rt, ra, 0, 0);
}
void CBD(u32 rt, u32 ra, s32 i7)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i7, zz);
const int t = (CPU.GPR[ra]._u32[3] + i7) & 0xF;
CPU.GPR[rt]._u64[0] = (u64)0x18191A1B1C1D1E1F;
CPU.GPR[rt]._u64[1] = (u64)0x1011121314151617;
CPU.GPR[rt]._u8[15 - t] = 0x03;
+ WRAPPER_END(rt, ra, i7, 0);
}
void CHD(u32 rt, u32 ra, s32 i7)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i7, zz);
const int t = (CPU.GPR[ra]._u32[3] + i7) & 0xE;
CPU.GPR[rt]._u64[0] = (u64)0x18191A1B1C1D1E1F;
CPU.GPR[rt]._u64[1] = (u64)0x1011121314151617;
CPU.GPR[rt]._u16[7 - (t >> 1)] = 0x0203;
+ WRAPPER_END(rt, ra, i7, 0);
}
void CWD(u32 rt, u32 ra, s32 i7)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i7, zz);
const int t = (CPU.GPR[ra]._u32[3] + i7) & 0xC;
CPU.GPR[rt]._u64[0] = (u64)0x18191A1B1C1D1E1F;
CPU.GPR[rt]._u64[1] = (u64)0x1011121314151617;
CPU.GPR[rt]._u32[3 - (t >> 2)] = 0x00010203;
+ WRAPPER_END(rt, ra, i7, 0);
+ // TODO
}
void CDD(u32 rt, u32 ra, s32 i7)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i7, zz);
const int t = (CPU.GPR[ra]._u32[3] + i7) & 0x8;
CPU.GPR[rt]._u64[0] = (u64)0x18191A1B1C1D1E1F;
CPU.GPR[rt]._u64[1] = (u64)0x1011121314151617;
CPU.GPR[rt]._u64[1 - (t >> 3)] = (u64)0x0001020304050607;
+ WRAPPER_END(rt, ra, i7, 0);
}
void ROTQBII(u32 rt, u32 ra, s32 i7)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i7, zz);
const int s = i7 & 0x7;
const SPU_GPR_hdr temp = CPU.GPR[ra];
CPU.GPR[rt]._u32[0] = (temp._u32[0] << s) | (temp._u32[3] >> (32 - s));
CPU.GPR[rt]._u32[1] = (temp._u32[1] << s) | (temp._u32[0] >> (32 - s));
CPU.GPR[rt]._u32[2] = (temp._u32[2] << s) | (temp._u32[1] >> (32 - s));
CPU.GPR[rt]._u32[3] = (temp._u32[3] << s) | (temp._u32[2] >> (32 - s));
+ WRAPPER_END(rt, ra, i7, 0);
}
void ROTQMBII(u32 rt, u32 ra, s32 i7)
{
- UNIMPLEMENTED();
- const int s = (0 - i7) & 0x7;
+ WRAPPER_BEGIN(rt, ra, i7, zz);
+ const int s = (0 - (s32)i7) & 0x7;
const SPU_GPR_hdr temp = CPU.GPR[ra];
CPU.GPR[rt]._u32[0] = (temp._u32[0] >> s) | (temp._u32[1] << (32 - s));
CPU.GPR[rt]._u32[1] = (temp._u32[1] >> s) | (temp._u32[2] << (32 - s));
CPU.GPR[rt]._u32[2] = (temp._u32[2] >> s) | (temp._u32[3] << (32 - s));
CPU.GPR[rt]._u32[3] = (temp._u32[3] >> s);
+ WRAPPER_END(rt, ra, i7, 0);
}
void SHLQBII(u32 rt, u32 ra, s32 i7)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i7, zz);
const int s = i7 & 0x7;
const SPU_GPR_hdr temp = CPU.GPR[ra];
CPU.GPR[rt]._u32[0] = (temp._u32[0] << s);
CPU.GPR[rt]._u32[1] = (temp._u32[1] << s) | (temp._u32[0] >> (32 - s));
CPU.GPR[rt]._u32[2] = (temp._u32[2] << s) | (temp._u32[1] >> (32 - s));
CPU.GPR[rt]._u32[3] = (temp._u32[3] << s) | (temp._u32[2] >> (32 - s));
+ WRAPPER_END(rt, ra, i7, 0);
}
void ROTQBYI(u32 rt, u32 ra, s32 i7)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i7, zz);
const int s = i7 & 0xf;
const SPU_GPR_hdr temp = CPU.GPR[ra];
for (int b = 0; b < 16; b++)
CPU.GPR[rt]._u8[b] = temp._u8[(b - s) & 0xf];
+ WRAPPER_END(rt, ra, i7, 0);
+ /*const int s = i7 & 0xf;
+
+ XmmVar v0(c);
+ XmmVar v1(c);
+ c.movdqa(v0, cpu_xmm(GPR[ra]));
+ c.movdqa(v1, v0);
+ c.pslldq(v0, s);
+ c.psrldq(v1, 0xf - s);
+ c.por(v0, v1);
+ c.movdqa(cpu_xmm(GPR[rt]), v0);*/
}
void ROTQMBYI(u32 rt, u32 ra, s32 i7)
{
- UNIMPLEMENTED();
- const int s = (0 - i7) & 0x1f;
+ WRAPPER_BEGIN(rt, ra, i7, zz);
+ const int s = (0 - (s32)i7) & 0x1f;
const SPU_GPR_hdr temp = CPU.GPR[ra];
CPU.GPR[rt].Reset();
for (int b = 0; b < 16 - s; b++)
CPU.GPR[rt]._u8[b] = temp._u8[b + s];
+ WRAPPER_END(rt, ra, i7, 0);
+ /*const int s = (0 - i7) & 0x1f;
+
+ XmmVar v0(c);
+ if (s == 0)
+ {
+ if (ra == rt)
+ {
+ // nop
+ }
+ else
+ {
+ // mov
+ c.movaps(v0, cpu_xmm(GPR[ra]));
+ c.movaps(cpu_xmm(GPR[rt]), v0);
+ }
+ }
+ else if (s > 15)
+ {
+ // zero
+ c.xorps(v0, v0);
+ c.movaps(cpu_xmm(GPR[rt]), v0);
+ }
+ else
+ {
+ // shift right
+ c.movdqa(v0, cpu_xmm(GPR[ra]));
+ c.psrldq(v0, s);
+ c.movdqa(cpu_xmm(GPR[rt]), v0);
+ }*/
}
void SHLQBYI(u32 rt, u32 ra, s32 i7)
{
+ WRAPPER_BEGIN(rt, ra, i7, zz);
const int s = i7 & 0x1f;
+ const SPU_GPR_hdr temp = CPU.GPR[ra];
+ CPU.GPR[rt].Reset();
+ for (int b = s; b < 16; b++)
+ CPU.GPR[rt]._u8[b] = temp._u8[b - s];
+ WRAPPER_END(rt, ra, i7, 0);
+ /*const int s = i7 & 0x1f;
+
XmmVar v0(c);
if (s == 0)
{
@@ -763,45 +1041,49 @@ private:
c.movdqa(v0, cpu_xmm(GPR[ra]));
c.pslldq(v0, s);
c.movdqa(cpu_xmm(GPR[rt]), v0);
- }
+ }*/
}
void NOP(u32 rt)
{
- UNIMPLEMENTED();
}
void CGT(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
for (int w = 0; w < 4; w++)
CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._i32[w] > CPU.GPR[rb]._i32[w] ? 0xffffffff : 0;
+ WRAPPER_END(rt, ra, rb, 0);
}
void XOR(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
for (int w = 0; w < 4; w++)
CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._u32[w] ^ CPU.GPR[rb]._u32[w];
+ WRAPPER_END(rt, ra, rb, 0);
}
void CGTH(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
for (int h = 0; h < 8; h++)
CPU.GPR[rt]._u16[h] = CPU.GPR[ra]._i16[h] > CPU.GPR[rb]._i16[h] ? 0xffff : 0;
+ WRAPPER_END(rt, ra, rb, 0);
}
void EQV(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
for (int w = 0; w < 4; w++)
CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._u32[w] ^ (~CPU.GPR[rb]._u32[w]);
+ WRAPPER_END(rt, ra, rb, 0);
}
void CGTB(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
for (int b = 0; b < 16; b++)
CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._i8[b] > CPU.GPR[rb]._i8[b] ? 0xff : 0;
+ WRAPPER_END(rt, ra, rb, 0);
}
void SUMB(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
const SPU_GPR_hdr _a = CPU.GPR[ra];
const SPU_GPR_hdr _b = CPU.GPR[rb];
for (int w = 0; w < 4; w++)
@@ -809,6 +1091,7 @@ private:
CPU.GPR[rt]._u16[w*2] = _a._u8[w*4] + _a._u8[w*4 + 1] + _a._u8[w*4 + 2] + _a._u8[w*4 + 3];
CPU.GPR[rt]._u16[w*2 + 1] = _b._u8[w*4] + _b._u8[w*4 + 1] + _b._u8[w*4 + 2] + _b._u8[w*4 + 3];
}
+ WRAPPER_END(rt, ra, rb, 0);
}
//HGT uses signed values. HLGT uses unsigned values
void HGT(u32 rt, s32 ra, s32 rb)
@@ -818,7 +1101,7 @@ private:
}
void CLZ(u32 rt, u32 ra)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, yy, zz);
for (int w = 0; w < 4; w++)
{
int nPos;
@@ -829,135 +1112,178 @@ private:
CPU.GPR[rt]._u32[w] = nPos;
}
+ WRAPPER_END(rt, ra, 0, 0);
}
void XSWD(u32 rt, u32 ra)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, yy, zz);
CPU.GPR[rt]._i64[0] = (s64)CPU.GPR[ra]._i32[0];
CPU.GPR[rt]._i64[1] = (s64)CPU.GPR[ra]._i32[2];
+ WRAPPER_END(rt, ra, 0, 0);
}
void XSHW(u32 rt, u32 ra)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, yy, zz);
for (int w = 0; w < 4; w++)
CPU.GPR[rt]._i32[w] = (s32)CPU.GPR[ra]._i16[w*2];
+ WRAPPER_END(rt, ra, 0, 0);
}
void CNTB(u32 rt, u32 ra)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, yy, zz);
const SPU_GPR_hdr temp = CPU.GPR[ra];
CPU.GPR[rt].Reset();
for (int b = 0; b < 16; b++)
for (int i = 0; i < 8; i++)
CPU.GPR[rt]._u8[b] += (temp._u8[b] & (1 << i)) ? 1 : 0;
+ WRAPPER_END(rt, ra, 0, 0);
}
void XSBH(u32 rt, u32 ra)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, yy, zz);
for (int h = 0; h < 8; h++)
CPU.GPR[rt]._i16[h] = (s16)CPU.GPR[ra]._i8[h*2];
+ WRAPPER_END(rt, ra, 0, 0);
}
void CLGT(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
- for(u32 i = 0; i < 4; ++i)
+ WRAPPER_BEGIN(rt, ra, rb, zz);
+ for (u32 i = 0; i < 4; ++i)
{
CPU.GPR[rt]._u32[i] = (CPU.GPR[ra]._u32[i] > CPU.GPR[rb]._u32[i]) ? 0xffffffff : 0x00000000;
}
+ WRAPPER_END(rt, ra, rb, 0);
+ /*XmmVar v0(c);
+ if (ra == rb)
+ {
+ // zero
+ c.xorps(v0, v0);
+ c.movaps(cpu_xmm(GPR[rt]), v0);
+ }
+ else
+ {
+ // compare if-greater-than
+ c.movdqa(v0, cpu_xmm(GPR[rb]));
+ c.psubd(v0, cpu_xmm(GPR[ra]));
+ c.psrad(v0, 32);
+ c.movdqa(cpu_xmm(GPR[rt]), v0);
+ }*/
}
void ANDC(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
for (int w = 0; w < 4; w++)
CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._u32[w] & (~CPU.GPR[rb]._u32[w]);
+ WRAPPER_END(rt, ra, rb, 0);
+ // TODO
}
void FCGT(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
CPU.GPR[rt]._u32[0] = CPU.GPR[ra]._f[0] > CPU.GPR[rb]._f[0] ? 0xffffffff : 0;
CPU.GPR[rt]._u32[1] = CPU.GPR[ra]._f[1] > CPU.GPR[rb]._f[1] ? 0xffffffff : 0;
CPU.GPR[rt]._u32[2] = CPU.GPR[ra]._f[2] > CPU.GPR[rb]._f[2] ? 0xffffffff : 0;
CPU.GPR[rt]._u32[3] = CPU.GPR[ra]._f[3] > CPU.GPR[rb]._f[3] ? 0xffffffff : 0;
+ WRAPPER_END(rt, ra, rb, 0);
+ // TODO
}
void DFCGT(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
CPU.GPR[rt]._u64[0] = CPU.GPR[ra]._d[0] > CPU.GPR[rb]._d[0] ? 0xffffffffffffffff : 0;
CPU.GPR[rt]._u64[1] = CPU.GPR[ra]._d[1] > CPU.GPR[rb]._d[1] ? 0xffffffffffffffff : 0;
+ WRAPPER_END(rt, ra, rb, 0);
}
void FA(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
CPU.GPR[rt]._f[0] = CPU.GPR[ra]._f[0] + CPU.GPR[rb]._f[0];
CPU.GPR[rt]._f[1] = CPU.GPR[ra]._f[1] + CPU.GPR[rb]._f[1];
CPU.GPR[rt]._f[2] = CPU.GPR[ra]._f[2] + CPU.GPR[rb]._f[2];
CPU.GPR[rt]._f[3] = CPU.GPR[ra]._f[3] + CPU.GPR[rb]._f[3];
+ WRAPPER_END(rt, ra, rb, 0);
+ // TODO
}
void FS(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
CPU.GPR[rt]._f[0] = CPU.GPR[ra]._f[0] - CPU.GPR[rb]._f[0];
CPU.GPR[rt]._f[1] = CPU.GPR[ra]._f[1] - CPU.GPR[rb]._f[1];
CPU.GPR[rt]._f[2] = CPU.GPR[ra]._f[2] - CPU.GPR[rb]._f[2];
CPU.GPR[rt]._f[3] = CPU.GPR[ra]._f[3] - CPU.GPR[rb]._f[3];
+ WRAPPER_END(rt, ra, rb, 0);
+ // TODO
}
void FM(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
CPU.GPR[rt]._f[0] = CPU.GPR[ra]._f[0] * CPU.GPR[rb]._f[0];
CPU.GPR[rt]._f[1] = CPU.GPR[ra]._f[1] * CPU.GPR[rb]._f[1];
CPU.GPR[rt]._f[2] = CPU.GPR[ra]._f[2] * CPU.GPR[rb]._f[2];
CPU.GPR[rt]._f[3] = CPU.GPR[ra]._f[3] * CPU.GPR[rb]._f[3];
+ WRAPPER_END(rt, ra, rb, 0);
+ /*XmmVar v0(c);
+ c.movaps(v0, cpu_xmm(GPR[ra]));
+ c.mulps(v0, cpu_xmm(GPR[rb]));
+ c.movaps(cpu_xmm(GPR[rt]), v0);*/
}
void CLGTH(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
for (int h = 0; h < 8; h++)
CPU.GPR[rt]._u16[h] = CPU.GPR[ra]._u16[h] > CPU.GPR[rb]._u16[h] ? 0xffff : 0;
+ WRAPPER_END(rt, ra, rb, 0);
}
void ORC(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
for (int w = 0; w < 4; w++)
CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._u32[w] | (~CPU.GPR[rb]._u32[w]);
+ WRAPPER_END(rt, ra, rb, 0);
}
void FCMGT(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
CPU.GPR[rt]._u32[0] = fabs(CPU.GPR[ra]._f[0]) > fabs(CPU.GPR[rb]._f[0]) ? 0xffffffff : 0;
CPU.GPR[rt]._u32[1] = fabs(CPU.GPR[ra]._f[1]) > fabs(CPU.GPR[rb]._f[1]) ? 0xffffffff : 0;
CPU.GPR[rt]._u32[2] = fabs(CPU.GPR[ra]._f[2]) > fabs(CPU.GPR[rb]._f[2]) ? 0xffffffff : 0;
CPU.GPR[rt]._u32[3] = fabs(CPU.GPR[ra]._f[3]) > fabs(CPU.GPR[rb]._f[3]) ? 0xffffffff : 0;
+ WRAPPER_END(rt, ra, rb, 0);
}
void DFCMGT(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
CPU.GPR[rt]._u64[0] = fabs(CPU.GPR[ra]._d[0]) > fabs(CPU.GPR[rb]._d[0]) ? 0xffffffffffffffff : 0;
CPU.GPR[rt]._u64[1] = fabs(CPU.GPR[ra]._d[1]) > fabs(CPU.GPR[rb]._d[1]) ? 0xffffffffffffffff : 0;
+ WRAPPER_END(rt, ra, rb, 0);
}
void DFA(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
CPU.GPR[rt]._d[0] = CPU.GPR[ra]._d[0] + CPU.GPR[rb]._d[0];
CPU.GPR[rt]._d[1] = CPU.GPR[ra]._d[1] + CPU.GPR[rb]._d[1];
+ WRAPPER_END(rt, ra, rb, 0);
}
void DFS(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
CPU.GPR[rt]._d[0] = CPU.GPR[ra]._d[0] - CPU.GPR[rb]._d[0];
CPU.GPR[rt]._d[1] = CPU.GPR[ra]._d[1] - CPU.GPR[rb]._d[1];
+ WRAPPER_END(rt, ra, rb, 0);
}
void DFM(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
CPU.GPR[rt]._d[0] = CPU.GPR[ra]._d[0] * CPU.GPR[rb]._d[0];
CPU.GPR[rt]._d[1] = CPU.GPR[ra]._d[1] * CPU.GPR[rb]._d[1];
+ WRAPPER_END(rt, ra, rb, 0);
}
void CLGTB(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
for (int b = 0; b < 16; b++)
CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[b] > CPU.GPR[rb]._u8[b] ? 0xff : 0;
+ WRAPPER_END(rt, ra, rb, 0);
}
void HLGT(u32 rt, u32 ra, u32 rb)
{
@@ -966,61 +1292,80 @@ private:
}
void DFMA(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
CPU.GPR[rt]._d[0] += CPU.GPR[ra]._d[0] * CPU.GPR[rb]._d[0];
CPU.GPR[rt]._d[1] += CPU.GPR[ra]._d[1] * CPU.GPR[rb]._d[1];
+ WRAPPER_END(rt, ra, rb, 0);
}
void DFMS(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
CPU.GPR[rt]._d[0] = CPU.GPR[ra]._d[0] * CPU.GPR[rb]._d[0] - CPU.GPR[rt]._d[0];
CPU.GPR[rt]._d[1] = CPU.GPR[ra]._d[1] * CPU.GPR[rb]._d[1] - CPU.GPR[rt]._d[1];
+ WRAPPER_END(rt, ra, rb, 0);
}
void DFNMS(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
CPU.GPR[rt]._d[0] -= CPU.GPR[ra]._d[0] * CPU.GPR[rb]._d[0];
CPU.GPR[rt]._d[1] -= CPU.GPR[ra]._d[1] * CPU.GPR[rb]._d[1];
+ WRAPPER_END(rt, ra, rb, 0);
}
void DFNMA(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
CPU.GPR[rt]._d[0] = -(CPU.GPR[ra]._d[0] * CPU.GPR[rb]._d[0] + CPU.GPR[rt]._d[0]);
CPU.GPR[rt]._d[1] = -(CPU.GPR[ra]._d[1] * CPU.GPR[rb]._d[1] + CPU.GPR[rt]._d[1]);
+ WRAPPER_END(rt, ra, rb, 0);
}
void CEQ(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
for (int w = 0; w < 4; w++)
CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._i32[w] == CPU.GPR[rb]._i32[w] ? 0xffffffff : 0;
+ WRAPPER_END(rt, ra, rb, 0);
}
void MPYHHU(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
for (int w = 0; w < 4; w++)
CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._u16[w*2+1] * CPU.GPR[rb]._u16[w*2+1];
+ WRAPPER_END(rt, ra, rb, 0);
}
void ADDX(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
for (int w = 0; w < 4; w++)
CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._u32[w] + CPU.GPR[rb]._u32[w] + (CPU.GPR[rt]._u32[w] & 1);
+ WRAPPER_END(rt, ra, rb, 0);
+ // TODO
}
void SFX(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
for (int w = 0; w < 4; w++)
CPU.GPR[rt]._u32[w] = CPU.GPR[rb]._u32[w] - CPU.GPR[ra]._u32[w] - (1 - (CPU.GPR[rt]._u32[w] & 1));
+ WRAPPER_END(rt, ra, rb, 0);
+ /*XmmVar v0(c), v1(c), v2(c);
+ c.movdqa(v1, imm_xmm(s19_to_s32[1]));
+ c.movdqa(v0, cpu_xmm(GPR[rb]));
+ c.movdqa(v2, cpu_xmm(GPR[rt]));
+ c.psubd(v0, cpu_xmm(GPR[ra]));
+ c.pand(v2, v1);
+ c.paddd(v0, v2);
+ c.psubd(v0, v1);
+ c.movdqa(cpu_xmm(GPR[rt]), v0);*/
}
void CGX(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
for (int w = 0; w < 4; w++)
CPU.GPR[rt]._u32[w] = ((u64)CPU.GPR[ra]._u32[w] + (u64)CPU.GPR[rb]._u32[w] + (u64)(CPU.GPR[rt]._u32[w] & 1)) >> 32;
+ WRAPPER_END(rt, ra, rb, 0);
}
void BGX(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
s64 nResult;
for (int w = 0; w < 4; w++)
@@ -1028,18 +1373,21 @@ private:
nResult = (u64)CPU.GPR[rb]._u32[w] - (u64)CPU.GPR[ra]._u32[w] - (u64)(1 - (CPU.GPR[rt]._u32[w] & 1));
CPU.GPR[rt]._u32[w] = nResult < 0 ? 0 : 1;
}
+ WRAPPER_END(rt, ra, rb, 0);
}
void MPYHHA(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
for (int w = 0; w < 4; w++)
CPU.GPR[rt]._i32[w] += CPU.GPR[ra]._i16[w*2+1] * CPU.GPR[rb]._i16[w*2+1];
+ WRAPPER_END(rt, ra, rb, 0);
}
void MPYHHAU(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
for (int w = 0; w < 4; w++)
CPU.GPR[rt]._u32[w] += CPU.GPR[ra]._u16[w*2+1] * CPU.GPR[rb]._u16[w*2+1];
+ WRAPPER_END(rt, ra, rb, 0);
}
//Forced bits to 0, hence the shift:
@@ -1052,17 +1400,19 @@ private:
}
void FESD(u32 rt, u32 ra)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, yy, zz);
CPU.GPR[rt]._d[0] = (double)CPU.GPR[ra]._f[1];
CPU.GPR[rt]._d[1] = (double)CPU.GPR[ra]._f[3];
+ WRAPPER_END(rt, ra, 0, 0);
}
void FRDS(u32 rt, u32 ra)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, yy, zz);
CPU.GPR[rt]._f[1] = (float)CPU.GPR[ra]._d[0];
CPU.GPR[rt]._u32[0] = 0x00000000;
CPU.GPR[rt]._f[3] = (float)CPU.GPR[ra]._d[1];
CPU.GPR[rt]._u32[2] = 0x00000000;
+ WRAPPER_END(rt, ra, 0, 0);
}
void FSCRWR(u32 rt, u32 ra)
{
@@ -1070,7 +1420,7 @@ private:
}
void DFTSV(u32 rt, u32 ra, s32 i7)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i7, zz);
const u64 DoubleExpMask = 0x7ff0000000000000;
const u64 DoubleFracMask = 0x000fffffffffffff;
const u64 DoubleSignMask = 0x8000000000000000;
@@ -1121,83 +1471,99 @@ private:
if ((temp._u64[i] & DoubleExpMask) == DoubleExpMask)
CPU.GPR[rt]._u64[i] = 0xffffffffffffffff;
}
+ WRAPPER_END(rt, ra, i7, 0);
}
void FCEQ(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
CPU.GPR[rt]._u32[0] = CPU.GPR[ra]._f[0] == CPU.GPR[rb]._f[0] ? 0xffffffff : 0;
CPU.GPR[rt]._u32[1] = CPU.GPR[ra]._f[1] == CPU.GPR[rb]._f[1] ? 0xffffffff : 0;
CPU.GPR[rt]._u32[2] = CPU.GPR[ra]._f[2] == CPU.GPR[rb]._f[2] ? 0xffffffff : 0;
CPU.GPR[rt]._u32[3] = CPU.GPR[ra]._f[3] == CPU.GPR[rb]._f[3] ? 0xffffffff : 0;
+ WRAPPER_END(rt, ra, rb, 0);
}
void DFCEQ(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
CPU.GPR[rt]._u64[0] = CPU.GPR[ra]._d[0] == CPU.GPR[rb]._d[0] ? 0xffffffffffffffff : 0;
CPU.GPR[rt]._u64[1] = CPU.GPR[ra]._d[1] == CPU.GPR[rb]._d[1] ? 0xffffffffffffffff : 0;
+ WRAPPER_END(rt, ra, rb, 0);
}
void MPY(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
for (int w = 0; w < 4; w++)
CPU.GPR[rt]._i32[w] = CPU.GPR[ra]._i16[w*2] * CPU.GPR[rb]._i16[w*2];
+ WRAPPER_END(rt, ra, rb, 0);
}
void MPYH(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
for (int w = 0; w < 4; w++)
CPU.GPR[rt]._i32[w] = (CPU.GPR[ra]._i16[w*2+1] * CPU.GPR[rb]._i16[w*2]) << 16;
+ WRAPPER_END(rt, ra, rb, 0);
+ // TODO
}
void MPYHH(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
for (int w = 0; w < 4; w++)
CPU.GPR[rt]._i32[w] = CPU.GPR[ra]._i16[w*2+1] * CPU.GPR[rb]._i16[w*2+1];
+ WRAPPER_END(rt, ra, rb, 0);
}
void MPYS(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
for (int w = 0; w < 4; w++)
CPU.GPR[rt]._i32[w] = (CPU.GPR[ra]._i16[w*2] * CPU.GPR[rb]._i16[w*2]) >> 16;
+ WRAPPER_END(rt, ra, rb, 0);
}
void CEQH(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
for (int h = 0; h < 8; h++)
CPU.GPR[rt]._u16[h] = CPU.GPR[ra]._u16[h] == CPU.GPR[rb]._u16[h] ? 0xffff : 0;
+ WRAPPER_END(rt, ra, rb, 0);
}
void FCMEQ(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
CPU.GPR[rt]._u32[0] = fabs(CPU.GPR[ra]._f[0]) == fabs(CPU.GPR[rb]._f[0]) ? 0xffffffff : 0;
CPU.GPR[rt]._u32[1] = fabs(CPU.GPR[ra]._f[1]) == fabs(CPU.GPR[rb]._f[1]) ? 0xffffffff : 0;
CPU.GPR[rt]._u32[2] = fabs(CPU.GPR[ra]._f[2]) == fabs(CPU.GPR[rb]._f[2]) ? 0xffffffff : 0;
CPU.GPR[rt]._u32[3] = fabs(CPU.GPR[ra]._f[3]) == fabs(CPU.GPR[rb]._f[3]) ? 0xffffffff : 0;
+ WRAPPER_END(rt, ra, rb, 0);
}
void DFCMEQ(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
CPU.GPR[rt]._u64[0] = fabs(CPU.GPR[ra]._d[0]) == fabs(CPU.GPR[rb]._d[0]) ? 0xffffffffffffffff : 0;
CPU.GPR[rt]._u64[1] = fabs(CPU.GPR[ra]._d[1]) == fabs(CPU.GPR[rb]._d[1]) ? 0xffffffffffffffff : 0;
+ WRAPPER_END(rt, ra, rb, 0);
}
void MPYU(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
for (int w = 0; w < 4; w++)
CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._u16[w*2] * CPU.GPR[rb]._u16[w*2];
+ WRAPPER_END(rt, ra, rb, 0);
+ // TODO
}
void CEQB(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, zz);
for (int b = 0; b < 16; b++)
CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[b] == CPU.GPR[rb]._u8[b] ? 0xff : 0;
+ WRAPPER_END(rt, ra, rb, 0);
}
void FI(u32 rt, u32 ra, u32 rb)
{
- UNIMPLEMENTED();
- //Floating Interpolation: ra will be ignored.
- //It should work correctly if result of preceding FREST or FRSQEST is sufficiently exact
+ WRAPPER_BEGIN(rt, ra, rb, zz);
CPU.GPR[rt] = CPU.GPR[rb];
+ WRAPPER_END(rt, ra, rb, 0);
+ /*XmmVar v0(c);
+ c.movaps(v0, cpu_xmm(GPR[rb]));
+ c.movaps(cpu_xmm(GPR[rt]), v0);*/
}
void HEQ(u32 rt, u32 ra, u32 rb)
{
@@ -1208,30 +1574,38 @@ private:
//0 - 9
void CFLTS(u32 rt, u32 ra, s32 i8)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i8, zz);
const u32 scale = 173 - (i8 & 0xff); //unsigned immediate
for (int i = 0; i < 4; i++)
{
u32 exp = ((CPU.GPR[ra]._u32[i] >> 23) & 0xff) + scale;
- if (exp > 255)
+ if (exp > 255)
exp = 255;
CPU.GPR[rt]._u32[i] = (CPU.GPR[ra]._u32[i] & 0x807fffff) | (exp << 23);
CPU.GPR[rt]._u32[i] = (u32)CPU.GPR[rt]._f[i]; //trunc
}
- //CPU.GPR[rt]._m128i = _mm_cvttps_epi32(CPU.GPR[rt]._m128);
+ WRAPPER_END(rt, ra, i8, 0);
+ /*XmmVar v0(c);
+ c.movaps(v0, cpu_xmm(GPR[ra]));
+ if (i8 != 173)
+ {
+ c.mulps(v0, imm_xmm(scale_to_int[i8 & 0xff])); // scale
+ }
+ c.cvtps2dq(v0, v0); // convert to ints
+ c.movdqa(cpu_xmm(GPR[rt]), v0);*/
}
void CFLTU(u32 rt, u32 ra, s32 i8)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i8, zz);
const u32 scale = 173 - (i8 & 0xff); //unsigned immediate
for (int i = 0; i < 4; i++)
{
u32 exp = ((CPU.GPR[ra]._u32[i] >> 23) & 0xff) + scale;
- if (exp > 255)
+ if (exp > 255)
exp = 255;
if (CPU.GPR[ra]._u32[i] & 0x80000000) //if negative, result = 0
@@ -1246,11 +1620,21 @@ private:
CPU.GPR[rt]._u32[i] = floor(CPU.GPR[rt]._f[i]);
}
}
+ WRAPPER_END(rt, ra, i8, 0);
+ /*XmmVar v0(c);
+ c.movaps(v0, cpu_xmm(GPR[ra]));
+ if (i8 != 173)
+ {
+ c.mulps(v0, imm_xmm(scale_to_int[i8 & 0xff])); // scale
+ }
+ // TODO: handle negative values and convert to unsigned value
+ // c.int3();
+ c.cvtps2dq(v0, v0); // convert to signed ints
+ c.movdqa(cpu_xmm(GPR[rt]), v0);*/
}
void CSFLT(u32 rt, u32 ra, s32 i8)
{
- UNIMPLEMENTED();
- //CPU.GPR[rt]._m128 = _mm_cvtepi32_ps(CPU.GPR[ra]._m128i);
+ WRAPPER_BEGIN(rt, ra, i8, zz);
const u32 scale = 155 - (i8 & 0xff); //unsigned immediate
for (int i = 0; i < 4; i++)
{
@@ -1263,10 +1647,19 @@ private:
CPU.GPR[rt]._u32[i] = (CPU.GPR[rt]._u32[i] & 0x807fffff) | (exp << 23);
}
+ WRAPPER_END(rt, ra, i8, 0);
+ /*XmmVar v0(c);
+ c.movdqa(v0, cpu_xmm(GPR[ra]));
+ c.cvtdq2ps(v0, v0); // convert to floats
+ if (i8 != 155)
+ {
+ c.mulps(v0, imm_xmm(scale_to_float[i8 & 0xff])); // scale
+ }
+ c.movaps(cpu_xmm(GPR[rt]), v0);*/
}
void CUFLT(u32 rt, u32 ra, s32 i8)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i8, zz);
const u32 scale = 155 - (i8 & 0xff); //unsigned immediate
for (int i = 0; i < 4; i++)
{
@@ -1278,18 +1671,33 @@ private:
CPU.GPR[rt]._u32[i] = (CPU.GPR[rt]._u32[i] & 0x807fffff) | (exp << 23);
}
+ WRAPPER_END(rt, ra, i8, 0);
+ /*XmmVar v0(c);
+ c.movdqa(v0, cpu_xmm(GPR[ra]));
+ // TODO: convert from unsigned value
+ // c.int3();
+ c.cvtdq2ps(v0, v0); // convert to floats as signed
+ if (i8 != 155)
+ {
+ c.mulps(v0, imm_xmm(scale_to_float[i8 & 0xff])); // scale
+ }
+ c.movaps(cpu_xmm(GPR[rt]), v0);*/
}
//0 - 8
void BRZ(u32 rt, s32 i16)
{
- UNIMPLEMENTED();
- if (CPU.GPR[rt]._u32[3] == 0)
- CPU.SetBranch(branchTarget(CPU.PC, i16));
+ do_finalize = true;
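+ // pos_var = branch target; cmov replaces it with the fall-through address when GPR[rt] word 3 is non-zero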
+ GpVar pos_next(c, kVarTypeUInt32);
+ c.mov(pos_next, (CPU.PC >> 2) + 1);
+ c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2);
+ c.cmp(cpu_dword(GPR[rt]._u32[3]), 0);
+ c.cmovnz(*pos_var, pos_next);
+ //ConLog.Write("BRZ(rt=%d,i16=%d)", rt, i16);
}
void STQA(u32 rt, s32 i16)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, i16, yy, zz);
u32 lsa = (i16 << 2) & 0x3fff0;
if(!CPU.IsGoodLSA(lsa))
{
@@ -1299,30 +1707,43 @@ private:
}
CPU.WriteLS128(lsa, CPU.GPR[rt]._u128);
+ WRAPPER_END(rt, i16, 0, 0);
}
void BRNZ(u32 rt, s32 i16)
{
- UNIMPLEMENTED();
- if (CPU.GPR[rt]._u32[3] != 0)
- CPU.SetBranch(branchTarget(CPU.PC, i16));
+ do_finalize = true;
+ GpVar pos_next(c, kVarTypeUInt32);
+ c.mov(pos_next, (CPU.PC >> 2) + 1);
+ c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2);
+ c.cmp(cpu_dword(GPR[rt]._u32[3]), 0);
+ c.cmovz(*pos_var, pos_next);
+ //ConLog.Write("BRNZ(rt=%d,i16=%d)", rt, i16);
}
void BRHZ(u32 rt, s32 i16)
{
- UNIMPLEMENTED();
- if (CPU.GPR[rt]._u16[6] == 0)
- CPU.SetBranch(branchTarget(CPU.PC, i16));
+ do_finalize = true;
+ GpVar pos_next(c, kVarTypeUInt32);
+ c.mov(pos_next, (CPU.PC >> 2) + 1);
+ c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2);
+ c.cmp(cpu_word(GPR[rt]._u16[6]), 0);
+ c.cmovnz(*pos_var, pos_next);
+ ConLog.Write("BRHZ(rt=%d,i16=%d)", rt, i16);
}
void BRHNZ(u32 rt, s32 i16)
{
- UNIMPLEMENTED();
- if (CPU.GPR[rt]._u16[6] != 0)
- CPU.SetBranch(branchTarget(CPU.PC, i16));
+ do_finalize = true;
+ GpVar pos_next(c, kVarTypeUInt32);
+ c.mov(pos_next, (CPU.PC >> 2) + 1);
+ c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2);
+ c.cmp(cpu_word(GPR[rt]._u16[6]), 0);
+ c.cmovz(*pos_var, pos_next);
+ ConLog.Write("BRHNZ(rt=%d,i16=%d)", rt, i16);
}
void STQR(u32 rt, s32 i16)
{
- UNIMPLEMENTED();
- u32 lsa = branchTarget(CPU.PC, i16) & 0x3fff0;
- if(!CPU.IsGoodLSA(lsa))
+ WRAPPER_BEGIN(rt, i16, PC, zz);
+ u32 lsa = branchTarget(PC, i16) & 0x3fff0;
+ if (!CPU.IsGoodLSA(lsa))
{
ConLog.Error("STQR: bad lsa (0x%x)", lsa);
Emu.Pause();
@@ -1330,6 +1751,17 @@ private:
}
CPU.WriteLS128(lsa, CPU.GPR[rt]._u128);
+ WRAPPER_END(rt, i16, CPU.PC, 0);
+ /*u32 lsa = branchTarget(CPU.PC, i16) & 0x3fff0;
+
+ GpVar v0(c, kVarTypeUInt64);
+ GpVar v1(c, kVarTypeUInt64);
+ c.mov(v0, cpu_qword(GPR[rt]._u64[0]));
+ c.mov(v1, cpu_qword(GPR[rt]._u64[1]));
+ c.bswap(v0);
+ c.bswap(v1);
+ c.mov(qword_ptr(*ls_var, lsa), v1);
+ c.mov(qword_ptr(*ls_var, lsa + 8), v0);*/
}
void BRA(s32 i16)
{
@@ -1338,7 +1770,7 @@ private:
}
void LQA(u32 rt, s32 i16)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, i16, yy, zz);
u32 lsa = (i16 << 2) & 0x3fff0;
if(!CPU.IsGoodLSA(lsa))
{
@@ -1348,6 +1780,7 @@ private:
}
CPU.GPR[rt]._u128 = CPU.ReadLS128(lsa);
+ WRAPPER_END(rt, i16, 0, 0);
}
void BRASL(u32 rt, s32 i16)
{
@@ -1358,25 +1791,57 @@ private:
}
void BR(s32 i16)
{
- UNIMPLEMENTED();
- CPU.SetBranch(branchTarget(CPU.PC, i16));
+ do_finalize = true;
+ c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2);
+ //ConLog.Write("BR(i16=%d)", i16);
}
void FSMBI(u32 rt, s32 i16)
{
- XmmVar v0(c);
+ WRAPPER_BEGIN(rt, i16, yy, zz);
+ const u32 s = i16;
+
+ for (u32 j = 0; j < 16; ++j)
+ {
+ if ((s >> j) & 0x1)
+ {
+ CPU.GPR[rt]._u8[j] = 0xFF;
+ }
+ else
+ {
+ CPU.GPR[rt]._u8[j] = 0x00;
+ }
+ }
+ WRAPPER_END(rt, i16, 0, 0);
+ /*XmmVar v0(c);
c.movaps(v0, imm_xmm(fsmbi_mask[i16 & 0xffff]));
- c.movaps(cpu_xmm(GPR[rt]), v0);
+ c.movaps(cpu_xmm(GPR[rt]), v0);*/
}
void BRSL(u32 rt, s32 i16)
{
- UNIMPLEMENTED();
- CPU.GPR[rt].Reset();
- CPU.GPR[rt]._u32[3] = CPU.PC + 4;
- CPU.SetBranch(branchTarget(CPU.PC, i16));
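+ // write the link address (PC + 4) into the preferred word of a zeroed rt, then branch unconditionally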
+ GpVar v0(c, kVarTypeUInt64);
+ c.xor_(v0, v0);
+ c.mov(cpu_qword(GPR[rt]._u64[1]), v0);
+ c.mov(cpu_qword(GPR[rt]._u64[0]), v0);
+ c.mov(cpu_dword(GPR[rt]._u32[3]), CPU.PC + 4);
+
+ do_finalize = true;
+ c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2);
+ //ConLog.Write("BRSL(rt=%d,i16=%d)", rt, i16);
}
void LQR(u32 rt, s32 i16)
{
- u32 lsa = branchTarget(CPU.PC, i16) & 0x3fff0;
+ WRAPPER_BEGIN(rt, i16, PC, zz);
+ u32 lsa = branchTarget(PC, i16) & 0x3fff0;
+ if (!CPU.IsGoodLSA(lsa))
+ {
+ ConLog.Error("LQR: bad lsa (0x%x)", lsa);
+ Emu.Pause();
+ return;
+ }
+
+ CPU.GPR[rt]._u128 = CPU.ReadLS128(lsa);
+ WRAPPER_END(rt, i16, CPU.PC, 0);
+ /*u32 lsa = branchTarget(CPU.PC, i16) & 0x3fff0;
GpVar v0(c, kVarTypeUInt64);
GpVar v1(c, kVarTypeUInt64);
@@ -1385,47 +1850,96 @@ private:
c.bswap(v0);
c.bswap(v1);
c.mov(cpu_qword(GPR[rt]._u64[0]), v1);
- c.mov(cpu_qword(GPR[rt]._u64[1]), v0);
+ c.mov(cpu_qword(GPR[rt]._u64[1]), v0);*/
}
void IL(u32 rt, s32 i16)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, i16, yy, zz);
CPU.GPR[rt]._i32[0] =
CPU.GPR[rt]._i32[1] =
CPU.GPR[rt]._i32[2] =
CPU.GPR[rt]._i32[3] = i16;
+ WRAPPER_END(rt, i16, 0, 0);
+ /*XmmVar v0(c);
+ if (i16 == 0)
+ {
+ c.xorps(v0, v0);
+ }
+ else if (i16 == -1)
+ {
+ c.cmpps(v0, v0, 0);
+ }
+ else
+ {
+ c.movaps(v0, imm_xmm(s19_to_s32[i16 & 0x7ffff]));
+ }
+ c.movaps(cpu_xmm(GPR[rt]), v0);*/
}
void ILHU(u32 rt, s32 i16)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, i16, yy, zz);
for (int w = 0; w < 4; w++)
CPU.GPR[rt]._i32[w] = i16 << 16;
+ WRAPPER_END(rt, i16, 0, 0);
+ /*XmmVar v0(c);
+ if (i16 == 0)
+ {
+ c.xorps(v0, v0);
+ }
+ else if (i16 == -1)
+ {
+ c.cmpps(v0, v0, 0);
+ c.pslld(v0, 16);
+ }
+ else
+ {
+ c.movaps(v0, imm_xmm(s19_to_s32[i16 & 0x7ffff]));
+ c.pslld(v0, 16);
+ }
+ c.movaps(cpu_xmm(GPR[rt]), v0);*/
}
void ILH(u32 rt, s32 i16)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, i16, yy, zz);
for (int h = 0; h < 8; h++)
- CPU.GPR[rt]._i16[h] = i16;
+ CPU.GPR[rt]._i16[h] = (s32)i16;
+ WRAPPER_END(rt, i16, 0, 0);
}
void IOHL(u32 rt, s32 i16)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, i16, yy, zz);
for (int w = 0; w < 4; w++)
CPU.GPR[rt]._i32[w] |= (i16 & 0xFFFF);
+ WRAPPER_END(rt, i16, 0, 0);
+ /*XmmVar v0(c);
+ if (i16 == 0)
+ {
+ // nop
+ }
+ else
+ {
+ c.movaps(v0, cpu_xmm(GPR[rt]));
+ c.orps(v0, imm_xmm(s19_to_s32[i16 & 0xffff]));
+ c.movaps(cpu_xmm(GPR[rt]), v0);
+ }*/
}
//0 - 7
void ORI(u32 rt, u32 ra, s32 i10)
{
- XmmVar v0(c);
- if (i10 == 0)
+ WRAPPER_BEGIN(rt, ra, i10, zz);
+ for (u32 i = 0; i < 4; ++i)
+ CPU.GPR[rt]._i32[i] = CPU.GPR[ra]._i32[i] | (s32)i10;
+ WRAPPER_END(rt, ra, i10, 0);
+ /*XmmVar v0(c);
+ if (i10 == -1)
{
- // zero
- c.xorps(v0, v0);
+ // fill with 1
+ c.cmpps(v0, v0, 0);
c.movaps(cpu_xmm(GPR[rt]), v0);
}
- else if (i10 == -1)
+ else if (i10 == 0)
{
if (rt == ra)
{
@@ -1443,53 +1957,68 @@ private:
c.movaps(v0, cpu_xmm(GPR[ra]));
c.orps(v0, imm_xmm(s19_to_s32[i10 & 0x7ffff]));
c.movaps(cpu_xmm(GPR[rt]), v0);
- }
+ }*/
}
void ORHI(u32 rt, u32 ra, s32 i10)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i10, zz);
for (int h = 0; h < 8; h++)
- CPU.GPR[rt]._i16[h] = CPU.GPR[ra]._i16[h] | i10;
+ CPU.GPR[rt]._i16[h] = CPU.GPR[ra]._i16[h] | (s32)i10;
+ WRAPPER_END(rt, ra, i10, 0);
}
void ORBI(u32 rt, u32 ra, s32 i10)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i10, zz);
for (int b = 0; b < 16; b++)
- CPU.GPR[rt]._i8[b] = CPU.GPR[ra]._i8[b] | i10;
+ CPU.GPR[rt]._i8[b] = CPU.GPR[ra]._i8[b] | (s32)i10;
+ WRAPPER_END(rt, ra, i10, 0);
}
void SFI(u32 rt, u32 ra, s32 i10)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i10, zz);
for (int w = 0; w < 4; w++)
- CPU.GPR[rt]._i32[w] = i10 - CPU.GPR[ra]._i32[w];
+ CPU.GPR[rt]._i32[w] = (s32)i10 - CPU.GPR[ra]._i32[w];
+ WRAPPER_END(rt, ra, i10, 0);
+ // TODO
}
void SFHI(u32 rt, u32 ra, s32 i10)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i10, zz);
for (int h = 0; h < 8; h++)
- CPU.GPR[rt]._i16[h] = i10 - CPU.GPR[ra]._i16[h];
+ CPU.GPR[rt]._i16[h] = (s32)i10 - CPU.GPR[ra]._i16[h];
+ WRAPPER_END(rt, ra, i10, 0);
}
void ANDI(u32 rt, u32 ra, s32 i10)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i10, zz);
for (int w = 0; w < 4; w++)
- CPU.GPR[rt]._i32[w] = CPU.GPR[ra]._i32[w] & i10;
+ CPU.GPR[rt]._i32[w] = CPU.GPR[ra]._i32[w] & (s32)i10;
+ WRAPPER_END(rt, ra, i10, 0);
+ // TODO
}
void ANDHI(u32 rt, u32 ra, s32 i10)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i10, zz);
for (int h = 0; h < 8; h++)
- CPU.GPR[rt]._i16[h] = CPU.GPR[ra]._i16[h] & i10;
+ CPU.GPR[rt]._i16[h] = CPU.GPR[ra]._i16[h] & (s32)i10;
+ WRAPPER_END(rt, ra, i10, 0);
}
void ANDBI(u32 rt, u32 ra, s32 i10)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i10, zz);
for (int b = 0; b < 16; b++)
- CPU.GPR[rt]._i8[b] = CPU.GPR[ra]._i8[b] & i10;
+ CPU.GPR[rt]._i8[b] = CPU.GPR[ra]._i8[b] & (s32)i10;
+ WRAPPER_END(rt, ra, i10, 0);
}
void AI(u32 rt, u32 ra, s32 i10)
{
- XmmVar v0(c);
+ WRAPPER_BEGIN(rt, ra, i10, zz);
+ CPU.GPR[rt]._i32[0] = CPU.GPR[ra]._i32[0] + i10;
+ CPU.GPR[rt]._i32[1] = CPU.GPR[ra]._i32[1] + i10;
+ CPU.GPR[rt]._i32[2] = CPU.GPR[ra]._i32[2] + i10;
+ CPU.GPR[rt]._i32[3] = CPU.GPR[ra]._i32[3] + i10;
+ WRAPPER_END(rt, ra, i10, 0);
+ /*XmmVar v0(c);
if (i10 == 0)
{
if (rt == ra)
@@ -1509,17 +2038,28 @@ private:
c.movdqa(v0, cpu_xmm(GPR[ra]));
c.paddd(v0, imm_xmm(s19_to_s32[i10 & 0x7ffff]));
c.movdqa(cpu_xmm(GPR[rt]), v0);
- }
+ }*/
}
void AHI(u32 rt, u32 ra, s32 i10)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i10, zz);
for(u32 h = 0; h < 8; ++h)
- CPU.GPR[rt]._i16[h] = CPU.GPR[ra]._i16[h] + i10;
+ CPU.GPR[rt]._i16[h] = CPU.GPR[ra]._i16[h] + (s32)i10;
+ WRAPPER_END(rt, ra, i10, 0);
}
void STQD(u32 rt, s32 i10, u32 ra) //i10 is shifted left by 4 while decoding
{
- GpVar lsa(c, kVarTypeUInt32);
+ WRAPPER_BEGIN(rt, i10, ra, zz);
+ const u32 lsa = (CPU.GPR[ra]._i32[3] + i10) & 0x3fff0;
+ if (!CPU.IsGoodLSA(lsa))
+ {
+ ConLog.Error("STQD: bad lsa (0x%x)", lsa);
+ Emu.Pause();
+ return;
+ }
+ CPU.WriteLS128(lsa, CPU.GPR[rt]._u128);
+ WRAPPER_END(rt, i10, ra, 0);
+ /*GpVar lsa(c, kVarTypeUInt32);
GpVar v0(c, kVarTypeUInt64);
GpVar v1(c, kVarTypeUInt64);
@@ -1531,13 +2071,13 @@ private:
c.bswap(v0);
c.bswap(v1);
c.mov(qword_ptr(*ls_var, lsa, 0, 0), v1);
- c.mov(qword_ptr(*ls_var, lsa, 0, 8), v0);
+ c.mov(qword_ptr(*ls_var, lsa, 0, 8), v0);*/
}
void LQD(u32 rt, s32 i10, u32 ra) //i10 is shifted left by 4 while decoding
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, i10, ra, zz);
const u32 lsa = (CPU.GPR[ra]._i32[3] + i10) & 0x3fff0;
- if(!CPU.IsGoodLSA(lsa))
+ if (!CPU.IsGoodLSA(lsa))
{
ConLog.Error("LQD: bad lsa (0x%x)", lsa);
Emu.Pause();
@@ -1545,42 +2085,63 @@ private:
}
CPU.GPR[rt]._u128 = CPU.ReadLS128(lsa);
+ WRAPPER_END(rt, i10, ra, 0);
+ /*GpVar lsa(c, kVarTypeUInt32);
+ GpVar v0(c, kVarTypeUInt64);
+ GpVar v1(c, kVarTypeUInt64);
+
+ c.mov(lsa, cpu_dword(GPR[ra]._u32[3]));
+ if (i10) c.add(lsa, i10);
+ c.and_(lsa, 0x3fff0);
+ c.mov(v0, qword_ptr(*ls_var, lsa, 0, 0));
+ c.mov(v1, qword_ptr(*ls_var, lsa, 0, 8));
+ c.bswap(v0);
+ c.bswap(v1);
+ c.mov(cpu_qword(GPR[rt]._u64[0]), v1);
+ c.mov(cpu_qword(GPR[rt]._u64[1]), v0);*/
}
void XORI(u32 rt, u32 ra, s32 i10)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i10, zz);
for (int w = 0; w < 4; w++)
- CPU.GPR[rt]._i32[w] = CPU.GPR[ra]._i32[w] ^ i10;
+ CPU.GPR[rt]._i32[w] = CPU.GPR[ra]._i32[w] ^ (s32)i10;
+ WRAPPER_END(rt, ra, i10, 0);
}
void XORHI(u32 rt, u32 ra, s32 i10)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i10, zz);
for (int h = 0; h < 8; h++)
- CPU.GPR[rt]._i16[h] = CPU.GPR[ra]._i16[h] ^ i10;
+ CPU.GPR[rt]._i16[h] = CPU.GPR[ra]._i16[h] ^ (s32)i10;
+ WRAPPER_END(rt, ra, i10, 0);
}
void XORBI(u32 rt, u32 ra, s32 i10)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i10, zz);
for (int b = 0; b < 16; b++)
- CPU.GPR[rt]._i8[b] = CPU.GPR[ra]._i8[b] ^ i10;
+ CPU.GPR[rt]._i8[b] = CPU.GPR[ra]._i8[b] ^ (s32)i10;
+ WRAPPER_END(rt, ra, i10, 0);
}
void CGTI(u32 rt, u32 ra, s32 i10)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i10, zz);
for (int w = 0; w < 4; w++)
- CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._i32[w] > i10 ? 0xffffffff : 0;
+ CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._i32[w] > (s32)i10 ? 0xffffffff : 0;
+ WRAPPER_END(rt, ra, i10, 0);
+ // TODO
}
void CGTHI(u32 rt, u32 ra, s32 i10)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i10, zz);
for (int h = 0; h < 8; h++)
- CPU.GPR[rt]._u16[h] = CPU.GPR[ra]._i16[h] > i10 ? 0xffff : 0;
+ CPU.GPR[rt]._u16[h] = CPU.GPR[ra]._i16[h] > (s32)i10 ? 0xffff : 0;
+ WRAPPER_END(rt, ra, i10, 0);
}
void CGTBI(u32 rt, u32 ra, s32 i10)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i10, zz);
for (int b = 0; b < 16; b++)
CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._i8[b] > (s8)(i10 & 0xff) ? 0xff : 0;
+ WRAPPER_END(rt, ra, i10, 0);
}
void HGTI(u32 rt, u32 ra, s32 i10)
{
@@ -1589,25 +2150,48 @@ private:
}
void CLGTI(u32 rt, u32 ra, s32 i10)
{
- UNIMPLEMENTED();
- for(u32 i = 0; i < 4; ++i)
+ WRAPPER_BEGIN(rt, ra, i10, zz);
+ for (int w = 0; w < 4; w++)
+ CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._u32[w] > (u32)i10 ? 0xffffffff : 0;
+ WRAPPER_END(rt, ra, i10, 0);
+ /*XmmVar v0(c);
+ if (i10 == -1)
{
- CPU.GPR[rt]._u32[i] = (CPU.GPR[ra]._u32[i] > (u32)i10) ? 0xffffffff : 0x00000000;
+ // zero result
+ c.xorps(v0, v0);
+ c.movaps(cpu_xmm(GPR[rt]), v0);
}
+ else
+ {
+ if (i10 == 0)
+ {
+ // load zero
+ c.pxor(v0, v0);
+ }
+ else
+ {
+ c.movdqa(v0, imm_xmm(s19_to_s32[i10 & 0x7ffff]));
+ }
+ c.psubd(v0, cpu_xmm(GPR[ra]));
+ c.psrad(v0, 32);
+ c.movdqa(cpu_xmm(GPR[rt]), v0);
+ }*/
}
void CLGTHI(u32 rt, u32 ra, s32 i10)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i10, zz);
for(u32 i = 0; i < 8; ++i)
{
- CPU.GPR[rt]._u16[i] = (CPU.GPR[ra]._u16[i] > (u16)i10) ? 0xffff : 0x0000;
+ CPU.GPR[rt]._u16[i] = (CPU.GPR[ra]._u16[i] > (u16)(s32)i10) ? 0xffff : 0x0000;
}
+ WRAPPER_END(rt, ra, i10, 0);
}
void CLGTBI(u32 rt, u32 ra, s32 i10)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i10, zz);
for (int b = 0; b < 16; b++)
CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[b] > (u8)(i10 & 0xff) ? 0xff : 0;
+ WRAPPER_END(rt, ra, i10, 0);
}
void HLGTI(u32 rt, u32 ra, s32 i10)
{
@@ -1616,36 +2200,43 @@ private:
}
void MPYI(u32 rt, u32 ra, s32 i10)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i10, zz);
for (int w = 0; w < 4; w++)
- CPU.GPR[rt]._i32[w] = CPU.GPR[ra]._i16[w*2] * i10;
+ CPU.GPR[rt]._i32[w] = CPU.GPR[ra]._i16[w*2] * (s32)i10;
+ WRAPPER_END(rt, ra, i10, 0);
}
void MPYUI(u32 rt, u32 ra, s32 i10)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i10, zz);
for (int w = 0; w < 4; w++)
CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._u16[w*2] * (u16)(i10 & 0xffff);
+ WRAPPER_END(rt, ra, i10, 0);
}
void CEQI(u32 rt, u32 ra, s32 i10)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i10, zz);
for(u32 i = 0; i < 4; ++i)
- CPU.GPR[rt]._u32[i] = (CPU.GPR[ra]._i32[i] == i10) ? 0xffffffff : 0x00000000;
+ CPU.GPR[rt]._u32[i] = (CPU.GPR[ra]._i32[i] == (s32)i10) ? 0xffffffff : 0x00000000;
+ WRAPPER_END(rt, ra, i10, 0);
+ // TODO
}
void CEQHI(u32 rt, u32 ra, s32 i10)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i10, zz);
for (int h = 0; h < 8; h++)
- CPU.GPR[rt]._u16[h] = (CPU.GPR[ra]._i16[h] == (s16)i10) ? 0xffff : 0;
+ CPU.GPR[rt]._u16[h] = (CPU.GPR[ra]._i16[h] == (s16)(s32)i10) ? 0xffff : 0;
+ WRAPPER_END(rt, ra, i10, 0);
}
void CEQBI(u32 rt, u32 ra, s32 i10)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, i10, zz);
for (int b = 0; b < 16; b++)
CPU.GPR[rt]._i8[b] = (CPU.GPR[ra]._i8[b] == (s8)(i10 & 0xff)) ? 0xff : 0;
+ WRAPPER_END(rt, ra, i10, 0);
}
void HEQI(u32 rt, u32 ra, s32 i10)
{
+ // TODO
UNIMPLEMENTED();
if(CPU.GPR[ra]._i32[3] == i10) CPU.Stop();
}
@@ -1662,35 +2253,57 @@ private:
}
void ILA(u32 rt, u32 i18)
{
- XmmVar v0(c);
- c.movaps(v0, imm_xmm(s19_to_s32[i18 & 0x3ffff]));
- c.movaps(cpu_xmm(GPR[rt]), v0);
+ WRAPPER_BEGIN(rt, i18, yy, zz);
+ CPU.GPR[rt]._u32[0] =
+ CPU.GPR[rt]._u32[1] =
+ CPU.GPR[rt]._u32[2] =
+ CPU.GPR[rt]._u32[3] = i18 & 0x3FFFF;
+ WRAPPER_END(rt, i18, 0, 0);
+ /*XmmVar v0(c);
+ if (i18 == 0)
+ {
+ c.xorps(v0, v0);
+ }
+ else
+ {
+ c.movaps(v0, imm_xmm(s19_to_s32[i18 & 0x3ffff]));
+ }
+ c.movaps(cpu_xmm(GPR[rt]), v0);*/
}
//0 - 3
void SELB(u32 rt, u32 ra, u32 rb, u32 rc)
{
- UNIMPLEMENTED();
- for(u64 i = 0; i < 2; ++i)
+ WRAPPER_BEGIN(rt, ra, rb, rc);
+ for (u64 i = 0; i < 2; ++i)
{
CPU.GPR[rt]._u64[i] =
- ( CPU.GPR[rc]._u64[i] & CPU.GPR[rb]._u64[i]) |
+ (CPU.GPR[rc]._u64[i] & CPU.GPR[rb]._u64[i]) |
(~CPU.GPR[rc]._u64[i] & CPU.GPR[ra]._u64[i]);
}
+ WRAPPER_END(rt, ra, rb, rc);
+ /*XmmVar v0(c);
+ XmmVar v1(c);
+ c.movaps(v0, cpu_xmm(GPR[ra]));
+ c.movaps(v1, cpu_xmm(GPR[rc]));
+ c.andnps(v0, v1);
+ c.andps(v1, cpu_xmm(GPR[rb]));
+ c.orps(v0, v1);
+ c.movaps(cpu_xmm(GPR[rt]), v0);*/
}
void SHUFB(u32 rt, u32 ra, u32 rb, u32 rc)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, rc);
const SPU_GPR_hdr _a = CPU.GPR[ra];
const SPU_GPR_hdr _b = CPU.GPR[rb];
for (int i = 0; i < 16; i++)
{
u8 b = CPU.GPR[rc]._u8[i];
- if(b & 0x80)
+ if (b & 0x80)
{
- if(b & 0x40)
+ if (b & 0x40)
{
- if(b & 0x20)
+ if (b & 0x20)
CPU.GPR[rt]._u8[i] = 0x80;
else
CPU.GPR[rt]._u8[i] = 0xFF;
@@ -1700,42 +2313,59 @@ private:
}
else
{
- if(b & 0x10)
+ if (b & 0x10)
CPU.GPR[rt]._u8[i] = _b._u8[15 - (b & 0x0F)];
else
CPU.GPR[rt]._u8[i] = _a._u8[15 - (b & 0x0F)];
}
}
+ WRAPPER_END(rt, ra, rb, rc);
+ // TODO
}
void MPYA(u32 rt, u32 ra, u32 rb, u32 rc)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, rc);
for (int w = 0; w < 4; w++)
CPU.GPR[rt]._i32[w] = CPU.GPR[ra]._i16[w*2] * CPU.GPR[rb]._i16[w*2] + CPU.GPR[rc]._i32[w];
+ WRAPPER_END(rt, ra, rb, rc);
}
void FNMS(u32 rt, u32 ra, u32 rb, u32 rc)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, rc);
CPU.GPR[rt]._f[0] = CPU.GPR[rc]._f[0] - CPU.GPR[ra]._f[0] * CPU.GPR[rb]._f[0];
CPU.GPR[rt]._f[1] = CPU.GPR[rc]._f[1] - CPU.GPR[ra]._f[1] * CPU.GPR[rb]._f[1];
CPU.GPR[rt]._f[2] = CPU.GPR[rc]._f[2] - CPU.GPR[ra]._f[2] * CPU.GPR[rb]._f[2];
CPU.GPR[rt]._f[3] = CPU.GPR[rc]._f[3] - CPU.GPR[ra]._f[3] * CPU.GPR[rb]._f[3];
+ WRAPPER_END(rt, ra, rb, rc);
+ /*XmmVar v0(c), v1(c);
+ c.movaps(v0, cpu_xmm(GPR[ra]));
+ c.mulps(v0, cpu_xmm(GPR[rb]));
+ c.movaps(v1, cpu_xmm(GPR[rc]));
+ c.subps(v1, v0);
+ c.movaps(cpu_xmm(GPR[rt]), v1);*/
}
void FMA(u32 rt, u32 ra, u32 rb, u32 rc)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, rc);
CPU.GPR[rt]._f[0] = CPU.GPR[ra]._f[0] * CPU.GPR[rb]._f[0] + CPU.GPR[rc]._f[0];
CPU.GPR[rt]._f[1] = CPU.GPR[ra]._f[1] * CPU.GPR[rb]._f[1] + CPU.GPR[rc]._f[1];
CPU.GPR[rt]._f[2] = CPU.GPR[ra]._f[2] * CPU.GPR[rb]._f[2] + CPU.GPR[rc]._f[2];
CPU.GPR[rt]._f[3] = CPU.GPR[ra]._f[3] * CPU.GPR[rb]._f[3] + CPU.GPR[rc]._f[3];
+ WRAPPER_END(rt, ra, rb, rc);
+ /*XmmVar v0(c);
+ c.movaps(v0, cpu_xmm(GPR[ra]));
+ c.mulps(v0, cpu_xmm(GPR[rb]));
+ c.addps(v0, cpu_xmm(GPR[rc]));
+ c.movaps(cpu_xmm(GPR[rt]), v0);*/
}
void FMS(u32 rt, u32 ra, u32 rb, u32 rc)
{
- UNIMPLEMENTED();
+ WRAPPER_BEGIN(rt, ra, rb, rc);
CPU.GPR[rt]._f[0] = CPU.GPR[ra]._f[0] * CPU.GPR[rb]._f[0] - CPU.GPR[rc]._f[0];
CPU.GPR[rt]._f[1] = CPU.GPR[ra]._f[1] * CPU.GPR[rb]._f[1] - CPU.GPR[rc]._f[1];
CPU.GPR[rt]._f[2] = CPU.GPR[ra]._f[2] * CPU.GPR[rb]._f[2] - CPU.GPR[rc]._f[2];
CPU.GPR[rt]._f[3] = CPU.GPR[ra]._f[3] * CPU.GPR[rb]._f[3] - CPU.GPR[rc]._f[3];
+ WRAPPER_END(rt, ra, rb, rc);
}
void UNK(u32 code, u32 opcode, u32 gcode)
diff --git a/rpcs3/Emu/Cell/SPURecompilerCore.cpp b/rpcs3/Emu/Cell/SPURecompilerCore.cpp
index bf3662c399..940ef44a4a 100644
--- a/rpcs3/Emu/Cell/SPURecompilerCore.cpp
+++ b/rpcs3/Emu/Cell/SPURecompilerCore.cpp
@@ -7,7 +7,7 @@ static const SPUImmTable g_spu_imm;
SPURecompilerCore::SPURecompilerCore(SPUThread& cpu)
: m_enc(new SPURecompiler(cpu, *this))
-, m_inter(new SPUInterpreter(cpu))
+, inter(new SPUInterpreter(cpu))
, CPU(cpu)
, compiler(&runtime)
{
@@ -17,17 +17,17 @@ SPURecompilerCore::SPURecompilerCore(SPUThread& cpu)
SPURecompilerCore::~SPURecompilerCore()
{
delete m_enc;
- delete m_inter;
+ delete inter;
}
void SPURecompilerCore::Decode(const u32 code) // decode instruction and run with interpreter
{
- (*SPU_instr::rrr_list)(m_inter, code);
+ (*SPU_instr::rrr_list)(inter, code);
}
void SPURecompilerCore::Compile(u16 pos)
{
- compiler.addFunc(kFuncConvHost, FuncBuilder4<u16, void*, void*, void*, u16>());
+ compiler.addFunc(kFuncConvHost, FuncBuilder4<u32, void*, void*, void*, u32>());
entry[pos].host = pos;
GpVar cpu_var(compiler, kVarTypeIntPtr, "cpu");
@@ -45,15 +45,26 @@ void SPURecompilerCore::Compile(u16 pos)
compiler.alloc(imm_var);
m_enc->imm_var = &imm_var;
- GpVar pos_var(compiler, kVarTypeUInt16, "pos");
+ GpVar pos_var(compiler, kVarTypeUInt32, "pos");
compiler.setArg(3, pos_var);
compiler.alloc(pos_var);
+ m_enc->pos_var = &pos_var;
+
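+ // pos_var doubles as the return value: cleared here, overwritten by branch instructions with the target address in words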
+ compiler.xor_(pos_var, pos_var);
+
while (true)
{
const u32 opcode = Memory.Read32(CPU.dmac.ls_offset + pos * 4);
m_enc->do_finalize = false;
- (*SPU_instr::rrr_list)(m_enc, opcode); // compile single opcode
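+ // an all-zero opcode simply terminates the current block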
+ if (opcode)
+ {
+ (*SPU_instr::rrr_list)(m_enc, opcode); // compile single opcode
+ }
+ else
+ {
+ m_enc->do_finalize = true;
+ }
bool fin = m_enc->do_finalize;
entry[pos].valid = opcode;
@@ -63,7 +74,6 @@ void SPURecompilerCore::Compile(u16 pos)
entry[pos].host = entry[pos - 1].host;
}
- compiler.xor_(pos_var, pos_var);
compiler.ret(pos_var);
compiler.endFunc();
entry[entry[pos].host].pointer = compiler.make();
@@ -74,6 +84,7 @@ u8 SPURecompilerCore::DecodeMemory(const u64 address)
const u64 m_offset = address - CPU.PC;
const u16 pos = (CPU.PC >> 2);
+ //ConLog.Write("DecodeMemory: pos=%d", pos);
u32* ls = (u32*)Memory.VirtualToRealAddr(m_offset);
if (!pos)
@@ -115,16 +126,16 @@ u8 SPURecompilerCore::DecodeMemory(const u64 address)
return 0;
}
// jump
- typedef u16(*Func)(void* _cpu, void* _ls, const SPUImmTable* _imm, u16 _pos);
+ typedef u32(*Func)(void* _cpu, void* _ls, const SPUImmTable* _imm, u32 _pos);
 Func func = asmjit_cast<Func>(entry[entry[pos].host].pointer);
void* cpu = (u8*)&CPU.GPR[0] - offsetof(SPUThread, GPR[0]); // ugly cpu base offset detection
- u16 res = pos == entry[pos].host ? 0 : pos;
- res = func(cpu, ls, &g_spu_imm, res);
+ u16 res = (pos == entry[pos].host) ? 0 : pos;
+ res = (u16)func(cpu, ls, &g_spu_imm, res);
- ConLog.Write("func -> %d", res);
+ CPU.SetBranch((u64)res << 2);
return 0;
/*Decode(Memory.Read32(address));
diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp
index e3cc2b8db0..f93e39f4d5 100644
--- a/rpcs3/Emu/Cell/SPUThread.cpp
+++ b/rpcs3/Emu/Cell/SPUThread.cpp
@@ -3,6 +3,7 @@
#include "Emu/Cell/SPUDecoder.h"
#include "Emu/Cell/SPUInterpreter.h"
#include "Emu/Cell/SPUDisAsm.h"
+#include "Emu/Cell/SPURecompiler.h"
SPUThread& GetCurrentSPUThread()
{
@@ -75,6 +76,8 @@ void SPUThread::DoRun()
break;
case 1:
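+ // decoder mode 1 now selects the SPU recompiler (it previously fell through to the interpreter case)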
+ m_dec = new SPURecompilerCore(*this);
+ break;
case 2:
m_dec = new SPUDecoder(*new SPUInterpreter(*this));
break;
diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h
index 5c947664c6..a581130473 100644
--- a/rpcs3/Emu/Cell/SPUThread.h
+++ b/rpcs3/Emu/Cell/SPUThread.h
@@ -213,20 +213,21 @@ public:
union SPU_GPR_hdr
{
+ u32 _u32[4];
+ float _f[4];
u128 _u128;
s128 _i128;
__m128 _m128;
__m128i _m128i;
u64 _u64[2];
s64 _i64[2];
- u32 _u32[4];
s32 _i32[4];
u16 _u16[8];
s16 _i16[8];
u8 _u8[16];
s8 _i8[16];
double _d[2];
- float _f[4];
+
SPU_GPR_hdr() {}
@@ -243,9 +244,9 @@ union SPU_GPR_hdr
union SPU_SPR_hdr
{
+ u32 _u32[4];
u128 _u128;
s128 _i128;
- u32 _u32[4];
SPU_SPR_hdr() {}
@@ -299,19 +300,19 @@ public:
#else
static const bool x86 = true;
#endif
-
- private:
union _CRT_ALIGN(8) {
struct {
volatile u32 m_index;
u32 m_value[max_count];
};
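+ // alternative view of the same index/value storage as 16-bit halves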
+ struct {
+ volatile u32 m_index2;
+ u16 m_val16[max_count * 2];
+ };
volatile u64 m_indval;
};
std::mutex m_lock;
- public:
-
Channel()
{
Init();
@@ -586,7 +587,7 @@ public:
}
}
- Sleep(1); // hack
+ //Sleep(1); // hack
switch(cmd & ~(MFC_BARRIER_MASK | MFC_FENCE_MASK | MFC_LIST_MASK | MFC_RESULT_MASK))
{
@@ -1125,6 +1126,115 @@ public:
if (Emu.IsStopped()) ConLog.Warning("%s(%s) aborted", __FUNCTION__, spu_ch_name[ch]);
}
+ void DoStop(u32 code)
+ {
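+ // STOP-and-signal handler: 0x110 = sys_spu_thread_receive_event, 0x102 = sys_spu_thread_exit, anything else is logged as unknown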
+ SetExitStatus(code); // exit code (not status)
+
+ switch (code)
+ {
+ case 0x110: /* ===== sys_spu_thread_receive_event ===== */
+ {
+ u32 spuq = 0;
+ if (!SPU.Out_MBox.Pop(spuq))
+ {
+ ConLog.Error("sys_spu_thread_receive_event: cannot read Out_MBox");
+ SPU.In_MBox.PushUncond(CELL_EINVAL); // ???
+ return;
+ }
+
+ if (SPU.In_MBox.GetCount())
+ {
+ ConLog.Error("sys_spu_thread_receive_event(spuq=0x%x): In_MBox is not empty", spuq);
+ SPU.In_MBox.PushUncond(CELL_EBUSY); // ???
+ return;
+ }
+
+ if (Ini.HLELogging.GetValue())
+ {
+ ConLog.Write("sys_spu_thread_receive_event(spuq=0x%x)", spuq);
+ }
+
+ EventQueue* eq;
+ if (!SPUQs.GetEventQueue(FIX_SPUQ(spuq), eq))
+ {
+ SPU.In_MBox.PushUncond(CELL_EINVAL); // TODO: check error value
+ return;
+ }
+
+ u32 tid = GetId();
+
+ eq->sq.push(tid); // add thread to sleep queue
+
+ while (true)
+ {
+ switch (eq->owner.trylock(tid))
+ {
+ case SMR_OK:
+ if (!eq->events.count())
+ {
+ eq->owner.unlock(tid);
+ break;
+ }
+ else
+ {
+ u32 next = (eq->protocol == SYS_SYNC_FIFO) ? eq->sq.pop() : eq->sq.pop_prio();
+ if (next != tid)
+ {
+ eq->owner.unlock(tid, next);
+ break;
+ }
+ }
+ case SMR_SIGNAL:
+ {
+ sys_event_data event;
+ eq->events.pop(event);
+ eq->owner.unlock(tid);
+ SPU.In_MBox.PushUncond(CELL_OK);
+ SPU.In_MBox.PushUncond(event.data1);
+ SPU.In_MBox.PushUncond(event.data2);
+ SPU.In_MBox.PushUncond(event.data3);
+ return;
+ }
+ case SMR_FAILED: break;
+ default: eq->sq.invalidate(tid); SPU.In_MBox.PushUncond(CELL_ECANCELED); return;
+ }
+
+ Sleep(1);
+ if (Emu.IsStopped())
+ {
+ ConLog.Warning("sys_spu_thread_receive_event(spuq=0x%x) aborted", spuq);
+ eq->sq.invalidate(tid);
+ return;
+ }
+ }
+ }
+ break;
+ case 0x102:
+ if (!SPU.Out_MBox.GetCount())
+ {
+ ConLog.Error("sys_spu_thread_exit (no status, code 0x102)");
+ }
+ else if (Ini.HLELogging.GetValue())
+ {
+ // the real exit status
+ ConLog.Write("sys_spu_thread_exit (status=0x%x)", SPU.Out_MBox.GetValue());
+ }
+ Stop();
+ break;
+ default:
+ if (!SPU.Out_MBox.GetCount())
+ {
+ ConLog.Error("Unknown STOP code: 0x%x (no message)", code);
+ }
+ else
+ {
+ ConLog.Error("Unknown STOP code: 0x%x (message=0x%x)", code, SPU.Out_MBox.GetValue());
+ }
+ Stop();
+ break;
+ }
+ }
+
bool IsGoodLSA(const u32 lsa) const { return Memory.IsGoodAddr(lsa + m_offset) && lsa < 0x40000; }
virtual u8 ReadLS8 (const u32 lsa) const { return Memory.Read8 (lsa + m_offset); } // m_offset & 0x3fffc ?????
virtual u16 ReadLS16 (const u32 lsa) const { return Memory.Read16 (lsa + m_offset); }
diff --git a/rpcs3/rpcs3.vcxproj b/rpcs3/rpcs3.vcxproj
index dc4dfcfc0b..cf6ff47c3d 100644
--- a/rpcs3/rpcs3.vcxproj
+++ b/rpcs3/rpcs3.vcxproj
@@ -393,6 +393,7 @@
+
diff --git a/rpcs3/rpcs3.vcxproj.filters b/rpcs3/rpcs3.vcxproj.filters
index 1e00a296fe..3589bf81ee 100644
--- a/rpcs3/rpcs3.vcxproj.filters
+++ b/rpcs3/rpcs3.vcxproj.filters
@@ -702,5 +702,8 @@
Utilities
+
+ Include
+
\ No newline at end of file