diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h b/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h
index 160a17bc2c..2722317850 100644
--- a/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h
+++ b/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h
@@ -214,6 +214,11 @@ public:
 	void fctiwzx(UGeckoInstruction _inst);
 	void fcmpo(UGeckoInstruction _inst);
 	void fcmpu(UGeckoInstruction _inst);
+	void fnmaddx(UGeckoInstruction _inst);
+	void fnmaddsx(UGeckoInstruction _inst);
+	void fresx(UGeckoInstruction _inst);
+	void fselx(UGeckoInstruction _inst);
+	void frsqrtex(UGeckoInstruction _inst);
 
 	// Floating point loadStore
 	void lfXX(UGeckoInstruction _inst);
diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_FloatingPoint.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_FloatingPoint.cpp
index eb0501002c..8c600fd588 100644
--- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_FloatingPoint.cpp
+++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_FloatingPoint.cpp
@@ -541,3 +541,175 @@ void JitArm::fmaddx(UGeckoInstruction inst)
 
 	if (inst.Rc) Helper_UpdateCR1(vD0);
 }
+
+// fnmaddx: emits FD.R0 = -(FA.R0 * FC.R0 + FB.R0).
+// Only the R0 half of FD is written; Helper_UpdateCR1 runs when Rc is set.
+void JitArm::fnmaddx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITFloatingPointOff)
+
+	u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
+
+	ARMReg vA0 = fpr.R0(a);
+	ARMReg vB0 = fpr.R0(b);
+	ARMReg vC0 = fpr.R0(c);
+	// presumably `false` skips loading FD's old value, since it is overwritten - confirm
+	ARMReg vD0 = fpr.R0(d, false);
+
+	ARMReg V0 = fpr.GetReg();
+
+	// Accumulate in a scratch register: V0 = B + A * C.
+	VMOV(V0, vB0);
+	VMLA(V0, vA0, vC0);
+
+	// Negate the sum into the destination.
+	VNEG(vD0, V0);
+
+	fpr.Unlock(V0);
+
+	if (inst.Rc) Helper_UpdateCR1(vD0);
+}
+
+// fnmaddsx: same computation as fnmaddx, but the negated result is written
+// to both halves (R0 and R1) of FD.
+void JitArm::fnmaddsx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITFloatingPointOff)
+
+	u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
+
+	ARMReg vA0 = fpr.R0(a);
+	ARMReg vB0 = fpr.R0(b);
+	ARMReg vC0 = fpr.R0(c);
+	ARMReg vD0 = fpr.R0(d, false);
+	ARMReg vD1 = fpr.R1(d, false);
+
+	ARMReg V0 = fpr.GetReg();
+
+	// V0 = B + A * C, kept in a scratch register so both stores see the same value.
+	VMOV(V0, vB0);
+	VMLA(V0, vA0, vC0);
+
+	VNEG(vD0, V0);
+	VNEG(vD1, V0);
+
+	fpr.Unlock(V0);
+
+	if (inst.Rc) Helper_UpdateCR1(vD0);
+}
+
+// fresx: currently always handed to Default(), the handler the opcode tables
+// use for instructions without a JIT implementation.
+// XXX: Messes up Super Mario Sunshine title screen
+void JitArm::fresx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITFloatingPointOff)
+
+	u32 b = inst.FB, d = inst.FD;
+
+	// Everything below this early return is intentionally unreachable.
+	Default(inst); return;
+
+	ARMReg vB0 = fpr.R0(b);
+	ARMReg vD0 = fpr.R0(d, false);
+	ARMReg vD1 = fpr.R1(d, false);
+
+	ARMReg V0 = fpr.GetReg();
+	// NOTE(review): V0 is an FPR-cache register, yet it is loaded through MOVI2R,
+	// which frsqrtex uses for a GPR. Looks like a float-immediate load was
+	// intended - confirm before enabling this path.
+	MOVI2R(V0, 1.0, INVALID_REG); // temp reg isn't needed for 1.0
+
+	// 1.0 / B into both halves of FD.
+	VDIV(vD1, V0, vB0);
+	VDIV(vD0, V0, vB0);
+	fpr.Unlock(V0);
+
+	if (inst.Rc) Helper_UpdateCR1(vD0);
+}
+
+// fselx: emits FD.R0 = FC.R0 when the compare on FA.R0 yields GE, else FB.R0.
+// The Rc form is handed to Default().
+void JitArm::fselx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	// Scalar FP instruction: gate it with the same switch as the other f* ops.
+	JITDISABLE(bJITFloatingPointOff)
+
+	u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
+
+	if (inst.Rc) {
+		Default(inst); return;
+	}
+	ARMReg vA0 = fpr.R0(a);
+	ARMReg vB0 = fpr.R0(b);
+	ARMReg vC0 = fpr.R0(c);
+	ARMReg vD0 = fpr.R0(d, false);
+
+	// Single-operand VCMP - presumably a compare against zero (confirm in the
+	// emitter); VMRS then makes the result visible to conditional branches.
+	VCMP(vA0);
+	VMRS(_PC);
+
+	FixupBranch selectC = B_CC(CC_GE);
+	VMOV(vD0, vB0); // not GE: take B
+	FixupBranch done = B();
+	SetJumpTarget(selectC);
+	VMOV(vD0, vC0); // GE: take C
+	SetJumpTarget(done);
+}
+
+// frsqrtex: emits a reciprocal-square-root estimate of FB.R0 into FD.R0 and
+// updates FPSCR exception bits. The Rc form is handed to Default().
+void JitArm::frsqrtex(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	// Scalar FP instruction: gate it with the same switch as the other f* ops.
+	JITDISABLE(bJITFloatingPointOff)
+
+	u32 b = inst.FB, d = inst.FD;
+	if (inst.Rc){
+		Default(inst); return;
+	}
+	ARMReg vB0 = fpr.R0(b);
+	ARMReg vD0 = fpr.R0(d, false);
+	ARMReg fpscrReg = gpr.GetReg();
+	// NOTE(review): D1 (and D0/S0 below) are used directly instead of going
+	// through fpr.GetReg(); presumably they are reserved scratch - confirm.
+	ARMReg V0 = D1;
+
+	// fpscrReg first serves as the address of PPC_NAN, then holds FPSCR.
+	MOVI2R(fpscrReg, (u32)&PPC_NAN);
+	VLDR(V0, fpscrReg, 0);
+	LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
+
+	VCMP(vB0);
+	VMRS(_PC);
+	// NOTE(review): as written, the NaN/VXSQRT path runs when the LT branch is
+	// NOT taken, and the ZX path is only reached after LT (where EQ cannot
+	// hold). That looks inverted relative to the label names - confirm.
+	FixupBranch Less0 = B_CC(CC_LT);
+	VMOV(vD0, V0);
+	SetFPException(fpscrReg, FPSCR_VXSQRT);
+	FixupBranch SkipOrr0 = B();
+	SetJumpTarget(Less0);
+	FixupBranch noException = B_CC(CC_EQ);
+	SetFPException(fpscrReg, FPSCR_ZX);
+	SetJumpTarget(noException);
+	SetJumpTarget(SkipOrr0);
+
+	// Every path falls through to here, so vD0 is always overwritten with the
+	// estimate. S0 aliases the low half of D0, which is what VRSQRTE operates on.
+	VCVT(S0, vB0, 0);
+
+	NEONXEmitter nemit(this);
+	nemit.VRSQRTE(F_32, D0, D0);
+	VCVT(vD0, S0, 0);
+
+	STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
+	gpr.Unlock(fpscrReg);
+}
+
diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp
index 848f5338b5..9eea997317 100644
--- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp
+++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp
@@ -336,12 +336,12 @@ static GekkoOPTemplate table59[] =
 	{20, &JitArm::fsubsx}, //"fsubsx", OPTYPE_FPU, FL_RC_BIT_F}},
 	{21, &JitArm::faddsx}, //"faddsx", OPTYPE_FPU, FL_RC_BIT_F}},
 //	{22, &JitArm::Default}, //"fsqrtsx", OPTYPE_FPU, FL_RC_BIT_F}}, // Not implemented on gekko
-	{24, &JitArm::Default}, //"fresx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{24, &JitArm::fresx}, //"fresx", OPTYPE_FPU, FL_RC_BIT_F}},
 	{25, &JitArm::fmulsx}, //"fmulsx", OPTYPE_FPU, FL_RC_BIT_F}},
 	{28, &JitArm::Default}, //"fmsubsx", OPTYPE_FPU, FL_RC_BIT_F}},
 	{29, &JitArm::fmaddsx}, //"fmaddsx", OPTYPE_FPU, FL_RC_BIT_F}},
 	{30, &JitArm::Default}, //"fnmsubsx", OPTYPE_FPU, FL_RC_BIT_F}},
-	{31, &JitArm::Default}, //"fnmaddsx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{31, &JitArm::fnmaddsx}, //"fnmaddsx", OPTYPE_FPU, FL_RC_BIT_F}},
 };
 
 static GekkoOPTemplate table63[] =
@@ -370,13 +370,13 @@ static GekkoOPTemplate table63_2[] =
 	{20, &JitArm::fsubx}, //"fsubx", OPTYPE_FPU, FL_RC_BIT_F}},
 	{21, &JitArm::faddx}, //"faddx", OPTYPE_FPU, FL_RC_BIT_F}},
 	{22, &JitArm::Default}, //"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F}},
-	{23, &JitArm::Default}, //"fselx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{23, &JitArm::fselx}, //"fselx", OPTYPE_FPU, FL_RC_BIT_F}},
 	{25, &JitArm::fmulx}, //"fmulx", OPTYPE_FPU, FL_RC_BIT_F}},
-	{26, &JitArm::Default}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}},
+	{26, &JitArm::frsqrtex}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}},
 	{28, &JitArm::Default}, //"fmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
 	{29, &JitArm::fmaddx}, //"fmaddx", OPTYPE_FPU, FL_RC_BIT_F}},
 	{30, &JitArm::Default}, //"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
-	{31, &JitArm::Default}, //"fnmaddx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{31, &JitArm::fnmaddx}, //"fnmaddx", OPTYPE_FPU, FL_RC_BIT_F}},
 };
 
 