From f1a67bb1a2cb8e8bbdf564c262b131deef31bd87 Mon Sep 17 00:00:00 2001
From: degasus
Date: Thu, 27 Oct 2016 00:32:04 +0200
Subject: [PATCH] JitArm64: Implement divwx

---
Review notes (placed after the "---" separator, so they are outside the
commit message):

  * Arm64Emitter.h: adds CSETM(Rd, cond). It emits CSINV Rd, zr, zr with the
    low bit of the condition flipped (cond ^ 1), picking ZR or WZR through
    Is64Bit(Rd). This mirrors the helper directly above it, which does the
    same with CSINC. Architecturally this is the CSETM alias: Rd becomes all
    ones when cond holds and 0 otherwise.

  * Jit.h: declares JitArm64::divwx.

  * JitArm64_Integer.cpp: adds JitArm64::divwx (signed 32-bit divide of rA by
    rB into rD). Table entry 1003 (divwox) is routed here as well, and the
    function starts with FALLBACK_IF(inst.OE); the macro is defined elsewhere,
    presumably it hands the OE=1 form back to the interpreter -- confirm.
    Three code paths follow:
      1. rA and rB are both known immediates: the result is folded at compile
         time. If rB == 0, or rA == 0x80000000 with rB == -1, the result is
         -1 when rA has its sign bit set and rB == 0, and 0 otherwise. In all
         other cases it is imm_a / imm_b. rD is set as an immediate and
         ComputeRC(imm_d) is called when Rc is set.
      2. rB is a known immediate other than 0 and -1: neither special case
         from path 1 can occur, so the divisor is loaded into a temporary
         with MOVI2R and a bare SDIV is emitted.
      3. Otherwise the checks are emitted as code:
           CBZ rB                  -> slow path when the divisor is zero
           CMP rA, 0x80000000
           CCMN rB, #1, nzcv=0, EQ -> EQ only if rA == 0x80000000 and
                                      rB == -1; slow path on EQ
           SDIV rD, rA, rB         -> fast path
         Slow path:
           CMP rB, #0
           CCMP rA, #0, nzcv=0, EQ -> compares rA with 0 only if rB == 0,
                                      otherwise clears the flags
           CSETM rD, LT            -> -1 if rB == 0 and rA < 0, else 0
         This produces the same values as path 1.
    NOTE(review): both paths return 0 for 0x80000000 / -1. Whether that agrees
    with the interpreter and with hardware cannot be checked from this patch
    alone -- confirm.

  * JitArm64_Tables.cpp: entries 491 (divwx) and 1003 (divwox) now point at
    JitArm64::divwx instead of FallBackToInterpreter. Every other removed and
    added pair in the hunk is token-identical; presumably only the comment
    column alignment changed.

  * NOTE(review): this copy of the patch had its line breaks and indentation
    collapsed. The layout below is reconstructed, so the exact whitespace of
    context and removed lines (including the comment alignment in the table
    hunk) is not authoritative. The last hunk also appears to be cut off one
    context line early (its header declares 36 lines per side). Regenerate the
    patch from commit f1a67bb1a2cb before applying it.

 Source/Core/Common/Arm64Emitter.h             |  5 ++
 Source/Core/Core/PowerPC/JitArm64/Jit.h       |  1 +
 .../PowerPC/JitArm64/JitArm64_Integer.cpp     | 73 +++++++++++++++++++
 .../Core/PowerPC/JitArm64/JitArm64_Tables.cpp | 60 +++++++--------
 4 files changed, 109 insertions(+), 30 deletions(-)

diff --git a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h
index ea69c371c3..87d89be9a2 100644
--- a/Source/Core/Common/Arm64Emitter.h
+++ b/Source/Core/Common/Arm64Emitter.h
@@ -651,6 +651,11 @@ public:
     ARM64Reg zr = Is64Bit(Rd) ? ZR : WZR;
     CSINC(Rd, zr, zr, (CCFlags)((u32)cond ^ 1));
   }
+  void CSETM(ARM64Reg Rd, CCFlags cond)
+  {
+    ARM64Reg zr = Is64Bit(Rd) ? ZR : WZR;
+    CSINV(Rd, zr, zr, (CCFlags)((u32)cond ^ 1));
+  }
   void NEG(ARM64Reg Rd, ARM64Reg Rs) { SUB(Rd, Is64Bit(Rd) ? ZR : WZR, Rs); }
   // Data-Processing 1 source
   void RBIT(ARM64Reg Rd, ARM64Reg Rn);
diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h
index 297bda67d9..5225ab339c 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.h
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h
@@ -81,6 +81,7 @@ public:
   void addic(UGeckoInstruction inst);
   void mulli(UGeckoInstruction inst);
   void addzex(UGeckoInstruction inst);
+  void divwx(UGeckoInstruction inst);
   void subfx(UGeckoInstruction inst);
   void addcx(UGeckoInstruction inst);
   void slwx(UGeckoInstruction inst);
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp
index 3099f31017..1ba3ca5ad5 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp
@@ -1009,6 +1009,79 @@ void JitArm64::divwux(UGeckoInstruction inst)
   }
 }
 
+void JitArm64::divwx(UGeckoInstruction inst)
+{
+  INSTRUCTION_START
+  JITDISABLE(bJITIntegerOff);
+  FALLBACK_IF(inst.OE);
+
+  int a = inst.RA, b = inst.RB, d = inst.RD;
+
+  if (gpr.IsImm(a) && gpr.IsImm(b))
+  {
+    s32 imm_a = gpr.GetImm(a);
+    s32 imm_b = gpr.GetImm(b);
+    s32 imm_d;
+    if (imm_b == 0 || ((u32)imm_a == 0x80000000 && imm_b == -1))
+    {
+      if (((u32)imm_a & 0x80000000) && imm_b == 0)
+        imm_d = -1;
+      else
+        imm_d = 0;
+    }
+    else
+    {
+      imm_d = (u32)(imm_a / imm_b);
+    }
+    gpr.SetImmediate(d, imm_d);
+
+    if (inst.Rc)
+      ComputeRC(imm_d);
+  }
+  else if (gpr.IsImm(b) && gpr.GetImm(b) != 0 && gpr.GetImm(b) != -1)
+  {
+    ARM64Reg WA = gpr.GetReg();
+    MOVI2R(WA, gpr.GetImm(b));
+
+    gpr.BindToRegister(d, d == a);
+
+    SDIV(gpr.R(d), gpr.R(a), WA);
+
+    gpr.Unlock(WA);
+
+    if (inst.Rc)
+      ComputeRC(gpr.R(d));
+  }
+  else
+  {
+    gpr.BindToRegister(d, d == a || d == b);
+
+    ARM64Reg WA = gpr.GetReg();
+
+    FixupBranch slow1 = CBZ(gpr.R(b));
+    MOVI2R(WA, -0x80000000LL);
+    CMP(gpr.R(a), WA);
+    CCMN(gpr.R(b), 1, 0, CC_EQ);
+    FixupBranch slow2 = B(CC_EQ);
+    SDIV(gpr.R(d), gpr.R(a), gpr.R(b));
+    FixupBranch done = B();
+
+    SetJumpTarget(slow1);
+    SetJumpTarget(slow2);
+
+    CMP(gpr.R(b), 0);
+    CCMP(gpr.R(a), 0, 0, CC_EQ);
+    CSETM(gpr.R(d), CC_LT);
+
+    SetJumpTarget(done);
+
+    gpr.Unlock(WA);
+
+    if (inst.Rc)
+      ComputeRC(gpr.R(d));
+  }
+}
+
 void JitArm64::slwx(UGeckoInstruction inst)
 {
   INSTRUCTION_START
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp
index 340342665b..b928c831ee 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp
@@ -178,36 +178,36 @@ static GekkoOPTemplate table19[] = {
 };
 
 static GekkoOPTemplate table31[] = {
-    {266, &JitArm64::addx},  // addx
-    {778, &JitArm64::addx},  // addox
-    {10, &JitArm64::addcx},  // addcx
-    {522, &JitArm64::addcx},  // addcox
-    {138, &JitArm64::addex},  // addex
-    {650, &JitArm64::addex},  // addeox
-    {234, &JitArm64::FallBackToInterpreter},  // addmex
-    {746, &JitArm64::FallBackToInterpreter},  // addmeox
-    {202, &JitArm64::addzex},  // addzex
-    {714, &JitArm64::addzex},  // addzeox
-    {491, &JitArm64::FallBackToInterpreter},  // divwx
-    {1003, &JitArm64::FallBackToInterpreter},  // divwox
-    {459, &JitArm64::divwux},  // divwux
-    {971, &JitArm64::divwux},  // divwuox
-    {75, &JitArm64::mulhwx},  // mulhwx
-    {11, &JitArm64::mulhwux},  // mulhwux
-    {235, &JitArm64::mullwx},  // mullwx
-    {747, &JitArm64::mullwx},  // mullwox
-    {104, &JitArm64::negx},  // negx
-    {616, &JitArm64::negx},  // negox
-    {40, &JitArm64::subfx},  // subfx
-    {552, &JitArm64::subfx},  // subfox
-    {8, &JitArm64::subfcx},  // subfcx
-    {520, &JitArm64::subfcx},  // subfcox
-    {136, &JitArm64::subfex},  // subfex
-    {648, &JitArm64::subfex},  // subfeox
-    {232, &JitArm64::FallBackToInterpreter},  // subfmex
-    {744, &JitArm64::FallBackToInterpreter},  // subfmeox
-    {200, &JitArm64::FallBackToInterpreter},  // subfzex
-    {712, &JitArm64::FallBackToInterpreter},  // subfzeox
+    {266, &JitArm64::addx},  // addx
+    {778, &JitArm64::addx},  // addox
+    {10, &JitArm64::addcx},  // addcx
+    {522, &JitArm64::addcx},  // addcox
+    {138, &JitArm64::addex},  // addex
+    {650, &JitArm64::addex},  // addeox
+    {234, &JitArm64::FallBackToInterpreter},  // addmex
+    {746, &JitArm64::FallBackToInterpreter},  // addmeox
+    {202, &JitArm64::addzex},  // addzex
+    {714, &JitArm64::addzex},  // addzeox
+    {491, &JitArm64::divwx},  // divwx
+    {1003, &JitArm64::divwx},  // divwox
+    {459, &JitArm64::divwux},  // divwux
+    {971, &JitArm64::divwux},  // divwuox
+    {75, &JitArm64::mulhwx},  // mulhwx
+    {11, &JitArm64::mulhwux},  // mulhwux
+    {235, &JitArm64::mullwx},  // mullwx
+    {747, &JitArm64::mullwx},  // mullwox
+    {104, &JitArm64::negx},  // negx
+    {616, &JitArm64::negx},  // negox
+    {40, &JitArm64::subfx},  // subfx
+    {552, &JitArm64::subfx},  // subfox
+    {8, &JitArm64::subfcx},  // subfcx
+    {520, &JitArm64::subfcx},  // subfcox
+    {136, &JitArm64::subfex},  // subfex
+    {648, &JitArm64::subfex},  // subfeox
+    {232, &JitArm64::FallBackToInterpreter},  // subfmex
+    {744, &JitArm64::FallBackToInterpreter},  // subfmeox
+    {200, &JitArm64::FallBackToInterpreter},  // subfzex
+    {712, &JitArm64::FallBackToInterpreter},  // subfzeox
     {28, &JitArm64::boolX},  // andx
     {60, &JitArm64::boolX},  // andcx