From 5ee7f861997a5a3a6dafde8844eece873468d532 Mon Sep 17 00:00:00 2001
From: Markus Wick
Date: Fri, 11 Aug 2017 23:32:36 +0200
Subject: [PATCH] JitArm64: Optimize rlwinmx.

The new code adds fast paths for most usages which fit in one instruction
with one cycle latency.
---
 .../PowerPC/JitArm64/JitArm64_Integer.cpp     | 27 +++++++++++++++++
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp
index fd38472059..c9c3e57275 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp
@@ -516,11 +516,28 @@ void JitArm64::rlwinmx(UGeckoInstruction inst)
 
   gpr.BindToRegister(a, a == s);
 
-  ARM64Reg WA = gpr.GetReg();
-  ArithOption Shift(gpr.R(s), ST_ROR, 32 - inst.SH);
-  MOVI2R(WA, mask);
-  AND(gpr.R(a), WA, gpr.R(s), Shift);
-  gpr.Unlock(WA);
+  if (!inst.SH)
+  {
+    // Immediate mask
+    ANDI2R(gpr.R(a), gpr.R(s), mask);
+  }
+  else if (inst.ME == 31 && 31 < inst.SH + inst.MB)
+  {
+    // Bit select of the upper part
+    UBFX(gpr.R(a), gpr.R(s), 32 - inst.SH, 32 - inst.MB);
+  }
+  else if (inst.ME == 31 - inst.SH && 32 > inst.SH + inst.MB)
+  {
+    // Bit select of the lower part
+    UBFIZ(gpr.R(a), gpr.R(s), inst.SH, 32 - inst.SH - inst.MB);
+  }
+  else
+  {
+    ARM64Reg WA = gpr.GetReg();
+    MOVI2R(WA, mask);
+    AND(gpr.R(a), WA, gpr.R(s), ArithOption(gpr.R(s), ST_ROR, 32 - inst.SH));
+    gpr.Unlock(WA);
+  }
 
   if (inst.Rc)
     ComputeRC(gpr.R(a), 0);