From 4aa0c0133a51de5b1835fe1579dff1ce4bf4c6bc Mon Sep 17 00:00:00 2001 From: Bram Speeckaert Date: Tue, 1 Nov 2022 19:01:38 +0100 Subject: [PATCH 01/11] JitArm64: Introduce MultiplyImmediate Add a new function that will handle all the special cases regarding multiplication. It does nothing for now, but will be expanded in follow-up commits. --- Source/Core/Core/PowerPC/JitArm64/Jit.h | 1 + Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index e9abdbbc4b..9372fffd2d 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -332,6 +332,7 @@ protected: void (ARM64XEmitter::*op)(Arm64Gen::ARM64Reg, Arm64Gen::ARM64Reg, u64, Arm64Gen::ARM64Reg), bool Rc = false); + bool MultiplyImmediate(u32 imm, int a, int d, bool rc); void SetFPRFIfNeeded(bool single, Arm64Gen::ARM64Reg reg); void Force25BitPrecision(Arm64Gen::ARM64Reg output, Arm64Gen::ARM64Reg input); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 2fc1524809..046a4f9c1b 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -876,6 +876,11 @@ void JitArm64::addic(UGeckoInstruction inst) } } +bool JitArm64::MultiplyImmediate(u32 imm, int a, int d, bool rc) +{ + return false; +} + void JitArm64::mulli(UGeckoInstruction inst) { INSTRUCTION_START From 53a8cd15638af5c80810e083daa3f13f74084144 Mon Sep 17 00:00:00 2001 From: Bram Speeckaert Date: Tue, 1 Nov 2022 19:04:50 +0100 Subject: [PATCH 02/11] JitArm64: mulli - Use MultiplyImmediate --- Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 
046a4f9c1b..ef0253b9e7 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -893,6 +893,10 @@ void JitArm64::mulli(UGeckoInstruction inst) s32 i = (s32)gpr.GetImm(a); gpr.SetImmediate(d, i * inst.SIMM_16); } + else if (MultiplyImmediate((u32)(s32)inst.SIMM_16, a, d, false)) + { + // Code is generated inside MultiplyImmediate, nothing to be done here. + } else { gpr.BindToRegister(d, d == a); From 080513284cf0a80a548de331f8a9729eeb5a49d9 Mon Sep 17 00:00:00 2001 From: Bram Speeckaert Date: Tue, 1 Nov 2022 19:05:33 +0100 Subject: [PATCH 03/11] JitArm64: mullwx - Use MultiplyImmediate --- Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index ef0253b9e7..a90bf24077 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -922,6 +922,11 @@ void JitArm64::mullwx(UGeckoInstruction inst) if (inst.Rc) ComputeRC0(gpr.GetImm(d)); } + else if ((gpr.IsImm(a) && MultiplyImmediate(gpr.GetImm(a), b, d, inst.Rc)) || + (gpr.IsImm(b) && MultiplyImmediate(gpr.GetImm(b), a, d, inst.Rc))) + { + // Code is generated inside MultiplyImmediate, nothing to be done here. + } else { gpr.BindToRegister(d, d == a || d == b); From 51cb918aa5fbc26c1cdb78dc0193cb29e9df5201 Mon Sep 17 00:00:00 2001 From: Bram Speeckaert Date: Tue, 1 Nov 2022 19:26:34 +0100 Subject: [PATCH 04/11] JitArm64: MultiplyImmediate - Handle 0 Multiplication by zero always gives zero. Before: 0x52800019 mov w25, #0x0 0x1b197f5b mul w27, w26, w25 After: Nothing! 
--- .../Core/PowerPC/JitArm64/JitArm64_Integer.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index a90bf24077..96e4325106 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -878,7 +878,19 @@ void JitArm64::addic(UGeckoInstruction inst) bool JitArm64::MultiplyImmediate(u32 imm, int a, int d, bool rc) { - return false; + if (imm == 0) + { + gpr.SetImmediate(d, 0); + if (rc) + ComputeRC0(gpr.GetImm(d)); + } + else + { + // Immediate did not match any known special cases. + return false; + } + + return true; } void JitArm64::mulli(UGeckoInstruction inst) From f25611f388e34ec3610a90f8f862cb0ad5679ef9 Mon Sep 17 00:00:00 2001 From: Bram Speeckaert Date: Tue, 1 Nov 2022 19:58:27 +0100 Subject: [PATCH 05/11] JitArm64: MultiplyImmediate - Handle 1 Multiplication by one is also trivial. Depending on the registers involved, either a single MOV or no instructions will be generated. Before: 0x52800038 mov w24, #0x1 0x1b1a7f1b mul w27, w24, w26 After: 0x2a1a03fb mov w27, w26 Before: 0x52800039 mov w25, #0x1 0x1b1a7f3a mul w26, w25, w26 After: Nothing! 
--- Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 96e4325106..fb56ce7060 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -884,6 +884,16 @@ bool JitArm64::MultiplyImmediate(u32 imm, int a, int d, bool rc) if (rc) ComputeRC0(gpr.GetImm(d)); } + else if (imm == 1) + { + if (d != a) + { + gpr.BindToRegister(d, false); + MOV(gpr.R(d), gpr.R(a)); + } + if (rc) + ComputeRC0(gpr.R(d)); + } else { // Immediate did not match any known special cases. From 3aaf1a2b8bad78d648c15cd68c24ce6aa362e3e6 Mon Sep 17 00:00:00 2001 From: Bram Speeckaert Date: Tue, 1 Nov 2022 20:58:16 +0100 Subject: [PATCH 06/11] JitArm64: MultiplyImmediate - Handle 2^n Turn multiplications by a power of two into bitshifts. Before: 0x52800817 mov w23, #0x40 0x1b167ef6 mul w22, w23, w22 After: 0x531a66d6 lsl w22, w22, #6 --- Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index fb56ce7060..70dfea5f99 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -7,6 +7,7 @@ #include "Common/Assert.h" #include "Common/BitUtils.h" #include "Common/CommonTypes.h" +#include "Common/MathUtil.h" #include "Core/Core.h" #include "Core/CoreTiming.h" @@ -894,6 +895,15 @@ bool JitArm64::MultiplyImmediate(u32 imm, int a, int d, bool rc) if (rc) ComputeRC0(gpr.R(d)); } + else if (MathUtil::IsPow2(imm)) + { + const int shift = IntLog2(imm); + + gpr.BindToRegister(d, d == a); + LSL(gpr.R(d), gpr.R(a), shift); + if (rc) + ComputeRC0(gpr.R(d)); + } else { // Immediate did not match any known special cases. 
From c349875cdca6ee5abc2f78d1e0c3a09c8716256f Mon Sep 17 00:00:00 2001 From: Bram Speeckaert Date: Tue, 1 Nov 2022 21:06:43 +0100 Subject: [PATCH 07/11] JitArm64: MultiplyImmediate - Handle 2^n + 1 By taking advantage of ARM64's ability to shift an input register by any amount, we can calculate multiplication by a number that is one more than a power of two with a single instruction. Before: 0x52800838 mov w24, #0x41 0x1b187f7b mul w27, w27, w24 After: 0x0b1b1b7b add w27, w27, w27, lsl #6 --- Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 70dfea5f99..85554829df 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -904,6 +904,15 @@ bool JitArm64::MultiplyImmediate(u32 imm, int a, int d, bool rc) if (rc) ComputeRC0(gpr.R(d)); } + else if (MathUtil::IsPow2(imm - 1)) + { + const int shift = IntLog2(imm - 1); + + gpr.BindToRegister(d, d == a); + ADD(gpr.R(d), gpr.R(a), gpr.R(a), ArithOption(gpr.R(a), ShiftType::LSL, shift)); + if (rc) + ComputeRC0(gpr.R(d)); + } else { // Immediate did not match any known special cases. 
From 20dd5cadab461843a5f5f9def26638bb5bc2c3a4 Mon Sep 17 00:00:00 2001 From: Bram Speeckaert Date: Tue, 1 Nov 2022 21:17:49 +0100 Subject: [PATCH 08/11] JitArm64: MultiplyImmediate - Add comments --- Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 85554829df..7d0e797340 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -881,12 +881,14 @@ bool JitArm64::MultiplyImmediate(u32 imm, int a, int d, bool rc) { if (imm == 0) { + // Multiplication by zero (0). gpr.SetImmediate(d, 0); if (rc) ComputeRC0(gpr.GetImm(d)); } else if (imm == 1) { + // Multiplication by one (1). if (d != a) { gpr.BindToRegister(d, false); @@ -897,6 +899,7 @@ bool JitArm64::MultiplyImmediate(u32 imm, int a, int d, bool rc) } else if (MathUtil::IsPow2(imm)) { + // Multiplication by a power of two (2^n). const int shift = IntLog2(imm); gpr.BindToRegister(d, d == a); @@ -906,6 +909,7 @@ bool JitArm64::MultiplyImmediate(u32 imm, int a, int d, bool rc) } else if (MathUtil::IsPow2(imm - 1)) { + // Multiplication by a power of two plus one (2^n + 1). const int shift = IntLog2(imm - 1); gpr.BindToRegister(d, d == a); From 1c87f040a3b0771f9b5d231eb5dee620d15b7ed6 Mon Sep 17 00:00:00 2001 From: Bram Speeckaert Date: Tue, 1 Nov 2022 21:28:41 +0100 Subject: [PATCH 09/11] JitArm64: mulli - Only allocate reg when necessary If the destination register doesn't equal the input register, using it to temporarily hold the immediate value is fair game as it'll be overwritten with the result of the multiplication anyway. This can slightly reduce register pressure. 
Before: 0x52800659 mov w25, #0x32 0x1b197f5b mul w27, w26, w25 After: 0x5280065b mov w27, #0x32 0x1b1b7f5b mul w27, w26, w27 --- .../Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 7d0e797340..6cd902d0c9 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -944,11 +944,16 @@ void JitArm64::mulli(UGeckoInstruction inst) } else { - gpr.BindToRegister(d, d == a); - ARM64Reg WA = gpr.GetReg(); + const bool allocate_reg = d == a; + gpr.BindToRegister(d, allocate_reg); + + // Reuse d to hold the immediate if possible, allocate a register otherwise. + ARM64Reg WA = allocate_reg ? gpr.GetReg() : gpr.R(d); + MOVI2R(WA, (u32)(s32)inst.SIMM_16); MUL(gpr.R(d), gpr.R(a), WA); - gpr.Unlock(WA); + if (allocate_reg) + gpr.Unlock(WA); } } From 7073a135c659d59f1874b69250d54f34cfb33da2 Mon Sep 17 00:00:00 2001 From: Bram Speeckaert Date: Wed, 2 Nov 2022 21:33:47 +0100 Subject: [PATCH 10/11] JitArm64: MultiplyImmediate - Handle -(2^n) ARM64's flexible shifting of input registers also allows us to multiply by a negative power of two in one instruction: a NEG instruction whose input operand is shifted left by n. 
Before: 0x128001f7 mov w23, #-0x10 0x1b1a7efa mul w26, w23, w26 0x93407f58 sxtw x24, w26 After: 0x4b1a13fa neg w26, w26, lsl #4 0x93407f58 sxtw x24, w26 --- Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 6cd902d0c9..e3dac61ed0 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -917,6 +917,16 @@ bool JitArm64::MultiplyImmediate(u32 imm, int a, int d, bool rc) if (rc) ComputeRC0(gpr.R(d)); } + else if (MathUtil::IsPow2(~imm + 1)) + { + // Multiplication by a negative power of two (-(2^n)). + const int shift = IntLog2(~imm + 1); + + gpr.BindToRegister(d, d == a); + NEG(gpr.R(d), gpr.R(a), ArithOption(gpr.R(a), ShiftType::LSL, shift)); + if (rc) + ComputeRC0(gpr.R(d)); + } else { // Immediate did not match any known special cases. From 274e34ddf175cb6feae7c7b0465d3e02eb6e3902 Mon Sep 17 00:00:00 2001 From: Bram Speeckaert Date: Wed, 2 Nov 2022 21:41:03 +0100 Subject: [PATCH 11/11] JitArm64: MultiplyImmediate - Handle -(2^n) + 1 Let's take advantage of ARM64's input register shifting one last time, shall we? 
Before: 0x1280005b mov w27, #-0x3 0x1b1b7f18 mul w24, w24, w27 After: 0x4b180b18 sub w24, w24, w24, lsl #2 --- Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index e3dac61ed0..4810e875b3 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -927,6 +927,16 @@ bool JitArm64::MultiplyImmediate(u32 imm, int a, int d, bool rc) if (rc) ComputeRC0(gpr.R(d)); } + else if (MathUtil::IsPow2(~imm + 2)) + { + // Multiplication by a negative power of two plus one (-(2^n) + 1). + const int shift = IntLog2(~imm + 2); + + gpr.BindToRegister(d, d == a); + SUB(gpr.R(d), gpr.R(a), gpr.R(a), ArithOption(gpr.R(a), ShiftType::LSL, shift)); + if (rc) + ComputeRC0(gpr.R(d)); + } else { // Immediate did not match any known special cases.