From 05b6108c66b2cb5bdbb97ff2412d4d39aa7dc331 Mon Sep 17 00:00:00 2001 From: Malcolm Jestadt Date: Sat, 16 Sep 2023 18:59:39 -0400 Subject: [PATCH] SPU LLVM: Optimize remaining rotate instructions which take a two's complement value as input ROTQMBYBI looks for -7 for its two's complement construction --- rpcs3/Emu/Cell/SPURecompiler.cpp | 33 ++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 4fa6cf4759..d2098cffd7 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -8095,19 +8095,33 @@ public: void ROTQMBYBI(spu_opcode_t op) { const auto a = get_vr(op.ra); - const auto b = get_vr(op.rb); + const auto b = get_vr(op.rb); + + auto minusb = eval(-(b >> 3)); + if (auto [ok, v0, v1] = match_expr(b, match() - match()); ok) + { + if (auto [ok1, data] = get_const_vector(v0.value, m_pos); ok1) + { + if (data == v128::from32p(7)) + { + minusb = eval(v1 >> 3); + } + } + } + + const auto minusbx = eval(bitcast(minusb) & 0x1f); // Data with swapped endian from a load instruction if (auto [ok, as] = match_expr(a, byteswap(match())); ok) { const auto sc = build(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - const auto sh = sc - (-(splat_scalar(b) >> 3) & 0x1f); + const auto sh = sc - splat_scalar(minusbx); set_vr(op.rt, pshufb(as, sh)); return; } const auto sc = build(112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); - const auto sh = sc + (-(splat_scalar(b) >> 3) & 0x1f); + const auto sh = sc + splat_scalar(minusbx); set_vr(op.rt, pshufb(a, sh)); } @@ -8200,9 +8214,16 @@ public: void ROTQMBI(spu_opcode_t op) { - const auto a = get_vr(op.ra); - const auto b = splat_scalar(-get_vr(op.rb) & 0x7); - set_vr(op.rt, fshr(zshuffle(a, 1, 2, 3, 4), a, b)); + const auto [a, b] = get_vrs(op.ra, op.rb); + + auto minusb = eval(-b); + if (auto [ok, x] = match_expr(b, -match()); ok) + { + minusb = 
eval(x); + } + + const auto bx = splat_scalar(minusb) & 0x7; + set_vr(op.rt, fshr(zshuffle(a, 1, 2, 3, 4), a, bx)); } void SHLQBI(spu_opcode_t op)