From 9f829b375a69ee933af1c90c2dc78fe28e872ba0 Mon Sep 17 00:00:00 2001 From: Whatcookie Date: Sat, 25 Jul 2020 12:59:35 -0400 Subject: [PATCH] SPU/PPU LLVM: Optimize VSEL/SELB with constant mask (#8559) --- rpcs3/Emu/Cell/PPUTranslator.cpp | 59 ++++++++++++++++++++++++++- rpcs3/Emu/Cell/SPURecompiler.cpp | 68 +++++++++++++++++++++++++++++++- 2 files changed, 125 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index 4d84c79572..f189bac2c5 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -1435,7 +1435,64 @@ void PPUTranslator::VRSQRTEFP(ppu_opcode_t op) void PPUTranslator::VSEL(ppu_opcode_t op) { - const auto [a, b, c] = get_vrs(op.va, op.vb, op.vc); + const auto c = get_vr(op.vc); + + // Fast path for a constant mask: when every 32-bit, 16-bit or 8-bit element of the mask is either zero or all ones, bit granularity is not needed and vd is set from select(mask != 0, vb, va) instead of the bitwise blend (b & c) | (a & ~c) used otherwise + if (auto ci = llvm::dyn_cast(c.value)) + { + v128 mask = get_const_vector(ci, m_addr, 9000); + + bool sel_32 = true; // cleared if any 32-bit element is neither 0 nor 0xFFFFFFFF + for (u32 i = 0; i < 4; i++) + { + if (mask._u32[i] && mask._u32[i] != 0xFFFFFFFF) + { + sel_32 = false; + break; + } + } + + if (sel_32) + { + set_vr(op.vd, select(noncast(c) != 0, get_vr(op.vb), get_vr(op.va))); + return; + } + + bool sel_16 = true; // cleared if any 16-bit element is neither 0 nor 0xFFFF + for (u32 i = 0; i < 8; i++) + { + if (mask._u16[i] && mask._u16[i] != 0xFFFF) + { + sel_16 = false; + break; + } + } + + if (sel_16) + { + set_vr(op.vd, select(bitcast(c) != 0, get_vr(op.vb), get_vr(op.va))); + return; + } + + + bool sel_8 = true; // cleared if any 8-bit element is neither 0 nor 0xFF + for (u32 i = 0; i < 16; i++) + { + if (mask._u8[i] && mask._u8[i] != 0xFF) + { + sel_8 = false; + break; + } + } + + if (sel_8) + { + set_vr(op.vd, select(bitcast(c) != 0,get_vr(op.vb), get_vr(op.va))); + return; + } + } + + const auto [a, b] = get_vrs(op.va, op.vb); set_vr(op.vd, eval((b & c) | (a & ~c))); } diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 4cfb485803..2c87ffe6c8 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -7057,6 
+7057,73 @@ public: return; } + const auto c = get_vr(op.rc); + + // Check if the constant mask doesn't require bit granularity + if (auto ci = llvm::dyn_cast(c.value)) + { + v128 mask = get_const_vector(ci, m_pos, 8000); + + bool sel_32 = true; + for (u32 i = 0; i < 4; i++) + { + if (mask._u32[i] && mask._u32[i] != 0xFFFFFFFF) + { + sel_32 = false; + break; + } + } + + if (sel_32) + { + if (auto [a, b] = match_vrs(op.ra, op.rb); a || b) + { + set_vr(op.rt4, select(noncast(c) != 0, get_vr(op.rb), get_vr(op.ra))); + return; + } + else if (auto [a, b] = match_vrs(op.ra, op.rb); a || b) + { + set_vr(op.rt4, select(noncast(c) != 0, get_vr(op.rb), get_vr(op.ra))); + return; + } + + set_vr(op.rt4, select(noncast(c) != 0, get_vr(op.rb), get_vr(op.ra))); + return; + } + + bool sel_16 = true; + for (u32 i = 0; i < 8; i++) + { + if (mask._u16[i] && mask._u16[i] != 0xFFFF) + { + sel_16 = false; + break; + } + } + + if (sel_16) + { + set_vr(op.rt4, select(bitcast(c) != 0, get_vr(op.rb), get_vr(op.ra))); + return; + } + + bool sel_8 = true; + for (u32 i = 0; i < 16; i++) + { + if (mask._u8[i] && mask._u8[i] != 0xFF) + { + sel_8 = false; + break; + } + } + + if (sel_8) + { + set_vr(op.rt4, select(bitcast(c) != 0,get_vr(op.rb), get_vr(op.ra))); + return; + } + } + const auto op1 = get_reg_raw(op.rb); const auto op2 = get_reg_raw(op.ra); @@ -7073,7 +7140,6 @@ public: return; } - const auto c = get_vr(op.rc); set_vr(op.rt4, (get_vr(op.rb) & c) | (get_vr(op.ra) & ~c)); }