diff --git a/rpcs3/Emu/CPU/CPUTranslator.cpp b/rpcs3/Emu/CPU/CPUTranslator.cpp index 0423c84146..5339012e0d 100644 --- a/rpcs3/Emu/CPU/CPUTranslator.cpp +++ b/rpcs3/Emu/CPU/CPUTranslator.cpp @@ -83,12 +83,31 @@ llvm::Value* cpu_translator::bitcast(llvm::Value* val, llvm::Type* type) } template <> -v128 cpu_translator::get_const_vector(llvm::Constant* c, u32 a, u32 b) +std::pair cpu_translator::get_const_vector(llvm::Value* c, u32 a, u32 b) { + v128 result{}; + + if (!llvm::isa(c)) + { + return {false, result}; + } + const auto t = c->getType(); if (!t->isVectorTy()) { + if (const auto ci = llvm::dyn_cast(c); ci && ci->getBitWidth() == 128) + { + auto cv = ci->getValue(); + + for (int i = 0; i < 128; i++) + { + result._bit[i] = cv[i]; + } + + return {true, result}; + } + fmt::throw_exception("[0x%x, %u] Not a vector" HERE, a, b); } @@ -106,13 +125,17 @@ v128 cpu_translator::get_const_vector(llvm::Constant* c, u32 a, u32 b) return {}; } + if (llvm::isa(c)) + { + // Sorry, if we cannot evaluate it we cannot use it + fmt::throw_exception("[0x%x, %u] Constant Expression!" HERE, a, b); + } + fmt::throw_exception("[0x%x, %u] Unexpected constant type" HERE, a, b); } const auto sct = t->getScalarType(); - v128 result; - if (sct->isIntegerTy(8)) { for (u32 i = 0; i < 16; i++) @@ -160,12 +183,17 @@ v128 cpu_translator::get_const_vector(llvm::Constant* c, u32 a, u32 b) fmt::throw_exception("[0x%x, %u] Unexpected vector element type" HERE, a, b); } - return result; + return {true, result}; } template <> llvm::Constant* cpu_translator::make_const_vector(v128 v, llvm::Type* t) { + if (const auto ct = llvm::dyn_cast(t); ct && ct->getBitWidth() == 128) + { + return llvm::ConstantInt::get(t, llvm::APInt(128, llvm::makeArrayRef(reinterpret_cast(v._bytes), 2))); + } + verify(HERE), t->isVectorTy() && llvm::cast(t)->getBitWidth() == 128; const auto sct = t->getScalarType(); diff --git a/rpcs3/Emu/CPU/CPUTranslator.h b/rpcs3/Emu/CPU/CPUTranslator.h index a72efadabb..2a063d3449 100644 --- a/rpcs3/Emu/CPU/CPUTranslator.h +++ b/rpcs3/Emu/CPU/CPUTranslator.h @@ -2837,7 +2837,7 @@ public: } template - R get_const_vector(llvm::Constant*, u32 a, u32 b); + std::pair get_const_vector(llvm::Value*, u32 a, u32 b); template llvm::Constant* make_const_vector(T, llvm::Type*); diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index f189bac2c5..6814344885 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -947,10 +947,8 @@ void PPUTranslator::VMADDFP(ppu_opcode_t op) auto [a, b, c] = get_vrs(op.va, op.vb, op.vc); // Optimization: Emit only a floating multiply if the addend is zero - if (auto cv = llvm::dyn_cast(b.value)) + if (auto [ok, data] = get_const_vector(b.value, m_addr, 2000); ok) { - v128 data = get_const_vector(cv, m_addr, 2000); - if (data == v128{}) { set_vr(op.vd, vec_handle_result(a * c)); @@ -1253,10 +1251,8 @@ void PPUTranslator::VNMSUBFP(ppu_opcode_t op) auto [a, b, c] = get_vrs(op.va, op.vb, op.vc); // Optimization: Emit only a floating multiply if the addend is zero - if (const auto cv = llvm::dyn_cast(b.value)) + if (const auto [ok, data] = get_const_vector(b.value, m_addr, 2004); ok) { - const v128 data = get_const_vector(cv, m_addr, 2004); - if (data == v128{}) { set_vr(op.vd, vec_handle_result(-a * c)); @@ -1438,10 +1434,8 @@ void PPUTranslator::VSEL(ppu_opcode_t op) const auto c = get_vr(op.vc); // Check if the constant mask doesn't require bit granularity - if (auto ci = llvm::dyn_cast(c.value)) + if (auto [ok, mask] = get_const_vector(c.value, m_addr, 9000); ok) { - v128 mask = get_const_vector(ci, m_addr, 9000); - bool sel_32 = true; for (u32 i = 0; i < 4; i++) { diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 02242b26d7..39a44f8217 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -7088,10 +7088,8 @@ public: const auto c = get_vr(op.rc); // Check if the constant mask doesn't require bit granularity - if (auto ci = llvm::dyn_cast(c.value)) + if (auto [ok, mask] = get_const_vector(c.value, m_pos, 8000); ok) { - v128 mask = get_const_vector(ci, m_pos, 8000); - bool sel_32 = true; for (u32 i = 0; i < 4; i++) { @@ -7192,11 +7190,9 @@ public: const auto c = get_vr(op.rc); - if (auto ci = llvm::dyn_cast(c.value)) + if (auto [ok, mask] = get_const_vector(c.value, m_pos, 57216); ok) { // Optimization: SHUFB with constant mask - v128 mask = get_const_vector(ci, m_pos, 57216); - if (((mask._u64[0] | mask._u64[1]) & 0xe0e0e0e0e0e0e0e0) == 0) { // Trivial insert or constant shuffle (TODO) @@ -7291,10 +7287,8 @@ public: return; } - if (auto ci = llvm::dyn_cast(b.value)) + if (auto [ok, data] = get_const_vector(b.value, m_pos, 7000); ok) { - v128 data = get_const_vector(ci, m_pos, 7000); - const bool all_bytes_equiv = data == v128::from8p(data._u8[0]); if (all_bytes_equiv) { @@ -7310,10 +7304,8 @@ public: if (auto [ok, v0] = match_expr(b, byteswap(match())); ok) { - if (auto ci = llvm::dyn_cast(a.value)) + if (auto [ok, data] = get_const_vector(a.value, m_pos, 7000); ok) { - v128 data = get_const_vector(ci, m_pos, 7000); - const bool all_bytes_equiv = data == v128::from8p(data._u8[0]); if (all_bytes_equiv) { @@ -7542,9 +7534,8 @@ public: const auto a = get_vr(op.ra); const auto b = get_vr(op.rb); - if (auto cv = llvm::dyn_cast(b.value)) + if (auto [ok, data] = get_const_vector(b.value, m_pos, 5000); ok) { - v128 data = get_const_vector(cv, m_pos, 5000); bool safe_int_compare = true; for (u32 i = 0; i < 4; i++) @@ -7569,9 +7560,8 @@ public: } } - if (auto cv = llvm::dyn_cast(a.value)) + if (auto [ok, data] = get_const_vector(a.value, m_pos, 5000); ok) { - v128 data = get_const_vector(cv, m_pos, 5000); bool safe_int_compare = true; for (u32 i = 0; i < 4; i++) @@ -7735,10 +7725,8 @@ public: // Optimization: Emit only a floating multiply if the addend is zero // This is odd since SPU code could just use the FM instruction, but it seems common enough - if (auto cv = llvm::dyn_cast(c.value)) + if (auto [ok, data] = get_const_vector(c.value, m_pos, 4000); ok) { - v128 data = get_const_vector(cv, m_pos, 4000); - if (is_spu_float_zero(data)) { r = eval(a * b); @@ -7746,10 +7734,8 @@ public: } } - if (auto cv = llvm::dyn_cast(b.value)) + if (auto [ok, data] = get_const_vector(b.value, m_pos, 4000); ok) { - v128 data = get_const_vector(cv, m_pos, 4000); - if (is_spu_float_zero(data)) { // Just return the added value if either a or b is 0 @@ -7757,10 +7743,8 @@ public: } } - if (auto cv = llvm::dyn_cast(a.value)) + if (auto [ok, data] = get_const_vector(a.value, m_pos, 4000); ok) { - v128 data = get_const_vector(cv, m_pos, 4000); - if (is_spu_float_zero(data)) { return c; @@ -7995,9 +7979,8 @@ public: value_t a = get_vr(op.ra); value_t r; - if (auto ca = llvm::dyn_cast(a.value)) + if (auto [ok, data] = get_const_vector(a.value, m_pos, 25971); ok) { - v128 data = get_const_vector(ca, m_pos, 25971); r.value = build(data._s32[0], data._s32[1], data._s32[2], data._s32[3]).eval(m_ir); } else @@ -8036,9 +8019,8 @@ public: value_t a = get_vr(op.ra); value_t r; - if (auto ca = llvm::dyn_cast(a.value)) + if (auto [ok, data] = get_const_vector(a.value, m_pos, 20971); ok) { - v128 data = get_const_vector(ca, m_pos, 20971); r.value = build(data._u32[0], data._u32[1], data._u32[2], data._u32[3]).eval(m_ir); } else @@ -8090,9 +8072,8 @@ public: for (auto pair : std::initializer_list, value_t>>{{a, b}, {b, a}}) { - if (auto cv = llvm::dyn_cast(pair.first.value)) + if (auto [ok, data] = get_const_vector(pair.first.value, m_pos, 10000); ok) { - v128 data = get_const_vector(cv, m_pos, 10000); data._u32[3] %= SPU_LS_SIZE; if (data._u32[3] % 0x10 == 0) @@ -8115,9 +8096,8 @@ public: for (auto pair : std::initializer_list, value_t>>{{a, b}, {b, a}}) { - if (auto cv = llvm::dyn_cast(pair.first.value)) + if (auto [ok, data] = get_const_vector(pair.first.value, m_pos, 10000); ok) { - v128 data = get_const_vector(cv, m_pos, 10000); data._u32[3] %= SPU_LS_SIZE; if (data._u32[3] % 0x10 == 0)