From 524aac75ed0b6c7405134106a2ad8911d16c70ba Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Thu, 18 Apr 2019 17:18:46 +0300 Subject: [PATCH] LLVM DSL: rewrite bitcast, zext, sext, trunc, select, min, max ops Are made composable in expressions similar to arithmetic ops. Implement noncast in addition to bitcast (no-op case). Implement bitcast constant folding. Fixed some misuse of sext<>. --- rpcs3/Emu/CPU/CPUTranslator.h | 306 ++++++++++++++++++++++++++----- rpcs3/Emu/Cell/PPUThread.cpp | 3 +- rpcs3/Emu/Cell/PPUTranslator.cpp | 6 +- rpcs3/Emu/Cell/SPURecompiler.cpp | 133 +++++++------- 4 files changed, 324 insertions(+), 124 deletions(-) diff --git a/rpcs3/Emu/CPU/CPUTranslator.h b/rpcs3/Emu/CPU/CPUTranslator.h index 2ab6138ea2..b8ab60ab6c 100644 --- a/rpcs3/Emu/CPU/CPUTranslator.h +++ b/rpcs3/Emu/CPU/CPUTranslator.h @@ -9,6 +9,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" +#include "llvm/Analysis/ConstantFolding.h" #ifdef _MSC_VER #pragma warning(pop) #endif @@ -951,6 +952,226 @@ inline llvm_cmp::type>, llvm::ICmpI return {a1, {c}}; } +template > +struct llvm_noncast +{ + using type = U; + + llvm_expr_t a1; + + static_assert(llvm_value_t::is_int, "llvm_noncast<>: invalid type"); + static_assert(llvm_value_t::is_int, "llvm_noncast<>: invalid result type"); + static_assert(llvm_value_t::esize == llvm_value_t::esize, "llvm_noncast<>: result is resized"); + static_assert(llvm_value_t::is_vector == llvm_value_t::is_vector, "llvm_noncast<>: vector element mismatch"); + + static constexpr bool is_ok = + llvm_value_t::is_int && + llvm_value_t::is_int && + llvm_value_t::esize == llvm_value_t::esize && + llvm_value_t::is_vector == llvm_value_t::is_vector; + + llvm::Value* eval(llvm::IRBuilder<>* ir) const + { + // No operation required + return a1.eval(ir); + } +}; + +template > +struct llvm_bitcast +{ + using type = U; + + llvm_expr_t a1; + + static constexpr uint bitsize0 = llvm_value_t::is_vector ? llvm_value_t::is_vector * llvm_value_t::esize : llvm_value_t::esize; + static constexpr uint bitsize1 = llvm_value_t::is_vector ? llvm_value_t::is_vector * llvm_value_t::esize : llvm_value_t::esize; + + static_assert(bitsize0 == bitsize1, "llvm_bitcast<>: invalid type (size mismatch)"); + static_assert(llvm_value_t::is_int || llvm_value_t::is_float, "llvm_bitcast<>: invalid type"); + static_assert(llvm_value_t::is_int || llvm_value_t::is_float, "llvm_bitcast<>: invalid result type"); + + static constexpr bool is_ok = + bitsize0 && bitsize0 == bitsize1 && + (llvm_value_t::is_int || llvm_value_t::is_float) && + (llvm_value_t::is_int || llvm_value_t::is_float); + + llvm::Value* eval(llvm::IRBuilder<>* ir) const + { + const auto v1 = a1.eval(ir); + const auto rt = llvm_value_t::get_type(ir->getContext()); + + if constexpr (llvm_value_t::is_int == llvm_value_t::is_int && llvm_value_t::is_vector == llvm_value_t::is_vector) + { + // No-op case + return v1; + } + + if (const auto c1 = llvm::dyn_cast(v1)) + { + const auto module = ir->GetInsertBlock()->getParent()->getParent(); + const auto result = llvm::ConstantFoldCastOperand(llvm::Instruction::BitCast, c1, rt, module->getDataLayout()); + + if (result) + { + return result; + } + } + + return ir->CreateBitCast(v1, rt); + } +}; + +template > +struct llvm_trunc +{ + using type = U; + + llvm_expr_t a1; + + static_assert(llvm_value_t::is_int, "llvm_trunc<>: invalid type"); + static_assert(llvm_value_t::is_int, "llvm_trunc<>: invalid result type"); + static_assert(llvm_value_t::esize > llvm_value_t::esize, "llvm_trunc<>: result is not truncated"); + static_assert(llvm_value_t::is_vector == llvm_value_t::is_vector, "llvm_trunc<>: vector element mismatch"); + + static constexpr bool is_ok = + llvm_value_t::is_int && + llvm_value_t::is_int && + llvm_value_t::esize > llvm_value_t::esize && + llvm_value_t::is_vector == llvm_value_t::is_vector; + + llvm::Value* eval(llvm::IRBuilder<>* ir) const + { + return ir->CreateTrunc(a1.eval(ir), llvm_value_t::get_type(ir->getContext())); + } +}; + +template > +struct llvm_sext +{ + using type = U; + + llvm_expr_t a1; + + static_assert(llvm_value_t::is_int, "llvm_sext<>: invalid type"); + static_assert(llvm_value_t::is_sint, "llvm_sext<>: invalid result type"); + static_assert(llvm_value_t::esize < llvm_value_t::esize, "llvm_sext<>: result is not extended"); + static_assert(llvm_value_t::is_vector == llvm_value_t::is_vector, "llvm_sext<>: vector element mismatch"); + + static constexpr bool is_ok = + llvm_value_t::is_int && + llvm_value_t::is_sint && + llvm_value_t::esize < llvm_value_t::esize && + llvm_value_t::is_vector == llvm_value_t::is_vector; + + llvm::Value* eval(llvm::IRBuilder<>* ir) const + { + return ir->CreateSExt(a1.eval(ir), llvm_value_t::get_type(ir->getContext())); + } +}; + +template > +struct llvm_zext +{ + using type = U; + + llvm_expr_t a1; + + static_assert(llvm_value_t::is_int, "llvm_zext<>: invalid type"); + static_assert(llvm_value_t::is_uint, "llvm_zext<>: invalid result type"); + static_assert(llvm_value_t::esize < llvm_value_t::esize, "llvm_zext<>: result is not extended"); + static_assert(llvm_value_t::is_vector == llvm_value_t::is_vector, "llvm_zext<>: vector element mismatch"); + + static constexpr bool is_ok = + llvm_value_t::is_int && + llvm_value_t::is_uint && + llvm_value_t::esize < llvm_value_t::esize && + llvm_value_t::is_vector == llvm_value_t::is_vector; + + llvm::Value* eval(llvm::IRBuilder<>* ir) const + { + return ir->CreateZExt(a1.eval(ir), llvm_value_t::get_type(ir->getContext())); + } +}; + +template , typename U = llvm_common_t> +struct llvm_select +{ + using type = T; + + llvm_expr_t cond; + llvm_expr_t a2; + llvm_expr_t a3; + + static_assert(llvm_value_t::esize == 1 && llvm_value_t::is_int, "llvm_select<>: invalid condition type (bool expected)"); + static_assert(llvm_value_t::is_vector == llvm_value_t::is_vector, "llvm_select<>: vector element mismatch"); + + static constexpr bool is_ok = + llvm_value_t::esize == 1 && llvm_value_t::is_int && + llvm_value_t::is_vector == llvm_value_t::is_vector; + + llvm::Value* eval(llvm::IRBuilder<>* ir) const + { + return ir->CreateSelect(cond.eval(ir), a2.eval(ir), a3.eval(ir)); + } +}; + +template > +struct llvm_min +{ + using type = T; + + llvm_expr_t a1; + llvm_expr_t a2; + + static_assert(llvm_value_t::is_sint || llvm_value_t::is_uint, "llvm_min<>: invalid type"); + + static constexpr bool is_ok = llvm_value_t::is_sint || llvm_value_t::is_uint; + + llvm::Value* eval(llvm::IRBuilder<>* ir) const + { + const auto v1 = a1.eval(ir); + const auto v2 = a2.eval(ir); + + if constexpr (llvm_value_t::is_sint) + { + return ir->CreateSelect(ir->CreateICmpSLT(v1, v2), v1, v2); + } + + if constexpr (llvm_value_t::is_uint) + { + return ir->CreateSelect(ir->CreateICmpULT(v1, v2), v1, v2); + } + } +}; + +template > +struct llvm_max +{ + using type = T; + + llvm_expr_t a1; + llvm_expr_t a2; + + static_assert(llvm_value_t::is_sint || llvm_value_t::is_uint, "llvm_max<>: invalid type"); + + llvm::Value* eval(llvm::IRBuilder<>* ir) const + { + const auto v1 = a1.eval(ir); + const auto v2 = a2.eval(ir); + + if constexpr (llvm_value_t::is_sint) + { + return ir->CreateSelect(ir->CreateICmpSLT(v1, v2), v2, v1); + } + + if constexpr (llvm_value_t::is_uint) + { + return ir->CreateSelect(ir->CreateICmpULT(v1, v2), v2, v1); + } + } +}; + class cpu_translator { protected: @@ -1027,36 +1248,52 @@ public: return llvm_uno{std::forward(cmp_expr)}; } - template - value_t bitcast(T2 expr) + template ::is_ok>> + static auto noncast(T&& expr) { - value_t result; - result.value = m_ir->CreateBitCast(expr.eval(m_ir), result.get_type(m_context)); - return result; + return llvm_noncast{std::forward(expr)}; } - template - value_t trunc(T2 expr) + template ::is_ok>> + static auto bitcast(T&& expr) { - value_t result; - result.value = m_ir->CreateTrunc(expr.eval(m_ir), result.get_type(m_context)); - return result; + return llvm_bitcast{std::forward(expr)}; } - template - value_t sext(T2 expr) + template ::is_ok>> + static auto trunc(T&& expr) { - value_t result; - result.value = m_ir->CreateSExt(expr.eval(m_ir), result.get_type(m_context)); - return result; + return llvm_trunc{std::forward(expr)}; } - template - value_t zext(T2 expr) + template ::is_ok>> + static auto sext(T&& expr) { - value_t result; - result.value = m_ir->CreateZExt(expr.eval(m_ir), result.get_type(m_context)); - return result; + return llvm_sext{std::forward(expr)}; + } + + template ::is_ok>> + static auto zext(T&& expr) + { + return llvm_zext{std::forward(expr)}; + } + + template ::is_ok>> + static auto select(T&& c, U&& a, V&& b) + { + return llvm_select{std::forward(c), std::forward(a), std::forward(b)}; + } + + template ::is_ok>> + static auto min(T&& a, U&& b) + { + return llvm_min{std::forward(a), std::forward(b)}; + } + + template ::is_ok>> + static auto max(T&& a, U&& b) + { + return llvm_max{std::forward(a), std::forward(b)}; } // Get signed addition overflow into the sign bit (s = a + b) @@ -1194,17 +1431,6 @@ public: return result; } - // Select (c ? a : b) - template - auto select(T2 c, T a, T b) - { - static_assert(value_t::esize == 1, "select: expected bool type (first argument)"); - static_assert(value_t::is_vector == value_t::is_vector, "select: incompatible arguments (vectors)"); - T result; - result.value = m_ir->CreateSelect(c.eval(m_ir), a.eval(m_ir), b.eval(m_ir)); - return result; - } - template auto insert(T v, u64 i, E e) { @@ -1246,24 +1472,6 @@ public: return result; } - // Min - template - auto min(T a, T b) - { - T result; - result.value = m_ir->CreateSelect((a > b).eval(m_ir), b.eval(m_ir), a.eval(m_ir)); - return result; - } - - // Max - template - auto max(T a, T b) - { - T result; - result.value = m_ir->CreateSelect((a > b).eval(m_ir), a.eval(m_ir), b.eval(m_ir)); - return result; - } - // Shuffle single vector using all zeros second vector of the same size template auto zshuffle(T1 a, Args... args) diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 21e78f3a5e..594614fd82 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -271,7 +271,7 @@ extern void ppu_register_range(u32 addr, u32 size) // Register executable range at utils::memory_commit(&ppu_ref(addr), size * 2, utils::protection::rw); - const u32 fallback = ::narrow(g_cfg.core.ppu_decoder == ppu_decoder_type::llvm ? + const u32 fallback = ::narrow(g_cfg.core.ppu_decoder == ppu_decoder_type::llvm ? reinterpret_cast(ppu_recompiler_fallback) : reinterpret_cast(ppu_fallback)); size &= ~3; // Loop assumes `size = n * 4`, enforce that by rounding down @@ -1708,6 +1708,7 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co // Initialize target module->setTargetTriple(Triple::normalize(sys::getProcessTriple())); + module->setDataLayout(jit.get_engine().getTargetMachine()->createDataLayout()); // Initialize translator PPUTranslator translator(jit.get_context(), module.get(), module_part, jit.has_ssse3()); diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index f2ff8c1f64..79da979a63 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -1053,8 +1053,8 @@ void PPUTranslator::VMSUMMBM(ppu_opcode_t op) const auto a = get_vr(op.va); const auto b = get_vr(op.vb); const auto c = get_vr(op.vc); - const auto ml = bitcast((a << 8 >> 8) * bitcast(b << 8 >> 8)); - const auto mh = bitcast((a >> 8) * bitcast(b >> 8)); + const auto ml = bitcast((a << 8 >> 8) * noncast(b << 8 >> 8)); + const auto mh = bitcast((a >> 8) * noncast(b >> 8)); set_vr(op.vd, eval(((ml << 16 >> 16) + (ml >> 16)) + ((mh << 16 >> 16) + (mh >> 16)) + c)); } @@ -1191,7 +1191,7 @@ void PPUTranslator::VPERM(ppu_opcode_t op) const auto b = get_vr(op.vb); const auto c = get_vr(op.vc); const auto i = eval(~c & 0x1f); - set_vr(op.vd, select(bitcast(c << 3) >= 0, pshufb(a, i), pshufb(b, i))); + set_vr(op.vd, select(noncast(c << 3) >= 0, pshufb(a, i), pshufb(b, i))); } void PPUTranslator::VPKPX(ppu_opcode_t op) diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 19ab156e2f..75a639409f 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -3087,6 +3087,7 @@ public: // Create LLVM module std::unique_ptr module = std::make_unique(hash + ".obj", m_context); module->setTargetTriple(Triple::normalize(sys::getProcessTriple())); + module->setDataLayout(m_jit.get_engine().getTargetMachine()->createDataLayout()); m_module = module.get(); // Initialize IR Builder @@ -3587,6 +3588,7 @@ public: // Create LLVM module std::unique_ptr module = std::make_unique("spu_interpreter.obj", m_context); module->setTargetTriple(Triple::normalize(sys::getProcessTriple())); + module->setDataLayout(m_jit.get_engine().getTargetMachine()->createDataLayout()); m_module = module.get(); // Initialize IR Builder @@ -4425,12 +4427,12 @@ public: } case MFC_Size: { - set_reg_fixed(s_reg_mfc_size, trunc(val & 0x7fff).value); + set_reg_fixed(s_reg_mfc_size, trunc(val & 0x7fff).eval(m_ir)); return; } case MFC_TagID: { - set_reg_fixed(s_reg_mfc_tag, trunc(val & 0x1f).value); + set_reg_fixed(s_reg_mfc_tag, trunc(val & 0x1f).eval(m_ir)); return; } case MFC_Cmd: @@ -4447,14 +4449,14 @@ public: break; } - if (auto ci = llvm::dyn_cast(trunc(val).value)) + if (auto ci = llvm::dyn_cast(trunc(val).eval(m_ir))) { const auto eal = get_reg_fixed(s_reg_mfc_eal); const auto lsa = get_reg_fixed(s_reg_mfc_lsa); const auto tag = get_reg_fixed(s_reg_mfc_tag); const auto size = get_reg_fixed(s_reg_mfc_size); - const auto mask = m_ir->CreateShl(m_ir->getInt32(1), zext(tag).value); + const auto mask = m_ir->CreateShl(m_ir->getInt32(1), zext(tag).eval(m_ir)); const auto exec = llvm::BasicBlock::Create(m_context, "", m_function); const auto fail = llvm::BasicBlock::Create(m_context, "", m_function); const auto next = llvm::BasicBlock::Create(m_context, "", m_function); @@ -4515,8 +4517,8 @@ public: csize = -1; } - llvm::Value* src = m_ir->CreateGEP(m_lsptr, zext(lsa).value); - llvm::Value* dst = m_ir->CreateGEP(m_memptr, zext(eal).value); + llvm::Value* src = m_ir->CreateGEP(m_lsptr, zext(lsa).eval(m_ir)); + llvm::Value* dst = m_ir->CreateGEP(m_memptr, zext(eal).eval(m_ir)); if (cmd & MFC_GET_CMD) { @@ -4599,7 +4601,7 @@ public: else { // TODO - m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::memcpy), {dst, src, zext(size).value, m_ir->getTrue()}); + m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::memcpy), {dst, src, zext(size).eval(m_ir), m_ir->getTrue()}); } m_ir->CreateBr(next); @@ -4840,7 +4842,7 @@ public: if constexpr (!by.is_vector) sh.value = m_ir->CreateVectorSplat(sh.is_vector, sh.value); - set_vr(op.rt, select(sh < by.esize, eval(get_vr(op.ra) >> sh), splat(0))); + set_vr(op.rt, select(sh < by.esize, get_vr(op.ra) >> sh, splat(0))); } template @@ -4866,7 +4868,7 @@ public: if constexpr (!by.is_vector) sh.value = m_ir->CreateVectorSplat(sh.is_vector, sh.value); - set_vr(op.rt, select(sh < by.esize, eval(get_vr(op.ra) << sh), splat(0))); + set_vr(op.rt, select(sh < by.esize, get_vr(op.ra) << sh, splat(0))); } void ROT(spu_opcode_t op) @@ -4996,7 +4998,7 @@ public: } const auto m = zext(bitcast(trunc(a))); - set_vr(op.rt, insert(splat(0), 3, m)); + set_vr(op.rt, insert(splat(0), 3, eval(m))); } void GBH(spu_opcode_t op) @@ -5014,7 +5016,7 @@ public: } const auto m = zext(bitcast(trunc(a))); - set_vr(op.rt, insert(splat(0), 3, m)); + set_vr(op.rt, insert(splat(0), 3, eval(m))); } void GBB(spu_opcode_t op) @@ -5032,7 +5034,7 @@ public: } const auto m = zext(bitcast(trunc(a))); - set_vr(op.rt, insert(splat(0), 3, m)); + set_vr(op.rt, insert(splat(0), 3, eval(m))); } void FSM(spu_opcode_t op) @@ -5047,7 +5049,7 @@ public: } const auto m = bitcast(trunc(v)); - set_vr(op.rt, sext(m)); + set_vr(op.rt, sext(m)); } void FSMH(spu_opcode_t op) @@ -5062,7 +5064,7 @@ public: } const auto m = bitcast(trunc(v)); - set_vr(op.rt, sext(m)); + set_vr(op.rt, sext(m)); } void FSMB(spu_opcode_t op) @@ -5077,7 +5079,7 @@ public: } const auto m = bitcast(trunc(v)); - set_vr(op.rt, sext(m)); + set_vr(op.rt, sext(m)); } void ROTQBYBI(spu_opcode_t op) @@ -5276,7 +5278,7 @@ public: void CGT(spu_opcode_t op) { - set_vr(op.rt, sext(get_vr(op.ra) > get_vr(op.rb))); + set_vr(op.rt, sext(get_vr(op.ra) > get_vr(op.rb))); } void XOR(spu_opcode_t op) @@ -5286,7 +5288,7 @@ public: void CGTH(spu_opcode_t op) { - set_vr(op.rt, sext(get_vr(op.ra) > get_vr(op.rb))); + set_vr(op.rt, sext(get_vr(op.ra) > get_vr(op.rb))); } void EQV(spu_opcode_t op) @@ -5296,7 +5298,7 @@ public: void CGTB(spu_opcode_t op) { - set_vr(op.rt, sext(get_vr(op.ra) > get_vr(op.rb))); + set_vr(op.rt, sext(get_vr(op.ra) > get_vr(op.rb))); } void SUMB(spu_opcode_t op) @@ -5337,7 +5339,7 @@ public: void CLGT(spu_opcode_t op) { - set_vr(op.rt, sext(get_vr(op.ra) > get_vr(op.rb))); + set_vr(op.rt, sext(get_vr(op.ra) > get_vr(op.rb))); } void ANDC(spu_opcode_t op) @@ -5347,7 +5349,7 @@ public: void CLGTH(spu_opcode_t op) { - set_vr(op.rt, sext(get_vr(op.ra) > get_vr(op.rb))); + set_vr(op.rt, sext(get_vr(op.ra) > get_vr(op.rb))); } void ORC(spu_opcode_t op) @@ -5357,12 +5359,12 @@ public: void CLGTB(spu_opcode_t op) { - set_vr(op.rt, sext(get_vr(op.ra) > get_vr(op.rb))); + set_vr(op.rt, sext(get_vr(op.ra) > get_vr(op.rb))); } void CEQ(spu_opcode_t op) { - set_vr(op.rt, sext(get_vr(op.ra) == get_vr(op.rb))); + set_vr(op.rt, sext(get_vr(op.ra) == get_vr(op.rb))); } void MPYHHU(spu_opcode_t op) @@ -5384,16 +5386,16 @@ public: { const auto a = get_vr(op.ra); const auto b = get_vr(op.rb); - const auto x = eval(~get_vr(op.rt) & 1); + const auto x = ~get_vr(op.rt) & 1; const auto s = eval(a + b); - set_vr(op.rt, zext((sext(s < a) | (s & ~x)) == -1)); + set_vr(op.rt, zext((noncast(sext(s < a)) | (s & ~x)) == -1)); } void BGX(spu_opcode_t op) { const auto a = get_vr(op.ra); const auto b = get_vr(op.rb); - const auto c = eval(get_vr(op.rt) << 31); + const auto c = get_vr(op.rt) << 31; set_vr(op.rt, zext(a <= b & ~(a == b & c >= 0))); } @@ -5429,7 +5431,7 @@ public: void CEQH(spu_opcode_t op) { - set_vr(op.rt, sext(get_vr(op.ra) == get_vr(op.rb))); + set_vr(op.rt, sext(get_vr(op.ra) == get_vr(op.rb))); } void MPYU(spu_opcode_t op) @@ -5439,24 +5441,13 @@ public: void CEQB(spu_opcode_t op) { - set_vr(op.rt, sext(get_vr(op.ra) == get_vr(op.rb))); + set_vr(op.rt, sext(get_vr(op.ra) == get_vr(op.rb))); } void FSMBI(spu_opcode_t op) { - if (m_interp_magn) - { - const auto m = bitcast(get_imm(op.i16)); - set_vr(op.rt, sext(m)); - return; - } - - v128 data; - for (u32 i = 0; i < 16; i++) - data._bytes[i] = op.i16 & (1u << i) ? -1 : 0; - value_t r; - r.value = make_const_vector(data, get_type()); - set_vr(op.rt, r); + const auto m = bitcast(get_imm(op.i16)); + set_vr(op.rt, sext(m)); } void IL(spu_opcode_t op) @@ -5546,32 +5537,32 @@ public: void CGTI(spu_opcode_t op) { - set_vr(op.rt, sext(get_vr(op.ra) > get_imm(op.si10))); + set_vr(op.rt, sext(get_vr(op.ra) > get_imm(op.si10))); } void CGTHI(spu_opcode_t op) { - set_vr(op.rt, sext(get_vr(op.ra) > get_imm(op.si10))); + set_vr(op.rt, sext(get_vr(op.ra) > get_imm(op.si10))); } void CGTBI(spu_opcode_t op) { - set_vr(op.rt, sext(get_vr(op.ra) > get_imm(op.si10))); + set_vr(op.rt, sext(get_vr(op.ra) > get_imm(op.si10))); } void CLGTI(spu_opcode_t op) { - set_vr(op.rt, sext(get_vr(op.ra) > get_imm(op.si10))); + set_vr(op.rt, sext(get_vr(op.ra) > get_imm(op.si10))); } void CLGTHI(spu_opcode_t op) { - set_vr(op.rt, sext(get_vr(op.ra) > get_imm(op.si10))); + set_vr(op.rt, sext(get_vr(op.ra) > get_imm(op.si10))); } void CLGTBI(spu_opcode_t op) { - set_vr(op.rt, sext(get_vr(op.ra) > get_imm(op.si10))); + set_vr(op.rt, sext(get_vr(op.ra) > get_imm(op.si10))); } void MPYI(spu_opcode_t op) @@ -5586,17 +5577,17 @@ public: void CEQI(spu_opcode_t op) { - set_vr(op.rt, sext(get_vr(op.ra) == get_imm(op.si10))); + set_vr(op.rt, sext(get_vr(op.ra) == get_imm(op.si10))); } void CEQHI(spu_opcode_t op) { - set_vr(op.rt, sext(get_vr(op.ra) == get_imm(op.si10))); + set_vr(op.rt, sext(get_vr(op.ra) == get_imm(op.si10))); } void CEQBI(spu_opcode_t op) { - set_vr(op.rt, sext(get_vr(op.ra) == get_imm(op.si10))); + set_vr(op.rt, sext(get_vr(op.ra) == get_imm(op.si10))); } void ILA(spu_opcode_t op) @@ -5819,11 +5810,11 @@ public: LOG_TODO(SPU, "[0x%x] Const SHUFB mask: %s", m_pos, mask); } - const auto x = avg(sext((c & 0xc0) == 0xc0), sext((c & 0xe0) == 0xc0)); - const auto cr = c ^ 0xf; + const auto x = avg(noncast(sext((c & 0xc0) == 0xc0)), noncast(sext((c & 0xe0) == 0xc0))); + const auto cr = eval(c ^ 0xf); const auto a = pshufb(get_vr(op.ra), cr); const auto b = pshufb(get_vr(op.rb), cr); - set_vr(op.rt4, select(bitcast(cr << 3) >= 0, a, b) | x); + set_vr(op.rt4, select(noncast(cr << 3) >= 0, a, b) | x); } void MPYA(spu_opcode_t op) @@ -5924,7 +5915,7 @@ public: { if (g_cfg.core.spu_accurate_xfloat) { - set_vr(op.rt, sext(fcmp_ord(get_vr(op.ra) > get_vr(op.rb)))); + set_vr(op.rt, sext(fcmp_ord(get_vr(op.ra) > get_vr(op.rb)))); return; } @@ -5941,11 +5932,11 @@ public: // Use sign bits to invert abs values before comparison. const auto ca = eval(ia ^ (bitcast(a) >> 31)); const auto cb = eval(ib ^ (bitcast(b) >> 31)); - set_vr(op.rt, sext((ca > cb) & nz)); + set_vr(op.rt, sext((ca > cb) & nz)); } else { - set_vr(op.rt, sext(fcmp_ord(a > b))); + set_vr(op.rt, sext(fcmp_ord(a > b))); } } @@ -5953,7 +5944,7 @@ public: { if (g_cfg.core.spu_accurate_xfloat) { - set_vr(op.rt, sext(fcmp_ord(fabs(get_vr(op.ra)) > fabs(get_vr(op.rb))))); + set_vr(op.rt, sext(fcmp_ord(fabs(get_vr(op.ra)) > fabs(get_vr(op.rb))))); return; } @@ -5969,11 +5960,11 @@ public: const auto ia = bitcast(abs_a); const auto ib = bitcast(abs_b); const auto nz = eval((ia > 0x7fffff) | (ib > 0x7fffff)); - set_vr(op.rt, sext((ia > ib) & nz)); + set_vr(op.rt, sext((ia > ib) & nz)); } else { - set_vr(op.rt, sext(fcmp_ord(abs_a > abs_b))); + set_vr(op.rt, sext(fcmp_ord(abs_a > abs_b))); } } @@ -6065,17 +6056,17 @@ public: void FCEQ(spu_opcode_t op) { if (g_cfg.core.spu_accurate_xfloat) - set_vr(op.rt, sext(fcmp_ord(get_vr(op.ra) == get_vr(op.rb)))); + set_vr(op.rt, sext(fcmp_ord(get_vr(op.ra) == get_vr(op.rb)))); else - set_vr(op.rt, sext(fcmp_ord(get_vr(op.ra) == get_vr(op.rb)))); + set_vr(op.rt, sext(fcmp_ord(get_vr(op.ra) == get_vr(op.rb)))); } void FCMEQ(spu_opcode_t op) { if (g_cfg.core.spu_accurate_xfloat) - set_vr(op.rt, sext(fcmp_ord(fabs(get_vr(op.ra)) == fabs(get_vr(op.rb))))); + set_vr(op.rt, sext(fcmp_ord(fabs(get_vr(op.ra)) == fabs(get_vr(op.rb))))); else - set_vr(op.rt, sext(fcmp_ord(fabs(get_vr(op.ra)) == fabs(get_vr(op.rb))))); + set_vr(op.rt, sext(fcmp_ord(fabs(get_vr(op.ra)) == fabs(get_vr(op.rb))))); } // Multiply and return zero if any of the arguments is in the xfloat range. @@ -6084,7 +6075,7 @@ public: // Compare absolute values with max positive float in normal range. const auto aa = bitcast(fabs(a)); const auto ab = bitcast(fabs(b)); - return select(eval(max(aa, ab) > 0x7f7fffff), fsplat(0.), eval(a * b)); + return eval(select(max(aa, ab) > 0x7f7fffff, fsplat(0.), a * b)); } void FNMS(spu_opcode_t op) @@ -6365,7 +6356,7 @@ public: void STQX(spu_opcode_t op) { - value_t addr = zext((extract(get_vr(op.ra), 3) + extract(get_vr(op.rb), 3)) & 0x3fff0); + value_t addr = eval(zext((extract(get_vr(op.ra), 3) + extract(get_vr(op.rb), 3)) & 0x3fff0)); value_t r = get_vr(op.rt); r.value = m_ir->CreateShuffleVector(r.value, r.value, {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}); m_ir->CreateStore(r.value, m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, addr.value), get_type())); @@ -6373,7 +6364,7 @@ public: void LQX(spu_opcode_t op) { - value_t addr = zext((extract(get_vr(op.ra), 3) + extract(get_vr(op.rb), 3)) & 0x3fff0); + value_t addr = eval(zext((extract(get_vr(op.ra), 3) + extract(get_vr(op.rb), 3)) & 0x3fff0)); value_t r; r.value = m_ir->CreateLoad(m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, addr.value), get_type())); r.value = m_ir->CreateShuffleVector(r.value, r.value, {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}); @@ -6400,8 +6391,8 @@ public: void STQR(spu_opcode_t op) // { value_t addr; - addr.value = m_interp_magn ? m_interp_pc : m_ir->getInt32(m_pos); - addr = eval(((get_imm(op.i16, false) << 2) + zext(addr)) & 0x3fff0); + addr.value = m_interp_magn ? m_ir->CreateZExt(m_interp_pc, get_type()) : m_ir->getInt64(m_pos); + addr = eval(((get_imm(op.i16, false) << 2) + addr) & 0x3fff0); value_t r = get_vr(op.rt); r.value = m_ir->CreateShuffleVector(r.value, r.value, {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}); m_ir->CreateStore(r.value, m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, addr.value), get_type())); @@ -6410,8 +6401,8 @@ public: void LQR(spu_opcode_t op) // { value_t addr; - addr.value = m_interp_magn ? m_interp_pc : m_ir->getInt32(m_pos); - addr = eval(((get_imm(op.i16, false) << 2) + zext(addr)) & 0x3fff0); + addr.value = m_interp_magn ? m_ir->CreateZExt(m_interp_pc, get_type()) : m_ir->getInt64(m_pos); + addr = eval(((get_imm(op.i16, false) << 2) + addr) & 0x3fff0); value_t r; r.value = m_ir->CreateLoad(m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, addr.value), get_type())); r.value = m_ir->CreateShuffleVector(r.value, r.value, {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}); @@ -6420,7 +6411,7 @@ public: void STQD(spu_opcode_t op) { - value_t addr = zext((extract(get_vr(op.ra), 3) + (get_imm(op.si10) << 4)) & 0x3fff0); + value_t addr = eval(zext((extract(get_vr(op.ra), 3) + (get_imm(op.si10) << 4)) & 0x3fff0)); value_t r = get_vr(op.rt); r.value = m_ir->CreateShuffleVector(r.value, r.value, {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}); m_ir->CreateStore(r.value, m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, addr.value), get_type())); @@ -6428,7 +6419,7 @@ public: void LQD(spu_opcode_t op) { - value_t addr = zext((extract(get_vr(op.ra), 3) + (get_imm(op.si10) << 4)) & 0x3fff0); + value_t addr = eval(zext((extract(get_vr(op.ra), 3) + (get_imm(op.si10) << 4)) & 0x3fff0)); value_t r; r.value = m_ir->CreateLoad(m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, addr.value), get_type())); r.value = m_ir->CreateShuffleVector(r.value, r.value, {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0});