diff --git a/rpcs3/Emu/CPU/CPUTranslator.h b/rpcs3/Emu/CPU/CPUTranslator.h index db2cfad032..0224675dc5 100644 --- a/rpcs3/Emu/CPU/CPUTranslator.h +++ b/rpcs3/Emu/CPU/CPUTranslator.h @@ -3729,6 +3729,22 @@ public: return result; } + /* Emits a call to the LLVM intrinsic x86_avx512_mask_fixupimm_ps_128. a, b and c are evaluated through m_ir and passed as the first three operands; d is passed as the imm32 operand and e as the trailing imm8 operand. NOTE(review): e is presumably the AVX-512 write mask; confirm against the intrinsic signature. */ template + value_t vfixupimmps(T1 a, T2 b, T3 c, u8 d, u8 e) + { + value_t result; + + const auto data0 = a.eval(m_ir); + const auto data1 = b.eval(m_ir); + const auto data2 = c.eval(m_ir); + const auto immediate = (llvm_const_int{d}); + const auto imm32 = immediate.eval(m_ir); + const auto immediate2 = (llvm_const_int{e}); + const auto imm8 = immediate2.eval(m_ir); + result.value = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::x86_avx512_mask_fixupimm_ps_128), {data0, data1, data2, imm32, imm8}); + return result; + } + llvm::Value* load_const(llvm::GlobalVariable* g, llvm::Value* i, llvm::Type* type = nullptr) { return m_ir->CreateLoad(type ? type : g->getValueType(), m_ir->CreateGEP(g->getValueType(), g, {m_ir->getInt64(0), m_ir->CreateZExtOrTrunc(i, get_type())})); diff --git a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp index 28a4acea59..38816ed917 100644 --- a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp @@ -6704,8 +6704,22 @@ public: } }); - register_intrinsic("spu_re_acc", [&](llvm::CallInst* ci) + if (m_use_avx512) { + register_intrinsic("spu_re_acc", [&](llvm::CallInst* ci) + { + const auto div = value(ci->getOperand(0)); + const auto the_one = value(ci->getOperand(1)); + + const auto div_result = the_one / div; + /* AVX-512 path: the quotient is handed to vfixupimmps as both a and b, together with the splatted constant 0x00330088. NOTE(review): presumably this table reproduces the NaN/Inf masking done by the fallback path; confirm. */ + return vfixupimmps(div_result, div_result, splat(0x00330088u), 0, 0xff); + }); + } + else + { + register_intrinsic("spu_re_acc", [&](llvm::CallInst* ci) + { const auto div = value(ci->getOperand(0)); const auto the_one = value(ci->getOperand(1)); @@ -6718,10 +6732,11 @@ public: const auto and_mask = bitcast(result_cmp_nan) & splat(0xFFFFFFFFu); const auto or_mask = bitcast(result_cmp_inf) & splat(0xFFFFFFFu); - return bitcast((bitcast(div_result) 
& and_mask) | or_mask); - }); + }); + } + const auto [a, b, c] = get_vrs(op.ra, op.rb, op.rc); static const auto MT = match();