From 917069e31a673256e7a2cf04da47ad626b000794 Mon Sep 17 00:00:00 2001 From: Eladash Date: Sat, 25 Jul 2020 09:41:41 +0300 Subject: [PATCH] PPU Precise/LLVM: Support NJ modes (#8617) --- rpcs3/Emu/Cell/PPUInterpreter.cpp | 71 ++++++++++++++++++------------- rpcs3/Emu/Cell/PPUThread.cpp | 5 +++ rpcs3/Emu/Cell/PPUThread.h | 5 ++- rpcs3/Emu/Cell/PPUTranslator.cpp | 57 +++++++++++++++++-------- rpcs3/Emu/Cell/PPUTranslator.h | 14 +++--- rpcs3/Emu/system_config.h | 1 + 6 files changed, 98 insertions(+), 55 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp index 9a10960055..d347860d11 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp @@ -359,6 +359,8 @@ public: } const g_ppu_scale_table; +constexpr u32 ppu_inf_u32 = 0x7F800000u; +static const f32 ppu_inf_f32 = std::bit_cast(ppu_inf_u32); constexpr u32 ppu_nan_u32 = 0x7FC00000u; static const f32 ppu_nan_f32 = std::bit_cast(ppu_nan_u32); static const v128 ppu_vec_nans = v128::from32p(ppu_nan_u32); @@ -403,6 +405,14 @@ v128 vec_handle_nan(__m128 result, Args... args) return vec_handle_nan(v128::fromF(result), v128::fromF(args)...); } +// Flush denormals to zero if NJ is 1 +inline v128 vec_handle_denormal(ppu_thread& ppu, v128 a) +{ + const auto mask = v128::from32p(ppu.jm_mask); + const auto nz = v128::fromV(_mm_srli_epi32(v128::eq32(mask & a, v128{}).vi, 1)); + return v128::andnot(nz, a); +} + bool ppu_interpreter::MFVSCR(ppu_thread& ppu, ppu_opcode_t op) { ppu.vr[op.vd] = v128::from32(0, 0, 0, u32{ppu.sat} | (u32{ppu.nj} << 16)); @@ -414,6 +424,7 @@ bool ppu_interpreter::MTVSCR(ppu_thread& ppu, ppu_opcode_t op) const u32 vscr = ppu.vr[op.vb]._u32[3]; ppu.sat = (vscr & 1) != 0; ppu.nj = (vscr & 0x10000) != 0; + ppu.jm_mask = ppu.nj ? ppu_inf_u32 : 0x7fff'ffff; return true; } @@ -427,10 +438,10 @@ bool ppu_interpreter::VADDCUW(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter::VADDFP(ppu_thread& ppu, ppu_opcode_t op) { - const auto a = ppu.vr[op.va]; - const auto b = ppu.vr[op.vb]; + const auto a = vec_handle_denormal(ppu, ppu.vr[op.va]); + const auto b = vec_handle_denormal(ppu, ppu.vr[op.vb]); const auto result = v128::addfs(a, b); - ppu.vr[op.vd] = vec_handle_nan(result, a, b); + ppu.vr[op.vd] = vec_handle_denormal(ppu, vec_handle_nan(result, a, b)); return true; } @@ -958,26 +969,26 @@ bool ppu_interpreter::VLOGEFP(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter_fast::VMADDFP(ppu_thread& ppu, ppu_opcode_t op) { - const auto a = ppu.vr[op.va].vf; - const auto b = ppu.vr[op.vb].vf; - const auto c = ppu.vr[op.vc].vf; + const auto a = vec_handle_denormal(ppu, ppu.vr[op.va]).vf; + const auto b = vec_handle_denormal(ppu, ppu.vr[op.vb]).vf; + const auto c = vec_handle_denormal(ppu, ppu.vr[op.vc]).vf; const auto result = _mm_add_ps(_mm_mul_ps(a, c), b); - ppu.vr[op.vd] = vec_handle_nan(result); + ppu.vr[op.vd] = vec_handle_denormal(ppu, vec_handle_nan(result)); return true; } bool ppu_interpreter_precise::VMADDFP(ppu_thread& ppu, ppu_opcode_t op) { - const auto a = ppu.vr[op.va]; - const auto b = ppu.vr[op.vb]; - const auto c = ppu.vr[op.vc]; - ppu.vr[op.rd] = vec_handle_nan(v128::fma32f(a, c, b), a, b, c); + const auto a = vec_handle_denormal(ppu, ppu.vr[op.va]); + const auto b = vec_handle_denormal(ppu, ppu.vr[op.vb]); + const auto c = vec_handle_denormal(ppu, ppu.vr[op.vc]); + ppu.vr[op.rd] = vec_handle_denormal(ppu, vec_handle_nan(v128::fma32f(a, c, b), a, b, c)); return true; } bool ppu_interpreter::VMAXFP(ppu_thread& ppu, ppu_opcode_t op) { - ppu.vr[op.vd] = vec_handle_nan(_mm_max_ps(ppu.vr[op.va].vf, ppu.vr[op.vb].vf)); + ppu.vr[op.vd] = vec_handle_denormal(ppu, vec_handle_nan(_mm_max_ps(ppu.vr[op.va].vf, ppu.vr[op.vb].vf))); return true; } @@ -1123,7 +1134,7 @@ bool ppu_interpreter::VMINFP(ppu_thread& ppu, ppu_opcode_t op) const auto a = ppu.vr[op.va].vf; const auto b = ppu.vr[op.vb].vf; const auto result = _mm_or_ps(_mm_min_ps(a, b), _mm_min_ps(b, a)); - ppu.vr[op.vd] = vec_handle_nan(result, a, b); + ppu.vr[op.vd] = vec_handle_denormal(ppu, vec_handle_nan(result, a, b)); return true; } @@ -1463,18 +1474,18 @@ bool ppu_interpreter_fast::VNMSUBFP(ppu_thread& ppu, ppu_opcode_t op) const auto a = _mm_sub_ps(_mm_mul_ps(ppu.vr[op.va].vf, ppu.vr[op.vc].vf), ppu.vr[op.vb].vf); const auto b = _mm_set1_ps(-0.0f); const auto result = _mm_xor_ps(a, b); - ppu.vr[op.vd] = vec_handle_nan(result, a, b); + ppu.vr[op.vd] = vec_handle_nan(result); return true; } bool ppu_interpreter_precise::VNMSUBFP(ppu_thread& ppu, ppu_opcode_t op) { const auto m = _mm_set1_ps(-0.0f); - const auto a = ppu.vr[op.va]; - const auto c = ppu.vr[op.vc]; + const auto a = vec_handle_denormal(ppu, ppu.vr[op.va]); + const auto c = vec_handle_denormal(ppu, ppu.vr[op.vc]); const auto b = v128::fromF(_mm_xor_ps(ppu.vr[op.vb].vf, m)); const auto r = v128::fromF(_mm_xor_ps(v128::fma32f(a, c, b).vf, m)); - ppu.vr[op.rd] = vec_handle_nan(r, a, b, c); + ppu.vr[op.rd] = vec_handle_denormal(ppu, vec_handle_nan(r, a, b, c)); return true; } @@ -1874,15 +1885,15 @@ bool ppu_interpreter_precise::VPKUWUS(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter::VREFP(ppu_thread& ppu, ppu_opcode_t op) { const auto a = _mm_set_ps(1.0f, 1.0f, 1.0f, 1.0f); - const auto b = ppu.vr[op.vb].vf; + const auto b = vec_handle_denormal(ppu, ppu.vr[op.vb]).vf; const auto result = _mm_div_ps(a, b); - ppu.vr[op.vd] = vec_handle_nan(result, a, b); + ppu.vr[op.vd] = vec_handle_denormal(ppu, vec_handle_nan(result, a, b)); return true; } bool ppu_interpreter::VRFIM(ppu_thread& ppu, ppu_opcode_t op) { - const auto b = ppu.vr[op.vb]; + const auto b = vec_handle_denormal(ppu, ppu.vr[op.vb]); v128 d; for (uint w = 0; w < 4; w++) @@ -1890,7 +1901,7 @@ bool ppu_interpreter::VRFIM(ppu_thread& ppu, ppu_opcode_t op) d._f[w] = std::floor(b._f[w]); } - ppu.vr[op.vd] = vec_handle_nan(d, b); + ppu.vr[op.vd] = vec_handle_denormal(ppu, vec_handle_nan(d, b)); return true; } @@ -1904,13 +1915,13 @@ bool ppu_interpreter::VRFIN(ppu_thread& ppu, ppu_opcode_t op) d._f[w] = std::nearbyint(b._f[w]); } - ppu.vr[op.vd] = vec_handle_nan(d, b); + ppu.vr[op.vd] = vec_handle_denormal(ppu, vec_handle_nan(d, b)); return true; } bool ppu_interpreter::VRFIP(ppu_thread& ppu, ppu_opcode_t op) { - const auto b = ppu.vr[op.vb]; + const auto b = vec_handle_denormal(ppu, ppu.vr[op.vb]); v128 d; for (uint w = 0; w < 4; w++) @@ -1918,7 +1929,7 @@ bool ppu_interpreter::VRFIP(ppu_thread& ppu, ppu_opcode_t op) d._f[w] = std::ceil(b._f[w]); } - ppu.vr[op.vd] = vec_handle_nan(d, b); + ppu.vr[op.vd] = vec_handle_denormal(ppu, vec_handle_nan(d, b)); return true; } @@ -1932,7 +1943,7 @@ bool ppu_interpreter::VRFIZ(ppu_thread& ppu, ppu_opcode_t op) d._f[w] = std::truncf(b._f[w]); } - ppu.vr[op.vd] = vec_handle_nan(d, b); + ppu.vr[op.vd] = vec_handle_denormal(ppu, vec_handle_nan(d, b)); return true; } @@ -1978,9 +1989,9 @@ bool ppu_interpreter::VRLW(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter::VRSQRTEFP(ppu_thread& ppu, ppu_opcode_t op) { const auto a = _mm_set_ps(1.0f, 1.0f, 1.0f, 1.0f); - const auto b = ppu.vr[op.vb].vf; + const auto b = vec_handle_denormal(ppu, ppu.vr[op.vb]).vf; const auto result = _mm_div_ps(a, _mm_sqrt_ps(b)); - ppu.vr[op.vd] = vec_handle_nan(result, a, b); + ppu.vr[op.vd] = vec_handle_denormal(ppu, vec_handle_nan(result, a, b)); return true; } @@ -2277,10 +2288,10 @@ bool ppu_interpreter::VSUBCUW(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter::VSUBFP(ppu_thread& ppu, ppu_opcode_t op) { - const auto a = ppu.vr[op.va]; - const auto b = ppu.vr[op.vb]; + const auto a = vec_handle_denormal(ppu, ppu.vr[op.va]); + const auto b = vec_handle_denormal(ppu, ppu.vr[op.vb]); const auto result = v128::subfs(a, b); - ppu.vr[op.vd] = vec_handle_nan(result, a, b); + ppu.vr[op.vd] = vec_handle_denormal(ppu, vec_handle_nan(result, a, b)); return true; } diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 0d9269af04..de7ead2fbf 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -1613,6 +1613,7 @@ extern void ppu_initialize(const ppu_module& info) non_win32, accurate_fma, accurate_ppu_vector_nan, + java_mode_handling, __bitset_enum_max }; @@ -1630,6 +1631,10 @@ extern void ppu_initialize(const ppu_module& info) { settings += ppu_settings::accurate_ppu_vector_nan; } + if (g_cfg.core.llvm_ppu_jm_handling) + { + settings += ppu_settings::java_mode_handling; + } // Write version, hash, CPU, settings fmt::append(obj_name, "v3-tane-%s-%s-%s.obj", fmt::base57(output, 16), fmt::base57(settings), jit_compiler::cpu(g_cfg.core.llvm_cpu)); diff --git a/rpcs3/Emu/Cell/PPUThread.h b/rpcs3/Emu/Cell/PPUThread.h index ddebcb2998..c36ee89746 100644 --- a/rpcs3/Emu/Cell/PPUThread.h +++ b/rpcs3/Emu/Cell/PPUThread.h @@ -186,7 +186,10 @@ public: exception, the corresponding element in the target vr is cleared to '0'. In both cases, the '0' has the same sign as the denormalized or underflowing value. */ - bool nj = false; + bool nj = true; + + // Optimization: precomputed java-mode mask for handling denormals + u32 jm_mask = 0x7f80'0000; u32 raddr{0}; // Reservation addr u64 rtime{0}; diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index 461f8de494..ae749b80c2 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -46,6 +46,8 @@ PPUTranslator::PPUTranslator(LLVMContext& context, Module* _module, const ppu_mo thread_struct.insert(thread_struct.end(), 3, GetType()); // so, ov, ca thread_struct.insert(thread_struct.end(), 1, GetType()); // cnt thread_struct.insert(thread_struct.end(), 2, GetType()); // sat, nj + thread_struct.emplace_back(ArrayType::get(GetType(), 2)); // Padding + thread_struct.insert(thread_struct.end(), 1, GetType()); // jm_mask m_thread_type = StructType::create(m_context, thread_struct, "context_t"); @@ -231,6 +233,25 @@ Value* PPUTranslator::VecHandleNan(Value* val) return val; } +Value* PPUTranslator::VecHandleDenormal(Value* val) +{ + const auto type = val->getType(); + const auto value = type == GetType() ? val : m_ir->CreateBitCast(val, GetType()); + + const auto mask = SExt(m_ir->CreateICmpEQ(m_ir->CreateAnd(value, Broadcast(RegLoad(m_jm_mask), 4)), ConstantVector::getSplat(4, m_ir->getInt32(0))), GetType()); + const auto nz = m_ir->CreateLShr(mask, 1); + const auto result = m_ir->CreateAnd(m_ir->CreateNot(nz), value); + + return type == GetType() ? result : m_ir->CreateBitCast(result, type); +} + +Value* PPUTranslator::VecHandleResult(Value* val) +{ + val = g_cfg.core.llvm_ppu_accurate_vector_nan ? VecHandleNan(val) : val; + val = g_cfg.core.llvm_ppu_jm_handling ? VecHandleDenormal(val) : val; + return val; +} + Value* PPUTranslator::GetAddr(u64 _add) { if (m_reloc) @@ -609,7 +630,9 @@ void PPUTranslator::MFVSCR(ppu_opcode_t op) void PPUTranslator::MTVSCR(ppu_opcode_t op) { const auto vscr = m_ir->CreateExtractElement(GetVr(op.vb, VrType::vi32), m_ir->getInt32(m_is_be ? 3 : 0)); - RegStore(Trunc(m_ir->CreateLShr(vscr, 16), GetType()), m_nj); + const auto nj = Trunc(m_ir->CreateLShr(vscr, 16), GetType()); + RegStore(nj, m_nj); + if (g_cfg.core.llvm_ppu_jm_handling) RegStore(m_ir->CreateSelect(nj, m_ir->getInt32(0x7f80'0000), m_ir->getInt32(0x7fff'ffff)), m_jm_mask); RegStore(Trunc(vscr, GetType()), m_sat); } @@ -625,7 +648,7 @@ void PPUTranslator::VADDFP(ppu_opcode_t op) const auto a = get_vr(op.va); const auto b = get_vr(op.vb); - set_vr(op.vd, vec_handle_nan(a + b)); + set_vr(op.vd, vec_handle_result(a + b)); } void PPUTranslator::VADDSBS(ppu_opcode_t op) @@ -930,7 +953,7 @@ void PPUTranslator::VMADDFP(ppu_opcode_t op) if (data == v128{}) { - set_vr(op.vd, vec_handle_nan(a * c)); + set_vr(op.vd, vec_handle_result(a * c)); ppu_log.notice("LLVM: VMADDFP with 0 addend at [0x%08x]", m_addr + (m_reloc ? m_reloc->addr : 0)); return; } @@ -938,7 +961,7 @@ void PPUTranslator::VMADDFP(ppu_opcode_t op) if (m_use_fma) { - SetVr(op.vd, VecHandleNan(m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::fma), { a.value, c.value, b.value }))); + SetVr(op.vd, VecHandleResult(m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::fma), { a.value, c.value, b.value }))); return; } @@ -948,13 +971,13 @@ void PPUTranslator::VMADDFP(ppu_opcode_t op) const auto xc = m_ir->CreateFPExt(c.value, get_type()); const auto xr = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::fmuladd), {xa, xc, xb}); - SetVr(op.vd, VecHandleNan(m_ir->CreateFPTrunc(xr, get_type()))); + SetVr(op.vd, VecHandleResult(m_ir->CreateFPTrunc(xr, get_type()))); } void PPUTranslator::VMAXFP(ppu_opcode_t op) { const auto ab = GetVrs(VrType::vf, op.va, op.vb); - SetVr(op.vd, VecHandleNan(m_ir->CreateSelect(m_ir->CreateFCmpOGT(ab[0], ab[1]), ab[0], ab[1]))); + SetVr(op.vd, VecHandleResult(m_ir->CreateSelect(m_ir->CreateFCmpOGT(ab[0], ab[1]), ab[0], ab[1]))); } void PPUTranslator::VMAXSB(ppu_opcode_t op) @@ -1026,7 +1049,7 @@ void PPUTranslator::VMHRADDSHS(ppu_opcode_t op) void PPUTranslator::VMINFP(ppu_opcode_t op) { const auto ab = GetVrs(VrType::vf, op.va, op.vb); - SetVr(op.vd, VecHandleNan(m_ir->CreateSelect(m_ir->CreateFCmpOLT(ab[0], ab[1]), ab[0], ab[1]))); + SetVr(op.vd, VecHandleResult(m_ir->CreateSelect(m_ir->CreateFCmpOLT(ab[0], ab[1]), ab[0], ab[1]))); } void PPUTranslator::VMINSB(ppu_opcode_t op) @@ -1236,7 +1259,7 @@ void PPUTranslator::VNMSUBFP(ppu_opcode_t op) if (data == v128{}) { - set_vr(op.vd, vec_handle_nan(-a * c)); + set_vr(op.vd, vec_handle_result(-a * c)); ppu_log.notice("LLVM: VNMSUBFP with 0 addend at [0x%08x]", m_addr + (m_reloc ? m_reloc->addr : 0)); return; } @@ -1245,7 +1268,7 @@ void PPUTranslator::VNMSUBFP(ppu_opcode_t op) // Differs from the emulated path with regards to negative zero if (m_use_fma) { - SetVr(op.vd, VecHandleNan(m_ir->CreateFNeg(m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::fma), { a.value, c.value, m_ir->CreateFNeg(b.value) })))); + SetVr(op.vd, VecHandleResult(m_ir->CreateFNeg(m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::fma), { a.value, c.value, m_ir->CreateFNeg(b.value) })))); return; } @@ -1255,7 +1278,7 @@ void PPUTranslator::VNMSUBFP(ppu_opcode_t op) const auto xc = m_ir->CreateFPExt(c.value, get_type()); const auto xr = m_ir->CreateFNeg(m_ir->CreateFSub(m_ir->CreateFMul(xa, xc), xb)); - SetVr(op.vd, VecHandleNan(m_ir->CreateFPTrunc(xr, get_type()))); + SetVr(op.vd, VecHandleResult(m_ir->CreateFPTrunc(xr, get_type()))); } void PPUTranslator::VNOR(ppu_opcode_t op) @@ -1361,28 +1384,28 @@ void PPUTranslator::VPKUWUS(ppu_opcode_t op) void PPUTranslator::VREFP(ppu_opcode_t op) { - const auto result = VecHandleNan(m_ir->CreateFDiv(ConstantVector::getSplat(4, ConstantFP::get(GetType(), 1.0)), GetVr(op.vb, VrType::vf))); + const auto result = VecHandleResult(m_ir->CreateFDiv(ConstantVector::getSplat(4, ConstantFP::get(GetType(), 1.0)), GetVr(op.vb, VrType::vf))); SetVr(op.vd, result); } void PPUTranslator::VRFIM(ppu_opcode_t op) { - SetVr(op.vd, VecHandleNan(Call(GetType(), "llvm.floor.v4f32", GetVr(op.vb, VrType::vf)))); + SetVr(op.vd, VecHandleResult(Call(GetType(), "llvm.floor.v4f32", GetVr(op.vb, VrType::vf)))); } void PPUTranslator::VRFIN(ppu_opcode_t op) { - SetVr(op.vd, VecHandleNan(Call(GetType(), "llvm.nearbyint.v4f32", GetVr(op.vb, VrType::vf)))); + SetVr(op.vd, VecHandleResult(Call(GetType(), "llvm.nearbyint.v4f32", GetVr(op.vb, VrType::vf)))); } void PPUTranslator::VRFIP(ppu_opcode_t op) { - SetVr(op.vd, VecHandleNan(Call(GetType(), "llvm.ceil.v4f32", GetVr(op.vb, VrType::vf)))); + SetVr(op.vd, VecHandleResult(Call(GetType(), "llvm.ceil.v4f32", GetVr(op.vb, VrType::vf)))); } void PPUTranslator::VRFIZ(ppu_opcode_t op) { - SetVr(op.vd, VecHandleNan(Call(GetType(), "llvm.trunc.v4f32", GetVr(op.vb, VrType::vf)))); + SetVr(op.vd, VecHandleResult(Call(GetType(), "llvm.trunc.v4f32", GetVr(op.vb, VrType::vf)))); } void PPUTranslator::VRLB(ppu_opcode_t op) @@ -1407,7 +1430,7 @@ void PPUTranslator::VRSQRTEFP(ppu_opcode_t op) { const auto result = m_ir->CreateFDiv(ConstantVector::getSplat(4, ConstantFP::get(GetType(), 1.0)), Call(GetType(), "llvm.sqrt.v4f32", GetVr(op.vb, VrType::vf))); - SetVr(op.vd, VecHandleNan(result)); + SetVr(op.vd, VecHandleResult(result)); } void PPUTranslator::VSEL(ppu_opcode_t op) @@ -1565,7 +1588,7 @@ void PPUTranslator::VSUBFP(ppu_opcode_t op) { const auto a = get_vr(op.va); const auto b = get_vr(op.vb); - SetVr(op.vd, VecHandleNan(eval(a - b).eval(m_ir))); + SetVr(op.vd, VecHandleResult(eval(a - b).eval(m_ir))); } void PPUTranslator::VSUBSBS(ppu_opcode_t op) diff --git a/rpcs3/Emu/Cell/PPUTranslator.h b/rpcs3/Emu/Cell/PPUTranslator.h index ec512b739d..179a4fded6 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.h +++ b/rpcs3/Emu/Cell/PPUTranslator.h @@ -52,9 +52,9 @@ class PPUTranslator final : public cpu_translator llvm::Value* m_mtocr_table{}; - llvm::Value* m_globals[173]; + llvm::Value* m_globals[175]; llvm::Value** const m_g_cr = m_globals + 99; - llvm::Value* m_locals[173]; + llvm::Value* m_locals[175]; llvm::Value** const m_gpr = m_locals + 3; llvm::Value** const m_fpr = m_locals + 35; llvm::Value** const m_vr = m_locals + 67; @@ -77,6 +77,7 @@ class PPUTranslator final : public cpu_translator DEF_VALUE(m_cnt, m_g_cnt, 170) // XER.CNT DEF_VALUE(m_sat, m_g_sat, 171) // VSCR.SAT bit, sticky saturation flag DEF_VALUE(m_nj, m_g_nj, 172) // VSCR.NJ bit, non-Java mode + DEF_VALUE(m_jm_mask, m_g_jm_mask, 174) // Java-Mode helper mask #undef DEF_VALUE public: @@ -102,15 +103,14 @@ public: } llvm::Value* VecHandleNan(llvm::Value* val); + llvm::Value* VecHandleDenormal(llvm::Value* val); + llvm::Value* VecHandleResult(llvm::Value* val); template - auto vec_handle_nan(T&& expr) + auto vec_handle_result(T&& expr) { value_t result; - if (g_cfg.core.llvm_ppu_accurate_vector_nan) - result.value = VecHandleNan(expr.eval(m_ir)); - else - result.value = expr.eval(m_ir); + result.value = VecHandleResult(expr.eval(m_ir)); return result; } diff --git a/rpcs3/Emu/system_config.h b/rpcs3/Emu/system_config.h index 53a9af906c..84005af783 100644 --- a/rpcs3/Emu/system_config.h +++ b/rpcs3/Emu/system_config.h @@ -52,6 +52,7 @@ struct cfg_root : cfg::node cfg::_bool spu_accurate_xfloat{ this, "Accurate xfloat", false }; cfg::_bool spu_approx_xfloat{ this, "Approximate xfloat", true }; cfg::_bool llvm_accurate_dfma{ this, "LLVM Accurate DFMA", true }; // Enable accurate double-precision FMA for CPUs which do not support it natively + cfg::_bool llvm_ppu_jm_handling{ this, "PPU LLVM Java Mode Handling", false }; // Respect current Java Mode for alti-vec ops by PPU LLVM cfg::_bool llvm_ppu_accurate_vector_nan{ this, "PPU LLVM Accurate Vector NaN values", false }; cfg::_int<-64, 64> stub_ppu_traps{ this, "Stub PPU Traps", 0, true }; // Hack, skip PPU traps for rare cases where the trap is continueable (specify relative instructions to skip)