From c9b0f0e7343cb82c18cb9189917d7be258c104a8 Mon Sep 17 00:00:00 2001 From: Eladash Date: Fri, 15 Nov 2019 17:58:41 +0200 Subject: [PATCH] SPU: Fix FREST --- rpcs3/Emu/CPU/CPUTranslator.h | 19 ++++++++++++++++++- rpcs3/Emu/Cell/SPUInterpreter.cpp | 28 ++++++++++++++++++---------- rpcs3/Emu/Cell/SPURecompiler.cpp | 11 +++++++++-- 3 files changed, 45 insertions(+), 13 deletions(-) diff --git a/rpcs3/Emu/CPU/CPUTranslator.h b/rpcs3/Emu/CPU/CPUTranslator.h index 614a6a8694..6edf49c8c5 100644 --- a/rpcs3/Emu/CPU/CPUTranslator.h +++ b/rpcs3/Emu/CPU/CPUTranslator.h @@ -1,4 +1,4 @@ -#pragma once +#pragma once #ifdef LLVM_AVAILABLE @@ -2729,6 +2729,23 @@ public: return result; } + // TODO: Support doubles + auto fre(value_t a) + { + decltype(a) result; + const auto av = a.eval(m_ir); + result.value = m_ir->CreateCall(m_module->getOrInsertFunction("llvm.x86.sse.rcp.ps", av->getType(), av->getType()).getCallee(), {av}); + return result; + } + + auto frsqe(value_t a) + { + decltype(a) result; + const auto av = a.eval(m_ir); + result.value = m_ir->CreateCall(m_module->getOrInsertFunction("llvm.x86.sse.rsqrt.ps", av->getType(), av->getType()).getCallee(), {av}); + return result; + } + template value_t pshufb(T1 a, T2 b) { diff --git a/rpcs3/Emu/Cell/SPUInterpreter.cpp b/rpcs3/Emu/Cell/SPUInterpreter.cpp index ae3cf0b739..2cdc597009 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/SPUInterpreter.cpp @@ -1,4 +1,4 @@ -#include "stdafx.h" +#include "stdafx.h" #include "SPUInterpreter.h" #include "Emu/System.h" @@ -1903,21 +1903,29 @@ inline bool isdenormal(double x) bool spu_interpreter_precise::FREST(spu_thread& spu, spu_opcode_t op) { fesetround(FE_TOWARDZERO); + const auto ra = spu.gpr[op.ra]; + auto res = v128::fromF(_mm_rcp_ps(ra.vf)); for (int i = 0; i < 4; i++) { - const float a = spu.gpr[op.ra]._f[i]; - float result; - if (fexpf(a) == 0) + const auto a = ra._f[i]; + switch (fexpf(a)) + { + case 0: { spu.fpscr.setDivideByZeroFlag(i); - result = extended(std::signbit(a), 0x7FFFFF); + res._f[i] = extended(std::signbit(a), 0x7FFFFF); + break; + } + case (0x7e800000 >> 23): // Special case for value not handled properly in rcpps + { + res._f[i] = 0.0f; + break; + } + default: break; } - else if (isextended(a)) - result = 0.0f; - else - result = 1 / a; - spu.gpr[op.rt]._f[i] = result; } + + spu.gpr[op.rt] = res; return true; } diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 8a4dd547d5..2f1a6d3a67 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -7216,9 +7216,16 @@ public: { // TODO if (g_cfg.core.spu_accurate_xfloat) - set_vr(op.rt, fsplat(1.0) / get_vr(op.ra)); + { + const auto a = get_vr(op.ra); + const auto mask_ov = sext(bitcast(fabs(a)) > splat(0x7e7fffff)); + const auto mask_de = eval(noncast(sext(fcmp_uno(a == fsplat(0.)))) >> 1); + set_vr(op.rt, (bitcast(fre(a)) & ~mask_ov) | noncast(mask_de)); + } else - set_vr(op.rt, fsplat(1.0) / get_vr(op.ra)); + { + set_vr(op.rt, fre(get_vr(op.ra))); + } } void FRSQEST(spu_opcode_t op)