From 923cd7ad720ae1d55de61e19f7fb213d00f210aa Mon Sep 17 00:00:00 2001 From: Eladash Date: Fri, 25 Oct 2019 11:03:49 +0300 Subject: [PATCH] SPU LLVM: rewrite comparison on non-xfloat path of CFLTU, CFLTS CFLTU on the non-xfloat path is now as accurate as the xfloat path. * Also optimize FCTIW like FCTIWZ (PPU) --- rpcs3/Emu/Cell/PPUInterpreter.cpp | 6 +++--- rpcs3/Emu/Cell/PPUTranslator.cpp | 6 ++++-- rpcs3/Emu/Cell/SPURecompiler.cpp | 6 +++--- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp index b5b73d92f5..3efb65b078 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp @@ -4832,9 +4832,9 @@ bool ppu_interpreter::FCMPU(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter::FCTIW(ppu_thread& ppu, ppu_opcode_t op) { - const f64 b = ppu.fpr[op.frb]; - const s32 res = b >= f64(INT32_MAX) ? INT32_MAX : _mm_cvtsd_si32(_mm_load_sd(&b)); - ppu.fpr[op.frd] = std::bit_cast(res); + const auto b = _mm_load_sd(&ppu.fpr[op.frb]); + const auto res = _mm_xor_si128(_mm_cvtpd_epi32(b), _mm_castpd_si128(_mm_cmpge_pd(b, _mm_set1_pd(0x80000000)))); + ppu.fpr[op.frd] = std::bit_cast(_mm_cvtsi128_si32(res)); if (UNLIKELY(op.rc)) fmt::throw_exception("%s: op.rc", __func__); //ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu); return true; } diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index bbfb5afcb2..11f0743c6e 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -4004,8 +4004,10 @@ void PPUTranslator::FRSP(ppu_opcode_t op) void PPUTranslator::FCTIW(ppu_opcode_t op) { const auto b = GetFpr(op.frb); - SetFpr(op.frd, m_ir->CreateSelect(m_ir->CreateFCmpOGE(b, ConstantFP::get(GetType(), f64(INT32_MAX))), m_ir->getInt32(INT32_MAX), - Call(GetType(), "llvm.x86.sse2.cvtsd2si", m_ir->CreateInsertElement(GetUndef(), b, u64{0})))); + const auto xormask = m_ir->CreateSExt(m_ir->CreateFCmpOGE(b, 
ConstantFP::get(GetType(), std::exp2l(31.))), GetType()); + + // fix result saturation (0x80000000 -> 0x7fffffff) + SetFpr(op.frd, m_ir->CreateXor(xormask, Call(GetType(), "llvm.x86.sse2.cvtsd2si", m_ir->CreateInsertElement(GetUndef(), b, u64{0})))); //SetFPSCR_FR(Call(GetType(), m_pure_attr, "__fctiw_get_fr", b)); //SetFPSCR_FI(Call(GetType(), m_pure_attr, "__fctiw_get_fi", b)); diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 71fe86c19f..8a4dd547d5 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -7509,7 +7509,7 @@ public: value_t r; r.value = m_ir->CreateFPToSI(a.value, get_type()); - set_vr(op.rt, r ^ sext(fcmp_ord(a >= fsplat(std::exp2(31.f))))); + set_vr(op.rt, r ^ sext(bitcast(a) > splat(((31 + 127) << 23) - 1))); } } @@ -7568,7 +7568,7 @@ public: } r.value = m_ir->CreateFPToUI(a.value, get_type()); - set_vr(op.rt, select(fcmp_uno(a >= fsplat(std::exp2(32.f))), splat(-1), r & sext(fcmp_ord(a >= fsplat(0.))))); + set_vr(op.rt, select(fcmp_ord(a >= fsplat(std::exp2(32.f))), splat(-1), r & sext(fcmp_ord(a >= fsplat(0.))))); } else { @@ -7583,7 +7583,7 @@ public: value_t r; r.value = m_ir->CreateFPToUI(a.value, get_type()); - set_vr(op.rt, select(fcmp_uno(a >= fsplat(std::exp2(32.f))), splat(-1), r & ~(bitcast(a) >> 31))); + set_vr(op.rt, select(bitcast(a) > splat(((32 + 127) << 23) - 1), splat(-1), r & ~(bitcast(a) >> 31))); } }