// Patch summary: replaces the exact-division implementation of the PPU FRES
// instruction with a table-driven estimate, in both the interpreter and the
// LLVM translator.
//
// Common.h
//   Adds ppu_fres_mantissas, a constexpr u32 lookup table declared with 128
//   entries, placed directly before the existing ppu_frsqrte_mantissas table.
//
// PPUInterpreter.cpp, FRES()
//   Old: fpr[frd] = f32(1.0 / fpr[frb]).
//   New: reinterprets fpr[frb] as a 64-bit integer b and builds the result
//   bit pattern by hand:
//     e = (b >> 52) & 0x7ff   exponent field of the double
//     i = (b >> 45) & 0x7f    top seven fraction bits, used as the table index
//     r = 0 when e >= 0x3ff + 0x80, otherwise
//         ((0x7ff - 2 - e) << 52) | (ppu_fres_mantissas[i] << 29)
//   For non-NaN input (a == a) the sign bit of b is OR-ed onto r. For NaN
//   input, b is returned with bit 51 (0x8'0000'0000'0000, the top fraction
//   bit) set. The pattern is bit-cast back to a double, narrowed through f32,
//   stored in fpr[frd], and that stored value is then passed to ppu_set_fpcc.
//
// PPUTranslator.cpp, PPUTranslator::FRES()
//   Emits LLVM IR for the same computation. On first use it creates a constant
//   GlobalVariable with PrivateLinkage initialised from ppu_fres_mantissas and
//   caches it in m_fres_table. NaN detection is an FCmpUNO of the operand with
//   itself; the two conditional results are chosen with CreateSelect; the
//   value written by SetFpr goes through CreateFPTrunc. SetFPRF now receives
//   `r` instead of the old FDiv result.
//
// PPUTranslator.h
//   Adds the member llvm::Value* m_fres_table{} next to m_frsqrte_table.
//
// NOTE(review): several calls appear without template arguments
//   (std::bit_cast(a), GetType(), bitcast(a), ZExt(...), GetUndef(), select()).
//   Presumably the angle-bracket lists were stripped together with the line
//   breaks when this patch text was extracted - confirm against the original
//   commit before applying.
// NOTE(review): the interpreter hands the f32-rounded value to ppu_set_fpcc,
//   while the translator hands `r` (the value before CreateFPTrunc) to
//   SetFPRF - confirm this difference is intentional.
diff --git a/rpcs3/Emu/Cell/Common.h b/rpcs3/Emu/Cell/Common.h index be87f5abf2..14fd22550f 100644 --- a/rpcs3/Emu/Cell/Common.h +++ b/rpcs3/Emu/Cell/Common.h @@ -17,6 +17,138 @@ inline int fexpf(float x) return (std::bit_cast(x) >> 23) & 0xff; } +constexpr u32 ppu_fres_mantissas[128] = +{ + 0x007f0000, + 0x007d0800, + 0x007b1800, + 0x00793000, + 0x00775000, + 0x00757000, + 0x0073a000, + 0x0071e000, + 0x00700000, + 0x006e4000, + 0x006ca000, + 0x006ae000, + 0x00694000, + 0x00678000, + 0x00660000, + 0x00646000, + 0x0062c000, + 0x00614000, + 0x005fc000, + 0x005e4000, + 0x005cc000, + 0x005b4000, + 0x0059c000, + 0x00584000, + 0x00570000, + 0x00558000, + 0x00540000, + 0x0052c000, + 0x00518000, + 0x00500000, + 0x004ec000, + 0x004d8000, + 0x004c0000, + 0x004b0000, + 0x00498000, + 0x00488000, + 0x00474000, + 0x00460000, + 0x0044c000, + 0x00438000, + 0x00428000, + 0x00418000, + 0x00400000, + 0x003f0000, + 0x003e0000, + 0x003d0000, + 0x003bc000, + 0x003ac000, + 0x00398000, + 0x00388000, + 0x00378000, + 0x00368000, + 0x00358000, + 0x00348000, + 0x00338000, + 0x00328000, + 0x00318000, + 0x00308000, + 0x002f8000, + 0x002ec000, + 0x002e0000, + 0x002d0000, + 0x002c0000, + 0x002b0000, + 0x002a0000, + 0x00298000, + 0x00288000, + 0x00278000, + 0x0026c000, + 0x00260000, + 0x00250000, + 0x00244000, + 0x00238000, + 0x00228000, + 0x00220000, + 0x00210000, + 0x00200000, + 0x001f8000, + 0x001e8000, + 0x001e0000, + 0x001d0000, + 0x001c8000, + 0x001b8000, + 0x001b0000, + 0x001a0000, + 0x00198000, + 0x00190000, + 0x00180000, + 0x00178000, + 0x00168000, + 0x00160000, + 0x00158000, + 0x00148000, + 0x00140000, + 0x00138000, + 0x00128000, + 0x00120000, + 0x00118000, + 0x00108000, + 0x00100000, + 0x000f8000, + 0x000f0000, + 0x000e0000, + 0x000d8000, + 0x000d0000, + 0x000c8000, + 0x000b8000, + 0x000b0000, + 0x000a8000, + 0x000a0000, + 0x00098000, + 0x00090000, + 0x00080000, + 0x00078000, + 0x00070000, + 0x00068000, + 0x00060000, + 0x00058000, + 0x00050000, + 0x00048000, + 0x00040000, + 0x00038000, 
+ 0x00030000, + 0x00028000, + 0x00020000, + 0x00018000, + 0x00010000, + 0x00000000, +}; + constexpr u32 ppu_frsqrte_mantissas[16] = { 0x000f1000u, 0x000d8000u, 0x000c0000u, 0x000a8000u, diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp index e2f0af0be1..365981f3c3 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp @@ -6176,9 +6176,16 @@ auto FRES() if constexpr (Build == 0xf1a6) return ppu_exec_select::template select(); - static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) { - ppu.fpr[op.frd] = f32(1.0 / ppu.fpr[op.frb]); - ppu_set_fpcc(ppu, ppu.fpr[op.frd], 0.); + static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) + { + const f64 a = ppu.fpr[op.frb]; + const u64 b = std::bit_cast(a); + const u64 e = (b >> 52) & 0x7ff; // double exp + const u64 i = (b >> 45) & 0x7f; // mantissa LUT index + const u64 r = e >= (0x3ff + 0x80) ? 0 : (0x7ff - 2 - e) << 52 | u64{ppu_fres_mantissas[i]} << (32 - 3); + + ppu.fpr[op.frd] = f32(std::bit_cast(a == a ? 
(b & 0x8000'0000'0000'0000) | r : (0x8'0000'0000'0000 | b))); + ppu_set_fpcc(ppu, ppu.fpr[op.frd], 0.); }; RETURN_(ppu, op); } diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index f0871b5892..48decf175c 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -3991,9 +3991,22 @@ void PPUTranslator::FSQRTS(ppu_opcode_t op) void PPUTranslator::FRES(ppu_opcode_t op) { - const auto b = GetFpr(op.frb, 32); - const auto result = m_ir->CreateFDiv(ConstantFP::get(GetType(), 1.0), b); - SetFpr(op.frd, result); + if (!m_fres_table) + { + m_fres_table = new GlobalVariable(*m_module, ArrayType::get(GetType(), 128), true, GlobalValue::PrivateLinkage, ConstantDataArray::get(m_context, ppu_fres_mantissas)); + } + + const auto a = GetFpr(op.frb); + const auto b = bitcast(a); + const auto n = m_ir->CreateFCmpUNO(a, a); // test for NaN + const auto e = m_ir->CreateAnd(m_ir->CreateLShr(b, 52), 0x7ff); // double exp + const auto i = m_ir->CreateAnd(m_ir->CreateLShr(b, 45), 0x7f); // mantissa LUT index + const auto m = m_ir->CreateShl(ZExt(m_ir->CreateLoad(m_ir->CreateGEP(m_fres_table, {m_ir->getInt64(0), i}))), 29); + const auto c = m_ir->CreateICmpUGE(e, m_ir->getInt64(0x3ff + 0x80)); // test for INF + const auto x = m_ir->CreateShl(m_ir->CreateSub(m_ir->getInt64(0x7ff - 2), e), 52); + const auto s = m_ir->CreateSelect(c, m_ir->getInt64(0), m_ir->CreateOr(x, m)); + const auto r = bitcast(m_ir->CreateSelect(n, m_ir->CreateOr(b, 0x8'0000'0000'0000), m_ir->CreateOr(s, m_ir->CreateAnd(b, 0x8000'0000'0000'0000)))); + SetFpr(op.frd, m_ir->CreateFPTrunc(r, GetType())); //m_ir->CreateStore(GetUndef(), m_fpscr_fr); //m_ir->CreateStore(GetUndef(), m_fpscr_fi); @@ -4002,7 +4015,7 @@ void PPUTranslator::FRES(ppu_opcode_t op) //SetFPSCRException(m_fpscr_ux, Call(GetType(), m_pure_attr, "__fres_get_ux", b)); //SetFPSCRException(m_fpscr_zx, Call(GetType(), m_pure_attr, "__fres_get_zx", b)); //SetFPSCRException(m_fpscr_vxsnan, 
Call(GetType(), m_pure_attr, "__fres_get_vxsnan", b)); - SetFPRF(result, op.rc != 0); + SetFPRF(r, op.rc != 0); } void PPUTranslator::FMULS(ppu_opcode_t op) diff --git a/rpcs3/Emu/Cell/PPUTranslator.h b/rpcs3/Emu/Cell/PPUTranslator.h index bcaddbb389..41e8745355 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.h +++ b/rpcs3/Emu/Cell/PPUTranslator.h @@ -56,6 +56,7 @@ class PPUTranslator final : public cpu_translator llvm::Value* m_mtocr_table{}; llvm::Value* m_frsqrte_table{}; + llvm::Value* m_fres_table{}; llvm::Value* m_globals[175]; llvm::Value** const m_g_cr = m_globals + 99;