mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-17 08:11:51 +00:00
PPU: implement accurate FRES
Implemented with an accurate lookup table.
This commit is contained in:
parent
7b162c7513
commit
e243ef5907
@ -17,6 +17,138 @@ inline int fexpf(float x)
|
||||
return (std::bit_cast<u32>(x) >> 23) & 0xff;
|
||||
}
|
||||
|
||||
// Lookup table of 128 u32 entries consumed by the FRES implementations.
// In the visible FRES code the index is (b >> 45) & 0x7f, i.e. the 7 bits directly
// below the exponent field of the f64 operand, and the selected entry is shifted
// left by 29 (32 - 3) before being OR-ed into the f64 result bit pattern.
// Every entry is below 0x00800000 (fits in 23 bits), so after the shift it lands
// in bits 29..51 of the result. Entries are non-increasing from index 0 to 127.
// NOTE(review): the commit message calls this an "accurate lookup table"; where the
// values come from is not shown here - confirm the source before editing any entry.
constexpr u32 ppu_fres_mantissas[128] =
|
||||
{
|
||||
0x007f0000,
|
||||
0x007d0800,
|
||||
0x007b1800,
|
||||
0x00793000,
|
||||
0x00775000,
|
||||
0x00757000,
|
||||
0x0073a000,
|
||||
0x0071e000,
|
||||
0x00700000,
|
||||
0x006e4000,
|
||||
0x006ca000,
|
||||
0x006ae000,
|
||||
0x00694000,
|
||||
0x00678000,
|
||||
0x00660000,
|
||||
0x00646000,
|
||||
0x0062c000,
|
||||
0x00614000,
|
||||
0x005fc000,
|
||||
0x005e4000,
|
||||
0x005cc000,
|
||||
0x005b4000,
|
||||
0x0059c000,
|
||||
0x00584000,
|
||||
0x00570000,
|
||||
0x00558000,
|
||||
0x00540000,
|
||||
0x0052c000,
|
||||
0x00518000,
|
||||
0x00500000,
|
||||
0x004ec000,
|
||||
0x004d8000,
|
||||
0x004c0000,
|
||||
0x004b0000,
|
||||
0x00498000,
|
||||
0x00488000,
|
||||
0x00474000,
|
||||
0x00460000,
|
||||
0x0044c000,
|
||||
0x00438000,
|
||||
0x00428000,
|
||||
0x00418000,
|
||||
0x00400000,
|
||||
0x003f0000,
|
||||
0x003e0000,
|
||||
0x003d0000,
|
||||
0x003bc000,
|
||||
0x003ac000,
|
||||
0x00398000,
|
||||
0x00388000,
|
||||
0x00378000,
|
||||
0x00368000,
|
||||
0x00358000,
|
||||
0x00348000,
|
||||
0x00338000,
|
||||
0x00328000,
|
||||
0x00318000,
|
||||
0x00308000,
|
||||
0x002f8000,
|
||||
0x002ec000,
|
||||
0x002e0000,
|
||||
0x002d0000,
|
||||
0x002c0000,
|
||||
0x002b0000,
|
||||
0x002a0000,
|
||||
0x00298000,
|
||||
0x00288000,
|
||||
0x00278000,
|
||||
0x0026c000,
|
||||
0x00260000,
|
||||
0x00250000,
|
||||
0x00244000,
|
||||
0x00238000,
|
||||
0x00228000,
|
||||
0x00220000,
|
||||
0x00210000,
|
||||
0x00200000,
|
||||
0x001f8000,
|
||||
0x001e8000,
|
||||
0x001e0000,
|
||||
0x001d0000,
|
||||
0x001c8000,
|
||||
0x001b8000,
|
||||
0x001b0000,
|
||||
0x001a0000,
|
||||
0x00198000,
|
||||
0x00190000,
|
||||
0x00180000,
|
||||
0x00178000,
|
||||
0x00168000,
|
||||
0x00160000,
|
||||
0x00158000,
|
||||
0x00148000,
|
||||
0x00140000,
|
||||
0x00138000,
|
||||
0x00128000,
|
||||
0x00120000,
|
||||
0x00118000,
|
||||
0x00108000,
|
||||
0x00100000,
|
||||
0x000f8000,
|
||||
0x000f0000,
|
||||
0x000e0000,
|
||||
0x000d8000,
|
||||
0x000d0000,
|
||||
0x000c8000,
|
||||
0x000b8000,
|
||||
0x000b0000,
|
||||
0x000a8000,
|
||||
0x000a0000,
|
||||
0x00098000,
|
||||
0x00090000,
|
||||
0x00080000,
|
||||
0x00078000,
|
||||
0x00070000,
|
||||
0x00068000,
|
||||
0x00060000,
|
||||
0x00058000,
|
||||
0x00050000,
|
||||
0x00048000,
|
||||
0x00040000,
|
||||
0x00038000,
|
||||
0x00030000,
|
||||
0x00028000,
|
||||
0x00020000,
|
||||
0x00018000,
|
||||
0x00010000,
|
||||
0x00000000,
|
||||
};
|
||||
|
||||
constexpr u32 ppu_frsqrte_mantissas[16] =
|
||||
{
|
||||
0x000f1000u, 0x000d8000u, 0x000c0000u, 0x000a8000u,
|
||||
|
@ -6176,9 +6176,16 @@ auto FRES()
|
||||
if constexpr (Build == 0xf1a6)
|
||||
return ppu_exec_select<Flags...>::template select<set_fpcc>();
|
||||
|
||||
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
||||
ppu.fpr[op.frd] = f32(1.0 / ppu.fpr[op.frb]);
|
||||
ppu_set_fpcc<Flags...>(ppu, ppu.fpr[op.frd], 0.);
|
||||
// Interpreter body for FRES: instead of dividing, it assembles the f64 bit pattern
// of the result from the operand's sign, a transformed exponent and an entry of
// ppu_fres_mantissas, then converts that value through f32(...) into fpr[frd].
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op)
|
||||
{
|
||||
// Source operand as f64 and its raw 64-bit pattern.
const f64 a = ppu.fpr[op.frb];
|
||||
const u64 b = std::bit_cast<u64>(a);
|
||||
// 11-bit biased exponent field (bits 52..62).
const u64 e = (b >> 52) & 0x7ff; // double exp
|
||||
// 7 bits directly below the exponent (bits 45..51) select one of the 128 table entries.
const u64 i = (b >> 45) & 0x7f; // mantissa LUT index
|
||||
// Magnitude bits of the result. `?:` binds looser than `|` and `<<`, so the whole
// `(...) << 52 | ... << (32 - 3)` expression is the else-arm.
// - e >= 0x3ff + 0x80 (magnitude of at least 2^128, which includes infinity): magnitude 0.
// - otherwise: exponent field 0x7fd - e (for e == 0x3ff + k this gives 0x3fe - k),
//   combined with the table entry moved up by 29 bits into bits 29..51.
const u64 r = e >= (0x3ff + 0x80) ? 0 : (0x7ff - 2 - e) << 52 | u64{ppu_fres_mantissas[i]} << (32 - 3);
|
||||
|
||||
// a == a is false only for NaN. Non-NaN: operand sign bit (bit 63) OR-ed with r.
// NaN: the operand pattern with bit 51 (the top mantissa bit) forced to 1.
ppu.fpr[op.frd] = f32(std::bit_cast<f64>(a == a ? (b & 0x8000'0000'0000'0000) | r : (0x8'0000'0000'0000 | b)));
|
||||
// Hands the stored result and 0. to ppu_set_fpcc.
// NOTE(review): presumably updates the FP condition flags from a compare with zero - confirm.
ppu_set_fpcc<Flags...>(ppu, ppu.fpr[op.frd], 0.);
|
||||
};
|
||||
RETURN_(ppu, op);
|
||||
}
|
||||
|
@ -3991,9 +3991,22 @@ void PPUTranslator::FSQRTS(ppu_opcode_t op)
|
||||
|
||||
// Emits LLVM IR for FRES. The table-based sequence below builds the f64 bit pattern
// of the result from the operand's sign, a transformed exponent and an entry of
// ppu_fres_mantissas, truncates it to f32 and stores it to frd.
// NOTE(review): this view interleaves pre-change and post-change lines of the commit
// (`b` is declared twice, and both `result` and `r` are passed to SetFPRF); the
// FDiv-based statements appear to be the pre-change implementation - confirm against
// the real file before relying on this listing.
void PPUTranslator::FRES(ppu_opcode_t op)
|
||||
{
|
||||
// Division-based form: 1.0 / frb computed as f32 via CreateFDiv and stored to frd.
const auto b = GetFpr(op.frb, 32);
|
||||
const auto result = m_ir->CreateFDiv(ConstantFP::get(GetType<f32>(), 1.0), b);
|
||||
SetFpr(op.frd, result);
|
||||
// Create the module-level copy of ppu_fres_mantissas (128 x u32, constant, private
// linkage) only once; later calls reuse the cached m_fres_table.
if (!m_fres_table)
|
||||
{
|
||||
m_fres_table = new GlobalVariable(*m_module, ArrayType::get(GetType<u32>(), 128), true, GlobalValue::PrivateLinkage, ConstantDataArray::get(m_context, ppu_fres_mantissas));
|
||||
}
|
||||
|
||||
// Operand fetched without the explicit 32 used above, then reinterpreted as u64 bits.
// NOTE(review): presumably GetFpr without a width yields an f64 value - confirm.
const auto a = GetFpr(op.frb);
|
||||
const auto b = bitcast<u64>(a);
|
||||
// Unordered compare of a with itself: true exactly when a is NaN.
const auto n = m_ir->CreateFCmpUNO(a, a); // test for NaN
|
||||
// 11-bit exponent field (bits 52..62).
const auto e = m_ir->CreateAnd(m_ir->CreateLShr(b, 52), 0x7ff); // double exp
|
||||
// 7 bits directly below the exponent (bits 45..51) index the 128-entry table.
const auto i = m_ir->CreateAnd(m_ir->CreateLShr(b, 45), 0x7f); // mantissa LUT index
|
||||
// Load table[i], zero-extend it and move it up by 29 bits (bits 29..51 of the result).
const auto m = m_ir->CreateShl(ZExt(m_ir->CreateLoad(m_ir->CreateGEP(m_fres_table, {m_ir->getInt64(0), i}))), 29);
|
||||
// True for any exponent field >= 0x3ff + 0x80 (magnitude of at least 2^128, infinity included).
const auto c = m_ir->CreateICmpUGE(e, m_ir->getInt64(0x3ff + 0x80)); // test for INF
|
||||
// Result exponent field: (0x7fd - e) placed at bit 52. The subtraction may wrap for
// large e, but those cases are replaced by 0 in the select on `c` below.
const auto x = m_ir->CreateShl(m_ir->CreateSub(m_ir->getInt64(0x7ff - 2), e), 52);
|
||||
// Magnitude bits: 0 when `c` holds, otherwise exponent | table mantissa.
const auto s = m_ir->CreateSelect(c, m_ir->getInt64(0), m_ir->CreateOr(x, m));
|
||||
// NaN operand: operand bits with bit 51 (top mantissa bit) set.
// Otherwise: magnitude `s` OR-ed with the operand's sign bit (bit 63).
const auto r = bitcast<f64>(m_ir->CreateSelect(n, m_ir->CreateOr(b, 0x8'0000'0000'0000), m_ir->CreateOr(s, m_ir->CreateAnd(b, 0x8000'0000'0000'0000))));
|
||||
// The value written to frd is `r` truncated to f32.
SetFpr(op.frd, m_ir->CreateFPTrunc(r, GetType<f32>()));
|
||||
|
||||
//m_ir->CreateStore(GetUndef<bool>(), m_fpscr_fr);
|
||||
//m_ir->CreateStore(GetUndef<bool>(), m_fpscr_fi);
|
||||
@ -4002,7 +4015,7 @@ void PPUTranslator::FRES(ppu_opcode_t op)
|
||||
//SetFPSCRException(m_fpscr_ux, Call(GetType<bool>(), m_pure_attr, "__fres_get_ux", b));
|
||||
//SetFPSCRException(m_fpscr_zx, Call(GetType<bool>(), m_pure_attr, "__fres_get_zx", b));
|
||||
//SetFPSCRException(m_fpscr_vxsnan, Call(GetType<bool>(), m_pure_attr, "__fres_get_vxsnan", b));
|
||||
// SetFPRF call matching the division-based form (uses the f32 `result`).
SetFPRF(result, op.rc != 0);
|
||||
// SetFPRF call matching the table-based form.
// NOTE(review): this passes the untruncated f64 `r`, while frd receives the f32-truncated
// value; confirm the two classify identically when `r` lies outside the f32 range.
SetFPRF(r, op.rc != 0);
|
||||
}
|
||||
|
||||
void PPUTranslator::FMULS(ppu_opcode_t op)
|
||||
|
@ -56,6 +56,7 @@ class PPUTranslator final : public cpu_translator
|
||||
|
||||
llvm::Value* m_mtocr_table{};
|
||||
llvm::Value* m_frsqrte_table{};
|
||||
llvm::Value* m_fres_table{};
|
||||
|
||||
llvm::Value* m_globals[175];
|
||||
llvm::Value** const m_g_cr = m_globals + 99;
|
||||
|
Loading…
Reference in New Issue
Block a user