PPU: implement accurate FRES

Implemented with an accurate lookup table.
This commit is contained in:
Nekotekina 2022-05-10 13:38:07 +03:00 committed by Ivan
parent 7b162c7513
commit e243ef5907
4 changed files with 160 additions and 7 deletions

View File

@ -17,6 +17,138 @@ inline int fexpf(float x)
return (std::bit_cast<u32>(x) >> 23) & 0xff;
}
// Lookup table used by the FRES (reciprocal estimate) implementations.
//
// Indexing: the FRES code computes the index as `(bits >> 45) & 0x7f`, where
// `bits` is the raw f64 bit pattern of the operand, i.e. the seven most
// significant fraction bits of the double.
//
// Usage: the selected entry is widened to 64 bits, shifted left by 29 and
// OR-ed with the computed exponent field to form the f64 bit pattern of the
// estimate, which is then narrowed to f32.
//
// The recompiler hands this array to LLVM as a 128-element u32 constant, so
// both the element type and the element count must stay as they are.
constexpr u32 ppu_fres_mantissas[128] =
{
	0x007f0000u, 0x007d0800u, 0x007b1800u, 0x00793000u, // 0x00
	0x00775000u, 0x00757000u, 0x0073a000u, 0x0071e000u, // 0x04
	0x00700000u, 0x006e4000u, 0x006ca000u, 0x006ae000u, // 0x08
	0x00694000u, 0x00678000u, 0x00660000u, 0x00646000u, // 0x0c
	0x0062c000u, 0x00614000u, 0x005fc000u, 0x005e4000u, // 0x10
	0x005cc000u, 0x005b4000u, 0x0059c000u, 0x00584000u, // 0x14
	0x00570000u, 0x00558000u, 0x00540000u, 0x0052c000u, // 0x18
	0x00518000u, 0x00500000u, 0x004ec000u, 0x004d8000u, // 0x1c
	0x004c0000u, 0x004b0000u, 0x00498000u, 0x00488000u, // 0x20
	0x00474000u, 0x00460000u, 0x0044c000u, 0x00438000u, // 0x24
	0x00428000u, 0x00418000u, 0x00400000u, 0x003f0000u, // 0x28
	0x003e0000u, 0x003d0000u, 0x003bc000u, 0x003ac000u, // 0x2c
	0x00398000u, 0x00388000u, 0x00378000u, 0x00368000u, // 0x30
	0x00358000u, 0x00348000u, 0x00338000u, 0x00328000u, // 0x34
	0x00318000u, 0x00308000u, 0x002f8000u, 0x002ec000u, // 0x38
	0x002e0000u, 0x002d0000u, 0x002c0000u, 0x002b0000u, // 0x3c
	0x002a0000u, 0x00298000u, 0x00288000u, 0x00278000u, // 0x40
	0x0026c000u, 0x00260000u, 0x00250000u, 0x00244000u, // 0x44
	0x00238000u, 0x00228000u, 0x00220000u, 0x00210000u, // 0x48
	0x00200000u, 0x001f8000u, 0x001e8000u, 0x001e0000u, // 0x4c
	0x001d0000u, 0x001c8000u, 0x001b8000u, 0x001b0000u, // 0x50
	0x001a0000u, 0x00198000u, 0x00190000u, 0x00180000u, // 0x54
	0x00178000u, 0x00168000u, 0x00160000u, 0x00158000u, // 0x58
	0x00148000u, 0x00140000u, 0x00138000u, 0x00128000u, // 0x5c
	0x00120000u, 0x00118000u, 0x00108000u, 0x00100000u, // 0x60
	0x000f8000u, 0x000f0000u, 0x000e0000u, 0x000d8000u, // 0x64
	0x000d0000u, 0x000c8000u, 0x000b8000u, 0x000b0000u, // 0x68
	0x000a8000u, 0x000a0000u, 0x00098000u, 0x00090000u, // 0x6c
	0x00080000u, 0x00078000u, 0x00070000u, 0x00068000u, // 0x70
	0x00060000u, 0x00058000u, 0x00050000u, 0x00048000u, // 0x74
	0x00040000u, 0x00038000u, 0x00030000u, 0x00028000u, // 0x78
	0x00020000u, 0x00018000u, 0x00010000u, 0x00000000u, // 0x7c
};
constexpr u32 ppu_frsqrte_mantissas[16] =
{
0x000f1000u, 0x000d8000u, 0x000c0000u, 0x000a8000u,

View File

@ -6176,9 +6176,16 @@ auto FRES()
if constexpr (Build == 0xf1a6)
return ppu_exec_select<Flags...>::template select<set_fpcc>();
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
ppu.fpr[op.frd] = f32(1.0 / ppu.fpr[op.frb]);
ppu_set_fpcc<Flags...>(ppu, ppu.fpr[op.frd], 0.);
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op)
{
const f64 a = ppu.fpr[op.frb];
const u64 b = std::bit_cast<u64>(a);
const u64 e = (b >> 52) & 0x7ff; // double exp
const u64 i = (b >> 45) & 0x7f; // mantissa LUT index
const u64 r = e >= (0x3ff + 0x80) ? 0 : (0x7ff - 2 - e) << 52 | u64{ppu_fres_mantissas[i]} << (32 - 3);
ppu.fpr[op.frd] = f32(std::bit_cast<f64>(a == a ? (b & 0x8000'0000'0000'0000) | r : (0x8'0000'0000'0000 | b)));
ppu_set_fpcc<Flags...>(ppu, ppu.fpr[op.frd], 0.);
};
RETURN_(ppu, op);
}

View File

@ -3991,9 +3991,22 @@ void PPUTranslator::FSQRTS(ppu_opcode_t op)
void PPUTranslator::FRES(ppu_opcode_t op)
{
const auto b = GetFpr(op.frb, 32);
const auto result = m_ir->CreateFDiv(ConstantFP::get(GetType<f32>(), 1.0), b);
SetFpr(op.frd, result);
if (!m_fres_table)
{
m_fres_table = new GlobalVariable(*m_module, ArrayType::get(GetType<u32>(), 128), true, GlobalValue::PrivateLinkage, ConstantDataArray::get(m_context, ppu_fres_mantissas));
}
const auto a = GetFpr(op.frb);
const auto b = bitcast<u64>(a);
const auto n = m_ir->CreateFCmpUNO(a, a); // test for NaN
const auto e = m_ir->CreateAnd(m_ir->CreateLShr(b, 52), 0x7ff); // double exp
const auto i = m_ir->CreateAnd(m_ir->CreateLShr(b, 45), 0x7f); // mantissa LUT index
const auto m = m_ir->CreateShl(ZExt(m_ir->CreateLoad(m_ir->CreateGEP(m_fres_table, {m_ir->getInt64(0), i}))), 29);
const auto c = m_ir->CreateICmpUGE(e, m_ir->getInt64(0x3ff + 0x80)); // test for INF
const auto x = m_ir->CreateShl(m_ir->CreateSub(m_ir->getInt64(0x7ff - 2), e), 52);
const auto s = m_ir->CreateSelect(c, m_ir->getInt64(0), m_ir->CreateOr(x, m));
const auto r = bitcast<f64>(m_ir->CreateSelect(n, m_ir->CreateOr(b, 0x8'0000'0000'0000), m_ir->CreateOr(s, m_ir->CreateAnd(b, 0x8000'0000'0000'0000))));
SetFpr(op.frd, m_ir->CreateFPTrunc(r, GetType<f32>()));
//m_ir->CreateStore(GetUndef<bool>(), m_fpscr_fr);
//m_ir->CreateStore(GetUndef<bool>(), m_fpscr_fi);
@ -4002,7 +4015,7 @@ void PPUTranslator::FRES(ppu_opcode_t op)
//SetFPSCRException(m_fpscr_ux, Call(GetType<bool>(), m_pure_attr, "__fres_get_ux", b));
//SetFPSCRException(m_fpscr_zx, Call(GetType<bool>(), m_pure_attr, "__fres_get_zx", b));
//SetFPSCRException(m_fpscr_vxsnan, Call(GetType<bool>(), m_pure_attr, "__fres_get_vxsnan", b));
SetFPRF(result, op.rc != 0);
SetFPRF(r, op.rc != 0);
}
void PPUTranslator::FMULS(ppu_opcode_t op)

View File

@ -56,6 +56,7 @@ class PPUTranslator final : public cpu_translator
llvm::Value* m_mtocr_table{};
llvm::Value* m_frsqrte_table{};
llvm::Value* m_fres_table{};
llvm::Value* m_globals[175];
llvm::Value** const m_g_cr = m_globals + 99;