PPU: implement quasi-accurate FRSQRTE

Denormals are handled like zeros.
NaN handling is inaccurate in some cases.

Co-authored-by: Nekotekina <nekotekina@gmail.com>
This commit is contained in:
doesthisusername 2019-12-25 15:12:48 +01:00 committed by Ivan
parent 5b8f105308
commit 7b162c7513
4 changed files with 67 additions and 5 deletions

View File

@ -16,3 +16,54 @@ inline int fexpf(float x)
{
return (std::bit_cast<u32>(x) >> 23) & 0xff;
}
// Mantissa estimates used when building the FRSQRTE lookup table.
// Each value is OR-ed into the high 32 bits of the resulting double.
// The 4-bit selector is made of the inverted lowest exponent bit (bit 3)
// followed by the three most significant mantissa bits (bits 2..0).
constexpr u32 ppu_frsqrte_mantissas[16] =
{
	// Selector 0..7: lowest exponent bit of the operand is set
	0x000f1000u,
	0x000d8000u,
	0x000c0000u,
	0x000a8000u,
	0x00098000u,
	0x00088000u,
	0x00080000u,
	0x00070000u,

	// Selector 8..15: lowest exponent bit of the operand is clear
	0x00060000u,
	0x0004c000u,
	0x0003c000u,
	0x00030000u,
	0x00020000u,
	0x00018000u,
	0x00010000u,
	0x00008000u,
};
// Large lookup table for FRSQRTE instruction
inline struct ppu_frsqrte_lut_t
{
// Store only high 32 bits of doubles
u32 data[0x8000]{};
constexpr ppu_frsqrte_lut_t() noexcept
{
for (u64 i = 0; i < 0x8000; i++)
{
// Decomposed LUT index
const u64 sign = i >> 14;
const u64 expv = (i >> 3) & 0x7ff;
// (0x3FF - (((EXP_BITS(b) - 0x3FF) >> 1) + 1)) << 52
const u64 exp = 0x3fe0'0000 - (((expv + 0x1c01) >> 1) << (52 - 32));
if (expv == 0) // ±INF on zero/denormal, not accurate
{
data[i] = 0x7ff0'0000 | (sign << 31);
}
else if (expv == 0x7ff)
{
if (i == (0x7ff << 3))
data[i] = 0; // Zero on +INF, inaccurate
else
data[i] = 0x7ff8'0000; // QNaN
}
else if (sign)
{
data[i] = 0x7ff8'0000; // QNaN
}
else
{
// ((MAN_BITS(b) >> 49) & 7ull) + (!(EXP_BITS(b) & 1) << 3)
const u64 idx = 8 ^ (i & 0xf);
data[i] = ppu_frsqrte_mantissas[idx] | exp;
}
}
}
} ppu_frqrte_lut;

View File

@ -6534,10 +6534,13 @@ auto FRSQRTE()
if constexpr (Build == 0xf1a6)
return ppu_exec_select<Flags...>::template select<set_fpcc>();
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
ppu.fpr[op.frd] = 1.0 / std::sqrt(ppu.fpr[op.frb]);
ppu_set_fpcc<Flags...>(ppu, ppu.fpr[op.frd], 0.);
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op)
{
const u64 b = std::bit_cast<u64>(ppu.fpr[op.frb]);
ppu.fpr[op.frd] = std::bit_cast<f64>(u64{ppu_frqrte_lut.data[b >> 49]} << 32);
ppu_set_fpcc<Flags...>(ppu, ppu.fpr[op.frd], 0.);
};
RETURN_(ppu, op);
}

View File

@ -1,6 +1,7 @@
#ifdef LLVM_AVAILABLE
#include "Emu/system_config.h"
#include "Emu/Cell/Common.h"
#include "PPUTranslator.h"
#include "PPUThread.h"
@ -4344,8 +4345,14 @@ void PPUTranslator::FMUL(ppu_opcode_t op)
void PPUTranslator::FRSQRTE(ppu_opcode_t op)
{
const auto b = GetFpr(op.frb, 32);
const auto result = m_ir->CreateFDiv(ConstantFP::get(GetType<f32>(), 1.0), Call(GetType<f32>(), "llvm.sqrt.f32", b));
if (!m_frsqrte_table)
{
m_frsqrte_table = new GlobalVariable(*m_module, ArrayType::get(GetType<u32>(), 0x8000), true, GlobalValue::PrivateLinkage, ConstantDataArray::get(m_context, ppu_frqrte_lut.data));
}
const auto b = m_ir->CreateBitCast(GetFpr(op.frb), GetType<u64>());
const auto v = m_ir->CreateLoad(m_ir->CreateGEP(m_frsqrte_table, {m_ir->getInt64(0), m_ir->CreateLShr(b, 49)}));
const auto result = m_ir->CreateBitCast(m_ir->CreateShl(ZExt(v), 32), GetType<f64>());
SetFpr(op.frd, result);
//m_ir->CreateStore(GetUndef<bool>(), m_fpscr_fr);

View File

@ -55,6 +55,7 @@ class PPUTranslator final : public cpu_translator
llvm::StructType* m_thread_type;
llvm::Value* m_mtocr_table{};
llvm::Value* m_frsqrte_table{};
llvm::Value* m_globals[175];
llvm::Value** const m_g_cr = m_globals + 99;