SPU LLVM: Add accurate double-precision FMA support

This commit is contained in:
Eladash 2020-04-03 08:11:47 +03:00 committed by Ivan
parent 1b68f90e42
commit 158b24ec25
9 changed files with 62 additions and 26 deletions

View File

@ -1628,7 +1628,7 @@ extern void ppu_initialize(const ppu_module& info)
#ifndef _WIN32
settings += ppu_settings::non_win32;
#endif
if (g_cfg.core.ppu_accurate_fma)
if (g_cfg.core.llvm_accurate_dfma)
{
settings += ppu_settings::accurate_fma;
}

View File

@ -3881,7 +3881,7 @@ void PPUTranslator::FMADDS(ppu_opcode_t op)
const auto c = GetFpr(op.frc);
llvm::Value* result;
if (g_cfg.core.ppu_accurate_fma)
if (g_cfg.core.llvm_accurate_dfma)
{
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, b});
}
@ -3909,7 +3909,7 @@ void PPUTranslator::FMSUBS(ppu_opcode_t op)
const auto c = GetFpr(op.frc);
llvm::Value* result;
if (g_cfg.core.ppu_accurate_fma)
if (g_cfg.core.llvm_accurate_dfma)
{
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, m_ir->CreateFNeg(b)});
}
@ -3937,7 +3937,7 @@ void PPUTranslator::FNMSUBS(ppu_opcode_t op)
const auto c = GetFpr(op.frc);
llvm::Value* result;
if (g_cfg.core.ppu_accurate_fma)
if (g_cfg.core.llvm_accurate_dfma)
{
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, m_ir->CreateFNeg(b)});
}
@ -3965,7 +3965,7 @@ void PPUTranslator::FNMADDS(ppu_opcode_t op)
const auto c = GetFpr(op.frc);
llvm::Value* result;
if (g_cfg.core.ppu_accurate_fma)
if (g_cfg.core.llvm_accurate_dfma)
{
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, b});
}
@ -4225,7 +4225,7 @@ void PPUTranslator::FMSUB(ppu_opcode_t op)
const auto c = GetFpr(op.frc);
llvm::Value* result;
if (g_cfg.core.ppu_accurate_fma)
if (g_cfg.core.llvm_accurate_dfma)
{
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, m_ir->CreateFNeg(b)});
}
@ -4253,7 +4253,7 @@ void PPUTranslator::FMADD(ppu_opcode_t op)
const auto c = GetFpr(op.frc);
llvm::Value* result;
if (g_cfg.core.ppu_accurate_fma)
if (g_cfg.core.llvm_accurate_dfma)
{
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), { a, c, b });
}
@ -4281,7 +4281,7 @@ void PPUTranslator::FNMSUB(ppu_opcode_t op)
const auto c = GetFpr(op.frc);
llvm::Value* result;
if (g_cfg.core.ppu_accurate_fma)
if (g_cfg.core.llvm_accurate_dfma)
{
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, m_ir->CreateFNeg(b)});
}
@ -4309,7 +4309,7 @@ void PPUTranslator::FNMADD(ppu_opcode_t op)
const auto c = GetFpr(op.frc);
llvm::Value* result;
if (g_cfg.core.ppu_accurate_fma)
if (g_cfg.core.llvm_accurate_dfma)
{
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, b});
}

View File

@ -7192,22 +7192,58 @@ public:
// DFMA: writes a * b + c to register op.rt, where a, b and c are the f64[2]
// values read from op.ra, op.rb and op.rt respectively.
// NOTE(review): this text looks like a rendered diff with removed and added lines
// interleaved. The set_vr line right after the opening brace is presumably the
// pre-change implementation and the remaining lines its replacement - confirm
// against the real file before treating both as live code.
void DFMA(spu_opcode_t op)
{
set_vr(op.rt, get_vr<f64[2]>(op.ra) * get_vr<f64[2]>(op.rb) + get_vr<f64[2]>(op.rt));
const auto [a, b, c] = get_vrs<f64[2]>(op.ra, op.rb, op.rt);
if (g_cfg.core.llvm_accurate_dfma)
{
// "LLVM Accurate DFMA" setting is on: emit one call to the llvm.fma intrinsic
// on (a, b, c) instead of a separate multiply followed by an add.
value_t<f64[2]> r;
r.value = m_ir->CreateCall(get_intrinsic<f64[2]>(llvm::Intrinsic::fma), {a.value, b.value, c.value});
set_vr(op.rt, r);
}
else
// Setting is off: plain multiply and add expression.
set_vr(op.rt, a * b + c);
}
// DFMS: writes a * b - c to register op.rt, where a, b and c are the f64[2]
// values read from op.ra, op.rb and op.rt respectively.
// NOTE(review): this text looks like a rendered diff with removed and added lines
// interleaved. The set_vr line right after the opening brace is presumably the
// pre-change implementation and the remaining lines its replacement - confirm
// against the real file before treating both as live code.
void DFMS(spu_opcode_t op)
{
set_vr(op.rt, get_vr<f64[2]>(op.ra) * get_vr<f64[2]>(op.rb) - get_vr<f64[2]>(op.rt));
const auto [a, b, c] = get_vrs<f64[2]>(op.ra, op.rb, op.rt);
if (g_cfg.core.llvm_accurate_dfma)
{
// "LLVM Accurate DFMA" setting is on: the subtraction is expressed by passing
// the negated addend, i.e. llvm.fma(a, b, -c).
// eval() presumably turns the negation expression into a value_t so that
// .value can be taken - TODO confirm against its definition.
value_t<f64[2]> r;
r.value = m_ir->CreateCall(get_intrinsic<f64[2]>(llvm::Intrinsic::fma), {a.value, b.value, eval(-c).value});
set_vr(op.rt, r);
}
else
// Setting is off: plain multiply and subtract expression.
set_vr(op.rt, a * b - c);
}
// DFNMS: writes c - a * b to register op.rt, where a, b and c are the f64[2]
// values read from op.ra, op.rb and op.rt respectively.
// NOTE(review): this text looks like a rendered diff with removed and added lines
// interleaved. The set_vr line right after the opening brace is presumably the
// pre-change implementation and the remaining lines its replacement - confirm
// against the real file before treating both as live code.
void DFNMS(spu_opcode_t op)
{
set_vr(op.rt, get_vr<f64[2]>(op.rt) - get_vr<f64[2]>(op.ra) * get_vr<f64[2]>(op.rb));
const auto [a, b, c] = get_vrs<f64[2]>(op.ra, op.rb, op.rt);
if (g_cfg.core.llvm_accurate_dfma)
{
// "LLVM Accurate DFMA" setting is on: c - a * b is expressed by negating one
// multiplicand, i.e. llvm.fma(-a, b, c).
// eval() presumably turns the negation expression into a value_t so that
// .value can be taken - TODO confirm against its definition.
value_t<f64[2]> r;
r.value = m_ir->CreateCall(get_intrinsic<f64[2]>(llvm::Intrinsic::fma), {eval(-a).value, b.value, c.value});
set_vr(op.rt, r);
}
else
// Setting is off: plain multiply and subtract expression.
set_vr(op.rt, c - (a * b));
}
// DFNMA: writes -(a * b + c) to register op.rt, where a, b and c are the f64[2]
// values read from op.ra, op.rb and op.rt respectively.
// NOTE(review): this text looks like a rendered diff with removed and added lines
// interleaved. The set_vr line right after the opening brace is presumably the
// pre-change implementation and the remaining lines its replacement - confirm
// against the real file before treating both as live code.
void DFNMA(spu_opcode_t op)
{
set_vr(op.rt, -(get_vr<f64[2]>(op.ra) * get_vr<f64[2]>(op.rb) + get_vr<f64[2]>(op.rt)));
const auto [a, b, c] = get_vrs<f64[2]>(op.ra, op.rb, op.rt);
if (g_cfg.core.llvm_accurate_dfma)
{
// "LLVM Accurate DFMA" setting is on: compute llvm.fma(a, b, c) first, then
// negate the result when storing it.
value_t<f64[2]> r;
r.value = m_ir->CreateCall(get_intrinsic<f64[2]>(llvm::Intrinsic::fma), {a.value, b.value, c.value});
set_vr(op.rt, -r);
}
else
// Setting is off: plain multiply and add expression, negated.
set_vr(op.rt, -(a * b + c));
}
// clamping helpers

View File

@ -45,7 +45,7 @@ struct cfg_root : cfg::node
cfg::_enum<tsx_usage> enable_TSX{ this, "Enable TSX", tsx_usage::enabled }; // Enable TSX. Forcing this on Haswell/Broadwell CPUs should be used carefully
cfg::_bool spu_accurate_xfloat{ this, "Accurate xfloat", false };
cfg::_bool spu_approx_xfloat{ this, "Approximate xfloat", true };
cfg::_bool ppu_accurate_fma{ this, "PPU Accurate FMA", true }; // Enable accurate FMA for CPUs which do not support it natively (can't be disabled for CPUs which do support it)
cfg::_bool llvm_accurate_dfma{ this, "LLVM Accurate DFMA", true }; // Enable accurate double-precision FMA for CPUs which do not support it natively
cfg::_bool debug_console_mode{ this, "Debug Console Mode", false }; // Debug console emulation, not recommended
cfg::_enum<lib_loading_type> lib_loading{ this, "Lib Loader", lib_loading_type::liblv2only };

View File

@ -111,7 +111,7 @@ private:
{ emu_settings_type::EnableTSX, { "Core", "Enable TSX"}},
{ emu_settings_type::AccurateGETLLAR, { "Core", "Accurate GETLLAR"}},
{ emu_settings_type::AccuratePUTLLUC, { "Core", "Accurate PUTLLUC"}},
{ emu_settings_type::AccuratePPUfma, { "Core", "PPU Accurate FMA"}},
{ emu_settings_type::AccurateLLVMdfma, { "Core", "LLVM Accurate DFMA"}},
{ emu_settings_type::AccurateRSXAccess, { "Core", "Accurate RSX reservation access"}},
{ emu_settings_type::AccurateXFloat, { "Core", "Accurate xfloat"}},
{ emu_settings_type::SetDAZandFTZ, { "Core", "Set DAZ and FTZ"}},

View File

@ -17,7 +17,7 @@ enum class emu_settings_type
EnableTSX,
AccurateGETLLAR,
AccuratePUTLLUC,
AccuratePPUfma,
AccurateLLVMdfma,
AccurateRSXAccess,
AccurateXFloat,
SetDAZandFTZ,

View File

@ -931,8 +931,8 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> gui_settings, std
m_emu_settings->EnhanceCheckBox(ui->debugConsoleMode, emu_settings_type::DebugConsoleMode);
SubscribeTooltip(ui->debugConsoleMode, tooltips.settings.debug_console_mode);
m_emu_settings->EnhanceCheckBox(ui->accuratePPUfma, emu_settings_type::AccuratePPUfma);
SubscribeTooltip(ui->accuratePPUfma, tooltips.settings.accurate_ppu_fma);
m_emu_settings->EnhanceCheckBox(ui->accurateLLVMdfma, emu_settings_type::AccurateLLVMdfma);
SubscribeTooltip(ui->accurateLLVMdfma, tooltips.settings.accurate_llvm_dfma);
m_emu_settings->EnhanceCheckBox(ui->silenceAllLogs, emu_settings_type::SilenceAllLogs);
SubscribeTooltip(ui->silenceAllLogs, tooltips.settings.silence_all_logs);

View File

@ -1717,13 +1717,13 @@
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="accuratePPUfma">
<property name="text">
<string>Accurate PPU FMA</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="accurateLLVMdfma">
<property name="text">
<string>Accurate LLVM DFMA</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="silenceAllLogs">
<property name="text">

View File

@ -78,7 +78,7 @@ public:
const QString set_daz_and_ftz = tr("Never use this.");
const QString accurate_getllar = tr("Never use this.");
const QString accurate_putlluc = tr("Never use this.");
const QString accurate_ppu_fma = tr("Enables extra accuracy on FMA instructions, which can be needed by some games.\nIt can impact performance negatively on CPUs without FMA acceleration support.");
const QString accurate_llvm_dfma = tr("Enables extra accuracy on FMA instructions, which can be needed by some games.\nIt can impact performance negatively on CPUs without FMA acceleration support.");
const QString accurate_rsx_access = tr("Never use this.");
const QString hook_static_functions = tr("Allows to hook some functions like 'memcpy' replacing them with high-level implementations. May do nothing or break things. Experimental.");
const QString gl_legacy_buffers = tr("Enables use of classic OpenGL buffers which allows capturing tools to work with RPCS3 e.g RenderDoc.\nIf unsure, don't use this option.");