mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-01-02 15:02:11 +00:00
SPU LLVM: Add accurate double-precision FMA support
This commit is contained in:
parent
1b68f90e42
commit
158b24ec25
@ -1628,7 +1628,7 @@ extern void ppu_initialize(const ppu_module& info)
|
||||
#ifndef _WIN32
|
||||
settings += ppu_settings::non_win32;
|
||||
#endif
|
||||
if (g_cfg.core.ppu_accurate_fma)
|
||||
if (g_cfg.core.llvm_accurate_dfma)
|
||||
{
|
||||
settings += ppu_settings::accurate_fma;
|
||||
}
|
||||
|
@ -3881,7 +3881,7 @@ void PPUTranslator::FMADDS(ppu_opcode_t op)
|
||||
const auto c = GetFpr(op.frc);
|
||||
|
||||
llvm::Value* result;
|
||||
if (g_cfg.core.ppu_accurate_fma)
|
||||
if (g_cfg.core.llvm_accurate_dfma)
|
||||
{
|
||||
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, b});
|
||||
}
|
||||
@ -3909,7 +3909,7 @@ void PPUTranslator::FMSUBS(ppu_opcode_t op)
|
||||
const auto c = GetFpr(op.frc);
|
||||
|
||||
llvm::Value* result;
|
||||
if (g_cfg.core.ppu_accurate_fma)
|
||||
if (g_cfg.core.llvm_accurate_dfma)
|
||||
{
|
||||
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, m_ir->CreateFNeg(b)});
|
||||
}
|
||||
@ -3937,7 +3937,7 @@ void PPUTranslator::FNMSUBS(ppu_opcode_t op)
|
||||
const auto c = GetFpr(op.frc);
|
||||
|
||||
llvm::Value* result;
|
||||
if (g_cfg.core.ppu_accurate_fma)
|
||||
if (g_cfg.core.llvm_accurate_dfma)
|
||||
{
|
||||
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, m_ir->CreateFNeg(b)});
|
||||
}
|
||||
@ -3965,7 +3965,7 @@ void PPUTranslator::FNMADDS(ppu_opcode_t op)
|
||||
const auto c = GetFpr(op.frc);
|
||||
|
||||
llvm::Value* result;
|
||||
if (g_cfg.core.ppu_accurate_fma)
|
||||
if (g_cfg.core.llvm_accurate_dfma)
|
||||
{
|
||||
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, b});
|
||||
}
|
||||
@ -4225,7 +4225,7 @@ void PPUTranslator::FMSUB(ppu_opcode_t op)
|
||||
const auto c = GetFpr(op.frc);
|
||||
|
||||
llvm::Value* result;
|
||||
if (g_cfg.core.ppu_accurate_fma)
|
||||
if (g_cfg.core.llvm_accurate_dfma)
|
||||
{
|
||||
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, m_ir->CreateFNeg(b)});
|
||||
}
|
||||
@ -4253,7 +4253,7 @@ void PPUTranslator::FMADD(ppu_opcode_t op)
|
||||
const auto c = GetFpr(op.frc);
|
||||
|
||||
llvm::Value* result;
|
||||
if (g_cfg.core.ppu_accurate_fma)
|
||||
if (g_cfg.core.llvm_accurate_dfma)
|
||||
{
|
||||
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), { a, c, b });
|
||||
}
|
||||
@ -4281,7 +4281,7 @@ void PPUTranslator::FNMSUB(ppu_opcode_t op)
|
||||
const auto c = GetFpr(op.frc);
|
||||
|
||||
llvm::Value* result;
|
||||
if (g_cfg.core.ppu_accurate_fma)
|
||||
if (g_cfg.core.llvm_accurate_dfma)
|
||||
{
|
||||
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, m_ir->CreateFNeg(b)});
|
||||
}
|
||||
@ -4309,7 +4309,7 @@ void PPUTranslator::FNMADD(ppu_opcode_t op)
|
||||
const auto c = GetFpr(op.frc);
|
||||
|
||||
llvm::Value* result;
|
||||
if (g_cfg.core.ppu_accurate_fma)
|
||||
if (g_cfg.core.llvm_accurate_dfma)
|
||||
{
|
||||
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, b});
|
||||
}
|
||||
|
@ -7192,22 +7192,58 @@ public:
|
||||
|
||||
// DFMA: double-precision multiply-add on an f64[2] vector, rt = ra * rb + rt.
void DFMA(spu_opcode_t op)
{
	// rt is both the addend and the destination, so all three operands are
	// fetched once, before anything is written back to rt.
	const auto [a, b, c] = get_vrs<f64[2]>(op.ra, op.rb, op.rt);

	if (g_cfg.core.llvm_accurate_dfma)
	{
		// Accurate path: emit a single llvm.fma call (fused multiply-add)
		// instead of a separate multiply followed by an add.
		value_t<f64[2]> r;
		r.value = m_ir->CreateCall(get_intrinsic<f64[2]>(llvm::Intrinsic::fma), {a.value, b.value, c.value});
		set_vr(op.rt, r);
	}
	else
	{
		// Default path: separate multiply and add.
		set_vr(op.rt, a * b + c);
	}
}
|
||||
|
||||
// DFMS: double-precision multiply-subtract on an f64[2] vector, rt = ra * rb - rt.
void DFMS(spu_opcode_t op)
{
	// rt is both the subtrahend and the destination, so all three operands are
	// fetched once, before anything is written back to rt.
	const auto [a, b, c] = get_vrs<f64[2]>(op.ra, op.rb, op.rt);

	if (g_cfg.core.llvm_accurate_dfma)
	{
		// Accurate path: a * b - c is expressed as fma(a, b, -c), so the
		// subtraction is folded into the single llvm.fma call.
		value_t<f64[2]> r;
		r.value = m_ir->CreateCall(get_intrinsic<f64[2]>(llvm::Intrinsic::fma), {a.value, b.value, eval(-c).value});
		set_vr(op.rt, r);
	}
	else
	{
		// Default path: separate multiply and subtract.
		set_vr(op.rt, a * b - c);
	}
}
|
||||
|
||||
// DFNMS: double-precision negative multiply-subtract on an f64[2] vector,
// rt = rt - ra * rb.
void DFNMS(spu_opcode_t op)
{
	// rt is both the minuend and the destination, so all three operands are
	// fetched once, before anything is written back to rt.
	const auto [a, b, c] = get_vrs<f64[2]>(op.ra, op.rb, op.rt);

	if (g_cfg.core.llvm_accurate_dfma)
	{
		// Accurate path: c - a * b is expressed as fma(-a, b, c); negating one
		// multiplicand negates the product inside the single llvm.fma call.
		value_t<f64[2]> r;
		r.value = m_ir->CreateCall(get_intrinsic<f64[2]>(llvm::Intrinsic::fma), {eval(-a).value, b.value, c.value});
		set_vr(op.rt, r);
	}
	else
	{
		// Default path: separate multiply and subtract.
		set_vr(op.rt, c - (a * b));
	}
}
|
||||
|
||||
// DFNMA: double-precision negative multiply-add on an f64[2] vector,
// rt = -(ra * rb + rt).
void DFNMA(spu_opcode_t op)
{
	// rt is both the addend and the destination, so all three operands are
	// fetched once, before anything is written back to rt.
	const auto [a, b, c] = get_vrs<f64[2]>(op.ra, op.rb, op.rt);

	if (g_cfg.core.llvm_accurate_dfma)
	{
		// Accurate path: compute fma(a, b, c) with a single llvm.fma call and
		// negate the result when storing it.
		value_t<f64[2]> r;
		r.value = m_ir->CreateCall(get_intrinsic<f64[2]>(llvm::Intrinsic::fma), {a.value, b.value, c.value});
		set_vr(op.rt, -r);
	}
	else
	{
		// Default path: separate multiply and add, then negate.
		set_vr(op.rt, -(a * b + c));
	}
}
|
||||
|
||||
// clamping helpers
|
||||
|
@ -45,7 +45,7 @@ struct cfg_root : cfg::node
|
||||
cfg::_enum<tsx_usage> enable_TSX{ this, "Enable TSX", tsx_usage::enabled }; // Enable TSX. Forcing this on Haswell/Broadwell CPUs should be used carefully
|
||||
cfg::_bool spu_accurate_xfloat{ this, "Accurate xfloat", false };
|
||||
cfg::_bool spu_approx_xfloat{ this, "Approximate xfloat", true };
|
||||
cfg::_bool ppu_accurate_fma{ this, "PPU Accurate FMA", true }; // Enable accurate FMA for CPUs which do not support it natively (can't be disabled for CPUs which do support it)
|
||||
cfg::_bool llvm_accurate_dfma{ this, "LLVM Accurate DFMA", true }; // Enable accurate double-precision FMA for CPUs which do not support it natively
|
||||
|
||||
cfg::_bool debug_console_mode{ this, "Debug Console Mode", false }; // Debug console emulation, not recommended
|
||||
cfg::_enum<lib_loading_type> lib_loading{ this, "Lib Loader", lib_loading_type::liblv2only };
|
||||
|
@ -111,7 +111,7 @@ private:
|
||||
{ emu_settings_type::EnableTSX, { "Core", "Enable TSX"}},
|
||||
{ emu_settings_type::AccurateGETLLAR, { "Core", "Accurate GETLLAR"}},
|
||||
{ emu_settings_type::AccuratePUTLLUC, { "Core", "Accurate PUTLLUC"}},
|
||||
{ emu_settings_type::AccuratePPUfma, { "Core", "PPU Accurate FMA"}},
|
||||
{ emu_settings_type::AccurateLLVMdfma, { "Core", "LLVM Accurate DFMA"}},
|
||||
{ emu_settings_type::AccurateRSXAccess, { "Core", "Accurate RSX reservation access"}},
|
||||
{ emu_settings_type::AccurateXFloat, { "Core", "Accurate xfloat"}},
|
||||
{ emu_settings_type::SetDAZandFTZ, { "Core", "Set DAZ and FTZ"}},
|
||||
|
@ -17,7 +17,7 @@ enum class emu_settings_type
|
||||
EnableTSX,
|
||||
AccurateGETLLAR,
|
||||
AccuratePUTLLUC,
|
||||
AccuratePPUfma,
|
||||
AccurateLLVMdfma,
|
||||
AccurateRSXAccess,
|
||||
AccurateXFloat,
|
||||
SetDAZandFTZ,
|
||||
|
@ -931,8 +931,8 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> gui_settings, std
|
||||
m_emu_settings->EnhanceCheckBox(ui->debugConsoleMode, emu_settings_type::DebugConsoleMode);
|
||||
SubscribeTooltip(ui->debugConsoleMode, tooltips.settings.debug_console_mode);
|
||||
|
||||
m_emu_settings->EnhanceCheckBox(ui->accuratePPUfma, emu_settings_type::AccuratePPUfma);
|
||||
SubscribeTooltip(ui->accuratePPUfma, tooltips.settings.accurate_ppu_fma);
|
||||
m_emu_settings->EnhanceCheckBox(ui->accurateLLVMdfma, emu_settings_type::AccurateLLVMdfma);
|
||||
SubscribeTooltip(ui->accurateLLVMdfma, tooltips.settings.accurate_llvm_dfma);
|
||||
|
||||
m_emu_settings->EnhanceCheckBox(ui->silenceAllLogs, emu_settings_type::SilenceAllLogs);
|
||||
SubscribeTooltip(ui->silenceAllLogs, tooltips.settings.silence_all_logs);
|
||||
|
@ -1717,13 +1717,13 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="accuratePPUfma">
|
||||
<property name="text">
|
||||
<string>Accurate PPU FMA</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="accurateLLVMdfma">
|
||||
<property name="text">
|
||||
<string>Accurate LLVM DFMA</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="silenceAllLogs">
|
||||
<property name="text">
|
||||
|
@ -78,7 +78,7 @@ public:
|
||||
const QString set_daz_and_ftz = tr("Never use this.");
|
||||
const QString accurate_getllar = tr("Never use this.");
|
||||
const QString accurate_putlluc = tr("Never use this.");
|
||||
const QString accurate_ppu_fma = tr("Enables extra accuracy on FMA instructions, which can be needed by some games.\nIt can impact performance negatively on CPUs without FMA acceleration support.");
|
||||
const QString accurate_llvm_dfma = tr("Enables extra accuracy on FMA instructions, which can be needed by some games.\nIt can impact performance negatively on CPUs without FMA acceleration support.");
|
||||
const QString accurate_rsx_access = tr("Never use this.");
|
||||
const QString hook_static_functions = tr("Allows to hook some functions like 'memcpy' replacing them with high-level implementations. May do nothing or break things. Experimental.");
|
||||
const QString gl_legacy_buffers = tr("Enables use of classic OpenGL buffers which allows capturing tools to work with RPCS3 e.g RenderDoc.\nIf unsure, don't use this option.");
|
||||
|
Loading…
Reference in New Issue
Block a user