From 158b24ec2521cded7643607a11801740169623bf Mon Sep 17 00:00:00 2001 From: Eladash Date: Fri, 3 Apr 2020 08:11:47 +0300 Subject: [PATCH] SPU LLVM: Add accurate double-precision FMA support --- rpcs3/Emu/Cell/PPUThread.cpp | 2 +- rpcs3/Emu/Cell/PPUTranslator.cpp | 16 +++++------ rpcs3/Emu/Cell/SPURecompiler.cpp | 44 ++++++++++++++++++++++++++++--- rpcs3/Emu/system_config.h | 2 +- rpcs3/rpcs3qt/emu_settings.h | 2 +- rpcs3/rpcs3qt/emu_settings_type.h | 2 +- rpcs3/rpcs3qt/settings_dialog.cpp | 4 +-- rpcs3/rpcs3qt/settings_dialog.ui | 14 +++++----- rpcs3/rpcs3qt/tooltips.h | 2 +- 9 files changed, 62 insertions(+), 26 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index e1f4e947d3..805b4008fd 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -1628,7 +1628,7 @@ extern void ppu_initialize(const ppu_module& info) #ifndef _WIN32 settings += ppu_settings::non_win32; #endif - if (g_cfg.core.ppu_accurate_fma) + if (g_cfg.core.llvm_accurate_dfma) { settings += ppu_settings::accurate_fma; } diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index c428c652f0..23222a1a0b 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -3881,7 +3881,7 @@ void PPUTranslator::FMADDS(ppu_opcode_t op) const auto c = GetFpr(op.frc); llvm::Value* result; - if (g_cfg.core.ppu_accurate_fma) + if (g_cfg.core.llvm_accurate_dfma) { result = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::fma), {a, c, b}); } @@ -3909,7 +3909,7 @@ void PPUTranslator::FMSUBS(ppu_opcode_t op) const auto c = GetFpr(op.frc); llvm::Value* result; - if (g_cfg.core.ppu_accurate_fma) + if (g_cfg.core.llvm_accurate_dfma) { result = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::fma), {a, c, m_ir->CreateFNeg(b)}); } @@ -3937,7 +3937,7 @@ void PPUTranslator::FNMSUBS(ppu_opcode_t op) const auto c = GetFpr(op.frc); llvm::Value* result; - if (g_cfg.core.ppu_accurate_fma) + if (g_cfg.core.llvm_accurate_dfma) { result = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::fma), {a, c, m_ir->CreateFNeg(b)}); } @@ -3965,7 +3965,7 @@ void PPUTranslator::FNMADDS(ppu_opcode_t op) const auto c = GetFpr(op.frc); llvm::Value* result; - if (g_cfg.core.ppu_accurate_fma) + if (g_cfg.core.llvm_accurate_dfma) { result = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::fma), {a, c, b}); } @@ -4225,7 +4225,7 @@ void PPUTranslator::FMSUB(ppu_opcode_t op) const auto c = GetFpr(op.frc); llvm::Value* result; - if (g_cfg.core.ppu_accurate_fma) + if (g_cfg.core.llvm_accurate_dfma) { result = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::fma), {a, c, m_ir->CreateFNeg(b)}); } @@ -4253,7 +4253,7 @@ void PPUTranslator::FMADD(ppu_opcode_t op) const auto c = GetFpr(op.frc); llvm::Value* result; - if (g_cfg.core.ppu_accurate_fma) + if (g_cfg.core.llvm_accurate_dfma) { result = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::fma), { a, c, b }); } @@ -4281,7 +4281,7 @@ void PPUTranslator::FNMSUB(ppu_opcode_t op) const auto c = GetFpr(op.frc); llvm::Value* result; - if (g_cfg.core.ppu_accurate_fma) + if (g_cfg.core.llvm_accurate_dfma) { result = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::fma), {a, c, m_ir->CreateFNeg(b)}); } @@ -4309,7 +4309,7 @@ void PPUTranslator::FNMADD(ppu_opcode_t op) const auto c = GetFpr(op.frc); llvm::Value* result; - if (g_cfg.core.ppu_accurate_fma) + if (g_cfg.core.llvm_accurate_dfma) { result = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::fma), {a, c, b}); } diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 2689283bd2..6ff5e6bcfa 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -7192,22 +7192,58 @@ public: void DFMA(spu_opcode_t op) { - set_vr(op.rt, get_vr(op.ra) * get_vr(op.rb) + get_vr(op.rt)); + const auto [a, b, c] = get_vrs(op.ra, op.rb, op.rt); + + if (g_cfg.core.llvm_accurate_dfma) + { + value_t r; + r.value = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::fma), {a.value, b.value, c.value}); + set_vr(op.rt, r); + } + else + set_vr(op.rt, a * b + c); } void DFMS(spu_opcode_t op) { - set_vr(op.rt, get_vr(op.ra) * get_vr(op.rb) - get_vr(op.rt)); + const auto [a, b, c] = get_vrs(op.ra, op.rb, op.rt); + + if (g_cfg.core.llvm_accurate_dfma) + { + value_t r; + r.value = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::fma), {a.value, b.value, eval(-c).value}); + set_vr(op.rt, r); + } + else + set_vr(op.rt, a * b - c); } void DFNMS(spu_opcode_t op) { - set_vr(op.rt, get_vr(op.rt) - get_vr(op.ra) * get_vr(op.rb)); + const auto [a, b, c] = get_vrs(op.ra, op.rb, op.rt); + + if (g_cfg.core.llvm_accurate_dfma) + { + value_t r; + r.value = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::fma), {eval(-a).value, b.value, c.value}); + set_vr(op.rt, r); + } + else + set_vr(op.rt, c - (a * b)); } void DFNMA(spu_opcode_t op) { - set_vr(op.rt, -(get_vr(op.ra) * get_vr(op.rb) + get_vr(op.rt))); + const auto [a, b, c] = get_vrs(op.ra, op.rb, op.rt); + + if (g_cfg.core.llvm_accurate_dfma) + { + value_t r; + r.value = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::fma), {a.value, b.value, c.value}); + set_vr(op.rt, -r); + } + else + set_vr(op.rt, -(a * b + c)); } // clamping helpers diff --git a/rpcs3/Emu/system_config.h b/rpcs3/Emu/system_config.h index 7c3153efa2..c751e66560 100644 --- a/rpcs3/Emu/system_config.h +++ b/rpcs3/Emu/system_config.h @@ -45,7 +45,7 @@ struct cfg_root : cfg::node cfg::_enum enable_TSX{ this, "Enable TSX", tsx_usage::enabled }; // Enable TSX. Forcing this on Haswell/Broadwell CPUs should be used carefully cfg::_bool spu_accurate_xfloat{ this, "Accurate xfloat", false }; cfg::_bool spu_approx_xfloat{ this, "Approximate xfloat", true }; - cfg::_bool ppu_accurate_fma{ this, "PPU Accurate FMA", true }; // Enable accurate FMA for CPUs which do not support it natively (can't be disabled for CPUs which do support it) + cfg::_bool llvm_accurate_dfma{ this, "LLVM Accurate DFMA", true }; // Enable accurate double-precision FMA for CPUs which do not support it natively cfg::_bool debug_console_mode{ this, "Debug Console Mode", false }; // Debug console emulation, not recommended cfg::_enum lib_loading{ this, "Lib Loader", lib_loading_type::liblv2only }; diff --git a/rpcs3/rpcs3qt/emu_settings.h b/rpcs3/rpcs3qt/emu_settings.h index c008648fb0..f0648235d2 100644 --- a/rpcs3/rpcs3qt/emu_settings.h +++ b/rpcs3/rpcs3qt/emu_settings.h @@ -111,7 +111,7 @@ private: { emu_settings_type::EnableTSX, { "Core", "Enable TSX"}}, { emu_settings_type::AccurateGETLLAR, { "Core", "Accurate GETLLAR"}}, { emu_settings_type::AccuratePUTLLUC, { "Core", "Accurate PUTLLUC"}}, - { emu_settings_type::AccuratePPUfma, { "Core", "PPU Accurate FMA"}}, + { emu_settings_type::AccurateLLVMdfma, { "Core", "LLVM Accurate DFMA"}}, { emu_settings_type::AccurateRSXAccess, { "Core", "Accurate RSX reservation access"}}, { emu_settings_type::AccurateXFloat, { "Core", "Accurate xfloat"}}, { emu_settings_type::SetDAZandFTZ, { "Core", "Set DAZ and FTZ"}}, diff --git a/rpcs3/rpcs3qt/emu_settings_type.h b/rpcs3/rpcs3qt/emu_settings_type.h index a5accfdfeb..95f1c8e0d5 100644 --- a/rpcs3/rpcs3qt/emu_settings_type.h +++ b/rpcs3/rpcs3qt/emu_settings_type.h @@ -17,7 +17,7 @@ enum class emu_settings_type EnableTSX, AccurateGETLLAR, AccuratePUTLLUC, - AccuratePPUfma, + AccurateLLVMdfma, AccurateRSXAccess, AccurateXFloat, SetDAZandFTZ, diff --git a/rpcs3/rpcs3qt/settings_dialog.cpp b/rpcs3/rpcs3qt/settings_dialog.cpp index 005284efbe..c1e0e30965 100644 --- a/rpcs3/rpcs3qt/settings_dialog.cpp +++ b/rpcs3/rpcs3qt/settings_dialog.cpp @@ -931,8 +931,8 @@ settings_dialog::settings_dialog(std::shared_ptr gui_settings, std m_emu_settings->EnhanceCheckBox(ui->debugConsoleMode, emu_settings_type::DebugConsoleMode); SubscribeTooltip(ui->debugConsoleMode, tooltips.settings.debug_console_mode); - m_emu_settings->EnhanceCheckBox(ui->accuratePPUfma, emu_settings_type::AccuratePPUfma); - SubscribeTooltip(ui->accuratePPUfma, tooltips.settings.accurate_ppu_fma); + m_emu_settings->EnhanceCheckBox(ui->accurateLLVMdfma, emu_settings_type::AccurateLLVMdfma); + SubscribeTooltip(ui->accurateLLVMdfma, tooltips.settings.accurate_llvm_dfma); m_emu_settings->EnhanceCheckBox(ui->silenceAllLogs, emu_settings_type::SilenceAllLogs); SubscribeTooltip(ui->silenceAllLogs, tooltips.settings.silence_all_logs); diff --git a/rpcs3/rpcs3qt/settings_dialog.ui b/rpcs3/rpcs3qt/settings_dialog.ui index f67836038d..4ad0454fbe 100644 --- a/rpcs3/rpcs3qt/settings_dialog.ui +++ b/rpcs3/rpcs3qt/settings_dialog.ui @@ -1717,13 +1717,13 @@ - - - - Accurate PPU FMA - - - + + + + Accurate LLVM DFMA + + + diff --git a/rpcs3/rpcs3qt/tooltips.h b/rpcs3/rpcs3qt/tooltips.h index ad7de60e9b..049eea95f0 100644 --- a/rpcs3/rpcs3qt/tooltips.h +++ b/rpcs3/rpcs3qt/tooltips.h @@ -78,7 +78,7 @@ public: const QString set_daz_and_ftz = tr("Never use this."); const QString accurate_getllar = tr("Never use this."); const QString accurate_putlluc = tr("Never use this."); - const QString accurate_ppu_fma = tr("Enables extra accuracy on FMA instructions, which can be needed by some games.\nIt can impact performance negatively on CPUs without FMA acceleration support."); + const QString accurate_llvm_dfma = tr("Enables extra accuracy on FMA instructions, which can be needed by some games.\nIt can impact performance negatively on CPUs without FMA acceleration support."); const QString accurate_rsx_access = tr("Never use this."); const QString hook_static_functions = tr("Allows to hook some functions like 'memcpy' replacing them with high-level implementations. May do nothing or break things. Experimental."); const QString gl_legacy_buffers = tr("Enables use of classic OpenGL buffers which allows capturing tools to work with RPCS3 e.g RenderDoc.\nIf unsure, don't use this option.");