PPU interpreter: precise VMADDFP variant and std::fma for scalar fused ops.

Summary of the changes carried by this patch:
  * add_flags_result_t drops its zero and sign members; only result and
    carry are kept and updated.
  * VMADDFP moves from the shared ppu_interpreter base into both derived
    interpreters. The fast variant keeps the SSE multiply-then-add; the
    precise variant evaluates a * c + b per lane in f64 and narrows the
    result to f32.
  * The precise FMSUB, FMADD, FNMSUB and FNMADD call std::fma instead of a
    separate multiply and add/subtract.
  * The file-scope s_ppu_itype decoder object is removed from PPUThread.cpp.

Review notes:
  * The precise VMADDFP now selects its destination register with op.vd, the
    same field the fast variant and VMAXFP use. The earlier revision of this
    patch used op.rd; presumably both fields decode the same bits, confirm
    against ppu_opcode_t.
  * std::fma is declared in <cmath>; no include change is part of this patch,
    so verify that PPUInterpreter.cpp already pulls it in.
  * The post-image hash on the first index line predates the op.vd change.

diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp
index 161c5dae5d..b225d0a0f2 100644
--- a/rpcs3/Emu/Cell/PPUInterpreter.cpp
+++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp
@@ -288,8 +288,6 @@ struct add_flags_result_t
 {
 	T result;
 	bool carry;
-	bool zero;
-	bool sign;
 
 	add_flags_result_t() = default;
 
@@ -297,8 +295,6 @@ struct add_flags_result_t
 	add_flags_result_t(T a, T b)
 		: result(a + b)
 		, carry(result < a)
-		, zero(result == 0)
-		, sign(result >> (sizeof(T) * 8 - 1) != 0)
 	{
 	}
 
@@ -309,8 +305,6 @@ struct add_flags_result_t
 		add_flags_result_t r(result, c);
 		result = r.result;
 		carry |= r.carry;
-		zero = r.zero;
-		sign = r.sign;
 	}
 };
 
@@ -904,12 +898,28 @@ bool ppu_interpreter::VLOGEFP(ppu_thread& ppu, ppu_opcode_t op)
 	return true;
 }
 
-bool ppu_interpreter::VMADDFP(ppu_thread& ppu, ppu_opcode_t op)
+bool ppu_interpreter_fast::VMADDFP(ppu_thread& ppu, ppu_opcode_t op)
 {
 	ppu.vr[op.vd].vf = _mm_add_ps(_mm_mul_ps(ppu.vr[op.va].vf, ppu.vr[op.vc].vf), ppu.vr[op.vb].vf);
 	return true;
 }
 
+bool ppu_interpreter_precise::VMADDFP(ppu_thread& ppu, ppu_opcode_t op)
+{
+	const auto& a = ppu.vr[op.va]._f;
+	const auto& b = ppu.vr[op.vb]._f;
+	const auto& c = ppu.vr[op.vc]._f;
+	auto& d = ppu.vr[op.vd]._f;
+
+	// TODO: Optimize
+	for (u32 i = 0; i < 4; i++)
+	{
+		d[i] = f32(f64{a[i]} * f64{c[i]} + f64{b[i]});
+	}
+
+	return true;
+}
+
 bool ppu_interpreter::VMAXFP(ppu_thread& ppu, ppu_opcode_t op)
 {
 	ppu.vr[op.vd].vf = _mm_max_ps(ppu.vr[op.va].vf, ppu.vr[op.vb].vf);
@@ -4942,7 +4952,7 @@ bool ppu_interpreter_fast::FMSUB(ppu_thread& ppu, ppu_opcode_t op)
 
 bool ppu_interpreter_precise::FMSUB(ppu_thread& ppu, ppu_opcode_t op)
 {
-	const f64 res = ppu.fpr[op.frd] = ppu.fpr[op.fra] * ppu.fpr[op.frc] - ppu.fpr[op.frb];
+	const f64 res = ppu.fpr[op.frd] = std::fma(ppu.fpr[op.fra], ppu.fpr[op.frc], -ppu.fpr[op.frb]);
 	ppu_fpcc_set(ppu, res, 0., op.rc);
 	return true;
 }
@@ -4955,7 +4965,7 @@ bool ppu_interpreter_fast::FMADD(ppu_thread& ppu, ppu_opcode_t op)
 
 bool ppu_interpreter_precise::FMADD(ppu_thread& ppu, ppu_opcode_t op)
 {
-	const f64 res = ppu.fpr[op.frd] = ppu.fpr[op.fra] * ppu.fpr[op.frc] + ppu.fpr[op.frb];
+	const f64 res = ppu.fpr[op.frd] = std::fma(ppu.fpr[op.fra], ppu.fpr[op.frc], ppu.fpr[op.frb]);
 	ppu_fpcc_set(ppu, res, 0., op.rc);
 	return true;
 }
@@ -4968,7 +4978,7 @@ bool ppu_interpreter_fast::FNMSUB(ppu_thread& ppu, ppu_opcode_t op)
 
 bool ppu_interpreter_precise::FNMSUB(ppu_thread& ppu, ppu_opcode_t op)
 {
-	const f64 res = ppu.fpr[op.frd] = -(ppu.fpr[op.fra] * ppu.fpr[op.frc] - ppu.fpr[op.frb]);
+	const f64 res = ppu.fpr[op.frd] = -std::fma(ppu.fpr[op.fra], ppu.fpr[op.frc], -ppu.fpr[op.frb]);
 	ppu_fpcc_set(ppu, res, 0., op.rc);
 	return true;
 }
@@ -4981,7 +4991,7 @@ bool ppu_interpreter_fast::FNMADD(ppu_thread& ppu, ppu_opcode_t op)
 
 bool ppu_interpreter_precise::FNMADD(ppu_thread& ppu, ppu_opcode_t op)
 {
-	const f64 res = ppu.fpr[op.frd] = -(ppu.fpr[op.fra] * ppu.fpr[op.frc] + ppu.fpr[op.frb]);
+	const f64 res = ppu.fpr[op.frd] = -std::fma(ppu.fpr[op.fra], ppu.fpr[op.frc], ppu.fpr[op.frb]);
 	ppu_fpcc_set(ppu, res, 0., op.rc);
 	return true;
 }
diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h
index a6f309d8d7..14e0ba08ef 100644
--- a/rpcs3/Emu/Cell/PPUInterpreter.h
+++ b/rpcs3/Emu/Cell/PPUInterpreter.h
@@ -40,7 +40,6 @@ struct ppu_interpreter
 	static bool VCMPGTUW(ppu_thread&, ppu_opcode_t);
 	static bool VEXPTEFP(ppu_thread&, ppu_opcode_t);
 	static bool VLOGEFP(ppu_thread&, ppu_opcode_t);
-	static bool VMADDFP(ppu_thread&, ppu_opcode_t);
 	static bool VMAXFP(ppu_thread&, ppu_opcode_t);
 	static bool VMAXSB(ppu_thread&, ppu_opcode_t);
 	static bool VMAXSH(ppu_thread&, ppu_opcode_t);
@@ -373,6 +372,7 @@ struct ppu_interpreter_precise final : ppu_interpreter
 	static bool VSUM4UBS(ppu_thread&, ppu_opcode_t);
 	static bool VCTSXS(ppu_thread&, ppu_opcode_t);
 	static bool VCTUXS(ppu_thread&, ppu_opcode_t);
+	static bool VMADDFP(ppu_thread&, ppu_opcode_t);
 
 	static bool FDIVS(ppu_thread&, ppu_opcode_t);
 	static bool FSUBS(ppu_thread&, ppu_opcode_t);
@@ -439,6 +439,7 @@ struct ppu_interpreter_fast final : ppu_interpreter
 	static bool VSUM4UBS(ppu_thread&, ppu_opcode_t);
 	static bool VCTSXS(ppu_thread&, ppu_opcode_t);
 	static bool VCTUXS(ppu_thread&, ppu_opcode_t);
+	static bool VMADDFP(ppu_thread&, ppu_opcode_t);
 
 	static bool FDIVS(ppu_thread&, ppu_opcode_t);
 	static bool FSUBS(ppu_thread&, ppu_opcode_t);
diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp
index 1784b58b6a..a14d75b2a4 100644
--- a/rpcs3/Emu/Cell/PPUThread.cpp
+++ b/rpcs3/Emu/Cell/PPUThread.cpp
@@ -928,8 +928,6 @@ void ppu_thread::stack_pop_verbose(u32 addr, u32 size) noexcept
 	LOG_ERROR(PPU, "Invalid thread" HERE);
 }
 
-const ppu_decoder s_ppu_itype;
-
 extern u64 get_timebased_time();
 extern ppu_function_t ppu_get_syscall(u64 code);
 