mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-17 08:11:51 +00:00
PPU Precise: Fix fused float ops inaccuracy by using fma
Also clean up add64_flags, remove redundant copy of s_ppu_itype.
This commit is contained in:
parent
4d46e66833
commit
c16319f959
@ -288,8 +288,6 @@ struct add_flags_result_t
|
||||
{
|
||||
T result;
|
||||
bool carry;
|
||||
bool zero;
|
||||
bool sign;
|
||||
|
||||
add_flags_result_t() = default;
|
||||
|
||||
@ -297,8 +295,6 @@ struct add_flags_result_t
|
||||
add_flags_result_t(T a, T b)
|
||||
: result(a + b)
|
||||
, carry(result < a)
|
||||
, zero(result == 0)
|
||||
, sign(result >> (sizeof(T) * 8 - 1) != 0)
|
||||
{
|
||||
}
|
||||
|
||||
@ -309,8 +305,6 @@ struct add_flags_result_t
|
||||
add_flags_result_t r(result, c);
|
||||
result = r.result;
|
||||
carry |= r.carry;
|
||||
zero = r.zero;
|
||||
sign = r.sign;
|
||||
}
|
||||
};
|
||||
|
||||
@ -904,12 +898,28 @@ bool ppu_interpreter::VLOGEFP(ppu_thread& ppu, ppu_opcode_t op)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ppu_interpreter::VMADDFP(ppu_thread& ppu, ppu_opcode_t op)
|
||||
// Fast-path VMADDFP: stores va * vc + vb into vd using a separate SSE
// multiply followed by an SSE add (the product is rounded before the add).
// Always returns true.
bool ppu_interpreter_fast::VMADDFP(ppu_thread& ppu, ppu_opcode_t op)
{
	// Both source reads happen before the destination is written, so vd may
	// name the same register as any of the inputs.
	const auto product = _mm_mul_ps(ppu.vr[op.va].vf, ppu.vr[op.vc].vf);
	ppu.vr[op.vd].vf = _mm_add_ps(product, ppu.vr[op.vb].vf);
	return true;
}
|
||||
|
||||
// Precise-path VMADDFP: for each of the four lanes, d = a * c + b with a
// single rounding step. Always returns true.
//
// The previous form, f32(f64(a) * f64(c) + f64(b)), rounded the sum to double
// and then again to float; that double rounding can differ from a true fused
// multiply-add in the last bit. std::fma on the float lanes rounds once.
bool ppu_interpreter_precise::VMADDFP(ppu_thread& ppu, ppu_opcode_t op)
{
	const auto& a = ppu.vr[op.va]._f;
	const auto& b = ppu.vr[op.vb]._f;
	const auto& c = ppu.vr[op.vc]._f;
	// Destination uses the vector field vd, matching ppu_interpreter_fast::VMADDFP.
	auto& d = ppu.vr[op.vd]._f;

	// TODO: Optimize
	// Lane i is fully read before lane i is written, so d may alias a, b or c.
	for (u32 i = 0; i < 4; i++)
	{
		d[i] = std::fma(a[i], c[i], b[i]);
	}

	return true;
}
|
||||
|
||||
bool ppu_interpreter::VMAXFP(ppu_thread& ppu, ppu_opcode_t op)
|
||||
{
|
||||
ppu.vr[op.vd].vf = _mm_max_ps(ppu.vr[op.va].vf, ppu.vr[op.vb].vf);
|
||||
@ -4942,7 +4952,7 @@ bool ppu_interpreter_fast::FMSUB(ppu_thread& ppu, ppu_opcode_t op)
|
||||
|
||||
bool ppu_interpreter_precise::FMSUB(ppu_thread& ppu, ppu_opcode_t op)
|
||||
{
|
||||
const f64 res = ppu.fpr[op.frd] = ppu.fpr[op.fra] * ppu.fpr[op.frc] - ppu.fpr[op.frb];
|
||||
const f64 res = ppu.fpr[op.frd] = std::fma(ppu.fpr[op.fra], ppu.fpr[op.frc], -ppu.fpr[op.frb]);
|
||||
ppu_fpcc_set(ppu, res, 0., op.rc);
|
||||
return true;
|
||||
}
|
||||
@ -4955,7 +4965,7 @@ bool ppu_interpreter_fast::FMADD(ppu_thread& ppu, ppu_opcode_t op)
|
||||
|
||||
bool ppu_interpreter_precise::FMADD(ppu_thread& ppu, ppu_opcode_t op)
|
||||
{
|
||||
const f64 res = ppu.fpr[op.frd] = ppu.fpr[op.fra] * ppu.fpr[op.frc] + ppu.fpr[op.frb];
|
||||
const f64 res = ppu.fpr[op.frd] = std::fma(ppu.fpr[op.fra], ppu.fpr[op.frc], ppu.fpr[op.frb]);
|
||||
ppu_fpcc_set(ppu, res, 0., op.rc);
|
||||
return true;
|
||||
}
|
||||
@ -4968,7 +4978,7 @@ bool ppu_interpreter_fast::FNMSUB(ppu_thread& ppu, ppu_opcode_t op)
|
||||
|
||||
bool ppu_interpreter_precise::FNMSUB(ppu_thread& ppu, ppu_opcode_t op)
|
||||
{
|
||||
const f64 res = ppu.fpr[op.frd] = -(ppu.fpr[op.fra] * ppu.fpr[op.frc] - ppu.fpr[op.frb]);
|
||||
const f64 res = ppu.fpr[op.frd] = -std::fma(ppu.fpr[op.fra], ppu.fpr[op.frc], -ppu.fpr[op.frb]);
|
||||
ppu_fpcc_set(ppu, res, 0., op.rc);
|
||||
return true;
|
||||
}
|
||||
@ -4981,7 +4991,7 @@ bool ppu_interpreter_fast::FNMADD(ppu_thread& ppu, ppu_opcode_t op)
|
||||
|
||||
bool ppu_interpreter_precise::FNMADD(ppu_thread& ppu, ppu_opcode_t op)
|
||||
{
|
||||
const f64 res = ppu.fpr[op.frd] = -(ppu.fpr[op.fra] * ppu.fpr[op.frc] + ppu.fpr[op.frb]);
|
||||
const f64 res = ppu.fpr[op.frd] = -std::fma(ppu.fpr[op.fra], ppu.fpr[op.frc], ppu.fpr[op.frb]);
|
||||
ppu_fpcc_set(ppu, res, 0., op.rc);
|
||||
return true;
|
||||
}
|
||||
|
@ -40,7 +40,6 @@ struct ppu_interpreter
|
||||
static bool VCMPGTUW(ppu_thread&, ppu_opcode_t);
|
||||
static bool VEXPTEFP(ppu_thread&, ppu_opcode_t);
|
||||
static bool VLOGEFP(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMADDFP(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMAXFP(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMAXSB(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMAXSH(ppu_thread&, ppu_opcode_t);
|
||||
@ -373,6 +372,7 @@ struct ppu_interpreter_precise final : ppu_interpreter
|
||||
static bool VSUM4UBS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VCTSXS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VCTUXS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMADDFP(ppu_thread&, ppu_opcode_t);
|
||||
|
||||
static bool FDIVS(ppu_thread&, ppu_opcode_t);
|
||||
static bool FSUBS(ppu_thread&, ppu_opcode_t);
|
||||
@ -439,6 +439,7 @@ struct ppu_interpreter_fast final : ppu_interpreter
|
||||
static bool VSUM4UBS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VCTSXS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VCTUXS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMADDFP(ppu_thread&, ppu_opcode_t);
|
||||
|
||||
static bool FDIVS(ppu_thread&, ppu_opcode_t);
|
||||
static bool FSUBS(ppu_thread&, ppu_opcode_t);
|
||||
|
@ -928,8 +928,6 @@ void ppu_thread::stack_pop_verbose(u32 addr, u32 size) noexcept
|
||||
LOG_ERROR(PPU, "Invalid thread" HERE);
|
||||
}
|
||||
|
||||
const ppu_decoder<ppu_itype> s_ppu_itype;
|
||||
|
||||
extern u64 get_timebased_time();
|
||||
extern ppu_function_t ppu_get_syscall(u64 code);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user