mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-02-06 09:39:55 +00:00
SPU LLVM: use FMA with approx xfloat when available
Emulate FMA with double precision if unsupported natively.
This commit is contained in:
parent
068450d4fe
commit
3b46c9cb6a
@@ -7362,13 +7362,30 @@ public:
|
||||
set_vr(op.rt, sext<s32[4]>(fcmp_ord(fabs(get_vr<f32[4]>(op.ra)) == fabs(get_vr<f32[4]>(op.rb)))));
|
||||
}
|
||||
|
||||
// Multiply and return zero if any of the arguments is in the xfloat range.
|
||||
value_t<f32[4]> mzero_if_xtended(value_t<f32[4]> a, value_t<f32[4]> b)
|
||||
// Builds IR for a per-lane multiply-add a * b + c on f32[4] values.
// Lanes where the bit pattern of |a| or |b|, compared as a signed 32-bit integer,
// exceeds 0x7f7fffff yield c unchanged (the CreateSelect calls below pick c for them).
// When m_use_fma is set the llvm.fma intrinsic is emitted on the f32[4] operands;
// otherwise the operands are extended to f64[4], combined with llvm.fmuladd and
// truncated back to f32[4].
value_t<f32[4]> fma32x4(value_t<f32[4]> a, value_t<f32[4]> b, value_t<f32[4]> c)
|
||||
{
|
||||
// Compare absolute values with max positive float in normal range.
|
||||
const auto aa = bitcast<s32[4]>(fabs(a));
|
||||
const auto ab = bitcast<s32[4]>(fabs(b));
|
||||
// NOTE(review): this return ignores c and precedes the statements below; it looks like
// the pre-change line shown by the diff view rather than part of fma32x4 — confirm.
return eval(select(max(aa, ab) > 0x7f7fffff, fsplat<f32[4]>(0.), a * b));
|
||||
// sc: per-lane mask, true where either operand's magnitude bits exceed 0x7f7fffff.
const auto sc = eval(max(aa, ab) > 0x7f7fffff);
|
||||
|
||||
// Path taken when m_use_fma is set: one fma intrinsic call on the f32[4] operands.
if (m_use_fma)
|
||||
{
|
||||
value_t<f32[4]> r;
|
||||
r.value = m_ir->CreateCall(get_intrinsic<f32[4]>(llvm::Intrinsic::fma), {a.value, b.value, c.value});
|
||||
// Masked lanes take c instead of the fma result.
r.value = m_ir->CreateSelect(sc.value, c.value, r.value);
|
||||
return r;
|
||||
}
|
||||
|
||||
// Fallback: extend all three operands to f64[4] and do the multiply-add in double precision.
|
||||
const auto xa = m_ir->CreateFPExt(a.value, get_type<f64[4]>());
|
||||
const auto xb = m_ir->CreateFPExt(b.value, get_type<f64[4]>());
|
||||
const auto xc = m_ir->CreateFPExt(c.value, get_type<f64[4]>());
|
||||
const auto xr = m_ir->CreateCall(get_intrinsic<f64[4]>(llvm::Intrinsic::fmuladd), {xa, xb, xc});
|
||||
value_t<f32[4]> r;
|
||||
// Truncate the double-precision result back to f32[4].
r.value = m_ir->CreateFPTrunc(xr, get_type<f32[4]>());
|
||||
// Same lane masking as the m_use_fma path: masked lanes take c.
r.value = m_ir->CreateSelect(sc.value, c.value, r.value);
|
||||
return r;
|
||||
}
|
||||
|
||||
void FNMS(spu_opcode_t op)
|
||||
@@ -7377,7 +7394,7 @@ public:
|
||||
if (g_cfg.core.spu_accurate_xfloat)
|
||||
set_vr(op.rt4, -fmuladd(get_vr<f64[4]>(op.ra), get_vr<f64[4]>(op.rb), eval(-get_vr<f64[4]>(op.rc))));
|
||||
else if (g_cfg.core.spu_approx_xfloat)
|
||||
set_vr(op.rt4, get_vr<f32[4]>(op.rc) - mzero_if_xtended(get_vr<f32[4]>(op.ra), get_vr<f32[4]>(op.rb)));
|
||||
set_vr(op.rt4, -fma32x4(get_vr<f32[4]>(op.ra), get_vr<f32[4]>(op.rb), eval(-get_vr<f32[4]>(op.rc))));
|
||||
else
|
||||
set_vr(op.rt4, get_vr<f32[4]>(op.rc) - get_vr<f32[4]>(op.ra) * get_vr<f32[4]>(op.rb));
|
||||
}
|
||||
@@ -7388,7 +7405,7 @@ public:
|
||||
if (g_cfg.core.spu_accurate_xfloat)
|
||||
set_vr(op.rt4, fmuladd(get_vr<f64[4]>(op.ra), get_vr<f64[4]>(op.rb), get_vr<f64[4]>(op.rc)));
|
||||
else if (g_cfg.core.spu_approx_xfloat)
|
||||
set_vr(op.rt4, mzero_if_xtended(get_vr<f32[4]>(op.ra), get_vr<f32[4]>(op.rb)) + get_vr<f32[4]>(op.rc));
|
||||
set_vr(op.rt4, fma32x4(get_vr<f32[4]>(op.ra), get_vr<f32[4]>(op.rb), get_vr<f32[4]>(op.rc)));
|
||||
else
|
||||
set_vr(op.rt4, get_vr<f32[4]>(op.ra) * get_vr<f32[4]>(op.rb) + get_vr<f32[4]>(op.rc));
|
||||
}
|
||||
@@ -7399,7 +7416,7 @@ public:
|
||||
if (g_cfg.core.spu_accurate_xfloat)
|
||||
set_vr(op.rt4, fmuladd(get_vr<f64[4]>(op.ra), get_vr<f64[4]>(op.rb), eval(-get_vr<f64[4]>(op.rc))));
|
||||
else if (g_cfg.core.spu_approx_xfloat)
|
||||
set_vr(op.rt4, mzero_if_xtended(get_vr<f32[4]>(op.ra), get_vr<f32[4]>(op.rb)) - get_vr<f32[4]>(op.rc));
|
||||
set_vr(op.rt4, fma32x4(get_vr<f32[4]>(op.ra), get_vr<f32[4]>(op.rb), eval(-get_vr<f32[4]>(op.rc))));
|
||||
else
|
||||
set_vr(op.rt4, get_vr<f32[4]>(op.ra) * get_vr<f32[4]>(op.rb) - get_vr<f32[4]>(op.rc));
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user