mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-02-22 03:40:30 +00:00
SPU: Optimize FMA ops with 0 addend
This commit is contained in:
parent
a568c958af
commit
c1bd154bcd
@ -7440,14 +7440,27 @@ public:
|
|||||||
|
|
||||||
value_t<f32[4]> fma32x4(value_t<f32[4]> a, value_t<f32[4]> b, value_t<f32[4]> c)
|
value_t<f32[4]> fma32x4(value_t<f32[4]> a, value_t<f32[4]> b, value_t<f32[4]> c)
|
||||||
{
|
{
|
||||||
|
value_t<f32[4]> r;
|
||||||
const auto ma = eval(sext<s32[4]>(fcmp_uno(a != fsplat<f32[4]>(0.))));
|
const auto ma = eval(sext<s32[4]>(fcmp_uno(a != fsplat<f32[4]>(0.))));
|
||||||
const auto mb = eval(sext<s32[4]>(fcmp_uno(b != fsplat<f32[4]>(0.))));
|
const auto mb = eval(sext<s32[4]>(fcmp_uno(b != fsplat<f32[4]>(0.))));
|
||||||
const auto ca = eval(bitcast<f32[4]>(bitcast<s32[4]>(a) & mb));
|
const auto ca = eval(bitcast<f32[4]>(bitcast<s32[4]>(a) & mb));
|
||||||
const auto cb = eval(bitcast<f32[4]>(bitcast<s32[4]>(b) & ma));
|
const auto cb = eval(bitcast<f32[4]>(bitcast<s32[4]>(b) & ma));
|
||||||
|
|
||||||
|
// Optimization: Emit only a floating multiply if the addend is zero
|
||||||
|
// This is odd since SPU code could just use the FM instruction, but it seems common enough
|
||||||
|
if (auto cv = llvm::dyn_cast<llvm::Constant>(c.value))
|
||||||
|
{
|
||||||
|
v128 data = get_const_vector(cv, m_pos, 4000);
|
||||||
|
|
||||||
|
if (data == v128{})
|
||||||
|
{
|
||||||
|
r = eval(ca * cb);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (m_use_fma)
|
if (m_use_fma)
|
||||||
{
|
{
|
||||||
value_t<f32[4]> r;
|
|
||||||
r.value = m_ir->CreateCall(get_intrinsic<f32[4]>(llvm::Intrinsic::fma), {ca.value, cb.value, c.value});
|
r.value = m_ir->CreateCall(get_intrinsic<f32[4]>(llvm::Intrinsic::fma), {ca.value, cb.value, c.value});
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
@ -7457,7 +7470,6 @@ public:
|
|||||||
const auto xb = m_ir->CreateFPExt(cb.value, get_type<f64[4]>());
|
const auto xb = m_ir->CreateFPExt(cb.value, get_type<f64[4]>());
|
||||||
const auto xc = m_ir->CreateFPExt(c.value, get_type<f64[4]>());
|
const auto xc = m_ir->CreateFPExt(c.value, get_type<f64[4]>());
|
||||||
const auto xr = m_ir->CreateCall(get_intrinsic<f64[4]>(llvm::Intrinsic::fmuladd), {xa, xb, xc});
|
const auto xr = m_ir->CreateCall(get_intrinsic<f64[4]>(llvm::Intrinsic::fmuladd), {xa, xb, xc});
|
||||||
value_t<f32[4]> r;
|
|
||||||
r.value = m_ir->CreateFPTrunc(xr, get_type<f32[4]>());
|
r.value = m_ir->CreateFPTrunc(xr, get_type<f32[4]>());
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user