mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-02-19 12:40:29 +00:00
SPU: Optimize FMA ops with 0 addend
This commit is contained in:
parent
a568c958af
commit
c1bd154bcd
@ -7440,14 +7440,27 @@ public:
|
||||
|
||||
value_t<f32[4]> fma32x4(value_t<f32[4]> a, value_t<f32[4]> b, value_t<f32[4]> c)
|
||||
{
|
||||
value_t<f32[4]> r;
|
||||
const auto ma = eval(sext<s32[4]>(fcmp_uno(a != fsplat<f32[4]>(0.))));
|
||||
const auto mb = eval(sext<s32[4]>(fcmp_uno(b != fsplat<f32[4]>(0.))));
|
||||
const auto ca = eval(bitcast<f32[4]>(bitcast<s32[4]>(a) & mb));
|
||||
const auto cb = eval(bitcast<f32[4]>(bitcast<s32[4]>(b) & ma));
|
||||
|
||||
// Optimization: Emit only a floating multiply if the addend is zero
|
||||
// This is odd since SPU code could just use the FM instruction, but it seems common enough
|
||||
if (auto cv = llvm::dyn_cast<llvm::Constant>(c.value))
|
||||
{
|
||||
v128 data = get_const_vector(cv, m_pos, 4000);
|
||||
|
||||
if (data == v128{})
|
||||
{
|
||||
r = eval(ca * cb);
|
||||
return r;
|
||||
}
|
||||
}
|
||||
|
||||
if (m_use_fma)
|
||||
{
|
||||
value_t<f32[4]> r;
|
||||
r.value = m_ir->CreateCall(get_intrinsic<f32[4]>(llvm::Intrinsic::fma), {ca.value, cb.value, c.value});
|
||||
return r;
|
||||
}
|
||||
@ -7457,7 +7470,6 @@ public:
|
||||
const auto xb = m_ir->CreateFPExt(cb.value, get_type<f64[4]>());
|
||||
const auto xc = m_ir->CreateFPExt(c.value, get_type<f64[4]>());
|
||||
const auto xr = m_ir->CreateCall(get_intrinsic<f64[4]>(llvm::Intrinsic::fmuladd), {xa, xb, xc});
|
||||
value_t<f32[4]> r;
|
||||
r.value = m_ir->CreateFPTrunc(xr, get_type<f32[4]>());
|
||||
return r;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user