SPU: Optimize FMA ops with 0 addend

2025-02-19 12:40:29 +00:00 · 2020-05-01 05:05:24 -04:00 · 2020-05-01 05:05:24 -04:00 · c1bd154bcd
commit c1bd154bcd
parent a568c958af
1 changed files with 14 additions and 2 deletions
--- a/rpcs3/Emu/Cell/SPURecompiler.cpp
+++ b/rpcs3/Emu/Cell/SPURecompiler.cpp
@ -7440,14 +7440,27 @@ public:

 	value_t<f32[4]> fma32x4(value_t<f32[4]> a, value_t<f32[4]> b, value_t<f32[4]> c)
 	{
+		value_t<f32[4]> r;
 		const auto ma = eval(sext<s32[4]>(fcmp_uno(a != fsplat<f32[4]>(0.))));
 		const auto mb = eval(sext<s32[4]>(fcmp_uno(b != fsplat<f32[4]>(0.))));
 		const auto ca = eval(bitcast<f32[4]>(bitcast<s32[4]>(a) & mb));
 		const auto cb = eval(bitcast<f32[4]>(bitcast<s32[4]>(b) & ma));
+				
+		// Optimization: Emit only a floating multiply if the addend is zero
+		// This is odd since SPU code could just use the FM instruction, but it seems common enough
+		if (auto cv = llvm::dyn_cast<llvm::Constant>(c.value))
+		{
+			v128 data = get_const_vector(cv, m_pos, 4000);
+
+			if (data == v128{})
+			{
+				r = eval(ca * cb);
+				return r;
+			}
+		}

 		if (m_use_fma)
 		{
-			value_t<f32[4]> r;
 			r.value = m_ir->CreateCall(get_intrinsic<f32[4]>(llvm::Intrinsic::fma), {ca.value, cb.value, c.value});
 			return r;
 		}
@ -7457,7 +7470,6 @@ public:
 		const auto xb = m_ir->CreateFPExt(cb.value, get_type<f64[4]>());
 		const auto xc = m_ir->CreateFPExt(c.value, get_type<f64[4]>());
 		const auto xr = m_ir->CreateCall(get_intrinsic<f64[4]>(llvm::Intrinsic::fmuladd), {xa, xb, xc});
-		value_t<f32[4]> r;
 		r.value = m_ir->CreateFPTrunc(xr, get_type<f32[4]>());
 		return r;
 	}