SPU LLVM: Improve approx FCGT (#8728)

2025-02-06 09:39:55 +00:00 · 2020-08-14 21:33:35 +03:00 · 2020-08-14 21:33:35 +03:00 · 995cb8125e
commit 995cb8125e
parent 01d3585bf3
1 changed files with 33 additions and 43 deletions
--- a/rpcs3/Emu/Cell/SPURecompiler.cpp
+++ b/rpcs3/Emu/Cell/SPURecompiler.cpp
@ -7552,63 +7552,53 @@ public:
 			return;
 		}

-		const auto a = get_vr<f32[4]>(op.ra);
-		const auto b = get_vr<f32[4]>(op.rb);
+		const auto [a, b] = get_vrs<f32[4]>(op.ra, op.rb);
+		const value_t<f32[4]> ab[2]{a, b};

-		if (auto [ok, data] = get_const_vector(b.value, m_pos, 5000); ok)
+		std::bitset<2> safe_int_compare(0);
+		std::bitset<2> safe_nonzero_compare(0);
+
+		for (u32 i = 0; i < 2; i++)
 		{
-			bool safe_int_compare = true;
-
-			for (u32 i = 0; i < 4; i++)
+			if (auto [ok, data] = get_const_vector(ab[i].value, m_pos, 5000); ok)
 			{
-				const u32 exponent = data._u32[i] & 0x7f800000u;
+				safe_int_compare.set(i);
+				safe_nonzero_compare.set(i);

-				if (data._u32[i] >= 0x7f7fffffu || !exponent)
+				for (u32 j = 0; j < 4; j++)
 				{
-					// Postive or negative zero, Denormal (treated as zero), Negative constant, or Normalized number with exponent +127
-		 			// Cannot used signed integer compare safely
-					// Note: Technically this optimization is accurate for any positive value, but due to the fact that
-					// we don't produce "extended range" values the same way as real hardware, it's not safe to apply
-					// this optimization for values outside of the range of x86 floating point hardware.
-					safe_int_compare = false;
-				}
-			}
+					const u32 value = data._u32[j];
+					const u8 exponent = static_cast<u8>(value >> 23);

-			if (safe_int_compare)
-			{
-				set_vr(op.rt, sext<s32[4]>(bitcast<s32[4]>(a) > bitcast<s32[4]>(b)));
-				return;
+					if (value >= 0x7f7fffffu || !exponent)
+					{
+						// Postive or negative zero, Denormal (treated as zero), Negative constant, or Normalized number with exponent +127
+		 				// Cannot used signed integer compare safely
+						// Note: Technically this optimization is accurate for any positive value, but due to the fact that
+						// we don't produce "extended range" values the same way as real hardware, it's not safe to apply
+						// this optimization for values outside of the range of x86 floating point hardware.
+						safe_int_compare.reset(i);
+						if (!exponent) safe_nonzero_compare.reset(i);
+					}
+				}
 			}
 		}

-		if (auto [ok, data] = get_const_vector(a.value, m_pos, 5000); ok)
+		if (safe_int_compare.any())
 		{
-			bool safe_int_compare = true;
-
-			for (u32 i = 0; i < 4; i++)
-			{
-				const u32 exponent = data._u32[i] & 0x7f800000u;
-
-				if (data._u32[i] >= 0x7f7fffffu || !exponent)
-				{
-					// See above
-					safe_int_compare = false;
-					break;
-				}
-			}
-
-			if (safe_int_compare)
-			{
-				set_vr(op.rt, sext<s32[4]>(bitcast<s32[4]>(a) > bitcast<s32[4]>(b)));
-				return;
-			}
+			set_vr(op.rt, sext<s32[4]>(bitcast<s32[4]>(a) > bitcast<s32[4]>(b)));
+			return;
 		}

 		if (g_cfg.core.spu_approx_xfloat)
 		{
-			const auto ca = eval(clamp_positive_smax(a));
-			const auto cb = eval(clamp_negative_smax(b));
-			set_vr(op.rt, sext<s32[4]>(fcmp_ord(ca > cb)));
+			const auto ai = eval(bitcast<s32[4]>(a));
+			const auto bi = eval(bitcast<s32[4]>(b));
+
+			if (!safe_nonzero_compare.any())
+				set_vr(op.rt, sext<s32[4]>(fcmp_uno(a != b) & select((ai & bi) >= 0, ai > bi, ai < bi)));
+			else
+				set_vr(op.rt, sext<s32[4]>(select((ai & bi) >= 0, ai > bi, ai < bi)));
 		}
 		else
 		{