PPU LLVM: Fix VMAXFP, VMINFP NaN handling

2025-03-14 01:27:00 +00:00 · 2020-07-30 18:39:45 +03:00 · 2020-07-30 18:39:45 +03:00 · 6a51c27fde
commit 6a51c27fde
parent 17f965c171
2 changed files with 30 additions and 8 deletions
--- a/rpcs3/Emu/CPU/CPUTranslator.h
+++ b/rpcs3/Emu/CPU/CPUTranslator.h
@@ -2744,22 +2744,44 @@ public:
 	}

 	// TODO: Support doubles
-	auto fre(value_t<f32[4]> a)
+	template <typename T, typename = std::enable_if_t<llvm_value_t<typename T::type>::esize == 32u && llvm_value_t<typename T::type>::is_float>>
+	auto fre(T a)
 	{
-		decltype(a) result;
+		value_t<typename T::type> result;
 		const auto av = a.eval(m_ir);
 		result.value  = m_ir->CreateCall(m_module->getOrInsertFunction("llvm.x86.sse.rcp.ps", av->getType(), av->getType()).getCallee(), {av});
 		return result;
 	}

-	auto frsqe(value_t<f32[4]> a)
+	template <typename T, typename = std::enable_if_t<llvm_value_t<typename T::type>::esize == 32u && llvm_value_t<typename T::type>::is_float>>
+	auto frsqe(T a)
 	{
-		decltype(a) result;
+		value_t<typename T::type> result;
 		const auto av = a.eval(m_ir);
 		result.value  = m_ir->CreateCall(m_module->getOrInsertFunction("llvm.x86.sse.rsqrt.ps", av->getType(), av->getType()).getCallee(), {av});
 		return result;
 	}

+	template <typename T, typename U, typename = std::enable_if_t<std::is_same_v<typename T::type, typename U::type> && llvm_value_t<typename T::type>::esize == 32u && llvm_value_t<typename T::type>::is_float>>
+	auto fmax(T a, U b)
+	{
+		value_t<typename T::type> result;
+		const auto av = a.eval(m_ir);
+		const auto bv = b.eval(m_ir);
+		result.value  = m_ir->CreateCall(m_module->getOrInsertFunction("llvm.x86.sse.max.ps", av->getType(), av->getType(), av->getType()).getCallee(), {av, bv});
+		return result;
+	}
+
+	template <typename T, typename U, typename = std::enable_if_t<std::is_same_v<typename T::type, typename U::type> && llvm_value_t<typename T::type>::esize == 32u && llvm_value_t<typename T::type>::is_float>>
+	auto fmin(T a, U b)
+	{
+		value_t<typename T::type> result;
+		const auto av = a.eval(m_ir);
+		const auto bv = b.eval(m_ir);
+		result.value  = m_ir->CreateCall(m_module->getOrInsertFunction("llvm.x86.sse.min.ps", av->getType(), av->getType(), av->getType()).getCallee(), {av, bv});
+		return result;
+	}
+
 	template <typename T1, typename T2>
 	value_t<u8[16]> pshufb(T1 a, T2 b)
 	{
--- a/rpcs3/Emu/Cell/PPUTranslator.cpp
+++ b/rpcs3/Emu/Cell/PPUTranslator.cpp
@@ -973,8 +973,8 @@ void PPUTranslator::VMADDFP(ppu_opcode_t op)

 void PPUTranslator::VMAXFP(ppu_opcode_t op)
 {
-	const auto ab = GetVrs(VrType::vf, op.va, op.vb);
-	SetVr(op.vd, VecHandleResult(m_ir->CreateSelect(m_ir->CreateFCmpOGT(ab[0], ab[1]), ab[0], ab[1])));
+	const auto [a, b] = get_vrs<f32[4]>(op.va, op.vb);
+	set_vr(op.vd, vec_handle_result(select(bitcast<u32[4]>(fmin(a, b)) == bitcast<u32[4]>(a), b, a)));
 }

 void PPUTranslator::VMAXSB(ppu_opcode_t op)
@@ -1045,8 +1045,8 @@ void PPUTranslator::VMHRADDSHS(ppu_opcode_t op)

 void PPUTranslator::VMINFP(ppu_opcode_t op)
 {
-	const auto ab = GetVrs(VrType::vf, op.va, op.vb);
-	SetVr(op.vd, VecHandleResult(m_ir->CreateSelect(m_ir->CreateFCmpOLT(ab[0], ab[1]), ab[0], ab[1])));
+	const auto [a, b] = get_vrs<f32[4]>(op.va, op.vb);
+	set_vr(op.vd, vec_handle_result(select(bitcast<u32[4]>(fmax(a, b)) == bitcast<u32[4]>(a), b, a)));
 }

 void PPUTranslator::VMINSB(ppu_opcode_t op)