From 8c857b45f85beff0b3610bedf823677fa6379375 Mon Sep 17 00:00:00 2001
From: Scott Mansell <phiren@gmail.com>
Date: Sun, 27 Jul 2014 20:55:47 +1200
Subject: [PATCH] Fix PPC_FP on non-sse4.1 code paths.

The Invalid Operation flag in the x87 FPU status word is sticky, so once a
single NaN goes through the old code on CPUs without SSE4.1, all subsequent
floats are mutilated.

Patch to emulate PTEST by Fiora.

Fixes issue 7237 and issue 7510.
---
 .../Core/Core/PowerPC/JitCommon/Jit_Util.cpp  | 20 +++++++++++++++----
 Source/Core/Core/PowerPC/JitCommon/Jit_Util.h |  3 ++-
 2 files changed, 18 insertions(+), 5 deletions(-)
diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp
index 3c8eababe1..9b7c8360fc 100644
--- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp
+++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp
@@ -551,6 +551,8 @@ void EmuCodeBlock::ForceSinglePrecisionP(X64Reg xmm) {
 static u32 GC_ALIGNED16(temp32);
 static u64 GC_ALIGNED16(temp64);
 
+static const float GC_ALIGNED16(m_zero[]) = { 0.0f, 0.0f, 0.0f, 0.0f }; // all-zero 128-bit constant (four 0.0f lanes)
+
 #if _M_X86_64
 static const __m128i GC_ALIGNED16(single_qnan_bit) = _mm_set_epi64x(0, 0x0000000000400000);
 static const __m128i GC_ALIGNED16(single_exponent) = _mm_set_epi64x(0, 0x000000007f800000);
@@ -669,8 +671,13 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
 		PTEST(XMM1, M((void *)&double_exponent));
 		cond = CC_NC;
 	} else {
-		FNSTSW_AX();
-		TEST(16, R(AX), Imm16(x87_InvalidOperation));
+		// emulate PTEST's carry flag: NZ <=> XMM1 has an exponent-mask bit clear.
+		// The x87 Invalid flag can't be used here because it is sticky.
+		MOVSD(XMM0, M((void *)&double_exponent)); // XMM0 = mask, upper half zeroed
+		PAND(XMM0, R(XMM1)); // XMM0 = mask & value
+		PCMPEQB(XMM0, M((void *)&double_exponent)); // 0xff in each byte that equals the mask
+		PMOVMSKB(EAX, R(XMM0));
+		CMP(32, R(EAX), Imm32(0xffff)); // ZF <=> all 16 bytes matched (PTEST would set CF)
-		cond = CC_Z;
+		cond = CC_NZ;
 	}
 	FSTP(32, M(&temp32));
@@ -706,8 +713,13 @@ void EmuCodeBlock::ConvertSingleToDouble(X64Reg dst, X64Reg src, bool src_is_gpr
 		PTEST(XMM1, M((void *)&single_exponent));
 		cond = CC_NC;
 	} else {
-		FNSTSW_AX();
-		TEST(16, R(AX), Imm16(x87_InvalidOperation));
+		// emulate PTEST's carry flag: NZ <=> XMM1 has an exponent-mask bit clear.
+		// The x87 Invalid flag can't be used here because it is sticky.
+		MOVSS(XMM0, M((void *)&single_exponent)); // XMM0 = mask, upper lanes zeroed
+		PAND(XMM0, R(XMM1)); // XMM0 = mask & value
+		PCMPEQB(XMM0, M((void *)&single_exponent)); // 0xff in each byte that equals the mask
+		PMOVMSKB(EAX, R(XMM0));
+		CMP(32, R(EAX), Imm32(0xffff)); // ZF <=> all 16 bytes matched (PTEST would set CF)
-		cond = CC_Z;
+		cond = CC_NZ;
 	}
 	FSTP(64, M(&temp64));
diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h
index 85a3320d0a..791f5418d1 100644
--- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h
+++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h
@@ -57,8 +57,9 @@ public:
 	void ForceSinglePrecisionS(Gen::X64Reg xmm);
 	void ForceSinglePrecisionP(Gen::X64Reg xmm);
 
-	// AX might get trashed
+	// EAX and XMM0 might get trashed
 	void ConvertSingleToDouble(Gen::X64Reg dst, Gen::X64Reg src, bool src_is_gpr = false);
+	// EAX and XMM0 might get trashed
 	void ConvertDoubleToSingle(Gen::X64Reg dst, Gen::X64Reg src);
 protected:
 	std::unordered_map<u8 *, u32> registersInUseAtLoc;