From 8c857b45f85beff0b3610bedf823677fa6379375 Mon Sep 17 00:00:00 2001 From: Scott Mansell Date: Sun, 27 Jul 2014 20:55:47 +1200 Subject: [PATCH] Fix PPC_FP on non-sse4.1 code paths. The Invalid bit on the x87 fpu is sticky, so once a single NaN goes through the old code on CPUs without sse4.1 all future floats are mutilated. Patch to emulate PTEST by Fiora. Fixes issue 7237 and issue 7510. --- .../Core/Core/PowerPC/JitCommon/Jit_Util.cpp | 20 +++++++++++++++---- Source/Core/Core/PowerPC/JitCommon/Jit_Util.h | 3 ++- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp index 3c8eababe1..9b7c8360fc 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp @@ -551,6 +551,8 @@ void EmuCodeBlock::ForceSinglePrecisionP(X64Reg xmm) { static u32 GC_ALIGNED16(temp32); static u64 GC_ALIGNED16(temp64); +static const float GC_ALIGNED16(m_zero[]) = { 0.0f, 0.0f, 0.0f, 0.0f }; + #if _M_X86_64 static const __m128i GC_ALIGNED16(single_qnan_bit) = _mm_set_epi64x(0, 0x0000000000400000); static const __m128i GC_ALIGNED16(single_exponent) = _mm_set_epi64x(0, 0x000000007f800000); @@ -669,8 +671,13 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src) PTEST(XMM1, M((void *)&double_exponent)); cond = CC_NC; } else { - FNSTSW_AX(); - TEST(16, R(AX), Imm16(x87_InvalidOperation)); + // emulate PTEST; checking FPU flags is incorrect because the NaN bits + // are sticky (persist between instructions) + MOVSD(XMM0, M((void *)&double_exponent)); + PAND(XMM0, R(XMM1)); + PCMPEQB(XMM0, M((void *)&m_zero)); + PMOVMSKB(EAX, R(XMM0)); + CMP(32, R(EAX), Imm32(0xffff)); cond = CC_Z; } FSTP(32, M(&temp32)); @@ -706,8 +713,13 @@ void EmuCodeBlock::ConvertSingleToDouble(X64Reg dst, X64Reg src, bool src_is_gpr PTEST(XMM1, M((void *)&single_exponent)); cond = CC_NC; } else { - FNSTSW_AX(); - TEST(16, R(AX), Imm16(x87_InvalidOperation)); + // emulate PTEST; checking FPU flags is incorrect because the NaN bits + // are sticky (persist between instructions) + MOVSS(XMM0, M((void *)&single_exponent)); + PAND(XMM0, R(XMM1)); + PCMPEQB(XMM0, M((void *)&m_zero)); + PMOVMSKB(EAX, R(XMM0)); + CMP(32, R(EAX), Imm32(0xffff)); cond = CC_Z; } FSTP(64, M(&temp64)); diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h index 85a3320d0a..791f5418d1 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h @@ -57,8 +57,9 @@ public: void ForceSinglePrecisionS(Gen::X64Reg xmm); void ForceSinglePrecisionP(Gen::X64Reg xmm); - // AX might get trashed + // EAX might get trashed void ConvertSingleToDouble(Gen::X64Reg dst, Gen::X64Reg src, bool src_is_gpr = false); + // EAX might get trashed void ConvertDoubleToSingle(Gen::X64Reg dst, Gen::X64Reg src); protected: std::unordered_map registersInUseAtLoc;