From 7d74e55109d9681d743f7fafe93e33cd3c958ead Mon Sep 17 00:00:00 2001 From: LinesPrower Date: Thu, 24 Sep 2009 17:43:46 +0000 Subject: [PATCH] Lots of work on improving the floating point emulation. Note that most changes affect only the interpreter mode. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4314 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/Core/Core.vcproj | 4 + .../Src/PowerPC/Interpreter/Interpreter.h | 2 +- .../PowerPC/Interpreter/Interpreter_FPUtils.h | 243 +++++++++++ .../Interpreter/Interpreter_FloatingPoint.cpp | 376 +++++++++++------- .../Interpreter/Interpreter_LoadStore.cpp | 30 +- .../Interpreter_LoadStorePaired.cpp | 38 +- .../Interpreter/Interpreter_Paired.cpp | 272 +++++++++---- .../Interpreter_SystemRegisters.cpp | 130 +++--- 8 files changed, 801 insertions(+), 294 deletions(-) create mode 100644 Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FPUtils.h diff --git a/Source/Core/Core/Core.vcproj b/Source/Core/Core/Core.vcproj index a7fa7cc3d3..874a69ad8a 100644 --- a/Source/Core/Core/Core.vcproj +++ b/Source/Core/Core/Core.vcproj @@ -1030,6 +1030,10 @@ /> + + diff --git a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter.h b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter.h index 04997786a7..76eb67947e 100644 --- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter.h +++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter.h @@ -307,7 +307,7 @@ namespace Interpreter // paired helper float Helper_Dequantize(const u32 _Addr, const EQuantizeType _quantizeType, const unsigned int _uScale); - void Helper_Quantize (const u32 _Addr, const float _fValue, const EQuantizeType _quantizeType, const unsigned _uScale); + void Helper_Quantize (const u32 _Addr, const double _fValue, const EQuantizeType _quantizeType, const unsigned _uScale); // other helper u32 Helper_Mask(int mb, int me); diff --git a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FPUtils.h 
b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FPUtils.h new file mode 100644 index 0000000000..2aa7a31f89 --- /dev/null +++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FPUtils.h @@ -0,0 +1,243 @@ +// Copyright (C) 2009 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#include "../../Core.h" +#include "Interpreter.h" +#include "MathUtil.h" + +using namespace MathUtil; + +// warning! very slow! +//#define VERY_ACCURATE_FP + +#define MIN_SINGLE 0xc7efffffe0000000ull +#define MAX_SINGLE 0x47efffffe0000000ull + +// FPSCR exception flags +const u32 FPSCR_OX = (u32)1 << (31 - 3); +const u32 FPSCR_UX = (u32)1 << (31 - 4); +const u32 FPSCR_ZX = (u32)1 << (31 - 5); +// ! XX shouldn't be accessed directly to set it to 1. Use SetFI() instead ! 
+const u32 FPSCR_XX = (u32)1 << (31 - 6); +const u32 FPSCR_VXSNAN = (u32)1 << (31 - 7); +const u32 FPSCR_VXISI = (u32)1 << (31 - 8); +const u32 FPSCR_VXIDI = (u32)1 << (31 - 9); +const u32 FPSCR_VXZDZ = (u32)1 << (31 - 10); +const u32 FPSCR_VXIMZ = (u32)1 << (31 - 11); +const u32 FPSCR_VXVC = (u32)1 << (31 - 12); +const u32 FPSCR_VXSOFT = (u32)1 << (31 - 21); +const u32 FPSCR_VXSQRT = (u32)1 << (31 - 22); +const u32 FPSCR_VXCVI = (u32)1 << (31 - 23); + +const u32 FPSCR_VX_ANY = FPSCR_VXSNAN | FPSCR_VXISI | FPSCR_VXIDI | FPSCR_VXZDZ | + FPSCR_VXIMZ | FPSCR_VXVC | FPSCR_VXSOFT | FPSCR_VXSQRT | FPSCR_VXCVI; + +const u32 FPSCR_ANY_X = FPSCR_OX | FPSCR_UX | FPSCR_ZX | FPSCR_XX | FPSCR_VX_ANY; + +const u64 PPC_NAN_U64 = 0x7ff8000000000000ull; +const double PPC_NAN = *(double* const)&PPC_NAN_U64; + +inline bool IsINF(double x) +{ + return ((*(u64*)&x) & ~DOUBLE_SIGN) == DOUBLE_EXP; +} + +inline void SetFPException(u32 mask) +{ + if ((FPSCR.Hex & mask) != mask) + FPSCR.FX = 1; + FPSCR.Hex |= mask; +} + +inline void SetFI(int FI) +{ + if (FI) + { + SetFPException(FPSCR_XX); + } + FPSCR.FI = FI; +} + +inline void UpdateFPSCR() +{ + FPSCR.VX = (FPSCR.Hex & FPSCR_VX_ANY) != 0; + FPSCR.FEX = 0; // we assume that "?E" bits are always 0 +} + +inline double ForceSingle(double _x) +{ + //if (FPSCR.RN != 0) + // PanicAlert("RN = %d at %x", (int)FPSCR.RN, PC); + if (FPSCR.NI) + _x = FlushToZeroAsFloat(_x); + double x = static_cast<float>(_x); + return x; +} + +inline double ForceDouble(double d) +{ + //if (FPSCR.RN != 0) + // PanicAlert("RN = %d at %x", (int)FPSCR.RN, PC); + + //if (FPSCR.NI) + //{ + // IntDouble x; x.d = d; + //if ((x.i & DOUBLE_EXP) == 0) + // x.i &= DOUBLE_SIGN; // turn into signed zero + // return x.d; + //} + return d; +} + +// these functions allow the behaviour of operations to be modified globally +// also, these may be used to set flags like FR, FI, OX, UX + +inline double NI_mul(const double a, const double b) +{ +#ifdef VERY_ACCURATE_FP + if (a != a) return a; + if (b != b) 
return b; + double t = a * b; + if (t != t) + { + SetFPException(FPSCR_VXIMZ); + return PPC_NAN; + } + return t; +#else + return a * b; +#endif +} + +inline double NI_add(const double a, const double b) +{ +#ifdef VERY_ACCURATE_FP + if (a != a) return a; + if (b != b) return b; + double t = a + b; + if (t != t) + { + SetFPException(FPSCR_VXISI); + return PPC_NAN; + } + return t; +#else + return a + b; +#endif +} + +inline double NI_sub(const double a, const double b) +{ +#ifdef VERY_ACCURATE_FP + if (a != a) return a; + if (b != b) return b; + double t = a - b; + if (t != t) + { + SetFPException(FPSCR_VXISI); + return PPC_NAN; + } + return t; +#else + return a - b; +#endif +} + +inline double NI_madd(const double a, const double b, const double c) +{ +#ifdef VERY_ACCURATE_FP + if (a != a) return a; + if (c != c) return c; + if (b != b) return b; + double t = a * b; + if (t != t) + { + SetFPException(FPSCR_VXIMZ); + return PPC_NAN; + } + t = t + c; + if (t != t) + { + SetFPException(FPSCR_VXISI); + return PPC_NAN; + } + return t; +#else + return NI_add(NI_mul(a, b), c); +#endif +} + +inline double NI_msub(const double a, const double b, const double c) +{ +#ifdef VERY_ACCURATE_FP + if (a != a) return a; + if (c != c) return c; + if (b != b) return b; + double t = a * b; + if (t != t) + { + SetFPException(FPSCR_VXIMZ); + return PPC_NAN; + } + t = t - c; + if (t != t) + { + SetFPException(FPSCR_VXISI); + return PPC_NAN; + } + return t; +#else + return NI_sub(NI_mul(a, b), c); +#endif +} + +// used by stfsXX instructions and ps_rsqrte +inline u32 ConvertToSingle(u64 x) +{ + u32 exp = (x >> 52) & 0x7ff; + if (exp > 896 || (x & ~DOUBLE_SIGN) == 0) + { + return ((x >> 32) & 0xc0000000) | ((x >> 29) & 0x3fffffff); + } + else if (exp >= 874) + { + u32 t = (u32)(0x80000000 | ((x & DOUBLE_FRAC) >> 21)); + t = t >> (905 - exp); + t |= (x >> 32) & 0x80000000; + return t; + } + else + { + // this is said to be undefined + // based on hardware tests + return ((x >> 32) & 
0xc0000000) | ((x >> 29) & 0x3fffffff); + } +} + +// used by psq_stXX operations. +inline u32 ConvertToSingleFTZ(u64 x) +{ + u32 exp = (x >> 52) & 0x7ff; + if (exp > 896 || (x & ~DOUBLE_SIGN) == 0) + { + return ((x >> 32) & 0xc0000000) | ((x >> 29) & 0x3fffffff); + } + else + { + return (x >> 32) & 0x80000000; + } +} \ No newline at end of file diff --git a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp index 902c7c11f6..a7a44c1c6b 100644 --- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp +++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp @@ -35,13 +35,13 @@ #include "../../Core.h" #include "Interpreter.h" #include "MathUtil.h" +#include "Interpreter_FPUtils.h" using namespace MathUtil; namespace Interpreter { -void UpdateFPSCR(UReg_FPSCR fp); void UpdateSSEState(); // Extremely rare - actually, never seen. @@ -52,9 +52,7 @@ void Helper_UpdateCR1(double _fValue) } void fcmpo(UGeckoInstruction _inst) -{ - // Use FlushToZeroAsFloat() to fix a couple of games - but seriously, - // the real problem should be fixed instead. +{ double fa = rPS0(_inst.FA); double fb = rPS0(_inst.FB); @@ -67,10 +65,17 @@ void fcmpo(UGeckoInstruction _inst) { FPSCR.FX = 1; compareResult = 1; - if (IsSNAN(fa) || IsSNAN(fb)) - FPSCR.VXSNAN = 1; - if (!FPSCR.FEX || IsQNAN(fa) || IsQNAN(fb)) - FPSCR.VXVC = 1; + if (IsSNAN(fa) || IsSNAN(fb)) + { + SetFPException(FPSCR_VXSNAN); + if (FPSCR.VE == 0) + SetFPException(FPSCR_VXVC); + } + else + { + //if (IsQNAN(fa) || IsQNAN(fb)) // this is always true + SetFPException(FPSCR_VXVC); + } } FPSCR.FPRF = compareResult; @@ -78,9 +83,7 @@ void fcmpo(UGeckoInstruction _inst) } void fcmpu(UGeckoInstruction _inst) -{ - // Use FlushToZeroAsFloat() to fix a couple of games - but seriously, - // the real problem should be fixed instead. 
+{ double fa = rPS0(_inst.FA); double fb = rPS0(_inst.FB); @@ -90,45 +93,75 @@ void fcmpu(UGeckoInstruction _inst) else if (fa > fb) compareResult = 4; else if (fa == fb) compareResult = 2; else - { - FPSCR.FX = 1; + { compareResult = 1; if (IsSNAN(fa) || IsSNAN(fb)) { - FPSCR.VXSNAN = 1; + SetFPException(FPSCR_VXSNAN); } } - FPSCR.FPRF = compareResult; SetCRField(_inst.CRFD, compareResult); } // Apply current rounding mode +// need to investigate this instruction. void fctiwx(UGeckoInstruction _inst) { const double b = rPS0(_inst.FB); u32 value; if (b > (double)0x7fffffff) { - value = 0x7fffffff; - FPSCR.VXCVI = 1; + value = 0x7fffffff; + SetFPException(FPSCR_VXCVI); + FPSCR.FI = 0; + FPSCR.FR = 0; } - else if (b < -(double)0x7fffffff) + else if (b < -(double)0x80000000) { - value = 0x80000000; - FPSCR.VXCVI = 1; + value = 0x80000000; + SetFPException(FPSCR_VXCVI); + FPSCR.FI = 0; + FPSCR.FR = 0; } else { - value = (u32)(s32)_mm_cvtsd_si32(_mm_set_sd(b)); // obey current rounding mode -// double d_value = (double)value; -// bool inexact = (d_value != b); -// FPSCR.FI = inexact ? 
1 : 0; -// FPSCR.XX |= FPSCR.FI; -// FPSCR.FR = fabs(d_value) > fabs(b); - } - - //TODO: FR + s32 i; + switch (FPSCR.RN) + { + case 0: // nearest + { + double t = b + 0.5; + i = (s32)t; + if (t - i < 0) i--; + break; + } + case 1: // zero + i = (s32)b; + break; + case 2: // +inf + i = (s32)b; + if (b - i > 0) i++; + break; + case 3: // -inf + i = (s32)b; + if (b - i < 0) i--; + break; + } + value = (u32)i; + double di = i; + if (di == b) + { + FPSCR.FI = 0; + FPSCR.FR = 0; + } + else + { + SetFI(1); + FPSCR.FR = fabs(di) > fabs(b); + } + } + //FPRF undefined riPS0(_inst.FD) = (u64)value; // zero extend @@ -149,25 +182,36 @@ void fctiwzx(UGeckoInstruction _inst) if (b > (double)0x7fffffff) { value = 0x7fffffff; - FPSCR.VXCVI = 1; + SetFPException(FPSCR_VXCVI); + FPSCR.FI = 0; + FPSCR.FR = 0; } - else if (b < -(double)0x7fffffff) + else if (b < -(double)0x80000000) { value = 0x80000000; - FPSCR.VXCVI = 1; + SetFPException(FPSCR_VXCVI); + FPSCR.FI = 0; + FPSCR.FR = 0; } else { - value = (u32)(s32)_mm_cvttsd_si32(_mm_set_sd(b)); // truncate -// double d_value = (double)value; -// bool inexact = (d_value != b); -// FPSCR.FI = inexact ? 1 : 0; -// FPSCR.XX |= FPSCR.FI; -// FPSCR.FR = 1; //fabs(d_value) > fabs(b); + s32 i = (s32)b; + double di = i; + if (di == b) + { + FPSCR.FI = 0; + FPSCR.FR = 0; + } + else + { + SetFI(1); + FPSCR.FR = fabs(di) > fabs(b); + } + value = (u32)i; } riPS0(_inst.FD) = (u64)value; - if (_inst.Rc) + if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } @@ -206,15 +250,15 @@ void fselx(UGeckoInstruction _inst) if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } - // !!! warning !!! 
// PS1 must be set to the value of PS0 or DragonballZ will be f**ked up // PS1 is said to be undefined void frspx(UGeckoInstruction _inst) // round to single { double b = rPS0(_inst.FB); - double rounded = (double)(float)b; - //FPSCR.FI = b != rounded; + double rounded = ForceSingle(b); + SetFI(b != rounded); + FPSCR.FR = fabs(rounded) > fabs(b); UpdateFPRF(rounded); rPS0(_inst.FD) = rPS1(_inst.FD) = rounded; return; @@ -223,24 +267,26 @@ void frspx(UGeckoInstruction _inst) // round to single void fmulx(UGeckoInstruction _inst) { - rPS0(_inst.FD) = rPS0(_inst.FA) * rPS0(_inst.FC); - FPSCR.FI = 0; - FPSCR.FR = 1; + rPS0(_inst.FD) = ForceDouble(NI_mul(rPS0(_inst.FA), rPS0(_inst.FC))); + FPSCR.FI = 0; // are these flags important? + FPSCR.FR = 0; + UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } void fmulsx(UGeckoInstruction _inst) { - double d_value = rPS0(_inst.FA) * rPS0(_inst.FC); - rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast(d_value); - FPSCR.FI = d_value != rPS0(_inst.FD); + double d_value = NI_mul(rPS0(_inst.FA), rPS0(_inst.FC)); + rPS0(_inst.FD) = rPS1(_inst.FD) = ForceSingle(d_value); + //FPSCR.FI = d_value != rPS0(_inst.FD); + FPSCR.FI = 0; + FPSCR.FR = 0; UpdateFPRF(rPS0(_inst.FD)); - if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); + if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } - void fmaddx(UGeckoInstruction _inst) { - double result = (rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB); + double result = ForceDouble(NI_madd( rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB) )); rPS0(_inst.FD) = result; UpdateFPRF(result); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); @@ -248,85 +294,188 @@ void fmaddx(UGeckoInstruction _inst) void fmaddsx(UGeckoInstruction _inst) { - double d_value = (rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB); - rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast(d_value); + double d_value = NI_madd( rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB) ); + rPS0(_inst.FD) = rPS1(_inst.FD) = 
ForceSingle(d_value); FPSCR.FI = d_value != rPS0(_inst.FD); FPSCR.FR = 0; UpdateFPRF(rPS0(_inst.FD)); - if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); + if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } void faddx(UGeckoInstruction _inst) { - rPS0(_inst.FD) = rPS0(_inst.FA) + rPS0(_inst.FB); + rPS0(_inst.FD) = ForceDouble(NI_add(rPS0(_inst.FA), rPS0(_inst.FB))); UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } void faddsx(UGeckoInstruction _inst) { - rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast(rPS0(_inst.FA) + rPS0(_inst.FB)); + rPS0(_inst.FD) = rPS1(_inst.FD) = ForceSingle(NI_add(rPS0(_inst.FA), rPS0(_inst.FB))); UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } - void fdivx(UGeckoInstruction _inst) { double a = rPS0(_inst.FA); double b = rPS0(_inst.FB); - rPS0(_inst.FD) = a / b; - if (b == 0.0) { - if (!FPSCR.ZX) - FPSCR.FX = 1; - FPSCR.ZX = 1; - FPSCR.XX = 1; + if (a != a) rPS0(_inst.FD) = a; + else if (b != b) rPS0(_inst.FD) = b; + else + { + rPS0(_inst.FD) = ForceDouble(a / b); + if (b == 0.0) + { + if (a == 0.0) + { + SetFPException(FPSCR_VXZDZ); + rPS0(_inst.FD) = PPC_NAN; + } + SetFPException(FPSCR_ZX); + } + else + { + if (IsINF(a) && IsINF(b)) + { + SetFPException(FPSCR_VXIDI); + rPS0(_inst.FD) = PPC_NAN; + } + } } UpdateFPRF(rPS0(_inst.FD)); + // FR,FI,OX,UX??? 
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } void fdivsx(UGeckoInstruction _inst) { double a = rPS0(_inst.FA); double b = rPS0(_inst.FB); - rPS0(_inst.FD) = rPS1(_inst.FD) = (float)(a / b); - if (b == 0.0) + double res; + if (a != a) res = a; + else if (b != b) res = b; + else { - if (!FPSCR.ZX) - FPSCR.FX = 1; - FPSCR.ZX = 1; - FPSCR.XX = 1; + res = ForceSingle(a / b); + if (b == 0.0) + { + if (a == 0.0) + { + SetFPException(FPSCR_VXZDZ); + res = PPC_NAN; + } + SetFPException(FPSCR_ZX); + } + else + { + if (IsINF(a) && IsINF(b)) + { + SetFPException(FPSCR_VXIDI); + res = PPC_NAN; + } + } } + rPS0(_inst.FD) = rPS1(_inst.FD) = res; UpdateFPRF(rPS0(_inst.FD)); - if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); + if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } // Single precision only. void fresx(UGeckoInstruction _inst) { - float b = (float)rPS0(_inst.FB); - float one_over = 1.0f / b; - rPS0(_inst.FD) = rPS1(_inst.FD) = one_over; + double b = rPS0(_inst.FB); + double one_over = ForceSingle(1.0 / b); + // this is based on the real hardware tests + if (b != 0.0 && IsINF(one_over)) + { + if (one_over > 0) + riPS0(_inst.FD) = riPS1(_inst.FD) = MAX_SINGLE; + else + riPS0(_inst.FD) = riPS1(_inst.FD) = MIN_SINGLE; + } + else + { + rPS0(_inst.FD) = rPS1(_inst.FD) = one_over; + } if (b == 0.0) { - if (!FPSCR.ZX) - FPSCR.FX = 1; - FPSCR.ZX = 1; - FPSCR.XX = 1; + SetFPException(FPSCR_ZX); } UpdateFPRF(rPS0(_inst.FD)); - if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); + if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } void frsqrtex(UGeckoInstruction _inst) { - float b = (float)rPS0(_inst.FB); - if (b < 0.0) { - FPSCR.VXSQRT = 1; - } else if (b == 0) { - FPSCR.ZX = 1; + double b = rPS0(_inst.FB); + if (b < 0.0) + { + SetFPException(FPSCR_VXSQRT); + rPS0(_inst.FD) = PPC_NAN; + } + else + { + if (b == 0.0) SetFPException(FPSCR_ZX); + rPS0(_inst.FD) = ForceDouble(1.0 / sqrt(b)); } - rPS0(_inst.FD) = 1.0f / sqrtf(b); + UpdateFPRF(rPS0(_inst.FD)); + if (_inst.Rc) 
Helper_UpdateCR1(rPS0(_inst.FD)); +} + +void fmsubx(UGeckoInstruction _inst) +{ + rPS0(_inst.FD) = ForceDouble(NI_msub( rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB) )); + UpdateFPRF(rPS0(_inst.FD)); + if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); +} + +void fmsubsx(UGeckoInstruction _inst) +{ + rPS0(_inst.FD) = rPS1(_inst.FD) = + ForceSingle( NI_msub(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB) )); + UpdateFPRF(rPS0(_inst.FD)); + if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); +} + + +void fnmaddx(UGeckoInstruction _inst) +{ + rPS0(_inst.FD) = ForceDouble(0.0-NI_madd(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB))); + UpdateFPRF(rPS0(_inst.FD)); + if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); +} +void fnmaddsx(UGeckoInstruction _inst) +{ + rPS0(_inst.FD) = rPS1(_inst.FD) = + ForceSingle(0.0-NI_madd(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB))); + UpdateFPRF(rPS0(_inst.FD)); + if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); +} + +void fnmsubx(UGeckoInstruction _inst) +{ + rPS0(_inst.FD) = ForceDouble(0.0-NI_msub(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB))); + UpdateFPRF(rPS0(_inst.FD)); + if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); +} +void fnmsubsx(UGeckoInstruction _inst) +{ + rPS0(_inst.FD) = rPS1(_inst.FD) = + ForceSingle(0.0-NI_msub(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB))); + UpdateFPRF(rPS0(_inst.FD)); + if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); +} + + +void fsubx(UGeckoInstruction _inst) +{ + rPS0(_inst.FD) = ForceDouble(NI_sub(rPS0(_inst.FA), rPS0(_inst.FB))); + UpdateFPRF(rPS0(_inst.FD)); + if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); +} +void fsubsx(UGeckoInstruction _inst) +{ + rPS0(_inst.FD) = rPS1(_inst.FD) = ForceSingle(NI_sub(rPS0(_inst.FA), rPS0(_inst.FB))); UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } @@ -344,63 +493,4 @@ void fsqrtx(UGeckoInstruction _inst) if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } -void fmsubx(UGeckoInstruction _inst) -{ - rPS0(_inst.FD) = (rPS0(_inst.FA) 
* rPS0(_inst.FC)) - rPS0(_inst.FB); - UpdateFPRF(rPS0(_inst.FD)); - if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); -} - -void fmsubsx(UGeckoInstruction _inst) -{ - rPS0(_inst.FD) = rPS1(_inst.FD) = - static_cast<float>((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB)); - UpdateFPRF(rPS0(_inst.FD)); - if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); -} - - -void fnmaddx(UGeckoInstruction _inst) -{ - rPS0(_inst.FD) = -((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB)); - UpdateFPRF(rPS0(_inst.FD)); - if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); -} -void fnmaddsx(UGeckoInstruction _inst) -{ - rPS0(_inst.FD) = rPS1(_inst.FD) = - static_cast<float>(-((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB))); - UpdateFPRF(rPS0(_inst.FD)); - if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); -} - - -void fnmsubx(UGeckoInstruction _inst) -{ - rPS0(_inst.FD) = -((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB)); - UpdateFPRF(rPS0(_inst.FD)); - if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); -} -void fnmsubsx(UGeckoInstruction _inst) -{ - rPS0(_inst.FD) = rPS1(_inst.FD) = - static_cast<float>(-((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB))); - UpdateFPRF(rPS0(_inst.FD)); - if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); -} - - -void fsubx(UGeckoInstruction _inst) -{ - rPS0(_inst.FD) = rPS0(_inst.FA) - rPS0(_inst.FB); - UpdateFPRF(rPS0(_inst.FD)); - if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); -} -void fsubsx(UGeckoInstruction _inst) -{ - rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(rPS0(_inst.FA) - rPS0(_inst.FB)); - UpdateFPRF(rPS0(_inst.FD)); - if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); -} - } // namespace diff --git a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp index 9c004fb5cb..10d9cc7b00 100644 --- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp +++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp @@ -28,6 +28,8 @@ #include "../Jit64/Jit.h" 
#include "../JitCommon/JitCache.h" +#include "Interpreter_FPUtils.h" + namespace Interpreter { @@ -234,16 +236,16 @@ void stfdu(UGeckoInstruction _inst) void stfs(UGeckoInstruction _inst) { - double value = rPS0(_inst.FS); - float fTemp = (float)value; - Memory::Write_U32(*(u32*)&fTemp, Helper_Get_EA(_inst)); + //double value = rPS0(_inst.FS); + //float fTemp = (float)value; + //Memory::Write_U32(*(u32*)&fTemp, Helper_Get_EA(_inst)); + Memory::Write_U32(ConvertToSingle(riPS0(_inst.FS)), Helper_Get_EA(_inst)); } void stfsu(UGeckoInstruction _inst) -{ - float fTemp = (float)rPS0(_inst.FS); - u32 uAddress = Helper_Get_EA_U(_inst); - Memory::Write_U32(*(u32*)&fTemp, uAddress); +{ + u32 uAddress = Helper_Get_EA_U(_inst); + Memory::Write_U32(ConvertToSingle(riPS0(_inst.FS)), uAddress); m_GPR[_inst.RA] = uAddress; } @@ -465,19 +467,15 @@ void stfiwx(UGeckoInstruction _inst) void stfsux(UGeckoInstruction _inst) -{ - double value = rPS0(_inst.FS); - float fTemp = (float)value; - u32 uAddress = Helper_Get_EA_UX(_inst); - Memory::Write_U32(*(u32*)&fTemp, uAddress); +{ + u32 uAddress = Helper_Get_EA_UX(_inst); + Memory::Write_U32(ConvertToSingle(riPS0(_inst.FS)), uAddress); m_GPR[_inst.RA] = uAddress; } void stfsx(UGeckoInstruction _inst) -{ - double value = rPS0(_inst.FS); - float fTemp = (float)value; - Memory::Write_U32(*(u32 *)&fTemp, Helper_Get_EA_X(_inst)); +{ + Memory::Write_U32(ConvertToSingle(riPS0(_inst.FS)), Helper_Get_EA_X(_inst)); } void sthbrx(UGeckoInstruction _inst) diff --git a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp index 9a754a448d..c0fe7008b3 100644 --- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp +++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp @@ -19,6 +19,8 @@ #include "Interpreter.h" #include "../../HW/Memmap.h" +#include "Interpreter_FPUtils.h" + namespace Interpreter { @@ -71,40 +73,40 
@@ inline T CLAMP(T a, T bottom, T top) { return a; } -void Helper_Quantize(const u32 _Addr, const float _fValue, +void Helper_Quantize(const u32 _Addr, const double _fValue, const EQuantizeType _quantizeType, const unsigned int _uScale) { switch(_quantizeType) { - case QUANTIZE_FLOAT: - Memory::Write_U32(*(u32*)&_fValue,_Addr); + case QUANTIZE_FLOAT: + Memory::Write_U32( ConvertToSingleFTZ( *(u64*)&_fValue ), _Addr ); break; // used for THP player case QUANTIZE_U8: { - float fResult = CLAMP(_fValue * m_quantizeTable[_uScale], 0.0f, 255.0f); + float fResult = CLAMP((float)_fValue * m_quantizeTable[_uScale], 0.0f, 255.0f); Memory::Write_U8((u8)fResult, _Addr); } break; case QUANTIZE_U16: { - float fResult = CLAMP(_fValue * m_quantizeTable[_uScale], 0.0f, 65535.0f); + float fResult = CLAMP((float)_fValue * m_quantizeTable[_uScale], 0.0f, 65535.0f); Memory::Write_U16((u16)fResult, _Addr); } break; case QUANTIZE_S8: { - float fResult = CLAMP(_fValue * m_quantizeTable[_uScale], -128.0f, 127.0f); + float fResult = CLAMP((float)_fValue * m_quantizeTable[_uScale], -128.0f, 127.0f); Memory::Write_U8((u8)(s8)fResult, _Addr); } break; case QUANTIZE_S16: { - float fResult = CLAMP(_fValue * m_quantizeTable[_uScale], -32768.0f, 32767.0f); + float fResult = CLAMP((float)_fValue * m_quantizeTable[_uScale], -32768.0f, 32767.0f); Memory::Write_U16((u16)(s16)fResult, _Addr); } break; @@ -215,8 +217,8 @@ void psq_st(UGeckoInstruction _inst) if (_inst.W == 0) { - Helper_Quantize( EA, (float)rPS0(_inst.RS), stType, stScale ); - Helper_Quantize( EA+c, (float)rPS1(_inst.RS), stType, stScale ); + Helper_Quantize( EA, rPS0(_inst.RS), stType, stScale ); + Helper_Quantize( EA+c, rPS1(_inst.RS), stType, stScale ); } else { @@ -237,12 +239,12 @@ void psq_stu(UGeckoInstruction _inst) if (_inst.W == 0) { - Helper_Quantize(EA, (float)rPS0(_inst.RS), stType, stScale); - Helper_Quantize(EA+c, (float)rPS1(_inst.RS), stType, stScale); + Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale); + 
Helper_Quantize(EA+c, rPS1(_inst.RS), stType, stScale); } else { - Helper_Quantize(EA, (float)rPS0(_inst.RS), stType, stScale); + Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale); } m_GPR[_inst.RA] = EA; } @@ -283,12 +285,12 @@ void psq_stx(UGeckoInstruction _inst) if (_inst.Wx == 0) { - Helper_Quantize(EA, (float)rPS0(_inst.RS), stType, stScale); - Helper_Quantize(EA+c, (float)rPS1(_inst.RS), stType, stScale); + Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale); + Helper_Quantize(EA+c, rPS1(_inst.RS), stType, stScale); } else { - Helper_Quantize(EA, (float)rPS0(_inst.RS), stType, stScale); + Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale); } } @@ -329,12 +331,12 @@ void psq_stux(UGeckoInstruction _inst) if (_inst.Wx == 0) { - Helper_Quantize(EA, (float)rPS0(_inst.RS), stType, stScale); - Helper_Quantize(EA+c, (float)rPS1(_inst.RS), stType, stScale); + Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale); + Helper_Quantize(EA+c, rPS1(_inst.RS), stType, stScale); } else { - Helper_Quantize(EA, (float)rPS0(_inst.RS), stType, stScale); + Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale); } m_GPR[_inst.RA] = EA; diff --git a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_Paired.cpp b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_Paired.cpp index e6c1816a1f..ce035f48ee 100644 --- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_Paired.cpp +++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_Paired.cpp @@ -21,6 +21,8 @@ #include "Interpreter.h" #include "../../HW/Memmap.h" +#include "Interpreter_FPUtils.h" + using namespace MathUtil; namespace Interpreter @@ -99,140 +101,272 @@ void ps_merge11(UGeckoInstruction _inst) if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } - // From here on, the real deal. 
- void ps_div(UGeckoInstruction _inst) -{ - rPS0(_inst.FD) = static_cast<float>(rPS0(_inst.FA) / rPS0(_inst.FB)); - rPS1(_inst.FD) = static_cast<float>(rPS1(_inst.FA) / rPS1(_inst.FB)); - if (fabs(rPS0(_inst.FB)) == 0.0) { - FPSCR.ZX = 1; +{ + u32 ex_mask = 0; + + // PS0 + { + double a = rPS0(_inst.FA); + double b = rPS0(_inst.FB); + double &res = rPS0(_inst.FD); + + if (a != a) res = a; + else if (b != b) res = b; + else + { + if (b == 0.0) + { + ex_mask |= FPSCR_ZX; + if (a == 0.0) + { + ex_mask |= FPSCR_VXZDZ; + res = PPC_NAN; + } + else + { + res = ForceSingle(a / b); + } + } + else + { + if (IsINF(a) && IsINF(b)) + { + ex_mask |= FPSCR_VXIDI; + res = PPC_NAN; + } + else + { + res = ForceSingle(a / b); + } + } + } } + + // PS1 + { + double a = rPS1(_inst.FA); + double b = rPS1(_inst.FB); + double &res = rPS1(_inst.FD); + + if (a != a) res = a; + else if (b != b) res = b; + else + { + if (b == 0.0) + { + ex_mask |= FPSCR_ZX; + if (a == 0.0) // check PS1's own numerator; rPS0(FA) may already be overwritten when FD == FA + { + ex_mask |= FPSCR_VXZDZ; + res = PPC_NAN; + } + else + { + res = ForceSingle(a / b); + } + } + else + { + if (IsINF(a) && IsINF(b)) + { + ex_mask |= FPSCR_VXIDI; + res = PPC_NAN; + } + else + { + res = ForceSingle(a / b); + } + } + } + } + + SetFPException(ex_mask); + UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } void ps_res(UGeckoInstruction _inst) { - rPS0(_inst.FD) = 1.0f / static_cast<float>(rPS0(_inst.FB)); - rPS1(_inst.FD) = 1.0f / static_cast<float>(rPS1(_inst.FB)); - if (fabs(rPS0(_inst.FB)) == 0.0) { - FPSCR.ZX = 1; + // this code is based on the real hardware tests + double a = rPS0(_inst.FB); + double b = rPS1(_inst.FB); + if (a == 0.0 || b == 0.0) + { + SetFPException(FPSCR_ZX); } + rPS0(_inst.FD) = ForceSingle(1.0 / a); + if (a != 0.0 && IsINF(rPS0(_inst.FD))) + { + if (rPS0(_inst.FD) > 0) + riPS0(_inst.FD) = MAX_SINGLE; // largest finite single + else + riPS0(_inst.FD) = MIN_SINGLE; // most negative finite single + } + rPS1(_inst.FD) = ForceSingle(1.0 / b); + if (b != 0.0 && 
IsINF(rPS1(_inst.FD))) + { + if (rPS1(_inst.FD) > 0) + riPS1(_inst.FD) = MAX_SINGLE; + else + riPS1(_inst.FD) = MIN_SINGLE; + } + UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } void ps_rsqrte(UGeckoInstruction _inst) { - rPS0(_inst.FD) = static_cast(1.0f / sqrtf((float)rPS0(_inst.FB))); - rPS1(_inst.FD) = static_cast(1.0f / sqrtf((float)rPS1(_inst.FB))); - if (fabs(rPS0(_inst.FB)) == 0.0) { - FPSCR.ZX = 1; + // this code is based on the real hardware tests + if (rPS0(_inst.FB) == 0.0 || rPS1(_inst.FB) == 0.0) + { + SetFPException(FPSCR_ZX); } + // PS0 + if (rPS0(_inst.FB) < 0.0) + { + SetFPException(FPSCR_VXSQRT); + rPS0(_inst.FD) = PPC_NAN; + } + else + { + rPS0(_inst.FD) = 1.0 / sqrt(rPS0(_inst.FB)); + u32 t = ConvertToSingle(riPS0(_inst.FD)); + rPS0(_inst.FD) = *(float*)&t; + } + // PS1 + if (rPS1(_inst.FB) < 0.0) + { + SetFPException(FPSCR_VXSQRT); + rPS1(_inst.FD) = PPC_NAN; + } + else + { + rPS1(_inst.FD) = 1.0 / sqrt(rPS1(_inst.FB)); + u32 t = ConvertToSingle(riPS1(_inst.FD)); + rPS1(_inst.FD) = *(float*)&t; + } + + UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } + void ps_sub(UGeckoInstruction _inst) { - rPS0(_inst.FD) = static_cast(rPS0(_inst.FA) - rPS0(_inst.FB)); - rPS1(_inst.FD) = static_cast(rPS1(_inst.FA) - rPS1(_inst.FB)); + rPS0(_inst.FD) = ForceSingle(NI_sub(rPS0(_inst.FA), rPS0(_inst.FB))); + rPS1(_inst.FD) = ForceSingle(NI_sub(rPS1(_inst.FA), rPS1(_inst.FB))); + UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } void ps_add(UGeckoInstruction _inst) { - rPS0(_inst.FD) = static_cast(rPS0(_inst.FA) + rPS0(_inst.FB)); - rPS1(_inst.FD) = static_cast(rPS1(_inst.FA) + rPS1(_inst.FB)); + rPS0(_inst.FD) = ForceSingle(NI_add(rPS0(_inst.FA), rPS0(_inst.FB))); + rPS1(_inst.FD) = ForceSingle(NI_add(rPS1(_inst.FA), rPS1(_inst.FB))); + UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } void ps_mul(UGeckoInstruction _inst) { - rPS0(_inst.FD) = 
static_cast(rPS0(_inst.FA) * rPS0(_inst.FC)); - rPS1(_inst.FD) = static_cast(rPS1(_inst.FA) * rPS1(_inst.FC)); + rPS0(_inst.FD) = ForceSingle(NI_mul(rPS0(_inst.FA), rPS0(_inst.FC))); + rPS1(_inst.FD) = ForceSingle(NI_mul(rPS1(_inst.FA), rPS1(_inst.FC))); + UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } void ps_msub(UGeckoInstruction _inst) { - rPS0(_inst.FD) = static_cast((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB)); - rPS1(_inst.FD) = static_cast((rPS1(_inst.FA) * rPS1(_inst.FC)) - rPS1(_inst.FB)); + rPS0(_inst.FD) = ForceSingle(NI_msub(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB))); + rPS1(_inst.FD) = ForceSingle(NI_msub(rPS1(_inst.FA), rPS1(_inst.FC), rPS1(_inst.FB))); + UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } void ps_madd(UGeckoInstruction _inst) { - rPS0(_inst.FD) = static_cast((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB)); - rPS1(_inst.FD) = static_cast((rPS1(_inst.FA) * rPS1(_inst.FC)) + rPS1(_inst.FB)); + rPS0(_inst.FD) = ForceSingle(NI_madd(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB))); + rPS1(_inst.FD) = ForceSingle(NI_madd(rPS1(_inst.FA), rPS1(_inst.FC), rPS1(_inst.FB))); + UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } void ps_nmsub(UGeckoInstruction _inst) { - rPS0(_inst.FD) = static_cast(-(rPS0(_inst.FA) * rPS0(_inst.FC) - rPS0(_inst.FB))); - rPS1(_inst.FD) = static_cast(-(rPS1(_inst.FA) * rPS1(_inst.FC) - rPS1(_inst.FB))); + rPS0(_inst.FD) = ForceSingle( 0.0-NI_msub( rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB) ) ); + rPS1(_inst.FD) = ForceSingle( 0.0-NI_msub( rPS1(_inst.FA), rPS1(_inst.FC), rPS1(_inst.FB) ) ); + UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } void ps_nmadd(UGeckoInstruction _inst) { - rPS0(_inst.FD) = static_cast(-(rPS0(_inst.FA) * rPS0(_inst.FC) + rPS0(_inst.FB))); - rPS1(_inst.FD) = static_cast(-(rPS1(_inst.FA) * rPS1(_inst.FC) + rPS1(_inst.FB))); + rPS0(_inst.FD) = ForceSingle( 
0.0-NI_madd( rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB) ) ); + rPS1(_inst.FD) = ForceSingle( 0.0-NI_madd( rPS1(_inst.FA), rPS1(_inst.FC), rPS1(_inst.FB) ) ); + UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } void ps_sum0(UGeckoInstruction _inst) { - double p0 = (float)(rPS0(_inst.FA) + rPS1(_inst.FB)); - double p1 = (float)(rPS1(_inst.FC)); + double p0 = ForceSingle(NI_add(rPS0(_inst.FA), rPS1(_inst.FB))); + double p1 = ForceSingle(rPS1(_inst.FC)); rPS0(_inst.FD) = p0; rPS1(_inst.FD) = p1; + UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } void ps_sum1(UGeckoInstruction _inst) { - double p0 = rPS0(_inst.FC); - double p1 = rPS0(_inst.FA) + rPS1(_inst.FB); + double p0 = ForceSingle(rPS0(_inst.FC)); + double p1 = ForceSingle(NI_add(rPS0(_inst.FA), rPS1(_inst.FB))); rPS0(_inst.FD) = p0; rPS1(_inst.FD) = p1; + UpdateFPRF(rPS1(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } void ps_muls0(UGeckoInstruction _inst) { - double p0 = rPS0(_inst.FA) * rPS0(_inst.FC); - double p1 = rPS1(_inst.FA) * rPS0(_inst.FC); + double p0 = ForceSingle(NI_mul(rPS0(_inst.FA), rPS0(_inst.FC))); + double p1 = ForceSingle(NI_mul(rPS1(_inst.FA), rPS0(_inst.FC))); rPS0(_inst.FD) = p0; rPS1(_inst.FD) = p1; + UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } void ps_muls1(UGeckoInstruction _inst) { - double p0 = rPS0(_inst.FA) * rPS1(_inst.FC); - double p1 = rPS1(_inst.FA) * rPS1(_inst.FC); + double p0 = ForceSingle(NI_mul(rPS0(_inst.FA), rPS1(_inst.FC))); + double p1 = ForceSingle(NI_mul(rPS1(_inst.FA), rPS1(_inst.FC))); rPS0(_inst.FD) = p0; rPS1(_inst.FD) = p1; + UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } void ps_madds0(UGeckoInstruction _inst) { - double p0 = (rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB); - double p1 = (rPS1(_inst.FA) * rPS0(_inst.FC)) + rPS1(_inst.FB); + double p0 = ForceSingle( NI_madd( rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB)) 
); + double p1 = ForceSingle( NI_madd( rPS1(_inst.FA), rPS0(_inst.FC), rPS1(_inst.FB)) ); rPS0(_inst.FD) = p0; rPS1(_inst.FD) = p1; + UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } void ps_madds1(UGeckoInstruction _inst) { - double p0 = (rPS0(_inst.FA) * rPS1(_inst.FC)) + rPS0(_inst.FB); - double p1 = (rPS1(_inst.FA) * rPS1(_inst.FC)) + rPS1(_inst.FB); + double p0 = ForceSingle( NI_madd( rPS0(_inst.FA), rPS1(_inst.FC), rPS0(_inst.FB)) ); + double p1 = ForceSingle( NI_madd( rPS1(_inst.FA), rPS1(_inst.FC), rPS1(_inst.FB)) ); rPS0(_inst.FD) = p0; rPS1(_inst.FD) = p1; + UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } @@ -250,17 +384,15 @@ void ps_cmpu0(UGeckoInstruction _inst) compareResult = 1; if (IsSNAN(fa) || IsSNAN(fb)) { - FPSCR.VXSNAN = 1; + SetFPException(FPSCR_VXSNAN); } } - SetCRField(_inst.CRFD, compareResult); - if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); + FPSCR.FPRF = compareResult; + SetCRField(_inst.CRFD, compareResult); } void ps_cmpo0(UGeckoInstruction _inst) -{ - // Ector, please check - //ps_cmpu0(_inst); +{ double fa = rPS0(_inst.FA); double fb = rPS0(_inst.FB); int compareResult; @@ -268,19 +400,23 @@ void ps_cmpo0(UGeckoInstruction _inst) if (fa < fb) compareResult = 8; else if (fa > fb) compareResult = 4; else if (fa == fb) compareResult = 2; - else + else { compareResult = 1; if (IsSNAN(fa) || IsSNAN(fb)) { - FPSCR.VXSNAN = 1; - if (!FPSCR.FEX) FPSCR.VXVC = 1; + SetFPException(FPSCR_VXSNAN); + if (!FPSCR.VE) + SetFPException(FPSCR_VXVC); + } + else + { + //if (IsQNAN(fa) || IsQNAN(fb)) // this is always true + SetFPException(FPSCR_VXVC); } - else if (IsQNAN(fa) || IsQNAN(fb)) - FPSCR.VXVC = 1; } - SetCRField(_inst.CRFD, compareResult); - if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); + FPSCR.FPRF = compareResult; + SetCRField(_inst.CRFD, compareResult); } void ps_cmpu1(UGeckoInstruction _inst) @@ -297,17 +433,15 @@ void ps_cmpu1(UGeckoInstruction _inst) compareResult = 1; if 
(IsSNAN(fa) || IsSNAN(fb)) { - FPSCR.VXSNAN = 1; + SetFPException(FPSCR_VXSNAN); } } - SetCRField(_inst.CRFD, compareResult); - if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); + FPSCR.FPRF = compareResult; + SetCRField(_inst.CRFD, compareResult); } void ps_cmpo1(UGeckoInstruction _inst) -{ - // Ector, please check - //ps_cmpu1(_inst); +{ double fa = rPS1(_inst.FA); double fb = rPS1(_inst.FB); int compareResult; @@ -315,19 +449,23 @@ void ps_cmpo1(UGeckoInstruction _inst) if (fa < fb) compareResult = 8; else if (fa > fb) compareResult = 4; else if (fa == fb) compareResult = 2; - else + else { compareResult = 1; if (IsSNAN(fa) || IsSNAN(fb)) { - FPSCR.VXSNAN = 1; - if (!FPSCR.FEX) FPSCR.VXVC = 1; + SetFPException(FPSCR_VXSNAN); + if (!FPSCR.VE) + SetFPException(FPSCR_VXVC); + } + else + { + //if (IsQNAN(fa) || IsQNAN(fb)) // this is always true + SetFPException(FPSCR_VXVC); } - else if (IsQNAN(fa) || IsQNAN(fb)) - FPSCR.VXVC = 1; } - SetCRField(_inst.CRFD, compareResult); - if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); + FPSCR.FPRF = compareResult; + SetCRField(_inst.CRFD, compareResult); } // __________________________________________________________________________________________________ diff --git a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp index 23da436bc8..32067dd873 100644 --- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp +++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp @@ -43,6 +43,8 @@ static const unsigned short FPU_ROUND_MASK = 3 << 10; #include "../../Core.h" #include "Interpreter.h" +#include "Interpreter_FPUtils.h" + /* Most of these are together with fctiwx @@ -95,8 +97,8 @@ void FPSCRtoFPUSettings(UReg_FPSCR fp) #endif if (fp.VE || fp.OE || fp.UE || fp.ZE || fp.XE) { - //PanicAlert("FPSCR - exceptions enabled. Please report. 
VE=%i OE=%i UE=%i ZE=%i XE=%i", - // fp.VE, fp.OE, fp.UE, fp.ZE, fp.XE); + PanicAlert("FPSCR - exceptions enabled. Please report. VE=%i OE=%i UE=%i ZE=%i XE=%i", + fp.VE, fp.OE, fp.UE, fp.ZE, fp.XE); // Pokemon Colosseum does this. Gah. } @@ -119,60 +121,29 @@ void FPSCRtoFPUSettings(UReg_FPSCR fp) _mm_setcsr(csr); } -void mcrfs(UGeckoInstruction _inst) -{ - u32 fpflags = ((FPSCR.Hex >> (4*(7 - _inst.CRFS))) & 0xF); - switch (_inst.CRFS) { - case 0: - FPSCR.FX = 0; - FPSCR.OX = 0; - break; - case 1: - FPSCR.UX = 0; - FPSCR.ZX = 0; - FPSCR.XX = 0; - FPSCR.VXSNAN = 0; - break; - case 2: - FPSCR.VXISI = 0; - FPSCR.VXIDI = 0; - FPSCR.VXZDZ = 0; - FPSCR.VXIMZ = 0; - break; - case 3: - FPSCR.VXVC = 0; - break; - case 5: - FPSCR.VXSOFT = 0; - FPSCR.VXSQRT = 0; - FPSCR.VXCVI = 0; - break; - } - SetCRField(_inst.CRFD, fpflags); - FPSCRtoFPUSettings(FPSCR); -} - -void mffsx(UGeckoInstruction _inst) -{ - // load from FPSCR - // This may or may not be accurate - but better than nothing, I guess - // TODO(ector): grab all overflow flags etc and set them in FPSCR - - riPS0(_inst.FD) = (u64)FPSCR.Hex; - if (_inst.Rc) PanicAlert("mffsx: inst_.Rc"); -} - void mtfsb0x(UGeckoInstruction _inst) { - FPSCR.Hex &= (~(0x80000000 >> _inst.CRBD)); + u32 b = 0x80000000 >> _inst.CRBD; + + /*if (b & 0x9ff80700) + PanicAlert("mtfsb0 clears bit %d, PC=%x", _inst.CRBD, PC);*/ + + FPSCR.Hex &= ~b; FPSCRtoFPUSettings(FPSCR); + if (_inst.Rc) PanicAlert("mtfsb0x: inst_.Rc"); } void mtfsb1x(UGeckoInstruction _inst) { - FPSCR.Hex |= 0x80000000 >> _inst.CRBD; - FPSCRtoFPUSettings(FPSCR); + // this instruction can affect FX + u32 b = 0x80000000 >> _inst.CRBD; + if (b & FPSCR_ANY_X) + SetFPException(b); + else + FPSCR.Hex |= b; + FPSCRtoFPUSettings(FPSCR); + if (_inst.Rc) PanicAlert("mtfsb1x: inst_.Rc"); } @@ -180,8 +151,15 @@ void mtfsfix(UGeckoInstruction _inst) { u32 mask = (0xF0000000 >> (4 * _inst.CRFD)); u32 imm = (_inst.hex << 16) & 0xF0000000; + + /*u32 cleared = ~(imm >> (4 * _inst.CRFD)) & 
FPSCR.Hex & mask; + if (cleared & 0x9ff80700) + PanicAlert("mtfsfi clears %08x, PC=%x", cleared, PC);*/ + FPSCR.Hex = (FPSCR.Hex & ~mask) | (imm >> (4 * _inst.CRFD)); + FPSCRtoFPUSettings(FPSCR); + if (_inst.Rc) PanicAlert("mtfsfix: inst_.Rc"); } @@ -189,14 +167,19 @@ void mtfsfx(UGeckoInstruction _inst) { u32 fm = _inst.FM; u32 m = 0; - for (int i = 0; i < 8; i++) //7?? todo check + for (int i = 0; i < 8; i++) { if (fm & (1 << i)) m |= (0xF << (i * 4)); } + /*u32 cleared = ~((u32)(riPS0(_inst.FB))) & FPSCR.Hex & m; + if (cleared & 0x9ff80700) + PanicAlert("mtfsf clears %08x, PC=%x", cleared, PC);*/ + FPSCR.Hex = (FPSCR.Hex & ~m) | ((u32)(riPS0(_inst.FB)) & m); FPSCRtoFPUSettings(FPSCR); + if (_inst.Rc) PanicAlert("mtfsfx: inst_.Rc"); } @@ -458,4 +441,53 @@ void isync(UGeckoInstruction _inst) //shouldnt do anything } +// the following commands read from FPSCR + +void mcrfs(UGeckoInstruction _inst) +{ + //if (_inst.CRFS != 3 && _inst.CRFS != 4) + // PanicAlert("msrfs at %x, CRFS = %d, CRFD = %d", PC, (int)_inst.CRFS, (int)_inst.CRFD); + + UpdateFPSCR(); + u32 fpflags = ((FPSCR.Hex >> (4*(7 - _inst.CRFS))) & 0xF); + switch (_inst.CRFS) { + case 0: + FPSCR.FX = 0; + FPSCR.OX = 0; + break; + case 1: + FPSCR.UX = 0; + FPSCR.ZX = 0; + FPSCR.XX = 0; + FPSCR.VXSNAN = 0; + break; + case 2: + FPSCR.VXISI = 0; + FPSCR.VXIDI = 0; + FPSCR.VXZDZ = 0; + FPSCR.VXIMZ = 0; + break; + case 3: + FPSCR.VXVC = 0; + break; + case 5: + FPSCR.VXSOFT = 0; + FPSCR.VXSQRT = 0; + FPSCR.VXCVI = 0; + break; + } + SetCRField(_inst.CRFD, fpflags); +} + +void mffsx(UGeckoInstruction _inst) +{ + // load from FPSCR + // This may or may not be accurate - but better than nothing, I guess + // TODO(ector): grab all overflow flags etc and set them in FPSCR + + UpdateFPSCR(); + riPS0(_inst.FD) = (u64)FPSCR.Hex; + if (_inst.Rc) PanicAlert("mffsx: inst_.Rc"); +} + } // namespace