This commit is contained in:
Nekotekina 2015-01-19 00:29:41 +03:00
commit 87f1a9d9dc
9 changed files with 805 additions and 205 deletions

10
rpcs3/Emu/Cell/Common.h Normal file
View File

@ -0,0 +1,10 @@
#pragma once
// Floating-point rounding mode (for both PPU and SPU).
// SetHostRoundingMode() switches on these values to select the matching host rounding direction.
enum FPSCR_RN
{
FPSCR_RN_NEAR = 0, // round to nearest (host FE_TONEAREST)
FPSCR_RN_ZERO = 1, // round toward zero (host FE_TOWARDZERO)
FPSCR_RN_PINF = 2, // round toward +infinity (host FE_UPWARD)
FPSCR_RN_MINF = 3, // round toward -infinity (host FE_DOWNWARD)
};

View File

@ -17,6 +17,8 @@
#define _rotl64(x,r) (((u64)(x) << (r)) | ((u64)(x) >> (64 - (r)))) #define _rotl64(x,r) (((u64)(x) << (r)) | ((u64)(x) >> (64 - (r))))
#endif #endif
#include <fenv.h>
#if 0//def _DEBUG #if 0//def _DEBUG
#define HLE_CALL_DEBUG #define HLE_CALL_DEBUG
#endif #endif
@ -60,6 +62,26 @@ static double SilenceNaN(double x)
return (double&)bits; return (double&)bits;
} }
// Applies the guest rounding mode `rn` (one of the FPSCR_RN_* values) to the host
// floating-point environment. Any other value leaves the host rounding mode untouched.
static void SetHostRoundingMode(u32 rn)
{
	int host_mode;

	if (rn == FPSCR_RN_NEAR)
		host_mode = FE_TONEAREST;
	else if (rn == FPSCR_RN_ZERO)
		host_mode = FE_TOWARDZERO;
	else if (rn == FPSCR_RN_PINF)
		host_mode = FE_UPWARD;
	else if (rn == FPSCR_RN_MINF)
		host_mode = FE_DOWNWARD;
	else
		return; // not a known rounding mode: nothing to apply

	fesetround(host_mode);
}
namespace ppu_recompiler_llvm { namespace ppu_recompiler_llvm {
class Compiler; class Compiler;
} }
@ -79,6 +101,13 @@ public:
} }
private: private:
// Copies the host floating-point exception flags into the emulated FPSCR:
// FI is set or cleared to match FE_INEXACT, and the UX / OX exceptions are
// raised when the host reports underflow / overflow.
// Call sites in this file clear the host flags with feclearexcept() before the
// operation whose outcome is inspected here.
void CheckHostFPExceptions()
{
	const bool inexact = fetestexcept(FE_INEXACT) != 0;
	CPU.SetFPSCR_FI(inexact);

	if (fetestexcept(FE_UNDERFLOW) != 0)
	{
		CPU.SetFPSCRException(FPSCR_UX);
	}

	if (fetestexcept(FE_OVERFLOW) != 0)
	{
		CPU.SetFPSCRException(FPSCR_OX);
	}
}
void Exit() {} void Exit() {}
void SysCall() void SysCall()
@ -245,6 +274,7 @@ private:
} }
void VADDFP(u32 vd, u32 va, u32 vb) void VADDFP(u32 vd, u32 va, u32 vb)
{ {
SetHostRoundingMode(FPSCR_RN_NEAR);
for (uint w = 0; w < 4; w++) for (uint w = 0; w < 4; w++)
{ {
const float a = CheckVSCR_NJ(CPU.VPR[va]._f[w]); const float a = CheckVSCR_NJ(CPU.VPR[va]._f[w]);
@ -441,6 +471,7 @@ private:
} }
void VCFSX(u32 vd, u32 uimm5, u32 vb) void VCFSX(u32 vd, u32 uimm5, u32 vb)
{ {
SetHostRoundingMode(FPSCR_RN_NEAR);
u32 scale = 1 << uimm5; u32 scale = 1 << uimm5;
for (uint w = 0; w < 4; w++) for (uint w = 0; w < 4; w++)
@ -450,6 +481,7 @@ private:
} }
void VCFUX(u32 vd, u32 uimm5, u32 vb) void VCFUX(u32 vd, u32 uimm5, u32 vb)
{ {
SetHostRoundingMode(FPSCR_RN_NEAR);
u32 scale = 1 << uimm5; u32 scale = 1 << uimm5;
for (uint w = 0; w < 4; w++) for (uint w = 0; w < 4; w++)
@ -786,8 +818,8 @@ private:
CPU.VPR[vd]._s32[w] = (int)0x80000000; CPU.VPR[vd]._s32[w] = (int)0x80000000;
CPU.VSCR.SAT = 1; CPU.VSCR.SAT = 1;
} }
else // C rounding = Round towards 0 else
CPU.VPR[vd]._s32[w] = (int)result; CPU.VPR[vd]._s32[w] = (int)trunc(result);
} }
} }
} }
@ -804,7 +836,6 @@ private:
} }
else else
{ {
// C rounding = Round towards 0
double result = (double)b * nScale; double result = (double)b * nScale;
if (result > 0xffffffffu) if (result > 0xffffffffu)
{ {
@ -817,7 +848,7 @@ private:
CPU.VSCR.SAT = 1; CPU.VSCR.SAT = 1;
} }
else else
CPU.VPR[vd]._u32[w] = (u32)result; CPU.VPR[vd]._u32[w] = (u32)trunc(result);
} }
} }
} }
@ -826,6 +857,7 @@ private:
// vd = 2^x // vd = 2^x
// ISA : Note that the value placed into the element of vD may vary between implementations // ISA : Note that the value placed into the element of vD may vary between implementations
// and between different executions on the same implementation. // and between different executions on the same implementation.
SetHostRoundingMode(FPSCR_RN_NEAR);
for (uint w = 0; w < 4; w++) for (uint w = 0; w < 4; w++)
{ {
const float b = CheckVSCR_NJ(CPU.VPR[vb]._f[w]); const float b = CheckVSCR_NJ(CPU.VPR[vb]._f[w]);
@ -839,6 +871,7 @@ private:
{ {
// ISA : Note that the value placed into the element of vD may vary between implementations // ISA : Note that the value placed into the element of vD may vary between implementations
// and between different executions on the same implementation. // and between different executions on the same implementation.
SetHostRoundingMode(FPSCR_RN_NEAR);
for (uint w = 0; w < 4; w++) for (uint w = 0; w < 4; w++)
{ {
const float b = CheckVSCR_NJ(CPU.VPR[vb]._f[w]); const float b = CheckVSCR_NJ(CPU.VPR[vb]._f[w]);
@ -850,6 +883,7 @@ private:
} }
void VMADDFP(u32 vd, u32 va, u32 vc, u32 vb) void VMADDFP(u32 vd, u32 va, u32 vc, u32 vb)
{ {
SetHostRoundingMode(FPSCR_RN_NEAR);
for (uint w = 0; w < 4; w++) for (uint w = 0; w < 4; w++)
{ {
const float a = CheckVSCR_NJ(CPU.VPR[va]._f[w]); const float a = CheckVSCR_NJ(CPU.VPR[va]._f[w]);
@ -1275,6 +1309,7 @@ private:
} }
void VNMSUBFP(u32 vd, u32 va, u32 vc, u32 vb) void VNMSUBFP(u32 vd, u32 va, u32 vc, u32 vb)
{ {
SetHostRoundingMode(FPSCR_RN_NEAR);
for (uint w = 0; w < 4; w++) for (uint w = 0; w < 4; w++)
{ {
const float a = CheckVSCR_NJ(CPU.VPR[va]._f[w]); const float a = CheckVSCR_NJ(CPU.VPR[va]._f[w]);
@ -1568,6 +1603,7 @@ private:
} }
void VREFP(u32 vd, u32 vb) void VREFP(u32 vd, u32 vb)
{ {
SetHostRoundingMode(FPSCR_RN_NEAR);
for (uint w = 0; w < 4; w++) for (uint w = 0; w < 4; w++)
{ {
const float b = CheckVSCR_NJ(CPU.VPR[vb]._f[w]); const float b = CheckVSCR_NJ(CPU.VPR[vb]._f[w]);
@ -1596,7 +1632,10 @@ private:
if (std::isnan(b)) if (std::isnan(b))
CPU.VPR[vd]._f[w] = SilenceNaN(b); CPU.VPR[vd]._f[w] = SilenceNaN(b);
else else
{
SetHostRoundingMode(FPSCR_RN_NEAR);
CPU.VPR[vd]._f[w] = nearbyintf(CPU.VPR[vb]._f[w]); CPU.VPR[vd]._f[w] = nearbyintf(CPU.VPR[vb]._f[w]);
}
} }
} }
void VRFIP(u32 vd, u32 vb) void VRFIP(u32 vd, u32 vb)
@ -1646,6 +1685,7 @@ private:
} }
void VRSQRTEFP(u32 vd, u32 vb) void VRSQRTEFP(u32 vd, u32 vb)
{ {
SetHostRoundingMode(FPSCR_RN_NEAR);
for (uint w = 0; w < 4; w++) for (uint w = 0; w < 4; w++)
{ {
//TODO: accurate div //TODO: accurate div
@ -1846,6 +1886,7 @@ private:
} }
void VSUBFP(u32 vd, u32 va, u32 vb) void VSUBFP(u32 vd, u32 va, u32 vb)
{ {
SetHostRoundingMode(FPSCR_RN_NEAR);
for (uint w = 0; w < 4; w++) for (uint w = 0; w < 4; w++)
{ {
const float a = CheckVSCR_NJ(CPU.VPR[va]._f[w]); const float a = CheckVSCR_NJ(CPU.VPR[va]._f[w]);
@ -3663,6 +3704,7 @@ private:
void FSQRTS(u32 frd, u32 frb, bool rc) {FSQRT(frd, frb, rc, true);} void FSQRTS(u32 frd, u32 frb, bool rc) {FSQRT(frd, frb, rc, true);}
void FRES(u32 frd, u32 frb, bool rc) void FRES(u32 frd, u32 frb, bool rc)
{ {
SetHostRoundingMode(CPU.FPSCR.RN);
const double b = CPU.FPR[frb]; const double b = CPU.FPR[frb];
if(FPRdouble::IsSNaN(b)) if(FPRdouble::IsSNaN(b))
{ {
@ -3693,7 +3735,9 @@ private:
} }
else else
{ {
feclearexcept(FE_ALL_EXCEPT);
CPU.FPR[frd] = static_cast<float>(1.0 / b); CPU.FPR[frd] = static_cast<float>(1.0 / b);
CheckHostFPExceptions();
} }
CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); CPU.FPSCR.FPRF = CPU.FPR[frd].GetType();
if(rc) CPU.UpdateCR1(); if(rc) CPU.UpdateCR1();
@ -3797,6 +3841,7 @@ private:
} }
void FRSP(u32 frd, u32 frb, bool rc) void FRSP(u32 frd, u32 frb, bool rc)
{ {
SetHostRoundingMode(CPU.FPSCR.RN);
const double b = CPU.FPR[frb]; const double b = CPU.FPR[frb];
if (FPRdouble::IsSNaN(b)) if (FPRdouble::IsSNaN(b))
{ {
@ -3814,6 +3859,7 @@ private:
{ {
if (((u64&)b0 & DOUBLE_EXP) < 0x3800000000000000ULL) (u64&)b0 &= DOUBLE_SIGN; if (((u64&)b0 & DOUBLE_EXP) < 0x3800000000000000ULL) (u64&)b0 &= DOUBLE_SIGN;
} }
feclearexcept(FE_ALL_EXCEPT);
const double r = static_cast<float>(b0); const double r = static_cast<float>(b0);
if (FPRdouble::IsNaN(r)) if (FPRdouble::IsNaN(r))
{ {
@ -3823,7 +3869,7 @@ private:
else else
{ {
CPU.FPSCR.FR = fabs(r) > fabs(b); CPU.FPSCR.FR = fabs(r) > fabs(b);
CPU.SetFPSCR_FI(b != r); CheckHostFPExceptions();
} }
u32 type = PPCdouble(r).GetType(); u32 type = PPCdouble(r).GetType();
if (type == FPR_PN && r < ldexp(1.0, -126)) type = FPR_PD; if (type == FPR_PN && r < ldexp(1.0, -126)) type = FPR_PD;
@ -3869,6 +3915,7 @@ private:
switch(rn) switch(rn)
{ {
case FPSCR_RN_NEAR: case FPSCR_RN_NEAR:
SetHostRoundingMode(FPSCR_RN_NEAR);
i = (s32)nearbyint(b); i = (s32)nearbyint(b);
break; break;
case FPSCR_RN_ZERO: case FPSCR_RN_ZERO:
@ -3902,6 +3949,7 @@ private:
void FDIV(u32 frd, u32 fra, u32 frb, bool rc) {FDIV(frd, fra, frb, rc, false);} void FDIV(u32 frd, u32 fra, u32 frb, bool rc) {FDIV(frd, fra, frb, rc, false);}
void FDIV(u32 frd, u32 fra, u32 frb, bool rc, bool single) void FDIV(u32 frd, u32 fra, u32 frb, bool rc, bool single)
{ {
SetHostRoundingMode(CPU.FPSCR.RN);
const double a = CPU.FPR[fra]; const double a = CPU.FPR[fra];
const double b = CPU.FPR[frb]; const double b = CPU.FPR[frb];
if(FPRdouble::IsSNaN(a) || FPRdouble::IsSNaN(b)) if(FPRdouble::IsSNaN(a) || FPRdouble::IsSNaN(b))
@ -3960,9 +4008,11 @@ private:
return; return;
} }
} }
feclearexcept(FE_ALL_EXCEPT);
const double res = a / b; const double res = a / b;
if(single) CPU.FPR[frd] = (float)res; if(single) CPU.FPR[frd] = (float)res;
else CPU.FPR[frd] = res; else CPU.FPR[frd] = res;
CheckHostFPExceptions();
} }
CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); CPU.FPSCR.FPRF = CPU.FPR[frd].GetType();
@ -3971,6 +4021,7 @@ private:
void FSUB(u32 frd, u32 fra, u32 frb, bool rc) {FSUB(frd, fra, frb, rc, false);} void FSUB(u32 frd, u32 fra, u32 frb, bool rc) {FSUB(frd, fra, frb, rc, false);}
void FSUB(u32 frd, u32 fra, u32 frb, bool rc, bool single) void FSUB(u32 frd, u32 fra, u32 frb, bool rc, bool single)
{ {
SetHostRoundingMode(CPU.FPSCR.RN);
const double a = CPU.FPR[fra]; const double a = CPU.FPR[fra];
const double b = CPU.FPR[frb]; const double b = CPU.FPR[frb];
if(FPRdouble::IsSNaN(a) || FPRdouble::IsSNaN(b)) if(FPRdouble::IsSNaN(a) || FPRdouble::IsSNaN(b))
@ -4006,9 +4057,11 @@ private:
} }
else else
{ {
feclearexcept(FE_ALL_EXCEPT);
const double res = a - b; const double res = a - b;
if(single) CPU.FPR[frd] = (float)res; if(single) CPU.FPR[frd] = (float)res;
else CPU.FPR[frd] = res; else CPU.FPR[frd] = res;
CheckHostFPExceptions();
} }
CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); CPU.FPSCR.FPRF = CPU.FPR[frd].GetType();
if(rc) CPU.UpdateCR1(); if(rc) CPU.UpdateCR1();
@ -4016,6 +4069,7 @@ private:
void FADD(u32 frd, u32 fra, u32 frb, bool rc) {FADD(frd, fra, frb, rc, false);} void FADD(u32 frd, u32 fra, u32 frb, bool rc) {FADD(frd, fra, frb, rc, false);}
void FADD(u32 frd, u32 fra, u32 frb, bool rc, bool single) void FADD(u32 frd, u32 fra, u32 frb, bool rc, bool single)
{ {
SetHostRoundingMode(CPU.FPSCR.RN);
const double a = CPU.FPR[fra]; const double a = CPU.FPR[fra];
const double b = CPU.FPR[frb]; const double b = CPU.FPR[frb];
if(FPRdouble::IsSNaN(a) || FPRdouble::IsSNaN(b)) if(FPRdouble::IsSNaN(a) || FPRdouble::IsSNaN(b))
@ -4051,9 +4105,11 @@ private:
} }
else else
{ {
feclearexcept(FE_ALL_EXCEPT);
const double res = a + b; const double res = a + b;
if(single) CPU.FPR[frd] = (float)res; if(single) CPU.FPR[frd] = (float)res;
else CPU.FPR[frd] = res; else CPU.FPR[frd] = res;
CheckHostFPExceptions();
} }
CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); CPU.FPSCR.FPRF = CPU.FPR[frd].GetType();
if(rc) CPU.UpdateCR1(); if(rc) CPU.UpdateCR1();
@ -4061,6 +4117,7 @@ private:
void FSQRT(u32 frd, u32 frb, bool rc) {FSQRT(frd, frb, rc, false);} void FSQRT(u32 frd, u32 frb, bool rc) {FSQRT(frd, frb, rc, false);}
void FSQRT(u32 frd, u32 frb, bool rc, bool single) void FSQRT(u32 frd, u32 frb, bool rc, bool single)
{ {
SetHostRoundingMode(CPU.FPSCR.RN);
const double b = CPU.FPR[frb]; const double b = CPU.FPR[frb];
if(FPRdouble::IsSNaN(b)) if(FPRdouble::IsSNaN(b))
{ {
@ -4091,9 +4148,11 @@ private:
} }
else else
{ {
feclearexcept(FE_ALL_EXCEPT);
const double res = sqrt(b); const double res = sqrt(b);
if(single) CPU.FPR[frd] = (float)res; if(single) CPU.FPR[frd] = (float)res;
else CPU.FPR[frd] = res; else CPU.FPR[frd] = res;
CheckHostFPExceptions();
} }
CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); CPU.FPSCR.FPRF = CPU.FPR[frd].GetType();
if(rc) CPU.UpdateCR1(); if(rc) CPU.UpdateCR1();
@ -4106,6 +4165,7 @@ private:
void FMUL(u32 frd, u32 fra, u32 frc, bool rc) {FMUL(frd, fra, frc, rc, false);} void FMUL(u32 frd, u32 fra, u32 frc, bool rc) {FMUL(frd, fra, frc, rc, false);}
void FMUL(u32 frd, u32 fra, u32 frc, bool rc, bool single) void FMUL(u32 frd, u32 fra, u32 frc, bool rc, bool single)
{ {
SetHostRoundingMode(CPU.FPSCR.RN);
const double a = CPU.FPR[fra]; const double a = CPU.FPR[fra];
const double c = CPU.FPR[frc]; const double c = CPU.FPR[frc];
if(FPRdouble::IsSNaN(a) || FPRdouble::IsSNaN(c)) if(FPRdouble::IsSNaN(a) || FPRdouble::IsSNaN(c))
@ -4141,15 +4201,18 @@ private:
} }
else else
{ {
feclearexcept(FE_ALL_EXCEPT);
const double res = a * c; const double res = a * c;
if(single) CPU.FPR[frd] = (float)res; if(single) CPU.FPR[frd] = (float)res;
else CPU.FPR[frd] = res; else CPU.FPR[frd] = res;
CheckHostFPExceptions();
} }
CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); CPU.FPSCR.FPRF = CPU.FPR[frd].GetType();
if(rc) CPU.UpdateCR1(); if(rc) CPU.UpdateCR1();
} }
void FRSQRTE(u32 frd, u32 frb, bool rc) void FRSQRTE(u32 frd, u32 frb, bool rc)
{ {
SetHostRoundingMode(CPU.FPSCR.RN);
const double b = CPU.FPR[frb]; const double b = CPU.FPR[frb];
if(FPRdouble::IsSNaN(b)) if(FPRdouble::IsSNaN(b))
{ {
@ -4192,7 +4255,9 @@ private:
} }
else else
{ {
feclearexcept(FE_ALL_EXCEPT);
CPU.FPR[frd] = 1.0 / sqrt(b); CPU.FPR[frd] = 1.0 / sqrt(b);
CheckHostFPExceptions();
} }
CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); CPU.FPSCR.FPRF = CPU.FPR[frd].GetType();
if(rc) CPU.UpdateCR1(); if(rc) CPU.UpdateCR1();
@ -4201,6 +4266,7 @@ private:
void FMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) {FMADD(frd, fra, frc, frb, rc, false, false, false);} void FMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) {FMADD(frd, fra, frc, frb, rc, false, false, false);}
void FMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc, bool neg, bool sub, bool single) void FMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc, bool neg, bool sub, bool single)
{ {
SetHostRoundingMode(CPU.FPSCR.RN);
const double a = CPU.FPR[fra]; const double a = CPU.FPR[fra];
const double b = CPU.FPR[frb]; const double b = CPU.FPR[frb];
const double c = CPU.FPR[frc]; const double c = CPU.FPR[frc];
@ -4256,8 +4322,10 @@ private:
} }
else else
{ {
feclearexcept(FE_ALL_EXCEPT);
if(single) CPU.FPR[frd] = (float)(neg ? -res : res); if(single) CPU.FPR[frd] = (float)(neg ? -res : res);
else CPU.FPR[frd] = (neg ? -res : res); else CPU.FPR[frd] = (neg ? -res : res);
CheckHostFPExceptions();
} }
} }
CPU.FPSCR.FPRF = CPU.FPR[frd].GetType(); CPU.FPSCR.FPRF = CPU.FPR[frd].GetType();
@ -4344,6 +4412,7 @@ private:
switch(rn) switch(rn)
{ {
case FPSCR_RN_NEAR: case FPSCR_RN_NEAR:
SetHostRoundingMode(FPSCR_RN_NEAR);
i = (s64)nearbyint(b); i = (s64)nearbyint(b);
break; break;
case FPSCR_RN_ZERO: case FPSCR_RN_ZERO:

View File

@ -1,4 +1,5 @@
#pragma once #pragma once
#include "Emu/Cell/Common.h"
#include "Emu/Cell/PPCThread.h" #include "Emu/Cell/PPCThread.h"
#include "Emu/Memory/vm.h" #include "Emu/Memory/vm.h"
@ -57,14 +58,6 @@ enum FPSCR_EXP
FPSCR_VX_ALL = FPSCR_VXSNAN | FPSCR_VXISI | FPSCR_VXIDI | FPSCR_VXZDZ | FPSCR_VXIMZ | FPSCR_VXVC | FPSCR_VXSOFT | FPSCR_VXSQRT | FPSCR_VXCVI, FPSCR_VX_ALL = FPSCR_VXSNAN | FPSCR_VXISI | FPSCR_VXIDI | FPSCR_VXZDZ | FPSCR_VXIMZ | FPSCR_VXVC | FPSCR_VXSOFT | FPSCR_VXSQRT | FPSCR_VXCVI,
}; };
enum FPSCR_RN
{
FPSCR_RN_NEAR = 0,
FPSCR_RN_ZERO = 1,
FPSCR_RN_PINF = 2,
FPSCR_RN_MINF = 3,
};
static const u64 DOUBLE_SIGN = 0x8000000000000000ULL; static const u64 DOUBLE_SIGN = 0x8000000000000000ULL;
static const u64 DOUBLE_EXP = 0x7FF0000000000000ULL; static const u64 DOUBLE_EXP = 0x7FF0000000000000ULL;
static const u64 DOUBLE_FRAC = 0x000FFFFFFFFFFFFFULL; static const u64 DOUBLE_FRAC = 0x000FFFFFFFFFFFFFULL;

View File

@ -1,5 +1,26 @@
#pragma once #pragma once
#include <cmath>
#include <fenv.h>
// Applies the guest rounding mode `rn` (one of the FPSCR_RN_* values) to the host
// floating-point environment. Any other value leaves the host rounding mode untouched.
static void SetHostRoundingMode(u32 rn)
{
	int host_mode;

	if (rn == FPSCR_RN_NEAR)
		host_mode = FE_TONEAREST;
	else if (rn == FPSCR_RN_ZERO)
		host_mode = FE_TOWARDZERO;
	else if (rn == FPSCR_RN_PINF)
		host_mode = FE_UPWARD;
	else if (rn == FPSCR_RN_MINF)
		host_mode = FE_DOWNWARD;
	else
		return; // not a known rounding mode: nothing to apply

	fesetround(host_mode);
}
#define UNIMPLEMENTED() UNK(__FUNCTION__) #define UNIMPLEMENTED() UNK(__FUNCTION__)
#define MEM_AND_REG_HASH() \ #define MEM_AND_REG_HASH() \
@ -11,6 +32,54 @@
#define LOG5_OPCODE(...) /// #define LOG5_OPCODE(...) ///
// Floating-point utility constants and functions
// Each constant is stored as a raw integer bit pattern (*_I); the reference declared
// right after it views those same bits as a floating-point value.
// Bit-pattern notes below assume IEEE 754 binary32 / binary64 layouts.
// 0x7F7FFFFF: exponent field 0xFE with an all-ones mantissa, the largest finite float.
static const u32 FLOAT_MAX_NORMAL_I = 0x7F7FFFFF;
static const float& FLOAT_MAX_NORMAL = (float&)FLOAT_MAX_NORMAL_I;
// 0x7FC00000: exponent field all ones with only the top mantissa bit set, a positive quiet NaN.
static const u32 FLOAT_NAN_I = 0x7FC00000;
static const float& FLOAT_NAN = (float&)FLOAT_NAN_I;
// 0x7FF8000000000000: the double-precision counterpart, a positive quiet NaN.
static const u64 DOUBLE_NAN_I = 0x7FF8000000000000ULL;
static const double& DOUBLE_NAN = (double&)DOUBLE_NAN_I;
static inline bool issnan(double x) {return isnan(x) && ((s64&)x)<<12 > 0;}
static inline bool issnan(float x) {return isnan(x) && ((s32&)x)<<9 > 0;}
static inline int fexpf(float x) {return ((u32&)x >> 23) & 0xFF;}
// True when the exponent field of x is all ones (255). These patterns are handled
// as "extended" values (see extended()) by the arithmetic helpers in this file.
static inline bool isextended(float x)
{
	const int exponent_field = fexpf(x);
	return exponent_field == 255;
}
static inline float extended(bool sign, u32 mantissa) // returns -1^sign * 2^127 * (1.mantissa)
{
u32 bits = sign<<31 | 0x7F800000 | mantissa;
return (float&)bits;
}
static inline float ldexpf_extended(float x, int exp) // ldexpf() for extended values, assumes result is in range
{
u32 bits = (u32&)x;
if (bits << 1 != 0) bits += exp * 0x00800000;
return (float&)bits;
}
// Returns true when x is a subnormal (denormalized) single-precision value.
// std::fpclassify has a float overload, so the value is classified at its own
// precision. MSVC's _fpclass() only accepts a double: a float subnormal widened
// to double becomes a normal number and would never be reported.
static inline bool isdenormal(float x)
{
	return std::fpclassify(x) == FP_SUBNORMAL;
}
// Returns true when x is a subnormal (denormalized) double-precision value.
// Uses the standard std::fpclassify instead of compiler-specific classification
// calls, so every toolchain takes the same code path.
static inline bool isdenormal(double x)
{
	return std::fpclassify(x) == FP_SUBNORMAL;
}
static double SilenceNaN(double x)
{
u64 bits = (u64&)x;
bits |= 0x0008000000000000ULL;
return (double&)bits;
}
class SPUInterpreter : public SPUOpcodes class SPUInterpreter : public SPUOpcodes
{ {
private: private:
@ -47,7 +116,7 @@ private:
} }
void MFSPR(u32 rt, u32 sa) void MFSPR(u32 rt, u32 sa)
{ {
UNIMPLEMENTED(); // not used CPU.GPR[rt].clear(); // All SPRs read as zero.
} }
void RDCH(u32 rt, u32 ra) void RDCH(u32 rt, u32 ra)
{ {
@ -243,7 +312,7 @@ private:
} }
void MTSPR(u32 rt, u32 sa) void MTSPR(u32 rt, u32 sa)
{ {
UNIMPLEMENTED(); // not used // SPR writes are ignored.
} }
void WRCH(u32 ra, u32 rt) void WRCH(u32 ra, u32 rt)
{ {
@ -431,16 +500,41 @@ private:
} }
void FREST(u32 rt, u32 ra) void FREST(u32 rt, u32 ra)
{ {
//CPU.GPR[rt]._m128 = _mm_rcp_ps(CPU.GPR[ra]._m128); SetHostRoundingMode(FPSCR_RN_ZERO);
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++)
CPU.GPR[rt]._f[i] = 1 / CPU.GPR[ra]._f[i]; {
const float a = CPU.GPR[ra]._f[i];
float result;
if (fexpf(a) == 0)
{
CPU.FPSCR.setDivideByZeroFlag(i);
result = extended(std::signbit(a), 0x7FFFFF);
}
else if (isextended(a))
result = 0.0f;
else
result = 1 / a;
CPU.GPR[rt]._f[i] = result;
}
} }
void FRSQEST(u32 rt, u32 ra) void FRSQEST(u32 rt, u32 ra)
{ {
//const __u32x4 FloatAbsMask = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff}; SetHostRoundingMode(FPSCR_RN_ZERO);
//CPU.GPR[rt]._m128 = _mm_rsqrt_ps(_mm_and_ps(CPU.GPR[ra]._m128, FloatAbsMask.m128));
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++)
CPU.GPR[rt]._f[i] = 1 / sqrt(abs(CPU.GPR[ra]._f[i])); {
const float a = CPU.GPR[ra]._f[i];
float result;
if (fexpf(a) == 0)
{
CPU.FPSCR.setDivideByZeroFlag(i);
result = extended(0, 0x7FFFFF);
}
else if (isextended(a))
result = 0.5f / sqrtf(fabsf(ldexpf_extended(a, -2)));
else
result = 1 / sqrtf(fabsf(a));
CPU.GPR[rt]._f[i] = result;
}
} }
void LQX(u32 rt, u32 ra, u32 rb) void LQX(u32 rt, u32 ra, u32 rb)
{ {
@ -828,37 +922,186 @@ private:
} }
void FCGT(u32 rt, u32 ra, u32 rb) void FCGT(u32 rt, u32 ra, u32 rb)
{ {
CPU.GPR[rt]._u32[0] = CPU.GPR[ra]._f[0] > CPU.GPR[rb]._f[0] ? 0xffffffff : 0; for (int i = 0; i < 4; i++)
CPU.GPR[rt]._u32[1] = CPU.GPR[ra]._f[1] > CPU.GPR[rb]._f[1] ? 0xffffffff : 0; {
CPU.GPR[rt]._u32[2] = CPU.GPR[ra]._f[2] > CPU.GPR[rb]._f[2] ? 0xffffffff : 0; const u32 a = CPU.GPR[ra]._u32[i];
CPU.GPR[rt]._u32[3] = CPU.GPR[ra]._f[3] > CPU.GPR[rb]._f[3] ? 0xffffffff : 0; const u32 b = CPU.GPR[rb]._u32[i];
const u32 abs_a = a & 0x7FFFFFFF;
const u32 abs_b = b & 0x7FFFFFFF;
const bool a_zero = (abs_a < 0x00800000);
const bool b_zero = (abs_b < 0x00800000);
bool pass;
if (a_zero)
pass = b >= 0x80800000;
else if (b_zero)
pass = (s32)a >= 0x00800000;
else if (a >= 0x80000000)
pass = (b >= 0x80000000 && a < b);
else
pass = (b >= 0x80000000 || a > b);
CPU.GPR[rt]._u32[i] = pass ? 0xFFFFFFFF : 0;
}
} }
void DFCGT(u32 rt, u32 ra, u32 rb) void DFCGT(u32 rt, u32 ra, u32 rb)
{ {
UNIMPLEMENTED(); // cannot be used UNIMPLEMENTED(); // cannot be used
} }
void FA(u32 rt, u32 ra, u32 rb) void FA_FS(u32 rt, u32 ra, u32 rb, bool sub)
{ {
SetHostRoundingMode(FPSCR_RN_ZERO);
for (int w = 0; w < 4; w++) for (int w = 0; w < 4; w++)
{ {
CPU.GPR[rt]._f[w] = CPU.GPR[ra]._f[w] + CPU.GPR[rb]._f[w]; const float a = CPU.GPR[ra]._f[w];
//if (CPU.GPR[rt]._f[w] == -0.0f) CPU.GPR[rt]._f[w] = 0.0f; const float b = sub ? -CPU.GPR[rb]._f[w] : CPU.GPR[rb]._f[w];
} float result;
} if (isdenormal(a))
void FS(u32 rt, u32 ra, u32 rb) {
{ CPU.FPSCR.setSinglePrecisionExceptionFlags(w, FPSCR_SDIFF);
for (int w = 0; w < 4; w++) if (b == 0.0f || isdenormal(b))
{ result = +0.0f;
CPU.GPR[rt]._f[w] = CPU.GPR[ra]._f[w] - CPU.GPR[rb]._f[w]; else
//if (CPU.GPR[rt]._f[w] == -0.0f) CPU.GPR[rt]._f[w] = 0.0f; result = b;
}
else if (isdenormal(b))
{
CPU.FPSCR.setSinglePrecisionExceptionFlags(w, FPSCR_SDIFF);
if (a == 0.0f)
result = +0.0f;
else
result = a;
}
else if (isextended(a) || isextended(b))
{
CPU.FPSCR.setSinglePrecisionExceptionFlags(w, FPSCR_SDIFF);
if (isextended(a) && fexpf(b) < 255-32)
{
if (std::signbit(a) != std::signbit(b))
{
const u32 bits = (u32&)a - 1;
result = (float&)bits;
}
else
result = a;
}
else if (isextended(b) && fexpf(a) < 255-32)
{
if (std::signbit(a) != std::signbit(b))
{
const u32 bits = (u32&)b - 1;
result = (float&)bits;
}
else
result = b;
}
else
{
feclearexcept(FE_ALL_EXCEPT);
result = ldexpf_extended(a, -1) + ldexpf_extended(b, -1);
result = ldexpf_extended(result, 1);
if (fetestexcept(FE_OVERFLOW))
{
CPU.FPSCR.setSinglePrecisionExceptionFlags(w, FPSCR_SOVF);
result = extended(std::signbit(result), 0x7FFFFF);
}
}
}
else
{
result = a + b;
if (result == copysignf(FLOAT_MAX_NORMAL, result))
{
result = ldexpf_extended(ldexpf(a,-1) + ldexpf(b,-1), 1);
if (isextended(result))
CPU.FPSCR.setSinglePrecisionExceptionFlags(w, FPSCR_SDIFF);
}
else if (isdenormal(result))
{
CPU.FPSCR.setSinglePrecisionExceptionFlags(w, FPSCR_SUNF | FPSCR_SDIFF);
result = +0.0f;
}
else if (result == 0.0f)
{
if (fabsf(a) != fabsf(b))
CPU.FPSCR.setSinglePrecisionExceptionFlags(w, FPSCR_SUNF | FPSCR_SDIFF);
result = +0.0f;
}
}
CPU.GPR[rt]._f[w] = result;
} }
} }
// Single-precision add: rt = ra + rb for each of the four float slots.
// Delegates to FA_FS with the subtract flag off.
void FA(u32 rt, u32 ra, u32 rb)
{
	const bool subtract = false;
	FA_FS(rt, ra, rb, subtract);
}
// Single-precision subtract: rt = ra - rb for each of the four float slots.
// Delegates to FA_FS with the subtract flag on (rb is negated before the add).
void FS(u32 rt, u32 ra, u32 rb)
{
	const bool subtract = true;
	FA_FS(rt, ra, rb, subtract);
}
void FM(u32 rt, u32 ra, u32 rb) void FM(u32 rt, u32 ra, u32 rb)
{ {
SetHostRoundingMode(FPSCR_RN_ZERO);
for (int w = 0; w < 4; w++) for (int w = 0; w < 4; w++)
{ {
CPU.GPR[rt]._f[w] = CPU.GPR[ra]._f[w] * CPU.GPR[rb]._f[w]; const float a = CPU.GPR[ra]._f[w];
//if (CPU.GPR[rt]._f[w] == -0.0f) CPU.GPR[rt]._f[w] = 0.0f; const float b = CPU.GPR[rb]._f[w];
float result;
if (isdenormal(a) || isdenormal(b))
{
CPU.FPSCR.setSinglePrecisionExceptionFlags(w, FPSCR_SDIFF);
result = +0.0f;
}
else if (isextended(a) || isextended(b))
{
CPU.FPSCR.setSinglePrecisionExceptionFlags(w, FPSCR_SDIFF);
const bool sign = std::signbit(a) ^ std::signbit(b);
if (a == 0.0f || b == 0.0f)
{
result = +0.0f;
}
else if ((fexpf(a)-127) + (fexpf(b)-127) >= 129)
{
CPU.FPSCR.setSinglePrecisionExceptionFlags(w, FPSCR_SOVF);
result = extended(sign, 0x7FFFFF);
}
else
{
if (isextended(a))
result = ldexpf_extended(a, -1) * b;
else
result = a * ldexpf_extended(b, -1);
if (result == copysignf(FLOAT_MAX_NORMAL, result))
{
CPU.FPSCR.setSinglePrecisionExceptionFlags(w, FPSCR_SOVF);
result = extended(sign, 0x7FFFFF);
}
else
result = ldexpf_extended(result, 1);
}
}
else
{
result = a * b;
if (result == copysignf(FLOAT_MAX_NORMAL, result))
{
feclearexcept(FE_ALL_EXCEPT);
if (fexpf(a) > fexpf(b))
result = ldexpf(a, -1) * b;
else
result = a * ldexpf(b, -1);
result = ldexpf_extended(result, 1);
if (isextended(result))
CPU.FPSCR.setSinglePrecisionExceptionFlags(w, FPSCR_SDIFF);
if (fetestexcept(FE_OVERFLOW))
CPU.FPSCR.setSinglePrecisionExceptionFlags(w, FPSCR_SOVF);
}
else if (isdenormal(result))
{
CPU.FPSCR.setSinglePrecisionExceptionFlags(w, FPSCR_SUNF | FPSCR_SDIFF);
result = +0.0f;
}
else if (result == 0.0f)
{
if (a != 0.0f & b != 0.0f)
CPU.FPSCR.setSinglePrecisionExceptionFlags(w, FPSCR_SUNF | FPSCR_SDIFF);
result = +0.0f;
}
}
CPU.GPR[rt]._f[w] = result;
} }
} }
void CLGTH(u32 rt, u32 ra, u32 rb) void CLGTH(u32 rt, u32 ra, u32 rb)
@ -873,30 +1116,84 @@ private:
} }
void FCMGT(u32 rt, u32 ra, u32 rb) void FCMGT(u32 rt, u32 ra, u32 rb)
{ {
CPU.GPR[rt]._u32[0] = fabs(CPU.GPR[ra]._f[0]) > fabs(CPU.GPR[rb]._f[0]) ? 0xffffffff : 0; for (int i = 0; i < 4; i++)
CPU.GPR[rt]._u32[1] = fabs(CPU.GPR[ra]._f[1]) > fabs(CPU.GPR[rb]._f[1]) ? 0xffffffff : 0; {
CPU.GPR[rt]._u32[2] = fabs(CPU.GPR[ra]._f[2]) > fabs(CPU.GPR[rb]._f[2]) ? 0xffffffff : 0; const u32 a = CPU.GPR[ra]._u32[i];
CPU.GPR[rt]._u32[3] = fabs(CPU.GPR[ra]._f[3]) > fabs(CPU.GPR[rb]._f[3]) ? 0xffffffff : 0; const u32 b = CPU.GPR[rb]._u32[i];
const u32 abs_a = a & 0x7FFFFFFF;
const u32 abs_b = b & 0x7FFFFFFF;
const bool a_zero = (abs_a < 0x00800000);
const bool b_zero = (abs_b < 0x00800000);
bool pass;
if (a_zero)
pass = false;
else if (b_zero)
pass = !a_zero;
else
pass = abs_a > abs_b;
CPU.GPR[rt]._u32[i] = pass ? 0xFFFFFFFF : 0;
}
} }
void DFCMGT(u32 rt, u32 ra, u32 rb) void DFCMGT(u32 rt, u32 ra, u32 rb)
{ {
UNIMPLEMENTED(); // cannot be used UNIMPLEMENTED(); // cannot be used
} }
void DFA(u32 rt, u32 ra, u32 rb) enum DoubleOp {DFASM_A, DFASM_S, DFASM_M};
void DFASM(u32 rt, u32 ra, u32 rb, DoubleOp op)
{ {
CPU.GPR[rt]._d[0] = CPU.GPR[ra]._d[0] + CPU.GPR[rb]._d[0]; for (int i = 0; i < 2; i++)
CPU.GPR[rt]._d[1] = CPU.GPR[ra]._d[1] + CPU.GPR[rb]._d[1]; {
} double a = CPU.GPR[ra]._d[i];
void DFS(u32 rt, u32 ra, u32 rb) double b = CPU.GPR[rb]._d[i];
{ if (isdenormal(a))
CPU.GPR[rt]._d[0] = CPU.GPR[ra]._d[0] - CPU.GPR[rb]._d[0]; {
CPU.GPR[rt]._d[1] = CPU.GPR[ra]._d[1] - CPU.GPR[rb]._d[1]; CPU.FPSCR.setDoublePrecisionExceptionFlags(i, FPSCR_DDENORM);
} a = copysign(0.0, a);
void DFM(u32 rt, u32 ra, u32 rb) }
{ if (isdenormal(b))
CPU.GPR[rt]._d[0] = CPU.GPR[ra]._d[0] * CPU.GPR[rb]._d[0]; {
CPU.GPR[rt]._d[1] = CPU.GPR[ra]._d[1] * CPU.GPR[rb]._d[1]; CPU.FPSCR.setDoublePrecisionExceptionFlags(i, FPSCR_DDENORM);
b = copysign(0.0, b);
}
double result;
if (isnan(a) || isnan(b))
{
CPU.FPSCR.setDoublePrecisionExceptionFlags(i, FPSCR_DNAN);
if (issnan(a) || issnan(b))
CPU.FPSCR.setDoublePrecisionExceptionFlags(i, FPSCR_DINV);
result = DOUBLE_NAN;
}
else
{
SetHostRoundingMode(CPU.FPSCR.checkSliceRounding(i));
feclearexcept(FE_ALL_EXCEPT);
switch (op)
{
case DFASM_A: result = a + b; break;
case DFASM_S: result = a - b; break;
case DFASM_M: result = a * b; break;
}
if (fetestexcept(FE_INVALID))
{
CPU.FPSCR.setDoublePrecisionExceptionFlags(i, FPSCR_DINV);
result = DOUBLE_NAN;
}
else
{
if (fetestexcept(FE_OVERFLOW))
CPU.FPSCR.setDoublePrecisionExceptionFlags(i, FPSCR_DOVF);
if (fetestexcept(FE_UNDERFLOW))
CPU.FPSCR.setDoublePrecisionExceptionFlags(i, FPSCR_DUNF);
if (fetestexcept(FE_INEXACT))
CPU.FPSCR.setDoublePrecisionExceptionFlags(i, FPSCR_DINX);
}
}
CPU.GPR[rt]._d[i] = result;
}
} }
// Double-precision add: rt = ra + rb for both double slots (via DFASM).
void DFA(u32 rt, u32 ra, u32 rb)
{
	DFASM(rt, ra, rb, DFASM_A);
}
// Double-precision subtract: rt = ra - rb for both double slots (via DFASM).
void DFS(u32 rt, u32 ra, u32 rb)
{
	DFASM(rt, ra, rb, DFASM_S);
}
// Double-precision multiply: rt = ra * rb for both double slots (via DFASM).
void DFM(u32 rt, u32 ra, u32 rb)
{
	DFASM(rt, ra, rb, DFASM_M);
}
void CLGTB(u32 rt, u32 ra, u32 rb) void CLGTB(u32 rt, u32 ra, u32 rb)
{ {
for (int b = 0; b < 16; b++) for (int b = 0; b < 16; b++)
@ -910,26 +1207,64 @@ private:
CPU.Stop(); CPU.Stop();
} }
} }
void DFMA(u32 rt, u32 ra, u32 rb) void DFMA(u32 rt, u32 ra, u32 rb, bool neg, bool sub)
{ {
CPU.GPR[rt]._d[0] += CPU.GPR[ra]._d[0] * CPU.GPR[rb]._d[0]; for (int i = 0; i < 2; i++)
CPU.GPR[rt]._d[1] += CPU.GPR[ra]._d[1] * CPU.GPR[rb]._d[1]; {
} double a = CPU.GPR[ra]._d[i];
void DFMS(u32 rt, u32 ra, u32 rb) double b = CPU.GPR[rb]._d[i];
{ double c = CPU.GPR[rt]._d[i];
CPU.GPR[rt]._d[0] = CPU.GPR[ra]._d[0] * CPU.GPR[rb]._d[0] - CPU.GPR[rt]._d[0]; if (isdenormal(a))
CPU.GPR[rt]._d[1] = CPU.GPR[ra]._d[1] * CPU.GPR[rb]._d[1] - CPU.GPR[rt]._d[1]; {
} CPU.FPSCR.setDoublePrecisionExceptionFlags(i, FPSCR_DDENORM);
void DFNMS(u32 rt, u32 ra, u32 rb) a = copysign(0.0, a);
{ }
CPU.GPR[rt]._d[0] -= CPU.GPR[ra]._d[0] * CPU.GPR[rb]._d[0]; if (isdenormal(b))
CPU.GPR[rt]._d[1] -= CPU.GPR[ra]._d[1] * CPU.GPR[rb]._d[1]; {
} CPU.FPSCR.setDoublePrecisionExceptionFlags(i, FPSCR_DDENORM);
void DFNMA(u32 rt, u32 ra, u32 rb) b = copysign(0.0, b);
{ }
CPU.GPR[rt]._d[0] = -(CPU.GPR[ra]._d[0] * CPU.GPR[rb]._d[0] + CPU.GPR[rt]._d[0]); if (isdenormal(c))
CPU.GPR[rt]._d[1] = -(CPU.GPR[ra]._d[1] * CPU.GPR[rb]._d[1] + CPU.GPR[rt]._d[1]); {
CPU.FPSCR.setDoublePrecisionExceptionFlags(i, FPSCR_DDENORM);
c = copysign(0.0, c);
}
double result;
if (isnan(a) || isnan(b) || isnan(c))
{
CPU.FPSCR.setDoublePrecisionExceptionFlags(i, FPSCR_DNAN);
if (issnan(a) || issnan(b) || issnan(c) || (isinf(a) && b == 0.0f) || (a == 0.0f && isinf(b)))
CPU.FPSCR.setDoublePrecisionExceptionFlags(i, FPSCR_DINV);
result = DOUBLE_NAN;
}
else
{
SetHostRoundingMode(CPU.FPSCR.checkSliceRounding(i));
feclearexcept(FE_ALL_EXCEPT);
result = fma(a, b, sub ? -c : c);
if (fetestexcept(FE_INVALID))
{
CPU.FPSCR.setDoublePrecisionExceptionFlags(i, FPSCR_DINV);
result = DOUBLE_NAN;
}
else
{
if (fetestexcept(FE_OVERFLOW))
CPU.FPSCR.setDoublePrecisionExceptionFlags(i, FPSCR_DOVF);
if (fetestexcept(FE_UNDERFLOW))
CPU.FPSCR.setDoublePrecisionExceptionFlags(i, FPSCR_DUNF);
if (fetestexcept(FE_INEXACT))
CPU.FPSCR.setDoublePrecisionExceptionFlags(i, FPSCR_DINX);
if (neg) result = -result;
}
}
CPU.GPR[rt]._d[i] = result;
}
} }
// Double-precision multiply-add: rt = ra * rb + rt for both double slots.
// Forwards to the five-argument DFMA with neg and sub both off.
void DFMA(u32 rt, u32 ra, u32 rb)
{
	const bool negate_result = false;
	const bool subtract_addend = false;
	DFMA(rt, ra, rb, negate_result, subtract_addend);
}
// Double-precision multiply-subtract: rt = ra * rb - rt for both double slots.
// Forwards to the five-argument DFMA with sub on and neg off.
void DFMS(u32 rt, u32 ra, u32 rb)
{
	const bool negate_result = false;
	const bool subtract_addend = true;
	DFMA(rt, ra, rb, negate_result, subtract_addend);
}
// Double-precision negative multiply-subtract: rt = -(ra * rb - rt) for both double slots.
// Forwards to the five-argument DFMA with neg and sub both on.
void DFNMS(u32 rt, u32 ra, u32 rb)
{
	const bool negate_result = true;
	const bool subtract_addend = true;
	DFMA(rt, ra, rb, negate_result, subtract_addend);
}
// Double-precision negative multiply-add: rt = -(ra * rb + rt) for both double slots.
// Forwards to the five-argument DFMA with neg on and sub off.
void DFNMA(u32 rt, u32 ra, u32 rb)
{
	const bool negate_result = true;
	const bool subtract_addend = false;
	DFMA(rt, ra, rb, negate_result, subtract_addend);
}
void CEQ(u32 rt, u32 ra, u32 rb) void CEQ(u32 rt, u32 ra, u32 rb)
{ {
for (int w = 0; w < 4; w++) for (int w = 0; w < 4; w++)
@ -981,29 +1316,67 @@ private:
void FSCRRD(u32 rt) void FSCRRD(u32 rt)
{ {
// TODO (rarely used) CPU.GPR[rt]._u32[3] = CPU.FPSCR._u32[3];
CPU.GPR[rt].clear(); CPU.GPR[rt]._u32[2] = CPU.FPSCR._u32[2];
CPU.GPR[rt]._u32[1] = CPU.FPSCR._u32[1];
CPU.GPR[rt]._u32[0] = CPU.FPSCR._u32[0];
} }
void FESD(u32 rt, u32 ra) void FESD(u32 rt, u32 ra)
{ {
CPU.GPR[rt]._d[0] = (double)CPU.GPR[ra]._f[1]; for (int i = 0; i < 2; i++)
CPU.GPR[rt]._d[1] = (double)CPU.GPR[ra]._f[3]; {
const float a = CPU.GPR[ra]._f[i*2+1];
if (isnan(a))
{
CPU.FPSCR.setDoublePrecisionExceptionFlags(i, FPSCR_DNAN);
if (issnan(a))
CPU.FPSCR.setDoublePrecisionExceptionFlags(i, FPSCR_DINV);
CPU.GPR[rt]._d[i] = DOUBLE_NAN;
}
else if (isdenormal(a))
{
CPU.FPSCR.setDoublePrecisionExceptionFlags(i, FPSCR_DDENORM);
CPU.GPR[rt]._d[i] = 0.0;
}
else
{
CPU.GPR[rt]._d[i] = (double)a;
}
}
} }
void FRDS(u32 rt, u32 ra) void FRDS(u32 rt, u32 ra)
{ {
CPU.GPR[rt]._f[1] = (float)CPU.GPR[ra]._d[0]; for (int i = 0; i < 2; i++)
CPU.GPR[rt]._u32[0] = 0x00000000; {
CPU.GPR[rt]._f[3] = (float)CPU.GPR[ra]._d[1]; SetHostRoundingMode(CPU.FPSCR.checkSliceRounding(i));
CPU.GPR[rt]._u32[2] = 0x00000000; const double a = CPU.GPR[ra]._d[i];
if (isnan(a))
{
CPU.FPSCR.setDoublePrecisionExceptionFlags(i, FPSCR_DNAN);
if (issnan(a))
CPU.FPSCR.setDoublePrecisionExceptionFlags(i, FPSCR_DINV);
CPU.GPR[rt]._f[i*2+1] = FLOAT_NAN;
}
else
{
feclearexcept(FE_ALL_EXCEPT);
CPU.GPR[rt]._f[i*2+1] = (float)a;
if (fetestexcept(FE_OVERFLOW))
CPU.FPSCR.setDoublePrecisionExceptionFlags(i, FPSCR_DOVF);
if (fetestexcept(FE_UNDERFLOW))
CPU.FPSCR.setDoublePrecisionExceptionFlags(i, FPSCR_DUNF);
if (fetestexcept(FE_INEXACT))
CPU.FPSCR.setDoublePrecisionExceptionFlags(i, FPSCR_DINX);
}
CPU.GPR[rt]._u32[i*2] = 0;
}
} }
void FSCRWR(u32 rt, u32 ra) void FSCRWR(u32 rt, u32 ra)
{ {
// TODO (rarely used) CPU.FPSCR._u32[3] = CPU.GPR[ra]._u32[3] & 0x00000F07;
if (CPU.GPR[ra]._u64[0] || CPU.GPR[ra]._u64[1]) CPU.FPSCR._u32[2] = CPU.GPR[ra]._u32[2] & 0x00003F07;
{ CPU.FPSCR._u32[1] = CPU.GPR[ra]._u32[1] & 0x00003F07;
LOG_ERROR(SPU, "FSCRWR(%d,%d): value = %s", rt, ra, CPU.GPR[ra].to_hex().c_str()); CPU.FPSCR._u32[0] = CPU.GPR[ra]._u32[0] & 0x00000F07;
UNIMPLEMENTED();
}
} }
void DFTSV(u32 rt, u32 ra, s32 i7) void DFTSV(u32 rt, u32 ra, s32 i7)
{ {
@ -1011,10 +1384,17 @@ private:
} }
void FCEQ(u32 rt, u32 ra, u32 rb) void FCEQ(u32 rt, u32 ra, u32 rb)
{ {
CPU.GPR[rt]._u32[0] = CPU.GPR[ra]._f[0] == CPU.GPR[rb]._f[0] ? 0xffffffff : 0; for (int i = 0; i < 4; i++)
CPU.GPR[rt]._u32[1] = CPU.GPR[ra]._f[1] == CPU.GPR[rb]._f[1] ? 0xffffffff : 0; {
CPU.GPR[rt]._u32[2] = CPU.GPR[ra]._f[2] == CPU.GPR[rb]._f[2] ? 0xffffffff : 0; const u32 a = CPU.GPR[ra]._u32[i];
CPU.GPR[rt]._u32[3] = CPU.GPR[ra]._f[3] == CPU.GPR[rb]._f[3] ? 0xffffffff : 0; const u32 b = CPU.GPR[rb]._u32[i];
const u32 abs_a = a & 0x7FFFFFFF;
const u32 abs_b = b & 0x7FFFFFFF;
const bool a_zero = (abs_a < 0x00800000);
const bool b_zero = (abs_b < 0x00800000);
const bool pass = a == b || (a_zero && b_zero);
CPU.GPR[rt]._u32[i] = pass ? 0xFFFFFFFF : 0;
}
} }
void DFCEQ(u32 rt, u32 ra, u32 rb) void DFCEQ(u32 rt, u32 ra, u32 rb)
{ {
@ -1047,10 +1427,17 @@ private:
} }
void FCMEQ(u32 rt, u32 ra, u32 rb) void FCMEQ(u32 rt, u32 ra, u32 rb)
{ {
CPU.GPR[rt]._u32[0] = fabs(CPU.GPR[ra]._f[0]) == fabs(CPU.GPR[rb]._f[0]) ? 0xffffffff : 0; for (int i = 0; i < 4; i++)
CPU.GPR[rt]._u32[1] = fabs(CPU.GPR[ra]._f[1]) == fabs(CPU.GPR[rb]._f[1]) ? 0xffffffff : 0; {
CPU.GPR[rt]._u32[2] = fabs(CPU.GPR[ra]._f[2]) == fabs(CPU.GPR[rb]._f[2]) ? 0xffffffff : 0; const u32 a = CPU.GPR[ra]._u32[i];
CPU.GPR[rt]._u32[3] = fabs(CPU.GPR[ra]._f[3]) == fabs(CPU.GPR[rb]._f[3]) ? 0xffffffff : 0; const u32 b = CPU.GPR[rb]._u32[i];
const u32 abs_a = a & 0x7FFFFFFF;
const u32 abs_b = b & 0x7FFFFFFF;
const bool a_zero = (abs_a < 0x00800000);
const bool b_zero = (abs_b < 0x00800000);
const bool pass = abs_a == abs_b || (a_zero && b_zero);
CPU.GPR[rt]._u32[i] = pass ? 0xFFFFFFFF : 0;
}
} }
void DFCMEQ(u32 rt, u32 ra, u32 rb) void DFCMEQ(u32 rt, u32 ra, u32 rb)
{ {
@ -1084,53 +1471,54 @@ private:
//0 - 9 //0 - 9
void CFLTS(u32 rt, u32 ra, s32 i8) void CFLTS(u32 rt, u32 ra, s32 i8)
{ {
const u32 scale = 173 - (i8 & 0xff); //unsigned immediate const int scale = 173 - (i8 & 0xff); //unsigned immediate
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++)
{ {
u32 exp = ((CPU.GPR[ra]._u32[i] >> 23) & 0xff) + scale; const float a = CPU.GPR[ra]._f[i];
float scaled;
if (exp > 255) if ((fexpf(a)-127) + scale >= 32)
exp = 255; scaled = copysignf(4294967296.0f, a);
CPU.GPR[rt]._u32[i] = (CPU.GPR[ra]._u32[i] & 0x807fffff) | (exp << 23);
if (CPU.GPR[rt]._f[i] > 0x7fffffff)
CPU.GPR[rt]._u32[i] = 0x7fffffff;
else if (CPU.GPR[rt]._f[i] < -pow(2, 31))
CPU.GPR[rt]._u32[i] = 0x80000000;
else else
CPU.GPR[rt]._s32[i] = (s32)CPU.GPR[rt]._f[i]; //trunc scaled = ldexpf(a, scale);
s32 result;
if (scaled >= 2147483648.0f)
result = 0x7FFFFFFF;
else if (scaled < -2147483648.0f)
result = 0x80000000;
else
result = (s32)scaled;
CPU.GPR[rt]._s32[i] = result;
} }
} }
void CFLTU(u32 rt, u32 ra, s32 i8) void CFLTU(u32 rt, u32 ra, s32 i8)
{ {
const u32 scale = 173 - (i8 & 0xff); //unsigned immediate const int scale = 173 - (i8 & 0xff); //unsigned immediate
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++)
{ {
u32 exp = ((CPU.GPR[ra]._u32[i] >> 23) & 0xff) + scale; const float a = CPU.GPR[ra]._f[i];
float scaled;
if (exp > 255) if ((fexpf(a)-127) + scale >= 32)
exp = 255; scaled = copysignf(4294967296.0f, a);
if (CPU.GPR[ra]._u32[i] & 0x80000000) //if negative, result = 0
CPU.GPR[rt]._u32[i] = 0;
else else
{ scaled = ldexpf(a, scale);
CPU.GPR[rt]._u32[i] = (CPU.GPR[ra]._u32[i] & 0x807fffff) | (exp << 23); u32 result;
if (scaled >= 4294967296.0f)
if (CPU.GPR[rt]._f[i] > 0xffffffff) //if big, result = max result = 0xFFFFFFFF;
CPU.GPR[rt]._u32[i] = 0xffffffff; else if (scaled < 0.0f)
else result = 0;
CPU.GPR[rt]._u32[i] = (u32)floor(CPU.GPR[rt]._f[i]); else
} result = (u32)scaled;
CPU.GPR[rt]._u32[i] = result;
} }
} }
void CSFLT(u32 rt, u32 ra, s32 i8) void CSFLT(u32 rt, u32 ra, s32 i8)
{ {
const u32 scale = 155 - (i8 & 0xff); //unsigned immediate SetHostRoundingMode(FPSCR_RN_ZERO);
const int scale = 155 - (i8 & 0xff); //unsigned immediate
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++)
{ {
CPU.GPR[rt]._f[i] = (float)CPU.GPR[ra]._s32[i]; const s32 a = CPU.GPR[ra]._s32[i];
CPU.GPR[rt]._f[i] = (float)a;
u32 exp = ((CPU.GPR[rt]._u32[i] >> 23) & 0xff) - scale; u32 exp = ((CPU.GPR[rt]._u32[i] >> 23) & 0xff) - scale;
@ -1138,14 +1526,21 @@ private:
exp = 0; exp = 0;
CPU.GPR[rt]._u32[i] = (CPU.GPR[rt]._u32[i] & 0x807fffff) | (exp << 23); CPU.GPR[rt]._u32[i] = (CPU.GPR[rt]._u32[i] & 0x807fffff) | (exp << 23);
if (isdenormal(CPU.GPR[rt]._f[i]) || (CPU.GPR[rt]._f[i] == 0.0f && a != 0))
{
CPU.FPSCR.setSinglePrecisionExceptionFlags(i, FPSCR_SUNF | FPSCR_SDIFF);
CPU.GPR[rt]._f[i] = 0.0f;
}
} }
} }
void CUFLT(u32 rt, u32 ra, s32 i8) void CUFLT(u32 rt, u32 ra, s32 i8)
{ {
const u32 scale = 155 - (i8 & 0xff); //unsigned immediate SetHostRoundingMode(FPSCR_RN_ZERO);
const int scale = 155 - (i8 & 0xff); //unsigned immediate
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++)
{ {
CPU.GPR[rt]._f[i] = (float)CPU.GPR[ra]._u32[i]; const u32 a = CPU.GPR[ra]._u32[i];
CPU.GPR[rt]._f[i] = (float)a;
u32 exp = ((CPU.GPR[rt]._u32[i] >> 23) & 0xff) - scale; u32 exp = ((CPU.GPR[rt]._u32[i] >> 23) & 0xff) - scale;
@ -1153,6 +1548,11 @@ private:
exp = 0; exp = 0;
CPU.GPR[rt]._u32[i] = (CPU.GPR[rt]._u32[i] & 0x807fffff) | (exp << 23); CPU.GPR[rt]._u32[i] = (CPU.GPR[rt]._u32[i] & 0x807fffff) | (exp << 23);
if (isdenormal(CPU.GPR[rt]._f[i]) || (CPU.GPR[rt]._f[i] == 0.0f && a != 0))
{
CPU.FPSCR.setSinglePrecisionExceptionFlags(i, FPSCR_SUNF | FPSCR_SDIFF);
CPU.GPR[rt]._f[i] = 0.0f;
}
} }
} }
@ -1524,28 +1924,151 @@ private:
for (int w = 0; w < 4; w++) for (int w = 0; w < 4; w++)
CPU.GPR[rt]._s32[w] = CPU.GPR[ra]._s16[w*2] * CPU.GPR[rb]._s16[w*2] + CPU.GPR[rc]._s32[w]; CPU.GPR[rt]._s32[w] = CPU.GPR[ra]._s16[w*2] * CPU.GPR[rb]._s16[w*2] + CPU.GPR[rc]._s32[w];
} }
void FNMS(u32 rt, u32 ra, u32 rb, u32 rc) void FNMS(u32 rt, u32 ra, u32 rb, u32 rc) {FMA(rt, ra, rb, rc, true, true);}
void FMA(u32 rt, u32 ra, u32 rb, u32 rc) {FMA(rt, ra, rb, rc, false, false);}
void FMS(u32 rt, u32 ra, u32 rb, u32 rc) {FMA(rt, ra, rb, rc, false, true);}
void FMA(u32 rt, u32 ra, u32 rb, u32 rc, bool neg, bool sub)
{ {
SetHostRoundingMode(FPSCR_RN_ZERO);
for (int w = 0; w < 4; w++) for (int w = 0; w < 4; w++)
{ {
CPU.GPR[rt]._f[w] = CPU.GPR[rc]._f[w] - CPU.GPR[ra]._f[w] * CPU.GPR[rb]._f[w]; float a = CPU.GPR[ra]._f[w];
//if (CPU.GPR[rt]._f[w] == -0.0f) CPU.GPR[rt]._f[w] = 0.0f; float b = neg ? -CPU.GPR[rb]._f[w] : CPU.GPR[rb]._f[w];
} float c = (neg != sub) ? -CPU.GPR[rc]._f[w] : CPU.GPR[rc]._f[w];
} if (isdenormal(a))
void FMA(u32 rt, u32 ra, u32 rb, u32 rc) {
{ CPU.FPSCR.setSinglePrecisionExceptionFlags(w, FPSCR_SDIFF);
for (int w = 0; w < 4; w++) a = 0.0f;
{ }
CPU.GPR[rt]._f[w] = CPU.GPR[rc]._f[w] + CPU.GPR[ra]._f[w] * CPU.GPR[rb]._f[w]; if (isdenormal(b))
//if (CPU.GPR[rt]._f[w] == -0.0f) CPU.GPR[rt]._f[w] = 0.0f; {
} CPU.FPSCR.setSinglePrecisionExceptionFlags(w, FPSCR_SDIFF);
} b = 0.0f;
void FMS(u32 rt, u32 ra, u32 rb, u32 rc) }
{ if (isdenormal(c))
for (int w = 0; w < 4; w++) {
{ CPU.FPSCR.setSinglePrecisionExceptionFlags(w, FPSCR_SDIFF);
CPU.GPR[rt]._f[w] = CPU.GPR[ra]._f[w] * CPU.GPR[rb]._f[w] - CPU.GPR[rc]._f[w]; c = 0.0f;
//if (CPU.GPR[rt]._f[w] == -0.0f) CPU.GPR[rt]._f[w] = 0.0f; }
const bool sign = std::signbit(a) ^ std::signbit(b);
float result;
if (isextended(a) || isextended(b))
{
CPU.FPSCR.setSinglePrecisionExceptionFlags(w, FPSCR_SDIFF);
if (a == 0.0f || b == 0.0f)
{
result = c;
}
else if ((fexpf(a)-127) + (fexpf(b)-127) >= 130)
{
CPU.FPSCR.setSinglePrecisionExceptionFlags(w, FPSCR_SOVF);
result = extended(sign, 0x7FFFFF);
}
else
{
float new_a, new_b;
if (isextended(a))
{
new_a = ldexpf_extended(a, -2);
new_b = b;
}
else
{
new_a = a;
new_b = ldexpf_extended(b, -2);
}
if (fexpf(c) < 3)
{
result = new_a * new_b;
if (c != 0.0f && std::signbit(c) != sign)
{
u32 bits = (u32&)result - 1;
result = (float&)bits;
}
}
else
{
result = fmaf(new_a, new_b, ldexpf_extended(c, -2));
}
if (fabsf(result) >= ldexpf(1.0f, 127))
{
CPU.FPSCR.setSinglePrecisionExceptionFlags(w, FPSCR_SOVF);
result = extended(sign, 0x7FFFFF);
}
else
{
result = ldexpf_extended(result, 2);
}
}
}
else if (isextended(c))
{
CPU.FPSCR.setSinglePrecisionExceptionFlags(w, FPSCR_SDIFF);
if (a == 0.0f || b == 0.0f)
{
result = c;
}
else if ((fexpf(a)-127) + (fexpf(b)-127) < 96)
{
result = c;
if (sign != std::signbit(c))
{
u32 bits = (u32&)result - 1;
result = (float&)bits;
}
}
else
{
result = fmaf(ldexpf(a,-1), ldexpf(b,-1), ldexpf_extended(c,-2));
if (fabsf(result) >= ldexpf(1.0f, 127))
{
CPU.FPSCR.setSinglePrecisionExceptionFlags(w, FPSCR_SOVF);
result = extended(sign, 0x7FFFFF);
}
else
{
result = ldexpf_extended(result, 2);
}
}
}
else
{
feclearexcept(FE_ALL_EXCEPT);
result = fmaf(a, b, c);
if (fetestexcept(FE_OVERFLOW))
{
CPU.FPSCR.setSinglePrecisionExceptionFlags(w, FPSCR_SDIFF);
if (fexpf(a) > fexpf(b))
result = fmaf(ldexpf(a,-2), b, ldexpf(c,-2));
else
result = fmaf(a, ldexpf(b,-2), ldexpf(c,-2));
if (fabsf(result) >= ldexpf(1.0f, 127))
{
CPU.FPSCR.setSinglePrecisionExceptionFlags(w, FPSCR_SOVF);
result = extended(sign, 0x7FFFFF);
}
else
{
result = ldexpf_extended(result, 2);
}
}
else if (fetestexcept(FE_UNDERFLOW))
{
CPU.FPSCR.setSinglePrecisionExceptionFlags(w, FPSCR_SUNF | FPSCR_SDIFF);
}
}
if (isdenormal(result))
{
CPU.FPSCR.setSinglePrecisionExceptionFlags(w, FPSCR_SUNF | FPSCR_SDIFF);
result = 0.0f;
}
else if (result == 0.0f)
{
result = +0.0f;
}
CPU.GPR[rt]._f[w] = result;
} }
} }

View File

@ -558,6 +558,8 @@ u32 SPUThread::GetChannelCount(u32 ch)
switch (ch) switch (ch)
{ {
case SPU_WrSRR0: res = 1; break;
case SPU_RdSRR0: res = 1; break;
case SPU_WrOutMbox: res = SPU.Out_MBox.GetFreeCount(); break; case SPU_WrOutMbox: res = SPU.Out_MBox.GetFreeCount(); break;
case SPU_WrOutIntrMbox: res = SPU.Out_IntrMBox.GetFreeCount(); break; case SPU_WrOutIntrMbox: res = SPU.Out_IntrMBox.GetFreeCount(); break;
case SPU_RdInMbox: res = SPU.In_MBox.GetCount(); break; case SPU_RdInMbox: res = SPU.In_MBox.GetCount(); break;
@ -589,6 +591,9 @@ void SPUThread::WriteChannel(u32 ch, const u128& r)
switch (ch) switch (ch)
{ {
case SPU_WrSRR0:
SRR0 = v & 0x3FFFC; //LSLR & ~3
break;
case SPU_WrOutIntrMbox: case SPU_WrOutIntrMbox:
{ {
if (!group) // if RawSPU if (!group) // if RawSPU
@ -910,6 +915,9 @@ void SPUThread::ReadChannel(u128& r, u32 ch)
switch (ch) switch (ch)
{ {
case SPU_RdSRR0:
v = SRR0;
break;
case SPU_RdInMbox: case SPU_RdInMbox:
{ {
while (!SPU.In_MBox.Pop(v) && !Emu.IsStopped()) while (!SPU.In_MBox.Pop(v) && !Emu.IsStopped())
@ -1223,4 +1231,4 @@ spu_thread::spu_thread(u32 entry, const std::string& name, u32 stack_size, u32 p
thread->SetPrio(prio ? prio : Emu.GetInfo().GetProcParam().primary_prio); thread->SetPrio(prio ? prio : Emu.GetInfo().GetProcParam().primary_prio);
argc = 0; argc = 0;
} }

View File

@ -1,4 +1,5 @@
#pragma once #pragma once
#include "Emu/Cell/Common.h"
#include "Emu/Memory/atomic_type.h" #include "Emu/Memory/atomic_type.h"
#include "PPCThread.h" #include "PPCThread.h"
#include "Emu/SysCalls/lv2/sleep_queue_type.h" #include "Emu/SysCalls/lv2/sleep_queue_type.h"
@ -167,45 +168,46 @@ struct g_imm_table_struct
extern const g_imm_table_struct g_imm_table; extern const g_imm_table_struct g_imm_table;
//Floating point status and control register. Unsure if this is one of the GPRs or SPRs enum FPSCR_EX
{
//Single-precision exceptions
FPSCR_SOVF = 1 << 2, //Overflow
FPSCR_SUNF = 1 << 1, //Underflow
FPSCR_SDIFF = 1 << 0, //Different (could be IEEE non-compliant)
//Double-precision exceptions
FPSCR_DOVF = 1 << 13, //Overflow
FPSCR_DUNF = 1 << 12, //Underflow
FPSCR_DINX = 1 << 11, //Inexact
FPSCR_DINV = 1 << 10, //Invalid operation
FPSCR_DNAN = 1 << 9, //NaN
FPSCR_DDENORM = 1 << 8, //Denormal
};
//Is 128 bits, but bits 0-19, 24-28, 32-49, 56-60, 64-81, 88-92, 96-115, 120-124 are unused //Is 128 bits, but bits 0-19, 24-28, 32-49, 56-60, 64-81, 88-92, 96-115, 120-124 are unused
class FPSCR class SPU_FPSCR
{ {
public: public:
u64 low; u32 _u32[4];
u64 hi;
FPSCR() {} SPU_FPSCR() {}
std::string ToString() const std::string ToString() const
{ {
return "FPSCR writer not yet implemented"; //fmt::Format("%08x%08x%08x%08x", _u32[3], _u32[2], _u32[1], _u32[0]); return fmt::Format("%08x%08x%08x%08x", _u32[3], _u32[2], _u32[1], _u32[0]);
} }
void Reset() void Reset()
{ {
memset(this, 0, sizeof(*this)); memset(this, 0, sizeof(*this));
} }
//slice -> 0 - 1 (4 slices total, only two have rounding) //slice -> 0 - 1 (double-precision slice index)
//0 -> round even //NOTE: slices follow u128 indexing, i.e. slice 0 is RIGHT end of register!
//1 -> round towards zero (truncate) //roundTo -> FPSCR_RN_*
//2 -> round towards positive inf
//3 -> round towards neg inf
void setSliceRounding(u8 slice, u8 roundTo) void setSliceRounding(u8 slice, u8 roundTo)
{ {
u64 mask = roundTo; int shift = 8 + 2*slice;
switch(slice) //rounding is located in the left end of the FPSCR
{ this->_u32[3] = (this->_u32[3] & ~(3 << shift)) | (roundTo << shift);
case 0:
mask = mask << 20;
break;
case 1:
mask = mask << 22;
break;
}
//rounding is located in the low end of the FPSCR
this->low = this->low & mask;
} }
//Slice 0 or 1 //Slice 0 or 1
u8 checkSliceRounding(u8 slice) const u8 checkSliceRounding(u8 slice) const
@ -213,10 +215,10 @@ public:
switch(slice) switch(slice)
{ {
case 0: case 0:
return this->low >> 20 & 0x3; return this->_u32[3] >> 8 & 0x3;
case 1: case 1:
return this->low >> 22 & 0x3; return this->_u32[3] >> 10 & 0x3;
default: default:
throw fmt::Format("Unexpected slice value in FPSCR::checkSliceRounding(): %d", slice); throw fmt::Format("Unexpected slice value in FPSCR::checkSliceRounding(): %d", slice);
@ -224,34 +226,28 @@ public:
} }
} }
//Single Precision Exception Flags (all 3 slices) //Single-precision exception flags (all 4 slices)
//slice -> slice number (0-3) //slice -> slice number (0-3)
//exception: 1 -> Overflow 2 -> Underflow 4-> Diff (could be IE^3 non compliant) //exception: FPSCR_S* bitmask
void setSinglePrecisionExceptionFlags(u8 slice, u8 exception) void setSinglePrecisionExceptionFlags(u8 slice, u32 exceptions)
{ {
u64 mask = exception; _u32[slice] |= exceptions;
switch(slice) }
{
case 0: //Single-precision divide-by-zero flags (all 4 slices)
mask = mask << 29; //slice -> slice number (0-3)
this->low = this->low & mask; void setDivideByZeroFlag(u8 slice)
break; {
case 1: _u32[0] |= 1 << (8 + slice);
mask = mask << 61; }
this->low = this->low & mask;
break; //Double-precision exception flags
case 2: //slice -> slice number (0-1)
mask = mask << 29; //exception: FPSCR_D* bitmask
this->hi = this->hi & mask; void setDoublePrecisionExceptionFlags(u8 slice, u32 exceptions)
break; {
case 3: _u32[1+slice] |= exceptions;
mask = mask << 61;
this->hi = this->hi & mask;
break;
}
} }
}; };
union SPU_SNRConfig_hdr union SPU_SNRConfig_hdr
@ -277,7 +273,8 @@ class SPUThread : public PPCThread
{ {
public: public:
u128 GPR[128]; // General-Purpose Registers u128 GPR[128]; // General-Purpose Registers
//FPSCR FPSCR; SPU_FPSCR FPSCR;
u32 SRR0;
SPU_SNRConfig_hdr cfg; // Signal Notification Registers Configuration (OR-mode enabled: 0x1 for SNR1, 0x2 for SNR2) SPU_SNRConfig_hdr cfg; // Signal Notification Registers Configuration (OR-mode enabled: 0x1 for SNR1, 0x2 for SNR2)
u64 R_ADDR; // reservation address u64 R_ADDR; // reservation address
@ -630,4 +627,4 @@ public:
return *this; return *this;
} }
}; };

View File

@ -350,7 +350,7 @@ std::string GLFragmentDecompilerThread::BuildCode()
p += param.Format(); p += param.Format();
} }
return std::string("#version 330\n" return std::string("#version 420\n"
"\n" "\n"
+ p + "\n" + p + "\n"
"void main()\n{\n" + main + "}\n"); "void main()\n{\n" + main + "}\n");

View File

@ -734,7 +734,7 @@ void DrawCursorObj::Draw()
void DrawCursorObj::InitializeShaders() void DrawCursorObj::InitializeShaders()
{ {
m_vp.shader = m_vp.shader =
"#version 330\n" "#version 420\n"
"\n" "\n"
"uniform vec4 in_pos;\n" "uniform vec4 in_pos;\n"
"uniform vec2 in_tc;\n" "uniform vec2 in_tc;\n"
@ -747,10 +747,10 @@ void DrawCursorObj::InitializeShaders()
"}\n"; "}\n";
m_fp.shader = m_fp.shader =
"#version 330\n" "#version 420\n"
"\n" "\n"
"in vec2 tc;\n" "in vec2 tc;\n"
"uniform sampler2D tex0;\n" "layout (binding = 0) uniform sampler2D tex0;\n"
"layout (location = 0) out vec4 res;\n" "layout (location = 0) out vec4 res;\n"
"\n" "\n"
"void main()\n" "void main()\n"

View File

@ -498,7 +498,7 @@ std::string GLVertexDecompilerThread::BuildCode()
} }
static const std::string& prot = static const std::string& prot =
"#version 330\n" "#version 420\n"
"\n" "\n"
"uniform mat4 scaleOffsetMat = mat4(1.0);\n" "uniform mat4 scaleOffsetMat = mat4(1.0);\n"
"%s\n" "%s\n"