From 260d5b7aa78e384dfb7d723eb52cae12e1b8da1b Mon Sep 17 00:00:00 2001 From: Stenzek Date: Thu, 1 Feb 2018 17:36:30 +1000 Subject: [PATCH] BPMemory: Handle fog configuration where both A and C are infinity/NaN The console appears to behave against standard IEEE754 specification here, in particular around how NaNs are handled. NaNs appear to have no effect on the result, and are treated the same as positive or negative infinity, based on the sign bit. However, when the result would be NaN (inf - inf, or (-inf) - (-inf)), this results in a completely fogged color, or unfogged color respectively. We handle this by returning a constant zero for the A variable, and positive or negative infinity for C depending on the sign bits of the A and C registers. This ensures that no NaN value is passed to the GPU in the first place, and that the result of the fog calculation cannot be NaN. --- Source/Core/VideoBackends/Software/Tev.cpp | 6 +-- Source/Core/VideoCommon/BPMemory.cpp | 44 ++++++++++++++++--- Source/Core/VideoCommon/BPMemory.h | 21 +++++---- .../Core/VideoCommon/PixelShaderManager.cpp | 4 +- 4 files changed, 54 insertions(+), 21 deletions(-) diff --git a/Source/Core/VideoBackends/Software/Tev.cpp b/Source/Core/VideoBackends/Software/Tev.cpp index 836299e715..572da58f9c 100644 --- a/Source/Core/VideoBackends/Software/Tev.cpp +++ b/Source/Core/VideoBackends/Software/Tev.cpp @@ -751,14 +751,14 @@ void Tev::Draw() // ze = A/(B - (Zs >> B_SHF)) const s32 denom = bpmem.fog.b_magnitude - (Position[2] >> bpmem.fog.b_shift); // in addition downscale magnitude and zs to 0.24 bits - ze = (bpmem.fog.a.GetA() * 16777215.0f) / (float)denom; + ze = (bpmem.fog.GetA() * 16777215.0f) / static_cast<float>(denom); } else { // orthographic // ze = a*Zs // in addition downscale zs to 0.24 bits - ze = bpmem.fog.a.GetA() * ((float)Position[2] / 16777215.0f); + ze = bpmem.fog.GetA() * (static_cast<float>(Position[2]) / 16777215.0f); } if (bpmem.fogRange.Base.Enabled) @@ -796,7 +796,7 @@ void Tev::Draw() // 
GXInitFogAdjTable): 1/cos = c/b = sqrt(a^2+b^2)/b } - ze -= bpmem.fog.c_proj_fsel.GetC(); + ze -= bpmem.fog.GetC(); // clamp 0 to 1 float fog = (ze < 0.0f) ? 0.0f : ((ze > 1.0f) ? 1.0f : ze); diff --git a/Source/Core/VideoCommon/BPMemory.cpp b/Source/Core/VideoCommon/BPMemory.cpp index 8c0a2ec43a..fe6e62ed6e 100644 --- a/Source/Core/VideoCommon/BPMemory.cpp +++ b/Source/Core/VideoCommon/BPMemory.cpp @@ -23,21 +23,55 @@ bool BlendMode::UseLogicOp() const return true; } -float FogParam0::GetA() const +bool FogParams::IsNaNCase() const { + // Check for the case where both a and c are infinity or NaN. + // On hardware, this results in the following colors: + // + // ------------------------------------------------------- + // | A | C | Result | A | C | Result | + // ------------------------------------------------------- + // | inf | inf | Fogged | inf | nan | Fogged | + // | inf | -inf | Unfogged | inf | -nan | Unfogged | + // | -inf | inf | Unfogged | -inf | nan | Unfogged | + // | -inf | -inf | Unfogged | -inf | -nan | Unfogged | + // ------------------------------------------------------- + // | nan | inf | Fogged | nan | nan | Fogged | + // | nan | -inf | Unfogged | nan | -nan | Unfogged | + // | -nan | inf | Unfogged | -nan | nan | Unfogged | + // | -nan | -inf | Unfogged | -nan | -nan | Unfogged | + // ------------------------------------------------------- + // + // We replicate this by returning A=0, and C=-inf for the inf/inf case, otherwise inf. + // This ensures we do not pass a NaN to the GPU, and ze - C = inf/-inf clamps to 1/0 respectively. 
+ return a.exp == 255 && c_proj_fsel.c_exp == 255; +} + +float FogParams::GetA() const +{ + if (IsNaNCase()) + return 0.0f; + // scale mantissa from 11 to 23 bits - const u32 integral = (static_cast<u32>(sign) << 31) | (static_cast<u32>(exponent) << 23) | - (static_cast<u32>(mantissa) << 12); + const u32 integral = (static_cast<u32>(a.sign) << 31) | (static_cast<u32>(a.exp) << 23) | + (static_cast<u32>(a.mant) << 12); float real; std::memcpy(&real, &integral, sizeof(u32)); return real; } -float FogParam3::GetC() const +float FogParams::GetC() const { + if (IsNaNCase()) + { + constexpr float inf = std::numeric_limits<float>::infinity(); + return !a.sign && !c_proj_fsel.c_sign ? -inf : inf; + } + // scale mantissa from 11 to 23 bits - const u32 integral = (c_sign.Value() << 31) | (c_exp.Value() << 23) | (c_mant.Value() << 12); + const u32 integral = (c_proj_fsel.c_sign.Value() << 31) | (c_proj_fsel.c_exp.Value() << 23) | + (c_proj_fsel.c_mant.Value() << 12); float real; std::memcpy(&real, &integral, sizeof(u32)); diff --git a/Source/Core/VideoCommon/BPMemory.h b/Source/Core/VideoCommon/BPMemory.h index fce322c4fd..0157b5aed3 100644 --- a/Source/Core/VideoCommon/BPMemory.h +++ b/Source/Core/VideoCommon/BPMemory.h @@ -654,14 +654,9 @@ union BlendMode union FogParam0 { - struct - { - u32 mantissa : 11; - u32 exponent : 8; - u32 sign : 1; - }; - - float GetA() const; + BitField<0, 11, u32> mant; + BitField<11, 8, u32> exp; + BitField<19, 1, u32> sign; u32 hex; }; @@ -675,9 +670,6 @@ union FogParam3 BitField<21, 3, u32> fsel; // 0 - off, 2 - linear, 4 - exp, 5 - exp2, 6 - // backward exp, 7 - backward exp2 - // amount to subtract from eyespacez after range adjustment - float GetC() const; - u32 hex; }; @@ -721,6 +713,13 @@ struct FogParams }; FogColor color; // 0:b 8:g 16:r - nice! + + // Special case where both a and c are infinity or NaN (exponent of 255), handled so no NaN is produced. 
+ bool IsNaNCase() const; + float GetA() const; + + // amount to subtract from eyespacez after range adjustment + float GetC() const; }; union ZMode diff --git a/Source/Core/VideoCommon/PixelShaderManager.cpp b/Source/Core/VideoCommon/PixelShaderManager.cpp index 8ada00ef3b..783f5e179c 100644 --- a/Source/Core/VideoCommon/PixelShaderManager.cpp +++ b/Source/Core/VideoCommon/PixelShaderManager.cpp @@ -409,9 +409,9 @@ void PixelShaderManager::SetFogParamChanged() { if (!g_ActiveConfig.bDisableFog) { - constants.fogf[1][0] = bpmem.fog.a.GetA(); + constants.fogf[1][0] = bpmem.fog.GetA(); constants.fogi[1] = bpmem.fog.b_magnitude; - constants.fogf[1][2] = bpmem.fog.c_proj_fsel.GetC(); + constants.fogf[1][2] = bpmem.fog.GetC(); constants.fogi[3] = bpmem.fog.b_shift; constants.fogParam3 = bpmem.fog.c_proj_fsel.hex; }