diff --git a/Source/Core/VideoCommon/Src/BPMemory.h b/Source/Core/VideoCommon/Src/BPMemory.h index 55bb4cae02..5254ba7bd7 100644 --- a/Source/Core/VideoCommon/Src/BPMemory.h +++ b/Source/Core/VideoCommon/Src/BPMemory.h @@ -144,6 +144,11 @@ #define TEVALPHAARG_KONST 6 #define TEVALPHAARG_ZERO 7 +#define GX_TEVPREV 0 +#define GX_TEVREG0 1 +#define GX_TEVREG1 2 +#define GX_TEVREG2 3 + #define ALPHACMP_NEVER 0 #define ALPHACMP_LESS 1 #define ALPHACMP_EQUAL 2 @@ -371,7 +376,7 @@ struct TevStageCombiner union TwoTevStageOrders { - struct + struct { u32 texmap0 : 3; // indirect tex stage texmap u32 texcoord0 : 3; diff --git a/Source/Core/VideoCommon/Src/Debugger.cpp b/Source/Core/VideoCommon/Src/Debugger.cpp index 32b7343a1e..85a9ba61f6 100644 --- a/Source/Core/VideoCommon/Src/Debugger.cpp +++ b/Source/Core/VideoCommon/Src/Debugger.cpp @@ -89,21 +89,21 @@ void GFXDebuggerBase::DumpPixelShader(const char* path) if (!useDstAlpha) { output = "Destination alpha disabled:\n"; - output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); +/// output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); } else { if(g_ActiveConfig.backend_info.bSupportsDualSourceBlend) { output = "Using dual source blending for destination alpha:\n"; - output += GeneratePixelShaderCode(DSTALPHA_DUAL_SOURCE_BLEND, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); +/// output += GeneratePixelShaderCode(DSTALPHA_DUAL_SOURCE_BLEND, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); } else { output = "Using two passes for emulating destination alpha:\n"; - output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); +/// output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); output += "\n\nDestination alpha pass 
shader:\n"; - output += GeneratePixelShaderCode(DSTALPHA_ALPHA_PASS, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); +/// output += GeneratePixelShaderCode(DSTALPHA_ALPHA_PASS, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); } } @@ -117,7 +117,7 @@ void GFXDebuggerBase::DumpVertexShader(const char* path) sprintf(filename, "%sdump_vs.txt", path); File::CreateEmptyFile(filename); - File::WriteStringToFile(true, GenerateVertexShaderCode(g_nativeVertexFmt->m_components, g_ActiveConfig.backend_info.APIType), filename); +/// File::WriteStringToFile(true, GenerateVertexShaderCode(g_nativeVertexFmt->m_components, g_ActiveConfig.backend_info.APIType), filename); } void GFXDebuggerBase::DumpPixelShaderConstants(const char* path) diff --git a/Source/Core/VideoCommon/Src/LightingShaderGen.cpp b/Source/Core/VideoCommon/Src/LightingShaderGen.cpp index 4be33e9eac..58da3612f5 100644 --- a/Source/Core/VideoCommon/Src/LightingShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/LightingShaderGen.cpp @@ -5,217 +5,3 @@ #include "LightingShaderGen.h" #include "NativeVertexFormat.h" #include "XFMemory.h" - -#define WRITE p+=sprintf - -int GetLightingShaderId(u32* out) -{ - for (u32 i = 0; i < xfregs.numChan.numColorChans; ++i) - { - out[i] = xfregs.color[i].enablelighting ? - (u32)xfregs.color[i].hex : - (u32)xfregs.color[i].matsource; - out[i] |= (xfregs.alpha[i].enablelighting ? 
- (u32)xfregs.alpha[i].hex : - (u32)xfregs.alpha[i].matsource) << 15; - } - _assert_(xfregs.numChan.numColorChans <= 2); - return xfregs.numChan.numColorChans; -} - -// coloralpha - 1 if color, 2 if alpha -char *GenerateLightShader(char *p, int index, const LitChannel& chan, const char* lightsName, int coloralpha) -{ - const char* swizzle = "xyzw"; - - if (coloralpha == 1 ) - swizzle = "xyz"; - else if (coloralpha == 2 ) - swizzle = "w"; - - if (!(chan.attnfunc & 1)) - { - // attenuation disabled - switch (chan.diffusefunc) - { - case LIGHTDIF_NONE: - WRITE(p, "lacc.%s += %s[%d].%s;\n", swizzle, lightsName, index * 5, swizzle); - break; - case LIGHTDIF_SIGN: - case LIGHTDIF_CLAMP: - WRITE(p, "ldir = normalize(%s[%d + 3].xyz - pos.xyz);\n", lightsName, index * 5); - WRITE(p, "lacc.%s += %sdot(ldir, _norm0)) * %s[%d].%s;\n", - swizzle, chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0f," :"(", lightsName, index * 5, swizzle); - break; - default: _assert_(0); - } - } - else // spec and spot - { - if (chan.attnfunc == 3) - { // spot - WRITE(p, "ldir = %s[%d + 3].xyz - pos.xyz;\n", lightsName, index * 5); - WRITE(p, "dist2 = dot(ldir, ldir);\n" - "dist = sqrt(dist2);\n" - "ldir = ldir / dist;\n" - "attn = max(0.0f, dot(ldir, %s[%d + 4].xyz));\n", lightsName, index * 5); - WRITE(p, "attn = max(0.0f, dot(%s[%d + 1].xyz, float3(1.0f, attn, attn*attn))) / dot(%s[%d + 2].xyz, float3(1.0f,dist,dist2));\n", lightsName, index * 5, lightsName, index * 5); - } - else if (chan.attnfunc == 1) - { // specular - WRITE(p, "ldir = normalize(%s[%d + 3].xyz);\n", lightsName, index * 5); - WRITE(p, "attn = (dot(_norm0,ldir) >= 0.0f) ? 
max(0.0f, dot(_norm0, %s[%d + 4].xyz)) : 0.0f;\n", lightsName, index * 5); - WRITE(p, "attn = max(0.0f, dot(%s[%d + 1].xyz, float3(1,attn,attn*attn))) / dot(%s[%d + 2].xyz, float3(1,attn,attn*attn));\n", lightsName, index * 5, lightsName, index * 5); - } - - switch (chan.diffusefunc) - { - case LIGHTDIF_NONE: - WRITE(p, "lacc.%s += attn * %s[%d].%s;\n", swizzle, lightsName, index * 5, swizzle); - break; - case LIGHTDIF_SIGN: - case LIGHTDIF_CLAMP: - WRITE(p, "lacc.%s += attn * %sdot(ldir, _norm0)) * %s[%d].%s;\n", - swizzle, - chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0f," :"(", - lightsName, - index * 5, - swizzle); - break; - default: _assert_(0); - } - } - WRITE(p, "\n"); - return p; -} - -// vertex shader -// lights/colors -// materials name is I_MATERIALS in vs and I_PMATERIALS in ps -// inColorName is color in vs and colors_ in ps -// dest is o.colors_ in vs and colors_ in ps -char *GenerateLightingShader(char *p, int components, const char* materialsName, const char* lightsName, const char* inColorName, const char* dest) -{ - for (unsigned int j = 0; j < xfregs.numChan.numColorChans; j++) - { - const LitChannel& color = xfregs.color[j]; - const LitChannel& alpha = xfregs.alpha[j]; - - WRITE(p, "{\n"); - - if (color.matsource) // from vertex - { - if (components & (VB_HAS_COL0 << j)) - WRITE(p, "mat = %s%d;\n", inColorName, j); - else if (components & VB_HAS_COL0) - WRITE(p, "mat = %s0;\n", inColorName); - else - WRITE(p, "mat = float4(1.0f, 1.0f, 1.0f, 1.0f);\n"); - } - else // from color - { - WRITE(p, "mat = %s[%d];\n", materialsName, j+2); - } - - if (color.enablelighting) - { - if (color.ambsource) // from vertex - { - if (components & (VB_HAS_COL0< +static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, int litchan_index, const char* lightsName, int coloralpha) +{ + const LitChannel& chan = (litchan_index > 1) ? 
xfregs.alpha[litchan_index-2] : xfregs.color[litchan_index]; + const char* swizzle = "xyzw"; + if (coloralpha == 1) + swizzle = "xyz"; + else if (coloralpha == 2) + swizzle = "w"; + + uid_data.attnfunc |= chan.attnfunc << (2*litchan_index); + uid_data.diffusefunc |= chan.diffusefunc << (2*litchan_index); + if (!(chan.attnfunc & 1)) + { + // atten disabled + switch (chan.diffusefunc) + { + case LIGHTDIF_NONE: + object.Write("lacc.%s += %s;\n", swizzle, LightCol(lightsName, index, swizzle)); + break; + case LIGHTDIF_SIGN: + case LIGHTDIF_CLAMP: + object.Write("ldir = normalize(%s.xyz - pos.xyz);\n", LightPos(lightsName, index)); + object.Write("lacc.%s += %sdot(ldir, _norm0)) * %s;\n", + swizzle, chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0f," :"(", LightCol(lightsName, index, swizzle)); + break; + default: _assert_(0); + } + } + else // spec and spot + { + if (chan.attnfunc == 3) + { // spot + object.Write("ldir = %s.xyz - pos.xyz;\n", LightPos(lightsName, index)); + object.Write("dist2 = dot(ldir, ldir);\n" + "dist = sqrt(dist2);\n" + "ldir = ldir / dist;\n" + "attn = max(0.0f, dot(ldir, %s.xyz));\n", LightDir(lightsName, index)); + object.Write("attn = max(0.0f, dot(%s.xyz, float3(1.0f, attn, attn*attn))) / dot(%s.xyz, float3(1.0f,dist,dist2));\n", LightCosAtt(lightsName, index), LightDistAtt(lightsName, index)); + } + else if (chan.attnfunc == 1) + { // specular + object.Write("ldir = normalize(%s.xyz);\n", LightPos(lightsName, index)); + object.Write("attn = (dot(_norm0,ldir) >= 0.0f) ? 
max(0.0f, dot(_norm0, %s.xyz)) : 0.0f;\n", LightDir(lightsName, index)); + object.Write("attn = max(0.0f, dot(%s.xyz, float3(1,attn,attn*attn))) / dot(%s.xyz, float3(1,attn,attn*attn));\n", LightCosAtt(lightsName, index), LightDistAtt(lightsName, index)); + } + + switch (chan.diffusefunc) + { + case LIGHTDIF_NONE: + object.Write("lacc.%s += attn * %s;\n", swizzle, LightCol(lightsName, index, swizzle)); + break; + case LIGHTDIF_SIGN: + case LIGHTDIF_CLAMP: + object.Write("lacc.%s += attn * %sdot(ldir, _norm0)) * %s;\n", + swizzle, + chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0f," :"(", + LightCol(lightsName, index, swizzle)); + break; + default: _assert_(0); + } + } + object.Write("\n"); +} + +// vertex shader +// lights/colors +// materials name is I_MATERIALS in vs and I_PMATERIALS in ps +// inColorName is color in vs and colors_ in ps +// dest is o.colors_ in vs and colors_ in ps +template +static void GenerateLightingShader(T& object, LightingUidData& uid_data, int components, const char* materialsName, const char* lightsName, const char* inColorName, const char* dest) +{ + for (unsigned int j = 0; j < xfregs.numChan.numColorChans; j++) + { + const LitChannel& color = xfregs.color[j]; + const LitChannel& alpha = xfregs.alpha[j]; + + object.Write("{\n"); + + uid_data.matsource |= xfregs.color[j].matsource << j; + if (color.matsource) // from vertex + { + if (components & (VB_HAS_COL0 << j)) + object.Write("mat = %s%d;\n", inColorName, j); + else if (components & VB_HAS_COL0) + object.Write("mat = %s0;\n", inColorName); + else + object.Write("mat = float4(1.0f, 1.0f, 1.0f, 1.0f);\n"); + } + else // from color + { + object.Write("mat = %s[%d];\n", materialsName, j+2); + } + + uid_data.enablelighting |= xfregs.color[j].enablelighting << j; + if (color.enablelighting) + { + uid_data.ambsource |= xfregs.color[j].ambsource << j; + if (color.ambsource) // from vertex + { + if (components & (VB_HAS_COL0<(object, uid_data, i, j, lightsName, 3); + } + } + } + } + + // no 
shared lights + for (int i = 0; i < 8; ++i) + { + if (!(mask&(1<(object, uid_data, i, j, lightsName, 1); + if (!(mask&(1<(object, uid_data, i, j+2, lightsName, 2); + } + } + else if (color.enablelighting || alpha.enablelighting) + { + // lights are disabled on one channel so process only the active ones + const LitChannel& workingchannel = color.enablelighting ? color : alpha; + const int lit_index = color.enablelighting ? j : (j+2); + int coloralpha = color.enablelighting ? 1 : 2; + + uid_data.light_mask |= workingchannel.GetFullLightMask() << (8*lit_index); + for (int i = 0; i < 8; ++i) + { + if (workingchannel.GetFullLightMask() & (1<(object, uid_data, i, lit_index, lightsName, coloralpha); + } + } + object.Write("%s%d = mat * clamp(lacc, 0.0, 1.0);\n", dest, j); + object.Write("}\n"); + } +} #endif // _LIGHTINGSHADERGEN_H_ diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index cfd236da69..70931a9261 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -18,252 +18,6 @@ #include "NativeVertexFormat.h" -static void StageHash(u32 stage, u32* out) -{ - out[0] |= bpmem.combiners[stage].colorC.hex & 0xFFFFFF; // 24 - u32 alphaC = bpmem.combiners[stage].alphaC.hex & 0xFFFFF0; // 24, strip out tswap and rswap for now - out[0] |= (alphaC&0xF0) << 24; // 8 - out[1] |= alphaC >> 8; // 16 - - // reserve 3 bits for bpmem.tevorders[stage/2].getTexMap - out[1] |= bpmem.tevorders[stage/2].getTexCoord(stage&1) << 19; // 3 - out[1] |= bpmem.tevorders[stage/2].getEnable(stage&1) << 22; // 1 - // reserve 3 bits for bpmem.tevorders[stage/2].getColorChan - - bool bHasIndStage = bpmem.tevind[stage].IsActive() && bpmem.tevind[stage].bt < bpmem.genMode.numindstages; - out[2] |= bHasIndStage << 2; // 1 - - bool needstexcoord = false; - - if (bHasIndStage) - { - out[2] |= (bpmem.tevind[stage].hex & 0x17FFFF) << 3; // 21, TODO: needs an explanation - needstexcoord = 
true; - } - - - TevStageCombiner::ColorCombiner& cc = bpmem.combiners[stage].colorC; - TevStageCombiner::AlphaCombiner& ac = bpmem.combiners[stage].alphaC; - - if(cc.a == TEVCOLORARG_RASA || cc.a == TEVCOLORARG_RASC - || cc.b == TEVCOLORARG_RASA || cc.b == TEVCOLORARG_RASC - || cc.c == TEVCOLORARG_RASA || cc.c == TEVCOLORARG_RASC - || cc.d == TEVCOLORARG_RASA || cc.d == TEVCOLORARG_RASC - || ac.a == TEVALPHAARG_RASA || ac.b == TEVALPHAARG_RASA - || ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA) - { - out[0] |= bpmem.combiners[stage].alphaC.rswap; - out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2].swap1 << 24; // 2 - out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2].swap2 << 26; // 2 - out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2+1].swap1 << 28; // 2 - out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2+1].swap2 << 30; // 2 - out[1] |= (bpmem.tevorders[stage/2].getColorChan(stage&1)&1) << 23; - out[2] |= (bpmem.tevorders[stage/2].getColorChan(stage&1)&0x6) >> 1; - } - - out[3] |= bpmem.tevorders[stage/2].getEnable(stage&1); - if (bpmem.tevorders[stage/2].getEnable(stage&1)) - { - if (bHasIndStage) - needstexcoord = true; - - out[0] |= bpmem.combiners[stage].alphaC.tswap; - out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2].swap1 << 1; // 2 - out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2].swap2 << 3; // 2 - out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2+1].swap1 << 5; // 2 - out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2+1].swap2 << 7; // 2 - out[1] |= bpmem.tevorders[stage/2].getTexMap(stage&1) << 16; - } - - if (cc.a == TEVCOLORARG_KONST || cc.b == TEVCOLORARG_KONST || cc.c == TEVCOLORARG_KONST || cc.d == TEVCOLORARG_KONST - || ac.a == TEVALPHAARG_KONST || ac.b == TEVALPHAARG_KONST || ac.c == TEVALPHAARG_KONST || ac.d == TEVALPHAARG_KONST) - { - out[3] |= bpmem.tevksel[stage/2].getKC(stage&1) << 9; // 5 - out[3] |= 
bpmem.tevksel[stage/2].getKA(stage&1) << 14; // 5 - } - - if (needstexcoord) - { - out[1] |= bpmem.tevorders[stage/2].getTexCoord(stage&1) << 16; - } -} - -// Mash together all the inputs that contribute to the code of a generated pixel shader into -// a unique identifier, basically containing all the bits. Yup, it's a lot .... -// It would likely be a lot more efficient to build this incrementally as the attributes -// are set... -void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode, u32 components) -{ - memset(uid->values, 0, sizeof(uid->values)); - uid->values[0] |= bpmem.genMode.numtevstages; // 4 - uid->values[0] |= bpmem.genMode.numtexgens << 4; // 4 - uid->values[0] |= dstAlphaMode << 8; // 2 - uid->values[0] |= g_ActiveConfig.bFastDepthCalc << 10; // 1 - - bool enablePL = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting; - uid->values[0] |= enablePL << 11; // 1 - - if (!enablePL) - { - uid->values[0] |= xfregs.numTexGen.numTexGens << 12; // 4 - } - - AlphaTest::TEST_RESULT alphaPreTest = bpmem.alpha_test.TestResult(); - uid->values[0] |= alphaPreTest << 16; // 2 - - // numtexgens should be <= 8 - for (unsigned int i = 0; i < bpmem.genMode.numtexgens; ++i) - { - uid->values[0] |= xfregs.texMtxInfo[i].projection << (18+i); // 1 - } - - uid->values[1] = bpmem.genMode.numindstages; // 3 - u32 indirectStagesUsed = 0; - for (unsigned int i = 0; i < bpmem.genMode.numindstages; ++i) - { - if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages) - indirectStagesUsed |= (1 << bpmem.tevind[i].bt); - } - - assert(indirectStagesUsed == (indirectStagesUsed & 0xF)); - - uid->values[1] |= indirectStagesUsed << 3; // 4; - - for (unsigned int i = 0; i < bpmem.genMode.numindstages; ++i) - { - if (indirectStagesUsed & (1 << i)) - { - uid->values[1] |= (bpmem.tevindref.getTexCoord(i) < bpmem.genMode.numtexgens) << (7 + 3*i); // 1 - if (bpmem.tevindref.getTexCoord(i) < bpmem.genMode.numtexgens) - 
uid->values[1] |= bpmem.tevindref.getTexCoord(i) << (8 + 3*i); // 2 - } - } - - u32* ptr = &uid->values[2]; - for (unsigned int i = 0; i < bpmem.genMode.numtevstages+1u; ++i) - { - StageHash(i, ptr); - ptr += 4; // max: ptr = &uid->values[66] - } - - ptr[0] |= bpmem.alpha_test.comp0; // 3 - ptr[0] |= bpmem.alpha_test.comp1 << 3; // 3 - ptr[0] |= bpmem.alpha_test.logic << 6; // 2 - - ptr[0] |= bpmem.ztex2.op << 8; // 2 - ptr[0] |= bpmem.zcontrol.early_ztest << 10; // 1 - ptr[0] |= bpmem.zmode.testenable << 11; // 1 - ptr[0] |= bpmem.zmode.updateenable << 12; // 1 - - if (dstAlphaMode != DSTALPHA_ALPHA_PASS) - { - ptr[0] |= bpmem.fog.c_proj_fsel.fsel << 13; // 3 - if (bpmem.fog.c_proj_fsel.fsel != 0) - { - ptr[0] |= bpmem.fog.c_proj_fsel.proj << 16; // 1 - ptr[0] |= bpmem.fogRange.Base.Enabled << 17; // 1 - } - } - - ++ptr; - if (enablePL) - { - ptr += GetLightingShaderId(ptr); - *ptr++ = components; - } - - uid->num_values = int(ptr - uid->values); -} - -void GetSafePixelShaderId(PIXELSHADERUIDSAFE *uid, DSTALPHA_MODE dstAlphaMode, u32 components) -{ - memset(uid->values, 0, sizeof(uid->values)); - u32* ptr = uid->values; - *ptr++ = dstAlphaMode; // 0 - *ptr++ = bpmem.genMode.hex; // 1 - *ptr++ = bpmem.ztex2.hex; // 2 - *ptr++ = bpmem.zcontrol.hex; // 3 - *ptr++ = bpmem.zmode.hex; // 4 - *ptr++ = g_ActiveConfig.bFastDepthCalc; // 5 - *ptr++ = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting; // 6 - *ptr++ = xfregs.numTexGen.hex; // 7 - - if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) - { - *ptr++ = xfregs.color[0].hex; - *ptr++ = xfregs.alpha[0].hex; - *ptr++ = xfregs.color[1].hex; - *ptr++ = xfregs.alpha[1].hex; - *ptr++ = components; - } - - for (unsigned int i = 0; i < 8; ++i) - *ptr++ = xfregs.texMtxInfo[i].hex; // 8-15 - - for (unsigned int i = 0; i < 16; ++i) - *ptr++ = bpmem.tevind[i].hex; // 16-31 - - *ptr++ = bpmem.tevindref.hex; // 32 - - for (u32 i = 0; i < 
bpmem.genMode.numtevstages+1u; ++i) // up to 16 times - { - *ptr++ = bpmem.combiners[i].colorC.hex; // 33+5*i - *ptr++ = bpmem.combiners[i].alphaC.hex; // 34+5*i - *ptr++ = bpmem.tevind[i].hex; // 35+5*i - *ptr++ = bpmem.tevksel[i/2].hex; // 36+5*i - *ptr++ = bpmem.tevorders[i/2].hex; // 37+5*i - } - - ptr = &uid->values[113]; - - *ptr++ = bpmem.alpha_test.hex; // 113 - - *ptr++ = bpmem.fog.c_proj_fsel.hex; // 114 - *ptr++ = bpmem.fogRange.Base.hex; // 115 - - _assert_((ptr - uid->values) == uid->GetNumValues()); -} - -void ValidatePixelShaderIDs(API_TYPE api, PIXELSHADERUIDSAFE old_id, const std::string& old_code, DSTALPHA_MODE dstAlphaMode, u32 components) -{ - if (!g_ActiveConfig.bEnableShaderDebugging) - return; - - PIXELSHADERUIDSAFE new_id; - GetSafePixelShaderId(&new_id, dstAlphaMode, components); - - if (!(old_id == new_id)) - { - std::string new_code(GeneratePixelShaderCode(dstAlphaMode, api, components)); - if (old_code != new_code) - { - _assert_(old_id.GetNumValues() == new_id.GetNumValues()); - - char msg[8192]; - char* ptr = msg; - ptr += sprintf(ptr, "Pixel shader IDs matched but unique IDs did not!\nUnique IDs (old <-> new):\n"); - const int N = new_id.GetNumValues(); - for (int i = 0; i < N/2; ++i) - ptr += sprintf(ptr, "%02d, %08X %08X | %08X %08X\n", 2*i, old_id.values[2*i], old_id.values[2*i+1], - new_id.values[2*i], new_id.values[2*i+1]); - if (N % 2) - ptr += sprintf(ptr, "%02d, %08X | %08X\n", N-1, old_id.values[N-1], new_id.values[N-1]); - - static int num_failures = 0; - char szTemp[MAX_PATH]; - sprintf(szTemp, "%spsuid_mismatch_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++); - std::ofstream file; - OpenFStream(file, szTemp, std::ios_base::out); - file << msg; - file << "\n\nOld shader code:\n" << old_code; - file << "\n\nNew shader code:\n" << new_code; - file.close(); - - PanicAlert("Unique pixel shader ID mismatch!\n\nReport this to the devs, along with the contents of %s.", szTemp); - } - } -} - // old 
tev->pixelshader notes // // color for this stage (alpha, color) is given by bpmem.tevorders[0].colorchan0 @@ -273,12 +27,6 @@ void ValidatePixelShaderIDs(API_TYPE api, PIXELSHADERUIDSAFE old_id, const std:: // output is given by .outreg // tevtemp is set according to swapmodetables and -static void WriteStage(char *&p, int n, API_TYPE ApiType); -static void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType); -// static void WriteAlphaCompare(char *&p, int num, int comp); -static void WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth); -static void WriteFog(char *&p, API_TYPE ApiType); - static const char *tevKSelTableC[] = // KCSEL { "1.0f,1.0f,1.0f", // 1 = 0x00 @@ -457,12 +205,6 @@ static const char *tevIndBiasAdd[] = {"-128.0f", "1.0f", "1.0f", "1.0f" }; // static const char *tevIndWrapStart[] = {"0.0f", "256.0f", "128.0f", "64.0f", "32.0f", "16.0f", "0.001f" }; static const char *tevIndFmtScale[] = {"255.0f", "31.0f", "15.0f", "7.0f" }; -#define WRITE p+=sprintf - -static char swapModeTable[4][5]; - -static char text[16384]; - struct RegisterState { bool ColorNeedOverflowControl; @@ -470,20 +212,9 @@ struct RegisterState bool AuxStored; }; -static RegisterState RegisterStates[4]; +static char swapModeTable[4][5]; -static void BuildSwapModeTable() -{ - static const char *swapColors = "rgba"; - for (int i = 0; i < 4; i++) - { - swapModeTable[i][0] = swapColors[bpmem.tevksel[i*2].swap1]; - swapModeTable[i][1] = swapColors[bpmem.tevksel[i*2].swap2]; - swapModeTable[i][2] = swapColors[bpmem.tevksel[i*2+1].swap1]; - swapModeTable[i][3] = swapColors[bpmem.tevksel[i*2+1].swap2]; - swapModeTable[i][4] = 0; - } -} +static char text[16384]; // We can't use function defines since the Qualcomm shader compiler doesn't support it static const char *GLSLConvertFunctions[] = @@ -496,150 +227,140 @@ static const char *GLSLConvertFunctions[] = #define FUNC_FRAC 0 
#define FUNC_LERP 2 -const char* WriteRegister(API_TYPE ApiType, const char *prefix, const u32 num) +static void BuildSwapModeTable() { - if (ApiType == API_OPENGL) - return ""; // Nothing to do here - static char result[64]; - sprintf(result, " : register(%s%d)", prefix, num); - return result; + static const char *swapColors = "rgba"; + for (int i = 0; i < 4; i++) + { + swapModeTable[i][0] = swapColors[bpmem.tevksel[i*2].swap1]; + swapModeTable[i][1] = swapColors[bpmem.tevksel[i*2].swap2]; + swapModeTable[i][2] = swapColors[bpmem.tevksel[i*2+1].swap1]; + swapModeTable[i][3] = swapColors[bpmem.tevksel[i*2+1].swap2]; + swapModeTable[i][4] = '\0'; + } } -const char *WriteLocation(API_TYPE ApiType) -{ - if (g_ActiveConfig.backend_info.bSupportsGLSLUBO) - return ""; - static char result[64]; - sprintf(result, "uniform "); - return result; -} +template static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE ApiType, RegisterState RegisterStates[4]); +template static void SampleTexture(T& out, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType); +template static void WriteAlphaTest(T& out, pixel_shader_uid_data& uid_data, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth); +template static void WriteFog(T& out, pixel_shader_uid_data& uid_data, API_TYPE ApiType); -const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) +template +static void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) { + // Non-uid template parameters will write to the dummy data (=> gets optimized out) + pixel_shader_uid_data dummy_data; + pixel_shader_uid_data& uid_data = (&out.template GetUidData() != NULL) + ? 
out.template GetUidData() : dummy_data; + + out.SetBuffer(text); #ifndef ANDROID - locale_t locale = newlocale(LC_NUMERIC_MASK, "C", NULL); // New locale for compilation - locale_t old_locale = uselocale(locale); // Apply the locale for this thread + locale_t locale; + locale_t old_locale; + if (out.GetBuffer() != NULL) + { + locale = newlocale(LC_NUMERIC_MASK, "C", NULL); // New locale for compilation + old_locale = uselocale(locale); // Apply the locale for this thread + } #endif + text[sizeof(text) - 1] = 0x7C; // canary - BuildSwapModeTable(); // Needed for WriteStage - int numStages = bpmem.genMode.numtevstages + 1; - int numTexgen = bpmem.genMode.numtexgens; + unsigned int numStages = bpmem.genMode.numtevstages + 1; + unsigned int numTexgen = bpmem.genMode.numtexgens; - bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.early_ztest && bpmem.zmode.testenable) || !g_ActiveConfig.bFastDepthCalc; - bool bOpenGL = ApiType == API_OPENGL; - char *p = text; - WRITE(p, "//Pixel Shader for TEV stages\n"); - WRITE(p, "//%i TEV stages, %i texgens, XXX IND stages\n", - numStages, numTexgen/*, bpmem.genMode.numindstages*/); + const bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.early_ztest && bpmem.zmode.testenable) || !g_ActiveConfig.bFastDepthCalc; + const bool bOpenGL = ApiType == API_OPENGL; - int nIndirectStagesUsed = 0; - if (bpmem.genMode.numindstages > 0) - { - for (int i = 0; i < numStages; ++i) - { - if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages) - nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt; - } - } + out.Write("//Pixel Shader for TEV stages\n"); + out.Write("//%i TEV stages, %i texgens, %i IND stages\n", + numStages, numTexgen, bpmem.genMode.numindstages); + + uid_data.dstAlphaMode = dstAlphaMode; + uid_data.genMode_numindstages = bpmem.genMode.numindstages; + uid_data.genMode_numtevstages = bpmem.genMode.numtevstages; + uid_data.genMode_numtexgens = 
bpmem.genMode.numtexgens; if (ApiType == API_OPENGL) { - - // A function here // Fmod implementation gleaned from Nvidia // At http://http.developer.nvidia.com/Cg/fmod.html - WRITE(p, "float fmod( float x, float y )\n"); - WRITE(p, "{\n"); - WRITE(p, "\tfloat z = fract( abs( x / y) ) * abs( y );\n"); - WRITE(p, "\treturn (x < 0.0) ? -z : z;\n"); - WRITE(p, "}\n"); + out.Write("float fmod( float x, float y )\n"); + out.Write("{\n"); + out.Write("\tfloat z = fract( abs( x / y) ) * abs( y );\n"); + out.Write("\treturn (x < 0.0) ? -z : z;\n"); + out.Write("}\n"); + // Declare samplers for (int i = 0; i < 8; ++i) - WRITE(p, "uniform sampler2D samp%d;\n", i); + out.Write("uniform sampler2D samp%d;\n", i); } else { // Declare samplers - if (ApiType != API_D3D11) - { - WRITE(p, "uniform sampler2D "); - } - else - { - WRITE(p, "sampler "); - } - - bool bfirst = true; for (int i = 0; i < 8; ++i) - { - WRITE(p, "%s samp%d %s", bfirst?"":",", i, WriteRegister(ApiType, "s", i)); - bfirst = false; - } - WRITE(p, ";\n"); + out.Write("%s samp%d : register(s%d);\n", (ApiType == API_D3D11) ? 
"sampler" : "uniform sampler2D", i, i); + if (ApiType == API_D3D11) { - WRITE(p, "Texture2D "); - bfirst = true; + out.Write("\n"); for (int i = 0; i < 8; ++i) { - WRITE(p, "%s Tex%d : register(t%d)", bfirst?"":",", i, i); - bfirst = false; + out.Write("Texture2D Tex%d : register(t%d);\n", i, i); } - WRITE(p, ";\n"); } } + out.Write("\n"); - WRITE(p, "\n"); if (g_ActiveConfig.backend_info.bSupportsGLSLUBO) - WRITE(p, "layout(std140) uniform PSBlock {\n"); - - WRITE(p, "\t%sfloat4 " I_COLORS"[4] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_COLORS)); - WRITE(p, "\t%sfloat4 " I_KCOLORS"[4] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_KCOLORS)); - WRITE(p, "\t%sfloat4 " I_ALPHA"[1] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_ALPHA)); - WRITE(p, "\t%sfloat4 " I_TEXDIMS"[8] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_TEXDIMS)); - WRITE(p, "\t%sfloat4 " I_ZBIAS"[2] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_ZBIAS)); - WRITE(p, "\t%sfloat4 " I_INDTEXSCALE"[2] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_INDTEXSCALE)); - WRITE(p, "\t%sfloat4 " I_INDTEXMTX"[6] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_INDTEXMTX)); - WRITE(p, "\t%sfloat4 " I_FOG"[3] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_FOG)); - - // For pixel lighting - WRITE(p, "\t%sfloat4 " I_PLIGHTS"[40] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_PLIGHTS)); - WRITE(p, "\t%sfloat4 " I_PMATERIALS"[4] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_PMATERIALS)); - + out.Write("layout(std140) uniform PSBlock {\n"); + + DeclareUniform(out, ApiType, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_COLORS, "float4", I_COLORS"[4]"); + DeclareUniform(out, ApiType, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_KCOLORS, "float4", I_KCOLORS"[4]"); + DeclareUniform(out, ApiType, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_ALPHA, "float4", 
I_ALPHA"[1]"); // TODO: Why is this an array...-.- + DeclareUniform(out, ApiType, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_TEXDIMS, "float4", I_TEXDIMS"[8]"); + DeclareUniform(out, ApiType, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_ZBIAS, "float4", I_ZBIAS"[2]"); + DeclareUniform(out, ApiType, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_INDTEXSCALE, "float4", I_INDTEXSCALE"[2]"); + DeclareUniform(out, ApiType, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_INDTEXMTX, "float4", I_INDTEXMTX"[6]"); + DeclareUniform(out, ApiType, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_FOG, "float4", I_FOG"[3]"); + + // For pixel lighting - TODO: Should only be defined when per pixel lighting is enabled! + DeclareUniform(out, ApiType, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_PLIGHTS, "float4", I_PLIGHTS"[40]"); + DeclareUniform(out, ApiType, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_PMATERIALS, "float4", I_PMATERIALS"[4]"); + if (g_ActiveConfig.backend_info.bSupportsGLSLUBO) - WRITE(p, "};\n"); + out.Write("};\n"); if (ApiType == API_OPENGL) { - WRITE(p, "COLOROUT(ocol0)\n"); + out.Write("COLOROUT(ocol0)\n"); if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND) - WRITE(p, "COLOROUT(ocol1)\n"); - - if (per_pixel_depth) - WRITE(p, "#define depth gl_FragDepth\n"); - WRITE(p, "float4 rawpos = gl_FragCoord;\n"); + out.Write("COLOROUT(ocol1)\n"); - WRITE(p, "VARYIN float4 colors_02;\n"); - WRITE(p, "VARYIN float4 colors_12;\n"); - WRITE(p, "float4 colors_0 = colors_02;\n"); - WRITE(p, "float4 colors_1 = colors_12;\n"); + if (per_pixel_depth) + out.Write("#define depth gl_FragDepth\n"); + out.Write("float4 rawpos = gl_FragCoord;\n"); + + out.Write("VARYIN float4 colors_02;\n"); + out.Write("VARYIN float4 colors_12;\n"); + out.Write("float4 colors_0 = colors_02;\n"); + out.Write("float4 colors_1 = colors_12;\n"); // compute window position if needed because binding semantic WPOS is not widely supported - // Let's set up attributes + // Let's set up attributes 
if (xfregs.numTexGen.numTexGens < 7) { for (int i = 0; i < 8; ++i) { - WRITE(p, "VARYIN float3 uv%d_2;\n", i); - WRITE(p, "float3 uv%d = uv%d_2;\n", i, i); + out.Write("VARYIN float3 uv%d_2;\n", i); + out.Write("float3 uv%d = uv%d_2;\n", i, i); } - WRITE(p, "VARYIN float4 clipPos_2;\n"); - WRITE(p, "float4 clipPos = clipPos_2;\n"); + out.Write("VARYIN float4 clipPos_2;\n"); + out.Write("float4 clipPos = clipPos_2;\n"); if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) { - WRITE(p, "VARYIN float4 Normal_2;\n"); - WRITE(p, "float4 Normal = Normal_2;\n"); + out.Write("VARYIN float4 Normal_2;\n"); + out.Write("float4 Normal = Normal_2;\n"); } } else @@ -649,35 +370,35 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType { for (int i = 0; i < 8; ++i) { - WRITE(p, "VARYIN float4 uv%d_2;\n", i); - WRITE(p, "float4 uv%d = uv%d_2;\n", i, i); + out.Write("VARYIN float4 uv%d_2;\n", i); + out.Write("float4 uv%d = uv%d_2;\n", i, i); } } else { for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) { - WRITE(p, "VARYIN float%d uv%d_2;\n", i < 4 ? 4 : 3 , i); - WRITE(p, "float%d uv%d = uv%d_2;\n", i < 4 ? 4 : 3 , i, i); + out.Write("VARYIN float%d uv%d_2;\n", i < 4 ? 4 : 3 , i); + out.Write("float%d uv%d = uv%d_2;\n", i < 4 ? 4 : 3 , i, i); } } - WRITE(p, "float4 clipPos;\n"); + out.Write("float4 clipPos;\n"); } - WRITE(p, "void main()\n{\n"); + out.Write("void main()\n{\n"); } else { - WRITE(p, "void main(\n"); - if (ApiType != API_D3D11) + out.Write("void main(\n"); + if(ApiType != API_D3D11) { - WRITE(p, " out float4 ocol0 : COLOR0,%s%s\n in float4 rawpos : %s,\n", + out.Write(" out float4 ocol0 : COLOR0,%s%s\n in float4 rawpos : %s,\n", dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "\n out float4 ocol1 : COLOR1," : "", per_pixel_depth ? "\n out float depth : DEPTH," : "", ApiType & API_D3D9_SM20 ? 
"POSITION" : "VPOS"); } else { - WRITE(p, " out float4 ocol0 : SV_Target0,%s%s\n in float4 rawpos : SV_Position,\n", + out.Write(" out float4 ocol0 : SV_Target0,%s%s\n in float4 rawpos : SV_Position,\n", dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "\n out float4 ocol1 : SV_Target1," : "", per_pixel_depth ? "\n out float depth : SV_Depth," : ""); } @@ -685,18 +406,18 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType // "centroid" attribute is only supported by D3D11 const char* optCentroid = (ApiType == API_D3D11 ? "centroid" : ""); - WRITE(p, " in %s float4 colors_0 : COLOR0,\n", optCentroid); - WRITE(p, " in %s float4 colors_1 : COLOR1", optCentroid); + out.Write(" in %s float4 colors_0 : COLOR0,\n", optCentroid); + out.Write(" in %s float4 colors_1 : COLOR1", optCentroid); // compute window position if needed because binding semantic WPOS is not widely supported if (numTexgen < 7) { - for (int i = 0; i < numTexgen; ++i) - WRITE(p, ",\n in %s float3 uv%d : TEXCOORD%d", optCentroid, i, i); - WRITE(p, ",\n in %s float4 clipPos : TEXCOORD%d", optCentroid, numTexgen); + for (unsigned int i = 0; i < numTexgen; ++i) + out.Write(",\n in %s float3 uv%d : TEXCOORD%d", optCentroid, i, i); + out.Write(",\n in %s float4 clipPos : TEXCOORD%d", optCentroid, numTexgen); if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) - WRITE(p, ",\n in %s float4 Normal : TEXCOORD%d", optCentroid, numTexgen + 1); - WRITE(p, " ) {\n"); + out.Write(",\n in %s float4 Normal : TEXCOORD%d", optCentroid, numTexgen + 1); + out.Write(" ) {\n"); } else { @@ -704,19 +425,19 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) { for (int i = 0; i < 8; ++i) - WRITE(p, ",\n in float4 uv%d : TEXCOORD%d", i, i); + out.Write(",\n in float4 uv%d : TEXCOORD%d", i, i); } else { for (unsigned int i = 0; i < 
xfregs.numTexGen.numTexGens; ++i) - WRITE(p, ",\n in float%d uv%d : TEXCOORD%d", i < 4 ? 4 : 3 , i, i); + out.Write(",\n in float%d uv%d : TEXCOORD%d", i < 4 ? 4 : 3 , i, i); } - WRITE(p, " ) {\n"); - WRITE(p, "\tfloat4 clipPos = float4(0.0f, 0.0f, 0.0f, 0.0f);"); + out.Write(" ) {\n"); + out.Write("\tfloat4 clipPos = float4(0.0f, 0.0f, 0.0f, 0.0f);"); } } - WRITE(p, " float4 c0 = " I_COLORS"[1], c1 = " I_COLORS"[2], c2 = " I_COLORS"[3], prev = float4(0.0f, 0.0f, 0.0f, 0.0f), textemp = float4(0.0f, 0.0f, 0.0f, 0.0f), rastemp = float4(0.0f, 0.0f, 0.0f, 0.0f), konsttemp = float4(0.0f, 0.0f, 0.0f, 0.0f);\n" + out.Write(" float4 c0 = " I_COLORS"[1], c1 = " I_COLORS"[2], c2 = " I_COLORS"[3], prev = float4(0.0f, 0.0f, 0.0f, 0.0f), textemp = float4(0.0f, 0.0f, 0.0f, 0.0f), rastemp = float4(0.0f, 0.0f, 0.0f, 0.0f), konsttemp = float4(0.0f, 0.0f, 0.0f, 0.0f);\n" " float3 comp16 = float3(1.0f, 255.0f, 0.0f), comp24 = float3(1.0f, 255.0f, 255.0f*255.0f);\n" " float alphabump=0.0f;\n" " float3 tevcoord=float3(0.0f, 0.0f, 0.0f);\n" @@ -727,68 +448,90 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) { + uid_data.xfregs_numTexGen_numTexGens = xfregs.numTexGen.numTexGens; if (xfregs.numTexGen.numTexGens < 7) { - WRITE(p,"\tfloat3 _norm0 = normalize(Normal.xyz);\n\n"); - WRITE(p,"\tfloat3 pos = float3(clipPos.x,clipPos.y,Normal.w);\n"); + out.Write("\tfloat3 _norm0 = normalize(Normal.xyz);\n\n"); + out.Write("\tfloat3 pos = float3(clipPos.x,clipPos.y,Normal.w);\n"); } else { - WRITE(p,"\tfloat3 _norm0 = normalize(float3(uv4.w,uv5.w,uv6.w));\n\n"); - WRITE(p,"\tfloat3 pos = float3(uv0.w,uv1.w,uv7.w);\n"); + out.Write("\tfloat3 _norm0 = normalize(float3(uv4.w,uv5.w,uv6.w));\n\n"); + out.Write("\tfloat3 pos = float3(uv0.w,uv1.w,uv7.w);\n"); } + out.Write("\tfloat4 mat, lacc;\n" + "\tfloat3 ldir, h;\n" + "\tfloat dist, dist2, attn;\n"); - WRITE(p, "\tfloat4 
mat, lacc;\n" - "\tfloat3 ldir, h;\n" - "\tfloat dist, dist2, attn;\n"); - - p = GenerateLightingShader(p, components, I_PMATERIALS, I_PLIGHTS, "colors_", "colors_"); + out.SetConstantsUsed(C_PLIGHTS, C_PLIGHTS+39); // TODO: Can be optimized further + out.SetConstantsUsed(C_PMATERIALS, C_PMATERIALS+3); + uid_data.components = components; + GenerateLightingShader(out, uid_data.lighting, components, I_PMATERIALS, I_PLIGHTS, "colors_", "colors_"); } if (numTexgen < 7) - WRITE(p, "\tclipPos = float4(rawpos.x, rawpos.y, clipPos.z, clipPos.w);\n"); + out.Write("\tclipPos = float4(rawpos.x, rawpos.y, clipPos.z, clipPos.w);\n"); else - WRITE(p, "\tclipPos = float4(rawpos.x, rawpos.y, uv2.w, uv3.w);\n"); + out.Write("\tfloat4 clipPos = float4(rawpos.x, rawpos.y, uv2.w, uv3.w);\n"); // HACK to handle cases where the tex gen is not enabled if (numTexgen == 0) { - WRITE(p, "\tfloat3 uv0 = float3(0.0f, 0.0f, 0.0f);\n"); + out.Write("\tfloat3 uv0 = float3(0.0f, 0.0f, 0.0f);\n"); } else { - for (int i = 0; i < numTexgen; ++i) + out.SetConstantsUsed(C_TEXDIMS, C_TEXDIMS+numTexgen-1); + for (unsigned int i = 0; i < numTexgen; ++i) { // optional perspective divides + uid_data.texMtxInfo_n_projection |= xfregs.texMtxInfo[i].projection << i; if (xfregs.texMtxInfo[i].projection == XF_TEXPROJ_STQ) { - WRITE(p, "\tif (uv%d.z != 0.0f)", i); - WRITE(p, "\t\tuv%d.xy = uv%d.xy / uv%d.z;\n", i, i, i); + out.Write("\tif (uv%d.z != 0.0f)", i); + out.Write("\t\tuv%d.xy = uv%d.xy / uv%d.z;\n", i, i, i); } - WRITE(p, "uv%d.xy = uv%d.xy * " I_TEXDIMS"[%d].zw;\n", i, i, i); + out.Write("uv%d.xy = uv%d.xy * " I_TEXDIMS"[%d].zw;\n", i, i, i); } } // indirect texture map lookup - for (u32 i = 0; i < bpmem.genMode.numindstages; ++i) + int nIndirectStagesUsed = 0; + if (bpmem.genMode.numindstages > 0) { - if (nIndirectStagesUsed & (1<(out, buffer, "tempcoord", "abg", texmap, ApiType); + } + } + + RegisterState RegisterStates[4]; RegisterStates[0].AlphaNeedOverflowControl = false; 
RegisterStates[0].ColorNeedOverflowControl = false; RegisterStates[0].AuxStored = false; @@ -799,8 +542,10 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType RegisterStates[i].AuxStored = false; } - for (int i = 0; i < numStages; i++) - WriteStage(p, i, ApiType); //build the equation for this stage + // Uid fields for BuildSwapModeTable are set in WriteStage + BuildSwapModeTable(); + for (unsigned int i = 0; i < numStages; i++) + WriteStage(out, uid_data, i, ApiType, RegisterStates); // build the equation for this stage if (numStages) { @@ -809,94 +554,112 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType if(bpmem.combiners[numStages - 1].colorC.dest != 0) { bool retrieveFromAuxRegister = !RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].ColorNeedOverflowControl && RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].AuxStored; - WRITE(p, "\tprev.rgb = %s%s;\n", retrieveFromAuxRegister ? "c" : "" , tevCOutputTable[bpmem.combiners[numStages - 1].colorC.dest]); + out.Write("\tprev.rgb = %s%s;\n", retrieveFromAuxRegister ? "c" : "" , tevCOutputTable[bpmem.combiners[numStages - 1].colorC.dest]); RegisterStates[0].ColorNeedOverflowControl = RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].ColorNeedOverflowControl; } if(bpmem.combiners[numStages - 1].alphaC.dest != 0) { bool retrieveFromAuxRegister = !RegisterStates[bpmem.combiners[numStages - 1].alphaC.dest].AlphaNeedOverflowControl && RegisterStates[bpmem.combiners[numStages - 1].alphaC.dest].AuxStored; - WRITE(p, "\tprev.a = %s%s;\n", retrieveFromAuxRegister ? "c" : "" , tevAOutputTable[bpmem.combiners[numStages - 1].alphaC.dest]); + out.Write("\tprev.a = %s%s;\n", retrieveFromAuxRegister ? 
"c" : "" , tevAOutputTable[bpmem.combiners[numStages - 1].alphaC.dest]); RegisterStates[0].AlphaNeedOverflowControl = RegisterStates[bpmem.combiners[numStages - 1].alphaC.dest].AlphaNeedOverflowControl; } } // emulation of unsigned 8 overflow when casting if needed if(RegisterStates[0].AlphaNeedOverflowControl || RegisterStates[0].ColorNeedOverflowControl) - WRITE(p, "\tprev = %s(prev * (255.0f/256.0f)) * (256.0f/255.0f);\n", GLSLConvertFunctions[FUNC_FRAC + bOpenGL]); + out.Write("\tprev = %s(prev * (255.0f/256.0f)) * (256.0f/255.0f);\n", GLSLConvertFunctions[FUNC_FRAC + bOpenGL]); AlphaTest::TEST_RESULT Pretest = bpmem.alpha_test.TestResult(); + uid_data.Pretest = Pretest; if (Pretest == AlphaTest::UNDETERMINED) - WriteAlphaTest(p, ApiType, dstAlphaMode, per_pixel_depth); + WriteAlphaTest(out, uid_data, ApiType, dstAlphaMode, per_pixel_depth); - - // dx9 doesn't support readback of depth in pixel shader, so we always have to calculate it again - // shouldn't be a performance issue as the written depth is usually still from perspective division + + // D3D9 doesn't support readback of depth in pixel shader, so we always have to calculate it again. 
+ // This shouldn't be a performance issue as the written depth is usually still from perspective division // but this isn't true for z-textures, so there will be depth issues between enabled and disabled z-textures fragments - if((ApiType == API_OPENGL || ApiType == API_D3D11) && g_ActiveConfig.bFastDepthCalc) - WRITE(p, "float zCoord = rawpos.z;\n"); + if ((ApiType == API_OPENGL || ApiType == API_D3D11) && g_ActiveConfig.bFastDepthCalc) + out.Write("float zCoord = rawpos.z;\n"); else + { + out.SetConstantsUsed(C_ZBIAS+1, C_ZBIAS+1); // the screen space depth value = far z + (clip z / clip w) * z range - WRITE(p, "float zCoord = " I_ZBIAS"[1].x + (clipPos.z / clipPos.w) * " I_ZBIAS"[1].y;\n"); + out.Write("float zCoord = " I_ZBIAS"[1].x + (clipPos.z / clipPos.w) * " I_ZBIAS"[1].y;\n"); + } // depth texture can safely be ignored if the result won't be written to the depth buffer (early_ztest) and isn't used for fog either - bool skip_ztexture = !per_pixel_depth && !bpmem.fog.c_proj_fsel.fsel; - + const bool skip_ztexture = !per_pixel_depth && !bpmem.fog.c_proj_fsel.fsel; + + uid_data.ztex_op = bpmem.ztex2.op; + uid_data.per_pixel_depth = per_pixel_depth; + uid_data.fast_depth_calc = g_ActiveConfig.bFastDepthCalc; + uid_data.fog_fsel = bpmem.fog.c_proj_fsel.fsel; + // Note: z-textures are not written to depth buffer if early depth test is used if (per_pixel_depth && bpmem.zcontrol.early_ztest) - WRITE(p, "depth = zCoord;\n"); - + out.Write("depth = zCoord;\n"); + if (bpmem.ztex2.op != ZTEXTURE_DISABLE && !skip_ztexture) { // use the texture input of the last texture stage (textemp), hopefully this has been read and is in correct format... - WRITE(p, "zCoord = dot(" I_ZBIAS"[0].xyzw, textemp.xyzw) + " I_ZBIAS"[1].w %s;\n", + out.SetConstantsUsed(C_ZBIAS, C_ZBIAS+1); + out.Write("zCoord = dot(" I_ZBIAS"[0].xyzw, textemp.xyzw) + " I_ZBIAS"[1].w %s;\n", (bpmem.ztex2.op == ZTEXTURE_ADD) ? 
"+ zCoord" : ""); - // scale to make result from frac correct - WRITE(p, "zCoord = zCoord * (16777215.0f/16777216.0f);\n"); - WRITE(p, "zCoord = %s(zCoord);\n", GLSLConvertFunctions[FUNC_FRAC + bOpenGL]); - WRITE(p, "zCoord = zCoord * (16777216.0f/16777215.0f);\n"); + // U24 overflow emulation + out.Write("zCoord = zCoord * (16777215.0f/16777216.0f);\n"); + out.Write("zCoord = %s(zCoord);\n", GLSLConvertFunctions[FUNC_FRAC + bOpenGL]); + out.Write("zCoord = zCoord * (16777216.0f/16777215.0f);\n"); + + // Note: depth texture output is only written to depth buffer if late depth test is used + // final depth value is used for fog calculation, though + if (per_pixel_depth) + out.Write("depth = zCoord;\n"); } - - if (per_pixel_depth && !bpmem.zcontrol.early_ztest) - WRITE(p, "depth = zCoord;\n"); + else if (per_pixel_depth && !bpmem.zcontrol.early_ztest) + out.Write("depth = zCoord;\n"); if (dstAlphaMode == DSTALPHA_ALPHA_PASS) { - WRITE(p, "\tocol0 = float4(prev.rgb, " I_ALPHA"[0].a);\n"); + out.SetConstantsUsed(C_ALPHA, C_ALPHA); + out.Write("\tocol0 = float4(prev.rgb, " I_ALPHA"[0].a);\n"); } else { - WriteFog(p, ApiType); - WRITE(p, "\tocol0 = prev;\n"); + WriteFog(out, uid_data, ApiType); + out.Write("\tocol0 = prev;\n"); } - // Use dual-source color blending to perform dst alpha in a - // single pass + // Use dual-source color blending to perform dst alpha in a single pass if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND) { + out.SetConstantsUsed(C_ALPHA, C_ALPHA); if(ApiType & API_D3D9) { // alpha component must be 0 or the shader will not compile (Direct3D 9Ex restriction) - // Colors will be blended against the color from ocol1 in D3D 9... - WRITE(p, "\tocol1 = float4(prev.a, prev.a, prev.a, 0.0f);\n"); + // Colors will be blended against the color from ocol1 in D3D 9... + out.Write("\tocol1 = float4(prev.a, prev.a, prev.a, 0.0f);\n"); } else { // Colors will be blended against the alpha from ocol1... 
- WRITE(p, "\tocol1 = prev;\n"); + out.Write("\tocol1 = prev;\n"); } // ...and the alpha from ocol0 will be written to the framebuffer. - WRITE(p, "\tocol0.a = " I_ALPHA"[0].a;\n"); + out.Write("\tocol0.a = " I_ALPHA"[0].a;\n"); } - - WRITE(p, "}\n"); + + out.Write("}\n"); + if (text[sizeof(text) - 1] != 0x7C) PanicAlert("PixelShader generator - buffer too small, canary has been eaten!"); #ifndef ANDROID - uselocale(old_locale); // restore locale - freelocale(locale); + if (out.GetBuffer() != NULL) + { + uselocale(old_locale); // restore locale + freelocale(locale); + } #endif - return text; } @@ -943,7 +706,8 @@ static const char *TEVCMPAlphaOPTable[16] = " %s.a + (abs(%s.a - %s.a) < (0.5f/255.0f) ? %s.a : 0.0f)"//#define TEVCMP_A8_EQ 15 }; -static void WriteStage(char *&p, int n, API_TYPE ApiType) +template +static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE ApiType, RegisterState RegisterStates[4]) { int texcoord = bpmem.tevorders[n/2].getTexCoord(n&1); bool bHasTexCoord = (u32)texcoord < bpmem.genMode.numtexgens; @@ -953,86 +717,121 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType) if (!bHasTexCoord) texcoord = 0; - WRITE(p, "// TEV stage %d\n", n); + out.Write("// TEV stage %d\n", n); + uid_data.bHasIndStage |= bHasIndStage << n; + uid_data.tevorders_n_texcoord |= (u64)texcoord << (3 * n); if (bHasIndStage) { - WRITE(p, "// indirect op\n"); + uid_data.tevind_n_bs |= bpmem.tevind[n].bs << (2*n); + uid_data.tevind_n_bt |= bpmem.tevind[n].bt << (2*n); + uid_data.tevind_n_fmt |= bpmem.tevind[n].fmt << (2*n); + + out.Write("// indirect op\n"); // perform the indirect op on the incoming regular coordinates using indtex%d as the offset coords if (bpmem.tevind[n].bs != ITBA_OFF) { - WRITE(p, "alphabump = indtex%d.%s %s;\n", + out.Write("alphabump = indtex%d.%s %s;\n", bpmem.tevind[n].bt, tevIndAlphaSel[bpmem.tevind[n].bs], tevIndAlphaScale[bpmem.tevind[n].fmt]); } // format - WRITE(p, "float3 indtevcrd%d = indtex%d * %s;\n", n, 
bpmem.tevind[n].bt, tevIndFmtScale[bpmem.tevind[n].fmt]); + out.Write("float3 indtevcrd%d = indtex%d * %s;\n", n, bpmem.tevind[n].bt, tevIndFmtScale[bpmem.tevind[n].fmt]); // bias + uid_data.Set_tevind_bias(n, bpmem.tevind[n].bias); if (bpmem.tevind[n].bias != ITB_NONE ) - WRITE(p, "indtevcrd%d.%s += %s;\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt]); + out.Write("indtevcrd%d.%s += %s;\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt]); // multiply by offset matrix and scale + uid_data.Set_tevind_mid(n, bpmem.tevind[n].mid); if (bpmem.tevind[n].mid != 0) { if (bpmem.tevind[n].mid <= 3) { int mtxidx = 2*(bpmem.tevind[n].mid-1); - WRITE(p, "float2 indtevtrans%d = float2(dot(" I_INDTEXMTX"[%d].xyz, indtevcrd%d), dot(" I_INDTEXMTX"[%d].xyz, indtevcrd%d));\n", - n, mtxidx, n, mtxidx+1, n); + out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx); + out.Write("float2 indtevtrans%d = float2(dot(" I_INDTEXMTX"[%d].xyz, indtevcrd%d), dot(" I_INDTEXMTX"[%d].xyz, indtevcrd%d));\n", + n, mtxidx, n, mtxidx+1, n); } else if (bpmem.tevind[n].mid <= 7 && bHasTexCoord) { // s matrix _assert_(bpmem.tevind[n].mid >= 5); int mtxidx = 2*(bpmem.tevind[n].mid-5); - WRITE(p, "float2 indtevtrans%d = " I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.xx;\n", n, mtxidx, texcoord, n); + out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx); + out.Write("float2 indtevtrans%d = " I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.xx;\n", n, mtxidx, texcoord, n); } else if (bpmem.tevind[n].mid <= 11 && bHasTexCoord) { // t matrix _assert_(bpmem.tevind[n].mid >= 9); int mtxidx = 2*(bpmem.tevind[n].mid-9); - WRITE(p, "float2 indtevtrans%d = " I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.yy;\n", n, mtxidx, texcoord, n); + out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx); + out.Write("float2 indtevtrans%d = " I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.yy;\n", n, mtxidx, texcoord, n); } else { - WRITE(p, "float2 
indtevtrans%d = float2(0.0f, 0.0f);\n", n); + out.Write("float2 indtevtrans%d = float2(0.0f, 0.0f);\n", n); } } else { - WRITE(p, "float2 indtevtrans%d = float2(0.0f, 0.0f);\n", n); + out.Write("float2 indtevtrans%d = float2(0.0f, 0.0f);\n", n); } // --------- // Wrapping // --------- + uid_data.Set_tevind_sw(n, bpmem.tevind[n].sw); + uid_data.Set_tevind_tw(n, bpmem.tevind[n].tw); + uid_data.tevind_n_fb_addprev |= bpmem.tevind[n].fb_addprev << n; // wrap S if (bpmem.tevind[n].sw == ITW_OFF) - WRITE(p, "wrappedcoord.x = uv%d.x;\n", texcoord); + out.Write("wrappedcoord.x = uv%d.x;\n", texcoord); else if (bpmem.tevind[n].sw == ITW_0) - WRITE(p, "wrappedcoord.x = 0.0f;\n"); + out.Write("wrappedcoord.x = 0.0f;\n"); else - WRITE(p, "wrappedcoord.x = fmod( uv%d.x, %s );\n", texcoord, tevIndWrapStart[bpmem.tevind[n].sw]); + out.Write("wrappedcoord.x = fmod( uv%d.x, %s );\n", texcoord, tevIndWrapStart[bpmem.tevind[n].sw]); // wrap T if (bpmem.tevind[n].tw == ITW_OFF) - WRITE(p, "wrappedcoord.y = uv%d.y;\n", texcoord); + out.Write("wrappedcoord.y = uv%d.y;\n", texcoord); else if (bpmem.tevind[n].tw == ITW_0) - WRITE(p, "wrappedcoord.y = 0.0f;\n"); + out.Write("wrappedcoord.y = 0.0f;\n"); else - WRITE(p, "wrappedcoord.y = fmod( uv%d.y, %s );\n", texcoord, tevIndWrapStart[bpmem.tevind[n].tw]); + out.Write("wrappedcoord.y = fmod( uv%d.y, %s );\n", texcoord, tevIndWrapStart[bpmem.tevind[n].tw]); if (bpmem.tevind[n].fb_addprev) // add previous tevcoord - WRITE(p, "tevcoord.xy += wrappedcoord + indtevtrans%d;\n", n); + out.Write("tevcoord.xy += wrappedcoord + indtevtrans%d;\n", n); else - WRITE(p, "tevcoord.xy = wrappedcoord + indtevtrans%d;\n", n); + out.Write("tevcoord.xy = wrappedcoord + indtevtrans%d;\n", n); } TevStageCombiner::ColorCombiner &cc = bpmem.combiners[n].colorC; TevStageCombiner::AlphaCombiner &ac = bpmem.combiners[n].alphaC; + uid_data.cc_n_d |= (u64)cc.d << (4*n); + uid_data.cc_n_c |= (u64)cc.c << (4*n); + uid_data.cc_n_b |= (u64)cc.b << (4*n); + uid_data.cc_n_a 
|= (u64)cc.a << (4*n); + uid_data.cc_n_bias |= cc.bias << (2*n); + uid_data.cc_n_op |= cc.op << n; + uid_data.cc_n_clamp |= cc.clamp << n; + uid_data.cc_n_shift |= cc.shift << (2*n); + uid_data.cc_n_dest |= cc.dest << (2*n); + uid_data.ac_n_rswap |= ac.rswap << (2*n); + uid_data.ac_n_tswap |= ac.tswap << (2*n); + uid_data.ac_n_d |= (u64)ac.d << (3*n); + uid_data.ac_n_c |= (u64)ac.c << (3*n); + uid_data.ac_n_b |= (u64)ac.b << (3*n); + uid_data.ac_n_a |= (u64)ac.a << (3*n); + uid_data.ac_n_bias |= ac.bias << (2*n); + uid_data.ac_n_op |= ac.op << n; + uid_data.ac_n_clamp |= ac.clamp << n; + uid_data.ac_n_shift |= ac.shift << (2*n); + uid_data.ac_n_dest |= ac.dest << (2*n); + if(cc.a == TEVCOLORARG_RASA || cc.a == TEVCOLORARG_RASC || cc.b == TEVCOLORARG_RASA || cc.b == TEVCOLORARG_RASC || cc.c == TEVCOLORARG_RASA || cc.c == TEVCOLORARG_RASC @@ -1040,9 +839,15 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType) || ac.a == TEVALPHAARG_RASA || ac.b == TEVALPHAARG_RASA || ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA) { + const int i = bpmem.combiners[n].alphaC.rswap; + uid_data.tevksel_n_swap1 |= bpmem.tevksel[i*2 ].swap1 << (2 * (i*2 )); + uid_data.tevksel_n_swap1 |= bpmem.tevksel[i*2+1].swap1 << (2 * (i*2+1)); + uid_data.tevksel_n_swap2 |= bpmem.tevksel[i*2 ].swap2 << (2 * (i*2 )); + uid_data.tevksel_n_swap2 |= bpmem.tevksel[i*2+1].swap2 << (2 * (i*2+1)); + char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap]; - WRITE(p, "rastemp = %s.%s;\n", tevRasTable[bpmem.tevorders[n / 2].getColorChan(n & 1)], rasswap); - WRITE(p, "crastemp = %s(rastemp * (255.0f/256.0f)) * (256.0f/255.0f);\n", GLSLConvertFunctions[FUNC_FRAC + bOpenGL]); + out.Write("rastemp = %s.%s;\n", tevRasTable[bpmem.tevorders[n / 2].getColorChan(n & 1)], rasswap); + out.Write("crastemp = %s(rastemp * (255.0f/256.0f)) * (256.0f/255.0f);\n", GLSLConvertFunctions[FUNC_FRAC + bOpenGL]); } @@ -1051,19 +856,26 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType) if (!bHasIndStage) 
{ // calc tevcord - if (bHasTexCoord) - WRITE(p, "tevcoord.xy = uv%d.xy;\n", texcoord); + if(bHasTexCoord) + out.Write("tevcoord.xy = uv%d.xy;\n", texcoord); else - WRITE(p, "tevcoord.xy = float2(0.0f, 0.0f);\n"); + out.Write("tevcoord.xy = float2(0.0f, 0.0f);\n"); } + const int i = bpmem.combiners[n].alphaC.tswap; + uid_data.tevksel_n_swap1 |= bpmem.tevksel[i*2 ].swap1 << (2 * (i*2 )); + uid_data.tevksel_n_swap1 |= bpmem.tevksel[i*2+1].swap1 << (2 * (i*2+1)); + uid_data.tevksel_n_swap2 |= bpmem.tevksel[i*2 ].swap2 << (2 * (i*2 )); + uid_data.tevksel_n_swap2 |= bpmem.tevksel[i*2+1].swap2 << (2 * (i*2+1)); + char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap]; int texmap = bpmem.tevorders[n/2].getTexMap(n&1); - SampleTexture(p, "textemp", "tevcoord", texswap, texmap, ApiType); + uid_data.SetTevindrefTexmap(i, texmap); + SampleTexture(out, "textemp", "tevcoord", texswap, texmap, ApiType); } else { - WRITE(p, "textemp = float4(1.0f, 1.0f, 1.0f, 1.0f);\n"); + out.Write("textemp = float4(1.0f, 1.0f, 1.0f, 1.0f);\n"); } @@ -1072,15 +884,21 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType) { int kc = bpmem.tevksel[n / 2].getKC(n & 1); int ka = bpmem.tevksel[n / 2].getKA(n & 1); - WRITE(p, "konsttemp = float4(%s, %s);\n", tevKSelTableC[kc], tevKSelTableA[ka]); - if (kc > 7 || ka > 7) + uid_data.set_tevksel_kcsel(n/2, n & 1, kc); + uid_data.set_tevksel_kasel(n/2, n & 1, ka); + out.Write("konsttemp = float4(%s, %s);\n", tevKSelTableC[kc], tevKSelTableA[ka]); + if(kc > 7 || ka > 7) { - WRITE(p, "ckonsttemp = %s(konsttemp * (255.0f/256.0f)) * (256.0f/255.0f);\n", GLSLConvertFunctions[FUNC_FRAC + bOpenGL]); + out.Write("ckonsttemp = %s(konsttemp * (255.0f/256.0f)) * (256.0f/255.0f);\n", GLSLConvertFunctions[FUNC_FRAC + bOpenGL]); } else { - WRITE(p, "ckonsttemp = konsttemp;\n"); + out.Write("ckonsttemp = konsttemp;\n"); } + if (kc > 7) + out.SetConstantsUsed(C_KCOLORS+((kc-0xc)%4),C_KCOLORS+((kc-0xc)%4)); + if (ka > 7) + 
out.SetConstantsUsed(C_KCOLORS+((ka-0xc)%4),C_KCOLORS+((ka-0xc)%4)); } if(cc.a == TEVCOLORARG_CPREV || cc.a == TEVCOLORARG_APREV @@ -1090,13 +908,13 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType) { if(RegisterStates[0].AlphaNeedOverflowControl || RegisterStates[0].ColorNeedOverflowControl) { - WRITE(p, "cprev = %s(prev * (255.0f/256.0f)) * (256.0f/255.0f);\n", GLSLConvertFunctions[FUNC_FRAC + bOpenGL]); + out.Write("cprev = %s(prev * (255.0f/256.0f)) * (256.0f/255.0f);\n", GLSLConvertFunctions[FUNC_FRAC + bOpenGL]); RegisterStates[0].AlphaNeedOverflowControl = false; RegisterStates[0].ColorNeedOverflowControl = false; } else { - WRITE(p, "cprev = prev;\n"); + out.Write("cprev = prev;\n"); } RegisterStates[0].AuxStored = true; } @@ -1106,15 +924,16 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType) || cc.c == TEVCOLORARG_C0 || cc.c == TEVCOLORARG_A0 || ac.a == TEVALPHAARG_A0 || ac.b == TEVALPHAARG_A0 || ac.c == TEVALPHAARG_A0) { + out.SetConstantsUsed(C_COLORS+1,C_COLORS+1); if(RegisterStates[1].AlphaNeedOverflowControl || RegisterStates[1].ColorNeedOverflowControl) { - WRITE(p, "cc0 = %s(c0 * (255.0f/256.0f)) * (256.0f/255.0f);\n", GLSLConvertFunctions[FUNC_FRAC + bOpenGL]); + out.Write("cc0 = %s(c0 * (255.0f/256.0f)) * (256.0f/255.0f);\n", GLSLConvertFunctions[FUNC_FRAC + bOpenGL]); RegisterStates[1].AlphaNeedOverflowControl = false; RegisterStates[1].ColorNeedOverflowControl = false; } else { - WRITE(p, "cc0 = c0;\n"); + out.Write("cc0 = c0;\n"); } RegisterStates[1].AuxStored = true; } @@ -1124,15 +943,16 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType) || cc.c == TEVCOLORARG_C1 || cc.c == TEVCOLORARG_A1 || ac.a == TEVALPHAARG_A1 || ac.b == TEVALPHAARG_A1 || ac.c == TEVALPHAARG_A1) { + out.SetConstantsUsed(C_COLORS+2,C_COLORS+2); if(RegisterStates[2].AlphaNeedOverflowControl || RegisterStates[2].ColorNeedOverflowControl) { - WRITE(p, "cc1 = %s(c1 * (255.0f/256.0f)) * (256.0f/255.0f);\n", GLSLConvertFunctions[FUNC_FRAC + 
bOpenGL]); + out.Write("cc1 = %s(c1 * (255.0f/256.0f)) * (256.0f/255.0f);\n", GLSLConvertFunctions[FUNC_FRAC + bOpenGL]); RegisterStates[2].AlphaNeedOverflowControl = false; RegisterStates[2].ColorNeedOverflowControl = false; } else { - WRITE(p, "cc1 = c1;\n"); + out.Write("cc1 = c1;\n"); } RegisterStates[2].AuxStored = true; } @@ -1142,15 +962,16 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType) || cc.c == TEVCOLORARG_C2 || cc.c == TEVCOLORARG_A2 || ac.a == TEVALPHAARG_A2 || ac.b == TEVALPHAARG_A2 || ac.c == TEVALPHAARG_A2) { + out.SetConstantsUsed(C_COLORS+3,C_COLORS+3); if(RegisterStates[3].AlphaNeedOverflowControl || RegisterStates[3].ColorNeedOverflowControl) { - WRITE(p, "cc2 = %s(c2 * (255.0f/256.0f)) * (256.0f/255.0f);\n", GLSLConvertFunctions[FUNC_FRAC + bOpenGL]); + out.Write("cc2 = %s(c2 * (255.0f/256.0f)) * (256.0f/255.0f);\n", GLSLConvertFunctions[FUNC_FRAC + bOpenGL]); RegisterStates[3].AlphaNeedOverflowControl = false; RegisterStates[3].ColorNeedOverflowControl = false; } else { - WRITE(p, "cc2 = c2;\n"); + out.Write("cc2 = c2;\n"); } RegisterStates[3].AuxStored = true; } @@ -1158,124 +979,139 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType) RegisterStates[cc.dest].ColorNeedOverflowControl = (cc.clamp == 0); RegisterStates[cc.dest].AuxStored = false; - // combine the color channel - WRITE(p, "// color combine\n"); + if (cc.d == TEVCOLORARG_C0 || cc.d == TEVCOLORARG_A0 || ac.d == TEVALPHAARG_A0) + out.SetConstantsUsed(C_COLORS+1,C_COLORS+1); + + if (cc.d == TEVCOLORARG_C1 || cc.d == TEVCOLORARG_A1 || ac.d == TEVALPHAARG_A1) + out.SetConstantsUsed(C_COLORS+2,C_COLORS+2); + + if (cc.d == TEVCOLORARG_C2 || cc.d == TEVCOLORARG_A2 || ac.d == TEVALPHAARG_A2) + out.SetConstantsUsed(C_COLORS+3,C_COLORS+3); + + if (cc.dest >= GX_TEVREG0 && cc.dest <= GX_TEVREG2) + out.SetConstantsUsed(C_COLORS+cc.dest, C_COLORS+cc.dest); + + if (ac.dest >= GX_TEVREG0 && ac.dest <= GX_TEVREG2) + out.SetConstantsUsed(C_COLORS+ac.dest, C_COLORS+ac.dest); + 
+ out.Write("// color combine\n"); if (cc.clamp) - WRITE(p, "%s = clamp(", tevCOutputTable[cc.dest]); + out.Write("%s = clamp(", tevCOutputTable[cc.dest]); else - WRITE(p, "%s = ", tevCOutputTable[cc.dest]); + out.Write("%s = ", tevCOutputTable[cc.dest]); // combine the color channel if (cc.bias != TevBias_COMPARE) // if not compare { //normal color combiner goes here if (cc.shift > TEVSCALE_1) - WRITE(p, "%s*(", tevScaleTable[cc.shift]); + out.Write("%s*(", tevScaleTable[cc.shift]); - if (!(cc.d == TEVCOLORARG_ZERO && cc.op == TEVOP_ADD)) - WRITE(p, "%s%s", tevCInputTable[cc.d], tevOpTable[cc.op]); + if(!(cc.d == TEVCOLORARG_ZERO && cc.op == TEVOP_ADD)) + out.Write("%s%s", tevCInputTable[cc.d], tevOpTable[cc.op]); if (cc.a == cc.b) - WRITE(p, "%s", tevCInputTable[cc.a + 16]); + out.Write("%s", tevCInputTable[cc.a + 16]); else if (cc.c == TEVCOLORARG_ZERO) - WRITE(p, "%s", tevCInputTable[cc.a + 16]); + out.Write("%s", tevCInputTable[cc.a + 16]); else if (cc.c == TEVCOLORARG_ONE) - WRITE(p, "%s", tevCInputTable[cc.b + 16]); + out.Write("%s", tevCInputTable[cc.b + 16]); else if (cc.a == TEVCOLORARG_ZERO) - WRITE(p, "%s*%s", tevCInputTable[cc.b + 16], tevCInputTable[cc.c + 16]); + out.Write("%s*%s", tevCInputTable[cc.b + 16], tevCInputTable[cc.c + 16]); else if (cc.b == TEVCOLORARG_ZERO) - WRITE(p, "%s*(float3(1.0f, 1.0f, 1.0f)-%s)", tevCInputTable[cc.a + 16], tevCInputTable[cc.c + 16]); + out.Write("%s*(float3(1.0f, 1.0f, 1.0f)-%s)", tevCInputTable[cc.a + 16], tevCInputTable[cc.c + 16]); else - WRITE(p, "%s(%s, %s, %s)", GLSLConvertFunctions[FUNC_LERP + bOpenGL], tevCInputTable[cc.a + 16], tevCInputTable[cc.b + 16], tevCInputTable[cc.c + 16]); + out.Write("%s(%s, %s, %s)", GLSLConvertFunctions[FUNC_LERP + bOpenGL], tevCInputTable[cc.a + 16], tevCInputTable[cc.b + 16], tevCInputTable[cc.c + 16]); - WRITE(p, "%s", tevBiasTable[cc.bias]); + out.Write("%s", tevBiasTable[cc.bias]); if (cc.shift > TEVSCALE_1) - WRITE(p, ")"); + out.Write(")"); } else { int cmp = 
(cc.shift<<1)|cc.op|8; // comparemode stored here - WRITE(p, TEVCMPColorOPTable[cmp],//lookup the function from the op table + out.Write(TEVCMPColorOPTable[cmp],//lookup the function from the op table tevCInputTable[cc.d], tevCInputTable[cc.a + 16], tevCInputTable[cc.b + 16], tevCInputTable[cc.c + 16]); } if (cc.clamp) - WRITE(p, ", 0.0, 1.0)"); - WRITE(p,";\n"); + out.Write(", 0.0, 1.0)"); + out.Write(";\n"); RegisterStates[ac.dest].AlphaNeedOverflowControl = (ac.clamp == 0); RegisterStates[ac.dest].AuxStored = false; - // combine the alpha channel - WRITE(p, "// alpha combine\n"); + out.Write("// alpha combine\n"); if (ac.clamp) - WRITE(p, "%s = clamp(", tevAOutputTable[ac.dest]); + out.Write("%s = clamp(", tevAOutputTable[ac.dest]); else - WRITE(p, "%s = ", tevAOutputTable[ac.dest]); + out.Write("%s = ", tevAOutputTable[ac.dest]); if (ac.bias != TevBias_COMPARE) // if not compare { //normal alpha combiner goes here if (ac.shift > TEVSCALE_1) - WRITE(p, "%s*(", tevScaleTable[ac.shift]); + out.Write("%s*(", tevScaleTable[ac.shift]); - if (!(ac.d == TEVALPHAARG_ZERO && ac.op == TEVOP_ADD)) - WRITE(p, "%s.a%s", tevAInputTable[ac.d], tevOpTable[ac.op]); + if(!(ac.d == TEVALPHAARG_ZERO && ac.op == TEVOP_ADD)) + out.Write("%s.a%s", tevAInputTable[ac.d], tevOpTable[ac.op]); if (ac.a == ac.b) - WRITE(p, "%s.a", tevAInputTable[ac.a + 8]); + out.Write("%s.a", tevAInputTable[ac.a + 8]); else if (ac.c == TEVALPHAARG_ZERO) - WRITE(p, "%s.a", tevAInputTable[ac.a + 8]); + out.Write("%s.a", tevAInputTable[ac.a + 8]); else if (ac.a == TEVALPHAARG_ZERO) - WRITE(p, "%s.a*%s.a", tevAInputTable[ac.b + 8], tevAInputTable[ac.c + 8]); + out.Write("%s.a*%s.a", tevAInputTable[ac.b + 8], tevAInputTable[ac.c + 8]); else if (ac.b == TEVALPHAARG_ZERO) - WRITE(p, "%s.a*(1.0f-%s.a)", tevAInputTable[ac.a + 8], tevAInputTable[ac.c + 8]); + out.Write("%s.a*(1.0f-%s.a)", tevAInputTable[ac.a + 8], tevAInputTable[ac.c + 8]); else - WRITE(p, "%s(%s.a, %s.a, %s.a)", GLSLConvertFunctions[FUNC_LERP + 
bOpenGL], tevAInputTable[ac.a + 8], tevAInputTable[ac.b + 8], tevAInputTable[ac.c + 8]); + out.Write("%s(%s.a, %s.a, %s.a)", GLSLConvertFunctions[FUNC_LERP + bOpenGL], tevAInputTable[ac.a + 8], tevAInputTable[ac.b + 8], tevAInputTable[ac.c + 8]); - WRITE(p, "%s",tevBiasTable[ac.bias]); + out.Write("%s",tevBiasTable[ac.bias]); - if (ac.shift > 0) - WRITE(p, ")"); + if (ac.shift>0) + out.Write(")"); } else { //compare alpha combiner goes here int cmp = (ac.shift<<1)|ac.op|8; // comparemode stored here - WRITE(p, TEVCMPAlphaOPTable[cmp], + out.Write(TEVCMPAlphaOPTable[cmp], tevAInputTable[ac.d], tevAInputTable[ac.a + 8], tevAInputTable[ac.b + 8], tevAInputTable[ac.c + 8]); } if (ac.clamp) - WRITE(p, ", 0.0, 1.0)"); - WRITE(p, ";\n\n"); - WRITE(p, "// TEV done\n"); + out.Write(", 0.0, 1.0)"); + out.Write(";\n\n"); + out.Write("// TEV done\n"); } -void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType) +template +void SampleTexture(T& out, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType) { + out.SetConstantsUsed(C_TEXDIMS+texmap,C_TEXDIMS+texmap); if (ApiType == API_D3D11) - WRITE(p, "%s=Tex%d.Sample(samp%d,%s.xy * " I_TEXDIMS"[%d].xy).%s;\n", destination, texmap,texmap, texcoords, texmap, texswap); + out.Write("%s=Tex%d.Sample(samp%d,%s.xy * " I_TEXDIMS"[%d].xy).%s;\n", destination, texmap,texmap, texcoords, texmap, texswap); else - WRITE(p, "%s=%s(samp%d,%s.xy * " I_TEXDIMS"[%d].xy).%s;\n", destination, ApiType == API_OPENGL ? "texture" : "tex2D", texmap, texcoords, texmap, texswap); + out.Write("%s=%s(samp%d,%s.xy * " I_TEXDIMS"[%d].xy).%s;\n", destination, ApiType == API_OPENGL ? 
"texture" : "tex2D", texmap, texcoords, texmap, texswap); } static const char *tevAlphaFuncsTable[] = { - "(false)", //ALPHACMP_NEVER 0 - "(prev.a <= %s - (0.25f/255.0f))", //ALPHACMP_LESS 1 - "(abs( prev.a - %s ) < (0.5f/255.0f))", //ALPHACMP_EQUAL 2 - "(prev.a < %s + (0.25f/255.0f))", //ALPHACMP_LEQUAL 3 - "(prev.a >= %s + (0.25f/255.0f))", //ALPHACMP_GREATER 4 - "(abs( prev.a - %s ) >= (0.5f/255.0f))", //ALPHACMP_NEQUAL 5 - "(prev.a > %s - (0.25f/255.0f))", //ALPHACMP_GEQUAL 6 - "(true)" //ALPHACMP_ALWAYS 7 + "(false)", // NEVER + "(prev.a <= %s - (0.25f/255.0f))", // LESS + "(abs( prev.a - %s ) < (0.5f/255.0f))", // EQUAL + "(prev.a < %s + (0.25f/255.0f))", // LEQUAL + "(prev.a >= %s + (0.25f/255.0f))", // GREATER + "(abs( prev.a - %s ) >= (0.5f/255.0f))", // NEQUAL + "(prev.a > %s - (0.25f/255.0f))", // GEQUAL + "(true)" // ALWAYS }; static const char *tevAlphaFunclogicTable[] = @@ -1286,7 +1122,8 @@ static const char *tevAlphaFunclogicTable[] = " == " // xnor }; -static void WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth) +template +static void WriteAlphaTest(T& out, pixel_shader_uid_data& uid_data, API_TYPE ApiType, DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth) { static const char *alphaRef[2] = { @@ -1294,24 +1131,31 @@ static void WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode I_ALPHA"[0].g" }; + out.SetConstantsUsed(C_ALPHA, C_ALPHA); // using discard then return works the same in cg and dx9 but not in dx11 - WRITE(p, "\tif(!( "); + out.Write("\tif(!( "); + uid_data.alpha_test_comp0 = bpmem.alpha_test.comp0; + uid_data.alpha_test_comp1 = bpmem.alpha_test.comp1; + uid_data.alpha_test_logic = bpmem.alpha_test.logic; + + // Lookup the first component from the alpha function table int compindex = bpmem.alpha_test.comp0; - WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[0]);//lookup the first component from the alpha function table + out.Write(tevAlphaFuncsTable[compindex], alphaRef[0]); - 
WRITE(p, "%s", tevAlphaFunclogicTable[bpmem.alpha_test.logic]);//lookup the logic op + out.Write("%s", tevAlphaFunclogicTable[bpmem.alpha_test.logic]);//lookup the logic op + // Lookup the second component from the alpha function table compindex = bpmem.alpha_test.comp1; - WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[1]);//lookup the second component from the alpha function table - WRITE(p, ")) {\n"); + out.Write(tevAlphaFuncsTable[compindex], alphaRef[1]); + out.Write(")) {\n"); - WRITE(p, "\t\tocol0 = float4(0.0f, 0.0f, 0.0f, 0.0f);\n"); + out.Write("\t\tocol0 = float4(0.0f, 0.0f, 0.0f, 0.0f);\n"); if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND) - WRITE(p, "\t\tocol1 = float4(0.0f, 0.0f, 0.0f, 0.0f);\n"); + out.Write("\t\tocol1 = float4(0.0f, 0.0f, 0.0f, 0.0f);\n"); if(per_pixel_depth) - WRITE(p, "depth = 1.f;\n"); + out.Write("\t\tdepth = 1.f;\n"); // HAXX: zcomploc (aka early_ztest) is a way to control whether depth test is done before // or after texturing and alpha test. PC GPUs have no way to support this @@ -1323,14 +1167,15 @@ static void WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode // when the alpha test fail. This is not a correct implementation because // even if the depth test fails the fragment could be alpha blended, but // we don't have a choice. 
+ uid_data.alpha_test_use_zcomploc_hack = bpmem.zcontrol.early_ztest && bpmem.zmode.updateenable; if (!(bpmem.zcontrol.early_ztest && bpmem.zmode.updateenable)) { - WRITE(p, "\t\tdiscard;\n"); + out.Write("\t\tdiscard;\n"); if (ApiType != API_D3D11) - WRITE(p, "\t\treturn;\n"); + out.Write("\t\treturn;\n"); } - WRITE(p, "}\n"); + out.Write("}\n"); } static const char *tevFogFuncsTable[] = @@ -1345,41 +1190,48 @@ static const char *tevFogFuncsTable[] = "\tfog = 1.0f - fog;\n fog = pow(2.0f, -8.0f * fog * fog);\n" // backward exp2 }; -static void WriteFog(char *&p, API_TYPE ApiType) +template +static void WriteFog(T& out, pixel_shader_uid_data& uid_data, API_TYPE ApiType) { bool bOpenGL = ApiType == API_OPENGL; - if (bpmem.fog.c_proj_fsel.fsel == 0) + uid_data.fog_fsel = bpmem.fog.c_proj_fsel.fsel; + if(bpmem.fog.c_proj_fsel.fsel == 0) return; // no Fog + uid_data.fog_proj = bpmem.fog.c_proj_fsel.proj; + + out.SetConstantsUsed(C_FOG, C_FOG+1); if (bpmem.fog.c_proj_fsel.proj == 0) { // perspective // ze = A/(B - (Zs >> B_SHF) - WRITE (p, "\tfloat ze = " I_FOG"[1].x / (" I_FOG"[1].y - (zCoord / " I_FOG"[1].w));\n"); + out.Write("\tfloat ze = " I_FOG"[1].x / (" I_FOG"[1].y - (zCoord / " I_FOG"[1].w));\n"); } else { // orthographic // ze = a*Zs (here, no B_SHF) - WRITE (p, "\tfloat ze = " I_FOG"[1].x * zCoord;\n"); + out.Write("\tfloat ze = " I_FOG"[1].x * zCoord;\n"); } // x_adjust = sqrt((x-center)^2 + k^2)/k // ze *= x_adjust - //this is completely theoretical as the real hardware seems to use a table instead of calculating the values. + // this is completely theoretical as the real hardware seems to use a table instead of calculating the values. 
+ uid_data.fog_RangeBaseEnabled = bpmem.fogRange.Base.Enabled; if (bpmem.fogRange.Base.Enabled) { - WRITE (p, "\tfloat x_adjust = (2.0f * (clipPos.x / " I_FOG"[2].y)) - 1.0f - " I_FOG"[2].x;\n"); - WRITE (p, "\tx_adjust = sqrt(x_adjust * x_adjust + " I_FOG"[2].z * " I_FOG"[2].z) / " I_FOG"[2].z;\n"); - WRITE (p, "\tze *= x_adjust;\n"); + out.SetConstantsUsed(C_FOG+2, C_FOG+2); + out.Write("\tfloat x_adjust = (2.0f * (clipPos.x / " I_FOG"[2].y)) - 1.0f - " I_FOG"[2].x;\n"); + out.Write("\tx_adjust = sqrt(x_adjust * x_adjust + " I_FOG"[2].z * " I_FOG"[2].z) / " I_FOG"[2].z;\n"); + out.Write("\tze *= x_adjust;\n"); } - WRITE (p, "\tfloat fog = clamp(ze - " I_FOG"[1].z, 0.0, 1.0);\n"); + out.Write("\tfloat fog = clamp(ze - " I_FOG"[1].z, 0.0, 1.0);\n"); if (bpmem.fog.c_proj_fsel.fsel > 3) { - WRITE(p, "%s", tevFogFuncsTable[bpmem.fog.c_proj_fsel.fsel]); + out.Write("%s", tevFogFuncsTable[bpmem.fog.c_proj_fsel.fsel]); } else { @@ -1387,5 +1239,21 @@ static void WriteFog(char *&p, API_TYPE ApiType) WARN_LOG(VIDEO, "Unknown Fog Type! 
%08x", bpmem.fog.c_proj_fsel.fsel); } - WRITE(p, "\tprev.rgb = %s(prev.rgb, " I_FOG"[0].rgb, fog);\n", GLSLConvertFunctions[FUNC_LERP + bOpenGL]); + out.Write("\tprev.rgb = %s(prev.rgb, " I_FOG"[0].rgb, fog);\n", GLSLConvertFunctions[FUNC_LERP + bOpenGL]); } + +void GetPixelShaderUid(PixelShaderUid& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) +{ + GeneratePixelShader(object, dstAlphaMode, ApiType, components); +} + +void GeneratePixelShaderCode(PixelShaderCode& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) +{ + GeneratePixelShader(object, dstAlphaMode, ApiType, components); +} + +void GetPixelShaderConstantProfile(PixelShaderConstantProfile& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) +{ + GeneratePixelShader(object, dstAlphaMode, ApiType, components); +} + diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.h b/Source/Core/VideoCommon/Src/PixelShaderGen.h index d2a3046939..826ec561d0 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.h +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.h @@ -6,6 +6,8 @@ #define GCOGL_PIXELSHADER_H #include "VideoCommon.h" +#include "ShaderGenCommon.h" +#include "BPMemory.h" #define I_COLORS "color" #define I_KCOLORS "k" @@ -31,8 +33,14 @@ #define C_PLIGHTS (C_FOG + 3) #define C_PMATERIALS (C_PLIGHTS + 40) #define C_PENVCONST_END (C_PMATERIALS + 4) -#define PIXELSHADERUID_MAX_VALUES 70 -#define PIXELSHADERUID_MAX_VALUES_SAFE 116 + +// Different ways to achieve rendering with destination alpha +enum DSTALPHA_MODE +{ + DSTALPHA_NONE, // Render normally, without destination alpha + DSTALPHA_ALPHA_PASS, // Render normally first, then render again for alpha + DSTALPHA_DUAL_SOURCE_BLEND // Use dual-source blending +}; // Annoying sure, can be removed once we get up to GLSL ~1.3 const s_svar PSVar_Loc[] = { {I_COLORS, C_COLORS, 4 }, @@ -47,90 +55,135 @@ const s_svar PSVar_Loc[] = { {I_COLORS, C_COLORS, 4 }, {I_PMATERIALS, C_PMATERIALS, 4 }, }; -// DO NOT 
make anything in this class virtual. -template -class _PIXELSHADERUID +// TODO: Should compact packing be enabled? +//#pragma pack(4) +struct pixel_shader_uid_data { -public: - u32 values[safe ? PIXELSHADERUID_MAX_VALUES_SAFE : PIXELSHADERUID_MAX_VALUES]; - int num_values; + // TODO: Optimize field order for easy access! - _PIXELSHADERUID() + u32 components; + u32 dstAlphaMode : 2; + u32 Pretest : 2; + + u32 genMode_numtexgens : 4; + u32 genMode_numtevstages : 4; + u32 genMode_numindstages : 3; + + u32 nIndirectStagesUsed : 8; + + u32 texMtxInfo_n_projection : 8; // 8x1 bit + + u32 tevindref_bi0 : 3; + u32 tevindref_bc0 : 3; + u32 tevindref_bi1 : 3; + u32 tevindref_bc1 : 3; + u32 tevindref_bi2 : 3; + u32 tevindref_bc3 : 3; + u32 tevindref_bi4 : 3; + u32 tevindref_bc4 : 3; + inline void SetTevindrefValues(int index, u32 texcoord, u32 texmap) { + if (index == 0) { tevindref_bc0 = texcoord; tevindref_bi0 = texmap; } + else if (index == 1) { tevindref_bc1 = texcoord; tevindref_bi1 = texmap; } + else if (index == 2) { tevindref_bc3 = texcoord; tevindref_bi2 = texmap; } + else if (index == 3) { tevindref_bc4 = texcoord; tevindref_bi4 = texmap; } + } + inline void SetTevindrefTexmap(int index, u32 texmap) + { + if (index == 0) { tevindref_bi0 = texmap; } + else if (index == 1) { tevindref_bi1 = texmap; } + else if (index == 2) { tevindref_bi2 = texmap; } + else if (index == 3) { tevindref_bi4 = texmap; } } - _PIXELSHADERUID(const _PIXELSHADERUID& r) - { - num_values = r.num_values; + u64 tevorders_n_texcoord : 48; // 16 x 3 bits - if (safe) - memcpy(values, r.values, PIXELSHADERUID_MAX_VALUES_SAFE); - else - memcpy(values, r.values, r.GetNumValues() * sizeof(values[0])); + u64 tevind_n_sw : 48; // 16 x 3 bits + u64 tevind_n_tw : 48; // 16 x 3 bits + u32 tevind_n_fb_addprev : 16; // 16 x 1 bit + u32 tevind_n_bs : 32; // 16 x 2 bits + u32 tevind_n_fmt : 32; // 16 x 2 bits + u32 tevind_n_bt : 32; // 16 x 2 bits + u64 tevind_n_bias : 48; // 16 x 3 bits + u64 tevind_n_mid : 
64; // 16 x 4 bits + + // NOTE: These assume that the affected bits are zero before calling + void Set_tevind_sw(int index, u64 val) + { + tevind_n_sw |= val << (3*index); + } + void Set_tevind_tw(int index, u64 val) + { + tevind_n_tw |= val << (3*index); + } + void Set_tevind_bias(int index, u64 val) + { + tevind_n_bias |= val << (3*index); + } + void Set_tevind_mid(int index, u64 val) + { + tevind_n_mid |= val << (4*index); } - int GetNumValues() const - { - if (safe) - return (sizeof(values) / sizeof(u32)); - else - return num_values; - } + u32 tevksel_n_swap1 : 16; // 8x2 bits + u32 tevksel_n_swap2 : 16; // 8x2 bits + u64 tevksel_n_kcsel0 : 40; // 8x5 bits + u64 tevksel_n_kasel0 : 40; // 8x5 bits + u64 tevksel_n_kcsel1 : 40; // 8x5 bits + u64 tevksel_n_kasel1 : 40; // 8x5 bits + void set_tevksel_kcsel(int index, int i, u64 value) { if (i) tevksel_n_kcsel1 |= value << (5*index); else tevksel_n_kcsel0 |= value << (5*index); } + void set_tevksel_kasel(int index, int i, u64 value) { if( i) tevksel_n_kasel1 |= value << (5*index); else tevksel_n_kasel0 |= value << (5*index); } - bool operator <(const _PIXELSHADERUID& _Right) const - { - int N = GetNumValues(); + u64 cc_n_d : 64; // 16x4 bits + u64 cc_n_c : 64; // 16x4 bits + u64 cc_n_b : 64; // 16x4 bits + u64 cc_n_a : 64; // 16x4 bits + u32 cc_n_bias : 32; // 16x2 bits + u32 cc_n_op : 16; // 16x1 bit + u32 cc_n_clamp : 16; // 16x1 bit + u32 cc_n_shift : 32; // 16x2 bits + u32 cc_n_dest : 32; // 16x2 bits - if (N < _Right.GetNumValues()) - return true; - else if (N > _Right.GetNumValues()) - return false; + u32 ac_n_rswap : 32; // 16x2 bits + u32 ac_n_tswap : 32; // 16x2 bits + u64 ac_n_d : 48; // 16x3 bits + u64 ac_n_c : 48; // 16x3 bits + u64 ac_n_b : 48; // 16x3 bits + u64 ac_n_a : 48; // 16x3 bits + u32 ac_n_bias : 32; // 16x2 bits + u32 ac_n_op : 16; // 16x1 bit + u32 ac_n_clamp : 16; // 16x1 bit + u32 ac_n_shift : 32; // 16x2 bits + u32 ac_n_dest : 32; // 16x2 bits - for (int i = 0; i < N; ++i) - { - if 
(values[i] < _Right.values[i]) - return true; - else if (values[i] > _Right.values[i]) - return false; - } + u32 alpha_test_comp0 : 3; + u32 alpha_test_comp1 : 3; + u32 alpha_test_logic : 2; + u32 alpha_test_use_zcomploc_hack : 1; - return false; - } + u32 fog_proj : 1; + u32 fog_fsel : 3; + u32 fog_RangeBaseEnabled : 1; - bool operator ==(const _PIXELSHADERUID& _Right) const - { - int N = GetNumValues(); + u32 ztex_op : 2; - if (N != _Right.GetNumValues()) - return false; + u32 fast_depth_calc : 1; + u32 per_pixel_depth : 1; + u32 bHasIndStage : 16; - for (int i = 0; i < N; ++i) - { - if (values[i] != _Right.values[i]) - return false; - } + u32 xfregs_numTexGen_numTexGens : 4; - return true; - } + // TODO: I think we're fine without an enablePixelLighting field, should probably double check, though.. + LightingUidData lighting; }; +//#pragma pack() -typedef _PIXELSHADERUID PIXELSHADERUID; -typedef _PIXELSHADERUID PIXELSHADERUIDSAFE; +typedef ShaderUid PixelShaderUid; +typedef ShaderCode PixelShaderCode; // TODO: Obsolete +typedef ShaderConstantProfile PixelShaderConstantProfile; // TODO: Obsolete -// Different ways to achieve rendering with destination alpha -enum DSTALPHA_MODE -{ - DSTALPHA_NONE, // Render normally, without destination alpha - DSTALPHA_ALPHA_PASS, // Render normally first, then render again for alpha - DSTALPHA_DUAL_SOURCE_BLEND // Use dual-source blending -}; - -const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components); - -void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode, u32 components); -void GetSafePixelShaderId(PIXELSHADERUIDSAFE *uid, DSTALPHA_MODE dstAlphaMode, u32 components); - -// Used to make sure that our optimized pixel shader IDs don't lose any possible shader code changes -void ValidatePixelShaderIDs(API_TYPE api, PIXELSHADERUIDSAFE old_id, const std::string& old_code, DSTALPHA_MODE dstAlphaMode, u32 components); +void GeneratePixelShaderCode(PixelShaderCode& object, 
DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components); +void GetPixelShaderUid(PixelShaderUid& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components); +void GetPixelShaderConstantProfile(PixelShaderConstantProfile& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components); #endif // GCOGL_PIXELSHADER_H diff --git a/Source/Core/VideoCommon/Src/PixelShaderManager.cpp b/Source/Core/VideoCommon/Src/PixelShaderManager.cpp index 1d1ac8f5c6..9fbe096607 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderManager.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderManager.cpp @@ -29,19 +29,45 @@ static u32 lastTexDims[8]; // width | height << 16 | wrap_s << 28 | wrap_t << 30 static u32 lastZBias; static int nMaterialsChanged; +static float s_constant_cache[C_PENVCONST_END*4]; + inline void SetPSConstant4f(unsigned int const_number, float f1, float f2, float f3, float f4) { +// if (s_constant_cache[const_number*4] == f1 && s_constant_cache[const_number*4+1] == f2 && +// s_constant_cache[const_number*4+2] == f3 && s_constant_cache[const_number*4+3] == f4) +// return; + g_renderer->SetPSConstant4f(const_number, f1, f2, f3, f4); + s_constant_cache[const_number*4] = f1; + s_constant_cache[const_number*4+1] = f2; + s_constant_cache[const_number*4+2] = f3; + s_constant_cache[const_number*4+3] = f4; } inline void SetPSConstant4fv(unsigned int const_number, const float *f) { +// if (s_constant_cache[const_number*4] == f[0] && s_constant_cache[const_number*4+1] == f[1] && +// s_constant_cache[const_number*4+2] == f[2] && s_constant_cache[const_number*4+3] == f[3]) +// return; + g_renderer->SetPSConstant4fv(const_number, f); + s_constant_cache[const_number*4] = f[0]; + s_constant_cache[const_number*4+1] = f[1]; + s_constant_cache[const_number*4+2] = f[2]; + s_constant_cache[const_number*4+3] = f[3]; } inline void SetMultiPSConstant4fv(unsigned int const_number, unsigned int count, const float *f) { +// for (unsigned int i = 0; i < 4*count; ++i) +// if 
(s_constant_cache[const_number*4+i] != f[i]) +// break; +// else if (i == 4*count-1) +// return; + g_renderer->SetMultiPSConstant4fv(const_number, count, f); + for (unsigned int i = 0; i < 4*count; ++i) + s_constant_cache[const_number*4+i] = f[i]; } void PixelShaderManager::Init() @@ -50,6 +76,7 @@ void PixelShaderManager::Init() memset(lastTexDims, 0, sizeof(lastTexDims)); lastZBias = 0; memset(lastRGBAfull, 0, sizeof(lastRGBAfull)); + memset(s_constant_cache, 0, sizeof(s_constant_cache)); // TODO: Should reflect that on the GPU side.... Dirty(); } @@ -70,11 +97,24 @@ void PixelShaderManager::Shutdown() } -void PixelShaderManager::SetConstants() +void PixelShaderManager::SetConstants(u32 components) { if (g_ActiveConfig.backend_info.APIType == API_OPENGL && !g_ActiveConfig.backend_info.bSupportsGLSLUBO) Dirty(); + // TODO: Probably broken in the non-UBO path + PixelShaderConstantProfile constant_profile(C_PENVCONST_END); + /// TODO: dst alpha/api/components type parameter... + GetPixelShaderConstantProfile(constant_profile, DSTALPHA_DUAL_SOURCE_BLEND, API_OPENGL, components); + + static int saved_updates = 0; + static int necessary_updates = 0; + +// TODO: Remove this! +#define IncStuff() { \ + saved_updates++; \ + /*printf("Saved a constant update at line %d! Saved %d against %d now!\n", __LINE__, saved_updates, necessary_updates);*/ } + for (int i = 0; i < 2; ++i) { if (s_nColorsChanged[i]) @@ -82,30 +122,37 @@ void PixelShaderManager::SetConstants() int baseind = i ? 
C_KCOLORS : C_COLORS; for (int j = 0; j < 4; ++j) { - if (s_nColorsChanged[i] & (1 << j)) + if ((s_nColorsChanged[i] & (1 << j)) && constant_profile.ConstantIsUsed(baseind+j)) + { SetPSConstant4fv(baseind+j, &lastRGBAfull[i][j][0]); + s_nColorsChanged[i] &= ~(1<>8)&0xff)/255.0f, 0, ((lastAlpha>>16)&0xff)/255.0f); s_bAlphaChanged = false; - } + } else if (s_bAlphaChanged) IncStuff(); - if (s_bZTextureTypeChanged) + if (s_bZTextureTypeChanged && constant_profile.ConstantIsUsed(C_ZBIAS)) { float ftemp[4]; switch (bpmem.ztex2.type) @@ -121,13 +168,14 @@ void PixelShaderManager::SetConstants() case 2: // 24 bits ftemp[0] = 16711680.0f/16777215.0f; ftemp[1] = 65280.0f/16777215.0f; ftemp[2] = 255.0f/16777215.0f; ftemp[3] = 0; - break; - } + break; + } + ++necessary_updates; SetPSConstant4fv(C_ZBIAS, ftemp); s_bZTextureTypeChanged = false; - } + } else if (s_bZTextureTypeChanged) IncStuff(); - if (s_bZBiasChanged || s_bDepthRangeChanged) + if ((s_bZBiasChanged || s_bDepthRangeChanged) && constant_profile.ConstantIsUsed(C_ZBIAS+1)) { // reversed gxsetviewport(xorig, yorig, width, height, nearz, farz) // [0] = width/2 @@ -138,9 +186,10 @@ void PixelShaderManager::SetConstants() // [5] = 16777215 * farz //ERROR_LOG("pixel=%x,%x, bias=%x\n", bpmem.zcontrol.pixel_format, bpmem.ztex2.type, lastZBias); + ++necessary_updates; SetPSConstant4f(C_ZBIAS+1, xfregs.viewport.farZ / 16777216.0f, xfregs.viewport.zRange / 16777216.0f, 0, (float)(lastZBias)/16777215.0f); s_bZBiasChanged = s_bDepthRangeChanged = false; - } + }else if ((s_bZBiasChanged || s_bDepthRangeChanged)) IncStuff(); // indirect incoming texture scales if (s_nIndTexScaleChanged) @@ -148,45 +197,51 @@ void PixelShaderManager::SetConstants() // set as two sets of vec4s, each containing S and T of two ind stages. 
float f[8]; - if (s_nIndTexScaleChanged & 0x03) + if ((s_nIndTexScaleChanged & 0x03) && constant_profile.ConstantIsUsed(C_INDTEXSCALE)) { for (u32 i = 0; i < 2; ++i) { - f[2 * i] = bpmem.texscale[0].getScaleS(i & 1); - f[2 * i + 1] = bpmem.texscale[0].getScaleT(i & 1); - PRIM_LOG("tex indscale%d: %f %f\n", i, f[2 * i], f[2 * i + 1]); - } + f[2 * i] = bpmem.texscale[0].getScaleS(i & 1); + f[2 * i + 1] = bpmem.texscale[0].getScaleT(i & 1); + PRIM_LOG("tex indscale%d: %f %f\n", i, f[2 * i], f[2 * i + 1]); + } + ++necessary_updates; SetPSConstant4fv(C_INDTEXSCALE, f); - } + s_nIndTexScaleChanged &= ~0x03; + } + else if ((s_nIndTexScaleChanged & 0x03)) IncStuff(); - if (s_nIndTexScaleChanged & 0x0c) + if ((s_nIndTexScaleChanged & 0x0c) && constant_profile.ConstantIsUsed(C_INDTEXSCALE+1)) { - for (u32 i = 2; i < 4; ++i) + for (u32 i = 2; i < 4; ++i) { - f[2 * i] = bpmem.texscale[1].getScaleS(i & 1); - f[2 * i + 1] = bpmem.texscale[1].getScaleT(i & 1); - PRIM_LOG("tex indscale%d: %f %f\n", i, f[2 * i], f[2 * i + 1]); - } + f[2 * i] = bpmem.texscale[1].getScaleS(i & 1); + f[2 * i + 1] = bpmem.texscale[1].getScaleT(i & 1); + PRIM_LOG("tex indscale%d: %f %f\n", i, f[2 * i], f[2 * i + 1]); + } + ++necessary_updates; SetPSConstant4fv(C_INDTEXSCALE+1, &f[4]); - } - - s_nIndTexScaleChanged = 0; - } + s_nIndTexScaleChanged &= ~0x0c; + } + else if ((s_nIndTexScaleChanged & 0x0c)) IncStuff(); + } if (s_nIndTexMtxChanged) { for (int i = 0; i < 3; ++i) { - if (s_nIndTexMtxChanged & (1 << i)) + if ((s_nIndTexMtxChanged & (1 << i)) && (constant_profile.ConstantIsUsed(C_INDTEXMTX+2*i) || constant_profile.ConstantIsUsed(C_INDTEXMTX+2*i+1))) { - int scale = ((u32)bpmem.indmtx[i].col0.s0 << 0) | - ((u32)bpmem.indmtx[i].col1.s1 << 2) | - ((u32)bpmem.indmtx[i].col2.s2 << 4); - float fscale = powf(2.0f, (float)(scale - 17)) / 1024.0f; + int scale = ((u32)bpmem.indmtx[i].col0.s0 << 0) | + ((u32)bpmem.indmtx[i].col1.s1 << 2) | + ((u32)bpmem.indmtx[i].col2.s2 << 4); + float fscale = powf(2.0f, 
(float)(scale - 17)) / 1024.0f; - // xyz - static matrix - // TODO w - dynamic matrix scale / 256...... somehow / 4 works better - // rev 2972 - now using / 256.... verify that this works + // xyz - static matrix + // TODO w - dynamic matrix scale / 256...... somehow / 4 works better + // rev 2972 - now using / 256.... verify that this works + ++necessary_updates; + ++necessary_updates; SetPSConstant4f(C_INDTEXMTX + 2 * i, bpmem.indmtx[i].col0.ma * fscale, bpmem.indmtx[i].col1.mc * fscale, @@ -198,23 +253,26 @@ void PixelShaderManager::SetConstants() bpmem.indmtx[i].col2.mf * fscale, fscale * 4.0f); - PRIM_LOG("indmtx%d: scale=%f, mat=(%f %f %f; %f %f %f)\n", - i, 1024.0f*fscale, - bpmem.indmtx[i].col0.ma * fscale, bpmem.indmtx[i].col1.mc * fscale, bpmem.indmtx[i].col2.me * fscale, - bpmem.indmtx[i].col0.mb * fscale, bpmem.indmtx[i].col1.md * fscale, bpmem.indmtx[i].col2.mf * fscale); - } - } - s_nIndTexMtxChanged = 0; - } + PRIM_LOG("indmtx%d: scale=%f, mat=(%f %f %f; %f %f %f)\n", + i, 1024.0f*fscale, + bpmem.indmtx[i].col0.ma * fscale, bpmem.indmtx[i].col1.mc * fscale, bpmem.indmtx[i].col2.me * fscale, + bpmem.indmtx[i].col0.mb * fscale, bpmem.indmtx[i].col1.md * fscale, bpmem.indmtx[i].col2.mf * fscale); - if (s_bFogColorChanged) + s_nIndTexMtxChanged &= ~(1 << i); + }else if ((s_nIndTexMtxChanged & (1 << i))) {IncStuff();IncStuff();} + } + } + + if (s_bFogColorChanged && constant_profile.ConstantIsUsed(C_FOG)) { + ++necessary_updates; SetPSConstant4f(C_FOG, bpmem.fog.color.r / 255.0f, bpmem.fog.color.g / 255.0f, bpmem.fog.color.b / 255.0f, 0); s_bFogColorChanged = false; - } + }else if (s_bFogColorChanged) IncStuff(); - if (s_bFogParamChanged) + if (s_bFogParamChanged && constant_profile.ConstantIsUsed(C_FOG+1)) { + ++necessary_updates; if(!g_ActiveConfig.bDisableFog) { //downscale magnitude to 0.24 bits @@ -226,11 +284,12 @@ void PixelShaderManager::SetConstants() else SetPSConstant4f(C_FOG + 1, 0.0, 1.0, 0.0, 1.0); - s_bFogParamChanged = false; - } + 
s_bFogParamChanged = false; + }else if ( s_bFogParamChanged) IncStuff(); - if (s_bFogRangeAdjustChanged) + if (s_bFogRangeAdjustChanged && constant_profile.ConstantIsUsed(C_FOG+2)) { + ++necessary_updates; if(!g_ActiveConfig.bDisableFog && bpmem.fogRange.Base.Enabled == 1) { //bpmem.fogRange.Base.Center : center of the viewport in x axis. observation: bpmem.fogRange.Base.Center = realcenter + 342; @@ -251,8 +310,9 @@ void PixelShaderManager::SetConstants() } s_bFogRangeAdjustChanged = false; - } + }else if ( s_bFogRangeAdjustChanged) IncStuff(); + // TODO: use constant profile here! if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) // config check added because the code in here was crashing for me inside SetPSConstant4f { if (nLightsChanged[0] >= 0) @@ -349,8 +409,10 @@ void PixelShaderManager::SetPSTextureDims(int texid) SetPSConstant4fv(C_TEXDIMS + texid, fdims); } -// This one is high in profiles (0.5%). TODO: Move conversion out, only store the raw color value +// This one is high in profiles (0.5%). +// TODO: Move conversion out, only store the raw color value // and update it when the shader constant is set, only. +// TODO: Conversion should be checked in the context of tev_fixes.. 
void PixelShaderManager::SetColorChanged(int type, int num, bool high) { float *pf = &lastRGBAfull[type][num][0]; diff --git a/Source/Core/VideoCommon/Src/PixelShaderManager.h b/Source/Core/VideoCommon/Src/PixelShaderManager.h index 84b4c0bd6f..7f63fb3f46 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderManager.h +++ b/Source/Core/VideoCommon/Src/PixelShaderManager.h @@ -21,7 +21,7 @@ public: static void Shutdown(); static void DoState(PointerWrap &p); - static void SetConstants(); // sets pixel shader constants + static void SetConstants(u32 components); // sets pixel shader constants // constant management, should be called after memory is committed static void SetColorChanged(int type, int index, bool high); diff --git a/Source/Core/VideoCommon/Src/ShaderGenCommon.h b/Source/Core/VideoCommon/Src/ShaderGenCommon.h new file mode 100644 index 0000000000..b8820e2e98 --- /dev/null +++ b/Source/Core/VideoCommon/Src/ShaderGenCommon.h @@ -0,0 +1,286 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. 
+// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#ifndef _SHADERGENCOMMON_H +#define _SHADERGENCOMMON_H + +#include +#include +#include +#include +#include + +#include "CommonTypes.h" +#include "VideoCommon.h" + +/** + * Common interface for classes that need to go through the shader generation path (GenerateVertexShader, GeneratePixelShader) + * In particular, this includes the shader code generator (ShaderCode). + * A different class (ShaderUid) can be used to uniquely identify each ShaderCode object. + * More interesting things can be done with this, e.g. ShaderConstantProfile checks what shader constants are being used. This can be used to optimize buffer management. + * Each of the ShaderCode, ShaderUid and ShaderConstantProfile child classes only implement the subset of ShaderGeneratorInterface methods that are required for the specific tasks. + */ +class ShaderGeneratorInterface +{ +public: + /* + * Used when the shader generator would write a piece of ShaderCode. + * Can be used like printf. + * @note In the ShaderCode implementation, this does indeed write the parameter string to an internal buffer. However, you're free to do whatever you like with the parameter. + */ + void Write(const char* fmt, ...) {} + + /* + * Returns a read pointer to the internal buffer. + * @note When implementing this method in a child class, you likely want to return the argument of the last SetBuffer call here + * @note SetBuffer() should be called before using GetBuffer(). + */ + const char* GetBuffer() { return NULL; } + + /* + * Can be used to give the object a place to write to. This should be called before using Write(). 
+ * @param buffer pointer to a char buffer that the object can write to + */ + void SetBuffer(char* buffer) { } + + /* + * Tells us that a specific constant range (including last_index) is being used by the shader + */ + inline void SetConstantsUsed(unsigned int first_index, unsigned int last_index) {} + + /* + * Returns a pointer to an internally stored object of the uid_data type. + * @warning since most child classes use the default implementation you shouldn't access this directly without adding precautions against NULL access (e.g. via adding a dummy structure, cf. the vertex/pixel shader generators) + */ + template + uid_data& GetUidData() { return *(uid_data*)NULL; } +}; + +/** + * Shader UID class used to uniquely identify the ShaderCode output written in the shader generator. + * uid_data can be any struct of parameters that uniquely identify each shader code output. + * Unless performance is not an issue, uid_data should be tightly packed to reduce memory footprint. + * Shader generators will write to specific uid_data fields; ShaderUid methods will only read raw u32 values from a union. + */ +template +class ShaderUid : public ShaderGeneratorInterface +{ +public: + ShaderUid() + { + // TODO: Move to Shadergen => can be optimized out + memset(values, 0, sizeof(values)); + } + + bool operator == (const ShaderUid& obj) const + { + return memcmp(this->values, obj.values, sizeof(values)) == 0; + } + + bool operator != (const ShaderUid& obj) const + { + return memcmp(this->values, obj.values, sizeof(values)) != 0; + } + + // determines the storage order inside STL containers + bool operator < (const ShaderUid& obj) const + { + // TODO: Store last frame used and order by that? makes much more sense anyway... 
+ for (unsigned int i = 0; i < sizeof(uid_data) / sizeof(u32); ++i) + { + if (this->values[i] < obj.values[i]) + return true; + else if (this->values[i] > obj.values[i]) + return false; + } + return false; + } + + template + inline T& GetUidData() { return data; } + + const uid_data& GetUidData() const { return data; } + size_t GetUidDataSize() const { return sizeof(values); } + +private: + union + { + uid_data data; + u32 values[sizeof(uid_data) / sizeof(u32)]; + }; +}; + +class ShaderCode : public ShaderGeneratorInterface +{ +public: + ShaderCode() : buf(NULL), write_ptr(NULL) + { + + } + + void Write(const char* fmt, ...) + { + va_list arglist; + va_start(arglist, fmt); + write_ptr += vsprintf(write_ptr, fmt, arglist); + va_end(arglist); + } + + const char* GetBuffer() { return buf; } + void SetBuffer(char* buffer) { buf = buffer; write_ptr = buffer; } + +private: + const char* buf; + char* write_ptr; +}; + +/** + * Generates a shader constant profile which can be used to query which constants are used in a shader + */ +class ShaderConstantProfile : public ShaderGeneratorInterface +{ +public: + ShaderConstantProfile(int num_constants) { constant_usage.resize(num_constants); } + + inline void SetConstantsUsed(unsigned int first_index, unsigned int last_index) + { + for (unsigned int i = first_index; i < last_index+1; ++i) + constant_usage[i] = true; + } + + inline bool ConstantIsUsed(unsigned int index) + { + // TODO: Not ready for usage yet + return true; +// return constant_usage[index]; + } +private: + std::vector constant_usage; // TODO: Is vector appropriate here? 
+}; + +template +static void WriteRegister(T& object, API_TYPE ApiType, const char *prefix, const u32 num) +{ + if (ApiType == API_OPENGL) + return; // Nothing to do here + + object.Write(" : register(%s%d)", prefix, num); +} + +template +static void WriteLocation(T& object, API_TYPE ApiType, bool using_ubos) +{ + if (using_ubos) + return; + + object.Write("uniform "); +} + +template +static void DeclareUniform(T& object, API_TYPE api_type, bool using_ubos, const u32 num, const char* type, const char* name) +{ + WriteLocation(object, api_type, using_ubos); + object.Write("%s %s ", type, name); + WriteRegister(object, api_type, "c", num); + object.Write(";\n"); +} + +#pragma pack(4) +/** + * Common uid data used for shader generators that use lighting calculations. + * Expected to be stored as a member called "lighting". + */ +struct LightingUidData +{ + u32 matsource : 4; // 4x1 bit + u32 enablelighting : 4; // 4x1 bit + u32 ambsource : 4; // 4x1 bit + u32 diffusefunc : 8; // 4x2 bits + u32 attnfunc : 8; // 4x2 bits + u32 light_mask : 32; // 4x8 bits +}; +#pragma pack() + +/** + * Checks if there has been a shader uid mismatch, i.e. whether a uid that is already indexed was generated again for different shader code. + */ +template +class UidChecker +{ +public: + void Invalidate() + { + m_shaders.clear(); + m_uids.clear(); + } + + void AddToIndexAndCheck(CodeT& new_code, const UidT& new_uid, const char* shader_type, const char* dump_prefix) + { + bool uid_is_indexed = std::find(m_uids.begin(), m_uids.end(), new_uid) != m_uids.end(); + if (!uid_is_indexed) + { + m_uids.push_back(new_uid); + m_shaders[new_uid] = new_code.GetBuffer(); + } + else + { + // uid is already in the index => check if there's a shader with the same uid but different code + auto& old_code = m_shaders[new_uid]; + if (strcmp(old_code.c_str(), new_code.GetBuffer()) != 0) + { + static int num_failures = 0; + + char szTemp[MAX_PATH]; + sprintf(szTemp, "%s%ssuid_mismatch_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), + dump_prefix, + ++num_failures); + + // TODO: Should also dump uids + std::ofstream file; + 
OpenFStream(file, szTemp, std::ios_base::out); + file << "Old shader code:\n" << old_code; + file << "\n\nNew shader code:\n" << new_code.GetBuffer(); + file << "\n\nShader uid:\n"; + for (unsigned int i = 0; i < new_uid.GetUidDataSize(); ++i) + { + u32 value = ((u32*)&new_uid.GetUidData())[i]; + if ((i % 4) == 0) + { + unsigned int last_value = (i+3 < new_uid.GetUidDataSize()-1) ? i+3 : new_uid.GetUidDataSize(); + file << std::setfill(' ') << std::dec; + file << "Values " << std::setw(2) << i << " - " << last_value << ": "; + } + + file << std::setw(8) << std::setfill('0') << std::hex << value << std::setw(1); + if ((i % 4) < 3) + file << ' '; + else + file << std::endl; + } + file.close(); + + ERROR_LOG(VIDEO, "%s shader uid mismatch! See %s for details", shader_type, szTemp); + } + } + } + +private: + std::map m_shaders; + std::vector m_uids; +}; + +#endif // _SHADERGENCOMMON_H diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp index d076593423..3edb51c505 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp @@ -17,235 +17,134 @@ #include "VertexShaderGen.h" #include "VideoConfig.h" -// Mash together all the inputs that contribute to the code of a generated vertex shader into -// a unique identifier, basically containing all the bits. Yup, it's a lot .... 
-void GetVertexShaderId(VERTEXSHADERUID *uid, u32 components) +static char text[16768]; + +template +static void DefineVSOutputStructMember(T& object, API_TYPE api_type, const char* type, const char* name, int var_index, const char* semantic, int semantic_index = -1) { - memset(uid->values, 0, sizeof(uid->values)); - uid->values[0] = components | - (xfregs.numTexGen.numTexGens << 23) | - (xfregs.numChan.numColorChans << 27) | - (xfregs.dualTexTrans.enabled << 29); + object.Write(" %s %s", type, name); + if (var_index != -1) + object.Write("%d", var_index); - // TODO: If pixel lighting is enabled, do we even have to bother about storing lighting related registers here? - GetLightingShaderId(&uid->values[1]); - - uid->values[2] |= (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) << 31; - u32 *pcurvalue = &uid->values[3]; - for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) + if (api_type == API_OPENGL) + object.Write(";\n"); + else { - TexMtxInfo tinfo = xfregs.texMtxInfo[i]; - if (tinfo.texgentype != XF_TEXGEN_EMBOSS_MAP) - tinfo.hex &= 0x7ff; - if (tinfo.texgentype != XF_TEXGEN_REGULAR) - tinfo.projection = 0; - - u32 val = ((tinfo.hex >> 1) & 0x1ffff); - if (xfregs.dualTexTrans.enabled && tinfo.texgentype == XF_TEXGEN_REGULAR) - { - // rewrite normalization and post index - val |= ((u32)xfregs.postMtxInfo[i].index << 17) | ((u32)xfregs.postMtxInfo[i].normalize << 23); - } - - switch (i & 3) - { - case 0: pcurvalue[0] |= val; break; - case 1: pcurvalue[0] |= val << 24; pcurvalue[1] = val >> 8; ++pcurvalue; break; - case 2: pcurvalue[0] |= val << 16; pcurvalue[1] = val >> 16; ++pcurvalue; break; - case 3: pcurvalue[0] |= val << 8; ++pcurvalue; break; - } + if (semantic_index != -1) + object.Write(" : %s%d;\n", semantic, semantic_index); + else + object.Write(" : %s;\n", semantic); } } -void GetSafeVertexShaderId(VERTEXSHADERUIDSAFE *uid, u32 components) +template +static void GenerateVSOutputStruct(T& object, 
u32 components, API_TYPE api_type) { - // Just store all used registers here without caring whether we need all bits or less. - memset(uid->values, 0, sizeof(uid->values)); - u32* ptr = uid->values; - *ptr++ = components; - *ptr++ = xfregs.numTexGen.hex; - *ptr++ = xfregs.numChan.hex; - *ptr++ = xfregs.dualTexTrans.hex; - - for (int i = 0; i < 2; ++i) - { - *ptr++ = xfregs.color[i].hex; - *ptr++ = xfregs.alpha[i].hex; - } - - *ptr++ = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting; - - for (unsigned int i = 0; i < 8; ++i) - { - *ptr++ = xfregs.texMtxInfo[i].hex; - *ptr++ = xfregs.postMtxInfo[i].hex; - } - _assert_((ptr - uid->values) == uid->GetNumValues()); -} - - -void ValidateVertexShaderIDs(API_TYPE api, VERTEXSHADERUIDSAFE old_id, const std::string& old_code, u32 components) -{ - if (!g_ActiveConfig.bEnableShaderDebugging) - return; - - VERTEXSHADERUIDSAFE new_id; - GetSafeVertexShaderId(&new_id, components); - - if (!(old_id == new_id)) - { - std::string new_code(GenerateVertexShaderCode(components, api)); - if (old_code != new_code) - { - _assert_(old_id.GetNumValues() == new_id.GetNumValues()); - - char msg[8192]; - char* ptr = msg; - ptr += sprintf(ptr, "Vertex shader IDs matched but unique IDs did not!\nUnique IDs (old <-> new):\n"); - const int N = new_id.GetNumValues(); - for (int i = 0; i < N/2; ++i) - ptr += sprintf(ptr, "%02d, %08X %08X | %08X %08X\n", 2*i, old_id.values[2*i], old_id.values[2*i+1], - new_id.values[2*i], new_id.values[2*i+1]); - if (N % 2) - ptr += sprintf(ptr, "%02d, %08X | %08X\n", N-1, old_id.values[N-1], new_id.values[N-1]); - - static int num_failures = 0; - char szTemp[MAX_PATH]; - sprintf(szTemp, "%svsuid_mismatch_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++); - std::ofstream file; - OpenFStream(file, szTemp, std::ios_base::out); - file << msg; - file << "\n\nOld shader code:\n" << old_code; - file << "\n\nNew shader code:\n" << new_code; - file.close(); - - 
PanicAlert("Unique pixel shader ID mismatch!\n\nReport this to the devs, along with the contents of %s.", szTemp); - } - } -} - - -static char text[16384]; - -#define WRITE p+=sprintf - -char* GenerateVSOutputStruct(char* p, u32 components, API_TYPE ApiType) -{ - - // "centroid" attribute is only supported by D3D11 - const char* optCentroid = (ApiType == API_D3D11 ? "centroid" : ""); - - // GLSL makes this ugly - // TODO: Make pretty - WRITE(p, "struct VS_OUTPUT {\n"); - WRITE(p, " %s float4 pos %s POSITION;\n", optCentroid, ApiType == API_OPENGL ? ";//" : ":"); - WRITE(p, " %s float4 colors_0 %s COLOR0;\n", optCentroid, ApiType == API_OPENGL ? ";//" : ":"); - WRITE(p, " %s float4 colors_1 %s COLOR1;\n", optCentroid, ApiType == API_OPENGL ? ";//" : ":"); + object.Write("struct VS_OUTPUT {\n"); + DefineVSOutputStructMember(object, api_type, "float4", "pos", -1, "POSITION"); + DefineVSOutputStructMember(object, api_type, "float4", "colors_", 0, "COLOR", 0); + DefineVSOutputStructMember(object, api_type, "float4", "colors_", 1, "COLOR", 1); if (xfregs.numTexGen.numTexGens < 7) { for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) - WRITE(p, " %s float3 tex%d %s TEXCOORD%d;\n", optCentroid, i, ApiType == API_OPENGL ? ";//" : ":", i); + DefineVSOutputStructMember(object, api_type, "float3", "tex", i, "TEXCOORD", i); - WRITE(p, " %s float4 clipPos %s TEXCOORD%d;\n", optCentroid, ApiType == API_OPENGL ? ";//" : ":", xfregs.numTexGen.numTexGens); + DefineVSOutputStructMember(object, api_type, "float4", "clipPos", -1, "TEXCOORD", xfregs.numTexGen.numTexGens); if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) - WRITE(p, " %s float4 Normal %s TEXCOORD%d;\n", optCentroid, ApiType == API_OPENGL ? 
";//" : ":", xfregs.numTexGen.numTexGens + 1); + DefineVSOutputStructMember(object, api_type, "float4", "Normal", -1, "TEXCOORD", xfregs.numTexGen.numTexGens + 1); } else { - // clip position is in w of first 4 texcoords - if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) - { - for (int i = 0; i < 8; ++i) - WRITE(p, " %s float4 tex%d %s TEXCOORD%d;\n", optCentroid, i, ApiType == API_OPENGL? ";//" : ":", i); - } - else - { - for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) - WRITE(p, " %s float%d tex%d %s TEXCOORD%d;\n", optCentroid, i < 4 ? 4 : 3 , i, ApiType == API_OPENGL ? ";//" : ":", i); - } + // Store clip position in the w component of first 4 texcoords + bool ppl = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting; + int num_texcoords = ppl ? 8 : xfregs.numTexGen.numTexGens; + for (int i = 0; i < num_texcoords; ++i) + DefineVSOutputStructMember(object, api_type, (ppl || i < 4) ? "float4" : "float3", "tex", i, "TEXCOORD", i); } - WRITE(p, "};\n"); - - return p; + object.Write("};\n"); } -extern const char* WriteRegister(API_TYPE ApiType, const char *prefix, const u32 num); -extern const char *WriteLocation(API_TYPE ApiType); - -const char *GenerateVertexShaderCode(u32 components, API_TYPE ApiType) +template +static void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) { + // Non-uid template parameters will write to the dummy data (=> gets optimized out) + vertex_shader_uid_data dummy_data; + vertex_shader_uid_data& uid_data = (&out.template GetUidData() != NULL) + ? 
out.template GetUidData() : dummy_data; + + out.SetBuffer(text); #ifndef ANDROID - locale_t locale = newlocale(LC_NUMERIC_MASK, "C", NULL); // New locale for compilation - locale_t old_locale = uselocale(locale); // Apply the locale for this thread + locale_t locale; + locale_t old_locale; + if (out.GetBuffer() != NULL) + { + locale = newlocale(LC_NUMERIC_MASK, "C", NULL); // New locale for compilation + old_locale = uselocale(locale); // Apply the locale for this thread + } #endif text[sizeof(text) - 1] = 0x7C; // canary _assert_(bpmem.genMode.numtexgens == xfregs.numTexGen.numTexGens); _assert_(bpmem.genMode.numcolchans == xfregs.numChan.numColorChans); - bool is_d3d = (ApiType & API_D3D9 || ApiType == API_D3D11); - u32 lightMask = 0; - if (xfregs.numChan.numColorChans > 0) - lightMask |= xfregs.color[0].GetFullLightMask() | xfregs.alpha[0].GetFullLightMask(); - if (xfregs.numChan.numColorChans > 1) - lightMask |= xfregs.color[1].GetFullLightMask() | xfregs.alpha[1].GetFullLightMask(); - - char *p = text; - WRITE(p, "//Vertex Shader: comp:%x, \n", components); + bool is_d3d = (api_type & API_D3D9 || api_type == API_D3D11); // uniforms if (g_ActiveConfig.backend_info.bSupportsGLSLUBO) - WRITE(p, "layout(std140) uniform VSBlock {\n"); + out.Write("layout(std140) uniform VSBlock {\n"); - WRITE(p, "%sfloat4 " I_POSNORMALMATRIX"[6] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_POSNORMALMATRIX)); - WRITE(p, "%sfloat4 " I_PROJECTION"[4] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_PROJECTION)); - WRITE(p, "%sfloat4 " I_MATERIALS"[4] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_MATERIALS)); - WRITE(p, "%sfloat4 " I_LIGHTS"[40] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_LIGHTS)); - WRITE(p, "%sfloat4 " I_TEXMATRICES"[24] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_TEXMATRICES)); // also using tex matrices - WRITE(p, "%sfloat4 " I_TRANSFORMMATRICES"[64] %s;\n", 
WriteLocation(ApiType),WriteRegister(ApiType, "c", C_TRANSFORMMATRICES)); - WRITE(p, "%sfloat4 " I_NORMALMATRICES"[32] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_NORMALMATRICES)); - WRITE(p, "%sfloat4 " I_POSTTRANSFORMMATRICES"[64] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_POSTTRANSFORMMATRICES)); - WRITE(p, "%sfloat4 " I_DEPTHPARAMS" %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_DEPTHPARAMS)); + DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_POSNORMALMATRIX, "float4", I_POSNORMALMATRIX"[6]"); + DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_PROJECTION, "float4", I_PROJECTION"[4]"); + DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_MATERIALS, "float4", I_MATERIALS"[4]"); + DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_LIGHTS, "float4", I_LIGHTS"[40]"); + DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_TEXMATRICES, "float4", I_TEXMATRICES"[24]"); + DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_TRANSFORMMATRICES, "float4", I_TRANSFORMMATRICES"[64]"); + DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_NORMALMATRICES, "float4", I_NORMALMATRICES"[32]"); + DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_POSTTRANSFORMMATRICES, "float4", I_POSTTRANSFORMMATRICES"[64]"); + DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_DEPTHPARAMS, "float4", I_DEPTHPARAMS); if (g_ActiveConfig.backend_info.bSupportsGLSLUBO) - WRITE(p, "};\n"); + out.Write("};\n"); - p = GenerateVSOutputStruct(p, components, ApiType); + GenerateVSOutputStruct(out, components, api_type); - if(ApiType == API_OPENGL) + uid_data.numTexGens = xfregs.numTexGen.numTexGens; + uid_data.components = components; + + if(api_type == API_OPENGL) { - WRITE(p, "ATTRIN float4 rawpos; // ATTR%d,\n", 
SHADER_POSITION_ATTRIB); + out.Write("ATTRIN float4 rawpos; // ATTR%d,\n", SHADER_POSITION_ATTRIB); if (components & VB_HAS_POSMTXIDX) - WRITE(p, "ATTRIN float fposmtx; // ATTR%d,\n", SHADER_POSMTX_ATTRIB); + out.Write("ATTRIN float fposmtx; // ATTR%d,\n", SHADER_POSMTX_ATTRIB); if (components & VB_HAS_NRM0) - WRITE(p, "ATTRIN float3 rawnorm0; // ATTR%d,\n", SHADER_NORM0_ATTRIB); + out.Write("ATTRIN float3 rawnorm0; // ATTR%d,\n", SHADER_NORM0_ATTRIB); if (components & VB_HAS_NRM1) - WRITE(p, "ATTRIN float3 rawnorm1; // ATTR%d,\n", SHADER_NORM1_ATTRIB); + out.Write("ATTRIN float3 rawnorm1; // ATTR%d,\n", SHADER_NORM1_ATTRIB); if (components & VB_HAS_NRM2) - WRITE(p, "ATTRIN float3 rawnorm2; // ATTR%d,\n", SHADER_NORM2_ATTRIB); + out.Write("ATTRIN float3 rawnorm2; // ATTR%d,\n", SHADER_NORM2_ATTRIB); if (components & VB_HAS_COL0) - WRITE(p, "ATTRIN float4 color0; // ATTR%d,\n", SHADER_COLOR0_ATTRIB); + out.Write("ATTRIN float4 color0; // ATTR%d,\n", SHADER_COLOR0_ATTRIB); if (components & VB_HAS_COL1) - WRITE(p, "ATTRIN float4 color1; // ATTR%d,\n", SHADER_COLOR1_ATTRIB); + out.Write("ATTRIN float4 color1; // ATTR%d,\n", SHADER_COLOR1_ATTRIB); for (int i = 0; i < 8; ++i) { u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0<= 32 ? (posmtx-32) : posmtx;\n"); - WRITE(p, "float3 N0 = " I_NORMALMATRICES"[normidx].xyz, N1 = " I_NORMALMATRICES"[normidx+1].xyz, N2 = " I_NORMALMATRICES"[normidx+2].xyz;\n"); + out.Write("int normidx = posmtx >= 32 ? 
(posmtx-32) : posmtx;\n"); + out.Write("float3 N0 = " I_NORMALMATRICES"[normidx].xyz, N1 = " I_NORMALMATRICES"[normidx+1].xyz, N2 = " I_NORMALMATRICES"[normidx+2].xyz;\n"); } } if (components & VB_HAS_NRM0) - WRITE(p, "float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0)));\n"); + out.Write("float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0)));\n"); if (components & VB_HAS_NRM1) - WRITE(p, "float3 _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n"); + out.Write("float3 _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n"); if (components & VB_HAS_NRM2) - WRITE(p, "float3 _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n"); + out.Write("float3 _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n"); } else { - WRITE(p, "float4 pos = float4(dot(" I_POSNORMALMATRIX"[0], rawpos), dot(" I_POSNORMALMATRIX"[1], rawpos), dot(" I_POSNORMALMATRIX"[2], rawpos), 1.0f);\n"); + out.Write("float4 pos = float4(dot(" I_POSNORMALMATRIX"[0], rawpos), dot(" I_POSNORMALMATRIX"[1], rawpos), dot(" I_POSNORMALMATRIX"[2], rawpos), 1.0f);\n"); if (components & VB_HAS_NRM0) - WRITE(p, "float3 _norm0 = normalize(float3(dot(" I_POSNORMALMATRIX"[3].xyz, rawnorm0), dot(" I_POSNORMALMATRIX"[4].xyz, rawnorm0), dot(" I_POSNORMALMATRIX"[5].xyz, rawnorm0)));\n"); + out.Write("float3 _norm0 = normalize(float3(dot(" I_POSNORMALMATRIX"[3].xyz, rawnorm0), dot(" I_POSNORMALMATRIX"[4].xyz, rawnorm0), dot(" I_POSNORMALMATRIX"[5].xyz, rawnorm0)));\n"); if (components & VB_HAS_NRM1) - WRITE(p, "float3 _norm1 = float3(dot(" I_POSNORMALMATRIX"[3].xyz, rawnorm1), dot(" I_POSNORMALMATRIX"[4].xyz, rawnorm1), dot(" I_POSNORMALMATRIX"[5].xyz, rawnorm1));\n"); + out.Write("float3 _norm1 = float3(dot(" I_POSNORMALMATRIX"[3].xyz, rawnorm1), dot(" I_POSNORMALMATRIX"[4].xyz, rawnorm1), dot(" I_POSNORMALMATRIX"[5].xyz, rawnorm1));\n"); if (components & 
VB_HAS_NRM2) - WRITE(p, "float3 _norm2 = float3(dot(" I_POSNORMALMATRIX"[3].xyz, rawnorm2), dot(" I_POSNORMALMATRIX"[4].xyz, rawnorm2), dot(" I_POSNORMALMATRIX"[5].xyz, rawnorm2));\n"); + out.Write("float3 _norm2 = float3(dot(" I_POSNORMALMATRIX"[3].xyz, rawnorm2), dot(" I_POSNORMALMATRIX"[4].xyz, rawnorm2), dot(" I_POSNORMALMATRIX"[5].xyz, rawnorm2));\n"); } if (!(components & VB_HAS_NRM0)) - WRITE(p, "float3 _norm0 = float3(0.0f, 0.0f, 0.0f);\n"); + out.Write("float3 _norm0 = float3(0.0f, 0.0f, 0.0f);\n"); - WRITE(p, "o.pos = float4(dot(" I_PROJECTION"[0], pos), dot(" I_PROJECTION"[1], pos), dot(" I_PROJECTION"[2], pos), dot(" I_PROJECTION"[3], pos));\n"); - WRITE(p, "float4 mat, lacc;\n" - "float3 ldir, h;\n" - "float dist, dist2, attn;\n"); + out.Write("o.pos = float4(dot(" I_PROJECTION"[0], pos), dot(" I_PROJECTION"[1], pos), dot(" I_PROJECTION"[2], pos), dot(" I_PROJECTION"[3], pos));\n"); + out.Write("float4 mat, lacc;\n" + "float3 ldir, h;\n" + "float dist, dist2, attn;\n"); + + uid_data.numColorChans = xfregs.numChan.numColorChans; if (xfregs.numChan.numColorChans == 0) { if (components & VB_HAS_COL0) - WRITE(p, "o.colors_0 = color0;\n"); + out.Write("o.colors_0 = color0;\n"); else - WRITE(p, "o.colors_0 = float4(1.0f, 1.0f, 1.0f, 1.0f);\n"); + out.Write("o.colors_0 = float4(1.0f, 1.0f, 1.0f, 1.0f);\n"); } // TODO: This probably isn't necessary if pixel lighting is enabled. - p = GenerateLightingShader(p, components, I_MATERIALS, I_LIGHTS, "color", "o.colors_"); + GenerateLightingShader(out, uid_data.lighting, components, I_MATERIALS, I_LIGHTS, "color", "o.colors_"); if (xfregs.numChan.numColorChans < 2) { if (components & VB_HAS_COL1) - WRITE(p, "o.colors_1 = color1;\n"); + out.Write("o.colors_1 = color1;\n"); else - WRITE(p, "o.colors_1 = o.colors_0;\n"); + out.Write("o.colors_1 = o.colors_0;\n"); } // special case if only pos and tex coord 0 and tex coord input is AB11 // donko - this has caused problems in some games. removed for now. 
@@ -400,25 +297,25 @@ const char *GenerateVertexShaderCode(u32 components, API_TYPE ApiType) */ // transform texcoords - WRITE(p, "float4 coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n"); + out.Write("float4 coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n"); for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) { TexMtxInfo& texinfo = xfregs.texMtxInfo[i]; - WRITE(p, "{\n"); - WRITE(p, "coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n"); - + out.Write("{\n"); + out.Write("coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n"); + uid_data.texMtxInfo[i].sourcerow = xfregs.texMtxInfo[i].sourcerow; switch (texinfo.sourcerow) { case XF_SRCGEOM_INROW: _assert_( texinfo.inputform == XF_TEXINPUT_ABC1 ); - WRITE(p, "coord = rawpos;\n"); // pos.w is 1 + out.Write("coord = rawpos;\n"); // pos.w is 1 break; case XF_SRCNORMAL_INROW: if (components & VB_HAS_NRM0) { _assert_( texinfo.inputform == XF_TEXINPUT_ABC1 ); - WRITE(p, "coord = float4(rawnorm0.xyz, 1.0f);\n"); + out.Write("coord = float4(rawnorm0.xyz, 1.0f);\n"); } break; case XF_SRCCOLORS_INROW: @@ -428,24 +325,25 @@ const char *GenerateVertexShaderCode(u32 components, API_TYPE ApiType) if (components & VB_HAS_NRM1) { _assert_( texinfo.inputform == XF_TEXINPUT_ABC1 ); - WRITE(p, "coord = float4(rawnorm1.xyz, 1.0f);\n"); + out.Write("coord = float4(rawnorm1.xyz, 1.0f);\n"); } break; case XF_SRCBINORMAL_B_INROW: if (components & VB_HAS_NRM2) { _assert_( texinfo.inputform == XF_TEXINPUT_ABC1 ); - WRITE(p, "coord = float4(rawnorm2.xyz, 1.0f);\n"); + out.Write("coord = float4(rawnorm2.xyz, 1.0f);\n"); } break; default: _assert_(texinfo.sourcerow <= XF_SRCTEX7_INROW); if (components & (VB_HAS_UV0<<(texinfo.sourcerow - XF_SRCTEX0_INROW)) ) - WRITE(p, "coord = float4(tex%d.x, tex%d.y, 1.0f, 1.0f);\n", texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW); + out.Write("coord = float4(tex%d.x, tex%d.y, 1.0f, 1.0f);\n", texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW); break; } // first transformation + 
uid_data.texMtxInfo[i].texgentype = xfregs.texMtxInfo[i].texgentype; switch (texinfo.texgentype) { case XF_TEXGEN_EMBOSS_MAP: // calculate tex coords into bump map @@ -453,51 +351,57 @@ const char *GenerateVertexShaderCode(u32 components, API_TYPE ApiType) if (components & (VB_HAS_NRM1|VB_HAS_NRM2)) { // transform the light dir into tangent space - WRITE(p, "ldir = normalize(" I_LIGHTS"[5*%d + 3].xyz - pos.xyz);\n", texinfo.embosslightshift); - WRITE(p, "o.tex%d.xyz = o.tex%d.xyz + float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0f);\n", i, texinfo.embosssourceshift); + uid_data.texMtxInfo[i].embosslightshift = xfregs.texMtxInfo[i].embosslightshift; + uid_data.texMtxInfo[i].embosssourceshift = xfregs.texMtxInfo[i].embosssourceshift; + out.Write("ldir = normalize(%s.xyz - pos.xyz);\n", LightPos(I_LIGHTS, texinfo.embosslightshift)); + out.Write("o.tex%d.xyz = o.tex%d.xyz + float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0f);\n", i, texinfo.embosssourceshift); } else { _assert_(0); // should have normals - WRITE(p, "o.tex%d.xyz = o.tex%d.xyz;\n", i, texinfo.embosssourceshift); + uid_data.texMtxInfo[i].embosssourceshift = xfregs.texMtxInfo[i].embosssourceshift; + out.Write("o.tex%d.xyz = o.tex%d.xyz;\n", i, texinfo.embosssourceshift); } break; case XF_TEXGEN_COLOR_STRGBC0: _assert_(texinfo.sourcerow == XF_SRCCOLORS_INROW); - WRITE(p, "o.tex%d.xyz = float3(o.colors_0.x, o.colors_0.y, 1);\n", i); + out.Write("o.tex%d.xyz = float3(o.colors_0.x, o.colors_0.y, 1);\n", i); break; case XF_TEXGEN_COLOR_STRGBC1: _assert_(texinfo.sourcerow == XF_SRCCOLORS_INROW); - WRITE(p, "o.tex%d.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i); + out.Write("o.tex%d.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i); break; case XF_TEXGEN_REGULAR: default: + uid_data.texMtxInfo[i].projection = xfregs.texMtxInfo[i].projection; if (components & (VB_HAS_TEXMTXIDX0<(object, components, api_type); +} + +void GenerateVertexShaderCode(VertexShaderCode& object, u32 components, API_TYPE api_type) +{ 
+ GenerateVertexShader(object, components, api_type); +} + +void GenerateVSOutputStructForGS(ShaderCode& object, u32 components, API_TYPE api_type) +{ + GenerateVSOutputStruct(object, components, api_type); } diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.h b/Source/Core/VideoCommon/Src/VertexShaderGen.h index f3f7d5866b..eb7236678a 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.h +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.h @@ -5,8 +5,10 @@ #ifndef GCOGL_VERTEXSHADER_H #define GCOGL_VERTEXSHADER_H +#include #include "XFMemory.h" #include "VideoCommon.h" +#include "ShaderGenCommon.h" // TODO should be reordered #define SHADER_POSITION_ATTRIB 0 @@ -48,7 +50,8 @@ #define C_NORMALMATRICES (C_TRANSFORMMATRICES + 64) #define C_POSTTRANSFORMMATRICES (C_NORMALMATRICES + 32) #define C_DEPTHPARAMS (C_POSTTRANSFORMMATRICES + 64) -#define C_VENVCONST_END (C_DEPTHPARAMS + 1) +#define C_VENVCONST_END (C_DEPTHPARAMS + 1) + const s_svar VSVar_Loc[] = { {I_POSNORMALMATRIX, C_POSNORMALMATRIX, 6 }, {I_PROJECTION , C_PROJECTION, 4 }, {I_MATERIALS, C_MATERIALS, 4 }, @@ -59,75 +62,40 @@ const s_svar VSVar_Loc[] = { {I_POSNORMALMATRIX, C_POSNORMALMATRIX, 6 }, {I_POSTTRANSFORMMATRICES, C_POSTTRANSFORMMATRICES, 64 }, {I_DEPTHPARAMS, C_DEPTHPARAMS, 1 }, }; -template -class _VERTEXSHADERUID + +#pragma pack(4) + +struct vertex_shader_uid_data { -#define NUM_VSUID_VALUES_SAFE 25 -public: - u32 values[safe ? 
NUM_VSUID_VALUES_SAFE : 9]; + u32 components; + u32 numColorChans : 2; + u32 numTexGens : 4; - _VERTEXSHADERUID() - { - } + struct { + u32 projection : 1; // XF_TEXPROJ_X + u32 inputform : 2; // XF_TEXINPUT_X + u32 texgentype : 3; // XF_TEXGEN_X + u32 sourcerow : 5; // XF_SRCGEOM_X + u32 embosssourceshift : 3; // what generated texcoord to use + u32 embosslightshift : 3; // light index that is used + } texMtxInfo[8]; + struct { + u32 index : 6; // base row of dual transform matrix + u32 normalize : 1; // normalize before send operation + } postMtxInfo[8]; + struct { + u32 enabled : 1; + } dualTexTrans; - _VERTEXSHADERUID(const _VERTEXSHADERUID& r) - { - for (size_t i = 0; i < sizeof(values) / sizeof(u32); ++i) - values[i] = r.values[i]; - } - - int GetNumValues() const - { - if (safe) return NUM_VSUID_VALUES_SAFE; - else return (((values[0] >> 23) & 0xf) * 3 + 3) / 4 + 3; // numTexGens*3/4+1 - } - - bool operator <(const _VERTEXSHADERUID& _Right) const - { - if (values[0] < _Right.values[0]) - return true; - else if (values[0] > _Right.values[0]) - return false; - - int N = GetNumValues(); - for (int i = 1; i < N; ++i) - { - if (values[i] < _Right.values[i]) - return true; - else if (values[i] > _Right.values[i]) - return false; - } - - return false; - } - - bool operator ==(const _VERTEXSHADERUID& _Right) const - { - if (values[0] != _Right.values[0]) - return false; - - int N = GetNumValues(); - for (int i = 1; i < N; ++i) - { - if (values[i] != _Right.values[i]) - return false; - } - - return true; - } + LightingUidData lighting; }; -typedef _VERTEXSHADERUID VERTEXSHADERUID; -typedef _VERTEXSHADERUID VERTEXSHADERUIDSAFE; +#pragma pack() +typedef ShaderUid VertexShaderUid; +typedef ShaderCode VertexShaderCode; // TODO: Obsolete.. -// components is included in the uid. 
-char* GenerateVSOutputStruct(char* p, u32 components, API_TYPE api_type); -const char *GenerateVertexShaderCode(u32 components, API_TYPE api_type); - -void GetVertexShaderId(VERTEXSHADERUID *uid, u32 components); -void GetSafeVertexShaderId(VERTEXSHADERUIDSAFE *uid, u32 components); - -// Used to make sure that our optimized vertex shader IDs don't lose any possible shader code changes -void ValidateVertexShaderIDs(API_TYPE api, VERTEXSHADERUIDSAFE old_id, const std::string& old_code, u32 components); +void GetVertexShaderUid(VertexShaderUid& object, u32 components, API_TYPE api_type); +void GenerateVertexShaderCode(VertexShaderCode& object, u32 components, API_TYPE api_type); +void GenerateVSOutputStructForGS(ShaderCode& object, u32 components, API_TYPE api_type); #endif // GCOGL_VERTEXSHADER_H diff --git a/Source/Core/VideoCommon/VideoCommon.vcxproj b/Source/Core/VideoCommon/VideoCommon.vcxproj index f66dfc6b01..617e4ec567 100644 --- a/Source/Core/VideoCommon/VideoCommon.vcxproj +++ b/Source/Core/VideoCommon/VideoCommon.vcxproj @@ -252,6 +252,7 @@ + diff --git a/Source/Core/VideoCommon/VideoCommon.vcxproj.filters b/Source/Core/VideoCommon/VideoCommon.vcxproj.filters index 330b23d370..e988d34e12 100644 --- a/Source/Core/VideoCommon/VideoCommon.vcxproj.filters +++ b/Source/Core/VideoCommon/VideoCommon.vcxproj.filters @@ -258,6 +258,9 @@ Util + + Shader Generators + @@ -291,4 +294,4 @@ {e2a527a2-ccc8-4ab8-a93e-dd2628c0f3b6} - + \ No newline at end of file diff --git a/Source/Plugins/Plugin_VideoDX11/Src/LineGeometryShader.cpp b/Source/Plugins/Plugin_VideoDX11/Src/LineGeometryShader.cpp index d1acee5600..9009d39029 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/LineGeometryShader.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/LineGeometryShader.cpp @@ -169,11 +169,12 @@ bool LineGeometryShader::SetShader(u32 components, float lineWidth, if (shaderIt == m_shaders.end()) { // Generate new shader. Warning: not thread-safe. 
- static char code[16384]; - char* p = code; - p = GenerateVSOutputStruct(p, components, API_D3D11); - p += sprintf(p, "\n%s", LINE_GS_COMMON); - + static char buffer[16384]; + ShaderCode code; + code.SetBuffer(buffer); + GenerateVSOutputStructForGS(code, components, API_D3D11); + code.Write("\n%s", LINE_GS_COMMON); + std::stringstream numTexCoordsStream; numTexCoordsStream << xfregs.numTexGen.numTexGens; @@ -185,7 +186,7 @@ bool LineGeometryShader::SetShader(u32 components, float lineWidth, { "NUM_TEXCOORDS", numTexCoordsStr.c_str() }, { NULL, NULL } }; - ID3D11GeometryShader* newShader = D3D::CompileAndCreateGeometryShader(code, unsigned int(strlen(code)), macros); + ID3D11GeometryShader* newShader = D3D::CompileAndCreateGeometryShader(code.GetBuffer(), unsigned int(strlen(code.GetBuffer())), macros); if (!newShader) { WARN_LOG(VIDEO, "Line geometry shader for components 0x%.08X failed to compile", components); diff --git a/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.cpp b/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.cpp index a2bdd360c2..819fd46974 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.cpp @@ -28,9 +28,10 @@ namespace DX11 PixelShaderCache::PSCache PixelShaderCache::PixelShaders; const PixelShaderCache::PSCacheEntry* PixelShaderCache::last_entry; -PIXELSHADERUID PixelShaderCache::last_uid; +PixelShaderUid PixelShaderCache::last_uid; +UidChecker PixelShaderCache::pixel_uid_checker; -LinearDiskCache g_ps_disk_cache; +LinearDiskCache g_ps_disk_cache; ID3D11PixelShader* s_ColorMatrixProgram[2] = {NULL}; ID3D11PixelShader* s_ColorCopyProgram[2] = {NULL}; @@ -352,10 +353,10 @@ ID3D11Buffer* &PixelShaderCache::GetConstantBuffer() } // this class will load the precompiled shaders into our cache -class PixelShaderCacheInserter : public LinearDiskCacheReader +class PixelShaderCacheInserter : public LinearDiskCacheReader { public: - void Read(const PIXELSHADERUID 
&key, const u8 *value, u32 value_size) + void Read(const PixelShaderUid &key, const u8 *value, u32 value_size) { PixelShaderCache::InsertByteCode(key, value, value_size); } @@ -414,7 +415,8 @@ void PixelShaderCache::Clear() { for (PSCache::iterator iter = PixelShaders.begin(); iter != PixelShaders.end(); iter++) iter->second.Destroy(); - PixelShaders.clear(); + PixelShaders.clear(); + pixel_uid_checker.Invalidate(); last_entry = NULL; } @@ -450,8 +452,14 @@ void PixelShaderCache::Shutdown() bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components) { - PIXELSHADERUID uid; - GetPixelShaderId(&uid, dstAlphaMode, components); + PixelShaderUid uid; + GetPixelShaderUid(uid, dstAlphaMode, API_D3D11, components); + if (g_ActiveConfig.bEnableShaderDebugging) + { + PixelShaderCode code; + GeneratePixelShaderCode(code, dstAlphaMode, API_D3D11, components); + pixel_uid_checker.AddToIndexAndCheck(code, uid, "Pixel", "p"); + } // Check if the shader is already set if (last_entry) @@ -459,7 +467,6 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components) if (uid == last_uid) { GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE,true); - ValidatePixelShaderIDs(API_D3D11, last_entry->safe_uid, last_entry->code, dstAlphaMode, components); return (last_entry->shader != NULL); } } @@ -475,15 +482,15 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components) last_entry = &entry; GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE,true); - ValidatePixelShaderIDs(API_D3D11, entry.safe_uid, entry.code, dstAlphaMode, components); return (entry.shader != NULL); } // Need to compile a new shader - const char* code = GeneratePixelShaderCode(dstAlphaMode, API_D3D11, components); + PixelShaderCode code; + GeneratePixelShaderCode(code, dstAlphaMode, API_D3D11, components); D3DBlob* pbytecode; - if (!D3D::CompilePixelShader(code, (unsigned int)strlen(code), &pbytecode)) + if (!D3D::CompilePixelShader(code.GetBuffer(), (unsigned 
int)strlen(code.GetBuffer()), &pbytecode)) { GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true); return false; @@ -497,15 +504,14 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components) if (g_ActiveConfig.bEnableShaderDebugging && success) { - PixelShaders[uid].code = code; - GetSafePixelShaderId(&PixelShaders[uid].safe_uid, dstAlphaMode, components); + PixelShaders[uid].code = code.GetBuffer(); } GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); return success; } -bool PixelShaderCache::InsertByteCode(const PIXELSHADERUID &uid, const void* bytecode, unsigned int bytecodelen) +bool PixelShaderCache::InsertByteCode(const PixelShaderUid &uid, const void* bytecode, unsigned int bytecodelen) { ID3D11PixelShader* shader = D3D::CreatePixelShaderFromByteCode(bytecode, bytecodelen); if (shader == NULL) diff --git a/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.h b/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.h index c8a6c3973b..dee930ccfe 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.h +++ b/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.h @@ -22,7 +22,7 @@ public: static void Clear(); static void Shutdown(); static bool SetShader(DSTALPHA_MODE dstAlphaMode, u32 components); // TODO: Should be renamed to LoadShader - static bool InsertByteCode(const PIXELSHADERUID &uid, const void* bytecode, unsigned int bytecodelen); + static bool InsertByteCode(const PixelShaderUid &uid, const void* bytecode, unsigned int bytecodelen); static ID3D11PixelShader* GetActiveShader() { return last_entry->shader; } static ID3D11Buffer* &GetConstantBuffer(); @@ -41,18 +41,19 @@ private: { ID3D11PixelShader* shader; - PIXELSHADERUIDSAFE safe_uid; std::string code; PSCacheEntry() : shader(NULL) {} void Destroy() { SAFE_RELEASE(shader); } }; - typedef std::map PSCache; + typedef std::map PSCache; static PSCache PixelShaders; static const PSCacheEntry* last_entry; - static PIXELSHADERUID last_uid; + static PixelShaderUid last_uid; + + 
static UidChecker pixel_uid_checker; }; } // namespace DX11 diff --git a/Source/Plugins/Plugin_VideoDX11/Src/PointGeometryShader.cpp b/Source/Plugins/Plugin_VideoDX11/Src/PointGeometryShader.cpp index 0971b396ce..9d6e02104e 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/PointGeometryShader.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/PointGeometryShader.cpp @@ -163,10 +163,11 @@ bool PointGeometryShader::SetShader(u32 components, float pointSize, if (shaderIt == m_shaders.end()) { // Generate new shader. Warning: not thread-safe. - static char code[16384]; - char* p = code; - p = GenerateVSOutputStruct(p, components, API_D3D11); - p += sprintf(p, "\n%s", POINT_GS_COMMON); + static char buffer[16384]; + ShaderCode code; + code.SetBuffer(buffer); + GenerateVSOutputStructForGS(code, components, API_D3D11); + code.Write("\n%s", POINT_GS_COMMON); std::stringstream numTexCoordsStream; numTexCoordsStream << xfregs.numTexGen.numTexGens; @@ -179,7 +180,7 @@ bool PointGeometryShader::SetShader(u32 components, float pointSize, { "NUM_TEXCOORDS", numTexCoordsStr.c_str() }, { NULL, NULL } }; - ID3D11GeometryShader* newShader = D3D::CompileAndCreateGeometryShader(code, unsigned int(strlen(code)), macros); + ID3D11GeometryShader* newShader = D3D::CompileAndCreateGeometryShader(code.GetBuffer(), unsigned int(strlen(code.GetBuffer())), macros); if (!newShader) { WARN_LOG(VIDEO, "Point geometry shader for components 0x%.08X failed to compile", components); diff --git a/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp index 46161e25a5..b16a91ad95 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp @@ -238,7 +238,7 @@ void VertexManager::vFlush() // set global constants VertexShaderManager::SetConstants(); - PixelShaderManager::SetConstants(); + PixelShaderManager::SetConstants(g_nativeVertexFmt->m_components); bool useDstAlpha = 
!g_ActiveConfig.bDstAlphaPass && bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate && bpmem.zcontrol.pixel_format == PIXELFMT_RGBA6_Z24; diff --git a/Source/Plugins/Plugin_VideoDX11/Src/VertexShaderCache.cpp b/Source/Plugins/Plugin_VideoDX11/Src/VertexShaderCache.cpp index 267f7c9642..6121824b8e 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/VertexShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/VertexShaderCache.cpp @@ -24,14 +24,15 @@ namespace DX11 { VertexShaderCache::VSCache VertexShaderCache::vshaders; const VertexShaderCache::VSCacheEntry *VertexShaderCache::last_entry; -VERTEXSHADERUID VertexShaderCache::last_uid; +VertexShaderUid VertexShaderCache::last_uid; +UidChecker VertexShaderCache::vertex_uid_checker; static ID3D11VertexShader* SimpleVertexShader = NULL; static ID3D11VertexShader* ClearVertexShader = NULL; static ID3D11InputLayout* SimpleLayout = NULL; static ID3D11InputLayout* ClearLayout = NULL; -LinearDiskCache g_vs_disk_cache; +LinearDiskCache g_vs_disk_cache; ID3D11VertexShader* VertexShaderCache::GetSimpleVertexShader() { return SimpleVertexShader; } ID3D11VertexShader* VertexShaderCache::GetClearVertexShader() { return ClearVertexShader; } @@ -57,10 +58,10 @@ ID3D11Buffer* &VertexShaderCache::GetConstantBuffer() } // this class will load the precompiled shaders into our cache -class VertexShaderCacheInserter : public LinearDiskCacheReader +class VertexShaderCacheInserter : public LinearDiskCacheReader { public: - void Read(const VERTEXSHADERUID &key, const u8 *value, u32 value_size) + void Read(const VertexShaderUid &key, const u8 *value, u32 value_size) { D3DBlob* blob = new D3DBlob(value_size, value); VertexShaderCache::InsertByteCode(key, blob); @@ -176,6 +177,7 @@ void VertexShaderCache::Clear() for (VSCache::iterator iter = vshaders.begin(); iter != vshaders.end(); ++iter) iter->second.Destroy(); vshaders.clear(); + vertex_uid_checker.Invalidate(); last_entry = NULL; } @@ -197,14 +199,20 @@ void 
VertexShaderCache::Shutdown() bool VertexShaderCache::SetShader(u32 components) { - VERTEXSHADERUID uid; - GetVertexShaderId(&uid, components); + VertexShaderUid uid; + GetVertexShaderUid(uid, components, API_D3D11); + if (g_ActiveConfig.bEnableShaderDebugging) + { + VertexShaderCode code; + GenerateVertexShaderCode(code, components, API_D3D11); + vertex_uid_checker.AddToIndexAndCheck(code, uid, "Vertex", "v"); + } + if (last_entry) { if (uid == last_uid) { GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); - ValidateVertexShaderIDs(API_D3D11, last_entry->safe_uid, last_entry->code, components); return (last_entry->shader != NULL); } } @@ -218,14 +226,14 @@ bool VertexShaderCache::SetShader(u32 components) last_entry = &entry; GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); - ValidateVertexShaderIDs(API_D3D11, entry.safe_uid, entry.code, components); return (entry.shader != NULL); } - const char *code = GenerateVertexShaderCode(components, API_D3D11); + VertexShaderCode code; + GenerateVertexShaderCode(code, components, API_D3D11); D3DBlob* pbytecode = NULL; - D3D::CompileVertexShader(code, (int)strlen(code), &pbytecode); + D3D::CompileVertexShader(code.GetBuffer(), (int)strlen(code.GetBuffer()), &pbytecode); if (pbytecode == NULL) { @@ -239,15 +247,14 @@ bool VertexShaderCache::SetShader(u32 components) if (g_ActiveConfig.bEnableShaderDebugging && success) { - vshaders[uid].code = code; - GetSafeVertexShaderId(&vshaders[uid].safe_uid, components); + vshaders[uid].code = code.GetBuffer(); } GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); return success; } -bool VertexShaderCache::InsertByteCode(const VERTEXSHADERUID &uid, D3DBlob* bcodeblob) +bool VertexShaderCache::InsertByteCode(const VertexShaderUid &uid, D3DBlob* bcodeblob) { ID3D11VertexShader* shader = D3D::CreateVertexShaderFromByteCode(bcodeblob); if (shader == NULL) diff --git a/Source/Plugins/Plugin_VideoDX11/Src/VertexShaderCache.h 
b/Source/Plugins/Plugin_VideoDX11/Src/VertexShaderCache.h index 4588775d7a..b80dbcd7b1 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/VertexShaderCache.h +++ b/Source/Plugins/Plugin_VideoDX11/Src/VertexShaderCache.h @@ -31,7 +31,7 @@ public: static ID3D11InputLayout* GetSimpleInputLayout(); static ID3D11InputLayout* GetClearInputLayout(); - static bool VertexShaderCache::InsertByteCode(const VERTEXSHADERUID &uid, D3DBlob* bcodeblob); + static bool VertexShaderCache::InsertByteCode(const VertexShaderUid &uid, D3DBlob* bcodeblob); private: struct VSCacheEntry @@ -39,7 +39,6 @@ private: ID3D11VertexShader* shader; D3DBlob* bytecode; // needed to initialize the input layout - VERTEXSHADERUIDSAFE safe_uid; std::string code; VSCacheEntry() : shader(NULL), bytecode(NULL) {} @@ -55,11 +54,13 @@ private: SAFE_RELEASE(bytecode); } }; - typedef std::map VSCache; + typedef std::map VSCache; static VSCache vshaders; static const VSCacheEntry* last_entry; - static VERTEXSHADERUID last_uid; + static VertexShaderUid last_uid; + + static UidChecker vertex_uid_checker; }; } // namespace DX11 diff --git a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp index 69555df0e9..a4a788c7e3 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp @@ -31,9 +31,10 @@ namespace DX9 PixelShaderCache::PSCache PixelShaderCache::PixelShaders; const PixelShaderCache::PSCacheEntry *PixelShaderCache::last_entry; -PIXELSHADERUID PixelShaderCache::last_uid; +PixelShaderUid PixelShaderCache::last_uid; +UidChecker PixelShaderCache::pixel_uid_checker; -static LinearDiskCache g_ps_disk_cache; +static LinearDiskCache g_ps_disk_cache; static std::set unique_shaders; #define MAX_SSAA_SHADERS 3 @@ -55,10 +56,10 @@ static LPDIRECT3DPIXELSHADER9 s_ClearProgram = NULL; static LPDIRECT3DPIXELSHADER9 s_rgba6_to_rgb8 = NULL; static LPDIRECT3DPIXELSHADER9 s_rgb8_to_rgba6 = NULL; 
-class PixelShaderCacheInserter : public LinearDiskCacheReader +class PixelShaderCacheInserter : public LinearDiskCacheReader { public: - void Read(const PIXELSHADERUID &key, const u8 *value, u32 value_size) + void Read(const PixelShaderUid &key, const u8 *value, u32 value_size) { PixelShaderCache::InsertByteCode(key, value, value_size, false); } @@ -287,6 +288,7 @@ void PixelShaderCache::Clear() for (PSCache::iterator iter = PixelShaders.begin(); iter != PixelShaders.end(); iter++) iter->second.Destroy(); PixelShaders.clear(); + pixel_uid_checker.Invalidate(); last_entry = NULL; } @@ -323,8 +325,14 @@ void PixelShaderCache::Shutdown() bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components) { const API_TYPE api = ((D3D::GetCaps().PixelShaderVersion >> 8) & 0xFF) < 3 ? API_D3D9_SM20 : API_D3D9_SM30; - PIXELSHADERUID uid; - GetPixelShaderId(&uid, dstAlphaMode, components); + PixelShaderUid uid; + GetPixelShaderUid(uid, dstAlphaMode, API_D3D9, components); + if (g_ActiveConfig.bEnableShaderDebugging) + { + PixelShaderCode code; + GeneratePixelShaderCode(code, dstAlphaMode, API_D3D9, components); + pixel_uid_checker.AddToIndexAndCheck(code, uid, "Pixel", "p"); + } // Check if the shader is already set if (last_entry) @@ -332,7 +340,6 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components) if (uid == last_uid) { GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); - ValidatePixelShaderIDs(api, last_entry->safe_uid, last_entry->code, dstAlphaMode, components); return last_entry->shader != NULL; } } @@ -349,34 +356,34 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components) if (entry.shader) D3D::SetPixelShader(entry.shader); GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); - ValidatePixelShaderIDs(api, entry.safe_uid, entry.code, dstAlphaMode, components); return (entry.shader != NULL); } // Need to compile a new shader - const char *code = GeneratePixelShaderCode(dstAlphaMode, api, components); 
+ PixelShaderCode code; + GeneratePixelShaderCode(code, dstAlphaMode, api, components); if (g_ActiveConfig.bEnableShaderDebugging) { - u32 code_hash = HashAdler32((const u8 *)code, strlen(code)); + u32 code_hash = HashAdler32((const u8 *)code.GetBuffer(), strlen(code.GetBuffer())); unique_shaders.insert(code_hash); SETSTAT(stats.numUniquePixelShaders, unique_shaders.size()); } #if defined(_DEBUG) || defined(DEBUGFAST) - if (g_ActiveConfig.iLog & CONF_SAVESHADERS && code) { + if (g_ActiveConfig.iLog & CONF_SAVESHADERS) { static int counter = 0; char szTemp[MAX_PATH]; sprintf(szTemp, "%sps_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++); - SaveData(szTemp, code); + SaveData(szTemp, code.GetBuffer()); } #endif u8 *bytecode = 0; int bytecodelen = 0; - if (!D3D::CompilePixelShader(code, (int)strlen(code), &bytecode, &bytecodelen)) { + if (!D3D::CompilePixelShader(code.GetBuffer(), (int)strlen(code.GetBuffer()), &bytecode, &bytecodelen)) { GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true); return false; } @@ -390,15 +397,14 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components) if (g_ActiveConfig.bEnableShaderDebugging && success) { - PixelShaders[uid].code = code; - GetSafePixelShaderId(&PixelShaders[uid].safe_uid, dstAlphaMode, components); + PixelShaders[uid].code = code.GetBuffer(); } GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); return success; } -bool PixelShaderCache::InsertByteCode(const PIXELSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate) +bool PixelShaderCache::InsertByteCode(const PixelShaderUid &uid, const u8 *bytecode, int bytecodelen, bool activate) { LPDIRECT3DPIXELSHADER9 shader = D3D::CreatePixelShaderFromByteCode(bytecode, bytecodelen); diff --git a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.h b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.h index c91b47a27d..733a68233a 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.h +++ 
b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.h @@ -28,7 +28,6 @@ private: LPDIRECT3DPIXELSHADER9 shader; bool owns_shader; - PIXELSHADERUIDSAFE safe_uid; std::string code; PSCacheEntry() : shader(NULL), owns_shader(true) {} @@ -40,18 +39,20 @@ private: } }; - typedef std::map PSCache; + typedef std::map PSCache; static PSCache PixelShaders; static const PSCacheEntry *last_entry; - static PIXELSHADERUID last_uid; + static PixelShaderUid last_uid; + static UidChecker pixel_uid_checker; + static void Clear(); public: static void Init(); static void Shutdown(); static bool SetShader(DSTALPHA_MODE dstAlphaMode, u32 componets); - static bool InsertByteCode(const PIXELSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate); + static bool InsertByteCode(const PixelShaderUid &uid, const u8 *bytecode, int bytecodelen, bool activate); static LPDIRECT3DPIXELSHADER9 GetColorMatrixProgram(int SSAAMode); static LPDIRECT3DPIXELSHADER9 GetColorCopyProgram(int SSAAMode); static LPDIRECT3DPIXELSHADER9 GetDepthMatrixProgram(int SSAAMode, bool depthConversion); diff --git a/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp index e0592b12e0..adb4ddf580 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp @@ -348,7 +348,7 @@ void VertexManager::vFlush() // set global constants VertexShaderManager::SetConstants(); - PixelShaderManager::SetConstants(); + PixelShaderManager::SetConstants(g_nativeVertexFmt->m_components); u32 stride = g_nativeVertexFmt->GetVertexStride(); bool useDstAlpha = !g_ActiveConfig.bDstAlphaPass && bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate && bpmem.zcontrol.pixel_format == PIXELFMT_RGBA6_Z24; diff --git a/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp index 1847911e29..2d11368a6d 100644 --- 
a/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp @@ -25,14 +25,15 @@ namespace DX9 VertexShaderCache::VSCache VertexShaderCache::vshaders; const VertexShaderCache::VSCacheEntry *VertexShaderCache::last_entry; -VERTEXSHADERUID VertexShaderCache::last_uid; +VertexShaderUid VertexShaderCache::last_uid; +UidChecker VertexShaderCache::vertex_uid_checker; #define MAX_SSAA_SHADERS 3 static LPDIRECT3DVERTEXSHADER9 SimpleVertexShader[MAX_SSAA_SHADERS]; static LPDIRECT3DVERTEXSHADER9 ClearVertexShader; -LinearDiskCache g_vs_disk_cache; +LinearDiskCache g_vs_disk_cache; LPDIRECT3DVERTEXSHADER9 VertexShaderCache::GetSimpleVertexShader(int level) { @@ -45,10 +46,10 @@ LPDIRECT3DVERTEXSHADER9 VertexShaderCache::GetClearVertexShader() } // this class will load the precompiled shaders into our cache -class VertexShaderCacheInserter : public LinearDiskCacheReader +class VertexShaderCacheInserter : public LinearDiskCacheReader { public: - void Read(const VERTEXSHADERUID &key, const u8 *value, u32 value_size) + void Read(const VertexShaderUid &key, const u8 *value, u32 value_size) { VertexShaderCache::InsertByteCode(key, value, value_size, false); } @@ -150,6 +151,7 @@ void VertexShaderCache::Clear() for (VSCache::iterator iter = vshaders.begin(); iter != vshaders.end(); ++iter) iter->second.Destroy(); vshaders.clear(); + vertex_uid_checker.Invalidate(); last_entry = NULL; } @@ -174,14 +176,20 @@ void VertexShaderCache::Shutdown() bool VertexShaderCache::SetShader(u32 components) { - VERTEXSHADERUID uid; - GetVertexShaderId(&uid, components); + VertexShaderUid uid; + GetVertexShaderUid(uid, components, API_D3D9); + if (g_ActiveConfig.bEnableShaderDebugging) + { + VertexShaderCode code; + GenerateVertexShaderCode(code, components, API_D3D9); + vertex_uid_checker.AddToIndexAndCheck(code, uid, "Vertex", "v"); + } + if (last_entry) { if (uid == last_uid) { GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); - 
ValidateVertexShaderIDs(API_D3D9, last_entry->safe_uid, last_entry->code, components); return (last_entry->shader != NULL); } } @@ -196,14 +204,15 @@ bool VertexShaderCache::SetShader(u32 components) if (entry.shader) D3D::SetVertexShader(entry.shader); GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); - ValidateVertexShaderIDs(API_D3D9, entry.safe_uid, entry.code, components); return (entry.shader != NULL); } - const char *code = GenerateVertexShaderCode(components, API_D3D9); + VertexShaderCode code; + GenerateVertexShaderCode(code, components, API_D3D9); + u8 *bytecode; int bytecodelen; - if (!D3D::CompileVertexShader(code, (int)strlen(code), &bytecode, &bytecodelen)) + if (!D3D::CompileVertexShader(code.GetBuffer(), (int)strlen(code.GetBuffer()), &bytecode, &bytecodelen)) { GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true); return false; @@ -213,15 +222,14 @@ bool VertexShaderCache::SetShader(u32 components) bool success = InsertByteCode(uid, bytecode, bytecodelen, true); if (g_ActiveConfig.bEnableShaderDebugging && success) { - vshaders[uid].code = code; - GetSafeVertexShaderId(&vshaders[uid].safe_uid, components); + vshaders[uid].code = code.GetBuffer(); } delete [] bytecode; GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); return success; } -bool VertexShaderCache::InsertByteCode(const VERTEXSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate) { +bool VertexShaderCache::InsertByteCode(const VertexShaderUid &uid, const u8 *bytecode, int bytecodelen, bool activate) { LPDIRECT3DVERTEXSHADER9 shader = D3D::CreateVertexShaderFromByteCode(bytecode, bytecodelen); // Make an entry in the table diff --git a/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.h b/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.h index bc569ff580..32fbea92be 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.h +++ b/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.h @@ -23,7 +23,6 @@ private: LPDIRECT3DVERTEXSHADER9 shader; std::string code; - 
VERTEXSHADERUIDSAFE safe_uid; VSCacheEntry() : shader(NULL) {} void Destroy() @@ -34,11 +33,14 @@ private: } }; - typedef std::map VSCache; + typedef std::map VSCache; static VSCache vshaders; static const VSCacheEntry *last_entry; - static VERTEXSHADERUID last_uid; + static VertexShaderUid last_uid; + + static UidChecker vertex_uid_checker; + static void Clear(); public: @@ -47,7 +49,7 @@ public: static bool SetShader(u32 components); static LPDIRECT3DVERTEXSHADER9 GetSimpleVertexShader(int level); static LPDIRECT3DVERTEXSHADER9 GetClearVertexShader(); - static bool InsertByteCode(const VERTEXSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate); + static bool InsertByteCode(const VertexShaderUid &uid, const u8 *bytecode, int bytecodelen, bool activate); static std::string GetCurrentShaderCode(); }; diff --git a/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.cpp b/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.cpp index 90cf401687..07ba589469 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.cpp @@ -31,6 +31,8 @@ static GLuint CurrentProgram = 0; ProgramShaderCache::PCache ProgramShaderCache::pshaders; ProgramShaderCache::PCacheEntry* ProgramShaderCache::last_entry; SHADERUID ProgramShaderCache::last_uid; +UidChecker ProgramShaderCache::pixel_uid_checker; +UidChecker ProgramShaderCache::vertex_uid_checker; static char s_glsl_header[1024] = ""; @@ -186,21 +188,20 @@ SHADER* ProgramShaderCache::SetShader ( DSTALPHA_MODE dstAlphaMode, u32 componen { SHADERUID uid; GetShaderId(&uid, dstAlphaMode, components); - + // Check if the shader is already set if (last_entry) { if (uid == last_uid) { GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); - ValidateShaderIDs(last_entry, dstAlphaMode, components); last_entry->shader.Bind(); return &last_entry->shader; } } - + last_uid = uid; - + // Check if shader is already in cache PCache::iterator iter = 
pshaders.find(uid); if (iter != pshaders.end()) @@ -209,24 +210,24 @@ SHADER* ProgramShaderCache::SetShader ( DSTALPHA_MODE dstAlphaMode, u32 componen last_entry = entry; GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); - ValidateShaderIDs(entry, dstAlphaMode, components); last_entry->shader.Bind(); return &last_entry->shader; } - + // Make an entry in the table PCacheEntry& newentry = pshaders[uid]; last_entry = &newentry; newentry.in_cache = 0; - - const char *vcode = GenerateVertexShaderCode(components, API_OPENGL); - const char *pcode = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL, components); - + + VertexShaderCode vcode; + PixelShaderCode pcode; + GenerateVertexShaderCode(vcode, components, API_OPENGL); + GeneratePixelShaderCode(pcode, dstAlphaMode, API_OPENGL, components); + if (g_ActiveConfig.bEnableShaderDebugging) { - GetSafeShaderId(&newentry.safe_uid, dstAlphaMode, components); - newentry.shader.strvprog = vcode; - newentry.shader.strpprog = pcode; + newentry.shader.strvprog = vcode.GetBuffer(); + newentry.shader.strpprog = pcode.GetBuffer(); } #if defined(_DEBUG) || defined(DEBUGFAST) @@ -234,13 +235,13 @@ SHADER* ProgramShaderCache::SetShader ( DSTALPHA_MODE dstAlphaMode, u32 componen static int counter = 0; char szTemp[MAX_PATH]; sprintf(szTemp, "%svs_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++); - SaveData(szTemp, vcode); + SaveData(szTemp, vcode.GetBuffer()); sprintf(szTemp, "%sps_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++); - SaveData(szTemp, pcode); + SaveData(szTemp, pcode.GetBuffer()); } #endif - if (!CompileShader(newentry.shader, vcode, pcode)) { + if (!CompileShader(newentry.shader, vcode.GetBuffer(), pcode.GetBuffer())) { GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true); return NULL; } @@ -257,7 +258,7 @@ bool ProgramShaderCache::CompileShader ( SHADER& shader, const char* vcode, cons { GLuint vsid = CompileSingleShader(GL_VERTEX_SHADER, vcode); GLuint psid = CompileSingleShader(GL_FRAGMENT_SHADER, pcode); 
- + if(!vsid || !psid) { glDeleteShader(vsid); @@ -380,28 +381,23 @@ GLuint ProgramShaderCache::CompileSingleShader (GLuint type, const char* code ) return result; } - - -void ProgramShaderCache::GetShaderId ( SHADERUID* uid, DSTALPHA_MODE dstAlphaMode, u32 components ) +void ProgramShaderCache::GetShaderId(SHADERUID* uid, DSTALPHA_MODE dstAlphaMode, u32 components) { - GetPixelShaderId(&uid->puid, dstAlphaMode, components); - GetVertexShaderId(&uid->vuid, components); + GetPixelShaderUid(uid->puid, dstAlphaMode, API_OPENGL, components); + GetVertexShaderUid(uid->vuid, components, API_OPENGL); + + if (g_ActiveConfig.bEnableShaderDebugging) + { + PixelShaderCode pcode; + GeneratePixelShaderCode(pcode, dstAlphaMode, API_OPENGL, components); + pixel_uid_checker.AddToIndexAndCheck(pcode, uid->puid, "Pixel", "p"); + + VertexShaderCode vcode; + GenerateVertexShaderCode(vcode, components, API_OPENGL); + vertex_uid_checker.AddToIndexAndCheck(vcode, uid->vuid, "Vertex", "v"); + } } -void ProgramShaderCache::GetSafeShaderId ( SHADERUIDSAFE* uid, DSTALPHA_MODE dstAlphaMode, u32 components ) -{ - GetSafePixelShaderId(&uid->puid, dstAlphaMode, components); - GetSafeVertexShaderId(&uid->vuid, components); -} - -void ProgramShaderCache::ValidateShaderIDs ( PCacheEntry *entry, DSTALPHA_MODE dstAlphaMode, u32 components ) -{ - ValidateVertexShaderIDs(API_OPENGL, entry->safe_uid.vuid, entry->shader.strvprog, components); - ValidatePixelShaderIDs(API_OPENGL, entry->safe_uid.puid, entry->shader.strpprog, dstAlphaMode, components); -} - - - ProgramShaderCache::PCacheEntry ProgramShaderCache::GetShaderProgram(void) { return *last_entry; @@ -497,6 +493,9 @@ void ProgramShaderCache::Shutdown(void) iter->second.Destroy(); pshaders.clear(); + pixel_uid_checker.Invalidate(); + vertex_uid_checker.Invalidate(); + if (g_ActiveConfig.backend_info.bSupportsGLSLUBO) { delete s_buffer; diff --git a/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.h 
b/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.h index f323c9345c..30428ad6de 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.h @@ -16,18 +16,17 @@ namespace OGL { -template -class _SHADERUID +class SHADERUID { public: - _VERTEXSHADERUID vuid; - _PIXELSHADERUID puid; + VertexShaderUid vuid; + PixelShaderUid puid; - _SHADERUID() {} + SHADERUID() {} - _SHADERUID(const _SHADERUID& r) : vuid(r.vuid), puid(r.puid) {} + SHADERUID(const SHADERUID& r) : vuid(r.vuid), puid(r.puid) {} - bool operator <(const _SHADERUID& r) const + bool operator <(const SHADERUID& r) const { if(puid < r.puid) return true; if(r.puid < puid) return false; @@ -35,13 +34,11 @@ public: return false; } - bool operator ==(const _SHADERUID& r) const + bool operator ==(const SHADERUID& r) const { return puid == r.puid && vuid == r.vuid; } }; -typedef _SHADERUID SHADERUID; -typedef _SHADERUID SHADERUIDSAFE; const int NUM_UNIFORMS = 19; @@ -72,7 +69,6 @@ public: struct PCacheEntry { SHADER shader; - SHADERUIDSAFE safe_uid; bool in_cache; void Destroy() @@ -81,12 +77,12 @@ public: } }; + typedef std::map PCache; + static PCacheEntry GetShaderProgram(void); static GLuint GetCurrentProgram(void); static SHADER* SetShader(DSTALPHA_MODE dstAlphaMode, u32 components); static void GetShaderId(SHADERUID *uid, DSTALPHA_MODE dstAlphaMode, u32 components); - static void GetSafeShaderId(SHADERUIDSAFE *uid, DSTALPHA_MODE dstAlphaMode, u32 components); - static void ValidateShaderIDs(PCacheEntry *entry, DSTALPHA_MODE dstAlphaMode, u32 components); static bool CompileShader(SHADER &shader, const char* vcode, const char* pcode); static GLuint CompileSingleShader(GLuint type, const char *code); @@ -106,12 +102,13 @@ private: void Read(const SHADERUID &key, const u8 *value, u32 value_size); }; - typedef std::map PCache; - static PCache pshaders; static PCacheEntry* last_entry; static SHADERUID last_uid; + static UidChecker 
pixel_uid_checker; + static UidChecker vertex_uid_checker; + static GLintptr s_vs_data_size; static GLintptr s_ps_data_size; static GLintptr s_vs_data_offset; diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp index 59ea68504b..2f34723e58 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp @@ -257,7 +257,7 @@ void VertexManager::vFlush() // set global constants VertexShaderManager::SetConstants(); - PixelShaderManager::SetConstants(); + PixelShaderManager::SetConstants(g_nativeVertexFmt->m_components); ProgramShaderCache::UploadConstants(); // setup the pointers @@ -278,7 +278,7 @@ void VertexManager::vFlush() { // Need to set these again, if we don't support UBO VertexShaderManager::SetConstants(); - PixelShaderManager::SetConstants(); + PixelShaderManager::SetConstants(g_nativeVertexFmt->m_components); } // only update alpha