From 751d67a59a464d95b82687ee987ffb5cbe821380 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 20 Oct 2020 09:00:39 -0400 Subject: [PATCH] UberShaderVertex: Migrate over to fmt Continues migration of the shader generators over to fmt. With this, all that's left to move over are the pixel shaders (regular and ubershader variants) --- Source/Core/VideoCommon/UberShaderVertex.cpp | 584 ++++++++++--------- 1 file changed, 296 insertions(+), 288 deletions(-) diff --git a/Source/Core/VideoCommon/UberShaderVertex.cpp b/Source/Core/VideoCommon/UberShaderVertex.cpp index 9b85b4d1c2..574288fc38 100644 --- a/Source/Core/VideoCommon/UberShaderVertex.cpp +++ b/Source/Core/VideoCommon/UberShaderVertex.cpp @@ -23,193 +23,197 @@ VertexShaderUid GetVertexShaderUid() return out; } -static void GenVertexShaderTexGens(APIType ApiType, u32 numTexgen, ShaderCode& out); +static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode& out); -ShaderCode GenVertexShader(APIType ApiType, const ShaderHostConfig& host_config, +ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config, const vertex_ubershader_uid_data* uid_data) { const bool msaa = host_config.msaa; const bool ssaa = host_config.ssaa; const bool per_pixel_lighting = host_config.per_pixel_lighting; const bool vertex_rounding = host_config.vertex_rounding; - const u32 numTexgen = uid_data->num_texgens; + const u32 num_texgen = uid_data->num_texgens; ShaderCode out; - out.Write("// Vertex UberShader\n\n"); - out.Write("%s", s_lighting_struct); + out.WriteFmt("// Vertex UberShader\n\n"); + out.WriteFmt("{}", s_lighting_struct); // uniforms - if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) - out.Write("UBO_BINDING(std140, 2) uniform VSBlock {\n"); + if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) + out.WriteFmt("UBO_BINDING(std140, 2) uniform VSBlock {{\n"); else - out.Write("cbuffer VSBlock {\n"); - out.Write(s_shader_uniforms); - out.Write("};\n"); + out.WriteFmt("cbuffer VSBlock {{\n"); + out.WriteFmt("{}", s_shader_uniforms); + out.WriteFmt("}};\n"); - out.Write("struct VS_OUTPUT {\n"); - GenerateVSOutputMembers(out, ApiType, numTexgen, host_config, ""); - out.Write("};\n\n"); + out.WriteFmt("struct VS_OUTPUT {{\n"); + GenerateVSOutputMembers(out, api_type, num_texgen, host_config, ""); + out.WriteFmt("}};\n\n"); - WriteUberShaderCommonHeader(out, ApiType, host_config); + WriteUberShaderCommonHeader(out, api_type, host_config); WriteLightingFunction(out); - if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) + if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) { - out.Write("ATTRIBUTE_LOCATION(%d) in float4 rawpos;\n", SHADER_POSITION_ATTRIB); - out.Write("ATTRIBUTE_LOCATION(%d) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB); - out.Write("ATTRIBUTE_LOCATION(%d) in float3 rawnorm0;\n", SHADER_NORM0_ATTRIB); - out.Write("ATTRIBUTE_LOCATION(%d) in float3 rawnorm1;\n", SHADER_NORM1_ATTRIB); - out.Write("ATTRIBUTE_LOCATION(%d) in float3 rawnorm2;\n", SHADER_NORM2_ATTRIB); - out.Write("ATTRIBUTE_LOCATION(%d) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB); - out.Write("ATTRIBUTE_LOCATION(%d) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB); + out.WriteFmt("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB); + out.WriteFmt("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB); + out.WriteFmt("ATTRIBUTE_LOCATION({}) in float3 rawnorm0;\n", SHADER_NORM0_ATTRIB); + out.WriteFmt("ATTRIBUTE_LOCATION({}) in float3 rawnorm1;\n", SHADER_NORM1_ATTRIB); + out.WriteFmt("ATTRIBUTE_LOCATION({}) in float3 rawnorm2;\n", SHADER_NORM2_ATTRIB); + out.WriteFmt("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB); + out.WriteFmt("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB); for (int i = 0; i < 8; ++i) - out.Write("ATTRIBUTE_LOCATION(%d) in float3 rawtex%d;\n", SHADER_TEXTURE0_ATTRIB + i, i); + out.WriteFmt("ATTRIBUTE_LOCATION({}) in float3 rawtex{};\n", SHADER_TEXTURE0_ATTRIB + i, i); if (host_config.backend_geometry_shaders) { - out.Write("VARYING_LOCATION(0) out VertexData {\n"); - GenerateVSOutputMembers(out, ApiType, numTexgen, host_config, + out.WriteFmt("VARYING_LOCATION(0) out VertexData {{\n"); + GenerateVSOutputMembers(out, api_type, num_texgen, host_config, GetInterpolationQualifier(msaa, ssaa, true, false)); - out.Write("} vs;\n"); + out.WriteFmt("}} vs;\n"); } else { // Let's set up attributes u32 counter = 0; - out.Write("VARYING_LOCATION(%u) %s out float4 colors_0;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); - out.Write("VARYING_LOCATION(%u) %s out float4 colors_1;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); - for (u32 i = 0; i < numTexgen; ++i) + out.WriteFmt("VARYING_LOCATION({}) {} out float4 colors_0;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); + out.WriteFmt("VARYING_LOCATION({}) {} out float4 colors_1;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); + for (u32 i = 0; i < num_texgen; ++i) { - out.Write("VARYING_LOCATION(%u) %s out float3 tex%u;\n", counter++, - GetInterpolationQualifier(msaa, ssaa), i); + out.WriteFmt("VARYING_LOCATION({}) {} out float3 tex{};\n", counter++, + GetInterpolationQualifier(msaa, ssaa), i); } if (!host_config.fast_depth_calc) { - out.Write("VARYING_LOCATION(%u) %s out float4 clipPos;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); + out.WriteFmt("VARYING_LOCATION({}) {} out float4 clipPos;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); } if (per_pixel_lighting) { - out.Write("VARYING_LOCATION(%u) %s out float3 Normal;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); - out.Write("VARYING_LOCATION(%u) %s out float3 WorldPos;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); + out.WriteFmt("VARYING_LOCATION({}) {} out float3 Normal;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); + out.WriteFmt("VARYING_LOCATION({}) {} out float3 WorldPos;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); } } - out.Write("void main()\n{\n"); + out.WriteFmt("void main()\n{{\n"); } else // D3D { - out.Write("VS_OUTPUT main(\n"); + out.WriteFmt("VS_OUTPUT main(\n"); // inputs - out.Write(" float3 rawnorm0 : NORMAL0,\n"); - out.Write(" float3 rawnorm1 : NORMAL1,\n"); - out.Write(" float3 rawnorm2 : NORMAL2,\n"); - out.Write(" float4 rawcolor0 : COLOR0,\n"); - out.Write(" float4 rawcolor1 : COLOR1,\n"); + out.WriteFmt(" float3 rawnorm0 : NORMAL0,\n" + " float3 rawnorm1 : NORMAL1,\n" + " float3 rawnorm2 : NORMAL2,\n" + " float4 rawcolor0 : COLOR0,\n" + " float4 rawcolor1 : COLOR1,\n"); for (int i = 0; i < 8; ++i) - out.Write(" float3 rawtex%d : TEXCOORD%d,\n", i, i); - out.Write(" uint posmtx : BLENDINDICES,\n"); - out.Write(" float4 rawpos : POSITION) {\n"); + out.WriteFmt(" float3 rawtex{} : TEXCOORD{},\n", i, i); + out.WriteFmt(" uint posmtx : BLENDINDICES,\n"); + out.WriteFmt(" float4 rawpos : POSITION) {{\n"); } - out.Write("VS_OUTPUT o;\n" - "\n"); + out.WriteFmt("VS_OUTPUT o;\n" + "\n"); // Transforms - out.Write("// Position matrix\n" - "float4 P0;\n" - "float4 P1;\n" - "float4 P2;\n" - "\n" - "// Normal matrix\n" - "float3 N0;\n" - "float3 N1;\n" - "float3 N2;\n" - "\n" - "if ((components & %uu) != 0u) {// VB_HAS_POSMTXIDX\n", - VB_HAS_POSMTXIDX); - out.Write(" // Vertex format has a per-vertex matrix\n" - " int posidx = int(posmtx.r);\n" - " P0 = " I_TRANSFORMMATRICES "[posidx];\n" - " P1 = " I_TRANSFORMMATRICES "[posidx+1];\n" - " P2 = " I_TRANSFORMMATRICES "[posidx+2];\n" - "\n" - " int normidx = posidx >= 32 ? (posidx - 32) : posidx;\n" - " N0 = " I_NORMALMATRICES "[normidx].xyz;\n" - " N1 = " I_NORMALMATRICES "[normidx+1].xyz;\n" - " N2 = " I_NORMALMATRICES "[normidx+2].xyz;\n" - "} else {\n" - " // One shared matrix\n" - " P0 = " I_POSNORMALMATRIX "[0];\n" - " P1 = " I_POSNORMALMATRIX "[1];\n" - " P2 = " I_POSNORMALMATRIX "[2];\n" - " N0 = " I_POSNORMALMATRIX "[3].xyz;\n" - " N1 = " I_POSNORMALMATRIX "[4].xyz;\n" - " N2 = " I_POSNORMALMATRIX "[5].xyz;\n" - "}\n" - "\n" - "float4 pos = float4(dot(P0, rawpos), dot(P1, rawpos), dot(P2, rawpos), 1.0);\n" - "o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION - "[1], pos), dot(" I_PROJECTION "[2], pos), dot(" I_PROJECTION "[3], pos));\n" - "\n" - "// Only the first normal gets normalized (TODO: why?)\n" - "float3 _norm0 = float3(0.0, 0.0, 0.0);\n" - "if ((components & %uu) != 0u) // VB_HAS_NRM0\n", - VB_HAS_NRM0); - out.Write( + out.WriteFmt("// Position matrix\n" + "float4 P0;\n" + "float4 P1;\n" + "float4 P2;\n" + "\n" + "// Normal matrix\n" + "float3 N0;\n" + "float3 N1;\n" + "float3 N2;\n" + "\n" + "if ((components & {}u) != 0u) {{// VB_HAS_POSMTXIDX\n", + VB_HAS_POSMTXIDX); + out.WriteFmt(" // Vertex format has a per-vertex matrix\n" + " int posidx = int(posmtx.r);\n" + " P0 = " I_TRANSFORMMATRICES "[posidx];\n" + " P1 = " I_TRANSFORMMATRICES "[posidx+1];\n" + " P2 = " I_TRANSFORMMATRICES "[posidx+2];\n" + "\n" + " int normidx = posidx >= 32 ? (posidx - 32) : posidx;\n" + " N0 = " I_NORMALMATRICES "[normidx].xyz;\n" + " N1 = " I_NORMALMATRICES "[normidx+1].xyz;\n" + " N2 = " I_NORMALMATRICES "[normidx+2].xyz;\n" + "}} else {{\n" + " // One shared matrix\n" + " P0 = " I_POSNORMALMATRIX "[0];\n" + " P1 = " I_POSNORMALMATRIX "[1];\n" + " P2 = " I_POSNORMALMATRIX "[2];\n" + " N0 = " I_POSNORMALMATRIX "[3].xyz;\n" + " N1 = " I_POSNORMALMATRIX "[4].xyz;\n" + " N2 = " I_POSNORMALMATRIX "[5].xyz;\n" + "}}\n" + "\n" + "float4 pos = float4(dot(P0, rawpos), dot(P1, rawpos), dot(P2, rawpos), 1.0);\n" + "o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION + "[1], pos), dot(" I_PROJECTION "[2], pos), dot(" I_PROJECTION "[3], pos));\n" + "\n" + "// Only the first normal gets normalized (TODO: why?)\n" + "float3 _norm0 = float3(0.0, 0.0, 0.0);\n" + "if ((components & {}u) != 0u) // VB_HAS_NRM0\n", + VB_HAS_NRM0); + out.WriteFmt( " _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0)));\n" "\n" "float3 _norm1 = float3(0.0, 0.0, 0.0);\n" - "if ((components & %uu) != 0u) // VB_HAS_NRM1\n", + "if ((components & {}u) != 0u) // VB_HAS_NRM1\n", VB_HAS_NRM1); - out.Write(" _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n" - "\n" - "float3 _norm2 = float3(0.0, 0.0, 0.0);\n" - "if ((components & %uu) != 0u) // VB_HAS_NRM2\n", - VB_HAS_NRM2); - out.Write(" _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n" - "\n"); + out.WriteFmt(" _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n" + "\n" + "float3 _norm2 = float3(0.0, 0.0, 0.0);\n" + "if ((components & {}u) != 0u) // VB_HAS_NRM2\n", + VB_HAS_NRM2); + out.WriteFmt(" _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n" + "\n"); // Hardware Lighting - WriteVertexLighting(out, ApiType, "pos.xyz", "_norm0", "rawcolor0", "rawcolor1", "o.colors_0", + WriteVertexLighting(out, api_type, "pos.xyz", "_norm0", "rawcolor0", "rawcolor1", "o.colors_0", "o.colors_1"); // Texture Coordinates - if (numTexgen > 0) - GenVertexShaderTexGens(ApiType, numTexgen, out); + if (num_texgen > 0) + GenVertexShaderTexGens(api_type, num_texgen, out); - out.Write("if (xfmem_numColorChans == 0u) {\n"); - out.Write(" if ((components & %uu) != 0u)\n", VB_HAS_COL0); - out.Write(" o.colors_0 = rawcolor0;\n"); - out.Write(" else\n"); - out.Write(" o.colors_1 = float4(1.0, 1.0, 1.0, 1.0);\n"); - out.Write("}\n"); - out.Write("if (xfmem_numColorChans < 2u) {\n"); - out.Write(" if ((components & %uu) != 0u)\n", VB_HAS_COL1); - out.Write(" o.colors_0 = rawcolor1;\n"); - out.Write(" else\n"); - out.Write(" o.colors_1 = float4(1.0, 1.0, 1.0, 1.0);\n"); - out.Write("}\n"); + out.WriteFmt("if (xfmem_numColorChans == 0u) {{\n" + " if ((components & {}u) != 0u)\n" + " o.colors_0 = rawcolor0;\n" + " else\n" + " o.colors_1 = float4(1.0, 1.0, 1.0, 1.0);\n" + "}}\n", + VB_HAS_COL0); + out.WriteFmt("if (xfmem_numColorChans < 2u) {{\n" + " if ((components & {}u) != 0u)\n" + " o.colors_0 = rawcolor1;\n" + " else\n" + " o.colors_1 = float4(1.0, 1.0, 1.0, 1.0);\n" + "}}\n", + VB_HAS_COL1); if (!host_config.fast_depth_calc) { // clipPos/w needs to be done in pixel shader, not here - out.Write("o.clipPos = o.pos;\n"); + out.WriteFmt("o.clipPos = o.pos;\n"); } if (per_pixel_lighting) { - out.Write("o.Normal = _norm0;\n"); - out.Write("o.WorldPos = pos.xyz;\n"); - out.Write("if ((components & %uu) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0); - out.Write(" o.colors_0 = rawcolor0;\n"); - out.Write("if ((components & %uu) != 0u) // VB_HAS_COL1\n", VB_HAS_COL1); - out.Write(" o.colors_1 = rawcolor1;\n"); + out.WriteFmt("o.Normal = _norm0;\n" + "o.WorldPos = pos.xyz;\n"); + out.WriteFmt("if ((components & {}u) != 0u) // VB_HAS_COL0\n" + " o.colors_0 = rawcolor0;\n", + VB_HAS_COL0); + out.WriteFmt("if ((components & {}u) != 0u) // VB_HAS_COL1\n" + " o.colors_1 = rawcolor1;\n", + VB_HAS_COL1); } // If we can disable the incorrect depth clipping planes using depth clamping, then we can do @@ -221,13 +225,13 @@ ShaderCode GenVertexShader(APIType ApiType, const ShaderHostConfig& host_config, // own clipping. We want to clip so that -w <= z <= 0, which matches the console -1..0 range. // We adjust our depth value for clipping purposes to match the perspective projection in the // software backend, which is a hack to fix Sonic Adventure and Unleashed games. - out.Write("float clipDepth = o.pos.z * (1.0 - 1e-7);\n"); - out.Write("float clipDist0 = clipDepth + o.pos.w;\n"); // Near: z < -w - out.Write("float clipDist1 = -clipDepth;\n"); // Far: z > 0 + out.WriteFmt("float clipDepth = o.pos.z * (1.0 - 1e-7);\n" + "float clipDist0 = clipDepth + o.pos.w;\n" // Near: z < -w + "float clipDist1 = -clipDepth;\n"); // Far: z > 0 if (host_config.backend_geometry_shaders) { - out.Write("o.clipDist0 = clipDist0;\n"); - out.Write("o.clipDist1 = clipDist1;\n"); + out.WriteFmt("o.clipDist0 = clipDist0;\n" + "o.clipDist1 = clipDist1;\n"); } } @@ -242,20 +246,20 @@ ShaderCode GenVertexShader(APIType ApiType, const ShaderHostConfig& host_config, // divide, because some games will use a depth range larger than what is allowed by the // graphics API. These large depth ranges will still be clipped to the 0..1 range, so these // games effectively add a depth bias to the values written to the depth buffer. - out.Write("o.pos.z = o.pos.w * " I_PIXELCENTERCORRECTION ".w - " - "o.pos.z * " I_PIXELCENTERCORRECTION ".z;\n"); + out.WriteFmt("o.pos.z = o.pos.w * " I_PIXELCENTERCORRECTION ".w - " + "o.pos.z * " I_PIXELCENTERCORRECTION ".z;\n"); if (!host_config.backend_clip_control) { // If the graphics API doesn't support a depth range of 0..1, then we need to map z to // the -1..1 range. Unfortunately we have to use a substraction, which is a lossy floating-point // operation that can introduce a round-trip error. - out.Write("o.pos.z = o.pos.z * 2.0 - o.pos.w;\n"); + out.WriteFmt("o.pos.z = o.pos.z * 2.0 - o.pos.w;\n"); } // Correct for negative viewports by mirroring all vertices. We need to negate the height here, // since the viewport height is already negated by the render backend. - out.Write("o.pos.xy *= sign(" I_PIXELCENTERCORRECTION ".xy * float2(1.0, -1.0));\n"); + out.WriteFmt("o.pos.xy *= sign(" I_PIXELCENTERCORRECTION ".xy * float2(1.0, -1.0));\n"); // The console GPU places the pixel center at 7/12 in screen space unless // antialiasing is enabled, while D3D and OpenGL place it at 0.5. This results @@ -263,7 +267,7 @@ ShaderCode GenVertexShader(APIType ApiType, const ShaderHostConfig& host_config, // which in turn can be critical if it happens for clear quads. // Hence, we compensate for this pixel center difference so that primitives // get rasterized correctly. - out.Write("o.pos.xy = o.pos.xy - o.pos.w * " I_PIXELCENTERCORRECTION ".xy;\n"); + out.WriteFmt("o.pos.xy = o.pos.xy - o.pos.w * " I_PIXELCENTERCORRECTION ".xy;\n"); if (vertex_rounding) { @@ -271,219 +275,223 @@ ShaderCode GenVertexShader(APIType ApiType, const ShaderHostConfig& host_config, // cause an additional pixel offset. Due to a higher pixel density we need to correct this // by converting our clip-space position into the Wii's screen-space. // Acquire the right pixel and then convert it back. - out.Write("if (o.pos.w == 1.0f)\n"); - out.Write("{\n"); + out.WriteFmt("if (o.pos.w == 1.0f)\n" + "{{\n"); - out.Write("\tfloat ss_pixel_x = ((o.pos.x + 1.0f) * (" I_VIEWPORT_SIZE ".x * 0.5f));\n"); - out.Write("\tfloat ss_pixel_y = ((o.pos.y + 1.0f) * (" I_VIEWPORT_SIZE ".y * 0.5f));\n"); + out.WriteFmt("\tfloat ss_pixel_x = ((o.pos.x + 1.0f) * (" I_VIEWPORT_SIZE ".x * 0.5f));\n" + "\tfloat ss_pixel_y = ((o.pos.y + 1.0f) * (" I_VIEWPORT_SIZE ".y * 0.5f));\n"); - out.Write("\tss_pixel_x = round(ss_pixel_x);\n"); - out.Write("\tss_pixel_y = round(ss_pixel_y);\n"); + out.WriteFmt("\tss_pixel_x = round(ss_pixel_x);\n" + "\tss_pixel_y = round(ss_pixel_y);\n"); - out.Write("\to.pos.x = ((ss_pixel_x / (" I_VIEWPORT_SIZE ".x * 0.5f)) - 1.0f);\n"); - out.Write("\to.pos.y = ((ss_pixel_y / (" I_VIEWPORT_SIZE ".y * 0.5f)) - 1.0f);\n"); - out.Write("}\n"); + out.WriteFmt("\to.pos.x = ((ss_pixel_x / (" I_VIEWPORT_SIZE ".x * 0.5f)) - 1.0f);\n" + "\to.pos.y = ((ss_pixel_y / (" I_VIEWPORT_SIZE ".y * 0.5f)) - 1.0f);\n" + "}}\n"); } - if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) + if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) { if (host_config.backend_geometry_shaders) { - AssignVSOutputMembers(out, "vs", "o", numTexgen, host_config); + AssignVSOutputMembers(out, "vs", "o", num_texgen, host_config); } else { // TODO: Pass interface blocks between shader stages even if geometry shaders // are not supported, however that will require at least OpenGL 3.2 support. - for (u32 i = 0; i < numTexgen; ++i) - out.Write("tex%d.xyz = o.tex%d;\n", i, i); + for (u32 i = 0; i < num_texgen; ++i) + out.WriteFmt("tex{}.xyz = o.tex{};\n", i, i); if (!host_config.fast_depth_calc) - out.Write("clipPos = o.clipPos;\n"); + out.WriteFmt("clipPos = o.clipPos;\n"); if (per_pixel_lighting) { - out.Write("Normal = o.Normal;\n"); - out.Write("WorldPos = o.WorldPos;\n"); + out.WriteFmt("Normal = o.Normal;\n" + "WorldPos = o.WorldPos;\n"); } - out.Write("colors_0 = o.colors_0;\n"); - out.Write("colors_1 = o.colors_1;\n"); + out.WriteFmt("colors_0 = o.colors_0;\n" + "colors_1 = o.colors_1;\n"); } if (host_config.backend_depth_clamp) { - out.Write("gl_ClipDistance[0] = clipDist0;\n"); - out.Write("gl_ClipDistance[1] = clipDist1;\n"); + out.WriteFmt("gl_ClipDistance[0] = clipDist0;\n" + "gl_ClipDistance[1] = clipDist1;\n"); } // Vulkan NDC space has Y pointing down (right-handed NDC space). - if (ApiType == APIType::Vulkan) - out.Write("gl_Position = float4(o.pos.x, -o.pos.y, o.pos.z, o.pos.w);\n"); + if (api_type == APIType::Vulkan) + out.WriteFmt("gl_Position = float4(o.pos.x, -o.pos.y, o.pos.z, o.pos.w);\n"); else - out.Write("gl_Position = o.pos;\n"); + out.WriteFmt("gl_Position = o.pos;\n"); } else // D3D { - out.Write("return o;\n"); + out.WriteFmt("return o;\n"); } - out.Write("}\n"); + out.WriteFmt("}}\n"); return out; } -void GenVertexShaderTexGens(APIType ApiType, u32 numTexgen, ShaderCode& out) +static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode& out) { // The HLSL compiler complains that the output texture coordinates are uninitialized when trying // to dynamically index them. - for (u32 i = 0; i < numTexgen; i++) - out.Write("o.tex%u = float3(0.0, 0.0, 0.0);\n", i); + for (u32 i = 0; i < num_texgen; i++) + out.WriteFmt("o.tex{} = float3(0.0, 0.0, 0.0);\n", i); - out.Write("// Texture coordinate generation\n"); - if (numTexgen == 1) - out.Write("{ const uint texgen = 0u;\n"); + out.WriteFmt("// Texture coordinate generation\n"); + if (num_texgen == 1) + { + out.WriteFmt("{{ const uint texgen = 0u;\n"); + } else - out.Write("%sfor (uint texgen = 0u; texgen < %uu; texgen++) {\n", - ApiType == APIType::D3D ? "[loop] " : "", numTexgen); + { + out.WriteFmt("{}for (uint texgen = 0u; texgen < {}u; texgen++) {{\n", + api_type == APIType::D3D ? "[loop] " : "", num_texgen); + } - out.Write(" // Texcoord transforms\n"); - out.Write(" float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n" - " uint texMtxInfo = xfmem_texMtxInfo(texgen);\n"); - out.Write(" switch (%s) {\n", BitfieldExtract("texMtxInfo", TexMtxInfo().sourcerow).c_str()); - out.Write(" case %uu: // XF_SRCGEOM_INROW\n", XF_SRCGEOM_INROW); - out.Write(" coord.xyz = rawpos.xyz;\n"); - out.Write(" break;\n\n"); - out.Write(" case %uu: // XF_SRCNORMAL_INROW\n", XF_SRCNORMAL_INROW); - out.Write( - " coord.xyz = ((components & %uu /* VB_HAS_NRM0 */) != 0u) ? rawnorm0.xyz : coord.xyz;", + out.WriteFmt(" // Texcoord transforms\n"); + out.WriteFmt(" float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n" + " uint texMtxInfo = xfmem_texMtxInfo(texgen);\n"); + out.WriteFmt(" switch ({}) {{\n", BitfieldExtract("texMtxInfo", TexMtxInfo().sourcerow)); + out.WriteFmt(" case {}u: // XF_SRCGEOM_INROW\n", XF_SRCGEOM_INROW); + out.WriteFmt(" coord.xyz = rawpos.xyz;\n"); + out.WriteFmt(" break;\n\n"); + out.WriteFmt(" case {}u: // XF_SRCNORMAL_INROW\n", XF_SRCNORMAL_INROW); + out.WriteFmt( + " coord.xyz = ((components & {}u /* VB_HAS_NRM0 */) != 0u) ? rawnorm0.xyz : coord.xyz;", VB_HAS_NRM0); - out.Write(" break;\n\n"); - out.Write(" case %uu: // XF_SRCBINORMAL_T_INROW\n", XF_SRCBINORMAL_T_INROW); - out.Write( - " coord.xyz = ((components & %uu /* VB_HAS_NRM1 */) != 0u) ? rawnorm1.xyz : coord.xyz;", + out.WriteFmt(" break;\n\n"); + out.WriteFmt(" case {}u: // XF_SRCBINORMAL_T_INROW\n", XF_SRCBINORMAL_T_INROW); + out.WriteFmt( + " coord.xyz = ((components & {}u /* VB_HAS_NRM1 */) != 0u) ? rawnorm1.xyz : coord.xyz;", VB_HAS_NRM1); - out.Write(" break;\n\n"); - out.Write(" case %uu: // XF_SRCBINORMAL_B_INROW\n", XF_SRCBINORMAL_B_INROW); - out.Write( - " coord.xyz = ((components & %uu /* VB_HAS_NRM2 */) != 0u) ? rawnorm2.xyz : coord.xyz;", + out.WriteFmt(" break;\n\n"); + out.WriteFmt(" case {}u: // XF_SRCBINORMAL_B_INROW\n", XF_SRCBINORMAL_B_INROW); + out.WriteFmt( + " coord.xyz = ((components & {}u /* VB_HAS_NRM2 */) != 0u) ? rawnorm2.xyz : coord.xyz;", VB_HAS_NRM2); - out.Write(" break;\n\n"); + out.WriteFmt(" break;\n\n"); for (u32 i = 0; i < 8; i++) { - out.Write(" case %uu: // XF_SRCTEX%u_INROW\n", XF_SRCTEX0_INROW + i, i); - out.Write( - " coord = ((components & %uu /* VB_HAS_UV%u */) != 0u) ? float4(rawtex%u.x, rawtex%u.y, " + out.WriteFmt(" case {}u: // XF_SRCTEX{}_INROW\n", XF_SRCTEX0_INROW + i, i); + out.WriteFmt( + " coord = ((components & {}u /* VB_HAS_UV{} */) != 0u) ? float4(rawtex{}.x, rawtex{}.y, " "1.0, 1.0) : coord;\n", VB_HAS_UV0 << i, i, i, i); - out.Write(" break;\n\n"); + out.WriteFmt(" break;\n\n"); } - out.Write(" }\n"); - out.Write("\n"); + out.WriteFmt(" }}\n" + "\n"); - out.Write(" // Input form of AB11 sets z element to 1.0\n"); - out.Write(" if (%s == %uu) // inputform == XF_TEXINPUT_AB11\n", - BitfieldExtract("texMtxInfo", TexMtxInfo().inputform).c_str(), XF_TEXINPUT_AB11); - out.Write(" coord.z = 1.0f;\n"); - out.Write("\n"); + out.WriteFmt(" // Input form of AB11 sets z element to 1.0\n"); + out.WriteFmt(" if ({} == {}u) // inputform == XF_TEXINPUT_AB11\n", + BitfieldExtract("texMtxInfo", TexMtxInfo().inputform), XF_TEXINPUT_AB11); + out.WriteFmt(" coord.z = 1.0f;\n" + "\n"); - out.Write(" // first transformation\n"); - out.Write(" uint texgentype = %s;\n", - BitfieldExtract("texMtxInfo", TexMtxInfo().texgentype).c_str()); - out.Write(" float3 output_tex;\n" - " switch (texgentype)\n" - " {\n"); - out.Write(" case %uu: // XF_TEXGEN_EMBOSS_MAP\n", XF_TEXGEN_EMBOSS_MAP); - out.Write(" {\n"); - out.Write(" uint light = %s;\n", - BitfieldExtract("texMtxInfo", TexMtxInfo().embosslightshift).c_str()); - out.Write(" uint source = %s;\n", - BitfieldExtract("texMtxInfo", TexMtxInfo().embosssourceshift).c_str()); - out.Write(" switch (source) {\n"); - for (u32 i = 0; i < numTexgen; i++) - out.Write(" case %uu: output_tex.xyz = o.tex%u; break;\n", i, i); - out.Write(" default: output_tex.xyz = float3(0.0, 0.0, 0.0); break;\n" - " }\n"); - out.Write(" if ((components & %uu) != 0u) { // VB_HAS_NRM1 | VB_HAS_NRM2\n", - VB_HAS_NRM1 | VB_HAS_NRM2); // Should this be VB_HAS_NRM1 | VB_HAS_NRM2 - out.Write(" float3 ldir = normalize(" I_LIGHTS "[light].pos.xyz - pos.xyz);\n" - " output_tex.xyz += float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0);\n" - " }\n" - " }\n" - " break;\n\n"); - out.Write(" case %uu: // XF_TEXGEN_COLOR_STRGBC0\n", XF_TEXGEN_COLOR_STRGBC0); - out.Write(" output_tex.xyz = float3(o.colors_0.x, o.colors_0.y, 1.0);\n" - " break;\n\n"); - out.Write(" case %uu: // XF_TEXGEN_COLOR_STRGBC1\n", XF_TEXGEN_COLOR_STRGBC1); - out.Write(" output_tex.xyz = float3(o.colors_1.x, o.colors_1.y, 1.0);\n" - " break;\n\n"); - out.Write(" default: // Also XF_TEXGEN_REGULAR\n" - " {\n"); - out.Write(" if ((components & (%uu /* VB_HAS_TEXMTXIDX0 */ << texgen)) != 0u) {\n", - VB_HAS_TEXMTXIDX0); - out.Write(" // This is messy, due to dynamic indexing of the input texture coordinates.\n" - " // Hopefully the compiler will unroll this whole loop anyway and the switch.\n" - " int tmp = 0;\n" - " switch (texgen) {\n"); - for (u32 i = 0; i < numTexgen; i++) - out.Write(" case %uu: tmp = int(rawtex%u.z); break;\n", i, i); - out.Write(" }\n" - "\n"); - out.Write(" if (%s == %uu) {\n", - BitfieldExtract("texMtxInfo", TexMtxInfo().projection).c_str(), XF_TEXPROJ_STQ); - out.Write(" output_tex.xyz = float3(dot(coord, " I_TRANSFORMMATRICES "[tmp]),\n" - " dot(coord, " I_TRANSFORMMATRICES "[tmp + 1]),\n" - " dot(coord, " I_TRANSFORMMATRICES "[tmp + 2]));\n" - " } else {\n" - " output_tex.xyz = float3(dot(coord, " I_TRANSFORMMATRICES "[tmp]),\n" - " dot(coord, " I_TRANSFORMMATRICES "[tmp + 1]),\n" - " 1.0);\n" - " }\n" - " } else {\n"); - out.Write(" if (%s == %uu) {\n", - BitfieldExtract("texMtxInfo", TexMtxInfo().projection).c_str(), XF_TEXPROJ_STQ); - out.Write(" output_tex.xyz = float3(dot(coord, " I_TEXMATRICES "[3u * texgen]),\n" - " dot(coord, " I_TEXMATRICES "[3u * texgen + 1u]),\n" - " dot(coord, " I_TEXMATRICES "[3u * texgen + 2u]));\n" - " } else {\n" - " output_tex.xyz = float3(dot(coord, " I_TEXMATRICES "[3u * texgen]),\n" - " dot(coord, " I_TEXMATRICES "[3u * texgen + 1u]),\n" - " 1.0);\n" - " }\n" - " }\n" - " }\n" - " break;\n\n" - " }\n" - "\n"); + out.WriteFmt(" // first transformation\n"); + out.WriteFmt(" uint texgentype = {};\n", BitfieldExtract("texMtxInfo", TexMtxInfo().texgentype)); + out.WriteFmt(" float3 output_tex;\n" + " switch (texgentype)\n" + " {{\n"); + out.WriteFmt(" case {}u: // XF_TEXGEN_EMBOSS_MAP\n", XF_TEXGEN_EMBOSS_MAP); + out.WriteFmt(" {{\n"); + out.WriteFmt(" uint light = {};\n", + BitfieldExtract("texMtxInfo", TexMtxInfo().embosslightshift)); + out.WriteFmt(" uint source = {};\n", + BitfieldExtract("texMtxInfo", TexMtxInfo().embosssourceshift)); + out.WriteFmt(" switch (source) {{\n"); + for (u32 i = 0; i < num_texgen; i++) + out.WriteFmt(" case {}u: output_tex.xyz = o.tex{}; break;\n", i, i); + out.WriteFmt(" default: output_tex.xyz = float3(0.0, 0.0, 0.0); break;\n" + " }}\n"); + out.WriteFmt(" if ((components & {}u) != 0u) {{ // VB_HAS_NRM1 | VB_HAS_NRM2\n", + VB_HAS_NRM1 | VB_HAS_NRM2); // Should this be VB_HAS_NRM1 | VB_HAS_NRM2 + out.WriteFmt(" float3 ldir = normalize(" I_LIGHTS "[light].pos.xyz - pos.xyz);\n" + " output_tex.xyz += float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0);\n" + " }}\n" + " }}\n" + " break;\n\n"); + out.WriteFmt(" case {}u: // XF_TEXGEN_COLOR_STRGBC0\n", XF_TEXGEN_COLOR_STRGBC0); + out.WriteFmt(" output_tex.xyz = float3(o.colors_0.x, o.colors_0.y, 1.0);\n" + " break;\n\n"); + out.WriteFmt(" case {}u: // XF_TEXGEN_COLOR_STRGBC1\n", XF_TEXGEN_COLOR_STRGBC1); + out.WriteFmt(" output_tex.xyz = float3(o.colors_1.x, o.colors_1.y, 1.0);\n" + " break;\n\n"); + out.WriteFmt(" default: // Also XF_TEXGEN_REGULAR\n" + " {{\n"); + out.WriteFmt(" if ((components & ({}u /* VB_HAS_TEXMTXIDX0 */ << texgen)) != 0u) {{\n", + VB_HAS_TEXMTXIDX0); + out.WriteFmt( + " // This is messy, due to dynamic indexing of the input texture coordinates.\n" + " // Hopefully the compiler will unroll this whole loop anyway and the switch.\n" + " int tmp = 0;\n" + " switch (texgen) {{\n"); + for (u32 i = 0; i < num_texgen; i++) + out.WriteFmt(" case {}u: tmp = int(rawtex{}.z); break;\n", i, i); + out.WriteFmt(" }}\n" + "\n"); + out.WriteFmt(" if ({} == {}u) {{\n", + BitfieldExtract("texMtxInfo", TexMtxInfo().projection), XF_TEXPROJ_STQ); + out.WriteFmt(" output_tex.xyz = float3(dot(coord, " I_TRANSFORMMATRICES "[tmp]),\n" + " dot(coord, " I_TRANSFORMMATRICES "[tmp + 1]),\n" + " dot(coord, " I_TRANSFORMMATRICES "[tmp + 2]));\n" + " }} else {{\n" + " output_tex.xyz = float3(dot(coord, " I_TRANSFORMMATRICES "[tmp]),\n" + " dot(coord, " I_TRANSFORMMATRICES "[tmp + 1]),\n" + " 1.0);\n" + " }}\n" + " }} else {{\n"); + out.WriteFmt(" if ({} == {}u) {{\n", + BitfieldExtract("texMtxInfo", TexMtxInfo().projection), XF_TEXPROJ_STQ); + out.WriteFmt( + " output_tex.xyz = float3(dot(coord, " I_TEXMATRICES "[3u * texgen]),\n" + " dot(coord, " I_TEXMATRICES "[3u * texgen + 1u]),\n" + " dot(coord, " I_TEXMATRICES "[3u * texgen + 2u]));\n" + " }} else {{\n" + " output_tex.xyz = float3(dot(coord, " I_TEXMATRICES "[3u * texgen]),\n" + " dot(coord, " I_TEXMATRICES "[3u * texgen + 1u]),\n" + " 1.0);\n" + " }}\n" + " }}\n" + " }}\n" + " break;\n\n" + " }}\n" + "\n"); - out.Write(" if (xfmem_dualTexInfo != 0u) {\n"); - out.Write(" uint postMtxInfo = xfmem_postMtxInfo(texgen);"); - out.Write(" uint base_index = %s;\n", - BitfieldExtract("postMtxInfo", PostMtxInfo().index).c_str()); - out.Write(" float4 P0 = " I_POSTTRANSFORMMATRICES "[base_index & 0x3fu];\n" - " float4 P1 = " I_POSTTRANSFORMMATRICES "[(base_index + 1u) & 0x3fu];\n" - " float4 P2 = " I_POSTTRANSFORMMATRICES "[(base_index + 2u) & 0x3fu];\n" - "\n"); - out.Write(" if (%s != 0u)\n", BitfieldExtract("postMtxInfo", PostMtxInfo().normalize).c_str()); - out.Write(" output_tex.xyz = normalize(output_tex.xyz);\n" - "\n" - " // multiply by postmatrix\n" - " output_tex.xyz = float3(dot(P0.xyz, output_tex.xyz) + P0.w,\n" - " dot(P1.xyz, output_tex.xyz) + P1.w,\n" - " dot(P2.xyz, output_tex.xyz) + P2.w);\n" - " }\n\n"); + out.WriteFmt(" if (xfmem_dualTexInfo != 0u) {{\n"); + out.WriteFmt(" uint postMtxInfo = xfmem_postMtxInfo(texgen);"); + out.WriteFmt(" uint base_index = {};\n", BitfieldExtract("postMtxInfo", PostMtxInfo().index)); + out.WriteFmt(" float4 P0 = " I_POSTTRANSFORMMATRICES "[base_index & 0x3fu];\n" + " float4 P1 = " I_POSTTRANSFORMMATRICES "[(base_index + 1u) & 0x3fu];\n" + " float4 P2 = " I_POSTTRANSFORMMATRICES "[(base_index + 2u) & 0x3fu];\n" + "\n"); + out.WriteFmt(" if ({} != 0u)\n", BitfieldExtract("postMtxInfo", PostMtxInfo().normalize)); + out.WriteFmt(" output_tex.xyz = normalize(output_tex.xyz);\n" + "\n" + " // multiply by postmatrix\n" + " output_tex.xyz = float3(dot(P0.xyz, output_tex.xyz) + P0.w,\n" + " dot(P1.xyz, output_tex.xyz) + P1.w,\n" + " dot(P2.xyz, output_tex.xyz) + P2.w);\n" + " }}\n\n"); // When q is 0, the GameCube appears to have a special case // This can be seen in devkitPro's neheGX Lesson08 example for Wii // Makes differences in Rogue Squadron 3 (Hoth sky) and The Last Story (shadow culling) - out.Write(" if (texgentype == %uu && output_tex.z == 0.0) // XF_TEXGEN_REGULAR\n", - XF_TEXGEN_REGULAR); - out.Write( + out.WriteFmt(" if (texgentype == {}u && output_tex.z == 0.0) // XF_TEXGEN_REGULAR\n", + XF_TEXGEN_REGULAR); + out.WriteFmt( " output_tex.xy = clamp(output_tex.xy / 2.0f, float2(-1.0f,-1.0f), float2(1.0f,1.0f));\n" "\n"); - out.Write(" // Hopefully GPUs that can support dynamic indexing will optimize this.\n"); - out.Write(" switch (texgen) {\n"); - for (u32 i = 0; i < numTexgen; i++) - out.Write(" case %uu: o.tex%u = output_tex; break;\n", i, i); - out.Write(" }\n" - "}\n"); + out.WriteFmt(" // Hopefully GPUs that can support dynamic indexing will optimize this.\n"); + out.WriteFmt(" switch (texgen) {{\n"); + for (u32 i = 0; i < num_texgen; i++) + out.WriteFmt(" case {}u: o.tex{} = output_tex; break;\n", i, i); + out.WriteFmt(" }}\n" + "}}\n"); } void EnumerateVertexShaderUids(const std::function& callback)