From 5dd2704416385946fbdd80b38c01b0f9d7735648 Mon Sep 17 00:00:00 2001 From: iwubcode Date: Wed, 4 May 2022 00:41:34 -0500 Subject: [PATCH] D3D / VideoCommon: generate HLSL from SPIRV --- .../D3D/D3DNativeVertexFormat.cpp | 17 +- Source/Core/VideoBackends/D3D/D3DState.cpp | 1 + .../VideoBackends/D3D12/D3D12Renderer.cpp | 2 + .../Core/VideoBackends/D3D12/DX12Context.cpp | 4 +- Source/Core/VideoBackends/D3D12/DX12Context.h | 1 + .../VideoBackends/D3D12/DX12VertexFormat.cpp | 11 +- .../VideoBackends/D3DCommon/CMakeLists.txt | 1 + .../Core/VideoBackends/D3DCommon/Shader.cpp | 154 ++++++- .../Core/VideoCommon/FramebufferShaderGen.cpp | 120 +----- Source/Core/VideoCommon/GeometryShaderGen.cpp | 11 +- Source/Core/VideoCommon/PixelShaderGen.cpp | 401 ++++++------------ Source/Core/VideoCommon/PostProcessing.cpp | 86 +--- Source/Core/VideoCommon/ShaderGenCommon.cpp | 113 +++-- Source/Core/VideoCommon/ShaderGenCommon.h | 67 +-- .../VideoCommon/TextureConversionShader.cpp | 161 ++----- .../VideoCommon/TextureConverterShaderGen.cpp | 95 ++--- Source/Core/VideoCommon/UberShaderCommon.cpp | 3 +- Source/Core/VideoCommon/UberShaderPixel.cpp | 270 +++++------- Source/Core/VideoCommon/UberShaderVertex.cpp | 174 ++++---- Source/Core/VideoCommon/VertexShaderGen.cpp | 209 ++++----- 20 files changed, 780 insertions(+), 1121 deletions(-) diff --git a/Source/Core/VideoBackends/D3D/D3DNativeVertexFormat.cpp b/Source/Core/VideoBackends/D3D/D3DNativeVertexFormat.cpp index b4da7fc354..08ed40353c 100644 --- a/Source/Core/VideoBackends/D3D/D3DNativeVertexFormat.cpp +++ b/Source/Core/VideoBackends/D3D/D3DNativeVertexFormat.cpp @@ -106,7 +106,8 @@ D3DVertexFormat::D3DVertexFormat(const PortableVertexDeclaration& vtx_decl) const AttributeFormat* format = &vtx_decl.position; if (format->enable) { - m_elems[m_num_elems].SemanticName = "POSITION"; + m_elems[m_num_elems].SemanticName = "TEXCOORD"; + m_elems[m_num_elems].SemanticIndex = SHADER_POSITION_ATTRIB; m_elems[m_num_elems].AlignedByteOffset = 
format->offset; m_elems[m_num_elems].Format = VarToD3D(format->type, format->components, format->integer); m_elems[m_num_elems].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA; @@ -115,12 +116,11 @@ D3DVertexFormat::D3DVertexFormat(const PortableVertexDeclaration& vtx_decl) for (int i = 0; i < 3; i++) { - static constexpr std::array NAMES = {"NORMAL", "TANGENT", "BINORMAL"}; format = &vtx_decl.normals[i]; if (format->enable) { - m_elems[m_num_elems].SemanticName = NAMES[i]; - m_elems[m_num_elems].SemanticIndex = 0; + m_elems[m_num_elems].SemanticName = "TEXCOORD"; + m_elems[m_num_elems].SemanticIndex = SHADER_NORMAL_ATTRIB + i; m_elems[m_num_elems].AlignedByteOffset = format->offset; m_elems[m_num_elems].Format = VarToD3D(format->type, format->components, format->integer); m_elems[m_num_elems].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA; @@ -133,8 +133,8 @@ D3DVertexFormat::D3DVertexFormat(const PortableVertexDeclaration& vtx_decl) format = &vtx_decl.colors[i]; if (format->enable) { - m_elems[m_num_elems].SemanticName = "COLOR"; - m_elems[m_num_elems].SemanticIndex = i; + m_elems[m_num_elems].SemanticName = "TEXCOORD"; + m_elems[m_num_elems].SemanticIndex = SHADER_COLOR0_ATTRIB + i; m_elems[m_num_elems].AlignedByteOffset = format->offset; m_elems[m_num_elems].Format = VarToD3D(format->type, format->components, format->integer); m_elems[m_num_elems].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA; @@ -148,7 +148,7 @@ D3DVertexFormat::D3DVertexFormat(const PortableVertexDeclaration& vtx_decl) if (format->enable) { m_elems[m_num_elems].SemanticName = "TEXCOORD"; - m_elems[m_num_elems].SemanticIndex = i; + m_elems[m_num_elems].SemanticIndex = SHADER_TEXTURE0_ATTRIB + i; m_elems[m_num_elems].AlignedByteOffset = format->offset; m_elems[m_num_elems].Format = VarToD3D(format->type, format->components, format->integer); m_elems[m_num_elems].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA; @@ -159,7 +159,8 @@ D3DVertexFormat::D3DVertexFormat(const PortableVertexDeclaration& 
vtx_decl) format = &vtx_decl.posmtx; if (format->enable) { - m_elems[m_num_elems].SemanticName = "BLENDINDICES"; + m_elems[m_num_elems].SemanticName = "TEXCOORD"; + m_elems[m_num_elems].SemanticIndex = SHADER_POSMTX_ATTRIB; m_elems[m_num_elems].AlignedByteOffset = format->offset; m_elems[m_num_elems].Format = VarToD3D(format->type, format->components, format->integer); m_elems[m_num_elems].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA; diff --git a/Source/Core/VideoBackends/D3D/D3DState.cpp b/Source/Core/VideoBackends/D3D/D3DState.cpp index 9a568ae6e4..5712f6fd3c 100644 --- a/Source/Core/VideoBackends/D3D/D3DState.cpp +++ b/Source/Core/VideoBackends/D3D/D3DState.cpp @@ -76,6 +76,7 @@ void StateManager::Apply() if (m_current.vertexConstants != m_pending.vertexConstants) { D3D::context->VSSetConstantBuffers(0, 1, &m_pending.vertexConstants); + D3D::context->VSSetConstantBuffers(1, 1, &m_pending.vertexConstants); m_current.vertexConstants = m_pending.vertexConstants; } diff --git a/Source/Core/VideoBackends/D3D12/D3D12Renderer.cpp b/Source/Core/VideoBackends/D3D12/D3D12Renderer.cpp index ece8f14d88..d4eb207a45 100644 --- a/Source/Core/VideoBackends/D3D12/D3D12Renderer.cpp +++ b/Source/Core/VideoBackends/D3D12/D3D12Renderer.cpp @@ -578,6 +578,8 @@ bool Renderer::ApplyState() { cmdlist->SetGraphicsRootConstantBufferView(ROOT_PARAMETER_VS_CBV, m_state.constant_buffers[1]); + cmdlist->SetGraphicsRootConstantBufferView(ROOT_PARAMETER_VS_CBV2, + m_state.constant_buffers[1]); if (g_ActiveConfig.bEnablePixelLighting) { diff --git a/Source/Core/VideoBackends/D3D12/DX12Context.cpp b/Source/Core/VideoBackends/D3D12/DX12Context.cpp index cc6fb72adc..e3ff8a7417 100644 --- a/Source/Core/VideoBackends/D3D12/DX12Context.cpp +++ b/Source/Core/VideoBackends/D3D12/DX12Context.cpp @@ -323,7 +323,7 @@ bool DXContext::CreateRootSignatures() bool DXContext::CreateGXRootSignature() { // GX: - // - 3 constant buffers (bindings 0-2), 0/1 visible in PS, 1 visible in VS, 2 visible in GS. 
+ // - 3 constant buffers (bindings 0-2), 0/1 visible in PS, 2 visible in VS, 1 visible in GS. // - 8 textures (visible in PS). // - 8 samplers (visible in PS). // - 1 UAV (visible in PS). @@ -341,6 +341,8 @@ bool DXContext::CreateGXRootSignature() param_count++; SetRootParamCBV(&params[param_count], 0, D3D12_SHADER_VISIBILITY_VERTEX); param_count++; + SetRootParamCBV(&params[param_count], 1, D3D12_SHADER_VISIBILITY_VERTEX); + param_count++; SetRootParamCBV(&params[param_count], 0, D3D12_SHADER_VISIBILITY_GEOMETRY); param_count++; diff --git a/Source/Core/VideoBackends/D3D12/DX12Context.h b/Source/Core/VideoBackends/D3D12/DX12Context.h index 4c5464a123..d1c5b38798 100644 --- a/Source/Core/VideoBackends/D3D12/DX12Context.h +++ b/Source/Core/VideoBackends/D3D12/DX12Context.h @@ -25,6 +25,7 @@ enum ROOT_PARAMETER ROOT_PARAMETER_PS_SRV, ROOT_PARAMETER_PS_SAMPLERS, ROOT_PARAMETER_VS_CBV, + ROOT_PARAMETER_VS_CBV2, ROOT_PARAMETER_GS_CBV, ROOT_PARAMETER_PS_UAV_OR_CBV2, ROOT_PARAMETER_PS_CBV2, // ROOT_PARAMETER_PS_UAV_OR_CBV2 if bbox is not enabled diff --git a/Source/Core/VideoBackends/D3D12/DX12VertexFormat.cpp b/Source/Core/VideoBackends/D3D12/DX12VertexFormat.cpp index 2d1a3028e0..77082c091a 100644 --- a/Source/Core/VideoBackends/D3D12/DX12VertexFormat.cpp +++ b/Source/Core/VideoBackends/D3D12/DX12VertexFormat.cpp @@ -83,7 +83,7 @@ void DXVertexFormat::MapAttributes() if (m_decl.position.enable) { AddAttribute( - "POSITION", 0, 0, + "TEXCOORD", SHADER_POSITION_ATTRIB, 0, VarToDXGIFormat(m_decl.position.type, m_decl.position.components, m_decl.position.integer), m_decl.position.offset); } @@ -92,8 +92,7 @@ void DXVertexFormat::MapAttributes() { if (m_decl.normals[i].enable) { - static constexpr std::array NAMES = {"NORMAL", "TANGENT", "BINORMAL"}; - AddAttribute(NAMES[i], 0, 0, + AddAttribute("TEXCOORD", SHADER_NORMAL_ATTRIB + i, 0, VarToDXGIFormat(m_decl.normals[i].type, m_decl.normals[i].components, m_decl.normals[i].integer), m_decl.normals[i].offset); @@ -104,7 +103,7 @@ void 
DXVertexFormat::MapAttributes() { if (m_decl.colors[i].enable) { - AddAttribute("COLOR", i, 0, + AddAttribute("TEXCOORD", SHADER_COLOR0_ATTRIB + i, 0, VarToDXGIFormat(m_decl.colors[i].type, m_decl.colors[i].components, m_decl.colors[i].integer), m_decl.colors[i].offset); @@ -115,7 +114,7 @@ void DXVertexFormat::MapAttributes() { if (m_decl.texcoords[i].enable) { - AddAttribute("TEXCOORD", i, 0, + AddAttribute("TEXCOORD", SHADER_TEXTURE0_ATTRIB + i, 0, VarToDXGIFormat(m_decl.texcoords[i].type, m_decl.texcoords[i].components, m_decl.texcoords[i].integer), m_decl.texcoords[i].offset); @@ -125,7 +124,7 @@ void DXVertexFormat::MapAttributes() if (m_decl.posmtx.enable) { AddAttribute( - "BLENDINDICES", 0, 0, + "TEXCOORD", SHADER_POSMTX_ATTRIB, 0, VarToDXGIFormat(m_decl.posmtx.type, m_decl.posmtx.components, m_decl.posmtx.integer), m_decl.posmtx.offset); } diff --git a/Source/Core/VideoBackends/D3DCommon/CMakeLists.txt b/Source/Core/VideoBackends/D3DCommon/CMakeLists.txt index e4e586e47a..90464c72e7 100644 --- a/Source/Core/VideoBackends/D3DCommon/CMakeLists.txt +++ b/Source/Core/VideoBackends/D3DCommon/CMakeLists.txt @@ -11,6 +11,7 @@ target_link_libraries(videod3dcommon PUBLIC common videocommon + spirv_cross ) if(MSVC) diff --git a/Source/Core/VideoBackends/D3DCommon/Shader.cpp b/Source/Core/VideoBackends/D3DCommon/Shader.cpp index 0d7c84b8e0..236efb77e7 100644 --- a/Source/Core/VideoBackends/D3DCommon/Shader.cpp +++ b/Source/Core/VideoBackends/D3DCommon/Shader.cpp @@ -4,7 +4,13 @@ #include "VideoBackends/D3DCommon/Shader.h" #include +#include +#include + +#include #include +#include "disassemble.h" +#include "spirv_hlsl.hpp" #include "Common/Assert.h" #include "Common/FileUtil.h" @@ -14,9 +20,141 @@ #include "Common/StringUtil.h" #include "Common/Version.h" +#include "VideoCommon/Spirv.h" #include "VideoCommon/VideoBackendBase.h" #include "VideoCommon/VideoConfig.h" +namespace +{ +// Regarding the UBO bind points, we subtract one from the binding index because +// the 
OpenGL backend requires UBO #0 for non-block uniforms (at least on NV). +// This allows us to share the same shaders but use bind point #0 in the D3D +// backends. None of the specific shaders use UBOs, instead they use push +// constants, so when/if the GL backend moves to uniform blocks completely this +// subtraction can be removed. +constexpr std::string_view SHADER_HEADER = R"( + // Target GLSL 4.5. + #version 450 core + #define ATTRIBUTE_LOCATION(x) layout(location = x) + #define FRAGMENT_OUTPUT_LOCATION(x) layout(location = x) + #define FRAGMENT_OUTPUT_LOCATION_INDEXED(x, y) layout(location = x, index = y) + #define UBO_BINDING(packing, x) layout(packing, binding = (x - 1)) + #define SAMPLER_BINDING(x) layout(binding = x) + #define TEXEL_BUFFER_BINDING(x) layout(binding = x) + #define SSBO_BINDING(x) layout(binding = (x + 2)) + #define VARYING_LOCATION(x) layout(location = x) + #define FORCE_EARLY_Z layout(early_fragment_tests) in + + // hlsl to glsl function translation + #define float2 vec2 + #define float3 vec3 + #define float4 vec4 + #define uint2 uvec2 + #define uint3 uvec3 + #define uint4 uvec4 + #define int2 ivec2 + #define int3 ivec3 + #define int4 ivec4 + #define frac fract + #define lerp mix + + #define API_D3D 1 +)"; +constexpr std::string_view COMPUTE_SHADER_HEADER = R"( + // Target GLSL 4.5. + #version 450 core + // All resources are packed into one descriptor set for compute. 
+ #define UBO_BINDING(packing, x) layout(packing, binding = (x - 1)) + #define SAMPLER_BINDING(x) layout(binding = x) + #define TEXEL_BUFFER_BINDING(x) layout(binding = x) + #define IMAGE_BINDING(format, x) layout(format, binding = x) + + // hlsl to glsl function translation + #define float2 vec2 + #define float3 vec3 + #define float4 vec4 + #define uint2 uvec2 + #define uint3 uvec3 + #define uint4 uvec4 + #define int2 ivec2 + #define int3 ivec3 + #define int4 ivec4 + #define frac fract + #define lerp mix + + #define API_D3D 1 +)"; + +std::optional<std::string> GetHLSLFromSPIRV(SPIRV::CodeVector spv, D3D_FEATURE_LEVEL feature_level) +{ + spirv_cross::CompilerHLSL::Options options; + switch (feature_level) + { + case D3D_FEATURE_LEVEL_10_0: + options.shader_model = 40; + break; + case D3D_FEATURE_LEVEL_10_1: + options.shader_model = 41; + break; + default: + options.shader_model = 50; + break; + }; + + spirv_cross::CompilerHLSL compiler(std::move(spv)); + compiler.set_hlsl_options(options); + + return compiler.compile(); +} + +std::optional<SPIRV::CodeVector> GetSpirv(ShaderStage stage, std::string_view source) +{ + switch (stage) + { + case ShaderStage::Vertex: + { + const auto full_source = fmt::format("{}{}", SHADER_HEADER, source); + return SPIRV::CompileVertexShader(full_source); + } + + case ShaderStage::Geometry: + { + // Spirv cross does not currently support hlsl geometry shaders + return std::nullopt; + } + + case ShaderStage::Pixel: + { + const auto full_source = fmt::format("{}{}", SHADER_HEADER, source); + return SPIRV::CompileFragmentShader(full_source); + } + + case ShaderStage::Compute: + { + const auto full_source = fmt::format("{}{}", COMPUTE_SHADER_HEADER, source); + return SPIRV::CompileComputeShader(full_source); + } + }; + + return std::nullopt; +} + +std::optional<std::string> GetHLSL(D3D_FEATURE_LEVEL feature_level, ShaderStage stage, + std::string_view source) +{ + if (stage == ShaderStage::Geometry) + { + return std::string{source}; + } + else if (const auto spirv = GetSpirv(stage, 
source)) + { + return GetHLSLFromSPIRV(std::move(*spirv), feature_level); + } + + return std::nullopt; +} +} // namespace + namespace D3DCommon { Shader::Shader(ShaderStage stage, BinaryData bytecode) @@ -95,6 +233,10 @@ static const char* GetCompileTarget(D3D_FEATURE_LEVEL feature_level, ShaderStage std::optional Shader::CompileShader(D3D_FEATURE_LEVEL feature_level, ShaderStage stage, std::string_view source) { + const auto hlsl = GetHLSL(feature_level, stage, source); + if (!hlsl) + return std::nullopt; + static constexpr D3D_SHADER_MACRO macros[] = {{"API_D3D", "1"}, {nullptr, nullptr}}; const UINT flags = g_ActiveConfig.bEnableValidationLayer ? (D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION) : @@ -103,7 +245,7 @@ std::optional Shader::CompileShader(D3D_FEATURE_LEVEL featur Microsoft::WRL::ComPtr code; Microsoft::WRL::ComPtr errors; - HRESULT hr = d3d_compile(source.data(), source.size(), nullptr, macros, nullptr, "main", target, + HRESULT hr = d3d_compile(hlsl->data(), hlsl->size(), nullptr, macros, nullptr, "main", target, flags, 0, &code, &errors); if (FAILED(hr)) { @@ -111,12 +253,20 @@ std::optional Shader::CompileShader(D3D_FEATURE_LEVEL featur std::string filename = VideoBackendBase::BadShaderFilename(target, num_failures++); std::ofstream file; File::OpenFStream(file, filename, std::ios_base::out); - file.write(source.data(), source.size()); + file.write(hlsl->data(), hlsl->size()); file << "\n"; file.write(static_cast(errors->GetBufferPointer()), errors->GetBufferSize()); file << "\n"; file << "Dolphin Version: " + Common::GetScmRevStr() + "\n"; file << "Video Backend: " + g_video_backend->GetDisplayName(); + + if (const auto spirv = GetSpirv(stage, source)) + { + file << "\nOriginal Source: \n"; + file << source << std::endl; + file << "SPIRV: \n"; + spv::Disassemble(file, *spirv); + } file.close(); PanicAlertFmt("Failed to compile {}: {}\nDebug info ({}):\n{}", filename, Common::HRWrap(hr), diff --git 
a/Source/Core/VideoCommon/FramebufferShaderGen.cpp b/Source/Core/VideoCommon/FramebufferShaderGen.cpp index d047167e29..40949f34aa 100644 --- a/Source/Core/VideoCommon/FramebufferShaderGen.cpp +++ b/Source/Core/VideoCommon/FramebufferShaderGen.cpp @@ -25,10 +25,7 @@ APIType GetAPIType() void EmitUniformBufferDeclaration(ShaderCode& code) { - if (GetAPIType() == APIType::D3D) - code.Write("cbuffer PSBlock : register(b0)\n"); - else - code.Write("UBO_BINDING(std140, 1) uniform PSBlock\n"); + code.Write("UBO_BINDING(std140, 1) uniform PSBlock\n"); } void EmitSamplerDeclarations(ShaderCode& code, u32 start = 0, u32 end = 1, @@ -37,17 +34,6 @@ void EmitSamplerDeclarations(ShaderCode& code, u32 start = 0, u32 end = 1, switch (GetAPIType()) { case APIType::D3D: - { - const char* array_type = multisampled ? "Texture2DMSArray" : "Texture2DArray"; - - for (u32 i = start; i < end; i++) - { - code.Write("{} tex{} : register(t{});\n", array_type, i, i); - code.Write("SamplerState samp{} : register(s{});\n", i, i); - } - } - break; - case APIType::OpenGL: case APIType::Vulkan: { @@ -69,9 +55,6 @@ void EmitSampleTexture(ShaderCode& code, u32 n, std::string_view coords) switch (GetAPIType()) { case APIType::D3D: - code.Write("tex{}.Sample(samp{}, {})", n, n, coords); - break; - case APIType::OpenGL: case APIType::Vulkan: code.Write("texture(samp{}, {})", n, coords); @@ -89,9 +72,6 @@ void EmitTextureLoad(ShaderCode& code, u32 n, std::string_view coords) switch (GetAPIType()) { case APIType::D3D: - code.Write("tex{}.Load({})", n, coords); - break; - case APIType::OpenGL: case APIType::Vulkan: code.Write("texelFetch(samp{}, ({}).xyz, ({}).w)", n, coords, coords); @@ -109,23 +89,6 @@ void EmitVertexMainDeclaration(ShaderCode& code, u32 num_tex_inputs, u32 num_col switch (GetAPIType()) { case APIType::D3D: - { - code.Write("void main("); - for (u32 i = 0; i < num_tex_inputs; i++) - code.Write("in float3 rawtex{} : TEXCOORD{}, ", i, i); - for (u32 i = 0; i < num_color_inputs; i++) - 
code.Write("in float4 rawcolor{} : COLOR{}, ", i, i); - if (position_input) - code.Write("in float4 rawpos : POSITION, "); - code.Write("{}", extra_inputs); - for (u32 i = 0; i < num_tex_outputs; i++) - code.Write("out float3 v_tex{} : TEXCOORD{}, ", i, i); - for (u32 i = 0; i < num_color_outputs; i++) - code.Write("out float4 v_col{} : COLOR{}, ", i, i); - code.Write("out float4 opos : SV_Position)\n"); - } - break; - case APIType::OpenGL: case APIType::Vulkan: { @@ -175,18 +138,6 @@ void EmitPixelMainDeclaration(ShaderCode& code, u32 num_tex_inputs, u32 num_colo switch (GetAPIType()) { case APIType::D3D: - { - code.Write("void main("); - for (u32 i = 0; i < num_tex_inputs; i++) - code.Write("in float3 v_tex{} : TEXCOORD{}, ", i, i); - for (u32 i = 0; i < num_color_inputs; i++) - code.Write("in float4 v_col{} : COLOR{}, ", i, i); - if (emit_frag_coord) - code.Write("in float4 frag_coord : SV_Position, "); - code.Write("{}out {} ocol0 : SV_Target)\n", extra_vars, output_type); - } - break; - case APIType::OpenGL: case APIType::Vulkan: { @@ -225,8 +176,8 @@ std::string GenerateScreenQuadVertexShader() { ShaderCode code; EmitVertexMainDeclaration(code, 0, 0, false, 1, 0, - GetAPIType() == APIType::D3D ? 
"in uint id : SV_VertexID, " : - "#define id gl_VertexID\n"); + + "#define id gl_VertexID\n"); code.Write( "{{\n" " v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n" @@ -251,7 +202,7 @@ std::string GeneratePassthroughGeometryShader(u32 num_tex, u32 num_colors) for (u32 i = 0; i < num_tex; i++) code.Write(" float3 tex{} : TEXCOORD{};\n", i, i); for (u32 i = 0; i < num_colors; i++) - code.Write(" float4 color{} : COLOR{};\n", i, i); + code.Write(" float4 color{} : TEXCOORD{};\n", i, i + num_tex); code.Write(" float4 position : SV_Position;\n" "}};\n"); @@ -260,7 +211,7 @@ std::string GeneratePassthroughGeometryShader(u32 num_tex, u32 num_colors) for (u32 i = 0; i < num_tex; i++) code.Write(" float3 tex{} : TEXCOORD{};\n", i, i); for (u32 i = 0; i < num_colors; i++) - code.Write(" float4 color{} : COLOR{};\n", i, i); + code.Write(" float4 color{} : TEXCOORD{};\n", i, i + num_tex); code.Write(" float4 position : SV_Position;\n" " uint slice : SV_RenderTargetArrayIndex;\n" "}};\n\n"); @@ -343,8 +294,8 @@ std::string GenerateTextureCopyVertexShader() "}};\n\n"); EmitVertexMainDeclaration(code, 0, 0, false, 1, 0, - GetAPIType() == APIType::D3D ? "in uint id : SV_VertexID, " : - "#define id gl_VertexID"); + + "#define id gl_VertexID"); code.Write("{{\n" " v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n" " opos = float4(v_tex0.xy * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);\n" @@ -386,25 +337,15 @@ std::string GenerateResolveDepthPixelShader(u32 samples) { ShaderCode code; EmitSamplerDeclarations(code, 0, 1, true); - EmitPixelMainDeclaration(code, 1, 0, "float", - GetAPIType() == APIType::D3D ? 
"in float4 ipos : SV_Position, " : ""); + EmitPixelMainDeclaration(code, 1, 0, "float", ""); code.Write("{{\n" " int layer = int(v_tex0.z);\n"); - if (GetAPIType() == APIType::D3D) - code.Write(" int3 coords = int3(int2(ipos.xy), layer);\n"); - else - code.Write(" int3 coords = int3(int2(gl_FragCoord.xy), layer);\n"); + code.Write(" int3 coords = int3(int2(gl_FragCoord.xy), layer);\n"); // Take the minimum of all depth samples. - if (GetAPIType() == APIType::D3D) - code.Write(" ocol0 = tex0.Load(coords, 0).r;\n"); - else - code.Write(" ocol0 = texelFetch(samp0, coords, 0).r;\n"); + code.Write(" ocol0 = texelFetch(samp0, coords, 0).r;\n"); code.Write(" for (int i = 1; i < {}; i++)\n", samples); - if (GetAPIType() == APIType::D3D) - code.Write(" ocol0 = min(ocol0, tex0.Load(coords, i).r);\n"); - else - code.Write(" ocol0 = min(ocol0, texelFetch(samp0, coords, i).r);\n"); + code.Write(" ocol0 = min(ocol0, texelFetch(samp0, coords, i).r);\n"); code.Write("}}\n"); return code.GetBuffer(); @@ -420,8 +361,8 @@ std::string GenerateClearVertexShader() "}};\n"); EmitVertexMainDeclaration(code, 0, 0, false, 0, 1, - GetAPIType() == APIType::D3D ? "in uint id : SV_VertexID, " : - "#define id gl_VertexID\n"); + + "#define id gl_VertexID\n"); code.Write( "{{\n" " float2 coord = float2(float((id << 1) & 2), float(id & 2));\n" @@ -459,45 +400,29 @@ std::string GenerateFormatConversionShader(EFBReinterpretType convtype, u32 samp { ShaderCode code; EmitSamplerDeclarations(code, 0, 1, samples > 1); - EmitPixelMainDeclaration( - code, 1, 0, "float4", - GetAPIType() == APIType::D3D ? - (g_ActiveConfig.bSSAA ? 
- "in float4 ipos : SV_Position, in uint isample : SV_SampleIndex, " : - "in float4 ipos : SV_Position, ") : - ""); + EmitPixelMainDeclaration(code, 1, 0, "float4", + + ""); code.Write("{{\n" " int layer = int(v_tex0.z);\n"); - if (GetAPIType() == APIType::D3D) - code.Write(" int3 coords = int3(int2(ipos.xy), layer);\n"); - else - code.Write(" int3 coords = int3(int2(gl_FragCoord.xy), layer);\n"); + code.Write(" int3 coords = int3(int2(gl_FragCoord.xy), layer);\n"); if (samples == 1) { // No MSAA at all. - if (GetAPIType() == APIType::D3D) - code.Write(" float4 val = tex0.Load(int4(coords, 0));\n"); - else - code.Write(" float4 val = texelFetch(samp0, coords, 0);\n"); + code.Write(" float4 val = texelFetch(samp0, coords, 0);\n"); } else if (g_ActiveConfig.bSSAA) { // Sample shading, shader runs once per sample - if (GetAPIType() == APIType::D3D) - code.Write(" float4 val = tex0.Load(coords, isample);"); - else - code.Write(" float4 val = texelFetch(samp0, coords, gl_SampleID);"); + code.Write(" float4 val = texelFetch(samp0, coords, gl_SampleID);"); } else { // MSAA without sample shading, average out all samples. code.Write(" float4 val = float4(0.0f, 0.0f, 0.0f, 0.0f);\n"); code.Write(" for (int i = 0; i < {}; i++)\n", samples); - if (GetAPIType() == APIType::D3D) - code.Write(" val += tex0.Load(coords, i);\n"); - else - code.Write(" val += texelFetch(samp0, coords, i);\n"); + code.Write(" val += texelFetch(samp0, coords, i);\n"); code.Write(" val /= float({});\n", samples); } @@ -689,13 +614,12 @@ std::string GenerateEFBRestorePixelShader() { ShaderCode code; EmitSamplerDeclarations(code, 0, 2, false); - EmitPixelMainDeclaration(code, 1, 0, "float4", - GetAPIType() == APIType::D3D ? "out float depth : SV_Depth, " : ""); + EmitPixelMainDeclaration(code, 1, 0, "float4", ""); code.Write("{{\n" " ocol0 = "); EmitSampleTexture(code, 0, "v_tex0"); code.Write(";\n"); - code.Write(" {} = ", GetAPIType() == APIType::D3D ? 
"depth" : "gl_FragDepth"); + code.Write(" gl_FragDepth = "); EmitSampleTexture(code, 1, "v_tex0"); code.Write(".r;\n" "}}\n"); diff --git a/Source/Core/VideoCommon/GeometryShaderGen.cpp b/Source/Core/VideoCommon/GeometryShaderGen.cpp index 150354e4f3..997e809860 100644 --- a/Source/Core/VideoCommon/GeometryShaderGen.cpp +++ b/Source/Core/VideoCommon/GeometryShaderGen.cpp @@ -103,7 +103,8 @@ ShaderCode GenerateGeometryShaderCode(APIType api_type, const ShaderHostConfig& "}};\n"); out.Write("struct VS_OUTPUT {{\n"); - GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, host_config, ""); + GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, host_config, "", + ShaderStage::Geometry); out.Write("}};\n"); if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) @@ -113,12 +114,14 @@ ShaderCode GenerateGeometryShaderCode(APIType api_type, const ShaderHostConfig& out.Write("VARYING_LOCATION(0) in VertexData {{\n"); GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, host_config, - GetInterpolationQualifier(msaa, ssaa, true, true)); + GetInterpolationQualifier(msaa, ssaa, true, true), + ShaderStage::Geometry); out.Write("}} vs[{}];\n", vertex_in); out.Write("VARYING_LOCATION(0) out VertexData {{\n"); GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, host_config, - GetInterpolationQualifier(msaa, ssaa, true, false)); + GetInterpolationQualifier(msaa, ssaa, true, false), + ShaderStage::Geometry); if (stereo) out.Write("\tflat int layer;\n"); @@ -134,6 +137,7 @@ ShaderCode GenerateGeometryShaderCode(APIType api_type, const ShaderHostConfig& if (stereo) out.Write("\tuint layer : SV_RenderTargetArrayIndex;\n"); + out.Write("\tfloat4 posout : SV_Position;\n"); out.Write("}};\n"); @@ -344,6 +348,7 @@ static void EmitVertex(ShaderCode& out, const ShaderHostConfig& host_config, else { out.Write("\tps.o = {};\n", vertex); + out.Write("\tps.posout = {}.pos;\n", vertex); } if (stereo) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp 
b/Source/Core/VideoCommon/PixelShaderGen.cpp index 950763f5d8..11ac3a0c1a 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -379,23 +379,10 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type, "int3 iround(float3 x) {{ return int3(round(x)); }}\n" "int4 iround(float4 x) {{ return int4(round(x)); }}\n\n"); - if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) - { - out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp[8];\n"); - } - else // D3D - { - // Declare samplers - out.Write("SamplerState samp[8] : register(s0);\n" - "\n" - "Texture2DArray tex[8] : register(t0);\n"); - } + out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp[8];\n"); out.Write("\n"); - if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) - out.Write("UBO_BINDING(std140, 1) uniform PSBlock {{\n"); - else - out.Write("cbuffer PSBlock : register(b0) {{\n"); + out.Write("UBO_BINDING(std140, 1) uniform PSBlock {{\n"); out.Write("\tint4 " I_COLORS "[4];\n" "\tint4 " I_KCOLORS "[4];\n" @@ -445,10 +432,7 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type, { out.Write("{}", s_lighting_struct); - if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) - out.Write("UBO_BINDING(std140, 2) uniform VSBlock {{\n"); - else - out.Write("cbuffer VSBlock : register(b1) {{\n"); + out.Write("UBO_BINDING(std140, 2) uniform VSBlock {{\n"); out.Write("{}", s_shader_uniforms); out.Write("}};\n"); @@ -456,18 +440,9 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type, if (bounding_box) { - if (api_type == APIType::D3D) - { - out.Write("globallycoherent RWBuffer bbox_data : register(u2);\n" - "#define atomicMin InterlockedMin\n" - "#define atomicMax InterlockedMax"); - } - else - { - out.Write("SSBO_BINDING(0) buffer BBox {{\n" - " int bbox_data[4];\n" - "}};"); - } + out.Write("SSBO_BINDING(0) buffer BBox {{\n" + " int bbox_data[4];\n" + "}};"); out.Write(R"( 
#define bbox_left bbox_data[0] @@ -535,24 +510,12 @@ void UpdateBoundingBox(float2 rawpos) {{ if (host_config.manual_texture_sampling) { - if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) - { - out.Write(R"( + out.Write(R"( int4 readTexture(in sampler2DArray tex, uint u, uint v, int layer, int lod) {{ return iround(texelFetch(tex, int3(u, v, layer), lod) * 255.0); }} int4 readTextureLinear(in sampler2DArray tex, uint2 uv1, uint2 uv2, int layer, int lod, int2 frac_uv) {{)"); - } - else if (api_type == APIType::D3D) - { - out.Write(R"( -int4 readTexture(in Texture2DArray tex, uint u, uint v, int layer, int lod) {{ - return iround(tex.Load(int4(u, v, layer, lod)) * 255.0); -}} - -int4 readTextureLinear(in Texture2DArray tex, uint2 uv1, uint2 uv2, int layer, int lod, int2 frac_uv) {{)"); - } out.Write(R"( int4 result = @@ -621,41 +584,26 @@ uint WrapCoord(int coord, uint wrap, int size) {{ } } - if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) - { - out.Write("\nint4 sampleTexture(uint texmap, in sampler2DArray tex, int2 uv, int layer) {{\n"); - } - else if (api_type == APIType::D3D) - { - out.Write("\nint4 sampleTexture(uint texmap, in Texture2DArray tex, in SamplerState tex_samp, " - "int2 uv, int layer) {{\n"); - } + out.Write("\nint4 sampleTexture(uint texmap, in sampler2DArray tex, int2 uv, int layer) {{\n"); if (!host_config.manual_texture_sampling) { out.Write(" float size_s = float(" I_TEXDIMS "[texmap].x * 128);\n" " float size_t = float(" I_TEXDIMS "[texmap].y * 128);\n" " float3 coords = float3(float(uv.x) / size_s, float(uv.y) / size_t, layer);\n"); - if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) + if (!host_config.backend_sampler_lod_bias) { - if (!host_config.backend_sampler_lod_bias) - { - out.Write(" uint texmode0 = samp_texmode0(texmap);\n" - " float lod_bias = float({}) / 256.0f;\n" - " return iround(255.0 * texture(tex, coords, lod_bias));\n", - 
BitfieldExtract<&SamplerState::TM0::lod_bias>("texmode0")); - } - else - { - out.Write(" return iround(255.0 * texture(tex, coords));\n"); - } + out.Write(" uint texmode0 = samp_texmode0(texmap);\n" + " float lod_bias = float({}) / 256.0f;\n" + " return iround(255.0 * texture(tex, coords, lod_bias));\n", + BitfieldExtract<&SamplerState::TM0::lod_bias>("texmode0")); + } + else + { + out.Write(" return iround(255.0 * texture(tex, coords));\n"); + } - out.Write("}}\n"); - } - else if (api_type == APIType::D3D) - { - out.Write(" return iround(255.0 * tex.Sample(tex_samp, coords));\n}}\n"); - } + out.Write("}}\n"); } else { @@ -694,31 +642,20 @@ uint WrapCoord(int coord, uint wrap, int size) {{ int native_size_t = )" I_TEXDIMS R"([texmap].y; )"); - if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) - { - out.Write(R"( + out.Write(R"( int3 size = textureSize(tex, 0); int size_s = size.x; int size_t = size.y; )"); - if (g_ActiveConfig.backend_info.bSupportsTextureQueryLevels) - { - out.Write(" int number_of_levels = textureQueryLevels(tex);\n"); - } - else - { - out.Write(" int number_of_levels = 256; // textureQueryLevels is not supported\n"); - ERROR_LOG_FMT(VIDEO, "textureQueryLevels is not supported! Odd graphical results may " - "occur if custom textures are in use!"); - } - } - else if (api_type == APIType::D3D) + if (g_ActiveConfig.backend_info.bSupportsTextureQueryLevels) { - ASSERT(g_ActiveConfig.backend_info.bSupportsTextureQueryLevels); - out.Write(R"( - int size_s, size_t, layers, number_of_levels; - tex.GetDimensions(0, size_s, size_t, layers, number_of_levels); -)"); + out.Write(" int number_of_levels = textureQueryLevels(tex);\n"); + } + else + { + out.Write(" int number_of_levels = 256; // textureQueryLevels is not supported\n"); + ERROR_LOG_FMT(VIDEO, "textureQueryLevels is not supported! 
Odd graphical results may " + "occur if custom textures are in use!"); } out.Write(R"( @@ -737,34 +674,23 @@ uint WrapCoord(int coord, uint wrap, int size) {{ )"); } - if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) + if (g_ActiveConfig.backend_info.bSupportsCoarseDerivatives) { - if (g_ActiveConfig.backend_info.bSupportsCoarseDerivatives) - { - // The software renderer uses the equivalent of coarse derivatives, so use them here for - // consistency. This hasn't been hardware tested. - // Note that bSupportsCoarseDerivatives being false only means dFdxCoarse and dFdxFine don't - // exist. The GPU may still implement dFdx using coarse derivatives; we just don't have the - // ability to specifically require it. - out.Write(R"( + // The software renderer uses the equivalent of coarse derivatives, so use them here for + // consistency. This hasn't been hardware tested. + // Note that bSupportsCoarseDerivatives being false only means dFdxCoarse and dFdxFine don't + // exist. The GPU may still implement dFdx using coarse derivatives; we just don't have the + // ability to specifically require it. 
+ out.Write(R"( float2 uv_delta_x = abs(dFdxCoarse(float2(uv))); float2 uv_delta_y = abs(dFdyCoarse(float2(uv))); )"); - } - else - { - out.Write(R"( + } + else + { + out.Write(R"( float2 uv_delta_x = abs(dFdx(float2(uv))); float2 uv_delta_y = abs(dFdy(float2(uv))); -)"); - } - } - else if (api_type == APIType::D3D) - { - ASSERT(g_ActiveConfig.backend_info.bSupportsCoarseDerivatives); - out.Write(R"( - float2 uv_delta_x = abs(ddx_coarse(float2(uv))); - float2 uv_delta_y = abs(ddy_coarse(float2(uv))); )"); } @@ -869,16 +795,8 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos WriteBitfieldExtractHeader(out, api_type, host_config); WritePixelShaderCommonHeader(out, api_type, host_config, uid_data->bounding_box); - if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) - { - out.Write("\n#define sampleTextureWrapper(texmap, uv, layer) " - "sampleTexture(texmap, samp[texmap], uv, layer)\n"); - } - else if (api_type == APIType::D3D) - { - out.Write("\n#define sampleTextureWrapper(texmap, uv, layer) " - "sampleTexture(texmap, tex[texmap], samp[texmap], uv, layer)\n"); - } + out.Write("\n#define sampleTextureWrapper(texmap, uv, layer) " + "sampleTexture(texmap, samp[texmap], uv, layer)\n"); if (uid_data->forced_early_z && g_ActiveConfig.backend_info.bSupportsEarlyZ) { @@ -915,16 +833,8 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos // all of the // ARB_image_load_store extension yet. - // D3D11 also has a way to force the driver to enable early-z, so we're fine here. - if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) - { - // This is a #define which signals whatever early-z method the driver supports. - out.Write("FORCE_EARLY_Z; \n"); - } - else - { - out.Write("[earlydepthstencil]\n"); - } + // This is a #define which signals whatever early-z method the driver supports. 
+ out.Write("FORCE_EARLY_Z; \n"); } // Only use dual-source blending when required on drivers that don't support it very well. @@ -943,166 +853,119 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos use_shader_blend || use_shader_logic_op || DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DISCARD_WITH_EARLY_Z); - if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) - { #ifdef __APPLE__ - // Framebuffer fetch is only supported by Metal, so ensure that we're running Vulkan (MoltenVK) - // if we want to use it. - if (api_type == APIType::Vulkan) + // Framebuffer fetch is only supported by Metal, so ensure that we're running Vulkan (MoltenVK) + // if we want to use it. + if (api_type == APIType::Vulkan) + { + if (use_dual_source) { - if (use_dual_source) - { - out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 {};\n" - "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n", - use_framebuffer_fetch ? "real_ocol0" : "ocol0"); - } - else - { - // Metal doesn't support a single unified variable for both input and output, - // so when using framebuffer fetch, we declare the input separately below. - out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 {};\n", - use_framebuffer_fetch ? "real_ocol0" : "ocol0"); - } - - if (use_framebuffer_fetch) - { - // Subpass inputs will be converted to framebuffer fetch by SPIRV-Cross. - out.Write("INPUT_ATTACHMENT_BINDING(0, 0, 0) uniform subpassInput in_ocol0;\n"); - } - } - else -#endif - { - bool has_broken_decoration = - DriverDetails::HasBug(DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION); - - out.Write("{} {} vec4 {};\n", - has_broken_decoration ? "FRAGMENT_OUTPUT_LOCATION(0)" : - "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0)", - use_framebuffer_fetch ? "FRAGMENT_INOUT" : "out", + out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 {};\n" + "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n", use_framebuffer_fetch ? 
"real_ocol0" : "ocol0"); - - if (use_dual_source) - { - out.Write("{} out vec4 ocol1;\n", has_broken_decoration ? - "FRAGMENT_OUTPUT_LOCATION(1)" : - "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1)"); - } - } - - if (uid_data->per_pixel_depth) - out.Write("#define depth gl_FragDepth\n"); - - if (host_config.backend_geometry_shaders) - { - out.Write("VARYING_LOCATION(0) in VertexData {{\n"); - GenerateVSOutputMembers(out, api_type, uid_data->genMode_numtexgens, host_config, - GetInterpolationQualifier(msaa, ssaa, true, true)); - - if (stereo) - out.Write("\tflat int layer;\n"); - - out.Write("}};\n"); } else { - // Let's set up attributes - u32 counter = 0; - out.Write("VARYING_LOCATION({}) {} in float4 colors_0;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); - out.Write("VARYING_LOCATION({}) {} in float4 colors_1;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); - for (u32 i = 0; i < uid_data->genMode_numtexgens; ++i) - { - out.Write("VARYING_LOCATION({}) {} in float3 tex{};\n", counter++, - GetInterpolationQualifier(msaa, ssaa), i); - } - if (!host_config.fast_depth_calc) - { - out.Write("VARYING_LOCATION({}) {} in float4 clipPos;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); - } - if (per_pixel_lighting) - { - out.Write("VARYING_LOCATION({}) {} in float3 Normal;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); - out.Write("VARYING_LOCATION({}) {} in float3 WorldPos;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); - } + // Metal doesn't support a single unified variable for both input and output, + // so when using framebuffer fetch, we declare the input separately below. + out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 {};\n", + use_framebuffer_fetch ? "real_ocol0" : "ocol0"); } - out.Write("void main()\n{{\n"); - out.Write("\tfloat4 rawpos = gl_FragCoord;\n"); - if (use_framebuffer_fetch) { - // Store off a copy of the initial framebuffer value. - // - // If FB_FETCH_VALUE isn't defined (i.e. 
no special keyword for fetching from the - // framebuffer), we read from real_ocol0. - out.Write("#ifdef FB_FETCH_VALUE\n" - "\tfloat4 initial_ocol0 = FB_FETCH_VALUE;\n" - "#else\n" - "\tfloat4 initial_ocol0 = real_ocol0;\n" - "#endif\n"); - - // QComm's Adreno driver doesn't seem to like using the framebuffer_fetch value as an - // intermediate value with multiple reads & modifications, so we pull out the "real" output - // value above and use a temporary for calculations, then set the output value once at the - // end of the shader. - out.Write("\tfloat4 ocol0;\n"); - } - - if (use_shader_blend) - { - out.Write("\tfloat4 ocol1;\n"); + // Subpass inputs will be converted to framebuffer fetch by SPIRV-Cross. + out.Write("INPUT_ATTACHMENT_BINDING(0, 0, 0) uniform subpassInput in_ocol0;\n"); } } - else // D3D + else +#endif { - out.Write("void main(\n"); - if (uid_data->uint_output) - { - out.Write(" out uint4 ocol0 : SV_Target,\n"); - } - else - { - out.Write(" out float4 ocol0 : SV_Target0,\n" - " out float4 ocol1 : SV_Target1,\n"); - } - out.Write("{}" - " in float4 rawpos : SV_Position,\n", - uid_data->per_pixel_depth ? " out float depth : SV_Depth,\n" : ""); + bool has_broken_decoration = + DriverDetails::HasBug(DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION); - out.Write(" in {} float4 colors_0 : COLOR0,\n", GetInterpolationQualifier(msaa, ssaa)); - out.Write(" in {} float4 colors_1 : COLOR1\n", GetInterpolationQualifier(msaa, ssaa)); + out.Write("{} {} {} {};\n", + has_broken_decoration ? "FRAGMENT_OUTPUT_LOCATION(0)" : + "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0)", + use_framebuffer_fetch ? "FRAGMENT_INOUT" : "out", + uid_data->uint_output ? "uvec4" : "vec4", + use_framebuffer_fetch ? "real_ocol0" : "ocol0"); - // compute window position if needed because binding semantic WPOS is not widely supported + if (use_dual_source) + { + out.Write("{} out {} ocol1;\n", + has_broken_decoration ? 
"FRAGMENT_OUTPUT_LOCATION(1)" : + "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1)", + uid_data->uint_output ? "uvec4" : "vec4"); + } + } + + if (uid_data->per_pixel_depth) + out.Write("#define depth gl_FragDepth\n"); + + if (host_config.backend_geometry_shaders) + { + out.Write("VARYING_LOCATION(0) in VertexData {{\n"); + GenerateVSOutputMembers(out, api_type, uid_data->genMode_numtexgens, host_config, + GetInterpolationQualifier(msaa, ssaa, true, true), ShaderStage::Pixel); + + if (stereo) + out.Write("\tflat int layer;\n"); + + out.Write("}};\n"); + } + else + { + // Let's set up attributes + u32 counter = 0; + out.Write("VARYING_LOCATION({}) {} in float4 colors_0;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); + out.Write("VARYING_LOCATION({}) {} in float4 colors_1;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); for (u32 i = 0; i < uid_data->genMode_numtexgens; ++i) { - out.Write(",\n in {} float3 tex{} : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa), i, - i); + out.Write("VARYING_LOCATION({}) {} in float3 tex{};\n", counter++, + GetInterpolationQualifier(msaa, ssaa), i); } if (!host_config.fast_depth_calc) { - out.Write(",\n in {} float4 clipPos : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa), - uid_data->genMode_numtexgens); + out.Write("VARYING_LOCATION({}) {} in float4 clipPos;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); } if (per_pixel_lighting) { - out.Write(",\n in {} float3 Normal : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa), - uid_data->genMode_numtexgens + 1); - out.Write(",\n in {} float3 WorldPos : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa), - uid_data->genMode_numtexgens + 2); + out.Write("VARYING_LOCATION({}) {} in float3 Normal;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); + out.Write("VARYING_LOCATION({}) {} in float3 WorldPos;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); } - if (host_config.backend_geometry_shaders) - { - out.Write(",\n in float clipDist0 : 
SV_ClipDistance0\n" - ",\n in float clipDist1 : SV_ClipDistance1\n"); - } - if (stereo) - out.Write(",\n in uint layer : SV_RenderTargetArrayIndex\n"); - out.Write(" ) {{\n"); + } + + out.Write("void main()\n{{\n"); + out.Write("\tfloat4 rawpos = gl_FragCoord;\n"); + + if (use_framebuffer_fetch) + { + // Store off a copy of the initial framebuffer value. + // + // If FB_FETCH_VALUE isn't defined (i.e. no special keyword for fetching from the + // framebuffer), we read from real_ocol0. + out.Write("#ifdef FB_FETCH_VALUE\n" + "\tfloat4 initial_ocol0 = FB_FETCH_VALUE;\n" + "#else\n" + "\tfloat4 initial_ocol0 = real_ocol0;\n" + "#endif\n"); + + // QComm's Adreno driver doesn't seem to like using the framebuffer_fetch value as an + // intermediate value with multiple reads & modifications, so we pull out the "real" output + // value above and use a temporary for calculations, then set the output value once at the + // end of the shader. + out.Write("\tfloat4 ocol0;\n"); + } + + if (use_shader_blend) + { + out.Write("\tfloat4 ocol1;\n"); } if (!stereo) out.Write("\tint layer = 0;\n"); diff --git a/Source/Core/VideoCommon/PostProcessing.cpp b/Source/Core/VideoCommon/PostProcessing.cpp index d9f7a8e8c6..72ee15a776 100644 --- a/Source/Core/VideoCommon/PostProcessing.cpp +++ b/Source/Core/VideoCommon/PostProcessing.cpp @@ -441,10 +441,7 @@ std::string PostProcessing::GetUniformBufferHeader() const { std::ostringstream ss; u32 unused_counter = 1; - if (g_ActiveConfig.backend_info.api_type == APIType::D3D) - ss << "cbuffer PSBlock : register(b0) {\n"; - else - ss << "UBO_BINDING(std140, 1) uniform PSBlock {\n"; + ss << "UBO_BINDING(std140, 1) uniform PSBlock {\n"; // Builtin uniforms ss << " float4 resolution;\n"; @@ -499,42 +496,20 @@ std::string PostProcessing::GetHeader() const { std::ostringstream ss; ss << GetUniformBufferHeader(); - if (g_ActiveConfig.backend_info.api_type == APIType::D3D) + ss << "SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"; + + if 
(g_ActiveConfig.backend_info.bSupportsGeometryShaders) { - ss << "Texture2DArray samp0 : register(t0);\n"; - ss << "SamplerState samp0_ss : register(s0);\n"; + ss << "VARYING_LOCATION(0) in VertexData {\n"; + ss << " float3 v_tex0;\n"; + ss << "};\n"; } else { - ss << "SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"; - - if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) - { - ss << "VARYING_LOCATION(0) in VertexData {\n"; - ss << " float3 v_tex0;\n"; - ss << "};\n"; - } - else - { - ss << "VARYING_LOCATION(0) in float3 v_tex0;\n"; - } - - ss << "FRAGMENT_OUTPUT_LOCATION(0) out float4 ocol0;\n"; + ss << "VARYING_LOCATION(0) in float3 v_tex0;\n"; } - // Rename main, since we need to set up globals - if (g_ActiveConfig.backend_info.api_type == APIType::D3D) - { - ss << R"( -#define main real_main -static float3 v_tex0; -static float4 ocol0; - -// Wrappers for sampling functions. -#define texture(sampler, coords) sampler.Sample(sampler##_ss, coords) -#define textureOffset(sampler, coords, offset) sampler.Sample(sampler##_ss, coords, offset) -)"; - } + ss << "FRAGMENT_OUTPUT_LOCATION(0) out float4 ocol0;\n"; ss << R"( float4 Sample() { return texture(samp0, v_tex0); } @@ -591,22 +566,7 @@ void SetOutput(float4 color) std::string PostProcessing::GetFooter() const { - if (g_ActiveConfig.backend_info.api_type == APIType::D3D) - { - return R"( - -#undef main -void main(in float3 v_tex0_ : TEXCOORD0, out float4 ocol0_ : SV_Target) -{ - v_tex0 = v_tex0_; - real_main(); - ocol0_ = ocol0; -})"; - } - else - { - return {}; - } + return {}; } bool PostProcessing::CompileVertexShader() @@ -614,28 +574,20 @@ bool PostProcessing::CompileVertexShader() std::ostringstream ss; ss << GetUniformBufferHeader(); - if (g_ActiveConfig.backend_info.api_type == APIType::D3D) + if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) { - ss << "void main(in uint id : SV_VertexID, out float3 v_tex0 : TEXCOORD0,\n"; - ss << " out float4 opos : SV_Position) {\n"; + ss << 
"VARYING_LOCATION(0) out VertexData {\n"; + ss << " float3 v_tex0;\n"; + ss << "};\n"; } else { - if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) - { - ss << "VARYING_LOCATION(0) out VertexData {\n"; - ss << " float3 v_tex0;\n"; - ss << "};\n"; - } - else - { - ss << "VARYING_LOCATION(0) out float3 v_tex0;\n"; - } - - ss << "#define id gl_VertexID\n"; - ss << "#define opos gl_Position\n"; - ss << "void main() {\n"; + ss << "VARYING_LOCATION(0) out float3 v_tex0;\n"; } + + ss << "#define id gl_VertexID\n"; + ss << "#define opos gl_Position\n"; + ss << "void main() {\n"; ss << " v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n"; ss << " opos = float4(v_tex0.xy * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);\n"; ss << " v_tex0 = float3(src_rect.xy + (src_rect.zw * v_tex0.xy), float(src_layer));\n"; diff --git a/Source/Core/VideoCommon/ShaderGenCommon.cpp b/Source/Core/VideoCommon/ShaderGenCommon.cpp index 0a965e6f62..511643e83a 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.cpp +++ b/Source/Core/VideoCommon/ShaderGenCommon.cpp @@ -93,20 +93,7 @@ std::string GetDiskShaderCacheFileName(APIType api_type, const char* type, bool void WriteIsNanHeader(ShaderCode& out, APIType api_type) { - if (api_type == APIType::D3D) - { - out.Write("bool dolphin_isnan(float f) {{\n" - " // Workaround for the HLSL compiler deciding that isnan can never be true and\n" - " // optimising away the call, even though the value can actually be NaN\n" - " // Just look for the bit pattern that indicates NaN instead\n" - " return (asint(f) & 0x7FFFFFFF) > 0x7F800000;\n" - "}}\n\n"); - // If isfinite is needed, (asint(f) & 0x7F800000) != 0x7F800000 can be used - } - else - { - out.Write("#define dolphin_isnan(f) isnan(f)\n"); - } + out.Write("#define dolphin_isnan(f) isnan(f)\n"); } void WriteBitfieldExtractHeader(ShaderCode& out, APIType api_type, @@ -135,14 +122,15 @@ void WriteBitfieldExtractHeader(ShaderCode& out, APIType api_type, static void 
DefineOutputMember(ShaderCode& object, APIType api_type, std::string_view qualifier, std::string_view type, std::string_view name, int var_index, - std::string_view semantic = {}, int semantic_index = -1) + ShaderStage stage, std::string_view semantic = {}, + int semantic_index = -1) { object.Write("\t{} {} {}", qualifier, type, name); if (var_index != -1) object.Write("{}", var_index); - if (api_type == APIType::D3D && !semantic.empty()) + if (api_type == APIType::D3D && !semantic.empty() && stage == ShaderStage::Geometry) { if (semantic_index != -1) object.Write(" : {}{}", semantic, semantic_index); @@ -154,30 +142,83 @@ static void DefineOutputMember(ShaderCode& object, APIType api_type, std::string } void GenerateVSOutputMembers(ShaderCode& object, APIType api_type, u32 texgens, - const ShaderHostConfig& host_config, std::string_view qualifier) + const ShaderHostConfig& host_config, std::string_view qualifier, + ShaderStage stage) { - DefineOutputMember(object, api_type, qualifier, "float4", "pos", -1, "SV_Position"); - DefineOutputMember(object, api_type, qualifier, "float4", "colors_", 0, "COLOR", 0); - DefineOutputMember(object, api_type, qualifier, "float4", "colors_", 1, "COLOR", 1); - - for (unsigned int i = 0; i < texgens; ++i) - DefineOutputMember(object, api_type, qualifier, "float3", "tex", i, "TEXCOORD", i); - - if (!host_config.fast_depth_calc) - DefineOutputMember(object, api_type, qualifier, "float4", "clipPos", -1, "TEXCOORD", texgens); - - if (host_config.per_pixel_lighting) + // SPIRV-Cross names all semantics as "TEXCOORD" + // Unfortunately Geometry shaders (which also uses this function) + // aren't supported. The output semantic name needs to match + // up with the input semantic name for both the next stage (pixel shader) + // and the previous stage (vertex shader), so + // we need to handle geometry in a special way... 
+ if (api_type == APIType::D3D && stage == ShaderStage::Geometry) { - DefineOutputMember(object, api_type, qualifier, "float3", "Normal", -1, "TEXCOORD", - texgens + 1); - DefineOutputMember(object, api_type, qualifier, "float3", "WorldPos", -1, "TEXCOORD", - texgens + 2); + DefineOutputMember(object, api_type, qualifier, "float4", "pos", -1, stage, "TEXCOORD", 0); + DefineOutputMember(object, api_type, qualifier, "float4", "colors_", 0, stage, "TEXCOORD", 1); + DefineOutputMember(object, api_type, qualifier, "float4", "colors_", 1, stage, "TEXCOORD", 2); + + const unsigned int index_base = 3; + unsigned int index_offset = 0; + if (host_config.backend_geometry_shaders) + { + DefineOutputMember(object, api_type, qualifier, "float", "clipDist", 0, stage, "TEXCOORD", + index_base + index_offset); + DefineOutputMember(object, api_type, qualifier, "float", "clipDist", 1, stage, "TEXCOORD", + index_base + index_offset + 1); + index_offset += 2; + } + + for (unsigned int i = 0; i < texgens; ++i) + { + DefineOutputMember(object, api_type, qualifier, "float3", "tex", i, stage, "TEXCOORD", + index_base + index_offset + i); + } + index_offset += texgens; + + if (!host_config.fast_depth_calc) + { + DefineOutputMember(object, api_type, qualifier, "float4", "clipPos", -1, stage, "TEXCOORD", + index_base + index_offset); + index_offset++; + } + + if (host_config.per_pixel_lighting) + { + DefineOutputMember(object, api_type, qualifier, "float3", "Normal", -1, stage, "TEXCOORD", + index_base + index_offset); + DefineOutputMember(object, api_type, qualifier, "float3", "WorldPos", -1, stage, "TEXCOORD", + index_base + index_offset + 1); + index_offset += 2; + } } - - if (host_config.backend_geometry_shaders) + else { - DefineOutputMember(object, api_type, qualifier, "float", "clipDist", 0, "SV_ClipDistance", 0); - DefineOutputMember(object, api_type, qualifier, "float", "clipDist", 1, "SV_ClipDistance", 1); + DefineOutputMember(object, api_type, qualifier, "float4", "pos", -1, stage, 
"SV_Position"); + DefineOutputMember(object, api_type, qualifier, "float4", "colors_", 0, stage, "COLOR", 0); + DefineOutputMember(object, api_type, qualifier, "float4", "colors_", 1, stage, "COLOR", 1); + + if (host_config.backend_geometry_shaders) + { + DefineOutputMember(object, api_type, qualifier, "float", "clipDist", 0, stage, + "SV_ClipDistance", 0); + DefineOutputMember(object, api_type, qualifier, "float", "clipDist", 1, stage, + "SV_ClipDistance", 1); + } + + for (unsigned int i = 0; i < texgens; ++i) + DefineOutputMember(object, api_type, qualifier, "float3", "tex", i, stage, "TEXCOORD", i); + + if (!host_config.fast_depth_calc) + DefineOutputMember(object, api_type, qualifier, "float4", "clipPos", -1, stage, "TEXCOORD", + texgens); + + if (host_config.per_pixel_lighting) + { + DefineOutputMember(object, api_type, qualifier, "float3", "Normal", -1, stage, "TEXCOORD", + texgens + 1); + DefineOutputMember(object, api_type, qualifier, "float3", "WorldPos", -1, stage, "TEXCOORD", + texgens + 2); + } } } diff --git a/Source/Core/VideoCommon/ShaderGenCommon.h b/Source/Core/VideoCommon/ShaderGenCommon.h index 88fb5c7353..1cbff2bfcf 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/ShaderGenCommon.h @@ -18,6 +18,7 @@ #include "Common/StringUtil.h" #include "Common/TypeUtils.h" +#include "VideoCommon/AbstractShader.h" #include "VideoCommon/VideoCommon.h" /** @@ -189,7 +190,8 @@ void WriteBitfieldExtractHeader(ShaderCode& out, APIType api_type, const ShaderHostConfig& host_config); void GenerateVSOutputMembers(ShaderCode& object, APIType api_type, u32 texgens, - const ShaderHostConfig& host_config, std::string_view qualifier); + const ShaderHostConfig& host_config, std::string_view qualifier, + ShaderStage stage); void AssignVSOutputMembers(ShaderCode& object, std::string_view a, std::string_view b, u32 texgens, const ShaderHostConfig& host_config); @@ -220,57 +222,34 @@ void WriteSwitch(ShaderCode& out, APIType ApiType, 
std::string_view variable, const Common::EnumMap& values, int indent, bool break_) { - const bool make_switch = (ApiType == APIType::D3D); - // The second template argument is needed to avoid compile errors from ambiguity with multiple // enums with the same number of members in GCC prior to 8. See https://godbolt.org/z/xcKaW1seW // and https://godbolt.org/z/hz7Yqq1P5 using enum_type = decltype(last_member); - // {:{}} is used to indent by formatting an empty string with a variable width - if (make_switch) - { - out.Write("{:{}}switch ({}) {{\n", "", indent, variable); - for (u32 i = 0; i <= static_cast(last_member); i++) + // Generate a tree of if statements recursively + // std::function must be used because auto won't capture before initialization and thus can't be + // used recursively + std::function BuildTree = [&](u32 cur_indent, u32 low, u32 high) { + // Each generated statement is for low <= x < high + if (high == low + 1) { - const enum_type key = static_cast(i); - - // Assumes existence of an EnumFormatter - out.Write("{:{}}case {:s}:\n", "", indent, key); + // Down to 1 case (low <= x < low + 1 means x == low) + const enum_type key = static_cast(low); // Note that this indentation behaves poorly for multi-line code - if (!values[key].empty()) - out.Write("{:{}} {}\n", "", indent, values[key]); - if (break_) - out.Write("{:{}} break;\n", "", indent); + out.Write("{:{}}{} // {}\n", "", cur_indent, values[key], key); } - out.Write("{:{}}}}\n", "", indent); - } - else - { - // Generate a tree of if statements recursively - // std::function must be used because auto won't capture before initialization and thus can't be - // used recursively - std::function BuildTree = [&](u32 cur_indent, u32 low, u32 high) { - // Each generated statement is for low <= x < high - if (high == low + 1) - { - // Down to 1 case (low <= x < low + 1 means x == low) - const enum_type key = static_cast(low); - // Note that this indentation behaves poorly for multi-line code - 
out.Write("{:{}}{} // {}\n", "", cur_indent, values[key], key); - } - else - { - u32 mid = low + ((high - low) / 2); - out.Write("{:{}}if ({} < {}u) {{\n", "", cur_indent, variable, mid); - BuildTree(cur_indent + 2, low, mid); - out.Write("{:{}}}} else {{\n", "", cur_indent); - BuildTree(cur_indent + 2, mid, high); - out.Write("{:{}}}}\n", "", cur_indent); - } - }; - BuildTree(indent, 0, static_cast(last_member) + 1); - } + else + { + u32 mid = low + ((high - low) / 2); + out.Write("{:{}}if ({} < {}u) {{\n", "", cur_indent, variable, mid); + BuildTree(cur_indent + 2, low, mid); + out.Write("{:{}}}} else {{\n", "", cur_indent); + BuildTree(cur_indent + 2, mid, high); + out.Write("{:{}}}}\n", "", cur_indent); + } + }; + BuildTree(indent, 0, static_cast(last_member) + 1); } // Constant variable names diff --git a/Source/Core/VideoCommon/TextureConversionShader.cpp b/Source/Core/VideoCommon/TextureConversionShader.cpp index 19f2a384f0..dd87dfe0af 100644 --- a/Source/Core/VideoCommon/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/TextureConversionShader.cpp @@ -56,48 +56,27 @@ u16 GetEncodedSampleCount(EFBCopyFormat format) static void WriteHeader(ShaderCode& code, APIType api_type) { - if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) + // left, top, of source rectangle within source texture + // width of the destination rectangle, scale_factor (1 or 2) + code.Write("UBO_BINDING(std140, 1) uniform PSBlock {{\n" + " int4 position;\n" + " float y_scale;\n" + " float gamma_rcp;\n" + " float2 clamp_tb;\n" + " float3 filter_coefficients;\n" + "}};\n"); + if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) { - // left, top, of source rectangle within source texture - // width of the destination rectangle, scale_factor (1 or 2) - code.Write("UBO_BINDING(std140, 1) uniform PSBlock {{\n" - " int4 position;\n" - " float y_scale;\n" - " float gamma_rcp;\n" - " float2 clamp_tb;\n" - " float3 filter_coefficients;\n" + code.Write("VARYING_LOCATION(0) 
in VertexData {{\n" + " float3 v_tex0;\n" "}};\n"); - if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) - { - code.Write("VARYING_LOCATION(0) in VertexData {{\n" - " float3 v_tex0;\n" - "}};\n"); - } - else - { - code.Write("VARYING_LOCATION(0) in float3 v_tex0;\n"); - } - code.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n" - "FRAGMENT_OUTPUT_LOCATION(0) out float4 ocol0;\n"); } - else // D3D + else { - code.Write("cbuffer PSBlock : register(b0) {{\n" - " int4 position;\n" - " float y_scale;\n" - " float gamma_rcp;\n" - " float2 clamp_tb;\n" - " float3 filter_coefficients;\n" - "}};\n" - "sampler samp0 : register(s0);\n" - "Texture2DArray Tex0 : register(t0);\n"); + code.Write("VARYING_LOCATION(0) in float3 v_tex0;\n"); } - - // D3D does not have roundEven(), only round(), which is specified "to the nearest integer". - // This differs from the roundEven() behavior, but to get consistency across drivers in OpenGL - // we need to use roundEven(). - if (api_type == APIType::D3D) - code.Write("#define roundEven(x) round(x)\n"); + code.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n" + "FRAGMENT_OUTPUT_LOCATION(0) out float4 ocol0;\n"); // Alpha channel in the copy is set to 1 the EFB format does not have an alpha channel. 
code.Write("float4 RGBA8ToRGB8(float4 src)\n" @@ -149,10 +128,7 @@ static void WriteSampleFunction(ShaderCode& code, const EFBCopyParams& params, A code.Write("("); } - if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) - code.Write("texture(samp0, float3("); - else - code.Write("Tex0.Sample(samp0, float3("); + code.Write("texture(samp0, float3("); code.Write("uv.x + float(xoffset) * pixel_size.x, "); @@ -211,23 +187,10 @@ static void WriteSwizzler(ShaderCode& code, const EFBCopyParams& params, EFBCopy WriteHeader(code, api_type); WriteSampleFunction(code, params, api_type); - if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) - { - code.Write("void main()\n" - "{{\n" - " int2 sampleUv;\n" - " int2 uv1 = int2(gl_FragCoord.xy);\n"); - } - else // D3D - { - code.Write("void main(\n" - " in float3 v_tex0 : TEXCOORD0,\n" - " in float4 rawpos : SV_Position,\n" - " out float4 ocol0 : SV_Target)\n" - "{{\n" - " int2 sampleUv;\n" - " int2 uv1 = int2(rawpos.xy);\n"); - } + code.Write("void main()\n" + "{{\n" + " int2 sampleUv;\n" + " int2 uv1 = int2(gl_FragCoord.xy);\n"); const int blkW = TexDecoder_GetEFBCopyBlockWidthInTexels(format); const int blkH = TexDecoder_GetEFBCopyBlockHeightInTexels(format); @@ -853,11 +816,7 @@ static const char decoding_shader_header[] = R"( #define HAS_PALETTE 1 #endif -#ifdef API_D3D -cbuffer UBO : register(b0) { -#else UBO_BINDING(std140, 1) uniform UBO { -#endif uint2 u_dst_size; uint2 u_src_size; uint u_src_offset; @@ -865,37 +824,6 @@ UBO_BINDING(std140, 1) uniform UBO { uint u_palette_offset; }; -#ifdef API_D3D - -Buffer s_input_buffer : register(t0); -#ifdef HAS_PALETTE -Buffer s_palette_buffer : register(t1); -#endif - -RWTexture2DArray output_image : register(u0); - -// Helpers for reading/writing. 
-#define texelFetch(buffer, pos) buffer.Load(pos) -#define imageStore(image, coords, value) image[coords] = value -#define GROUP_MEMORY_BARRIER_WITH_SYNC GroupMemoryBarrierWithGroupSync(); -#define GROUP_SHARED groupshared - -#define DEFINE_MAIN(lx, ly) \ - [numthreads(lx, ly, 1)] \ - void main(uint3 gl_WorkGroupID : SV_GroupId, \ - uint3 gl_LocalInvocationID : SV_GroupThreadID, \ - uint3 gl_GlobalInvocationID : SV_DispatchThreadID) - -uint bitfieldExtract(uint val, int off, int size) -{ - // This built-in function is only support in OpenGL 4.0+ and ES 3.1+\n" - // Microsoft's HLSL compiler automatically optimises this to a bitfield extract instruction. - uint mask = uint((1 << size) - 1); - return uint(val >> off) & mask; -} - -#else - TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer s_input_buffer; #ifdef HAS_PALETTE TEXEL_BUFFER_BINDING(1) uniform usamplerBuffer s_palette_buffer; @@ -909,8 +837,6 @@ IMAGE_BINDING(rgba8, 0) uniform writeonly image2DArray output_image; layout(local_size_x = lx, local_size_y = ly) in; \ void main() -#endif - uint Swap16(uint v) { // Convert BE to LE. 
@@ -1498,48 +1424,29 @@ float4 DecodePixel(int val) ss << "\n"; - if (api_type == APIType::D3D) - { - ss << "Buffer tex0 : register(t0);\n"; - ss << "Texture2DArray tex1 : register(t1);\n"; - ss << "SamplerState samp1 : register(s1);\n"; - ss << "cbuffer PSBlock : register(b0) {\n"; - } - else - { - ss << "TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer samp0;\n"; - ss << "SAMPLER_BINDING(1) uniform sampler2DArray samp1;\n"; - ss << "UBO_BINDING(std140, 1) uniform PSBlock {\n"; - } + ss << "TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer samp0;\n"; + ss << "SAMPLER_BINDING(1) uniform sampler2DArray samp1;\n"; + ss << "UBO_BINDING(std140, 1) uniform PSBlock {\n"; ss << " float multiplier;\n"; ss << " int texel_buffer_offset;\n"; ss << "};\n"; - if (api_type == APIType::D3D) + if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) { - ss << "void main(in float3 v_tex0 : TEXCOORD0, out float4 ocol0 : SV_Target) {\n"; - ss << " int src = int(round(tex1.Sample(samp1, v_tex0).r * multiplier));\n"; - ss << " src = int(tex0.Load(src + texel_buffer_offset).r);\n"; + ss << "VARYING_LOCATION(0) in VertexData {\n"; + ss << " float3 v_tex0;\n"; + ss << "};\n"; } else { - if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) - { - ss << "VARYING_LOCATION(0) in VertexData {\n"; - ss << " float3 v_tex0;\n"; - ss << "};\n"; - } - else - { - ss << "VARYING_LOCATION(0) in float3 v_tex0;\n"; - } - ss << "FRAGMENT_OUTPUT_LOCATION(0) out float4 ocol0;\n"; - ss << "void main() {\n"; - ss << " float3 coords = v_tex0;\n"; - ss << " int src = int(round(texture(samp1, coords).r * multiplier));\n"; - ss << " src = int(texelFetch(samp0, src + texel_buffer_offset).r);\n"; + ss << "VARYING_LOCATION(0) in float3 v_tex0;\n"; } + ss << "FRAGMENT_OUTPUT_LOCATION(0) out float4 ocol0;\n"; + ss << "void main() {\n"; + ss << " float3 coords = v_tex0;\n"; + ss << " int src = int(round(texture(samp1, coords).r * multiplier));\n"; + ss << " src = int(texelFetch(samp0, src + 
texel_buffer_offset).r);\n"; ss << " src = ((src << 8) & 0xFF00) | (src >> 8);\n"; ss << " ocol0 = DecodePixel(src);\n"; diff --git a/Source/Core/VideoCommon/TextureConverterShaderGen.cpp b/Source/Core/VideoCommon/TextureConverterShaderGen.cpp index c5a440b801..5b6c027c70 100644 --- a/Source/Core/VideoCommon/TextureConverterShaderGen.cpp +++ b/Source/Core/VideoCommon/TextureConverterShaderGen.cpp @@ -29,26 +29,13 @@ TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_i static void WriteHeader(APIType api_type, ShaderCode& out) { - if (api_type == APIType::D3D) - { - out.Write("cbuffer PSBlock : register(b0) {{\n" - " float2 src_offset, src_size;\n" - " float3 filter_coefficients;\n" - " float gamma_rcp;\n" - " float2 clamp_tb;\n" - " float pixel_height;\n" - "}};\n\n"); - } - else if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) - { - out.Write("UBO_BINDING(std140, 1) uniform PSBlock {{\n" - " float2 src_offset, src_size;\n" - " float3 filter_coefficients;\n" - " float gamma_rcp;\n" - " float2 clamp_tb;\n" - " float pixel_height;\n" - "}};\n"); - } + out.Write("UBO_BINDING(std140, 1) uniform PSBlock {{\n" + " float2 src_offset, src_size;\n" + " float3 filter_coefficients;\n" + " float gamma_rcp;\n" + " float2 clamp_tb;\n" + " float pixel_height;\n" + "}};\n"); } ShaderCode GenerateVertexShader(APIType api_type) @@ -56,27 +43,19 @@ ShaderCode GenerateVertexShader(APIType api_type) ShaderCode out; WriteHeader(api_type, out); - if (api_type == APIType::D3D) + if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) { - out.Write("void main(in uint id : SV_VertexID, out float3 v_tex0 : TEXCOORD0,\n" - " out float4 opos : SV_Position) {{\n"); + out.Write("VARYING_LOCATION(0) out VertexData {{\n" + " float3 v_tex0;\n" + "}};\n"); } - else if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) + else { - if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) - { - out.Write("VARYING_LOCATION(0) out VertexData {{\n" - 
" float3 v_tex0;\n" - "}};\n"); - } - else - { - out.Write("VARYING_LOCATION(0) out float3 v_tex0;\n"); - } - out.Write("#define id gl_VertexID\n" - "#define opos gl_Position\n" - "void main() {{\n"); + out.Write("VARYING_LOCATION(0) out float3 v_tex0;\n"); } + out.Write("#define id gl_VertexID\n" + "#define opos gl_Position\n" + "void main() {{\n"); out.Write(" v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n"); out.Write( " opos = float4(v_tex0.xy * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);\n"); @@ -98,38 +77,24 @@ ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data) ShaderCode out; WriteHeader(api_type, out); - if (api_type == APIType::D3D) + out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"); + out.Write("float4 SampleEFB(float3 uv, float y_offset) {{\n" + " return texture(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), " + "clamp_tb.x, clamp_tb.y), {}));\n" + "}}\n", + mono_depth ? "0.0" : "uv.z"); + if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) { - out.Write("Texture2DArray tex0 : register(t0);\n" - "SamplerState samp0 : register(s0);\n" - "float4 SampleEFB(float3 uv, float y_offset) {{\n" - " return tex0.Sample(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), " - "clamp_tb.x, clamp_tb.y), {}));\n" - "}}\n\n", - mono_depth ? "0.0" : "uv.z"); - out.Write("void main(in float3 v_tex0 : TEXCOORD0, out float4 ocol0 : SV_Target)\n{{\n"); + out.Write("VARYING_LOCATION(0) in VertexData {{\n" + " float3 v_tex0;\n" + "}};\n"); } - else if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) + else { - out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"); - out.Write("float4 SampleEFB(float3 uv, float y_offset) {{\n" - " return texture(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), " - "clamp_tb.x, clamp_tb.y), {}));\n" - "}}\n", - mono_depth ? 
"0.0" : "uv.z"); - if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) - { - out.Write("VARYING_LOCATION(0) in VertexData {{\n" - " float3 v_tex0;\n" - "}};\n"); - } - else - { - out.Write("VARYING_LOCATION(0) in vec3 v_tex0;\n"); - } - out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n" - "void main()\n{{\n"); + out.Write("VARYING_LOCATION(0) in vec3 v_tex0;\n"); } + out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n" + "void main()\n{{\n"); // The copy filter applies to both color and depth copies. This has been verified on hardware. // The filter is only applied to the RGB channels, the alpha channel is left intact. diff --git a/Source/Core/VideoCommon/UberShaderCommon.cpp b/Source/Core/VideoCommon/UberShaderCommon.cpp index 4a8fba8839..b9c1306f8b 100644 --- a/Source/Core/VideoCommon/UberShaderCommon.cpp +++ b/Source/Core/VideoCommon/UberShaderCommon.cpp @@ -77,8 +77,7 @@ void WriteVertexLighting(ShaderCode& out, APIType api_type, std::string_view wor std::string_view out_color_1_var) { out.Write("// Lighting\n"); - out.Write("{}for (uint chan = 0u; chan < {}u; chan++) {{\n", - api_type == APIType::D3D ? "[loop] " : "", NUM_XF_COLOR_CHANNELS); + out.Write("for (uint chan = 0u; chan < {}u; chan++) {{\n", NUM_XF_COLOR_CHANNELS); out.Write(" uint colorreg = xfmem_color(chan);\n" " uint alphareg = xfmem_alpha(chan);\n" " int4 mat = " I_MATERIALS "[chan + 2u]; \n" diff --git a/Source/Core/VideoCommon/UberShaderPixel.cpp b/Source/Core/VideoCommon/UberShaderPixel.cpp index a96b15c83d..bfbe1a08f2 100644 --- a/Source/Core/VideoCommon/UberShaderPixel.cpp +++ b/Source/Core/VideoCommon/UberShaderPixel.cpp @@ -72,93 +72,89 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, if (per_pixel_lighting) WriteLightingFunction(out); - // Shader inputs/outputs in GLSL (HLSL is in main). 
- if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) - { #ifdef __APPLE__ - // Framebuffer fetch is only supported by Metal, so ensure that we're running Vulkan (MoltenVK) - // if we want to use it. - if (api_type == APIType::Vulkan) + // Framebuffer fetch is only supported by Metal, so ensure that we're running Vulkan (MoltenVK) + // if we want to use it. + if (api_type == APIType::Vulkan) + { + if (use_dual_source) { - if (use_dual_source) - { - out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 {};\n" - "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n", - use_framebuffer_fetch ? "real_ocol0" : "ocol0"); - } - else - { - // Metal doesn't support a single unified variable for both input and output, - // so when using framebuffer fetch, we declare the input separately below. - out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 {};\n", - use_framebuffer_fetch ? "real_ocol0" : "ocol0"); - } - - if (use_framebuffer_fetch) - { - // Subpass inputs will be converted to framebuffer fetch by SPIRV-Cross. - out.Write("INPUT_ATTACHMENT_BINDING(0, 0, 0) uniform subpassInput in_ocol0;\n"); - } - } - else -#endif - { - bool has_broken_decoration = - DriverDetails::HasBug(DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION); - - out.Write("{} {} vec4 {};\n", - has_broken_decoration ? "FRAGMENT_OUTPUT_LOCATION(0)" : - "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0)", - use_framebuffer_fetch ? "FRAGMENT_INOUT" : "out", + out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 {};\n" + "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n", use_framebuffer_fetch ? "real_ocol0" : "ocol0"); - - if (use_dual_source) - { - out.Write("{} out vec4 ocol1;\n", has_broken_decoration ? 
- "FRAGMENT_OUTPUT_LOCATION(1)" : - "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1)"); - } - } - - if (per_pixel_depth) - out.Write("#define depth gl_FragDepth\n"); - - if (host_config.backend_geometry_shaders) - { - out.Write("VARYING_LOCATION(0) in VertexData {{\n"); - GenerateVSOutputMembers(out, api_type, numTexgen, host_config, - GetInterpolationQualifier(msaa, ssaa, true, true)); - - if (stereo) - out.Write(" flat int layer;\n"); - - out.Write("}};\n\n"); } else { - // Let's set up attributes - u32 counter = 0; - out.Write("VARYING_LOCATION({}) {} in float4 colors_0;\n", counter++, + // Metal doesn't support a single unified variable for both input and output, + // so when using framebuffer fetch, we declare the input separately below. + out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 {};\n", + use_framebuffer_fetch ? "real_ocol0" : "ocol0"); + } + + if (use_framebuffer_fetch) + { + // Subpass inputs will be converted to framebuffer fetch by SPIRV-Cross. + out.Write("INPUT_ATTACHMENT_BINDING(0, 0, 0) uniform subpassInput in_ocol0;\n"); + } + } + else +#endif + { + bool has_broken_decoration = + DriverDetails::HasBug(DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION); + + out.Write("{} {} vec4 {};\n", + has_broken_decoration ? "FRAGMENT_OUTPUT_LOCATION(0)" : + "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0)", + use_framebuffer_fetch ? "FRAGMENT_INOUT" : "out", + use_framebuffer_fetch ? "real_ocol0" : "ocol0"); + + if (use_dual_source) + { + out.Write("{} out vec4 ocol1;\n", has_broken_decoration ? 
+ "FRAGMENT_OUTPUT_LOCATION(1)" : + "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1)"); + } + } + + if (per_pixel_depth) + out.Write("#define depth gl_FragDepth\n"); + + if (host_config.backend_geometry_shaders) + { + out.Write("VARYING_LOCATION(0) in VertexData {{\n"); + GenerateVSOutputMembers(out, api_type, numTexgen, host_config, + GetInterpolationQualifier(msaa, ssaa, true, true), ShaderStage::Pixel); + + if (stereo) + out.Write(" flat int layer;\n"); + + out.Write("}};\n\n"); + } + else + { + // Let's set up attributes + u32 counter = 0; + out.Write("VARYING_LOCATION({}) {} in float4 colors_0;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); + out.Write("VARYING_LOCATION({}) {} in float4 colors_1;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); + for (u32 i = 0; i < numTexgen; ++i) + { + out.Write("VARYING_LOCATION({}) {} in float3 tex{};\n", counter++, + GetInterpolationQualifier(msaa, ssaa), i); + } + if (!host_config.fast_depth_calc) + { + out.Write("VARYING_LOCATION({}) {} in float4 clipPos;\n", counter++, GetInterpolationQualifier(msaa, ssaa)); - out.Write("VARYING_LOCATION({}) {} in float4 colors_1;\n", counter++, + } + if (per_pixel_lighting) + { + out.Write("VARYING_LOCATION({}) {} in float3 Normal;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); + out.Write("VARYING_LOCATION({}) {} in float3 WorldPos;\n", counter++, GetInterpolationQualifier(msaa, ssaa)); - for (u32 i = 0; i < numTexgen; ++i) - { - out.Write("VARYING_LOCATION({}) {} in float3 tex{};\n", counter++, - GetInterpolationQualifier(msaa, ssaa), i); - } - if (!host_config.fast_depth_calc) - { - out.Write("VARYING_LOCATION({}) {} in float4 clipPos;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); - } - if (per_pixel_lighting) - { - out.Write("VARYING_LOCATION({}) {} in float3 Normal;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); - out.Write("VARYING_LOCATION({}) {} in float3 WorldPos;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); - } } } @@ 
-243,10 +239,7 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, // Doesn't look like DirectX supports this. Oh well the code path is here just in case it // supports this in the future. out.Write("int4 sampleTextureWrapper(uint texmap, int2 uv, int layer) {{\n"); - if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) - out.Write(" return sampleTexture(texmap, samp[texmap], uv, layer);\n"); - else if (api_type == APIType::D3D) - out.Write(" return sampleTexture(texmap, tex[texmap], samp[texmap], uv, layer);\n"); + out.Write(" return sampleTexture(texmap, samp[texmap], uv, layer);\n"); out.Write("}}\n\n"); } else @@ -259,15 +252,7 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, " switch(sampler_num) {{\n"); for (int i = 0; i < 8; i++) { - if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) - { - out.Write(" case {0}u: return sampleTexture({0}u, samp[{0}u], uv, layer);\n", i); - } - else if (api_type == APIType::D3D) - { - out.Write(" case {0}u: return sampleTexture({0}u, tex[{0}u], samp[{0}u], uv, layer);\n", - i); - } + out.Write(" case {0}u: return sampleTexture({0}u, samp[{0}u], uv, layer);\n", i); } out.Write(" }}\n" "}}\n\n"); @@ -522,82 +507,34 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, out.Write(")\n\n"); } - if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) + if (early_depth && host_config.backend_early_z) + out.Write("FORCE_EARLY_Z;\n"); + + out.Write("void main()\n{{\n"); + out.Write(" float4 rawpos = gl_FragCoord;\n"); + + if (use_framebuffer_fetch) { - if (early_depth && host_config.backend_early_z) - out.Write("FORCE_EARLY_Z;\n"); + // Store off a copy of the initial framebuffer value. + // + // If FB_FETCH_VALUE isn't defined (i.e. no special keyword for fetching from the + // framebuffer), we read from real_ocol0. 
+ out.Write("#ifdef FB_FETCH_VALUE\n" + " float4 initial_ocol0 = FB_FETCH_VALUE;\n" + "#else\n" + " float4 initial_ocol0 = real_ocol0;\n" + "#endif\n"); - out.Write("void main()\n{{\n"); - out.Write(" float4 rawpos = gl_FragCoord;\n"); - - if (use_framebuffer_fetch) - { - // Store off a copy of the initial framebuffer value. - // - // If FB_FETCH_VALUE isn't defined (i.e. no special keyword for fetching from the - // framebuffer), we read from real_ocol0. - out.Write("#ifdef FB_FETCH_VALUE\n" - " float4 initial_ocol0 = FB_FETCH_VALUE;\n" - "#else\n" - " float4 initial_ocol0 = real_ocol0;\n" - "#endif\n"); - - // QComm's Adreno driver doesn't seem to like using the framebuffer_fetch value as an - // intermediate value with multiple reads & modifications, so we pull out the "real" output - // value above and use a temporary for calculations, then set the output value once at the - // end of the shader. - out.Write(" float4 ocol0;\n"); - } - - if (use_shader_blend) - { - out.Write(" float4 ocol1;\n"); - } + // QComm's Adreno driver doesn't seem to like using the framebuffer_fetch value as an + // intermediate value with multiple reads & modifications, so we pull out the "real" output + // value above and use a temporary for calculations, then set the output value once at the + // end of the shader. 
+ out.Write(" float4 ocol0;\n"); } - else // D3D + + if (use_shader_blend) { - if (early_depth && host_config.backend_early_z) - out.Write("[earlydepthstencil]\n"); - - out.Write("void main(\n"); - if (uid_data->uint_output) - { - out.Write(" out uint4 ocol0 : SV_Target,\n"); - } - else - { - out.Write(" out float4 ocol0 : SV_Target0,\n" - " out float4 ocol1 : SV_Target1,\n"); - } - if (per_pixel_depth) - out.Write(" out float depth : SV_Depth,\n"); - out.Write(" in float4 rawpos : SV_Position,\n"); - out.Write(" in {} float4 colors_0 : COLOR0,\n", GetInterpolationQualifier(msaa, ssaa)); - out.Write(" in {} float4 colors_1 : COLOR1", GetInterpolationQualifier(msaa, ssaa)); - - // compute window position if needed because binding semantic WPOS is not widely supported - for (u32 i = 0; i < numTexgen; ++i) - { - out.Write(",\n in {} float3 tex{} : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa), i, - i); - } - if (!host_config.fast_depth_calc) - { - out.Write("\n,\n in {} float4 clipPos : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa), - numTexgen); - } - if (per_pixel_lighting) - { - out.Write(",\n in {} float3 Normal : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa), - numTexgen + 1); - out.Write(",\n in {} float3 WorldPos : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa), - numTexgen + 2); - } - out.Write(",\n in float clipDist0 : SV_ClipDistance0\n" - ",\n in float clipDist1 : SV_ClipDistance1\n"); - if (stereo) - out.Write(",\n in uint layer : SV_RenderTargetArrayIndex\n"); - out.Write("\n ) {{\n"); + out.Write(" float4 ocol1;\n"); } if (!stereo) out.Write(" int layer = 0;\n"); @@ -634,11 +571,6 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, BitfieldExtract<&GenMode::numtevstages>("bpmem_genmode")); out.Write(" // Main tev loop\n"); - if (api_type == APIType::D3D) - { - // Tell DirectX we don't want this loop unrolled (it crashes if it tries to) - out.Write(" [loop]\n"); - } out.Write(" for(uint stage = 0u; stage 
<= num_stages; stage++)\n" " {{\n" diff --git a/Source/Core/VideoCommon/UberShaderVertex.cpp b/Source/Core/VideoCommon/UberShaderVertex.cpp index 3f05400a46..67dd84bb7f 100644 --- a/Source/Core/VideoCommon/UberShaderVertex.cpp +++ b/Source/Core/VideoCommon/UberShaderVertex.cpp @@ -38,85 +38,65 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config out.Write("{}", s_lighting_struct); // uniforms - if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) - out.Write("UBO_BINDING(std140, 2) uniform VSBlock {{\n"); - else - out.Write("cbuffer VSBlock {{\n"); + out.Write("UBO_BINDING(std140, 2) uniform VSBlock {{\n"); out.Write("{}", s_shader_uniforms); out.Write("}};\n"); out.Write("struct VS_OUTPUT {{\n"); - GenerateVSOutputMembers(out, api_type, num_texgen, host_config, ""); + GenerateVSOutputMembers(out, api_type, num_texgen, host_config, "", ShaderStage::Vertex); out.Write("}};\n\n"); WriteIsNanHeader(out, api_type); WriteBitfieldExtractHeader(out, api_type, host_config); WriteLightingFunction(out); - if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) + out.Write("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnormal;\n", SHADER_NORMAL_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtangent;\n", SHADER_TANGENT_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in float3 rawbinormal;\n", SHADER_BINORMAL_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB); + for (int i = 0; i < 8; ++i) + out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtex{};\n", SHADER_TEXTURE0_ATTRIB + i, i); + + if (host_config.backend_geometry_shaders) { - out.Write("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB); - out.Write("ATTRIBUTE_LOCATION({}) in 
uint4 posmtx;\n", SHADER_POSMTX_ATTRIB); - out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnormal;\n", SHADER_NORMAL_ATTRIB); - out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtangent;\n", SHADER_TANGENT_ATTRIB); - out.Write("ATTRIBUTE_LOCATION({}) in float3 rawbinormal;\n", SHADER_BINORMAL_ATTRIB); - out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB); - out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB); - for (int i = 0; i < 8; ++i) - out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtex{};\n", SHADER_TEXTURE0_ATTRIB + i, i); - - if (host_config.backend_geometry_shaders) - { - out.Write("VARYING_LOCATION(0) out VertexData {{\n"); - GenerateVSOutputMembers(out, api_type, num_texgen, host_config, - GetInterpolationQualifier(msaa, ssaa, true, false)); - out.Write("}} vs;\n"); - } - else - { - // Let's set up attributes - u32 counter = 0; - out.Write("VARYING_LOCATION({}) {} out float4 colors_0;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); - out.Write("VARYING_LOCATION({}) {} out float4 colors_1;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); - for (u32 i = 0; i < num_texgen; ++i) - { - out.Write("VARYING_LOCATION({}) {} out float3 tex{};\n", counter++, - GetInterpolationQualifier(msaa, ssaa), i); - } - if (!host_config.fast_depth_calc) - { - out.Write("VARYING_LOCATION({}) {} out float4 clipPos;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); - } - if (per_pixel_lighting) - { - out.Write("VARYING_LOCATION({}) {} out float3 Normal;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); - out.Write("VARYING_LOCATION({}) {} out float3 WorldPos;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); - } - } - - out.Write("void main()\n{{\n"); + out.Write("VARYING_LOCATION(0) out VertexData {{\n"); + GenerateVSOutputMembers(out, api_type, num_texgen, host_config, + GetInterpolationQualifier(msaa, ssaa, true, false), + ShaderStage::Vertex); + out.Write("}} vs;\n"); } - else // D3D + 
else { - out.Write("VS_OUTPUT main(\n"); - - // inputs - out.Write(" float3 rawnormal : NORMAL,\n" - " float3 rawtangent : TANGENT,\n" - " float3 rawbinormal : BINORMAL,\n" - " float4 rawcolor0 : COLOR0,\n" - " float4 rawcolor1 : COLOR1,\n"); - for (int i = 0; i < 8; ++i) - out.Write(" float3 rawtex{} : TEXCOORD{},\n", i, i); - out.Write(" uint posmtx : BLENDINDICES,\n"); - out.Write(" float4 rawpos : POSITION) {{\n"); + // Let's set up attributes + u32 counter = 0; + out.Write("VARYING_LOCATION({}) {} out float4 colors_0;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); + out.Write("VARYING_LOCATION({}) {} out float4 colors_1;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); + for (u32 i = 0; i < num_texgen; ++i) + { + out.Write("VARYING_LOCATION({}) {} out float3 tex{};\n", counter++, + GetInterpolationQualifier(msaa, ssaa), i); + } + if (!host_config.fast_depth_calc) + { + out.Write("VARYING_LOCATION({}) {} out float4 clipPos;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); + } + if (per_pixel_lighting) + { + out.Write("VARYING_LOCATION({}) {} out float3 Normal;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); + out.Write("VARYING_LOCATION({}) {} out float3 WorldPos;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); + } } + out.Write("void main()\n{{\n"); + out.Write("VS_OUTPUT o;\n" "\n"); @@ -335,45 +315,38 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config "}}\n"); } - if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) + if (host_config.backend_geometry_shaders) { - if (host_config.backend_geometry_shaders) - { - AssignVSOutputMembers(out, "vs", "o", num_texgen, host_config); - } - else - { - // TODO: Pass interface blocks between shader stages even if geometry shaders - // are not supported, however that will require at least OpenGL 3.2 support. 
- for (u32 i = 0; i < num_texgen; ++i) - out.Write("tex{}.xyz = o.tex{};\n", i, i); - if (!host_config.fast_depth_calc) - out.Write("clipPos = o.clipPos;\n"); - if (per_pixel_lighting) - { - out.Write("Normal = o.Normal;\n" - "WorldPos = o.WorldPos;\n"); - } - out.Write("colors_0 = o.colors_0;\n" - "colors_1 = o.colors_1;\n"); - } - - if (host_config.backend_depth_clamp) - { - out.Write("gl_ClipDistance[0] = clipDist0;\n" - "gl_ClipDistance[1] = clipDist1;\n"); - } - - // Vulkan NDC space has Y pointing down (right-handed NDC space). - if (api_type == APIType::Vulkan) - out.Write("gl_Position = float4(o.pos.x, -o.pos.y, o.pos.z, o.pos.w);\n"); - else - out.Write("gl_Position = o.pos;\n"); + AssignVSOutputMembers(out, "vs", "o", num_texgen, host_config); } - else // D3D + else { - out.Write("return o;\n"); + // TODO: Pass interface blocks between shader stages even if geometry shaders + // are not supported, however that will require at least OpenGL 3.2 support. + for (u32 i = 0; i < num_texgen; ++i) + out.Write("tex{}.xyz = o.tex{};\n", i, i); + if (!host_config.fast_depth_calc) + out.Write("clipPos = o.clipPos;\n"); + if (per_pixel_lighting) + { + out.Write("Normal = o.Normal;\n" + "WorldPos = o.WorldPos;\n"); + } + out.Write("colors_0 = o.colors_0;\n" + "colors_1 = o.colors_1;\n"); } + + if (host_config.backend_depth_clamp) + { + out.Write("gl_ClipDistance[0] = clipDist0;\n" + "gl_ClipDistance[1] = clipDist1;\n"); + } + + // Vulkan NDC space has Y pointing down (right-handed NDC space). + if (api_type == APIType::Vulkan) + out.Write("gl_Position = float4(o.pos.x, -o.pos.y, o.pos.z, o.pos.w);\n"); + else + out.Write("gl_Position = o.pos;\n"); out.Write("}}\n"); return out; @@ -393,8 +366,7 @@ static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode& } else { - out.Write("{}for (uint texgen = 0u; texgen < {}u; texgen++) {{\n", - api_type == APIType::D3D ? 
"[loop] " : "", num_texgen); + out.Write("for (uint texgen = 0u; texgen < {}u; texgen++) {{\n", num_texgen); } out.Write(" // Texcoord transforms\n"); diff --git a/Source/Core/VideoCommon/VertexShaderGen.cpp b/Source/Core/VideoCommon/VertexShaderGen.cpp index 9fa2d6cd0e..2eb5a53a97 100644 --- a/Source/Core/VideoCommon/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/VertexShaderGen.cpp @@ -86,110 +86,80 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho out.Write("{}", s_lighting_struct); // uniforms - if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) - out.Write("UBO_BINDING(std140, 2) uniform VSBlock {{\n"); - else - out.Write("cbuffer VSBlock {{\n"); + out.Write("UBO_BINDING(std140, 2) uniform VSBlock {{\n"); out.Write("{}", s_shader_uniforms); out.Write("}};\n"); out.Write("struct VS_OUTPUT {{\n"); - GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, host_config, ""); + GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, host_config, "", + ShaderStage::Vertex); out.Write("}};\n\n"); WriteIsNanHeader(out, api_type); - if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) + out.Write("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB); + if ((uid_data->components & VB_HAS_POSMTXIDX) != 0) + out.Write("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB); + if ((uid_data->components & VB_HAS_NORMAL) != 0) + out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnormal;\n", SHADER_NORMAL_ATTRIB); + if ((uid_data->components & VB_HAS_TANGENT) != 0) + out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtangent;\n", SHADER_TANGENT_ATTRIB); + if ((uid_data->components & VB_HAS_BINORMAL) != 0) + out.Write("ATTRIBUTE_LOCATION({}) in float3 rawbinormal;\n", SHADER_BINORMAL_ATTRIB); + + if ((uid_data->components & VB_HAS_COL0) != 0) + out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB); + if ((uid_data->components & VB_HAS_COL1) != 0) + 
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB); + + for (u32 i = 0; i < 8; ++i) { - out.Write("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB); - if ((uid_data->components & VB_HAS_POSMTXIDX) != 0) - out.Write("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB); - if ((uid_data->components & VB_HAS_NORMAL) != 0) - out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnormal;\n", SHADER_NORMAL_ATTRIB); - if ((uid_data->components & VB_HAS_TANGENT) != 0) - out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtangent;\n", SHADER_TANGENT_ATTRIB); - if ((uid_data->components & VB_HAS_BINORMAL) != 0) - out.Write("ATTRIBUTE_LOCATION({}) in float3 rawbinormal;\n", SHADER_BINORMAL_ATTRIB); + const u32 has_texmtx = (uid_data->components & (VB_HAS_TEXMTXIDX0 << i)); - if ((uid_data->components & VB_HAS_COL0) != 0) - out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB); - if ((uid_data->components & VB_HAS_COL1) != 0) - out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB); - - for (u32 i = 0; i < 8; ++i) + if ((uid_data->components & (VB_HAS_UV0 << i)) != 0 || has_texmtx != 0) { - const u32 has_texmtx = (uid_data->components & (VB_HAS_TEXMTXIDX0 << i)); - - if ((uid_data->components & (VB_HAS_UV0 << i)) != 0 || has_texmtx != 0) - { - out.Write("ATTRIBUTE_LOCATION({}) in float{} rawtex{};\n", SHADER_TEXTURE0_ATTRIB + i, - has_texmtx != 0 ? 3 : 2, i); - } + out.Write("ATTRIBUTE_LOCATION({}) in float{} rawtex{};\n", SHADER_TEXTURE0_ATTRIB + i, + has_texmtx != 0 ? 
3 : 2, i); } - - if (host_config.backend_geometry_shaders) - { - out.Write("VARYING_LOCATION(0) out VertexData {{\n"); - GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, host_config, - GetInterpolationQualifier(msaa, ssaa, true, false)); - out.Write("}} vs;\n"); - } - else - { - // Let's set up attributes - u32 counter = 0; - out.Write("VARYING_LOCATION({}) {} out float4 colors_0;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); - out.Write("VARYING_LOCATION({}) {} out float4 colors_1;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); - for (u32 i = 0; i < uid_data->numTexGens; ++i) - { - out.Write("VARYING_LOCATION({}) {} out float3 tex{};\n", counter++, - GetInterpolationQualifier(msaa, ssaa), i); - } - if (!host_config.fast_depth_calc) - { - out.Write("VARYING_LOCATION({}) {} out float4 clipPos;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); - } - if (per_pixel_lighting) - { - out.Write("VARYING_LOCATION({}) {} out float3 Normal;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); - out.Write("VARYING_LOCATION({}) {} out float3 WorldPos;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); - } - } - - out.Write("void main()\n{{\n"); } - else // D3D + + if (host_config.backend_geometry_shaders) { - out.Write("VS_OUTPUT main(\n"); - - // inputs - if ((uid_data->components & VB_HAS_NORMAL) != 0) - out.Write(" float3 rawnormal : NORMAL,\n"); - if ((uid_data->components & VB_HAS_TANGENT) != 0) - out.Write(" float3 rawtangent : TANGENT,\n"); - if ((uid_data->components & VB_HAS_BINORMAL) != 0) - out.Write(" float3 rawbinormal : BINORMAL,\n"); - if ((uid_data->components & VB_HAS_COL0) != 0) - out.Write(" float4 rawcolor0 : COLOR0,\n"); - if ((uid_data->components & VB_HAS_COL1) != 0) - out.Write(" float4 rawcolor1 : COLOR1,\n"); - for (u32 i = 0; i < 8; ++i) - { - const u32 has_texmtx = (uid_data->components & (VB_HAS_TEXMTXIDX0 << i)); - - if ((uid_data->components & (VB_HAS_UV0 << i)) != 0 || has_texmtx != 0) - 
out.Write(" float{} rawtex{} : TEXCOORD{},\n", has_texmtx ? 3 : 2, i, i); - } - if ((uid_data->components & VB_HAS_POSMTXIDX) != 0) - out.Write(" uint4 posmtx : BLENDINDICES,\n"); - out.Write(" float4 rawpos : POSITION) {{\n"); + out.Write("VARYING_LOCATION(0) out VertexData {{\n"); + GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, host_config, + GetInterpolationQualifier(msaa, ssaa, true, false), + ShaderStage::Vertex); + out.Write("}} vs;\n"); } + else + { + // Let's set up attributes + u32 counter = 0; + out.Write("VARYING_LOCATION({}) {} out float4 colors_0;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); + out.Write("VARYING_LOCATION({}) {} out float4 colors_1;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); + for (u32 i = 0; i < uid_data->numTexGens; ++i) + { + out.Write("VARYING_LOCATION({}) {} out float3 tex{};\n", counter++, + GetInterpolationQualifier(msaa, ssaa), i); + } + if (!host_config.fast_depth_calc) + { + out.Write("VARYING_LOCATION({}) {} out float4 clipPos;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); + } + if (per_pixel_lighting) + { + out.Write("VARYING_LOCATION({}) {} out float3 Normal;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); + out.Write("VARYING_LOCATION({}) {} out float3 WorldPos;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); + } + } + + out.Write("void main()\n{{\n"); out.Write("VS_OUTPUT o;\n"); @@ -548,45 +518,38 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho "}}\n"); } - if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) + if (host_config.backend_geometry_shaders) { - if (host_config.backend_geometry_shaders) - { - AssignVSOutputMembers(out, "vs", "o", uid_data->numTexGens, host_config); - } - else - { - // TODO: Pass interface blocks between shader stages even if geometry shaders - // are not supported, however that will require at least OpenGL 3.2 support. 
- for (u32 i = 0; i < uid_data->numTexGens; ++i) - out.Write("tex{}.xyz = o.tex{};\n", i, i); - if (!host_config.fast_depth_calc) - out.Write("clipPos = o.clipPos;\n"); - if (per_pixel_lighting) - { - out.Write("Normal = o.Normal;\n" - "WorldPos = o.WorldPos;\n"); - } - out.Write("colors_0 = o.colors_0;\n" - "colors_1 = o.colors_1;\n"); - } - - if (host_config.backend_depth_clamp) - { - out.Write("gl_ClipDistance[0] = clipDist0;\n" - "gl_ClipDistance[1] = clipDist1;\n"); - } - - // Vulkan NDC space has Y pointing down (right-handed NDC space). - if (api_type == APIType::Vulkan) - out.Write("gl_Position = float4(o.pos.x, -o.pos.y, o.pos.z, o.pos.w);\n"); - else - out.Write("gl_Position = o.pos;\n"); + AssignVSOutputMembers(out, "vs", "o", uid_data->numTexGens, host_config); } - else // D3D + else { - out.Write("return o;\n"); + // TODO: Pass interface blocks between shader stages even if geometry shaders + // are not supported, however that will require at least OpenGL 3.2 support. + for (u32 i = 0; i < uid_data->numTexGens; ++i) + out.Write("tex{}.xyz = o.tex{};\n", i, i); + if (!host_config.fast_depth_calc) + out.Write("clipPos = o.clipPos;\n"); + if (per_pixel_lighting) + { + out.Write("Normal = o.Normal;\n" + "WorldPos = o.WorldPos;\n"); + } + out.Write("colors_0 = o.colors_0;\n" + "colors_1 = o.colors_1;\n"); } + + if (host_config.backend_depth_clamp) + { + out.Write("gl_ClipDistance[0] = clipDist0;\n" + "gl_ClipDistance[1] = clipDist1;\n"); + } + + // Vulkan NDC space has Y pointing down (right-handed NDC space). + if (api_type == APIType::Vulkan) + out.Write("gl_Position = float4(o.pos.x, -o.pos.y, o.pos.z, o.pos.w);\n"); + else + out.Write("gl_Position = o.pos;\n"); out.Write("}}\n"); return out;