From beb35320f6d73d063ad7b957692ffe591eb33c70 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Wed, 6 Dec 2017 03:00:40 +1000 Subject: [PATCH 1/3] TextureConverterShaderGen: Make generated shaders HLSL-compatible --- .../VideoCommon/TextureConverterShaderGen.cpp | 68 +++++++++++-------- 1 file changed, 41 insertions(+), 27 deletions(-) diff --git a/Source/Core/VideoCommon/TextureConverterShaderGen.cpp b/Source/Core/VideoCommon/TextureConverterShaderGen.cpp index 6bf0eaef78..5133555980 100644 --- a/Source/Core/VideoCommon/TextureConverterShaderGen.cpp +++ b/Source/Core/VideoCommon/TextureConverterShaderGen.cpp @@ -32,35 +32,49 @@ TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_i ShaderCode GenerateShader(APIType api_type, const UidData* uid_data) { - ShaderCode out; + const bool mono_depth = uid_data->is_depth_copy && g_ActiveConfig.bStereoEFBMonoDepth; + ShaderCode out; if (api_type == APIType::OpenGL) + { out.Write("SAMPLER_BINDING(9) uniform sampler2DArray samp9;\n" "#define samp0 samp9\n" "#define uv0 f_uv0\n" "in vec3 uv0;\n" - "out vec4 ocol0;\n"); - + "out vec4 ocol0;\n" + "void main(){\n" + " vec4 texcol = texture(samp0, %s);\n", + mono_depth ? "vec3(uv0.xy, 0.0)" : "uv0"); + } else if (api_type == APIType::Vulkan) + { out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n" "layout(location = 0) in vec3 uv0;\n" "layout(location = 1) in vec4 col0;\n" - "layout(location = 0) out vec4 ocol0;"); - - bool mono_depth = uid_data->is_depth_copy && g_ActiveConfig.bStereoEFBMonoDepth; - out.Write("void main(){\n" - " vec4 texcol = texture(samp0, %s);\n", - mono_depth ? "vec3(uv0.xy, 0.0)" : "uv0"); + "layout(location = 0) out vec4 ocol0;" + "void main(){\n" + " vec4 texcol = texture(samp0, %s);\n", + mono_depth ? 
"vec3(uv0.xy, 0.0)" : "uv0"); + } + else if (api_type == APIType::D3D) + { + out.Write("Texture2DArray tex0 : register(t0);\n" + "SamplerState samp0 : register(s0);\n" + "void main(out float4 ocol0 : SV_Target,\n" + " in float4 pos : SV_Position,\n" + " in float3 uv0 : TEXCOORD0) {\n" + " float4 texcol = tex0.Sample(samp0, uv0);\n"); + } if (uid_data->is_depth_copy) { - if (api_type == APIType::Vulkan) + if (api_type == APIType::D3D || api_type == APIType::Vulkan) out.Write("texcol.x = 1.0 - texcol.x;\n"); out.Write(" int depth = int(texcol.x * 16777216.0);\n" // Convert to Z24 format - " ivec4 workspace;\n" + " int4 workspace;\n" " workspace.r = (depth >> 16) & 255;\n" " workspace.g = (depth >> 8) & 255;\n" " workspace.b = depth & 255;\n" @@ -69,7 +83,7 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data) " workspace.a = (depth >> 16) & 0xF0;\n" // Normalize components to [0.0..1.0] - " texcol = vec4(workspace) / 255.0;\n"); + " texcol = float4(workspace) / 255.0;\n"); switch (uid_data->dst_format) { case EFBCopyFormat::R4: // Z4 @@ -90,7 +104,7 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data) break; case EFBCopyFormat::RGBA8: // Z24X8 - out.Write(" ocol0 = vec4(texcol.rgb, 0.0);\n"); + out.Write(" ocol0 = float4(texcol.rgb, 0.0);\n"); break; case EFBCopyFormat::G8: // Z8M @@ -110,7 +124,7 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data) default: ERROR_LOG(VIDEO, "Unknown copy zbuf format: 0x%X", static_cast(uid_data->dst_format)); - out.Write(" ocol0 = vec4(texcol.bgr, 0.0);\n"); + out.Write(" ocol0 = float4(texcol.bgr, 0.0);\n"); break; } } @@ -165,16 +179,16 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data) color_mask[4] = color_mask[5] = color_mask[6] = color_mask[7] = 1.0f / 15.0f; } } - out.Write(" const vec4 colmat[7] = {\n"); + out.Write(" const float4 colmat[7] = {\n"); for (size_t i = 0; i < colmat.size() / 4; i++) { - out.Write(" vec4(%f, %f, %f, %f)%s\n", colmat[i * 
4 + 0], colmat[i * 4 + 1], + out.Write(" float4(%f, %f, %f, %f)%s\n", colmat[i * 4 + 0], colmat[i * 4 + 1], colmat[i * 4 + 2], colmat[i * 4 + 3], i < 7 ? "," : ""); } - out.Write( - " };\n" - " texcol = floor(texcol * colmat[5]) * colmat[6];\n" - " ocol0 = texcol * mat4(colmat[0], colmat[1], colmat[2], colmat[3]) + colmat[4];\n"); + out.Write(" };\n" + " texcol = floor(texcol * colmat[5]) * colmat[6];\n" + " ocol0 = float4(dot(texcol, colmat[0]), dot(texcol, colmat[1]), dot(texcol, " + "colmat[2]), dot(texcol, colmat[3])) + colmat[4];\n"); break; default: @@ -193,7 +207,7 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data) { case EFBCopyFormat::R4: // R4 out.Write(" float red = float(int(texcol.r * 255.0) & 0xF0) * (1.0 / 240.0);\n" - " ocol0 = vec4(red, red, red, red);\n"); + " ocol0 = float4(red, red, red, red);\n"); break; case EFBCopyFormat::R8_0x1: // R8 @@ -202,7 +216,7 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data) break; case EFBCopyFormat::RA4: // RA4 - out.Write(" vec2 red_alpha = vec2(ivec2(texcol.ra * 255.0) & 0xF0) * (1.0 / 240.0);\n" + out.Write(" float2 red_alpha = float2(int2(texcol.ra * 255.0) & 0xF0) * (1.0 / 240.0);\n" " ocol0 = red_alpha.rrrg;\n"); break; @@ -231,17 +245,17 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data) break; case EFBCopyFormat::RGB565: // RGB565 - out.Write(" vec2 red_blue = vec2(ivec2(texcol.rb * 255.0) & 0xF8) * (1.0 / 248.0);\n" + out.Write(" float2 red_blue = float2(int2(texcol.rb * 255.0) & 0xF8) * (1.0 / 248.0);\n" " float green = float(int(texcol.g * 255.0) & 0xFC) * (1.0 / 252.0);\n" - " ocol0 = vec4(red_blue.r, green, red_blue.g, 1.0);\n"); + " ocol0 = float4(red_blue.r, green, red_blue.g, 1.0);\n"); break; case EFBCopyFormat::RGB5A3: // RGB5A3 // TODO: The MSB controls whether we have RGB5 or RGB4A3, this selection // will need to be implemented once we move away from floats. 
- out.Write(" vec3 color = vec3(ivec3(texcol.rgb * 255.0) & 0xF8) * (1.0 / 248.0);\n" + out.Write(" float3 color = float3(int3(texcol.rgb * 255.0) & 0xF8) * (1.0 / 248.0);\n" " float alpha = float(int(texcol.a * 255.0) & 0xE0) * (1.0 / 224.0);\n" - " ocol0 = vec4(color, alpha);\n"); + " ocol0 = float4(color, alpha);\n"); break; case EFBCopyFormat::RGBA8: // RGBA8 @@ -249,7 +263,7 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data) break; case EFBCopyFormat::XFB: // XFB copy, we just pretend it's an RGBX copy - out.Write(" ocol0 = vec4(texcol.rgb, 1.0);\n"); + out.Write(" ocol0 = float4(texcol.rgb, 1.0);\n"); break; default: From e0ffce27854e179025f98542ba2d4843f1e2161c Mon Sep 17 00:00:00 2001 From: Stenzek Date: Wed, 6 Dec 2017 03:01:04 +1000 Subject: [PATCH 2/3] D3D: Use VideoCommon EFB-to-texture shaders --- .../VideoBackends/D3D/PixelShaderCache.cpp | 159 ------------------ .../Core/VideoBackends/D3D/PixelShaderCache.h | 2 - .../Core/VideoBackends/D3D/TextureCache.cpp | 71 ++++---- Source/Core/VideoBackends/D3D/TextureCache.h | 7 + 4 files changed, 42 insertions(+), 197 deletions(-) diff --git a/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp b/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp index 3625cf4e0c..664ecec3a2 100644 --- a/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp @@ -38,9 +38,7 @@ LinearDiskCache g_ps_disk_cache; LinearDiskCache g_uber_ps_disk_cache; extern std::unique_ptr g_async_compiler; -ID3D11PixelShader* s_ColorMatrixProgram[2] = {nullptr}; ID3D11PixelShader* s_ColorCopyProgram[2] = {nullptr}; -ID3D11PixelShader* s_DepthMatrixProgram[2] = {nullptr}; ID3D11PixelShader* s_ClearProgram = nullptr; ID3D11PixelShader* s_AnaglyphProgram = nullptr; ID3D11PixelShader* s_DepthResolveProgram = nullptr; @@ -104,106 +102,6 @@ const char color_copy_program_code_msaa[] = { "ocol0 /= SAMPLES;\n" "}\n"}; -const char color_matrix_program_code[] = {"sampler samp0 : 
register(s0);\n" - "Texture2DArray Tex0 : register(t0);\n" - "uniform float4 cColMatrix[7] : register(c0);\n" - "void main(\n" - "out float4 ocol0 : SV_Target,\n" - "in float4 pos : SV_Position,\n" - "in float3 uv0 : TEXCOORD0){\n" - "float4 texcol = Tex0.Sample(samp0,uv0);\n" - "texcol = floor(texcol * cColMatrix[5])*cColMatrix[6];\n" - "ocol0 = " - "float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[" - "1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3]))" - " + cColMatrix[4];\n" - "}\n"}; - -const char color_matrix_program_code_msaa[] = { - "#define SAMPLES %d\n" - "sampler samp0 : register(s0);\n" - "Texture2DMSArray Tex0 : register(t0);\n" - "uniform float4 cColMatrix[7] : register(c0);\n" - "void main(\n" - "out float4 ocol0 : SV_Target,\n" - "in float4 pos : SV_Position,\n" - "in float3 uv0 : TEXCOORD0){\n" - "int width, height, slices, samples;\n" - "Tex0.GetDimensions(width, height, slices, samples);\n" - "float4 texcol = 0;\n" - "for(int i = 0; i < SAMPLES; ++i)\n" - " texcol += Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i);\n" - "texcol /= SAMPLES;\n" - "texcol = floor(texcol * cColMatrix[5])*cColMatrix[6];\n" - "ocol0 = " - "float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(" - "texcol,cColMatrix[3])) + cColMatrix[4];\n" - "}\n"}; - -const char depth_matrix_program[] = {"sampler samp0 : register(s0);\n" - "Texture2DArray Tex0 : register(t0);\n" - "uniform float4 cColMatrix[7] : register(c0);\n" - "void main(\n" - "out float4 ocol0 : SV_Target,\n" - " in float4 pos : SV_Position,\n" - " in float3 uv0 : TEXCOORD0){\n" - " float4 texcol = Tex0.Sample(samp0,uv0);\n" - " int depth = int((1.0 - texcol.x) * 16777216.0);\n" - - // Convert to Z24 format - " int4 workspace;\n" - " workspace.r = (depth >> 16) & 255;\n" - " workspace.g = (depth >> 8) & 255;\n" - " workspace.b = depth & 255;\n" - - // Convert to Z4 format - " workspace.a = (depth >> 16) & 0xF0;\n" - - // Normalize components to [0.0..1.0] - 
" texcol = float4(workspace) / 255.0;\n" - - // Apply color matrix - " ocol0 = " - "float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1])," - "dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + " - "cColMatrix[4];\n" - "}\n"}; - -const char depth_matrix_program_msaa[] = { - "#define SAMPLES %d\n" - "sampler samp0 : register(s0);\n" - "Texture2DMSArray Tex0 : register(t0);\n" - "uniform float4 cColMatrix[7] : register(c0);\n" - "void main(\n" - "out float4 ocol0 : SV_Target,\n" - " in float4 pos : SV_Position,\n" - " in float3 uv0 : TEXCOORD0){\n" - " int width, height, slices, samples;\n" - " Tex0.GetDimensions(width, height, slices, samples);\n" - " float4 texcol = 0;\n" - " for(int i = 0; i < SAMPLES; ++i)\n" - " texcol += Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i);\n" - " texcol /= SAMPLES;\n" - " int depth = int((1.0 - texcol.x) * 16777216.0);\n" - - // Convert to Z24 format - " int4 workspace;\n" - " workspace.r = (depth >> 16) & 255;\n" - " workspace.g = (depth >> 8) & 255;\n" - " workspace.b = depth & 255;\n" - - // Convert to Z4 format - " workspace.a = (depth >> 16) & 0xF0;\n" - - // Normalize components to [0.0..1.0] - " texcol = float4(workspace) / 255.0;\n" - - // Apply color matrix - " ocol0 = " - "float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(" - "texcol,cColMatrix[3])) + cColMatrix[4];\n" - "}\n"}; - const char depth_resolve_program[] = { "#define SAMPLES %d\n" "Texture2DMSArray Tex0 : register(t0);\n" @@ -368,49 +266,6 @@ ID3D11PixelShader* PixelShaderCache::GetColorCopyProgram(bool multisampled) } } -ID3D11PixelShader* PixelShaderCache::GetColorMatrixProgram(bool multisampled) -{ - if (!multisampled || g_ActiveConfig.iMultisamples <= 1) - { - return s_ColorMatrixProgram[0]; - } - else if (s_ColorMatrixProgram[1]) - { - return s_ColorMatrixProgram[1]; - } - else - { - // create MSAA shader for current AA mode - std::string buf = - StringFromFormat(color_matrix_program_code_msaa, 
g_ActiveConfig.iMultisamples); - s_ColorMatrixProgram[1] = D3D::CompileAndCreatePixelShader(buf); - CHECK(s_ColorMatrixProgram[1] != nullptr, "Create color matrix MSAA pixel shader"); - D3D::SetDebugObjectName(s_ColorMatrixProgram[1], "color matrix MSAA pixel shader"); - return s_ColorMatrixProgram[1]; - } -} - -ID3D11PixelShader* PixelShaderCache::GetDepthMatrixProgram(bool multisampled) -{ - if (!multisampled || g_ActiveConfig.iMultisamples <= 1) - { - return s_DepthMatrixProgram[0]; - } - else if (s_DepthMatrixProgram[1]) - { - return s_DepthMatrixProgram[1]; - } - else - { - // create MSAA shader for current AA mode - std::string buf = StringFromFormat(depth_matrix_program_msaa, g_ActiveConfig.iMultisamples); - s_DepthMatrixProgram[1] = D3D::CompileAndCreatePixelShader(buf); - CHECK(s_DepthMatrixProgram[1] != nullptr, "Create depth matrix MSAA pixel shader"); - D3D::SetDebugObjectName(s_DepthMatrixProgram[1], "depth matrix MSAA pixel shader"); - return s_DepthMatrixProgram[1]; - } -} - ID3D11PixelShader* PixelShaderCache::GetClearProgram() { return s_ClearProgram; @@ -490,16 +345,6 @@ void PixelShaderCache::Init() CHECK(s_ColorCopyProgram[0] != nullptr, "Create color copy pixel shader"); D3D::SetDebugObjectName(s_ColorCopyProgram[0], "color copy pixel shader"); - // used for color conversion - s_ColorMatrixProgram[0] = D3D::CompileAndCreatePixelShader(color_matrix_program_code); - CHECK(s_ColorMatrixProgram[0] != nullptr, "Create color matrix pixel shader"); - D3D::SetDebugObjectName(s_ColorMatrixProgram[0], "color matrix pixel shader"); - - // used for depth copy - s_DepthMatrixProgram[0] = D3D::CompileAndCreatePixelShader(depth_matrix_program); - CHECK(s_DepthMatrixProgram[0] != nullptr, "Create depth matrix pixel shader"); - D3D::SetDebugObjectName(s_DepthMatrixProgram[0], "depth matrix pixel shader"); - Clear(); SETSTAT(stats.numPixelShadersCreated, 0); @@ -557,8 +402,6 @@ void PixelShaderCache::Clear() void PixelShaderCache::InvalidateMSAAShaders() { 
SAFE_RELEASE(s_ColorCopyProgram[1]); - SAFE_RELEASE(s_ColorMatrixProgram[1]); - SAFE_RELEASE(s_DepthMatrixProgram[1]); SAFE_RELEASE(s_rgb8_to_rgba6[1]); SAFE_RELEASE(s_rgba6_to_rgb8[1]); SAFE_RELEASE(s_DepthResolveProgram); @@ -574,8 +417,6 @@ void PixelShaderCache::Shutdown() for (int i = 0; i < 2; ++i) { SAFE_RELEASE(s_ColorCopyProgram[i]); - SAFE_RELEASE(s_ColorMatrixProgram[i]); - SAFE_RELEASE(s_DepthMatrixProgram[i]); SAFE_RELEASE(s_rgba6_to_rgb8[i]); SAFE_RELEASE(s_rgb8_to_rgba6[i]); } diff --git a/Source/Core/VideoBackends/D3D/PixelShaderCache.h b/Source/Core/VideoBackends/D3D/PixelShaderCache.h index 18979cc8ef..9a0faed9e5 100644 --- a/Source/Core/VideoBackends/D3D/PixelShaderCache.h +++ b/Source/Core/VideoBackends/D3D/PixelShaderCache.h @@ -32,9 +32,7 @@ public: static ID3D11Buffer* GetConstantBuffer(); - static ID3D11PixelShader* GetColorMatrixProgram(bool multisampled); static ID3D11PixelShader* GetColorCopyProgram(bool multisampled); - static ID3D11PixelShader* GetDepthMatrixProgram(bool multisampled); static ID3D11PixelShader* GetClearProgram(); static ID3D11PixelShader* GetAnaglyphProgram(); static ID3D11PixelShader* GetDepthResolveProgram(); diff --git a/Source/Core/VideoBackends/D3D/TextureCache.cpp b/Source/Core/VideoBackends/D3D/TextureCache.cpp index a7eab7c041..cca8448421 100644 --- a/Source/Core/VideoBackends/D3D/TextureCache.cpp +++ b/Source/Core/VideoBackends/D3D/TextureCache.cpp @@ -29,8 +29,6 @@ namespace DX11 { -static const size_t MAX_COPY_BUFFERS = 32; -static ID3D11Buffer* s_efbcopycbuf[MAX_COPY_BUFFERS] = {0}; static std::unique_ptr g_encoder; void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, @@ -207,17 +205,16 @@ TextureCache::TextureCache() TextureCache::~TextureCache() { - for (unsigned int k = 0; k < MAX_COPY_BUFFERS; ++k) - SAFE_RELEASE(s_efbcopycbuf[k]); - g_encoder->Shutdown(); g_encoder.reset(); SAFE_RELEASE(palette_buf); SAFE_RELEASE(palette_buf_srv); SAFE_RELEASE(palette_uniform); - for 
(ID3D11PixelShader*& shader : palette_pixel_shader) + for (auto*& shader : palette_pixel_shader) SAFE_RELEASE(shader); + for (auto& iter : m_efb_to_tex_pixel_shaders) + SAFE_RELEASE(iter.second); } void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, @@ -227,19 +224,24 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, { auto* destination_texture = static_cast(entry->texture.get()); - // When copying at half size, in multisampled mode, resolve the color/depth buffer first. - // This is because multisampled texture reads go through Load, not Sample, and the linear - // filter is ignored. - bool multisampled = (g_ActiveConfig.iMultisamples > 1); - ID3D11ShaderResourceView* efbTexSRV = is_depth_copy ? - FramebufferManager::GetEFBDepthTexture()->GetSRV() : - FramebufferManager::GetEFBColorTexture()->GetSRV(); - if (multisampled && scale_by_half) + bool multisampled = g_ActiveConfig.iMultisamples > 1; + ID3D11ShaderResourceView* efb_tex_srv; + if (multisampled) { - multisampled = false; - efbTexSRV = is_depth_copy ? FramebufferManager::GetResolvedEFBDepthTexture()->GetSRV() : - FramebufferManager::GetResolvedEFBColorTexture()->GetSRV(); + efb_tex_srv = is_depth_copy ? FramebufferManager::GetResolvedEFBDepthTexture()->GetSRV() : + FramebufferManager::GetResolvedEFBColorTexture()->GetSRV(); } + else + { + efb_tex_srv = is_depth_copy ? 
FramebufferManager::GetEFBDepthTexture()->GetSRV() : + FramebufferManager::GetEFBColorTexture()->GetSRV(); + } + + auto uid = TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity, + scale_by_half); + ID3D11PixelShader* pixel_shader = GetEFBToTexPixelShader(uid); + if (!pixel_shader) + return; g_renderer->ResetAPIState(); @@ -249,20 +251,6 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, static_cast(destination_texture->GetConfig().height)); D3D::context->RSSetViewports(1, &vp); - // set transformation - if (nullptr == s_efbcopycbuf[cbuf_id]) - { - const D3D11_BUFFER_DESC cbdesc = - CD3D11_BUFFER_DESC(28 * sizeof(float), D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DEFAULT); - D3D11_SUBRESOURCE_DATA data; - data.pSysMem = colmat; - HRESULT hr = D3D::device->CreateBuffer(&cbdesc, &data, &s_efbcopycbuf[cbuf_id]); - CHECK(SUCCEEDED(hr), "Create efb copy constant buffer %d", cbuf_id); - D3D::SetDebugObjectName(s_efbcopycbuf[cbuf_id], - "a constant buffer used in TextureCache::CopyRenderTargetToTexture"); - } - D3D::stateman->SetPixelConstants(s_efbcopycbuf[cbuf_id]); - const TargetRectangle targetSource = g_renderer->ConvertEFBRectangle(src_rect); // TODO: try targetSource.asRECT(); const D3D11_RECT sourcerect = @@ -284,13 +272,24 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, // Create texture copy D3D::drawShadedTexQuad( - efbTexSRV, &sourcerect, g_renderer->GetTargetWidth(), g_renderer->GetTargetHeight(), - is_depth_copy ? 
PixelShaderCache::GetDepthMatrixProgram(multisampled) : - PixelShaderCache::GetColorMatrixProgram(multisampled), - VertexShaderCache::GetSimpleVertexShader(), VertexShaderCache::GetSimpleInputLayout(), - GeometryShaderCache::GetCopyGeometryShader()); + efb_tex_srv, &sourcerect, g_renderer->GetTargetWidth(), g_renderer->GetTargetHeight(), + pixel_shader, VertexShaderCache::GetSimpleVertexShader(), + VertexShaderCache::GetSimpleInputLayout(), GeometryShaderCache::GetCopyGeometryShader()); FramebufferManager::BindEFBRenderTarget(); g_renderer->RestoreAPIState(); } + +ID3D11PixelShader* +TextureCache::GetEFBToTexPixelShader(const TextureConversionShaderGen::TCShaderUid& uid) +{ + auto iter = m_efb_to_tex_pixel_shaders.find(uid); + if (iter != m_efb_to_tex_pixel_shaders.end()) + return iter->second; + + ShaderCode code = TextureConversionShaderGen::GenerateShader(APIType::D3D, uid.GetUidData()); + ID3D11PixelShader* shader = D3D::CompileAndCreatePixelShader(code.GetBuffer()); + m_efb_to_tex_pixel_shaders.emplace(uid, shader); + return shader; +} } diff --git a/Source/Core/VideoBackends/D3D/TextureCache.h b/Source/Core/VideoBackends/D3D/TextureCache.h index d650c04d4c..3f48ddb6f7 100644 --- a/Source/Core/VideoBackends/D3D/TextureCache.h +++ b/Source/Core/VideoBackends/D3D/TextureCache.h @@ -4,8 +4,11 @@ #pragma once +#include + #include "VideoBackends/D3D/D3DTexture.h" #include "VideoCommon/TextureCacheBase.h" +#include "VideoCommon/TextureConverterShaderGen.h" class AbstractTexture; struct TextureConfig; @@ -39,9 +42,13 @@ private: bool CompileShaders() override { return true; } void DeleteShaders() override {} + ID3D11PixelShader* GetEFBToTexPixelShader(const TextureConversionShaderGen::TCShaderUid& uid); + ID3D11Buffer* palette_buf; ID3D11ShaderResourceView* palette_buf_srv; ID3D11Buffer* palette_uniform; ID3D11PixelShader* palette_pixel_shader[3]; + + std::map m_efb_to_tex_pixel_shaders; }; } From d0601c0a8372780ae3412d25bf81561d23e89ac1 Mon Sep 17 00:00:00 2001 
From: Stenzek Date: Wed, 6 Dec 2017 03:08:35 +1000 Subject: [PATCH 3/3] TextureConversionShader: Use round() instead of roundEven() in HLSL HLSL does not define roundEven(), only round(). This means that the output may differ slightly for OpenGL vs Direct3D. However, continuing to use roundEven() in GLSL ensures consistency across OpenGL drivers, as round() in GLSL can go either way. --- Source/Core/VideoCommon/TextureConversionShader.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Source/Core/VideoCommon/TextureConversionShader.cpp b/Source/Core/VideoCommon/TextureConversionShader.cpp index 044e3a4868..9535ccc88b 100644 --- a/Source/Core/VideoCommon/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/TextureConversionShader.cpp @@ -72,6 +72,12 @@ static void WriteSwizzler(char*& p, EFBCopyFormat format, APIType ApiType) WRITE(p, "uniform float y_scale;\n"); } + // D3D does not have roundEven(), only round(), which is specified "to the nearest integer". + // This differs from the roundEven() behavior, but to get consistency across drivers in OpenGL + // we need to use roundEven(). + if (ApiType == APIType::D3D) + WRITE(p, "#define roundEven(x) round(x)\n"); + // Alpha channel in the copy is set to 1 the EFB format does not have an alpha channel. WRITE(p, "float4 RGBA8ToRGB8(float4 src)\n"); WRITE(p, "{\n");