Restructure parameters to TextureConverterShaderGen/TextureConversionShader

This will be used for later refactoring for increased accuracy.
This commit is contained in:
Pokechu22 2022-02-07 13:37:28 -08:00
parent d20094efa2
commit 791bd16b28
7 changed files with 89 additions and 76 deletions

View File

@ -14,7 +14,7 @@ protected:
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
const MathUtil::Rectangle<int>& src_rect, bool scale_by_half, bool linear_filter, const MathUtil::Rectangle<int>& src_rect, bool scale_by_half, bool linear_filter,
float y_scale, float gamma, bool clamp_top, bool clamp_bottom, float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients) override const std::array<u32, 3>& filter_coefficients) override
{ {
} }
@ -22,7 +22,7 @@ protected:
const MathUtil::Rectangle<int>& src_rect, bool scale_by_half, const MathUtil::Rectangle<int>& src_rect, bool scale_by_half,
bool linear_filter, EFBCopyFormat dst_format, bool is_intensity, bool linear_filter, EFBCopyFormat dst_format, bool is_intensity,
float gamma, bool clamp_top, bool clamp_bottom, float gamma, bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients) override const std::array<u32, 3>& filter_coefficients) override
{ {
} }
}; };

View File

@ -14,7 +14,7 @@ protected:
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
const MathUtil::Rectangle<int>& src_rect, bool scale_by_half, bool linear_filter, const MathUtil::Rectangle<int>& src_rect, bool scale_by_half, bool linear_filter,
float y_scale, float gamma, bool clamp_top, bool clamp_bottom, float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients) override const std::array<u32, 3>& filter_coefficients) override
{ {
TextureEncoder::Encode(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride, TextureEncoder::Encode(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride,
src_rect, scale_by_half, y_scale, gamma); src_rect, scale_by_half, y_scale, gamma);
@ -23,7 +23,7 @@ protected:
const MathUtil::Rectangle<int>& src_rect, bool scale_by_half, const MathUtil::Rectangle<int>& src_rect, bool scale_by_half,
bool linear_filter, EFBCopyFormat dst_format, bool is_intensity, bool linear_filter, EFBCopyFormat dst_format, bool is_intensity,
float gamma, bool clamp_top, bool clamp_bottom, float gamma, bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients) override const std::array<u32, 3>& filter_coefficients) override
{ {
// TODO: If we ever want to "fake" vram textures, we would need to implement this // TODO: If we ever want to "fake" vram textures, we would need to implement this
} }

View File

@ -1978,44 +1978,49 @@ void TextureCacheBase::StitchXFBCopy(TCacheEntry* stitched_entry)
} }
} }
EFBCopyFilterCoefficients std::array<u32, 3>
TextureCacheBase::GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) TextureCacheBase::GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients)
{ {
// To simplify the backend, we precalculate the three coefficients in common. Coefficients 0, 1 // To simplify the backend, we precalculate the three coefficients in common. Coefficients 0, 1
// are for the row above, 2, 3, 4 are for the current pixel, and 5, 6 are for the row below. // are for the row above, 2, 3, 4 are for the current pixel, and 5, 6 are for the row below.
return EFBCopyFilterCoefficients{ return {
static_cast<float>(static_cast<u32>(coefficients[0]) + static_cast<u32>(coefficients[1])) / static_cast<u32>(coefficients[0]) + static_cast<u32>(coefficients[1]),
64.0f, static_cast<u32>(coefficients[2]) + static_cast<u32>(coefficients[3]) +
static_cast<float>(static_cast<u32>(coefficients[2]) + static_cast<u32>(coefficients[3]) + static_cast<u32>(coefficients[4]),
static_cast<u32>(coefficients[4])) / static_cast<u32>(coefficients[5]) + static_cast<u32>(coefficients[6]),
64.0f,
static_cast<float>(static_cast<u32>(coefficients[5]) + static_cast<u32>(coefficients[6])) /
64.0f,
}; };
} }
EFBCopyFilterCoefficients std::array<u32, 3>
TextureCacheBase::GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) TextureCacheBase::GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients)
{ {
// If the user disables the copy filter, only apply it to the VRAM copy. // If the user disables the copy filter, only apply it to the VRAM copy.
// This way games which are sensitive to changes to the RAM copy of the XFB will be unaffected. // This way games which are sensitive to changes to the RAM copy of the XFB will be unaffected.
EFBCopyFilterCoefficients res = GetRAMCopyFilterCoefficients(coefficients); std::array<u32, 3> res = GetRAMCopyFilterCoefficients(coefficients);
if (!g_ActiveConfig.bDisableCopyFilter) if (!g_ActiveConfig.bDisableCopyFilter)
return res; return res;
// Disabling the copy filter in options should not ignore the values the game sets completely, // Disabling the copy filter in options should not ignore the values the game sets completely,
// as some games use the filter coefficients to control the brightness of the screen. Instead, // as some games use the filter coefficients to control the brightness of the screen. Instead,
// add all coefficients to the middle sample, so the deflicker/vertical filter has no effect. // add all coefficients to the middle sample, so the deflicker/vertical filter has no effect.
res.middle = res.upper + res.middle + res.lower; res[1] = res[0] + res[1] + res[2];
res.upper = 0.0f; res[0] = 0;
res.lower = 0.0f; res[2] = 0;
return res; return res;
} }
bool TextureCacheBase::NeedsCopyFilterInShader(const EFBCopyFilterCoefficients& coefficients) bool TextureCacheBase::AllCopyFilterCoefsNeeded(const std::array<u32, 3>& coefficients)
{ {
// If the top/bottom coefficients are zero, no point sampling/blending from these rows. // If the top/bottom coefficients are zero, no point sampling/blending from these rows.
return coefficients.upper != 0 || coefficients.lower != 0; return coefficients[0] != 0 || coefficients[2] != 0;
}
bool TextureCacheBase::CopyFilterCanOverflow(const std::array<u32, 3>& coefficients)
{
// Normally, the copy filter coefficients will sum to at most 64. If the sum is higher than that,
// colors are clamped to the range [0, 255], but if the sum is higher than 128, that clamping
// breaks (as colors end up >= 512, which wraps back to 0).
return coefficients[0] + coefficients[1] + coefficients[2] >= 128;
} }
void TextureCacheBase::CopyRenderTargetToTexture( void TextureCacheBase::CopyRenderTargetToTexture(
@ -2255,10 +2260,11 @@ void TextureCacheBase::CopyRenderTargetToTexture(
if (copy_to_ram) if (copy_to_ram)
{ {
EFBCopyFilterCoefficients coefficients = GetRAMCopyFilterCoefficients(filter_coefficients); const std::array<u32, 3> coefficients = GetRAMCopyFilterCoefficients(filter_coefficients);
PixelFormat srcFormat = bpmem.zcontrol.pixel_format; PixelFormat srcFormat = bpmem.zcontrol.pixel_format;
EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity, EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity,
NeedsCopyFilterInShader(coefficients)); AllCopyFilterCoefsNeeded(coefficients),
CopyFilterCanOverflow(coefficients), gamma != 1.0);
std::unique_ptr<AbstractStagingTexture> staging_texture = GetEFBCopyStagingTexture(); std::unique_ptr<AbstractStagingTexture> staging_texture = GetEFBCopyStagingTexture();
if (staging_texture) if (staging_texture)
@ -2716,16 +2722,15 @@ void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_cop
bool scale_by_half, bool linear_filter, bool scale_by_half, bool linear_filter,
EFBCopyFormat dst_format, bool is_intensity, float gamma, EFBCopyFormat dst_format, bool is_intensity, float gamma,
bool clamp_top, bool clamp_bottom, bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients) const std::array<u32, 3>& filter_coefficients)
{ {
// Flush EFB pokes first, as they're expected to be included. // Flush EFB pokes first, as they're expected to be included.
g_framebuffer_manager->FlushEFBPokes(); g_framebuffer_manager->FlushEFBPokes();
// Get the pipeline which we will be using. If the compilation failed, this will be null. // Get the pipeline which we will be using. If the compilation failed, this will be null.
const AbstractPipeline* copy_pipeline = const AbstractPipeline* copy_pipeline = g_shader_cache->GetEFBCopyToVRAMPipeline(
g_shader_cache->GetEFBCopyToVRAMPipeline(TextureConversionShaderGen::GetShaderUid( TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity,
dst_format, is_depth_copy, is_intensity, scale_by_half, scale_by_half, 1.0f / gamma, filter_coefficients));
NeedsCopyFilterInShader(filter_coefficients)));
if (!copy_pipeline) if (!copy_pipeline)
{ {
WARN_LOG_FMT(VIDEO, "Skipping EFB copy to VRAM due to missing pipeline."); WARN_LOG_FMT(VIDEO, "Skipping EFB copy to VRAM due to missing pipeline.");
@ -2746,7 +2751,7 @@ void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_cop
struct Uniforms struct Uniforms
{ {
float src_left, src_top, src_width, src_height; float src_left, src_top, src_width, src_height;
float filter_coefficients[3]; std::array<u32, 3> filter_coefficients;
float gamma_rcp; float gamma_rcp;
float clamp_top; float clamp_top;
float clamp_bottom; float clamp_bottom;
@ -2761,9 +2766,7 @@ void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_cop
uniforms.src_top = framebuffer_rect.top * rcp_efb_height; uniforms.src_top = framebuffer_rect.top * rcp_efb_height;
uniforms.src_width = framebuffer_rect.GetWidth() * rcp_efb_width; uniforms.src_width = framebuffer_rect.GetWidth() * rcp_efb_width;
uniforms.src_height = framebuffer_rect.GetHeight() * rcp_efb_height; uniforms.src_height = framebuffer_rect.GetHeight() * rcp_efb_height;
uniforms.filter_coefficients[0] = filter_coefficients.upper; uniforms.filter_coefficients = filter_coefficients;
uniforms.filter_coefficients[1] = filter_coefficients.middle;
uniforms.filter_coefficients[2] = filter_coefficients.lower;
uniforms.gamma_rcp = 1.0f / gamma; uniforms.gamma_rcp = 1.0f / gamma;
// NOTE: when the clamp bits aren't set, the hardware will happily read beyond the EFB, // NOTE: when the clamp bits aren't set, the hardware will happily read beyond the EFB,
// which returns random garbage from the empty bus (confirmed by hardware tests). // which returns random garbage from the empty bus (confirmed by hardware tests).
@ -2795,7 +2798,7 @@ void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams&
u32 memory_stride, const MathUtil::Rectangle<int>& src_rect, u32 memory_stride, const MathUtil::Rectangle<int>& src_rect,
bool scale_by_half, bool linear_filter, float y_scale, float gamma, bool scale_by_half, bool linear_filter, float y_scale, float gamma,
bool clamp_top, bool clamp_bottom, bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients) const std::array<u32, 3>& filter_coefficients)
{ {
// Flush EFB pokes first, as they're expected to be included. // Flush EFB pokes first, as they're expected to be included.
g_framebuffer_manager->FlushEFBPokes(); g_framebuffer_manager->FlushEFBPokes();
@ -2826,7 +2829,7 @@ void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams&
float gamma_rcp; float gamma_rcp;
float clamp_top; float clamp_top;
float clamp_bottom; float clamp_bottom;
float filter_coefficients[3]; std::array<u32, 3> filter_coefficients;
u32 padding; u32 padding;
}; };
Uniforms encoder_params; Uniforms encoder_params;
@ -2847,9 +2850,7 @@ void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams&
encoder_params.clamp_top = (static_cast<float>(top_coord) + .5f) * rcp_efb_height; encoder_params.clamp_top = (static_cast<float>(top_coord) + .5f) * rcp_efb_height;
const u32 bottom_coord = (clamp_bottom ? framebuffer_rect.bottom : efb_height) - 1; const u32 bottom_coord = (clamp_bottom ? framebuffer_rect.bottom : efb_height) - 1;
encoder_params.clamp_bottom = (static_cast<float>(bottom_coord) + .5f) * rcp_efb_height; encoder_params.clamp_bottom = (static_cast<float>(bottom_coord) + .5f) * rcp_efb_height;
encoder_params.filter_coefficients[0] = filter_coefficients.upper; encoder_params.filter_coefficients = filter_coefficients;
encoder_params.filter_coefficients[1] = filter_coefficients.middle;
encoder_params.filter_coefficients[2] = filter_coefficients.lower;
g_vertex_manager->UploadUtilityUniforms(&encoder_params, sizeof(encoder_params)); g_vertex_manager->UploadUtilityUniforms(&encoder_params, sizeof(encoder_params));
// Because the shader uses gl_FragCoord and we read it back, we must render to the lower-left. // Because the shader uses gl_FragCoord and we read it back, we must render to the lower-left.

View File

@ -57,23 +57,30 @@ struct TextureAndTLUTFormat
struct EFBCopyParams struct EFBCopyParams
{ {
EFBCopyParams(PixelFormat efb_format_, EFBCopyFormat copy_format_, bool depth_, bool yuv_, EFBCopyParams(PixelFormat efb_format_, EFBCopyFormat copy_format_, bool depth_, bool yuv_,
bool copy_filter_) bool all_copy_filter_coefs_needed_, bool copy_filter_can_overflow_,
bool apply_gamma_)
: efb_format(efb_format_), copy_format(copy_format_), depth(depth_), yuv(yuv_), : efb_format(efb_format_), copy_format(copy_format_), depth(depth_), yuv(yuv_),
copy_filter(copy_filter_) all_copy_filter_coefs_needed(all_copy_filter_coefs_needed_),
copy_filter_can_overflow(copy_filter_can_overflow_), apply_gamma(apply_gamma_)
{ {
} }
bool operator<(const EFBCopyParams& rhs) const bool operator<(const EFBCopyParams& rhs) const
{ {
return std::tie(efb_format, copy_format, depth, yuv, copy_filter) < return std::tie(efb_format, copy_format, depth, yuv, all_copy_filter_coefs_needed,
std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv, rhs.copy_filter); copy_filter_can_overflow,
apply_gamma) < std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv,
rhs.all_copy_filter_coefs_needed,
rhs.copy_filter_can_overflow, rhs.apply_gamma);
} }
PixelFormat efb_format; PixelFormat efb_format;
EFBCopyFormat copy_format; EFBCopyFormat copy_format;
bool depth; bool depth;
bool yuv; bool yuv;
bool copy_filter; bool all_copy_filter_coefs_needed;
bool copy_filter_can_overflow;
bool apply_gamma;
}; };
template <> template <>
@ -89,19 +96,13 @@ struct fmt::formatter<EFBCopyParams>
else else
copy_format = fmt::to_string(uid.copy_format); copy_format = fmt::to_string(uid.copy_format);
return fmt::format_to(ctx.out(), return fmt::format_to(ctx.out(),
"format: {}, copy format: {}, depth: {}, yuv: {}, copy filter: {}", "format: {}, copy format: {}, depth: {}, yuv: {}, apply_gamma: {}, "
uid.efb_format, copy_format, uid.depth, uid.yuv, uid.copy_filter); "all_copy_filter_coefs_needed: {}, copy_filter_can_overflow: {}",
uid.efb_format, copy_format, uid.depth, uid.yuv, uid.apply_gamma,
uid.all_copy_filter_coefs_needed, uid.copy_filter_can_overflow);
} }
}; };
// Reduced version of the full coefficient array, with a single value for each row.
struct EFBCopyFilterCoefficients
{
float upper;
float middle;
float lower;
};
class TextureCacheBase class TextureCacheBase
{ {
private: private:
@ -267,8 +268,8 @@ public:
// Save States // Save States
void DoState(PointerWrap& p); void DoState(PointerWrap& p);
// Returns false if the top/bottom row coefficients are zero. static bool AllCopyFilterCoefsNeeded(const std::array<u32, 3>& coefficients);
static bool NeedsCopyFilterInShader(const EFBCopyFilterCoefficients& coefficients); static bool CopyFilterCanOverflow(const std::array<u32, 3>& coefficients);
protected: protected:
// Decodes the specified data to the GPU texture specified by entry. // Decodes the specified data to the GPU texture specified by entry.
@ -285,12 +286,12 @@ protected:
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
const MathUtil::Rectangle<int>& src_rect, bool scale_by_half, const MathUtil::Rectangle<int>& src_rect, bool scale_by_half,
bool linear_filter, float y_scale, float gamma, bool clamp_top, bool linear_filter, float y_scale, float gamma, bool clamp_top,
bool clamp_bottom, const EFBCopyFilterCoefficients& filter_coefficients); bool clamp_bottom, const std::array<u32, 3>& filter_coefficients);
virtual void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, virtual void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
const MathUtil::Rectangle<int>& src_rect, bool scale_by_half, const MathUtil::Rectangle<int>& src_rect, bool scale_by_half,
bool linear_filter, EFBCopyFormat dst_format, bool is_intensity, bool linear_filter, EFBCopyFormat dst_format, bool is_intensity,
float gamma, bool clamp_top, bool clamp_bottom, float gamma, bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients); const std::array<u32, 3>& filter_coefficients);
alignas(16) u8* temp = nullptr; alignas(16) u8* temp = nullptr;
size_t temp_size = 0; size_t temp_size = 0;
@ -338,9 +339,9 @@ private:
void UninitializeXFBMemory(u8* dst, u32 stride, u32 bytes_per_row, u32 num_blocks_y); void UninitializeXFBMemory(u8* dst, u32 stride, u32 bytes_per_row, u32 num_blocks_y);
// Precomputing the coefficients for the previous, current, and next lines for the copy filter. // Precomputing the coefficients for the previous, current, and next lines for the copy filter.
static EFBCopyFilterCoefficients static std::array<u32, 3>
GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients); GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients);
static EFBCopyFilterCoefficients static std::array<u32, 3>
GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients); GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients);
// Flushes a pending EFB copy to RAM from the host to the guest RAM. // Flushes a pending EFB copy to RAM from the host to the guest RAM.

View File

@ -62,7 +62,7 @@ static void WriteHeader(ShaderCode& code, APIType api_type)
" float y_scale;\n" " float y_scale;\n"
" float gamma_rcp;\n" " float gamma_rcp;\n"
" float2 clamp_tb;\n" " float2 clamp_tb;\n"
" float3 filter_coefficients;\n" " uint3 filter_coefficients;\n"
"}};\n"); "}};\n");
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
{ {
@ -151,7 +151,7 @@ static void WriteSampleFunction(ShaderCode& code, const EFBCopyParams& params, A
// The filter is only applied to the RGB channels, the alpha channel is left intact. // The filter is only applied to the RGB channels, the alpha channel is left intact.
code.Write("float4 SampleEFB(float2 uv, float2 pixel_size, int xoffset)\n" code.Write("float4 SampleEFB(float2 uv, float2 pixel_size, int xoffset)\n"
"{{\n"); "{{\n");
if (params.copy_filter) if (params.all_copy_filter_coefs_needed)
{ {
code.Write(" float4 prev_row = "); code.Write(" float4 prev_row = ");
WriteSampleOp(-1); WriteSampleOp(-1);
@ -162,9 +162,9 @@ static void WriteSampleFunction(ShaderCode& code, const EFBCopyParams& params, A
" float4 next_row = "); " float4 next_row = ");
WriteSampleOp(1); WriteSampleOp(1);
code.Write(";\n" code.Write(";\n"
" return float4(min(prev_row.rgb * filter_coefficients[0] +\n" " return float4(min(prev_row.rgb * filter_coefficients[0] / 64.0 +\n"
" current_row.rgb * filter_coefficients[1] +\n" " current_row.rgb * filter_coefficients[1] / 64.0 +\n"
" next_row.rgb * filter_coefficients[2], \n" " next_row.rgb * filter_coefficients[2] / 64.0, \n"
" float3(1, 1, 1)), current_row.a);\n"); " float3(1, 1, 1)), current_row.a);\n");
} }
else else
@ -172,7 +172,7 @@ static void WriteSampleFunction(ShaderCode& code, const EFBCopyParams& params, A
code.Write(" float4 current_row = "); code.Write(" float4 current_row = ");
WriteSampleOp(0); WriteSampleOp(0);
code.Write(";\n" code.Write(";\n"
"return float4(min(current_row.rgb * filter_coefficients[1], float3(1, 1, 1)),\n" "return float4(min(current_row.rgb * filter_coefficients[1] / 64.0, float3(1, 1, 1)),\n"
" current_row.a);\n"); " current_row.a);\n");
} }
code.Write("}}\n"); code.Write("}}\n");

View File

@ -6,13 +6,15 @@
#include "Common/Assert.h" #include "Common/Assert.h"
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "VideoCommon/BPMemory.h" #include "VideoCommon/BPMemory.h"
#include "VideoCommon/TextureCacheBase.h"
#include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoCommon.h"
#include "VideoCommon/VideoConfig.h" #include "VideoCommon/VideoConfig.h"
namespace TextureConversionShaderGen namespace TextureConversionShaderGen
{ {
TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity, TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity,
bool scale_by_half, bool copy_filter) bool scale_by_half, float gamma_rcp,
const std::array<u32, 3>& filter_coefficients)
{ {
TCShaderUid out; TCShaderUid out;
@ -22,7 +24,11 @@ TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_i
uid_data->is_depth_copy = is_depth_copy; uid_data->is_depth_copy = is_depth_copy;
uid_data->is_intensity = is_intensity; uid_data->is_intensity = is_intensity;
uid_data->scale_by_half = scale_by_half; uid_data->scale_by_half = scale_by_half;
uid_data->copy_filter = copy_filter; uid_data->all_copy_filter_coefs_needed =
TextureCacheBase::AllCopyFilterCoefsNeeded(filter_coefficients);
uid_data->copy_filter_can_overflow = TextureCacheBase::CopyFilterCanOverflow(filter_coefficients);
// If the gamma is needed, then include that too.
uid_data->apply_gamma = gamma_rcp != 1.0f;
return out; return out;
} }
@ -31,7 +37,7 @@ static void WriteHeader(APIType api_type, ShaderCode& out)
{ {
out.Write("UBO_BINDING(std140, 1) uniform PSBlock {{\n" out.Write("UBO_BINDING(std140, 1) uniform PSBlock {{\n"
" float2 src_offset, src_size;\n" " float2 src_offset, src_size;\n"
" float3 filter_coefficients;\n" " uint3 filter_coefficients;\n"
" float gamma_rcp;\n" " float gamma_rcp;\n"
" float2 clamp_tb;\n" " float2 clamp_tb;\n"
" float pixel_height;\n" " float pixel_height;\n"
@ -98,22 +104,22 @@ ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data)
// The copy filter applies to both color and depth copies. This has been verified on hardware. // The copy filter applies to both color and depth copies. This has been verified on hardware.
// The filter is only applied to the RGB channels, the alpha channel is left intact. // The filter is only applied to the RGB channels, the alpha channel is left intact.
if (uid_data->copy_filter) if (uid_data->all_copy_filter_coefs_needed)
{ {
out.Write(" float4 prev_row = SampleEFB(v_tex0, -1.0f);\n" out.Write(" float4 prev_row = SampleEFB(v_tex0, -1.0f);\n"
" float4 current_row = SampleEFB(v_tex0, 0.0f);\n" " float4 current_row = SampleEFB(v_tex0, 0.0f);\n"
" float4 next_row = SampleEFB(v_tex0, 1.0f);\n" " float4 next_row = SampleEFB(v_tex0, 1.0f);\n"
" float4 texcol = float4(min(prev_row.rgb * filter_coefficients[0] +\n" " float4 texcol = float4(min(prev_row.rgb * filter_coefficients[0] / 64.0 +\n"
" current_row.rgb * filter_coefficients[1] +\n" " current_row.rgb * filter_coefficients[1] / 64.0 +\n"
" next_row.rgb * filter_coefficients[2], \n" " next_row.rgb * filter_coefficients[2] / 64.0, \n"
" float3(1, 1, 1)), current_row.a);\n"); " float3(1, 1, 1)), current_row.a);\n");
} }
else else
{ {
out.Write( out.Write(
" float4 current_row = SampleEFB(v_tex0, 0.0f);\n" " float4 current_row = SampleEFB(v_tex0, 0.0f);\n"
" float4 texcol = float4(min(current_row.rgb * filter_coefficients[1], float3(1, 1, 1)),\n" " float4 texcol = float4(min(current_row.rgb * filter_coefficients[1] / 64.0,\n"
" current_row.a);\n"); " float3(1, 1, 1)), current_row.a);\n");
} }
if (uid_data->is_depth_copy) if (uid_data->is_depth_copy)

View File

@ -25,7 +25,9 @@ struct UidData
u32 is_depth_copy : 1; u32 is_depth_copy : 1;
u32 is_intensity : 1; u32 is_intensity : 1;
u32 scale_by_half : 1; u32 scale_by_half : 1;
u32 copy_filter : 1; u32 all_copy_filter_coefs_needed : 1;
u32 copy_filter_can_overflow : 1;
u32 apply_gamma : 1;
}; };
#pragma pack() #pragma pack()
@ -35,7 +37,8 @@ ShaderCode GenerateVertexShader(APIType api_type);
ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data); ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data);
TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity, TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity,
bool scale_by_half, bool copy_filter); bool scale_by_half, float gamma_rcp,
const std::array<u32, 3>& filter_coefficients);
} // namespace TextureConversionShaderGen } // namespace TextureConversionShaderGen
@ -53,8 +56,10 @@ struct fmt::formatter<TextureConversionShaderGen::UidData>
dst_format = fmt::to_string(uid.dst_format); dst_format = fmt::to_string(uid.dst_format);
return fmt::format_to(ctx.out(), return fmt::format_to(ctx.out(),
"dst_format: {}, efb_has_alpha: {}, is_depth_copy: {}, is_intensity: {}, " "dst_format: {}, efb_has_alpha: {}, is_depth_copy: {}, is_intensity: {}, "
"scale_by_half: {}, copy_filter: {}", "scale_by_half: {}, all_copy_filter_coefs_needed: {}, "
"copy_filter_can_overflow: {}, apply_gamma: {}",
dst_format, uid.efb_has_alpha, uid.is_depth_copy, uid.is_intensity, dst_format, uid.efb_has_alpha, uid.is_depth_copy, uid.is_intensity,
uid.scale_by_half, uid.copy_filter); uid.scale_by_half, uid.all_copy_filter_coefs_needed,
uid.copy_filter_can_overflow, uid.apply_gamma);
} }
}; };