From fc05511354da331d0491f597e620206111450a91 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Fri, 1 Apr 2022 21:53:25 +0300 Subject: [PATCH] rsx: Optimize software sampling further for the 6-tap kernel --- rpcs3/Emu/RSX/Program/GLSLCommon.cpp | 114 ++++++++++++++------------- rpcs3/Emu/RSX/Program/GLSLCommon.h | 3 +- rpcs3/Emu/RSX/RSXThread.cpp | 3 +- 3 files changed, 63 insertions(+), 57 deletions(-) diff --git a/rpcs3/Emu/RSX/Program/GLSLCommon.cpp b/rpcs3/Emu/RSX/Program/GLSLCommon.cpp index 2ef5d5ecf1..b6bbc383a0 100644 --- a/rpcs3/Emu/RSX/Program/GLSLCommon.cpp +++ b/rpcs3/Emu/RSX/Program/GLSLCommon.cpp @@ -593,10 +593,12 @@ namespace glsl "#define RENORMALIZE " << rsx::texture_control_bits::RENORMALIZE << "\n" "#define DEPTH_FLOAT " << rsx::texture_control_bits::DEPTH_FLOAT << "\n" "#define DEPTH_COMPARE " << rsx::texture_control_bits::DEPTH_COMPARE_OP << "\n" - "#define FILTERED_BIT " << rsx::texture_control_bits::FILTERED << "\n" + "#define FILTERED_MAG_BIT " << rsx::texture_control_bits::FILTERED_MAG << "\n" + "#define FILTERED_MIN_BIT " << rsx::texture_control_bits::FILTERED_MIN << "\n" "#define INT_COORDS_BIT " << rsx::texture_control_bits::UNNORMALIZED_COORDS << "\n" "#define GAMMA_CTRL_MASK (GAMMA_R_MASK|GAMMA_G_MASK|GAMMA_B_MASK|GAMMA_A_MASK)\n" - "#define SIGN_EXPAND_MASK (EXPAND_R_MASK|EXPAND_G_MASK|EXPAND_B_MASK|EXPAND_A_MASK)\n\n"; + "#define SIGN_EXPAND_MASK (EXPAND_R_MASK|EXPAND_G_MASK|EXPAND_B_MASK|EXPAND_A_MASK)\n" + "#define FILTERED_MASK (FILTERED_MAG_BIT|FILTERED_MIN_BIT)\n\n"; } } @@ -902,43 +904,37 @@ namespace glsl "#define TEX2D_Z24X8_RGBA8_MS(index, coord2) process_texel(convert_z24x8_to_rgba8(ZS_READ_MS(index, coord2), texture_parameters[index].remap, TEX_FLAGS(index)), TEX_FLAGS(index))\n\n"; OS << - R"( - vec3 compute2x2DownsampleWeights(const in float coord, const in float uv_step, const in float actual_step) - { - const float last_sample_point = max(coord - actual_step, 0.); - const float next_sample_point = min(coord + actual_step, 1.); - const float last_coord_step = floor(coord / uv_step) * uv_step; - const float next_coord_step = last_coord_step + uv_step; - const float next_next_coord_step = next_coord_step + uv_step; - const vec3 weights = vec3(next_coord_step - coord, - min(next_next_coord_step, next_sample_point) - next_coord_step, - max(next_next_coord_step, next_sample_point) - next_next_coord_step); - return weights / dot(weights, vec3(1)); - } - )"; + "vec3 compute2x2DownsampleWeights(const in float coord, const in float uv_step, const in float actual_step)" + "{\n" + " const float next_sample_point = coord + actual_step;\n" + " const float next_coord_step = fma(floor(coord / uv_step), uv_step, uv_step);\n" + " const float next_coord_step_plus_one = next_coord_step + uv_step;\n" + " vec3 weights = vec3(next_coord_step, min(next_coord_step_plus_one, next_sample_point), max(next_coord_step_plus_one, next_sample_point)) - vec3(coord, next_coord_step, next_coord_step_plus_one);\n" + " return weights / actual_step;\n" + "}\n\n"; auto insert_msaa_sample_code = [&OS](const std::string_view& sampler_type) { OS << - "vec4 texelFetch2DMS(in " << sampler_type << " tex, const in ivec2 sample_count, const in ivec2 icoords, const in int index, const in ivec2 offset)\n" + "vec4 texelFetch2DMS(in " << sampler_type << " tex, const in vec2 sample_count, const in ivec2 icoords, const in int index, const in ivec2 offset)\n" "{\n" - " const ivec2 resolve_coords = (icoords + offset) * ivec2(bvec2(texture_parameters[index].scale_bias.xy));\n" - " const ivec2 aa_coords = resolve_coords / sample_count;\n" - " const ivec2 sample_loc = ivec2(resolve_coords % sample_count);\n" - " const int sample_index = sample_loc.x + (sample_loc.y * sample_count.y);\n" - " return texelFetch(tex, aa_coords, sample_index);\n" + " const vec2 resolve_coords = vec2(icoords + offset);\n" + " const vec2 aa_coords = floor(resolve_coords / sample_count);\n" // AA coords = real_coords / sample_count + " const vec2 sample_loc = fma(aa_coords, -sample_count, resolve_coords);\n" // Sample ID = real_coords % sample_count + " const float sample_index = fma(sample_loc.y, sample_count.y, sample_loc.x);\n" + " return texelFetch(tex, ivec2(aa_coords), int(sample_index));\n" "}\n\n" "vec4 sampleTexture2DMS(in " << sampler_type << " tex, const in vec2 coords, const in int index)\n" "{\n" " const uint flags = TEX_FLAGS(index);\n" " const vec2 normalized_coords = COORD_SCALE2(index, coords);\n" - " const ivec2 sample_count = ivec2(2, textureSamples(tex) / 2);\n" - " const ivec2 image_size = textureSize(tex) * sample_count;\n" + " const vec2 sample_count = vec2(2., textureSamples(tex) * 0.5);\n" + " const vec2 image_size = textureSize(tex) * sample_count;\n" " const ivec2 icoords = ivec2(normalized_coords * image_size);\n" " const vec4 sample0 = texelFetch2DMS(tex, sample_count, icoords, index, ivec2(0));\n" "\n" - " if (!_test_bit(flags, FILTERED_BIT))\n" + " if (_get_bits(flags, FILTERED_MAG_BIT, 2) == 0)\n" " {\n" " return sample0;\n" " }\n" @@ -947,44 +943,57 @@ namespace glsl " const vec2 uv_step = 1.0 / vec2(image_size);\n" " const vec2 actual_step = vec2(dFdx(normalized_coords.x), dFdy(normalized_coords.y));\n" "\n" - " if (uv_step.x == actual_step.x && uv_step.y == actual_step.y)\n" + " const bvec2 no_filter = lessThan(abs(uv_step - actual_step), vec2(0.000001));\n" + " if (no_filter.x && no_filter.y)\n" " {\n" " return sample0;\n" " }\n" "\n" - " // Fetch remaining samples\n" - " const vec4 sample1 = texelFetch2DMS(tex, sample_count, icoords, index, ivec2(1, 0));\n" // Bottom right - " const vec4 sample2 = texelFetch2DMS(tex, sample_count, icoords, index, ivec2(0, 1));\n" // Top left - " const vec4 sample3 = texelFetch2DMS(tex, sample_count, icoords, index, ivec2(1, 1));\n" // Top right - "\n" " vec4 a, b;\n" " float factor;\n" + " const vec4 sample2 = texelFetch2DMS(tex, sample_count, icoords, index, ivec2(0, 1)); // Top left\n" "\n" - " if (actual_step.x > uv_step.x)\n" - " {\n" - " // Downscale in X, centered\n" - " const vec4 sample4 = texelFetch2DMS(tex, sample_count, icoords, index, ivec2(2, 0));\n" // Further bottom right - " const vec4 sample5 = texelFetch2DMS(tex, sample_count, icoords, index, ivec2(2, 1));\n" // Further top right - " const vec3 weights = compute2x2DownsampleWeights(normalized_coords.x, uv_step.x, actual_step.x);\n" - "\n" - " a = (sample0 * weights.x + sample1 * weights.y + sample4 * weights.z);\n" // Weighted sum - " b = (sample2 * weights.x + sample3 * weights.y + sample5 * weights.z);\n" // Weighted sum - " }\n" - " else if (actual_step.x < uv_step.x)\n" - " {\n" - " // Upscale in X\n" - " factor = fract(normalized_coords.x * image_size.x);\n" - " a = mix(sample0, sample1, factor);\n" - " b = mix(sample2, sample3, factor);\n" - " }\n" - " else\n" + " if (no_filter.x)\n" " {\n" " // No scaling, 1:1\n" " a = sample0;\n" " b = sample2;\n" " }\n" + " else\n" + " {\n" + " // Filter required, sample more data\n" + " const vec4 sample1 = texelFetch2DMS(tex, sample_count, icoords, index, ivec2(1, 0)); // Bottom right\n" + " const vec4 sample3 = texelFetch2DMS(tex, sample_count, icoords, index, ivec2(1, 1)); // Top right\n" "\n" - " if (actual_step.y > uv_step.y)\n" + " if (actual_step.x > uv_step.x)\n" + " {\n" + " // Downscale in X, centered\n" + " const vec3 weights = compute2x2DownsampleWeights(normalized_coords.x, uv_step.x, actual_step.x);\n" + "\n" + " const vec4 sample4 = texelFetch2DMS(tex, sample_count, icoords, index, ivec2(2, 0)); // Further bottom right\n" + " a = fma(sample0, weights.xxxx, sample1 * weights.y) + (sample4 * weights.z); // Weighted sum\n" + "\n" + " if (!no_filter.y)\n" + " {\n" + " const vec4 sample5 = texelFetch2DMS(tex, sample_count, icoords, index, ivec2(2, 1)); // Further top right\n" + " b = fma(sample2, weights.xxxx, sample3 * weights.y) + (sample5 * weights.z); // Weighted sum\n" + " }\n" + " }\n" + " else if (actual_step.x < uv_step.x)\n" + " {\n" + " // Upscale in X\n" + " factor = fract(normalized_coords.x * image_size.x);\n" + " a = mix(sample0, sample1, factor);\n" + " b = mix(sample2, sample3, factor);\n" + " }\n" + " }\n" + "\n" + " if (no_filter.y)\n" + " {\n" + " // 1:1 no scale\n" + " return a;\n" + " }\n" + " else if (actual_step.y > uv_step.y)\n" " {\n" " // Downscale in Y\n" " const vec3 weights = compute2x2DownsampleWeights(normalized_coords.y, uv_step.y, actual_step.y);\n" @@ -997,11 +1006,6 @@ namespace glsl " factor = fract(normalized_coords.y * image_size.y);\n" " return mix(a, b, factor);\n" " }\n" - " else\n" - " {\n" - " // 1:1 no scale\n" - " return a;\n" - " }\n" "}\n\n"; }; diff --git a/rpcs3/Emu/RSX/Program/GLSLCommon.h b/rpcs3/Emu/RSX/Program/GLSLCommon.h index 5d704de3af..13813ea4ac 100644 --- a/rpcs3/Emu/RSX/Program/GLSLCommon.h +++ b/rpcs3/Emu/RSX/Program/GLSLCommon.h @@ -23,7 +23,8 @@ namespace rsx DEPTH_COMPARE_OP, DEPTH_COMPARE_1, DEPTH_COMPARE_2, - FILTERED, + FILTERED_MAG, + FILTERED_MIN, UNNORMALIZED_COORDS, GAMMA_CTRL_MASK = (1 << GAMMA_R) | (1 << GAMMA_G) | (1 << GAMMA_B) | (1 << GAMMA_A), diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index bfa0e05ade..d05511bfb1 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -2002,7 +2002,8 @@ namespace rsx { current_fp_texture_state.multisampled_textures |= (1 << i); texture_control |= (static_cast(tex.zfunc()) << texture_control_bits::DEPTH_COMPARE_OP); - texture_control |= (static_cast(tex.mag_filter() != rsx::texture_magnify_filter::nearest) << texture_control_bits::FILTERED); + texture_control |= (static_cast(tex.mag_filter() != rsx::texture_magnify_filter::nearest) << texture_control_bits::FILTERED_MAG); + texture_control |= (static_cast(tex.min_filter() != rsx::texture_minify_filter::nearest) << texture_control_bits::FILTERED_MIN); texture_control |= (((tex.format() & CELL_GCM_TEXTURE_UN) >> 6) << texture_control_bits::UNNORMALIZED_COORDS); }