From 99b6963fab159f6a15fe5f89951d9964ce5a4b35 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sat, 31 Jul 2021 17:27:16 +0300 Subject: [PATCH] rsx: Improve unnormalized coordinate sampling - Improve rounding when sampling nearest neighbour. This is mostly a problem with NVIDIA - Implement unnormalized 3D sampling --- rpcs3/Emu/RSX/Common/TextureUtils.h | 1 + rpcs3/Emu/RSX/Common/texture_cache.h | 23 ++++---- rpcs3/Emu/RSX/Common/texture_cache_helpers.h | 4 +- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 4 +- rpcs3/Emu/RSX/Program/GLSLCommon.cpp | 57 +++++++++++--------- rpcs3/Emu/RSX/Program/program_util.cpp | 12 ++--- rpcs3/Emu/RSX/Program/program_util.h | 7 +-- rpcs3/Emu/RSX/RSXThread.cpp | 15 +++++- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 6 +-- 9 files changed, 74 insertions(+), 55 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.h b/rpcs3/Emu/RSX/Common/TextureUtils.h index 9af137acc6..d984eb0ea0 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.h +++ b/rpcs3/Emu/RSX/Common/TextureUtils.h @@ -133,6 +133,7 @@ namespace rsx u64 surface_cache_tag = 0; f32 scale_x = 1.f; f32 scale_y = 1.f; + f32 scale_z = 1.f; virtual ~sampled_image_descriptor_base() = default; virtual u32 encoded_component_map() const = 0; diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index 9950be5056..745179516f 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -172,7 +172,7 @@ namespace rsx sampled_image_descriptor() = default; sampled_image_descriptor(image_view_type handle, texture_upload_context ctx, rsx::format_class ftype, - size2f scale, rsx::texture_dimension_extended type, bool cyclic_reference = false) + size3f scale, rsx::texture_dimension_extended type, bool cyclic_reference = false) { image_handle = handle; upload_context = ctx; @@ -180,12 +180,13 @@ namespace rsx is_cyclic_reference = cyclic_reference; scale_x = scale.width; scale_y = scale.height; + scale_z = scale.depth; image_type = type; } sampled_image_descriptor(image_resource_type external_handle, deferred_request_command reason, const image_section_attributes_t& attr, position2u src_offset, - texture_upload_context ctx, rsx::format_class ftype, size2f scale, + texture_upload_context ctx, rsx::format_class ftype, size3f scale, rsx::texture_dimension_extended type, const texture_channel_remap_t& remap) { external_subresource_desc = { external_handle, reason, attr, src_offset, remap }; @@ -195,6 +196,7 @@ namespace rsx format_class = ftype; scale_x = scale.width; scale_y = scale.height; + scale_z = scale.depth; image_type = type; } @@ -1656,7 +1658,7 @@ namespace rsx sampled_image_descriptor fast_texture_search( commandbuffer_type& cmd, const image_section_attributes_t& attr, - const size2f& scale, + const size3f& scale, u32 encoded_remap, const texture_channel_remap_t& remap, const texture_cache_search_options& options, @@ -2004,7 +2006,7 @@ namespace rsx u32 tex_size = 0, required_surface_height = 1; u8 subsurface_count = 1; - size2f scale{ 1.f, 1.f }; + size3f scale{ 1.f, 1.f, 1.f }; if (is_unnormalized) { @@ -2012,10 +2014,7 @@ namespace rsx { scale.width /= attributes.width; scale.height /= attributes.height; - } - else - { - rsx_log.error("Unimplemented unnormalized sampling for texture type %d", static_cast(extended_dimension)); + scale.depth /= attributes.depth; } } @@ -2025,7 +2024,7 @@ namespace rsx if (attributes.pitch = tex.pitch(); !attributes.pitch) { attributes.pitch = packed_pitch; - scale = { 0.f, 0.f }; + scale = { 0.f, 0.f, 0.f }; } else if (packed_pitch > attributes.pitch && !options.is_compressed_format) { @@ -2043,12 +2042,13 @@ namespace rsx case rsx::texture_dimension_extended::texture_dimension_1d: attributes.depth = 1; attributes.slice_h = 1; - scale.height = 0.f; + scale.height = scale.depth = 0.f; subsurface_count = 1; required_surface_height = 1; break; case rsx::texture_dimension_extended::texture_dimension_2d: attributes.depth = 1; + scale.depth = 0.f; subsurface_count = options.is_compressed_format? 1 : tex.get_exact_mipmap_count(); attributes.slice_h = required_surface_height = attributes.height; break; @@ -2066,7 +2066,8 @@ namespace rsx required_surface_height = tex_size / attributes.pitch; attributes.slice_h = required_surface_height / attributes.depth; break; - default: break; // TODO + default: + fmt::throw_exception("Unsupported texture dimension %d", static_cast(extended_dimension)); } if (options.is_compressed_format) diff --git a/rpcs3/Emu/RSX/Common/texture_cache_helpers.h b/rpcs3/Emu/RSX/Common/texture_cache_helpers.h index 4629db6e9f..b4914c36fc 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache_helpers.h +++ b/rpcs3/Emu/RSX/Common/texture_cache_helpers.h @@ -503,7 +503,7 @@ namespace rsx sampled_image_descriptor process_framebuffer_resource_fast(commandbuffer_type& cmd, render_target_type texptr, const image_section_attributes_t& attr, - const size2f& scale, + const size3f& scale, texture_dimension_extended extended_dimension, u32 encoded_remap, const texture_channel_remap_t& decoded_remap, bool surface_is_rop_target, @@ -594,7 +594,7 @@ namespace rsx sampled_image_descriptor merge_cache_resources( const surface_store_list_type& fbos, const std::vector& local, const image_section_attributes_t& attr, - const size2f& scale, + const size3f& scale, texture_dimension_extended extended_dimension, u32 /*encoded_remap*/, const texture_channel_remap_t& decoded_remap, int select_hint = -1) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 2ecbf1e901..f833a10755 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -812,10 +812,10 @@ void GLGSRender::load_program_env() if (update_fragment_texture_env) { // Fragment texture parameters - auto mapping = m_texture_parameters_buffer->alloc_from_heap(256, m_uniform_buffer_offset_align); + auto mapping = m_texture_parameters_buffer->alloc_from_heap(512, m_uniform_buffer_offset_align); current_fragment_program.texture_params.write_to(mapping.first, current_fp_metadata.referenced_textures_mask); - m_texture_parameters_buffer->bind_range(GL_FRAGMENT_TEXTURE_PARAMS_BIND_SLOT, mapping.second, 256); + m_texture_parameters_buffer->bind_range(GL_FRAGMENT_TEXTURE_PARAMS_BIND_SLOT, mapping.second, 512); } if (update_raster_env) diff --git a/rpcs3/Emu/RSX/Program/GLSLCommon.cpp b/rpcs3/Emu/RSX/Program/GLSLCommon.cpp index 891b4b9055..6f3983ac42 100644 --- a/rpcs3/Emu/RSX/Program/GLSLCommon.cpp +++ b/rpcs3/Emu/RSX/Program/GLSLCommon.cpp @@ -847,44 +847,49 @@ namespace glsl "#define TEX_NAME(index) tex##index\n" "#define TEX_NAME_STENCIL(index) tex##index##_stencil\n\n" - "#define TEX1D(index, coord1) process_texel(texture(TEX_NAME(index), coord1 * texture_parameters[index].scale.x), TEX_FLAGS(index))\n" - "#define TEX1D_BIAS(index, coord1, bias) process_texel(texture(TEX_NAME(index), coord1 * texture_parameters[index].scale.x, bias), TEX_FLAGS(index))\n" - "#define TEX1D_LOD(index, coord1, lod) process_texel(textureLod(TEX_NAME(index), coord1 * texture_parameters[index].scale.x, lod), TEX_FLAGS(index))\n" - "#define TEX1D_GRAD(index, coord1, dpdx, dpdy) process_texel(textureGrad(TEX_NAME(index), coord1 * texture_parameters[index].scale.x, dpdx, dpdy), TEX_FLAGS(index))\n" - "#define TEX1D_PROJ(index, coord2) process_texel(textureProj(TEX_NAME(index), coord2 * vec2(texture_parameters[index].scale.x, 1.)), TEX_FLAGS(index))\n" + "#define COORD_SCALE1(index, coord1) ((coord1 + texture_parameters[index].scale_bias.w) * texture_parameters[index].scale_bias.x)\n" + "#define COORD_SCALE2(index, coord2) ((coord2 + texture_parameters[index].scale_bias.w) * texture_parameters[index].scale_bias.xy)\n" + "#define COORD_SCALE3(index, coord3) ((coord3 + texture_parameters[index].scale_bias.w) * texture_parameters[index].scale_bias.xyz)\n\n" - "#define TEX2D(index, coord2) process_texel(texture(TEX_NAME(index), coord2 * texture_parameters[index].scale), TEX_FLAGS(index))\n" - "#define TEX2D_BIAS(index, coord2, bias) process_texel(texture(TEX_NAME(index), coord2 * texture_parameters[index].scale, bias), TEX_FLAGS(index))\n" - "#define TEX2D_LOD(index, coord2, lod) process_texel(textureLod(TEX_NAME(index), coord2 * texture_parameters[index].scale, lod), TEX_FLAGS(index))\n" - "#define TEX2D_GRAD(index, coord2, dpdx, dpdy) process_texel(textureGrad(TEX_NAME(index), coord2 * texture_parameters[index].scale, dpdx, dpdy), TEX_FLAGS(index))\n" - "#define TEX2D_PROJ(index, coord4) process_texel(textureProj(TEX_NAME(index), coord4 * vec4(texture_parameters[index].scale, 1., 1.)), TEX_FLAGS(index))\n" + "#define TEX1D(index, coord1) process_texel(texture(TEX_NAME(index), COORD_SCALE1(index, coord1)), TEX_FLAGS(index))\n" + "#define TEX1D_BIAS(index, coord1, bias) process_texel(texture(TEX_NAME(index), COORD_SCALE1(index, coord1), bias), TEX_FLAGS(index))\n" + "#define TEX1D_LOD(index, coord1, lod) process_texel(textureLod(TEX_NAME(index), COORD_SCALE1(index, coord1), lod), TEX_FLAGS(index))\n" + "#define TEX1D_GRAD(index, coord1, dpdx, dpdy) process_texel(textureGrad(TEX_NAME(index), COORD_SCALE1(index, coord1), dpdx, dpdy), TEX_FLAGS(index))\n" + "#define TEX1D_PROJ(index, coord2) process_texel(textureProj(TEX_NAME(index), vec2(COORD_SCALE1(index, coord2.x), coord2.y)), TEX_FLAGS(index))\n" - "#define TEX2D_DEPTH_RGBA8(index, coord2) process_texel(texture2DReconstruct(TEX_NAME(index), TEX_NAME_STENCIL(index), coord2 * texture_parameters[index].scale, texture_parameters[index].remap, TEX_FLAGS(index)), TEX_FLAGS(index))\n"; + "#define TEX2D(index, coord2) process_texel(texture(TEX_NAME(index), COORD_SCALE2(index, coord2)), TEX_FLAGS(index))\n" + "#define TEX2D_BIAS(index, coord2, bias) process_texel(texture(TEX_NAME(index), COORD_SCALE2(index, coord2), bias), TEX_FLAGS(index))\n" + "#define TEX2D_LOD(index, coord2, lod) process_texel(textureLod(TEX_NAME(index), COORD_SCALE2(index, coord2), lod), TEX_FLAGS(index))\n" + "#define TEX2D_GRAD(index, coord2, dpdx, dpdy) process_texel(textureGrad(TEX_NAME(index), COORD_SCALE2(index, coord2), dpdx, dpdy), TEX_FLAGS(index))\n" + "#define TEX2D_PROJ(index, coord4) process_texel(textureProj(TEX_NAME(index), vec4(COORD_SCALE2(index, coord4.xy), coord4.z, coord4.w)), TEX_FLAGS(index))\n" + + "#define TEX2D_DEPTH_RGBA8(index, coord2) process_texel(texture2DReconstruct(TEX_NAME(index), TEX_NAME_STENCIL(index), COORD_SCALE2(index, coord2), texture_parameters[index].remap, TEX_FLAGS(index)), TEX_FLAGS(index))\n"; if (props.emulate_shadow_compare) { OS << - "#define SHADOW_COORD(coord3, scale, flags) vec3(coord3.xy * scale, _test_bit(flags, DEPTH_FLOAT)? coord3.z : min(float(coord3.z), 1.0))\n" - "#define SHADOW_COORD4(coord4, scale, flags) vec4(SHADOW_COORD(coord4.xyz, scale, flags), coord4.w)\n" - "#define SHADOW_COORD_PROJ(coord4, scale, flags) vec4(coord4.xy * scale, _test_bit(flags, DEPTH_FLOAT)? coord4.z : min(coord4.z, coord4.w), coord4.w)\n" - "#define TEX2D_SHADOW(index, coord3) texture(TEX_NAME(index), SHADOW_COORD(coord3, texture_parameters[index].scale, TEX_FLAGS(index)))\n" - "#define TEX2D_SHADOWCUBE(index, coord4) texture(TEX_NAME(index), SHADOW_COORD4(coord4, texture_parameters[index].scale, TEX_FLAGS(index)))\n" - "#define TEX2D_SHADOWPROJ(index, coord4) textureProj(TEX_NAME(index), SHADOW_COORD_PROJ(coord4, texture_parameters[index].scale, TEX_FLAGS(index)))\n"; + "#define SHADOW_COORD(index, coord3) vec3(COORD_SCALE2(index, coord3.xy), _test_bit(TEX_FLAGS(index), DEPTH_FLOAT)? coord3.z : min(float(coord3.z), 1.0))\n" + "#define SHADOW_COORD4(index, coord4) vec4(SHADOW_COORD(index, coord4.xyz), coord4.w)\n" + "#define SHADOW_COORD_PROJ(index, coord4) vec4(COORD_SCALE2(index, coord4.xy), _test_bit(TEX_FLAGS(index), DEPTH_FLOAT)? coord4.z : min(coord4.z, coord4.w), coord4.w)\n\n" + + "#define TEX2D_SHADOW(index, coord3) texture(TEX_NAME(index), SHADOW_COORD(index, coord3))\n" + "#define TEX2D_SHADOWCUBE(index, coord4) texture(TEX_NAME(index), SHADOW_COORD4(index, coord4))\n" + "#define TEX2D_SHADOWPROJ(index, coord4) textureProj(TEX_NAME(index), SHADOW_COORD_PROJ(index, coord4))\n"; } else { OS << - "#define TEX2D_SHADOW(index, coord3) texture(TEX_NAME(index), coord3 * vec3(texture_parameters[index].scale, 1.))\n" - "#define TEX2D_SHADOWCUBE(index, coord4) texture(TEX_NAME(index), coord4 * vec3(texture_parameters[index].scale, 1., 1.))\n" - "#define TEX2D_SHADOWPROJ(index, coord4) textureProj(TEX_NAME(index), coord4 * vec4(texture_parameters[index].scale, 1., 1.))\n"; + "#define TEX2D_SHADOW(index, coord3) texture(TEX_NAME(index), vec3(COORD_SCALE2(index, coord3.xy), coord3.z))\n" + "#define TEX2D_SHADOWCUBE(index, coord4) texture(TEX_NAME(index), vec4(COORD_SCALE3(index, coord4.xyz), coord4.w))\n" + "#define TEX2D_SHADOWPROJ(index, coord4) textureProj(TEX_NAME(index), vec4(COORD_SCALE3(index, coord4.xyz), coord4.w))\n"; } OS << - "#define TEX3D(index, coord3) process_texel(texture(TEX_NAME(index), coord3), TEX_FLAGS(index))\n" - "#define TEX3D_BIAS(index, coord3, bias) process_texel(texture(TEX_NAME(index), coord3, bias), TEX_FLAGS(index))\n" - "#define TEX3D_LOD(index, coord3, lod) process_texel(textureLod(TEX_NAME(index), coord3, lod), TEX_FLAGS(index))\n" - "#define TEX3D_GRAD(index, coord3, dpdx, dpdy) process_texel(textureGrad(TEX_NAME(index), coord3, dpdx, dpdy), TEX_FLAGS(index))\n" - "#define TEX3D_PROJ(index, coord4) process_texel(textureProj(TEX_NAME(index), coord4), TEX_FLAGS(index))\n\n"; + "#define TEX3D(index, coord3) process_texel(texture(TEX_NAME(index), COORD_SCALE3(index, coord3)), TEX_FLAGS(index))\n" + "#define TEX3D_BIAS(index, coord3, bias) process_texel(texture(TEX_NAME(index), COORD_SCALE3(index, coord3), bias), TEX_FLAGS(index))\n" + "#define TEX3D_LOD(index, coord3, lod) process_texel(textureLod(TEX_NAME(index), COORD_SCALE3(index, coord3), lod), TEX_FLAGS(index))\n" + "#define TEX3D_GRAD(index, coord3, dpdx, dpdy) process_texel(textureGrad(TEX_NAME(index), COORD_SCALE3(index, coord3), dpdx, dpdy), TEX_FLAGS(index))\n" + "#define TEX3D_PROJ(index, coord4) process_texel(textureProj(TEX_NAME(index), vec4(COORD_SCALE3(index, coord4.xyz), coord4.w)), TEX_FLAGS(index))\n\n"; } if (props.require_wpos) @@ -998,7 +1003,7 @@ namespace glsl OS << "struct sampler_info\n" "{\n" - " vec2 scale;\n" + " vec4 scale_bias;\n" " uint remap;\n" " uint flags;\n" "};\n" diff --git a/rpcs3/Emu/RSX/Program/program_util.cpp b/rpcs3/Emu/RSX/Program/program_util.cpp index ea0e8d3bb0..80d8a2237a 100644 --- a/rpcs3/Emu/RSX/Program/program_util.cpp +++ b/rpcs3/Emu/RSX/Program/program_util.cpp @@ -24,21 +24,21 @@ namespace rsx case 0: return; case 1: - std::memcpy(dst, src, 16); return; + std::memcpy(dst, src, sizeof(TIU_slot)); return; case 3: - std::memcpy(dst, src, 32); return; + std::memcpy(dst, src, sizeof(TIU_slot) * 2); return; case 7: - std::memcpy(dst, src, 48); return; + std::memcpy(dst, src, sizeof(TIU_slot) * 3); return; case 15: - std::memcpy(dst, src, 64); return; + std::memcpy(dst, src, sizeof(TIU_slot) * 4); return; default: break; }; const auto start = std::countr_zero(mask); const auto end = 16 - std::countl_zero(mask); - const auto mem_offset = (start * 16); - const auto mem_size = (end - start) * 16; + const auto mem_offset = (start * sizeof(TIU_slot)); + const auto mem_size = (end - start) * sizeof(TIU_slot); std::memcpy(static_cast(dst) + mem_offset, reinterpret_cast(src) + mem_offset, mem_size); } diff --git a/rpcs3/Emu/RSX/Program/program_util.h b/rpcs3/Emu/RSX/Program/program_util.h index c99ab6ac48..dc149f9383 100644 --- a/rpcs3/Emu/RSX/Program/program_util.h +++ b/rpcs3/Emu/RSX/Program/program_util.h @@ -11,15 +11,16 @@ namespace rsx }; #pragma pack(push, 1) - // NOTE: This structure must be packed to match GPU layout. + // NOTE: This structure must be packed to match GPU layout (std140). struct fragment_program_texture_config { struct TIU_slot { - float scale_x; - float scale_y; + float scale[3]; + float subpixel_bias; u32 remap; u32 control; + u32 padding[2]; } slots_[16]; // QT headers will collide with any variable named 'slots' because reasons diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index a9535b51ef..9ddf1f1948 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -1878,8 +1878,10 @@ namespace rsx auto &tex = rsx::method_registers.fragment_textures[i]; if (tex.enabled()) { - current_fragment_program.texture_params[i].scale_x = sampler_descriptors[i]->scale_x; - current_fragment_program.texture_params[i].scale_y = sampler_descriptors[i]->scale_y; + current_fragment_program.texture_params[i].scale[0] = sampler_descriptors[i]->scale_x; + current_fragment_program.texture_params[i].scale[1] = sampler_descriptors[i]->scale_y; + current_fragment_program.texture_params[i].scale[2] = sampler_descriptors[i]->scale_z; + current_fragment_program.texture_params[i].subpixel_bias = 0.f; current_fragment_program.texture_params[i].remap = tex.remap(); m_graphics_state |= rsx::pipeline_state::fragment_texture_state_dirty; @@ -1899,8 +1901,17 @@ namespace rsx const u32 format = raw_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); if (raw_format & CELL_GCM_TEXTURE_UN) + { current_fp_texture_state.unnormalized_coords |= (1 << i); + if (tex.min_filter() == rsx::texture_minify_filter::nearest || + tex.mag_filter() == rsx::texture_magnify_filter::nearest) + { + // Subpixel offset so that (X + bias) * scale will round correctly + current_fragment_program.texture_params[i].subpixel_bias = 0.5f; + } + } + if (sampler_descriptors[i]->format_class != RSX_FORMAT_CLASS_COLOR) { switch (sampler_descriptors[i]->format_class) diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 93cc1f4f92..3e6abdf257 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1897,12 +1897,12 @@ void VKGSRender::load_program_env() { check_heap_status(VK_HEAP_CHECK_TEXTURE_ENV_STORAGE); - auto mem = m_fragment_texture_params_ring_info.alloc<256>(256); - auto buf = m_fragment_texture_params_ring_info.map(mem, 256); + auto mem = m_fragment_texture_params_ring_info.alloc<256>(512); + auto buf = m_fragment_texture_params_ring_info.map(mem, 512); current_fragment_program.texture_params.write_to(buf, current_fp_metadata.referenced_textures_mask); m_fragment_texture_params_ring_info.unmap(); - m_fragment_texture_params_buffer_info = { m_fragment_texture_params_ring_info.heap->value, mem, 256 }; + m_fragment_texture_params_buffer_info = { m_fragment_texture_params_ring_info.heap->value, mem, 512 }; } if (update_raster_env)