rsx: Improve unnormalized coordinate sampling

- Improve rounding when sampling with nearest-neighbour filtering. This is mostly a problem on NVIDIA hardware.
- Implement unnormalized 3D sampling
This commit is contained in:
kd-11 2021-07-31 17:27:16 +03:00 committed by kd-11
parent b3c65b7bca
commit 99b6963fab
9 changed files with 74 additions and 55 deletions

View File

@ -133,6 +133,7 @@ namespace rsx
u64 surface_cache_tag = 0;
f32 scale_x = 1.f;
f32 scale_y = 1.f;
f32 scale_z = 1.f;
virtual ~sampled_image_descriptor_base() = default;
virtual u32 encoded_component_map() const = 0;

View File

@ -172,7 +172,7 @@ namespace rsx
sampled_image_descriptor() = default;
sampled_image_descriptor(image_view_type handle, texture_upload_context ctx, rsx::format_class ftype,
size2f scale, rsx::texture_dimension_extended type, bool cyclic_reference = false)
size3f scale, rsx::texture_dimension_extended type, bool cyclic_reference = false)
{
image_handle = handle;
upload_context = ctx;
@ -180,12 +180,13 @@ namespace rsx
is_cyclic_reference = cyclic_reference;
scale_x = scale.width;
scale_y = scale.height;
scale_z = scale.depth;
image_type = type;
}
sampled_image_descriptor(image_resource_type external_handle, deferred_request_command reason,
const image_section_attributes_t& attr, position2u src_offset,
texture_upload_context ctx, rsx::format_class ftype, size2f scale,
texture_upload_context ctx, rsx::format_class ftype, size3f scale,
rsx::texture_dimension_extended type, const texture_channel_remap_t& remap)
{
external_subresource_desc = { external_handle, reason, attr, src_offset, remap };
@ -195,6 +196,7 @@ namespace rsx
format_class = ftype;
scale_x = scale.width;
scale_y = scale.height;
scale_z = scale.depth;
image_type = type;
}
@ -1656,7 +1658,7 @@ namespace rsx
sampled_image_descriptor fast_texture_search(
commandbuffer_type& cmd,
const image_section_attributes_t& attr,
const size2f& scale,
const size3f& scale,
u32 encoded_remap,
const texture_channel_remap_t& remap,
const texture_cache_search_options& options,
@ -2004,7 +2006,7 @@ namespace rsx
u32 tex_size = 0, required_surface_height = 1;
u8 subsurface_count = 1;
size2f scale{ 1.f, 1.f };
size3f scale{ 1.f, 1.f, 1.f };
if (is_unnormalized)
{
@ -2012,10 +2014,7 @@ namespace rsx
{
scale.width /= attributes.width;
scale.height /= attributes.height;
}
else
{
rsx_log.error("Unimplemented unnormalized sampling for texture type %d", static_cast<u32>(extended_dimension));
scale.depth /= attributes.depth;
}
}
@ -2025,7 +2024,7 @@ namespace rsx
if (attributes.pitch = tex.pitch(); !attributes.pitch)
{
attributes.pitch = packed_pitch;
scale = { 0.f, 0.f };
scale = { 0.f, 0.f, 0.f };
}
else if (packed_pitch > attributes.pitch && !options.is_compressed_format)
{
@ -2043,12 +2042,13 @@ namespace rsx
case rsx::texture_dimension_extended::texture_dimension_1d:
attributes.depth = 1;
attributes.slice_h = 1;
scale.height = 0.f;
scale.height = scale.depth = 0.f;
subsurface_count = 1;
required_surface_height = 1;
break;
case rsx::texture_dimension_extended::texture_dimension_2d:
attributes.depth = 1;
scale.depth = 0.f;
subsurface_count = options.is_compressed_format? 1 : tex.get_exact_mipmap_count();
attributes.slice_h = required_surface_height = attributes.height;
break;
@ -2066,7 +2066,8 @@ namespace rsx
required_surface_height = tex_size / attributes.pitch;
attributes.slice_h = required_surface_height / attributes.depth;
break;
default: break; // TODO
default:
fmt::throw_exception("Unsupported texture dimension %d", static_cast<int>(extended_dimension));
}
if (options.is_compressed_format)

View File

@ -503,7 +503,7 @@ namespace rsx
sampled_image_descriptor process_framebuffer_resource_fast(commandbuffer_type& cmd,
render_target_type texptr,
const image_section_attributes_t& attr,
const size2f& scale,
const size3f& scale,
texture_dimension_extended extended_dimension,
u32 encoded_remap, const texture_channel_remap_t& decoded_remap,
bool surface_is_rop_target,
@ -594,7 +594,7 @@ namespace rsx
sampled_image_descriptor merge_cache_resources(
const surface_store_list_type& fbos, const std::vector<section_storage_type*>& local,
const image_section_attributes_t& attr,
const size2f& scale,
const size3f& scale,
texture_dimension_extended extended_dimension,
u32 /*encoded_remap*/, const texture_channel_remap_t& decoded_remap,
int select_hint = -1)

View File

@ -812,10 +812,10 @@ void GLGSRender::load_program_env()
if (update_fragment_texture_env)
{
// Fragment texture parameters
auto mapping = m_texture_parameters_buffer->alloc_from_heap(256, m_uniform_buffer_offset_align);
auto mapping = m_texture_parameters_buffer->alloc_from_heap(512, m_uniform_buffer_offset_align);
current_fragment_program.texture_params.write_to(mapping.first, current_fp_metadata.referenced_textures_mask);
m_texture_parameters_buffer->bind_range(GL_FRAGMENT_TEXTURE_PARAMS_BIND_SLOT, mapping.second, 256);
m_texture_parameters_buffer->bind_range(GL_FRAGMENT_TEXTURE_PARAMS_BIND_SLOT, mapping.second, 512);
}
if (update_raster_env)

View File

@ -847,44 +847,49 @@ namespace glsl
"#define TEX_NAME(index) tex##index\n"
"#define TEX_NAME_STENCIL(index) tex##index##_stencil\n\n"
"#define TEX1D(index, coord1) process_texel(texture(TEX_NAME(index), coord1 * texture_parameters[index].scale.x), TEX_FLAGS(index))\n"
"#define TEX1D_BIAS(index, coord1, bias) process_texel(texture(TEX_NAME(index), coord1 * texture_parameters[index].scale.x, bias), TEX_FLAGS(index))\n"
"#define TEX1D_LOD(index, coord1, lod) process_texel(textureLod(TEX_NAME(index), coord1 * texture_parameters[index].scale.x, lod), TEX_FLAGS(index))\n"
"#define TEX1D_GRAD(index, coord1, dpdx, dpdy) process_texel(textureGrad(TEX_NAME(index), coord1 * texture_parameters[index].scale.x, dpdx, dpdy), TEX_FLAGS(index))\n"
"#define TEX1D_PROJ(index, coord2) process_texel(textureProj(TEX_NAME(index), coord2 * vec2(texture_parameters[index].scale.x, 1.)), TEX_FLAGS(index))\n"
"#define COORD_SCALE1(index, coord1) ((coord1 + texture_parameters[index].scale_bias.w) * texture_parameters[index].scale_bias.x)\n"
"#define COORD_SCALE2(index, coord2) ((coord2 + texture_parameters[index].scale_bias.w) * texture_parameters[index].scale_bias.xy)\n"
"#define COORD_SCALE3(index, coord3) ((coord3 + texture_parameters[index].scale_bias.w) * texture_parameters[index].scale_bias.xyz)\n\n"
"#define TEX2D(index, coord2) process_texel(texture(TEX_NAME(index), coord2 * texture_parameters[index].scale), TEX_FLAGS(index))\n"
"#define TEX2D_BIAS(index, coord2, bias) process_texel(texture(TEX_NAME(index), coord2 * texture_parameters[index].scale, bias), TEX_FLAGS(index))\n"
"#define TEX2D_LOD(index, coord2, lod) process_texel(textureLod(TEX_NAME(index), coord2 * texture_parameters[index].scale, lod), TEX_FLAGS(index))\n"
"#define TEX2D_GRAD(index, coord2, dpdx, dpdy) process_texel(textureGrad(TEX_NAME(index), coord2 * texture_parameters[index].scale, dpdx, dpdy), TEX_FLAGS(index))\n"
"#define TEX2D_PROJ(index, coord4) process_texel(textureProj(TEX_NAME(index), coord4 * vec4(texture_parameters[index].scale, 1., 1.)), TEX_FLAGS(index))\n"
"#define TEX1D(index, coord1) process_texel(texture(TEX_NAME(index), COORD_SCALE1(index, coord1)), TEX_FLAGS(index))\n"
"#define TEX1D_BIAS(index, coord1, bias) process_texel(texture(TEX_NAME(index), COORD_SCALE1(index, coord1), bias), TEX_FLAGS(index))\n"
"#define TEX1D_LOD(index, coord1, lod) process_texel(textureLod(TEX_NAME(index), COORD_SCALE1(index, coord1), lod), TEX_FLAGS(index))\n"
"#define TEX1D_GRAD(index, coord1, dpdx, dpdy) process_texel(textureGrad(TEX_NAME(index), COORD_SCALE1(index, coord1), dpdx, dpdy), TEX_FLAGS(index))\n"
"#define TEX1D_PROJ(index, coord2) process_texel(textureProj(TEX_NAME(index), vec2(COORD_SCALE1(index, coord2.x), coord2.y)), TEX_FLAGS(index))\n"
"#define TEX2D_DEPTH_RGBA8(index, coord2) process_texel(texture2DReconstruct(TEX_NAME(index), TEX_NAME_STENCIL(index), coord2 * texture_parameters[index].scale, texture_parameters[index].remap, TEX_FLAGS(index)), TEX_FLAGS(index))\n";
"#define TEX2D(index, coord2) process_texel(texture(TEX_NAME(index), COORD_SCALE2(index, coord2)), TEX_FLAGS(index))\n"
"#define TEX2D_BIAS(index, coord2, bias) process_texel(texture(TEX_NAME(index), COORD_SCALE2(index, coord2), bias), TEX_FLAGS(index))\n"
"#define TEX2D_LOD(index, coord2, lod) process_texel(textureLod(TEX_NAME(index), COORD_SCALE2(index, coord2), lod), TEX_FLAGS(index))\n"
"#define TEX2D_GRAD(index, coord2, dpdx, dpdy) process_texel(textureGrad(TEX_NAME(index), COORD_SCALE2(index, coord2), dpdx, dpdy), TEX_FLAGS(index))\n"
"#define TEX2D_PROJ(index, coord4) process_texel(textureProj(TEX_NAME(index), vec4(COORD_SCALE2(index, coord4.xy), coord4.z, coord4.w)), TEX_FLAGS(index))\n"
"#define TEX2D_DEPTH_RGBA8(index, coord2) process_texel(texture2DReconstruct(TEX_NAME(index), TEX_NAME_STENCIL(index), COORD_SCALE2(index, coord2), texture_parameters[index].remap, TEX_FLAGS(index)), TEX_FLAGS(index))\n";
if (props.emulate_shadow_compare)
{
OS <<
"#define SHADOW_COORD(coord3, scale, flags) vec3(coord3.xy * scale, _test_bit(flags, DEPTH_FLOAT)? coord3.z : min(float(coord3.z), 1.0))\n"
"#define SHADOW_COORD4(coord4, scale, flags) vec4(SHADOW_COORD(coord4.xyz, scale, flags), coord4.w)\n"
"#define SHADOW_COORD_PROJ(coord4, scale, flags) vec4(coord4.xy * scale, _test_bit(flags, DEPTH_FLOAT)? coord4.z : min(coord4.z, coord4.w), coord4.w)\n"
"#define TEX2D_SHADOW(index, coord3) texture(TEX_NAME(index), SHADOW_COORD(coord3, texture_parameters[index].scale, TEX_FLAGS(index)))\n"
"#define TEX2D_SHADOWCUBE(index, coord4) texture(TEX_NAME(index), SHADOW_COORD4(coord4, texture_parameters[index].scale, TEX_FLAGS(index)))\n"
"#define TEX2D_SHADOWPROJ(index, coord4) textureProj(TEX_NAME(index), SHADOW_COORD_PROJ(coord4, texture_parameters[index].scale, TEX_FLAGS(index)))\n";
"#define SHADOW_COORD(index, coord3) vec3(COORD_SCALE2(index, coord3.xy), _test_bit(TEX_FLAGS(index), DEPTH_FLOAT)? coord3.z : min(float(coord3.z), 1.0))\n"
"#define SHADOW_COORD4(index, coord4) vec4(SHADOW_COORD(index, coord4.xyz), coord4.w)\n"
"#define SHADOW_COORD_PROJ(index, coord4) vec4(COORD_SCALE2(index, coord4.xy), _test_bit(TEX_FLAGS(index), DEPTH_FLOAT)? coord4.z : min(coord4.z, coord4.w), coord4.w)\n\n"
"#define TEX2D_SHADOW(index, coord3) texture(TEX_NAME(index), SHADOW_COORD(index, coord3))\n"
"#define TEX2D_SHADOWCUBE(index, coord4) texture(TEX_NAME(index), SHADOW_COORD4(index, coord4))\n"
"#define TEX2D_SHADOWPROJ(index, coord4) textureProj(TEX_NAME(index), SHADOW_COORD_PROJ(index, coord4))\n";
}
else
{
OS <<
"#define TEX2D_SHADOW(index, coord3) texture(TEX_NAME(index), coord3 * vec3(texture_parameters[index].scale, 1.))\n"
"#define TEX2D_SHADOWCUBE(index, coord4) texture(TEX_NAME(index), coord4 * vec3(texture_parameters[index].scale, 1., 1.))\n"
"#define TEX2D_SHADOWPROJ(index, coord4) textureProj(TEX_NAME(index), coord4 * vec4(texture_parameters[index].scale, 1., 1.))\n";
"#define TEX2D_SHADOW(index, coord3) texture(TEX_NAME(index), vec3(COORD_SCALE2(index, coord3.xy), coord3.z))\n"
"#define TEX2D_SHADOWCUBE(index, coord4) texture(TEX_NAME(index), vec4(COORD_SCALE3(index, coord4.xyz), coord4.w))\n"
"#define TEX2D_SHADOWPROJ(index, coord4) textureProj(TEX_NAME(index), vec4(COORD_SCALE3(index, coord4.xyz), coord4.w))\n";
}
OS <<
"#define TEX3D(index, coord3) process_texel(texture(TEX_NAME(index), coord3), TEX_FLAGS(index))\n"
"#define TEX3D_BIAS(index, coord3, bias) process_texel(texture(TEX_NAME(index), coord3, bias), TEX_FLAGS(index))\n"
"#define TEX3D_LOD(index, coord3, lod) process_texel(textureLod(TEX_NAME(index), coord3, lod), TEX_FLAGS(index))\n"
"#define TEX3D_GRAD(index, coord3, dpdx, dpdy) process_texel(textureGrad(TEX_NAME(index), coord3, dpdx, dpdy), TEX_FLAGS(index))\n"
"#define TEX3D_PROJ(index, coord4) process_texel(textureProj(TEX_NAME(index), coord4), TEX_FLAGS(index))\n\n";
"#define TEX3D(index, coord3) process_texel(texture(TEX_NAME(index), COORD_SCALE3(index, coord3)), TEX_FLAGS(index))\n"
"#define TEX3D_BIAS(index, coord3, bias) process_texel(texture(TEX_NAME(index), COORD_SCALE3(index, coord3), bias), TEX_FLAGS(index))\n"
"#define TEX3D_LOD(index, coord3, lod) process_texel(textureLod(TEX_NAME(index), COORD_SCALE3(index, coord3), lod), TEX_FLAGS(index))\n"
"#define TEX3D_GRAD(index, coord3, dpdx, dpdy) process_texel(textureGrad(TEX_NAME(index), COORD_SCALE3(index, coord3), dpdx, dpdy), TEX_FLAGS(index))\n"
"#define TEX3D_PROJ(index, coord4) process_texel(textureProj(TEX_NAME(index), vec4(COORD_SCALE3(index, coord4.xyz), coord4.w)), TEX_FLAGS(index))\n\n";
}
if (props.require_wpos)
@ -998,7 +1003,7 @@ namespace glsl
OS <<
"struct sampler_info\n"
"{\n"
" vec2 scale;\n"
" vec4 scale_bias;\n"
" uint remap;\n"
" uint flags;\n"
"};\n"

View File

@ -24,21 +24,21 @@ namespace rsx
case 0:
return;
case 1:
std::memcpy(dst, src, 16); return;
std::memcpy(dst, src, sizeof(TIU_slot)); return;
case 3:
std::memcpy(dst, src, 32); return;
std::memcpy(dst, src, sizeof(TIU_slot) * 2); return;
case 7:
std::memcpy(dst, src, 48); return;
std::memcpy(dst, src, sizeof(TIU_slot) * 3); return;
case 15:
std::memcpy(dst, src, 64); return;
std::memcpy(dst, src, sizeof(TIU_slot) * 4); return;
default:
break;
};
const auto start = std::countr_zero(mask);
const auto end = 16 - std::countl_zero(mask);
const auto mem_offset = (start * 16);
const auto mem_size = (end - start) * 16;
const auto mem_offset = (start * sizeof(TIU_slot));
const auto mem_size = (end - start) * sizeof(TIU_slot);
std::memcpy(static_cast<u8*>(dst) + mem_offset, reinterpret_cast<const u8*>(src) + mem_offset, mem_size);
}

View File

@ -11,15 +11,16 @@ namespace rsx
};
#pragma pack(push, 1)
// NOTE: This structure must be packed to match GPU layout.
// NOTE: This structure must be packed to match GPU layout (std140).
struct fragment_program_texture_config
{
struct TIU_slot
{
float scale_x;
float scale_y;
float scale[3];
float subpixel_bias;
u32 remap;
u32 control;
u32 padding[2];
}
slots_[16]; // QT headers will collide with any variable named 'slots' because reasons

View File

@ -1878,8 +1878,10 @@ namespace rsx
auto &tex = rsx::method_registers.fragment_textures[i];
if (tex.enabled())
{
current_fragment_program.texture_params[i].scale_x = sampler_descriptors[i]->scale_x;
current_fragment_program.texture_params[i].scale_y = sampler_descriptors[i]->scale_y;
current_fragment_program.texture_params[i].scale[0] = sampler_descriptors[i]->scale_x;
current_fragment_program.texture_params[i].scale[1] = sampler_descriptors[i]->scale_y;
current_fragment_program.texture_params[i].scale[2] = sampler_descriptors[i]->scale_z;
current_fragment_program.texture_params[i].subpixel_bias = 0.f;
current_fragment_program.texture_params[i].remap = tex.remap();
m_graphics_state |= rsx::pipeline_state::fragment_texture_state_dirty;
@ -1899,8 +1901,17 @@ namespace rsx
const u32 format = raw_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
if (raw_format & CELL_GCM_TEXTURE_UN)
{
current_fp_texture_state.unnormalized_coords |= (1 << i);
if (tex.min_filter() == rsx::texture_minify_filter::nearest ||
tex.mag_filter() == rsx::texture_magnify_filter::nearest)
{
// Subpixel offset so that (X + bias) * scale will round correctly
current_fragment_program.texture_params[i].subpixel_bias = 0.5f;
}
}
if (sampler_descriptors[i]->format_class != RSX_FORMAT_CLASS_COLOR)
{
switch (sampler_descriptors[i]->format_class)

View File

@ -1897,12 +1897,12 @@ void VKGSRender::load_program_env()
{
check_heap_status(VK_HEAP_CHECK_TEXTURE_ENV_STORAGE);
auto mem = m_fragment_texture_params_ring_info.alloc<256>(256);
auto buf = m_fragment_texture_params_ring_info.map(mem, 256);
auto mem = m_fragment_texture_params_ring_info.alloc<256>(512);
auto buf = m_fragment_texture_params_ring_info.map(mem, 512);
current_fragment_program.texture_params.write_to(buf, current_fp_metadata.referenced_textures_mask);
m_fragment_texture_params_ring_info.unmap();
m_fragment_texture_params_buffer_info = { m_fragment_texture_params_ring_info.heap->value, mem, 256 };
m_fragment_texture_params_buffer_info = { m_fragment_texture_params_ring_info.heap->value, mem, 512 };
}
if (update_raster_env)