rsx: Implement packed format renormalization

- Renormalizes arbitrary N-bit values as 8-bit normalized.
- NV hardware performs integer normalization at 8 bits if the component size is less than 8 bits.
- This can cause significant arithmetic drift because the error is multiplied by a huge number when sampling.
This commit is contained in:
kd-11 2019-10-13 22:37:10 +03:00 committed by kd-11
parent 407be45069
commit f7842b765f
5 changed files with 50 additions and 10 deletions

View File

@ -669,7 +669,7 @@ namespace glsl
"}\n\n"
//TODO: Move all the texture read control operations here
"vec4 process_texel(const in vec4 rgba, const in uint control_bits)\n"
"vec4 process_texel(in vec4 rgba, const in uint control_bits)\n"
"{\n"
#ifdef __APPLE__
" uint remap_bits = (control_bits >> 16) & 0xFFFF;\n"
@ -690,6 +690,13 @@ namespace glsl
" }\n"
" }\n"
"\n"
// Bit 5 (0x20) of control_bits requests renormalization: every channel is snapped down to a multiple of 1/255.
" if ((control_bits & 0x20) != 0)\n"
" {\n"
" // Renormalize to 8-bit (PS3) accuracy\n"
" rgba = floor(rgba * 255.);\n"
" rgba /= 255.;\n"
" }\n"
"\n"
" //TODO: Verify gamma control bit ordering, looks to be 0x7 for rgb, 0xF for rgba\n"
" uvec4 mask = uvec4(control_bits & 0xF) & uvec4(0x1, 0x2, 0x4, 0x8);\n"
" vec4 convert = srgb_to_linear(rgba);\n"

View File

@ -37,8 +37,8 @@ GLGSRender::GLGSRender() : GSRender()
else
m_vertex_cache = std::make_unique<gl::weak_vertex_cache>();
supports_hw_a2c = false;
supports_multidraw = true;
backend_config.supports_hw_a2c = false;
backend_config.supports_multidraw = true;
}
extern CellGcmContextData current_context;
@ -826,7 +826,7 @@ void GLGSRender::on_init_thread()
m_identity_index_buffer->create(gl::buffer::target::element_array, 1 * 0x100000, nullptr, gl::buffer::memory_type::host_visible);
// Initialize with 256k identity entries
auto *dst = (u32*)m_identity_index_buffer->map(gl::buffer::access::write);
auto* dst = (u32*)m_identity_index_buffer->map(gl::buffer::access::write);
for (u32 n = 0; n < (0x100000 >> 2); ++n)
{
dst[n] = n;
@ -834,6 +834,12 @@ void GLGSRender::on_init_thread()
m_identity_index_buffer->unmap();
}
else if (gl_caps.vendor_NVIDIA)
{
// NOTE: On NVIDIA cards going back decades (including the PS3) there is a slight normalization inaccuracy in compressed formats.
// Confirmed in BLES01916 (The Evil Within) which uses RGB565 for some virtual texturing data.
backend_config.supports_hw_renormalization = true;
}
m_persistent_stream_view.update(m_attrib_ring_buffer.get(), 0, std::min<u32>((u32)m_attrib_ring_buffer->size(), m_max_texbuffer_size));
m_volatile_stream_view.update(m_attrib_ring_buffer.get(), 0, std::min<u32>((u32)m_attrib_ring_buffer->size(), m_max_texbuffer_size));

View File

@ -710,7 +710,7 @@ namespace rsx
auto alpha_ref = rsx::method_registers.alpha_ref() / 255.f;
auto rop_control = rsx::method_registers.alpha_test_enabled()? 1u : 0u;
if (rsx::method_registers.msaa_alpha_to_coverage_enabled() && !supports_hw_a2c)
if (rsx::method_registers.msaa_alpha_to_coverage_enabled() && !backend_config.supports_hw_a2c)
{
// Alpha values generate a coverage mask for order independent blending
// Requires hardware AA to work properly (or just fragment sample stage in fragment shaders)
@ -1739,6 +1739,22 @@ namespace rsx
LOG_ERROR(RSX, "Depth texture bound to pipeline with unexpected format 0x%X", format);
}
}
else if (!backend_config.supports_hw_renormalization)
{
switch (format)
{
case CELL_GCM_TEXTURE_A1R5G5B5:
case CELL_GCM_TEXTURE_A4R4G4B4:
case CELL_GCM_TEXTURE_D1R5G5B5:
case CELL_GCM_TEXTURE_R5G5B5A1:
case CELL_GCM_TEXTURE_R5G6B5:
case CELL_GCM_TEXTURE_R6G5B5:
texture_control |= (1 << 5);
break;
default:
break;
}
}
if (const auto srgb_mask = tex.gamma())
{

View File

@ -464,6 +464,13 @@ namespace rsx
}
};
// Capability flags that the renderer backends fill in and the shared RSX code reads.
// Every flag defaults to false, so a default-initialized instance never carries
// indeterminate values.
struct backend_configuration
{
	bool supports_multidraw = false;          // Draw call batching
	bool supports_hw_a2c = false;             // Alpha to coverage
	bool supports_hw_renormalization = false; // Should be true on NV hardware which matches PS3 texture renormalization behaviour
};
struct sampled_image_descriptor_base;
class thread
@ -484,8 +491,7 @@ namespace rsx
bool skip_current_frame = false;
frame_statistics_t stats{};
bool supports_multidraw = false; // Draw call batching
bool supports_hw_a2c = false; // Alpha to coverage
backend_configuration backend_config{};
// FIFO
std::unique_ptr<FIFO::FIFO_control> fifo_ctrl;

View File

@ -528,10 +528,15 @@ VKGSRender::VKGSRender() : GSRender()
m_ui_renderer = std::make_unique<vk::ui_overlay_renderer>();
m_ui_renderer->create(*m_current_command_buffer, m_texture_upload_buffer_ring_info);
supports_multidraw = true;
backend_config.supports_multidraw = true;
// NOTE: We do not actually need multiple sample support for A2C to work
// This is here for visual consistency - will be removed when AA problems due to mipmaps are fixed
supports_hw_a2c = (g_cfg.video.antialiasing_level != msaa_level::none);
backend_config.supports_hw_a2c = (g_cfg.video.antialiasing_level != msaa_level::none);
// NOTE: On NVIDIA cards going back decades (including the PS3) there is a slight normalization inaccuracy in compressed formats.
// Confirmed in BLES01916 (The Evil Within) which uses RGB565 for some virtual texturing data.
backend_config.supports_hw_renormalization = (vk::get_driver_vendor() == vk::driver_vendor::NVIDIA);
}
VKGSRender::~VKGSRender()
@ -2573,7 +2578,7 @@ bool VKGSRender::load_program()
}
const auto rasterization_samples = u8((m_current_renderpass_key >> 16) & 0xF);
if (supports_hw_a2c || rasterization_samples > 1)
if (backend_config.supports_hw_a2c || rasterization_samples > 1)
{
properties.state.set_multisample_state(
rasterization_samples,