rsx: Enable support for extended range in depth buffer

- Software clipping emulation is used here as OpenGL does not have explicit clip control.
- Hardware clip control for Vulkan will be enabled in a follow-up change.
This commit is contained in:
kd-11 2020-08-16 12:48:20 +03:00 committed by kd-11
parent 2e88924cb9
commit dc465df3bc
12 changed files with 290 additions and 222 deletions

View File

@ -573,20 +573,37 @@ namespace glsl
"}\n\n";
}
if (props.domain == glsl::program_domain::glsl_vertex_program)
if (props.domain == glsl::program_domain::glsl_vertex_program && props.emulate_zclip_transform)
{
OS <<
"vec4 apply_zclip_xform(const in vec4 pos, const in float near_plane, const in float far_plane)\n"
"{\n"
" float d = pos.z / pos.w;\n"
" if (d < 0.f && d >= near_plane)\n"
" d = 0.f;\n" //force clamp negative values
" else if (d > 1.f && d <= far_plane)\n"
" d = min(1., 0.99 + (0.01 * (pos.z - near_plane) / (far_plane - near_plane)));\n"
" else\n"
" return pos; //d = (0.99 * d);\n" //range compression for normal values is disabled until a solution to ops comparing z is found
"\n"
" return vec4(pos.x, pos.y, d * pos.w, pos.w);\n"
" float d = pos.z / pos.w;\n";
if (!props.emulate_depth_clip_only)
{
OS <<
" if (d < 0.f && d >= near_plane)\n"
" d = 0.f;\n" //force clamp negative values
" else if (d > 1.f && d <= far_plane)\n"
" d = min(1., 0.99 + (0.01 * (pos.z - near_plane) / (far_plane - near_plane)));\n"
" else\n"
" return pos; //d = (0.99 * d);\n" //range compression for normal values is disabled until a solution to ops comparing z is found
"\n"
" return vec4(pos.x, pos.y, d * pos.w, pos.w);\n";
}
else
{
// Technically the depth value here is the 'final' depth that should be stored in the Z buffer.
// Forward mapping eqn is d' = d * (f - n) + n, where d' is the stored Z value (this) and d is the normalized API value.
OS <<
" double inv_range = double(1.0) / double(far_plane - near_plane);\n"
" double new_d = (double(d) - double(near_plane)) * inv_range;\n"
"\n"
" return vec4(pos.x, pos.y, float(new_d * pos.w), pos.w);\n";
}
OS <<
"}\n\n";
return;

View File

@ -30,6 +30,8 @@ namespace glsl
bool require_texture_expand;
bool emulate_coverage_tests;
bool emulate_shadow_compare;
bool emulate_zclip_transform;
bool emulate_depth_clip_only;
bool low_precision_tests;
bool disable_early_discard;
bool supports_native_fp16;

View File

@ -167,6 +167,11 @@ void GLGSRender::update_draw_state()
gl_state.depth_bounds(rsx::method_registers.depth_bounds_min(), rsx::method_registers.depth_bounds_max());
}
if (gl::get_driver_caps().NV_depth_buffer_float_supported)
{
gl_state.depth_range(rsx::method_registers.clip_min(), rsx::method_registers.clip_max());
}
gl_state.enable(rsx::method_registers.dither_enabled(), GL_DITHER);
if (gl_state.enable(rsx::method_registers.stencil_test_enabled(), GL_STENCIL_TEST))

View File

@ -7,6 +7,204 @@
namespace gl
{
// Records which optional OpenGL extensions the driver advertises and which
// vendor-specific workarounds apply. Every flag starts out false and is filled
// in by initialize().
class capabilities
{
public:
	bool EXT_dsa_supported = false;
	bool EXT_depth_bounds_test = false;
	bool ARB_dsa_supported = false;
	bool ARB_buffer_storage_supported = false;
	bool ARB_texture_buffer_supported = false;
	bool ARB_shader_draw_parameters_supported = false;
	bool ARB_depth_buffer_float_supported = false;
	bool ARB_texture_barrier_supported = false;
	bool NV_texture_barrier_supported = false;
	bool NV_gpu_shader5_supported = false;
	bool AMD_gpu_shader_half_float_supported = false;
	bool ARB_compute_shader_supported = false;
	bool NV_depth_buffer_float_supported = false;

	bool initialized = false;   // set once initialize() has run to completion
	bool vendor_INTEL = false;  // has broken GLSL compiler
	bool vendor_AMD = false;    // has broken ARB_multidraw
	bool vendor_NVIDIA = false; // has NaN poisoning issues
	bool vendor_MESA = false;   // requires CLIENT_STORAGE bit set for streaming buffers

	// Returns true, and logs a notice, when ext_name is exactly equal to test.
	bool check(const std::string& ext_name, const char* test)
	{
		if (ext_name == test)
		{
			rsx_log.notice("Extension %s is supported", ext_name);
			return true;
		}

		return false;
	}

	// Queries the driver for its extension list and vendor/version/renderer
	// strings, then sets the capability and vendor flags accordingly.
	void initialize()
	{
		// The GL string queries can return a null pointer (for instance on error);
		// building a std::string from a null pointer is undefined behaviour, so map
		// null to an empty string instead.
		const auto to_string = [](const auto* raw) -> std::string
		{
			const char* text = reinterpret_cast<const char*>(raw);
			return text ? std::string(text) : std::string();
		};

		// Extension name -> flag to raise when the driver lists that extension.
		// The number of names to look for is derived from this table, so adding an
		// entry cannot leave a separately maintained counter out of date.
		struct extension_entry
		{
			const char* name;
			bool capabilities::* flag;
		};

		const extension_entry known_extensions[] =
		{
			{ "GL_ARB_shader_draw_parameters", &capabilities::ARB_shader_draw_parameters_supported },
			{ "GL_EXT_direct_state_access", &capabilities::EXT_dsa_supported },
			{ "GL_ARB_direct_state_access", &capabilities::ARB_dsa_supported },
			{ "GL_ARB_buffer_storage", &capabilities::ARB_buffer_storage_supported },
			{ "GL_ARB_texture_buffer_object", &capabilities::ARB_texture_buffer_supported },
			{ "GL_ARB_depth_buffer_float", &capabilities::ARB_depth_buffer_float_supported },
			{ "GL_ARB_texture_barrier", &capabilities::ARB_texture_barrier_supported },
			{ "GL_NV_texture_barrier", &capabilities::NV_texture_barrier_supported },
			{ "GL_NV_gpu_shader5", &capabilities::NV_gpu_shader5_supported },
			{ "GL_AMD_gpu_shader_half_float", &capabilities::AMD_gpu_shader_half_float_supported },
			{ "GL_ARB_compute_shader", &capabilities::ARB_compute_shader_supported },
			{ "GL_EXT_depth_bounds_test", &capabilities::EXT_depth_bounds_test },
			{ "GL_NV_depth_buffer_float", &capabilities::NV_depth_buffer_float_supported },
		};

		int find_count = static_cast<int>(sizeof(known_extensions) / sizeof(known_extensions[0]));
		int ext_count = 0;
		glGetIntegerv(GL_NUM_EXTENSIONS, &ext_count);

		std::string vendor_string = to_string(glGetString(GL_VENDOR));
		std::string version_string = to_string(glGetString(GL_VERSION));
		std::string renderer_string = to_string(glGetString(GL_RENDERER));

		// Stop scanning as soon as every extension of interest has been seen.
		for (int i = 0; i < ext_count && find_count > 0; i++)
		{
			const std::string ext_name = to_string(glGetStringi(GL_EXTENSIONS, i));
			if (ext_name.empty())
			{
				continue;
			}

			for (const auto& ext : known_extensions)
			{
				if (check(ext_name, ext.name))
				{
					this->*ext.flag = true;
					find_count--;
					break;
				}
			}
		}

		// Check GL_VERSION and GL_RENDERER for the presence of Mesa
		if (version_string.find("Mesa") != umax || renderer_string.find("Mesa") != umax)
		{
			vendor_MESA = true;
		}

		// Workaround for intel drivers which have terrible capability reporting
		if (!vendor_string.empty())
		{
			// tolower() is only defined for values representable as unsigned char (or EOF),
			// so widen through unsigned char rather than passing a possibly negative char.
			std::transform(vendor_string.begin(), vendor_string.end(), vendor_string.begin(),
				[](unsigned char c) { return static_cast<char>(::tolower(c)); });
		}
		else
		{
			rsx_log.error("Failed to get vendor string from driver. Are we missing a context?");
			vendor_string = "intel"; // lowest acceptable value
		}

		if (!vendor_MESA && vendor_string.find("intel") != umax)
		{
			int version_major = 0;
			int version_minor = 0;
			glGetIntegerv(GL_MAJOR_VERSION, &version_major);
			glGetIntegerv(GL_MINOR_VERSION, &version_minor);

			vendor_INTEL = true;

			//Texture buffers moved into core at GL 3.3
			if (version_major > 3 || (version_major == 3 && version_minor >= 3))
				ARB_texture_buffer_supported = true;

			//Check for expected library entry-points for some required functions
			if (!ARB_buffer_storage_supported && glBufferStorage && glMapBufferRange)
				ARB_buffer_storage_supported = true;

			if (!ARB_dsa_supported && glGetTextureImage && glTextureBufferRange)
				ARB_dsa_supported = true;

			if (!EXT_dsa_supported && glGetTextureImageEXT && glTextureBufferRangeEXT)
				EXT_dsa_supported = true;
		}
		else if (!vendor_MESA && vendor_string.find("nvidia") != umax)
		{
			vendor_NVIDIA = true;
		}
#ifdef _WIN32
		else if (vendor_string.find("amd") != umax || vendor_string.find("ati") != umax)
		{
			vendor_AMD = true;
		}
#endif

		initialized = true;
	}
};
const capabilities& get_driver_caps();
struct driver_state
{
const u32 DEPTH_BOUNDS_MIN = 0xFFFF0001;
@ -166,7 +364,14 @@ namespace gl
if (!test_property(DEPTH_BOUNDS_MIN, depth_min) || !test_property(DEPTH_BOUNDS_MAX, depth_max))
{
glDepthBoundsEXT(min, max);
if (get_driver_caps().NV_depth_buffer_float_supported)
{
glDepthBoundsdNV(min, max);
}
else
{
glDepthBoundsEXT(min, max);
}
properties[DEPTH_BOUNDS_MIN] = depth_min;
properties[DEPTH_BOUNDS_MAX] = depth_max;
@ -180,7 +385,14 @@ namespace gl
if (!test_property(DEPTH_RANGE_MIN, depth_min) || !test_property(DEPTH_RANGE_MAX, depth_max))
{
glDepthRange(min, max);
if (get_driver_caps().NV_depth_buffer_float_supported)
{
glDepthRangedNV(min, max);
}
else
{
glDepthRange(min, max);
}
properties[DEPTH_RANGE_MIN] = depth_min;
properties[DEPTH_RANGE_MAX] = depth_max;

View File

@ -167,7 +167,7 @@ namespace gl
glDebugMessageCallback(log_debug, nullptr);
}
capabilities &get_driver_caps()
const capabilities& get_driver_caps()
{
if (!g_driver_caps.initialized)
g_driver_caps.initialize();

View File

@ -52,11 +52,7 @@ namespace gl
else\
gl##func##EXT(texture_name, target, __VA_ARGS__);
class capabilities;
class blitter;
void enable_debugging();
capabilities& get_driver_caps();
bool is_primitive_native(rsx::primitive_type in);
GLenum draw_mode(rsx::primitive_type in);
@ -78,194 +74,6 @@ namespace gl
}
};
// Records which optional OpenGL extensions the driver advertises and which
// vendor-specific workarounds apply. Every flag starts out false and is filled
// in by initialize().
class capabilities
{
public:
	bool EXT_dsa_supported = false;
	bool EXT_depth_bounds_test = false;
	bool ARB_dsa_supported = false;
	bool ARB_buffer_storage_supported = false;
	bool ARB_texture_buffer_supported = false;
	bool ARB_shader_draw_parameters_supported = false;
	bool ARB_depth_buffer_float_supported = false;
	bool ARB_texture_barrier_supported = false;
	bool NV_texture_barrier_supported = false;
	bool NV_gpu_shader5_supported = false;
	bool AMD_gpu_shader_half_float_supported = false;
	bool ARB_compute_shader_supported = false;

	bool initialized = false;   // set once initialize() has run to completion
	bool vendor_INTEL = false;  // has broken GLSL compiler
	bool vendor_AMD = false;    // has broken ARB_multidraw
	bool vendor_NVIDIA = false; // has NaN poisoning issues
	bool vendor_MESA = false;   // requires CLIENT_STORAGE bit set for streaming buffers

	// Returns true, and logs a notice, when ext_name is exactly equal to test.
	bool check(const std::string& ext_name, const char* test)
	{
		if (ext_name == test)
		{
			rsx_log.notice("Extension %s is supported", ext_name);
			return true;
		}

		return false;
	}

	// Queries the driver for its extension list and vendor/version/renderer
	// strings, then sets the capability and vendor flags accordingly.
	void initialize()
	{
		// The GL string queries can return a null pointer (for instance on error);
		// building a std::string from a null pointer is undefined behaviour, so map
		// null to an empty string instead.
		const auto to_string = [](const auto* raw) -> std::string
		{
			const char* text = reinterpret_cast<const char*>(raw);
			return text ? std::string(text) : std::string();
		};

		// Extension name -> flag to raise when the driver lists that extension.
		// The number of names to look for is derived from this table, so adding an
		// entry cannot leave a separately maintained counter out of date.
		struct extension_entry
		{
			const char* name;
			bool capabilities::* flag;
		};

		const extension_entry known_extensions[] =
		{
			{ "GL_ARB_shader_draw_parameters", &capabilities::ARB_shader_draw_parameters_supported },
			{ "GL_EXT_direct_state_access", &capabilities::EXT_dsa_supported },
			{ "GL_ARB_direct_state_access", &capabilities::ARB_dsa_supported },
			{ "GL_ARB_buffer_storage", &capabilities::ARB_buffer_storage_supported },
			{ "GL_ARB_texture_buffer_object", &capabilities::ARB_texture_buffer_supported },
			{ "GL_ARB_depth_buffer_float", &capabilities::ARB_depth_buffer_float_supported },
			{ "GL_ARB_texture_barrier", &capabilities::ARB_texture_barrier_supported },
			{ "GL_NV_texture_barrier", &capabilities::NV_texture_barrier_supported },
			{ "GL_NV_gpu_shader5", &capabilities::NV_gpu_shader5_supported },
			{ "GL_AMD_gpu_shader_half_float", &capabilities::AMD_gpu_shader_half_float_supported },
			{ "GL_ARB_compute_shader", &capabilities::ARB_compute_shader_supported },
			{ "GL_EXT_depth_bounds_test", &capabilities::EXT_depth_bounds_test },
		};

		int find_count = static_cast<int>(sizeof(known_extensions) / sizeof(known_extensions[0]));
		int ext_count = 0;
		glGetIntegerv(GL_NUM_EXTENSIONS, &ext_count);

		std::string vendor_string = to_string(glGetString(GL_VENDOR));
		std::string version_string = to_string(glGetString(GL_VERSION));
		std::string renderer_string = to_string(glGetString(GL_RENDERER));

		// Stop scanning as soon as every extension of interest has been seen.
		for (int i = 0; i < ext_count && find_count > 0; i++)
		{
			const std::string ext_name = to_string(glGetStringi(GL_EXTENSIONS, i));
			if (ext_name.empty())
			{
				continue;
			}

			for (const auto& ext : known_extensions)
			{
				if (check(ext_name, ext.name))
				{
					this->*ext.flag = true;
					find_count--;
					break;
				}
			}
		}

		// Check GL_VERSION and GL_RENDERER for the presence of Mesa
		if (version_string.find("Mesa") != umax || renderer_string.find("Mesa") != umax)
		{
			vendor_MESA = true;
		}

		// Workaround for intel drivers which have terrible capability reporting
		if (!vendor_string.empty())
		{
			// tolower() is only defined for values representable as unsigned char (or EOF),
			// so widen through unsigned char rather than passing a possibly negative char.
			std::transform(vendor_string.begin(), vendor_string.end(), vendor_string.begin(),
				[](unsigned char c) { return static_cast<char>(::tolower(c)); });
		}
		else
		{
			rsx_log.error("Failed to get vendor string from driver. Are we missing a context?");
			vendor_string = "intel"; // lowest acceptable value
		}

		if (!vendor_MESA && vendor_string.find("intel") != umax)
		{
			int version_major = 0;
			int version_minor = 0;
			glGetIntegerv(GL_MAJOR_VERSION, &version_major);
			glGetIntegerv(GL_MINOR_VERSION, &version_minor);

			vendor_INTEL = true;

			//Texture buffers moved into core at GL 3.3
			if (version_major > 3 || (version_major == 3 && version_minor >= 3))
				ARB_texture_buffer_supported = true;

			//Check for expected library entry-points for some required functions
			if (!ARB_buffer_storage_supported && glBufferStorage && glMapBufferRange)
				ARB_buffer_storage_supported = true;

			if (!ARB_dsa_supported && glGetTextureImage && glTextureBufferRange)
				ARB_dsa_supported = true;

			if (!EXT_dsa_supported && glGetTextureImageEXT && glTextureBufferRangeEXT)
				EXT_dsa_supported = true;
		}
		else if (!vendor_MESA && vendor_string.find("nvidia") != umax)
		{
			vendor_NVIDIA = true;
		}
#ifdef _WIN32
		else if (vendor_string.find("amd") != umax || vendor_string.find("ati") != umax)
		{
			vendor_AMD = true;
		}
#endif

		initialized = true;
	}
};
class fence
{
GLsync m_value = nullptr;

View File

@ -246,6 +246,10 @@ OPENGL_PROC(PFNGLMEMORYBARRIERPROC, MemoryBarrier);
// ARB_compute_shader
OPENGL_PROC(PFNGLDISPATCHCOMPUTEPROC, DispatchCompute);
// NV_depth_buffer_float
OPENGL_PROC(PFNGLDEPTHRANGEDNVPROC, DepthRangedNV);
OPENGL_PROC(PFNGLDEPTHBOUNDSDNVPROC, DepthBoundsdNV);
WGL_PROC(PFNWGLSWAPINTERVALEXTPROC, SwapIntervalEXT);
#if !defined(__GNUG__) || defined(__MINGW32__)

View File

@ -124,6 +124,8 @@ void GLVertexDecompilerThread::insertMainStart(std::stringstream & OS)
glsl::shader_properties properties2{};
properties2.domain = glsl::glsl_vertex_program;
properties2.require_lit_emulation = properties.has_lit_op;
properties2.emulate_zclip_transform = true;
properties2.emulate_depth_clip_only = dev_caps.NV_depth_buffer_float_supported;
insert_glsl_legacy_function(OS, properties2);
glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_opengl4, dev_caps.vendor_INTEL == false);

View File

@ -108,15 +108,26 @@ void VKGSRender::update_draw_state()
if (m_device->get_depth_bounds_support())
{
f32 bounds_min, bounds_max;
if (rsx::method_registers.depth_bounds_test_enabled())
{
//Update depth bounds min/max
vkCmdSetDepthBounds(*m_current_command_buffer, rsx::method_registers.depth_bounds_min(), rsx::method_registers.depth_bounds_max());
// Update depth bounds min/max
bounds_min = rsx::method_registers.depth_bounds_min();
bounds_max = rsx::method_registers.depth_bounds_max();
}
else
{
vkCmdSetDepthBounds(*m_current_command_buffer, 0.f, 1.f);
bounds_min = rsx::method_registers.clip_min();
bounds_max = rsx::method_registers.clip_max();
}
if (!m_device->get_unrestricted_depth_range_support())
{
bounds_min = std::clamp(bounds_min, 0.f, 1.f);
bounds_max = std::clamp(bounds_max, 0.f, 1.f);
}
vkCmdSetDepthBounds(*m_current_command_buffer, bounds_min, bounds_max);
}
bind_viewport();

View File

@ -955,14 +955,25 @@ void VKGSRender::set_viewport()
{
const auto clip_width = rsx::apply_resolution_scale(rsx::method_registers.surface_clip_width(), true);
const auto clip_height = rsx::apply_resolution_scale(rsx::method_registers.surface_clip_height(), true);
const auto zclip_near = rsx::method_registers.clip_min();
const auto zclip_far = rsx::method_registers.clip_max();
//NOTE: The scale_offset matrix already has viewport matrix factored in
m_viewport.x = 0;
m_viewport.y = 0;
m_viewport.width = clip_width;
m_viewport.height = clip_height;
m_viewport.minDepth = 0.f;
m_viewport.maxDepth = 1.f;
if (m_device->get_unrestricted_depth_range_support())
{
m_viewport.minDepth = zclip_near;
m_viewport.maxDepth = zclip_far;
}
else
{
m_viewport.minDepth = 0.f;
m_viewport.maxDepth = 1.f;
}
}
void VKGSRender::set_scissor(bool clip_viewport)

View File

@ -636,7 +636,7 @@ namespace vk
VkPhysicalDeviceDriverPropertiesKHR driver_properties{};
bool stencil_export_support = false;
bool conditional_render_support = false;
bool host_query_reset_support = false;
bool unrestricted_depth_range_support = false;
friend class render_device;
private:
@ -687,7 +687,7 @@ private:
stencil_export_support = device_extensions.is_supported(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME);
conditional_render_support = device_extensions.is_supported(VK_EXT_CONDITIONAL_RENDERING_EXTENSION_NAME);
host_query_reset_support = device_extensions.is_supported(VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME);
unrestricted_depth_range_support = device_extensions.is_supported(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
}
public:
@ -863,7 +863,6 @@ private:
// Exported device endpoints
PFN_vkCmdBeginConditionalRenderingEXT cmdBeginConditionalRenderingEXT = nullptr;
PFN_vkCmdEndConditionalRenderingEXT cmdEndConditionalRenderingEXT = nullptr;
PFN_vkResetQueryPoolEXT resetQueryPoolEXT = nullptr;
public:
render_device() = default;
@ -903,9 +902,9 @@ private:
requested_extensions.push_back(VK_EXT_CONDITIONAL_RENDERING_EXTENSION_NAME);
}
if (pgpu->host_query_reset_support)
if (pgpu->unrestricted_depth_range_support)
{
requested_extensions.push_back(VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME);
requested_extensions.push_back(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
}
enabled_features.robustBufferAccess = VK_TRUE;
@ -998,11 +997,6 @@ private:
cmdEndConditionalRenderingEXT = reinterpret_cast<PFN_vkCmdEndConditionalRenderingEXT>(vkGetDeviceProcAddr(dev, "vkCmdEndConditionalRenderingEXT"));
}
if (pgpu->host_query_reset_support)
{
resetQueryPoolEXT = reinterpret_cast<PFN_vkResetQueryPoolEXT>(vkGetDeviceProcAddr(dev, "vkResetQueryPoolEXT"));
}
memory_map = vk::get_memory_mapping(pdev);
m_formats_support = vk::get_optimal_tiling_supported_formats(pdev);
m_pipeline_binding_table = vk::get_pipeline_binding_table(pdev);
@ -1113,9 +1107,9 @@ private:
return pgpu->conditional_render_support;
}
bool get_host_query_reset_support() const
bool get_unrestricted_depth_range_support() const
{
return pgpu->host_query_reset_support;
return pgpu->unrestricted_depth_range_support;
}
mem_allocator_base* get_allocator() const

View File

@ -185,6 +185,8 @@ void VKVertexDecompilerThread::insertMainStart(std::stringstream & OS)
glsl::shader_properties properties2{};
properties2.domain = glsl::glsl_vertex_program;
properties2.require_lit_emulation = properties.has_lit_op;
properties2.emulate_zclip_transform = true;
properties2.emulate_depth_clip_only = true;
glsl::insert_glsl_legacy_function(OS, properties2);
glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_spirv);