mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-03-29 22:20:48 +00:00
rsx: Always enable ROP output quantization on NV
This commit is contained in:
parent
e04855a0da
commit
c4b259e0f8
@ -214,7 +214,7 @@ void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
|
|||||||
m_shader_props.low_precision_tests = ::gl::get_driver_caps().vendor_NVIDIA;
|
m_shader_props.low_precision_tests = ::gl::get_driver_caps().vendor_NVIDIA;
|
||||||
m_shader_props.disable_early_discard = !::gl::get_driver_caps().vendor_NVIDIA;
|
m_shader_props.disable_early_discard = !::gl::get_driver_caps().vendor_NVIDIA;
|
||||||
m_shader_props.supports_native_fp16 = device_props.has_native_half_support;
|
m_shader_props.supports_native_fp16 = device_props.has_native_half_support;
|
||||||
m_shader_props.srgb_output_rounding = ::gl::get_driver_caps().vendor_NVIDIA;
|
m_shader_props.ROP_output_rounding = ::gl::get_driver_caps().vendor_NVIDIA;
|
||||||
|
|
||||||
glsl::insert_glsl_legacy_function(OS, m_shader_props);
|
glsl::insert_glsl_legacy_function(OS, m_shader_props);
|
||||||
}
|
}
|
||||||
|
@ -427,82 +427,69 @@ namespace glsl
|
|||||||
const std::string reg2 = props.fp32_outputs ? "r3" : "h6";
|
const std::string reg2 = props.fp32_outputs ? "r3" : "h6";
|
||||||
const std::string reg3 = props.fp32_outputs ? "r4" : "h8";
|
const std::string reg3 = props.fp32_outputs ? "r4" : "h8";
|
||||||
|
|
||||||
//TODO: Implement all ROP options like CSAA and ALPHA_TO_ONE here
|
|
||||||
if (props.disable_early_discard)
|
if (props.disable_early_discard)
|
||||||
{
|
{
|
||||||
OS <<
|
OS <<
|
||||||
" if (_fragment_discard)\n"
|
" if (_fragment_discard)\n"
|
||||||
" {\n"
|
" {\n"
|
||||||
" discard;\n"
|
" discard;\n"
|
||||||
" }\n"
|
" }\n\n";
|
||||||
" else if ((rop_control & ROP_CMD_MASK) != 0)\n";
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
OS << " if ((rop_control & ROP_CMD_MASK) != 0)\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
OS <<
|
|
||||||
" {\n"
|
|
||||||
" const bool alpha_test = _test_bit(rop_control, ALPHA_TEST_ENABLE_BIT);\n"
|
|
||||||
" const uint alpha_func = _get_bits(rop_control, ALPHA_TEST_FUNC_OFFSET, ALPHA_TEST_FUNC_LENGTH);\n";
|
|
||||||
|
|
||||||
if (!props.fp32_outputs)
|
|
||||||
{
|
|
||||||
OS << " const bool srgb_convert = _test_bit(rop_control, SRGB_FRAMEBUFFER_BIT);\n\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
if (props.emulate_coverage_tests)
|
|
||||||
{
|
|
||||||
OS << " const bool a2c_enabled = _test_bit(rop_control, ALPHA_TO_COVERAGE_ENABLE_BIT);\n";
|
|
||||||
OS << " const bool msaa_write_enabled = _test_bit(rop_control, MSAA_WRITE_ENABLE_BIT);\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
OS <<
|
|
||||||
" if (alpha_test && !comparison_passes(ROP_quantize(" << reg0 << ").a, alpha_ref, alpha_func))\n"
|
|
||||||
" {\n"
|
|
||||||
" discard;\n"
|
|
||||||
" }\n";
|
|
||||||
|
|
||||||
if (props.emulate_coverage_tests)
|
|
||||||
{
|
|
||||||
OS <<
|
|
||||||
" else if (a2c_enabled && (!msaa_write_enabled || !coverage_test_passes(" << reg0 << ")))\n"
|
|
||||||
" {\n"
|
|
||||||
" discard;\n"
|
|
||||||
" }\n";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Pre-output stages
|
||||||
if (!props.fp32_outputs)
|
if (!props.fp32_outputs)
|
||||||
{
|
{
|
||||||
// Tested using NPUB90375; some shaders (32-bit output only?) do not obey srgb flags
|
// Tested using NPUB90375; some shaders (32-bit output only?) do not obey srgb flags
|
||||||
if (props.supports_native_fp16)
|
const auto vtype = (props.fp32_outputs || !props.supports_native_fp16) ? "vec4" : "f16vec4";
|
||||||
{
|
OS <<
|
||||||
OS <<
|
" if (_test_bit(rop_control, SRGB_FRAMEBUFFER_BIT))\n"
|
||||||
" else if (srgb_convert)\n"
|
" {\n"
|
||||||
" {\n"
|
" " << reg0 << " = " << vtype << "(linear_to_srgb(" << reg0 << ").rgb, " << reg0 << ".a);\n"
|
||||||
" " << reg0 << " = round_srgb8(f16vec4(linear_to_srgb(" << reg0 << ").rgb, " << reg0 << ".a));\n"
|
" " << reg1 << " = " << vtype << "(linear_to_srgb(" << reg1 << ").rgb, " << reg1 << ".a);\n"
|
||||||
" " << reg1 << " = round_srgb8(f16vec4(linear_to_srgb(" << reg1 << ").rgb, " << reg1 << ".a));\n"
|
" " << reg2 << " = " << vtype << "(linear_to_srgb(" << reg2 << ").rgb, " << reg2 << ".a);\n"
|
||||||
" " << reg2 << " = round_srgb8(f16vec4(linear_to_srgb(" << reg2 << ").rgb, " << reg2 << ".a));\n"
|
" " << reg3 << " = " << vtype << "(linear_to_srgb(" << reg3 << ").rgb, " << reg3 << ".a);\n"
|
||||||
" " << reg3 << " = round_srgb8(f16vec4(linear_to_srgb(" << reg3 << ").rgb, " << reg3 << ".a));\n"
|
" }\n\n";
|
||||||
" }\n";
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
OS <<
|
|
||||||
" else if (srgb_convert)\n"
|
|
||||||
" {\n"
|
|
||||||
" " << reg0 << " = round_srgb8(vec4(linear_to_srgb(" << reg0 << ").rgb, " << reg0 << ".a));\n"
|
|
||||||
" " << reg1 << " = round_srgb8(vec4(linear_to_srgb(" << reg1 << ").rgb, " << reg1 << ".a));\n"
|
|
||||||
" " << reg2 << " = round_srgb8(vec4(linear_to_srgb(" << reg2 << ").rgb, " << reg2 << ".a));\n"
|
|
||||||
" " << reg3 << " = round_srgb8(vec4(linear_to_srgb(" << reg3 << ").rgb, " << reg3 << ".a));\n"
|
|
||||||
" }\n";
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
OS <<
|
// Output conversion
|
||||||
" }\n\n"
|
if (props.ROP_output_rounding)
|
||||||
|
{
|
||||||
|
OS <<
|
||||||
|
" if (_test_bit(rop_control, INT_FRAMEBUFFER_BIT))\n"
|
||||||
|
" {\n"
|
||||||
|
" " << reg0 << " = round_to_8bit(" << reg0 << ");\n"
|
||||||
|
" " << reg1 << " = round_to_8bit(" << reg1 << ");\n"
|
||||||
|
" " << reg2 << " = round_to_8bit(" << reg2 << ");\n"
|
||||||
|
" " << reg3 << " = round_to_8bit(" << reg3 << ");\n"
|
||||||
|
" }\n\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
// Post-output stages
|
||||||
|
// TODO: Implement all ROP options like CSAA and ALPHA_TO_ONE here
|
||||||
|
OS <<
|
||||||
|
// Alpha Testing
|
||||||
|
" if (_test_bit(rop_control, ALPHA_TEST_ENABLE_BIT))\n"
|
||||||
|
" {\n"
|
||||||
|
" const uint alpha_func = _get_bits(rop_control, ALPHA_TEST_FUNC_OFFSET, ALPHA_TEST_FUNC_LENGTH);\n"
|
||||||
|
" if (!comparison_passes(" << reg0 << ".a, alpha_ref, alpha_func)) discard;\n"
|
||||||
|
" }\n\n";
|
||||||
|
|
||||||
|
// ALPHA_TO_COVERAGE
|
||||||
|
if (props.emulate_coverage_tests)
|
||||||
|
{
|
||||||
|
OS <<
|
||||||
|
" if (_test_bit(rop_control, ALPHA_TO_COVERAGE_ENABLE_BIT))\n"
|
||||||
|
" {\n"
|
||||||
|
" if (!_test_bit(rop_control, MSAA_WRITE_ENABLE_BIT) ||\n"
|
||||||
|
" !coverage_test_passes(" << reg0 << "))\n"
|
||||||
|
" {\n"
|
||||||
|
" discard;\n"
|
||||||
|
" }\n"
|
||||||
|
" }\n\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
// Commit
|
||||||
|
OS <<
|
||||||
" ocol0 = " << reg0 << ";\n"
|
" ocol0 = " << reg0 << ";\n"
|
||||||
" ocol1 = " << reg1 << ";\n"
|
" ocol1 = " << reg1 << ";\n"
|
||||||
" ocol2 = " << reg2 << ";\n"
|
" ocol2 = " << reg2 << ";\n"
|
||||||
@ -546,17 +533,7 @@ namespace glsl
|
|||||||
{
|
{
|
||||||
const auto _255 = (props.supports_native_fp16) ? "f16vec4(255.)" : "vec4(255.)";
|
const auto _255 = (props.supports_native_fp16) ? "f16vec4(255.)" : "vec4(255.)";
|
||||||
const auto _1_over_2 = (props.supports_native_fp16) ? "f16vec4(0.5)" : "vec4(0.5)";
|
const auto _1_over_2 = (props.supports_native_fp16) ? "f16vec4(0.5)" : "vec4(0.5)";
|
||||||
OS << "#define round_to_8bit(v4) (floor(fma(v4, " << _255 << ", " << _1_over_2 << ")) / " << _255 << ")\n";
|
OS << "#define round_to_8bit(v4) (floor(fma(v4, " << _255 << ", " << _1_over_2 << ")) / " << _255 << ")\n\n";
|
||||||
}
|
|
||||||
|
|
||||||
if (!props.fp32_outputs && props.srgb_output_rounding)
|
|
||||||
{
|
|
||||||
OS << "#define round_srgb8 round_to_8bit\n\n";
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// We can get the 8-bit rounding for free on non-NVIDIA hardware
|
|
||||||
OS << "#define round_srgb8(v4) (v4)\n\n";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
OS << "// Workaround for broken early discard in some drivers\n";
|
OS << "// Workaround for broken early discard in some drivers\n";
|
||||||
@ -593,21 +570,6 @@ namespace glsl
|
|||||||
OS << "#define SIGN_EXPAND_MASK (EXPAND_R_MASK|EXPAND_G_MASK|EXPAND_B_MASK|EXPAND_A_MASK)\n";
|
OS << "#define SIGN_EXPAND_MASK (EXPAND_R_MASK|EXPAND_G_MASK|EXPAND_B_MASK|EXPAND_A_MASK)\n";
|
||||||
OS << "#define FILTERED_MASK (FILTERED_MAG_BIT|FILTERED_MIN_BIT)\n\n";
|
OS << "#define FILTERED_MASK (FILTERED_MAG_BIT|FILTERED_MIN_BIT)\n\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
OS << fmt::replace_all(
|
|
||||||
"$Ty ROP_quantize(const in $Ty v)\n"
|
|
||||||
"{\n"
|
|
||||||
" if (!_test_bit(rop_control, INT_FRAMEBUFFER_BIT))\n"
|
|
||||||
" {\n"
|
|
||||||
" return v;\n"
|
|
||||||
" }\n"
|
|
||||||
"\n"
|
|
||||||
" return round_to_8bit(v);\n"
|
|
||||||
"}\n",
|
|
||||||
{
|
|
||||||
{ "$Ty"sv, (props.fp32_outputs || !props.supports_native_fp16) ? "vec4" : "f16vec4"}
|
|
||||||
}
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (props.require_lit_emulation)
|
if (props.require_lit_emulation)
|
||||||
@ -1175,7 +1137,6 @@ namespace glsl
|
|||||||
" vec4 scale_bias;\n"
|
" vec4 scale_bias;\n"
|
||||||
" uint remap;\n"
|
" uint remap;\n"
|
||||||
" uint flags;\n"
|
" uint flags;\n"
|
||||||
"};\n"
|
"};\n\n";
|
||||||
"\n";
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -39,6 +39,6 @@ namespace glsl
|
|||||||
bool low_precision_tests : 1;
|
bool low_precision_tests : 1;
|
||||||
bool disable_early_discard : 1;
|
bool disable_early_discard : 1;
|
||||||
bool supports_native_fp16 : 1;
|
bool supports_native_fp16 : 1;
|
||||||
bool srgb_output_rounding : 1;
|
bool ROP_output_rounding : 1;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
@ -273,7 +273,7 @@ void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
|
|||||||
m_shader_props.low_precision_tests = device_props.has_low_precision_rounding;
|
m_shader_props.low_precision_tests = device_props.has_low_precision_rounding;
|
||||||
m_shader_props.disable_early_discard = vk::get_driver_vendor() != vk::driver_vendor::NVIDIA;
|
m_shader_props.disable_early_discard = vk::get_driver_vendor() != vk::driver_vendor::NVIDIA;
|
||||||
m_shader_props.supports_native_fp16 = device_props.has_native_half_support;
|
m_shader_props.supports_native_fp16 = device_props.has_native_half_support;
|
||||||
m_shader_props.srgb_output_rounding = vk::get_driver_vendor() == vk::driver_vendor::NVIDIA;
|
m_shader_props.ROP_output_rounding = vk::get_driver_vendor() == vk::driver_vendor::NVIDIA;
|
||||||
|
|
||||||
glsl::insert_glsl_legacy_function(OS, m_shader_props);
|
glsl::insert_glsl_legacy_function(OS, m_shader_props);
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user