rsx: Improve ROP output handling

- Perform 8-bit quantization/rounding before emulated operations like ALPHA_TEST
This commit is contained in:
kd-11 2022-11-16 17:37:49 +03:00 committed by kd-11
parent 8199f97e7a
commit e04855a0da
4 changed files with 134 additions and 69 deletions

View File

@ -404,7 +404,7 @@ namespace glsl
void insert_rop_init(std::ostream& OS)
{
OS <<
" if (_test_bit(rop_control, 9))\n"
" if (_test_bit(rop_control, POLYGON_STIPPLE_ENABLE_BIT))\n"
" {\n"
" // Convert x,y to linear address\n"
" const uvec2 stipple_coord = uvec2(gl_FragCoord.xy) % uvec2(32, 32);\n"
@ -435,30 +435,31 @@ namespace glsl
" {\n"
" discard;\n"
" }\n"
" else if (_get_bits(rop_control, 0, 8) != 0)\n";
" else if ((rop_control & ROP_CMD_MASK) != 0)\n";
}
else
{
OS << " if (_get_bits(rop_control, 0, 8) != 0)\n";
OS << " if ((rop_control & ROP_CMD_MASK) != 0)\n";
}
OS <<
" {\n"
" const bool alpha_test = _test_bit(rop_control, 0);\n"
" const uint alpha_func = _get_bits(rop_control, 16, 3);\n";
" const bool alpha_test = _test_bit(rop_control, ALPHA_TEST_ENABLE_BIT);\n"
" const uint alpha_func = _get_bits(rop_control, ALPHA_TEST_FUNC_OFFSET, ALPHA_TEST_FUNC_LENGTH);\n";
if (!props.fp32_outputs)
{
OS << " const bool srgb_convert = _test_bit(rop_control, 1);\n\n";
OS << " const bool srgb_convert = _test_bit(rop_control, SRGB_FRAMEBUFFER_BIT);\n\n";
}
if (props.emulate_coverage_tests)
{
OS << " const bool a2c_enabled = _test_bit(rop_control, 4);\n";
OS << " const bool a2c_enabled = _test_bit(rop_control, ALPHA_TO_COVERAGE_ENABLE_BIT);\n";
OS << " const bool msaa_write_enabled = _test_bit(rop_control, MSAA_WRITE_ENABLE_BIT);\n";
}
OS <<
" if (alpha_test && !comparison_passes(" << reg0 << ".a, alpha_ref, alpha_func))\n"
" if (alpha_test && !comparison_passes(ROP_quantize(" << reg0 << ").a, alpha_ref, alpha_func))\n"
" {\n"
" discard;\n"
" }\n";
@ -466,7 +467,7 @@ namespace glsl
if (props.emulate_coverage_tests)
{
OS <<
" else if (a2c_enabled && !coverage_test_passes(" << reg0 << ", rop_control >> 5))\n"
" else if (a2c_enabled && (!msaa_write_enabled || !coverage_test_passes(" << reg0 << ")))\n"
" {\n"
" discard;\n"
" }\n";
@ -480,10 +481,10 @@ namespace glsl
OS <<
" else if (srgb_convert)\n"
" {\n"
" " << reg0 << " = round_to_8bit(f16vec4(linear_to_srgb(" << reg0 << ").rgb, " << reg0 << ".a));\n"
" " << reg1 << " = round_to_8bit(f16vec4(linear_to_srgb(" << reg1 << ").rgb, " << reg1 << ".a));\n"
" " << reg2 << " = round_to_8bit(f16vec4(linear_to_srgb(" << reg2 << ").rgb, " << reg2 << ".a));\n"
" " << reg3 << " = round_to_8bit(f16vec4(linear_to_srgb(" << reg3 << ").rgb, " << reg3 << ".a));\n"
" " << reg0 << " = round_srgb8(f16vec4(linear_to_srgb(" << reg0 << ").rgb, " << reg0 << ".a));\n"
" " << reg1 << " = round_srgb8(f16vec4(linear_to_srgb(" << reg1 << ").rgb, " << reg1 << ".a));\n"
" " << reg2 << " = round_srgb8(f16vec4(linear_to_srgb(" << reg2 << ").rgb, " << reg2 << ".a));\n"
" " << reg3 << " = round_srgb8(f16vec4(linear_to_srgb(" << reg3 << ").rgb, " << reg3 << ".a));\n"
" }\n";
}
else
@ -491,10 +492,10 @@ namespace glsl
OS <<
" else if (srgb_convert)\n"
" {\n"
" " << reg0 << " = round_to_8bit(vec4(linear_to_srgb(" << reg0 << ").rgb, " << reg0 << ".a));\n"
" " << reg1 << " = round_to_8bit(vec4(linear_to_srgb(" << reg1 << ").rgb, " << reg1 << ".a));\n"
" " << reg2 << " = round_to_8bit(vec4(linear_to_srgb(" << reg2 << ").rgb, " << reg2 << ".a));\n"
" " << reg3 << " = round_to_8bit(vec4(linear_to_srgb(" << reg3 << ").rgb, " << reg3 << ".a));\n"
" " << reg0 << " = round_srgb8(vec4(linear_to_srgb(" << reg0 << ").rgb, " << reg0 << ".a));\n"
" " << reg1 << " = round_srgb8(vec4(linear_to_srgb(" << reg1 << ").rgb, " << reg1 << ".a));\n"
" " << reg2 << " = round_srgb8(vec4(linear_to_srgb(" << reg2 << ").rgb, " << reg2 << ".a));\n"
" " << reg3 << " = round_srgb8(vec4(linear_to_srgb(" << reg3 << ").rgb, " << reg3 << ".a));\n"
" }\n";
}
}
@ -528,8 +529,37 @@ namespace glsl
if (props.domain == glsl::program_domain::glsl_fragment_program)
{
OS << "// Workaround for broken early discard in some drivers\n";
OS << "// ROP control\n";
OS << "#define ALPHA_TEST_ENABLE_BIT " << rsx::ROP_control_bits::ALPHA_TEST_ENABLE_BIT << "\n";
OS << "#define SRGB_FRAMEBUFFER_BIT " << rsx::ROP_control_bits::SRGB_FRAMEBUFFER_BIT << "\n";
OS << "#define ALPHA_TO_COVERAGE_ENABLE_BIT " << rsx::ROP_control_bits::ALPHA_TO_COVERAGE_ENABLE_BIT << "\n";
OS << "#define MSAA_WRITE_ENABLE_BIT " << rsx::ROP_control_bits::MSAA_WRITE_ENABLE_BIT << "\n";
OS << "#define INT_FRAMEBUFFER_BIT " << rsx::ROP_control_bits::INT_FRAMEBUFFER_BIT << "\n";
OS << "#define POLYGON_STIPPLE_ENABLE_BIT " << rsx::ROP_control_bits::POLYGON_STIPPLE_ENABLE_BIT << "\n";
OS << "#define ALPHA_TEST_FUNC_OFFSET " << rsx::ROP_control_bits::ALPHA_FUNC_OFFSET << "\n";
OS << "#define ALPHA_TEST_FUNC_LENGTH " << rsx::ROP_control_bits::ALPHA_FUNC_NUM_BITS << "\n";
OS << "#define MSAA_SAMPLE_CTRL_OFFSET " << rsx::ROP_control_bits::MSAA_SAMPLE_CTRL_OFFSET << "\n";
OS << "#define MSAA_SAMPLE_CTRL_LENGTH " << rsx::ROP_control_bits::MSAA_SAMPLE_CTRL_NUM_BITS << "\n";
OS << "#define ROP_CMD_MASK " << rsx::ROP_control_bits::ROP_CMD_MASK << "\n\n";
// 8-bit rounding/quantization
{
const auto _255 = (props.supports_native_fp16) ? "f16vec4(255.)" : "vec4(255.)";
const auto _1_over_2 = (props.supports_native_fp16) ? "f16vec4(0.5)" : "vec4(0.5)";
OS << "#define round_to_8bit(v4) (floor(fma(v4, " << _255 << ", " << _1_over_2 << ")) / " << _255 << ")\n";
}
if (!props.fp32_outputs && props.srgb_output_rounding)
{
OS << "#define round_srgb8 round_to_8bit\n\n";
}
else
{
// We can get the 8-bit rounding for free on non-NVIDIA hardware
OS << "#define round_srgb8(v4) (v4)\n\n";
}
OS << "// Workaround for broken early discard in some drivers\n";
if (props.disable_early_discard)
{
OS << "bool _fragment_discard = false;\n";
@ -540,21 +570,6 @@ namespace glsl
OS << "#define _kill() discard\n\n";
}
if (!props.fp32_outputs)
{
OS << "// Workaround broken output rounding behavior\n";
if (props.srgb_output_rounding)
{
const auto _255 = (props.supports_native_fp16) ? "f16vec4(255.)" : "vec4(255.)";
const auto _1_over_2 = (props.supports_native_fp16) ? "f16vec4(0.5)" : "vec4(0.5)";
OS << "#define round_to_8bit(v4) (floor(fma(v4, " << _255 << ", " << _1_over_2 << ")) / " << _255 << ")\n\n";
}
else
{
OS << "#define round_to_8bit(v4) (v4)\n\n";
}
}
if (props.require_texture_ops)
{
// Declare special texture control flags
@ -567,17 +582,32 @@ namespace glsl
OS << "#define EXPAND_B_MASK (1 << " << rsx::texture_control_bits::EXPAND_B << ")\n";
OS << "#define EXPAND_A_MASK (1 << " << rsx::texture_control_bits::EXPAND_A << ")\n\n";
OS << "#define ALPHAKILL " << rsx::texture_control_bits::ALPHAKILL << "\n";
OS << "#define RENORMALIZE " << rsx::texture_control_bits::RENORMALIZE << "\n";
OS << "#define ALPHAKILL " << rsx::texture_control_bits::ALPHAKILL << "\n";
OS << "#define RENORMALIZE " << rsx::texture_control_bits::RENORMALIZE << "\n";
OS << "#define DEPTH_FLOAT " << rsx::texture_control_bits::DEPTH_FLOAT << "\n";
OS << "#define DEPTH_COMPARE " << rsx::texture_control_bits::DEPTH_COMPARE_OP << "\n";
OS << "#define FILTERED_MAG_BIT " << rsx::texture_control_bits::FILTERED_MAG << "\n";
OS << "#define FILTERED_MIN_BIT " << rsx::texture_control_bits::FILTERED_MIN << "\n";
OS << "#define INT_COORDS_BIT " << rsx::texture_control_bits::UNNORMALIZED_COORDS << "\n";
OS << "#define INT_COORDS_BIT " << rsx::texture_control_bits::UNNORMALIZED_COORDS << "\n";
OS << "#define GAMMA_CTRL_MASK (GAMMA_R_MASK|GAMMA_G_MASK|GAMMA_B_MASK|GAMMA_A_MASK)\n";
OS << "#define SIGN_EXPAND_MASK (EXPAND_R_MASK|EXPAND_G_MASK|EXPAND_B_MASK|EXPAND_A_MASK)\n";
OS << "#define FILTERED_MASK (FILTERED_MAG_BIT|FILTERED_MIN_BIT)\n\n";
}
OS << fmt::replace_all(
"$Ty ROP_quantize(const in $Ty v)\n"
"{\n"
" if (!_test_bit(rop_control, INT_FRAMEBUFFER_BIT))\n"
" {\n"
" return v;\n"
" }\n"
"\n"
" return round_to_8bit(v);\n"
"}\n",
{
{ "$Ty"sv, (props.fp32_outputs || !props.supports_native_fp16) ? "vec4" : "f16vec4"}
}
);
}
if (props.require_lit_emulation)
@ -667,10 +697,8 @@ namespace glsl
{
// Purely stochastic
OS <<
"bool coverage_test_passes(const in vec4 _sample, const in uint control)\n"
"bool coverage_test_passes(const in vec4 _sample)\n"
"{\n"
" if (!_test_bit(control, 0)) return false;\n"
"\n"
" float random = _rand(gl_FragCoord);\n"
" return (_sample.a > random);\n"
"}\n\n";

View File

@ -31,6 +31,46 @@ namespace rsx
EXPAND_MASK = (1 << EXPAND_R) | (1 << EXPAND_G) | (1 << EXPAND_B) | (1 << EXPAND_A),
EXPAND_OFFSET = EXPAND_A
};
// Layout of the 32-bit ROP control word built by ROP_control_t.
// Unless noted otherwise, every enumerator is a bit INDEX, not a mask.
enum ROP_control_bits : u32
{
	// Command flags. A set command bit triggers explicit action.
	ALPHA_TEST_ENABLE_BIT        = 0,
	SRGB_FRAMEBUFFER_BIT         = 1,
	ALPHA_TO_COVERAGE_ENABLE_BIT = 2,
	POLYGON_STIPPLE_ENABLE_BIT   = 3,

	// Auxiliary configuration flags
	INT_FRAMEBUFFER_BIT          = 16,
	MSAA_WRITE_ENABLE_BIT        = 17,

	// Multi-bit data fields: index of the lowest bit
	ALPHA_FUNC_OFFSET            = 18,
	MSAA_SAMPLE_CTRL_OFFSET      = 21,

	// Multi-bit data fields: width in bits
	ALPHA_FUNC_NUM_BITS          = 3,
	MSAA_SAMPLE_CTRL_NUM_BITS    = 2,

	// Meta.
	// This one is a bit MASK: it selects the four command flags declared above (bits 0-3, i.e. 0xF).
	// Commands live in the lower 16 bits of the word (auxiliary config starts at bit 16),
	// but only the command bits that actually exist are included in the mask.
	ROP_CMD_MASK = (1u << ALPHA_TEST_ENABLE_BIT) |
	               (1u << SRGB_FRAMEBUFFER_BIT) |
	               (1u << ALPHA_TO_COVERAGE_ENABLE_BIT) |
	               (1u << POLYGON_STIPPLE_ENABLE_BIT)
};
struct ROP_control_t
{
u32 value = 0;
void enable_alpha_test() { value |= (1u << ROP_control_bits::ALPHA_TEST_ENABLE_BIT); }
void enable_framebuffer_sRGB() { value |= (1u << ROP_control_bits::SRGB_FRAMEBUFFER_BIT); }
void enable_alpha_to_coverage() { value |= (1u << ROP_control_bits::ALPHA_TO_COVERAGE_ENABLE_BIT); }
void enable_polygon_stipple() { value |= (1u << ROP_control_bits::POLYGON_STIPPLE_ENABLE_BIT); }
void enable_framebuffer_INT() { value |= (1u << ROP_control_bits::INT_FRAMEBUFFER_BIT); }
void enable_MSAA_writes() { value |= (1u << ROP_control_bits::MSAA_WRITE_ENABLE_BIT); }
void set_alpha_test_func(uint func) { value |= (func << ROP_control_bits::ALPHA_FUNC_OFFSET); }
void set_msaa_control(uint ctrl) { value |= (ctrl << ROP_control_bits::MSAA_SAMPLE_CTRL_OFFSET); }
};
}
namespace program_common

View File

@ -1050,18 +1050,18 @@ namespace rsx
void thread::fill_fragment_state_buffer(void* buffer, const RSXFragmentProgram& /*fragment_program*/)
{
u32 rop_control = 0u;
ROP_control_t rop_control{};
if (rsx::method_registers.alpha_test_enabled())
{
const u32 alpha_func = static_cast<u32>(rsx::method_registers.alpha_func());
rop_control |= (alpha_func << 16);
rop_control |= ROP_control::alpha_test_enable;
rop_control.set_alpha_test_func(alpha_func);
rop_control.enable_alpha_test();
}
if (rsx::method_registers.polygon_stipple_enabled())
{
rop_control |= ROP_control::polygon_stipple_enable;
rop_control.enable_polygon_stipple();
}
if (rsx::method_registers.msaa_alpha_to_coverage_enabled() && !backend_config.supports_hw_a2c)
@ -1070,8 +1070,11 @@ namespace rsx
// Alpha values generate a coverage mask for order independent blending
// Requires hardware AA to work properly (or just fragment sample stage in fragment shaders)
// Simulated using combined alpha blend and alpha test
if (rsx::method_registers.msaa_sample_mask()) rop_control |= ROP_control::msaa_mask_enable;
rop_control |= ROP_control::csaa_enable;
rop_control.enable_alpha_to_coverage();
if (rsx::method_registers.msaa_sample_mask())
{
rop_control.enable_MSAA_writes();
}
// Sample configuration bits
switch (rsx::method_registers.surface_antialias())
@ -1079,10 +1082,10 @@ namespace rsx
case rsx::surface_antialiasing::center_1_sample:
break;
case rsx::surface_antialiasing::diagonal_centered_2_samples:
rop_control |= 1u << 6;
rop_control.set_msaa_control(1u);
break;
default:
rop_control |= 3u << 6;
rop_control.set_msaa_control(3u);
break;
}
}
@ -1091,19 +1094,24 @@ namespace rsx
const f32 fog1 = rsx::method_registers.fog_params_1();
const u32 fog_mode = static_cast<u32>(rsx::method_registers.fog_equation());
if (rsx::method_registers.framebuffer_srgb_enabled())
// Check if framebuffer is actually an XRGB format and not a WZYX format
switch (rsx::method_registers.surface_color())
{
// Check if framebuffer is actually an XRGB format and not a WZYX format
switch (rsx::method_registers.surface_color())
case rsx::surface_color_format::w16z16y16x16:
case rsx::surface_color_format::w32z32y32x32:
case rsx::surface_color_format::x32:
// These behave very differently from "normal" formats.
break;
default:
// Integer framebuffer formats.
rop_control.enable_framebuffer_INT();
// Check if we want sRGB conversion.
if (rsx::method_registers.framebuffer_srgb_enabled())
{
case rsx::surface_color_format::w16z16y16x16:
case rsx::surface_color_format::w32z32y32x32:
case rsx::surface_color_format::x32:
break;
default:
rop_control |= ROP_control::framebuffer_srgb_enable;
break;
rop_control.enable_framebuffer_sRGB();
}
break;
}
// Generate wpos coefficients
@ -1120,7 +1128,7 @@ namespace rsx
const f32 alpha_ref = rsx::method_registers.alpha_ref();
u32 *dst = static_cast<u32*>(buffer);
utils::stream_vector(dst, std::bit_cast<u32>(fog0), std::bit_cast<u32>(fog1), rop_control, std::bit_cast<u32>(alpha_ref));
utils::stream_vector(dst, std::bit_cast<u32>(fog0), std::bit_cast<u32>(fog1), rop_control.value, std::bit_cast<u32>(alpha_ref));
utils::stream_vector(dst + 4, 0u, fog_mode, std::bit_cast<u32>(wpos_scale), std::bit_cast<u32>(wpos_bias));
}

View File

@ -198,17 +198,6 @@ namespace rsx
result_zcull_intr = 2
};
// ROP control flags expressed as ready-made bit masks (not bit indices) that are
// OR-ed directly into a raw u32 control word.
// NOTE(review): the hunk header (-198,17 +198,6) suggests this enum is deleted by
// this commit in favour of ROP_control_bits / ROP_control_t - confirm before using it.
enum ROP_control : u32
{
alpha_test_enable = (1u << 0), // bit 0: alpha test is enabled
framebuffer_srgb_enable = (1u << 1), // bit 1: sRGB conversion of the framebuffer output
csaa_enable = (1u << 4), // bit 4: set when alpha-to-coverage is emulated in the shader
msaa_mask_enable = (1u << 5), // bit 5: set when the MSAA sample mask is non-zero
msaa_config_mask = (3u << 6), // bits 6-7: two-bit sample configuration field
polygon_stipple_enable = (1u << 9), // bit 9: polygon stipple is enabled
alpha_func_mask = (7u << 16) // bits 16-18: three-bit alpha comparison function field
};
u32 get_vertex_type_size_on_host(vertex_base_type type, u32 size);
u32 get_address(u32 offset, u32 location, u32 size_to_check = 0,