mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-01-30 12:32:43 +00:00
rsx: Improve ROP output handling
- Perform 8-bit quantization/rounding before emulated operations like ALPHA_TEST
This commit is contained in:
parent
8199f97e7a
commit
e04855a0da
@ -404,7 +404,7 @@ namespace glsl
|
||||
void insert_rop_init(std::ostream& OS)
|
||||
{
|
||||
OS <<
|
||||
" if (_test_bit(rop_control, 9))\n"
|
||||
" if (_test_bit(rop_control, POLYGON_STIPPLE_ENABLE_BIT))\n"
|
||||
" {\n"
|
||||
" // Convert x,y to linear address\n"
|
||||
" const uvec2 stipple_coord = uvec2(gl_FragCoord.xy) % uvec2(32, 32);\n"
|
||||
@ -435,30 +435,31 @@ namespace glsl
|
||||
" {\n"
|
||||
" discard;\n"
|
||||
" }\n"
|
||||
" else if (_get_bits(rop_control, 0, 8) != 0)\n";
|
||||
" else if ((rop_control & ROP_CMD_MASK) != 0)\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
OS << " if (_get_bits(rop_control, 0, 8) != 0)\n";
|
||||
OS << " if ((rop_control & ROP_CMD_MASK) != 0)\n";
|
||||
}
|
||||
|
||||
OS <<
|
||||
" {\n"
|
||||
" const bool alpha_test = _test_bit(rop_control, 0);\n"
|
||||
" const uint alpha_func = _get_bits(rop_control, 16, 3);\n";
|
||||
" const bool alpha_test = _test_bit(rop_control, ALPHA_TEST_ENABLE_BIT);\n"
|
||||
" const uint alpha_func = _get_bits(rop_control, ALPHA_TEST_FUNC_OFFSET, ALPHA_TEST_FUNC_LENGTH);\n";
|
||||
|
||||
if (!props.fp32_outputs)
|
||||
{
|
||||
OS << " const bool srgb_convert = _test_bit(rop_control, 1);\n\n";
|
||||
OS << " const bool srgb_convert = _test_bit(rop_control, SRGB_FRAMEBUFFER_BIT);\n\n";
|
||||
}
|
||||
|
||||
if (props.emulate_coverage_tests)
|
||||
{
|
||||
OS << " const bool a2c_enabled = _test_bit(rop_control, 4);\n";
|
||||
OS << " const bool a2c_enabled = _test_bit(rop_control, ALPHA_TO_COVERAGE_ENABLE_BIT);\n";
|
||||
OS << " const bool msaa_write_enabled = _test_bit(rop_control, MSAA_WRITE_ENABLE_BIT);\n";
|
||||
}
|
||||
|
||||
OS <<
|
||||
" if (alpha_test && !comparison_passes(" << reg0 << ".a, alpha_ref, alpha_func))\n"
|
||||
" if (alpha_test && !comparison_passes(ROP_quantize(" << reg0 << ").a, alpha_ref, alpha_func))\n"
|
||||
" {\n"
|
||||
" discard;\n"
|
||||
" }\n";
|
||||
@ -466,7 +467,7 @@ namespace glsl
|
||||
if (props.emulate_coverage_tests)
|
||||
{
|
||||
OS <<
|
||||
" else if (a2c_enabled && !coverage_test_passes(" << reg0 << ", rop_control >> 5))\n"
|
||||
" else if (a2c_enabled && (!msaa_write_enabled || !coverage_test_passes(" << reg0 << ")))\n"
|
||||
" {\n"
|
||||
" discard;\n"
|
||||
" }\n";
|
||||
@ -480,10 +481,10 @@ namespace glsl
|
||||
OS <<
|
||||
" else if (srgb_convert)\n"
|
||||
" {\n"
|
||||
" " << reg0 << " = round_to_8bit(f16vec4(linear_to_srgb(" << reg0 << ").rgb, " << reg0 << ".a));\n"
|
||||
" " << reg1 << " = round_to_8bit(f16vec4(linear_to_srgb(" << reg1 << ").rgb, " << reg1 << ".a));\n"
|
||||
" " << reg2 << " = round_to_8bit(f16vec4(linear_to_srgb(" << reg2 << ").rgb, " << reg2 << ".a));\n"
|
||||
" " << reg3 << " = round_to_8bit(f16vec4(linear_to_srgb(" << reg3 << ").rgb, " << reg3 << ".a));\n"
|
||||
" " << reg0 << " = round_srgb8(f16vec4(linear_to_srgb(" << reg0 << ").rgb, " << reg0 << ".a));\n"
|
||||
" " << reg1 << " = round_srgb8(f16vec4(linear_to_srgb(" << reg1 << ").rgb, " << reg1 << ".a));\n"
|
||||
" " << reg2 << " = round_srgb8(f16vec4(linear_to_srgb(" << reg2 << ").rgb, " << reg2 << ".a));\n"
|
||||
" " << reg3 << " = round_srgb8(f16vec4(linear_to_srgb(" << reg3 << ").rgb, " << reg3 << ".a));\n"
|
||||
" }\n";
|
||||
}
|
||||
else
|
||||
@ -491,10 +492,10 @@ namespace glsl
|
||||
OS <<
|
||||
" else if (srgb_convert)\n"
|
||||
" {\n"
|
||||
" " << reg0 << " = round_to_8bit(vec4(linear_to_srgb(" << reg0 << ").rgb, " << reg0 << ".a));\n"
|
||||
" " << reg1 << " = round_to_8bit(vec4(linear_to_srgb(" << reg1 << ").rgb, " << reg1 << ".a));\n"
|
||||
" " << reg2 << " = round_to_8bit(vec4(linear_to_srgb(" << reg2 << ").rgb, " << reg2 << ".a));\n"
|
||||
" " << reg3 << " = round_to_8bit(vec4(linear_to_srgb(" << reg3 << ").rgb, " << reg3 << ".a));\n"
|
||||
" " << reg0 << " = round_srgb8(vec4(linear_to_srgb(" << reg0 << ").rgb, " << reg0 << ".a));\n"
|
||||
" " << reg1 << " = round_srgb8(vec4(linear_to_srgb(" << reg1 << ").rgb, " << reg1 << ".a));\n"
|
||||
" " << reg2 << " = round_srgb8(vec4(linear_to_srgb(" << reg2 << ").rgb, " << reg2 << ".a));\n"
|
||||
" " << reg3 << " = round_srgb8(vec4(linear_to_srgb(" << reg3 << ").rgb, " << reg3 << ".a));\n"
|
||||
" }\n";
|
||||
}
|
||||
}
|
||||
@ -528,8 +529,37 @@ namespace glsl
|
||||
|
||||
if (props.domain == glsl::program_domain::glsl_fragment_program)
|
||||
{
|
||||
OS << "// Workaround for broken early discard in some drivers\n";
|
||||
OS << "// ROP control\n";
|
||||
OS << "#define ALPHA_TEST_ENABLE_BIT " << rsx::ROP_control_bits::ALPHA_TEST_ENABLE_BIT << "\n";
|
||||
OS << "#define SRGB_FRAMEBUFFER_BIT " << rsx::ROP_control_bits::SRGB_FRAMEBUFFER_BIT << "\n";
|
||||
OS << "#define ALPHA_TO_COVERAGE_ENABLE_BIT " << rsx::ROP_control_bits::ALPHA_TO_COVERAGE_ENABLE_BIT << "\n";
|
||||
OS << "#define MSAA_WRITE_ENABLE_BIT " << rsx::ROP_control_bits::MSAA_WRITE_ENABLE_BIT << "\n";
|
||||
OS << "#define INT_FRAMEBUFFER_BIT " << rsx::ROP_control_bits::INT_FRAMEBUFFER_BIT << "\n";
|
||||
OS << "#define POLYGON_STIPPLE_ENABLE_BIT " << rsx::ROP_control_bits::POLYGON_STIPPLE_ENABLE_BIT << "\n";
|
||||
OS << "#define ALPHA_TEST_FUNC_OFFSET " << rsx::ROP_control_bits::ALPHA_FUNC_OFFSET << "\n";
|
||||
OS << "#define ALPHA_TEST_FUNC_LENGTH " << rsx::ROP_control_bits::ALPHA_FUNC_NUM_BITS << "\n";
|
||||
OS << "#define MSAA_SAMPLE_CTRL_OFFSET " << rsx::ROP_control_bits::MSAA_SAMPLE_CTRL_OFFSET << "\n";
|
||||
OS << "#define MSAA_SAMPLE_CTRL_LENGTH " << rsx::ROP_control_bits::MSAA_SAMPLE_CTRL_NUM_BITS << "\n";
|
||||
OS << "#define ROP_CMD_MASK " << rsx::ROP_control_bits::ROP_CMD_MASK << "\n\n";
|
||||
|
||||
// 8-bit rounding/quantization
|
||||
{
|
||||
const auto _255 = (props.supports_native_fp16) ? "f16vec4(255.)" : "vec4(255.)";
|
||||
const auto _1_over_2 = (props.supports_native_fp16) ? "f16vec4(0.5)" : "vec4(0.5)";
|
||||
OS << "#define round_to_8bit(v4) (floor(fma(v4, " << _255 << ", " << _1_over_2 << ")) / " << _255 << ")\n";
|
||||
}
|
||||
|
||||
if (!props.fp32_outputs && props.srgb_output_rounding)
|
||||
{
|
||||
OS << "#define round_srgb8 round_to_8bit\n\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
// We can get the 8-bit rounding for free on non-NVIDIA hardware
|
||||
OS << "#define round_srgb8(v4) (v4)\n\n";
|
||||
}
|
||||
|
||||
OS << "// Workaround for broken early discard in some drivers\n";
|
||||
if (props.disable_early_discard)
|
||||
{
|
||||
OS << "bool _fragment_discard = false;\n";
|
||||
@ -540,21 +570,6 @@ namespace glsl
|
||||
OS << "#define _kill() discard\n\n";
|
||||
}
|
||||
|
||||
if (!props.fp32_outputs)
|
||||
{
|
||||
OS << "// Workaround broken output rounding behavior\n";
|
||||
if (props.srgb_output_rounding)
|
||||
{
|
||||
const auto _255 = (props.supports_native_fp16) ? "f16vec4(255.)" : "vec4(255.)";
|
||||
const auto _1_over_2 = (props.supports_native_fp16) ? "f16vec4(0.5)" : "vec4(0.5)";
|
||||
OS << "#define round_to_8bit(v4) (floor(fma(v4, " << _255 << ", " << _1_over_2 << ")) / " << _255 << ")\n\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
OS << "#define round_to_8bit(v4) (v4)\n\n";
|
||||
}
|
||||
}
|
||||
|
||||
if (props.require_texture_ops)
|
||||
{
|
||||
// Declare special texture control flags
|
||||
@ -567,17 +582,32 @@ namespace glsl
|
||||
OS << "#define EXPAND_B_MASK (1 << " << rsx::texture_control_bits::EXPAND_B << ")\n";
|
||||
OS << "#define EXPAND_A_MASK (1 << " << rsx::texture_control_bits::EXPAND_A << ")\n\n";
|
||||
|
||||
OS << "#define ALPHAKILL " << rsx::texture_control_bits::ALPHAKILL << "\n";
|
||||
OS << "#define RENORMALIZE " << rsx::texture_control_bits::RENORMALIZE << "\n";
|
||||
OS << "#define ALPHAKILL " << rsx::texture_control_bits::ALPHAKILL << "\n";
|
||||
OS << "#define RENORMALIZE " << rsx::texture_control_bits::RENORMALIZE << "\n";
|
||||
OS << "#define DEPTH_FLOAT " << rsx::texture_control_bits::DEPTH_FLOAT << "\n";
|
||||
OS << "#define DEPTH_COMPARE " << rsx::texture_control_bits::DEPTH_COMPARE_OP << "\n";
|
||||
OS << "#define FILTERED_MAG_BIT " << rsx::texture_control_bits::FILTERED_MAG << "\n";
|
||||
OS << "#define FILTERED_MIN_BIT " << rsx::texture_control_bits::FILTERED_MIN << "\n";
|
||||
OS << "#define INT_COORDS_BIT " << rsx::texture_control_bits::UNNORMALIZED_COORDS << "\n";
|
||||
OS << "#define INT_COORDS_BIT " << rsx::texture_control_bits::UNNORMALIZED_COORDS << "\n";
|
||||
OS << "#define GAMMA_CTRL_MASK (GAMMA_R_MASK|GAMMA_G_MASK|GAMMA_B_MASK|GAMMA_A_MASK)\n";
|
||||
OS << "#define SIGN_EXPAND_MASK (EXPAND_R_MASK|EXPAND_G_MASK|EXPAND_B_MASK|EXPAND_A_MASK)\n";
|
||||
OS << "#define FILTERED_MASK (FILTERED_MAG_BIT|FILTERED_MIN_BIT)\n\n";
|
||||
}
|
||||
|
||||
OS << fmt::replace_all(
|
||||
"$Ty ROP_quantize(const in $Ty v)\n"
|
||||
"{\n"
|
||||
" if (!_test_bit(rop_control, INT_FRAMEBUFFER_BIT))\n"
|
||||
" {\n"
|
||||
" return v;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" return round_to_8bit(v);\n"
|
||||
"}\n",
|
||||
{
|
||||
{ "$Ty"sv, (props.fp32_outputs || !props.supports_native_fp16) ? "vec4" : "f16vec4"}
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
if (props.require_lit_emulation)
|
||||
@ -667,10 +697,8 @@ namespace glsl
|
||||
{
|
||||
// Purely stochastic
|
||||
OS <<
|
||||
"bool coverage_test_passes(const in vec4 _sample, const in uint control)\n"
|
||||
"bool coverage_test_passes(const in vec4 _sample)\n"
|
||||
"{\n"
|
||||
" if (!_test_bit(control, 0)) return false;\n"
|
||||
"\n"
|
||||
" float random = _rand(gl_FragCoord);\n"
|
||||
" return (_sample.a > random);\n"
|
||||
"}\n\n";
|
||||
|
@ -31,6 +31,46 @@ namespace rsx
|
||||
EXPAND_MASK = (1 << EXPAND_R) | (1 << EXPAND_G) | (1 << EXPAND_B) | (1 << EXPAND_A),
|
||||
EXPAND_OFFSET = EXPAND_A
|
||||
};
|
||||
|
||||
enum ROP_control_bits : u32
|
||||
{
|
||||
// Commands. These trigger explicit action.
|
||||
ALPHA_TEST_ENABLE_BIT = 0,
|
||||
SRGB_FRAMEBUFFER_BIT = 1,
|
||||
ALPHA_TO_COVERAGE_ENABLE_BIT = 2,
|
||||
POLYGON_STIPPLE_ENABLE_BIT = 3,
|
||||
|
||||
// Auxilliary config
|
||||
INT_FRAMEBUFFER_BIT = 16,
|
||||
MSAA_WRITE_ENABLE_BIT = 17,
|
||||
|
||||
// Data
|
||||
ALPHA_FUNC_OFFSET = 18,
|
||||
MSAA_SAMPLE_CTRL_OFFSET = 21,
|
||||
|
||||
// Data lengths
|
||||
ALPHA_FUNC_NUM_BITS = 3,
|
||||
MSAA_SAMPLE_CTRL_NUM_BITS = 2,
|
||||
|
||||
// Meta
|
||||
ROP_CMD_MASK = 0xF // Commands are encoded in the lower 16 bits
|
||||
};
|
||||
|
||||
struct ROP_control_t
|
||||
{
|
||||
u32 value = 0;
|
||||
|
||||
void enable_alpha_test() { value |= (1u << ROP_control_bits::ALPHA_TEST_ENABLE_BIT); }
|
||||
void enable_framebuffer_sRGB() { value |= (1u << ROP_control_bits::SRGB_FRAMEBUFFER_BIT); }
|
||||
void enable_alpha_to_coverage() { value |= (1u << ROP_control_bits::ALPHA_TO_COVERAGE_ENABLE_BIT); }
|
||||
void enable_polygon_stipple() { value |= (1u << ROP_control_bits::POLYGON_STIPPLE_ENABLE_BIT); }
|
||||
|
||||
void enable_framebuffer_INT() { value |= (1u << ROP_control_bits::INT_FRAMEBUFFER_BIT); }
|
||||
void enable_MSAA_writes() { value |= (1u << ROP_control_bits::MSAA_WRITE_ENABLE_BIT); }
|
||||
|
||||
void set_alpha_test_func(uint func) { value |= (func << ROP_control_bits::ALPHA_FUNC_OFFSET); }
|
||||
void set_msaa_control(uint ctrl) { value |= (ctrl << ROP_control_bits::MSAA_SAMPLE_CTRL_OFFSET); }
|
||||
};
|
||||
}
|
||||
|
||||
namespace program_common
|
||||
|
@ -1050,18 +1050,18 @@ namespace rsx
|
||||
|
||||
void thread::fill_fragment_state_buffer(void* buffer, const RSXFragmentProgram& /*fragment_program*/)
|
||||
{
|
||||
u32 rop_control = 0u;
|
||||
ROP_control_t rop_control{};
|
||||
|
||||
if (rsx::method_registers.alpha_test_enabled())
|
||||
{
|
||||
const u32 alpha_func = static_cast<u32>(rsx::method_registers.alpha_func());
|
||||
rop_control |= (alpha_func << 16);
|
||||
rop_control |= ROP_control::alpha_test_enable;
|
||||
rop_control.set_alpha_test_func(alpha_func);
|
||||
rop_control.enable_alpha_test();
|
||||
}
|
||||
|
||||
if (rsx::method_registers.polygon_stipple_enabled())
|
||||
{
|
||||
rop_control |= ROP_control::polygon_stipple_enable;
|
||||
rop_control.enable_polygon_stipple();
|
||||
}
|
||||
|
||||
if (rsx::method_registers.msaa_alpha_to_coverage_enabled() && !backend_config.supports_hw_a2c)
|
||||
@ -1070,8 +1070,11 @@ namespace rsx
|
||||
// Alpha values generate a coverage mask for order independent blending
|
||||
// Requires hardware AA to work properly (or just fragment sample stage in fragment shaders)
|
||||
// Simulated using combined alpha blend and alpha test
|
||||
if (rsx::method_registers.msaa_sample_mask()) rop_control |= ROP_control::msaa_mask_enable;
|
||||
rop_control |= ROP_control::csaa_enable;
|
||||
rop_control.enable_alpha_to_coverage();
|
||||
if (rsx::method_registers.msaa_sample_mask())
|
||||
{
|
||||
rop_control.enable_MSAA_writes();
|
||||
}
|
||||
|
||||
// Sample configuration bits
|
||||
switch (rsx::method_registers.surface_antialias())
|
||||
@ -1079,10 +1082,10 @@ namespace rsx
|
||||
case rsx::surface_antialiasing::center_1_sample:
|
||||
break;
|
||||
case rsx::surface_antialiasing::diagonal_centered_2_samples:
|
||||
rop_control |= 1u << 6;
|
||||
rop_control.set_msaa_control(1u);
|
||||
break;
|
||||
default:
|
||||
rop_control |= 3u << 6;
|
||||
rop_control.set_msaa_control(3u);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -1091,19 +1094,24 @@ namespace rsx
|
||||
const f32 fog1 = rsx::method_registers.fog_params_1();
|
||||
const u32 fog_mode = static_cast<u32>(rsx::method_registers.fog_equation());
|
||||
|
||||
if (rsx::method_registers.framebuffer_srgb_enabled())
|
||||
// Check if framebuffer is actually an XRGB format and not a WZYX format
|
||||
switch (rsx::method_registers.surface_color())
|
||||
{
|
||||
// Check if framebuffer is actually an XRGB format and not a WZYX format
|
||||
switch (rsx::method_registers.surface_color())
|
||||
case rsx::surface_color_format::w16z16y16x16:
|
||||
case rsx::surface_color_format::w32z32y32x32:
|
||||
case rsx::surface_color_format::x32:
|
||||
// These behave very differently from "normal" formats.
|
||||
break;
|
||||
default:
|
||||
// Integer framebuffer formats.
|
||||
rop_control.enable_framebuffer_INT();
|
||||
|
||||
// Check if we want sRGB conversion.
|
||||
if (rsx::method_registers.framebuffer_srgb_enabled())
|
||||
{
|
||||
case rsx::surface_color_format::w16z16y16x16:
|
||||
case rsx::surface_color_format::w32z32y32x32:
|
||||
case rsx::surface_color_format::x32:
|
||||
break;
|
||||
default:
|
||||
rop_control |= ROP_control::framebuffer_srgb_enable;
|
||||
break;
|
||||
rop_control.enable_framebuffer_sRGB();
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Generate wpos coefficients
|
||||
@ -1120,7 +1128,7 @@ namespace rsx
|
||||
const f32 alpha_ref = rsx::method_registers.alpha_ref();
|
||||
|
||||
u32 *dst = static_cast<u32*>(buffer);
|
||||
utils::stream_vector(dst, std::bit_cast<u32>(fog0), std::bit_cast<u32>(fog1), rop_control, std::bit_cast<u32>(alpha_ref));
|
||||
utils::stream_vector(dst, std::bit_cast<u32>(fog0), std::bit_cast<u32>(fog1), rop_control.value, std::bit_cast<u32>(alpha_ref));
|
||||
utils::stream_vector(dst + 4, 0u, fog_mode, std::bit_cast<u32>(wpos_scale), std::bit_cast<u32>(wpos_bias));
|
||||
}
|
||||
|
||||
|
@ -198,17 +198,6 @@ namespace rsx
|
||||
result_zcull_intr = 2
|
||||
};
|
||||
|
||||
enum ROP_control : u32
|
||||
{
|
||||
alpha_test_enable = (1u << 0),
|
||||
framebuffer_srgb_enable = (1u << 1),
|
||||
csaa_enable = (1u << 4),
|
||||
msaa_mask_enable = (1u << 5),
|
||||
msaa_config_mask = (3u << 6),
|
||||
polygon_stipple_enable = (1u << 9),
|
||||
alpha_func_mask = (7u << 16)
|
||||
};
|
||||
|
||||
u32 get_vertex_type_size_on_host(vertex_base_type type, u32 size);
|
||||
|
||||
u32 get_address(u32 offset, u32 location, u32 size_to_check = 0,
|
||||
|
Loading…
x
Reference in New Issue
Block a user