rsx: Implement proper decoding for some obscure fragment instructions

PK4UBG and UP4UBG were dropped from the NV_fragment_program spec in 2002.
Not much information about them remains, but their behavior seems pretty straightforward.
This commit is contained in:
kd-11 2021-06-05 02:40:39 +03:00 committed by kd-11
parent 11ab9b7fa9
commit 39815801aa
6 changed files with 47 additions and 8 deletions

View File

@ -207,6 +207,8 @@ void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
m_shader_props.require_texture_ops = properties.has_tex_op;
m_shader_props.require_shadow_ops = properties.shadow_sampler_mask != 0;
m_shader_props.require_texture_expand = properties.has_exp_tex_op;
m_shader_props.require_srgb_to_linear = properties.has_upg;
m_shader_props.require_linear_to_srgb = properties.has_pkg;
m_shader_props.emulate_coverage_tests = true; // g_cfg.video.antialiasing_level == msaa_level::none;
m_shader_props.emulate_shadow_compare = device_props.emulate_depth_compare;
m_shader_props.low_precision_tests = ::gl::get_driver_caps().vendor_NVIDIA;

View File

@ -830,6 +830,26 @@ std::string FragmentProgramDecompiler::BuildCode()
"#define _builtin_rsq(x) (1. / _builtin_sqrt(x))\n"
"#define _builtin_div(x, y) (x / y)\n\n";
// Emit the GLSL helper for the PKG opcode only when the program actually uses it
// (has_pkg is set by the opcode handler when it encounters RSX_FP_OPCODE_PKG).
// The generated helper runs the input through linear_to_srgb, packs the four
// components with packUnorm4x8, reinterprets the packed bits as a float and
// replicates that value across all four lanes of the result.
// NOTE(review): the helper calls linear_to_srgb, which is not defined here -
// presumably provided by the shared GLSL prelude; confirm it is emitted whenever has_pkg is set.
if (properties.has_pkg)
{
OS <<
"vec4 _builtin_pkg(const in vec4 value)\n"
"{\n"
" vec4 convert = linear_to_srgb(value);\n"
" return uintBitsToFloat(packUnorm4x8(convert)).xxxx;\n"
"}\n\n";
}
// Emit the GLSL helper for the UPG opcode only when the program actually uses it
// (has_upg is set by the opcode handler when it encounters RSX_FP_OPCODE_UPG).
// The generated helper reinterprets the float's bits as a uint, unpacks it into
// four normalized components with unpackUnorm4x8 and runs the result through
// srgb_to_linear.
// NOTE(review): the helper calls srgb_to_linear, which is not defined here -
// presumably provided by the shared GLSL prelude; confirm it is emitted whenever has_upg is set.
if (properties.has_upg)
{
OS <<
"vec4 _builtin_upg(const in float value)\n"
"{\n"
" vec4 raw = unpackUnorm4x8(floatBitsToUint(value));\n"
" return srgb_to_linear(raw);\n"
"}\n\n";
}
if (properties.has_divsq)
{
// Define RSX-compliant DIVSQ
@ -954,6 +974,9 @@ bool FragmentProgramDecompiler::handle_sct_scb(u32 opcode)
case RSX_FP_OPCODE_PK16: SetDst(getFloatTypeName(4) + "(uintBitsToFloat(packUnorm2x16($0.xy)))"); return true;
case RSX_FP_OPCODE_PKG:
// Should be similar to PKB but with gamma correction, see description of PK4UBG in khronos page
properties.has_pkg = true;
SetDst("_builtin_pkg($0)");
return true;
case RSX_FP_OPCODE_PKB: SetDst(getFloatTypeName(4) + "(uintBitsToFloat(packUnorm4x8($0)))"); return true;
case RSX_FP_OPCODE_SIN: SetDst("sin($0.xxxx)"); return true;
}
@ -1120,7 +1143,10 @@ bool FragmentProgramDecompiler::handle_tex_srb(u32 opcode)
case RSX_FP_OPCODE_UP4: SetDst("unpackSnorm4x8(floatBitsToUint($0.x))"); return true;
case RSX_FP_OPCODE_UP16: SetDst("unpackUnorm2x16(floatBitsToUint($0.x)).xyxy"); return true;
case RSX_FP_OPCODE_UPG:
// Same as UPB with gamma correction
// Same as UPB with gamma correction
properties.has_upg = true;
SetDst("_builtin_upg($0.x)");
return true;
case RSX_FP_OPCODE_UPB: SetDst("(unpackUnorm4x8(floatBitsToUint($0.x)))"); return true;
}
return false;

View File

@ -285,6 +285,8 @@ public:
bool has_clamp = false;
bool has_w_access = false;
bool has_exp_tex_op = false;
bool has_pkg = false;
bool has_upg = false;
}
properties;

View File

@ -673,7 +673,7 @@ namespace glsl
"}\n\n";
}
if (!props.fp32_outputs)
if (!props.fp32_outputs || props.require_linear_to_srgb)
{
OS <<
"vec4 linear_to_srgb(const in vec4 cl)\n"
@ -685,6 +685,17 @@ namespace glsl
"}\n\n";
}
// Emit the srgb_to_linear GLSL helper when either texture operations are required
// or a shader explicitly requests sRGB decoding (require_srgb_to_linear).
// The generated function computes two candidates per component:
//   a = cs / 12.92                      (linear segment)
//   b = ((cs + 0.055) / 1.055) ^ 2.4    (power segment)
// and combines them with _select using the mask cs > 0.04045.
// NOTE(review): _select is defined elsewhere; presumably it picks b where the
// mask is true and a otherwise, which matches the standard sRGB decode - confirm.
if (props.require_texture_ops || props.require_srgb_to_linear)
{
OS <<
"vec4 srgb_to_linear(const in vec4 cs)\n"
"{\n"
" vec4 a = cs / 12.92;\n"
" vec4 b = pow((cs + 0.055) / 1.055, vec4(2.4));\n"
" return _select(a, b, greaterThan(cs, vec4(0.04045)));\n"
"}\n\n";
}
if (props.require_depth_conversion)
{
ensure(props.require_texture_ops);
@ -763,12 +774,6 @@ namespace glsl
" return mix(direct, indexed, choice);\n"
"}\n\n"
#endif
"vec4 srgb_to_linear(const in vec4 cs)\n"
"{\n"
" vec4 a = cs / 12.92;\n"
" vec4 b = pow((cs + 0.055) / 1.055, vec4(2.4));\n"
" return _select(a, b, greaterThan(cs, vec4(0.04045)));\n"
"}\n\n"
//TODO: Move all the texture read control operations here
"vec4 process_texel(in vec4 rgba, const in uint control_bits)\n"

View File

@ -28,6 +28,8 @@ namespace glsl
bool require_texture_ops;
bool require_shadow_ops;
bool require_texture_expand;
bool require_srgb_to_linear;
bool require_linear_to_srgb;
bool emulate_coverage_tests;
bool emulate_shadow_compare;
bool emulate_zclip_transform;

View File

@ -244,6 +244,8 @@ void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
m_shader_props.require_texture_ops = properties.has_tex_op;
m_shader_props.require_shadow_ops = properties.shadow_sampler_mask != 0;
m_shader_props.require_texture_expand = properties.has_exp_tex_op;
m_shader_props.require_srgb_to_linear = properties.has_upg;
m_shader_props.require_linear_to_srgb = properties.has_pkg;
m_shader_props.emulate_coverage_tests = g_cfg.video.antialiasing_level == msaa_level::none;
m_shader_props.emulate_shadow_compare = device_props.emulate_depth_compare;
m_shader_props.low_precision_tests = vk::get_driver_vendor() == vk::driver_vendor::NVIDIA;