glsl/fp/vp: Avoid shader clutter

- Do not emit helper subroutines into shaders unless they are actually used
-- makes shaders easier to read, and the disassembled SPIR-V has less clutter
- glsl: Replace switch block with lookup table
This commit is contained in:
kd-11 2018-01-25 00:09:27 +03:00
parent 2e04dceaf0
commit 33bcdd476c
9 changed files with 192 additions and 123 deletions

View File

@ -40,15 +40,48 @@ void FragmentProgramDecompiler::SetDst(std::string code, bool append_mask)
{
if (dst.exp_tex)
{
//If dst.exp_tex really is _bx2 postfix, we need to unpack dynamic range
//Expand [0,1] to [-1, 1]. Confirmed by Castlevania: LOS
AddCode("//exp tex flag is set");
code = "((" + code + "- 0.5) * 2.)";
}
if (dst.saturate)
{
code = saturate(code);
else
code = ClampValue(code, dst.prec);
}
else if (dst.prec)
{
switch (dst.opcode)
{
case RSX_FP_OPCODE_NRM:
case RSX_FP_OPCODE_MAX:
case RSX_FP_OPCODE_MIN:
case RSX_FP_OPCODE_COS:
case RSX_FP_OPCODE_SIN:
case RSX_FP_OPCODE_REFL:
case RSX_FP_OPCODE_EX2:
case RSX_FP_OPCODE_FRC:
case RSX_FP_OPCODE_LIT:
case RSX_FP_OPCODE_LIF:
case RSX_FP_OPCODE_LRP:
case RSX_FP_OPCODE_LG2:
break;
case RSX_FP_OPCODE_MOV:
//NOTE: Sometimes varying inputs from VS are out of range so do not exempt any input types, unless fp16 (Naruto UNS)
if (dst.fp16 && src0.fp16 && src0.reg_type == RSX_FP_REGISTER_TYPE_TEMP)
break;
default:
{
//fp16 precision flag on f32 register; ignore
if (dst.prec == 1 && !dst.fp16)
break;
//clamp value to allowed range
code = ClampValue(code, dst.prec);
break;
}
}
}
}
code += (append_mask ? "$m" : "");
@ -385,7 +418,10 @@ template<typename T> std::string FragmentProgramDecompiler::GetSRC(T src)
auto &reg = temp_registers[src.tmp_reg_index];
if (reg.requires_gather(xy_read, zw_read))
{
properties.has_gather_op = true;
AddCode(reg.gather_r());
}
}
}
@ -407,7 +443,10 @@ template<typename T> std::string FragmentProgramDecompiler::GetSRC(T src)
switch (dst.src_attr_reg_num)
{
case 0x00: ret += reg_table[0]; break;
case 0x00:
ret += reg_table[0];
properties.has_wpos_input = true;
break;
default:
if (dst.src_attr_reg_num < sizeof(reg_table) / sizeof(reg_table[0]))
{
@ -478,24 +517,28 @@ std::string FragmentProgramDecompiler::BuildCode()
//Insert global function definitions
insertGlobalFunctions(OS);
std::string float2 = getFloatTypeName(2);
std::string float4 = getFloatTypeName(4);
//Declare register gather/merge if needed
if (properties.has_gather_op)
{
std::string float2 = getFloatTypeName(2);
std::string float4 = getFloatTypeName(4);
OS << float4 << " gather(" << float4 << " _h0, " << float4 << " _h1)\n";
OS << "{\n";
OS << " float x = uintBitsToFloat(packHalf2x16(_h0.xy));\n";
OS << " float y = uintBitsToFloat(packHalf2x16(_h0.zw));\n";
OS << " float z = uintBitsToFloat(packHalf2x16(_h1.xy));\n";
OS << " float w = uintBitsToFloat(packHalf2x16(_h1.zw));\n";
OS << " return " << float4 << "(x, y, z, w);\n";
OS << "}\n\n";
OS << float4 << " gather(" << float4 << " _h0, " << float4 << " _h1)\n";
OS << "{\n";
OS << " float x = uintBitsToFloat(packHalf2x16(_h0.xy));\n";
OS << " float y = uintBitsToFloat(packHalf2x16(_h0.zw));\n";
OS << " float z = uintBitsToFloat(packHalf2x16(_h1.xy));\n";
OS << " float w = uintBitsToFloat(packHalf2x16(_h1.zw));\n";
OS << " return " << float4 << "(x, y, z, w);\n";
OS << "}\n\n";
OS << float2 << " gather(" << float4 << " _h)\n";
OS << "{\n";
OS << " float x = uintBitsToFloat(packHalf2x16(_h.xy));\n";
OS << " float y = uintBitsToFloat(packHalf2x16(_h.zw));\n";
OS << " return " << float2 << "(x, y);\n";
OS << "}\n\n";
OS << float2 << " gather(" << float4 << " _h)\n";
OS << "{\n";
OS << " float x = uintBitsToFloat(packHalf2x16(_h.xy));\n";
OS << " float y = uintBitsToFloat(packHalf2x16(_h.zw));\n";
OS << " return " << float2 << "(x, y);\n";
OS << "}\n\n";
}
insertMainStart(OS);
OS << main << std::endl;
@ -558,7 +601,10 @@ bool FragmentProgramDecompiler::handle_scb(u32 opcode)
case RSX_FP_OPCODE_EX2: SetDst("exp2($0.xxxx)"); return true;
case RSX_FP_OPCODE_FLR: SetDst("floor($0)"); return true;
case RSX_FP_OPCODE_FRC: SetDst(getFunction(FUNCTION::FUNCTION_FRACT)); return true;
case RSX_FP_OPCODE_LIT: SetDst("lit_legacy($0)"); return true;
case RSX_FP_OPCODE_LIT:
SetDst("lit_legacy($0)");
properties.has_lit_op = true;
return true;
case RSX_FP_OPCODE_LIF: SetDst(getFloatTypeName(4) + "(1.0, $0.y, ($0.y > 0 ? pow(2.0, $0.w) : 0.0), 1.0)"); return true;
case RSX_FP_OPCODE_LRP: SetDst(getFloatTypeName(4) + "($2 * (1 - $0) + $1 * $0)"); return true;
case RSX_FP_OPCODE_LG2: SetDst("log2(" + NotZeroPositive("$0.x") + ").xxxx"); return true;

View File

@ -219,7 +219,16 @@ protected:
/** insert end of main function (return value, output copy...)
*/
virtual void insertMainEnd(std::stringstream &OS) = 0;
public:
//Feature flags recorded while the fragment program is decompiled.
//Backends read them to emit only the helper code the shader actually uses.
struct
{
//Set when a LIT opcode is translated into a lit_legacy() call
bool has_lit_op = false;
//Set when a temp register source read requires a gather (see GetSRC); gates emission of the gather() helpers in BuildCode
bool has_gather_op = false;
//Set when input attribute register 0 is read as a source; gates get_wpos() and the local 'wpos' declaration
bool has_wpos_input = false;
}
properties;
ParamArray m_parr;
FragmentProgramDecompiler(const RSXFragmentProgram &prog, u32& size);
FragmentProgramDecompiler(const FragmentProgramDecompiler&) = delete;

View File

@ -281,31 +281,18 @@ namespace glsl
OS << " if (desc.attribute_size == 0)\n";
OS << " {\n";
OS << " //default values\n";
OS << " switch (location)\n";
OS << " {\n";
OS << " case 0:\n";
OS << " //position\n";
OS << " return vec4(0., 0., 0., 1.);\n";
OS << " case 1:\n";
OS << " case 2:\n";
OS << " //weight, normals\n";
OS << " return vec4(0.);\n";
OS << " case 3:\n";
OS << " //diffuse\n";
OS << " return vec4(1.);\n";
OS << " case 4:\n";
OS << " //specular\n";
OS << " return vec4(0.);\n";
OS << " case 5:\n";
OS << " //fog\n";
OS << " return vec4(0.);\n";
OS << " case 6:\n";
OS << " //point size\n";
OS << " return vec4(1.);\n";
OS << " default:\n";
OS << " //mostly just texture coordinates\n";
OS << " return vec4(0.);\n";
OS << " }\n";
OS << " const vec4 defaults[] = \n";
OS << " { vec4(0., 0., 0., 1.), //position\n";
OS << " vec4(0.), vec4(0.), //weight, normals\n";
OS << " vec4(1.), //diffuse\n";
OS << " vec4(0.), vec4(0.), //specular, fog\n";
OS << " vec4(1.), //point size\n";
OS << " vec4(0.), //in_7\n";
OS << " //in_tc registers\n";
OS << " vec4(0.), vec4(0.), vec4(0.), vec4(0.),\n";
OS << " vec4(0.), vec4(0.), vec4(0.), vec4(0.)\n";
OS << " };\n";
OS << " return defaults[location];\n";
OS << " }\n\n";
OS << " int vertex_id = " << vertex_id_name << " - int(vertex_base_index);\n";
OS << " if (desc.frequency == 0)\n";
@ -326,84 +313,95 @@ namespace glsl
OS << "}\n\n";
}
static void insert_glsl_legacy_function(std::ostream& OS, glsl::program_domain domain)
static void insert_glsl_legacy_function(std::ostream& OS, glsl::program_domain domain, bool require_lit_emulation, bool require_depth_conversion = false, bool require_wpos = false)
{
OS << "vec4 lit_legacy(vec4 val)";
OS << "{\n";
OS << " vec4 clamped_val = val;\n";
OS << " clamped_val.x = max(val.x, 0.);\n";
OS << " clamped_val.y = max(val.y, 0.);\n";
OS << " vec4 result;\n";
OS << " result.x = 1.;\n";
OS << " result.w = 1.;\n";
OS << " result.y = clamped_val.x;\n";
OS << " result.z = clamped_val.x > 0. ? exp(clamped_val.w * log(max(clamped_val.y, 0.0000000001))) : 0.;\n";
OS << " return result;\n";
OS << "}\n\n";
OS << "vec4 apply_zclip_xform(vec4 pos, float near_plane, float far_plane)\n";
OS << "{\n";
OS << " float d = pos.z / pos.w;\n";
OS << " if (d < 0.f && d >= near_plane)\n";
OS << " d = 0.f;\n"; //force clamp negative values
OS << " else if (d > 1.f && d <= far_plane)\n";
OS << " d = min(1., 0.99 + (0.01 * (pos.z - near_plane) / (far_plane - near_plane)));\n";
OS << " else\n";
OS << " return pos; //d = (0.99 * d);\n"; //range compression for normal values is disabled until a solution to ops comparing z is found
OS << "\n";
OS << " pos.z = d * pos.w;\n";
OS << " return pos;\n";
OS << "}\n\n";
if (require_lit_emulation)
{
OS << "vec4 lit_legacy(vec4 val)";
OS << "{\n";
OS << " vec4 clamped_val = val;\n";
OS << " clamped_val.x = max(val.x, 0.);\n";
OS << " clamped_val.y = max(val.y, 0.);\n";
OS << " vec4 result;\n";
OS << " result.x = 1.;\n";
OS << " result.w = 1.;\n";
OS << " result.y = clamped_val.x;\n";
OS << " result.z = clamped_val.x > 0. ? exp(clamped_val.w * log(max(clamped_val.y, 0.0000000001))) : 0.;\n";
OS << " return result;\n";
OS << "}\n\n";
}
if (domain == glsl::program_domain::glsl_vertex_program)
{
OS << "vec4 apply_zclip_xform(vec4 pos, float near_plane, float far_plane)\n";
OS << "{\n";
OS << " float d = pos.z / pos.w;\n";
OS << " if (d < 0.f && d >= near_plane)\n";
OS << " d = 0.f;\n"; //force clamp negative values
OS << " else if (d > 1.f && d <= far_plane)\n";
OS << " d = min(1., 0.99 + (0.01 * (pos.z - near_plane) / (far_plane - near_plane)));\n";
OS << " else\n";
OS << " return pos; //d = (0.99 * d);\n"; //range compression for normal values is disabled until a solution to ops comparing z is found
OS << "\n";
OS << " pos.z = d * pos.w;\n";
OS << " return pos;\n";
OS << "}\n\n";
return;
}
program_common::insert_compare_op(OS);
//NOTE: Memory layout is fetched as byteswapped BGRA [GBAR] (GOW collection, DS2, DeS)
//The A component (Z) is useless (should contain stencil8 or just 1)
OS << "vec4 decodeLinearDepth(float depth_value)\n";
OS << "{\n";
OS << " uint value = uint(depth_value * 16777215);\n";
OS << " uint b = (value & 0xff);\n";
OS << " uint g = (value >> 8) & 0xff;\n";
OS << " uint r = (value >> 16) & 0xff;\n";
OS << " return vec4(float(g)/255., float(b)/255., 1., float(r)/255.);\n";
OS << "}\n\n";
if (require_depth_conversion)
{
//NOTE: Memory layout is fetched as byteswapped BGRA [GBAR] (GOW collection, DS2, DeS)
//The A component (Z) is useless (should contain stencil8 or just 1)
OS << "vec4 decodeLinearDepth(float depth_value)\n";
OS << "{\n";
OS << " uint value = uint(depth_value * 16777215);\n";
OS << " uint b = (value & 0xff);\n";
OS << " uint g = (value >> 8) & 0xff;\n";
OS << " uint r = (value >> 16) & 0xff;\n";
OS << " return vec4(float(g)/255., float(b)/255., 1., float(r)/255.);\n";
OS << "}\n\n";
OS << "float read_value(vec4 src, uint remap_index)\n";
OS << "{\n";
OS << " switch (remap_index)\n";
OS << " {\n";
OS << " case 0: return src.a;\n";
OS << " case 1: return src.r;\n";
OS << " case 2: return src.g;\n";
OS << " case 3: return src.b;\n";
OS << " }\n";
OS << "}\n\n";
OS << "float read_value(vec4 src, uint remap_index)\n";
OS << "{\n";
OS << " switch (remap_index)\n";
OS << " {\n";
OS << " case 0: return src.a;\n";
OS << " case 1: return src.r;\n";
OS << " case 2: return src.g;\n";
OS << " case 3: return src.b;\n";
OS << " }\n";
OS << "}\n\n";
OS << "vec4 texture2DReconstruct(sampler2D tex, vec2 coord, float remap)\n";
OS << "{\n";
OS << " vec4 result = decodeLinearDepth(texture(tex, coord.xy).r);\n";
OS << " uint remap_vector = floatBitsToUint(remap) & 0xFF;\n";
OS << " if (remap_vector == 0xE4) return result;\n\n";
OS << " vec4 tmp;\n";
OS << " uint remap_a = remap_vector & 0x3;\n";
OS << " uint remap_r = (remap_vector >> 2) & 0x3;\n";
OS << " uint remap_g = (remap_vector >> 4) & 0x3;\n";
OS << " uint remap_b = (remap_vector >> 6) & 0x3;\n";
OS << " tmp.a = read_value(result, remap_a);\n";
OS << " tmp.r = read_value(result, remap_r);\n";
OS << " tmp.g = read_value(result, remap_g);\n";
OS << " tmp.b = read_value(result, remap_b);\n";
OS << " return tmp;\n";
OS << "}\n\n";
OS << "vec4 texture2DReconstruct(sampler2D tex, vec2 coord, float remap)\n";
OS << "{\n";
OS << " vec4 result = decodeLinearDepth(texture(tex, coord.xy).r);\n";
OS << " uint remap_vector = floatBitsToUint(remap) & 0xFF;\n";
OS << " if (remap_vector == 0xE4) return result;\n\n";
OS << " vec4 tmp;\n";
OS << " uint remap_a = remap_vector & 0x3;\n";
OS << " uint remap_r = (remap_vector >> 2) & 0x3;\n";
OS << " uint remap_g = (remap_vector >> 4) & 0x3;\n";
OS << " uint remap_b = (remap_vector >> 6) & 0x3;\n";
OS << " tmp.a = read_value(result, remap_a);\n";
OS << " tmp.r = read_value(result, remap_r);\n";
OS << " tmp.g = read_value(result, remap_g);\n";
OS << " tmp.b = read_value(result, remap_b);\n";
OS << " return tmp;\n";
OS << "}\n\n";
}
OS << "vec4 get_wpos()\n";
OS << "{\n";
OS << " float abs_scale = abs(wpos_scale);\n";
OS << " return (gl_FragCoord * vec4(abs_scale, wpos_scale, 1., 1.)) + vec4(0., wpos_bias, 0., 0.);\n";
OS << "}\n\n";
if (require_wpos)
{
OS << "vec4 get_wpos()\n";
OS << "{\n";
OS << " float abs_scale = abs(wpos_scale);\n";
OS << " return (gl_FragCoord * vec4(abs_scale, wpos_scale, 1., 1.)) + vec4(0., wpos_bias, 0., 0.);\n";
OS << "}\n\n";
}
}
static void insert_fog_declaration(std::ostream& OS)

View File

@ -638,7 +638,10 @@ std::string VertexProgramDecompiler::Decompile()
case RSX_SCA_OPCODE_RSQ: SetDSTSca("1. / sqrt(" + NotZeroPositive("$s.x") +").xxxx"); break;
case RSX_SCA_OPCODE_EXP: SetDSTSca("exp($s)"); break;
case RSX_SCA_OPCODE_LOG: SetDSTSca("log($s)"); break;
case RSX_SCA_OPCODE_LIT: SetDSTSca("lit_legacy($s)"); break;
case RSX_SCA_OPCODE_LIT:
SetDSTSca("lit_legacy($s)");
properties.has_lit_op = true;
break;
case RSX_SCA_OPCODE_BRA:
{
AddCode("$if ($cond) //BRA");

View File

@ -126,7 +126,14 @@ protected:
/** insert end of main function (return value, output copy...)
*/
virtual void insertMainEnd(std::stringstream &OS) = 0;
public:
//Feature flags recorded while the vertex program is decompiled.
struct
{
//Set in Decompile() when RSX_SCA_OPCODE_LIT is translated into a lit_legacy() call;
//passed to insert_glsl_legacy_function so the helper is only emitted when used
bool has_lit_op = false;
}
properties;
VertexProgramDecompiler(const RSXVertexProgram& prog);
std::string Decompile();
};

View File

@ -194,7 +194,7 @@ namespace
void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
{
glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program);
glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program, properties.has_lit_op, m_prog.redirected_textures != 0, properties.has_wpos_input);
}
void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
@ -248,8 +248,11 @@ void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
}
}
OS << " vec4 ssa = gl_FrontFacing ? vec4(1.) : vec4(-1.);\n";
OS << " vec4 wpos = get_wpos();\n";
if (m_parr.HasParam(PF_PARAM_IN, "vec4", "ssa"))
OS << " vec4 ssa = gl_FrontFacing ? vec4(1.) : vec4(-1.);\n";
if (properties.has_wpos_input)
OS << " vec4 wpos = get_wpos();\n";
for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM])
{

View File

@ -149,7 +149,7 @@ void GLVertexDecompilerThread::insertOutputs(std::stringstream & OS, const std::
void GLVertexDecompilerThread::insertMainStart(std::stringstream & OS)
{
insert_glsl_legacy_function(OS, glsl::glsl_vertex_program);
insert_glsl_legacy_function(OS, glsl::glsl_vertex_program, properties.has_lit_op);
glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_opengl4, gl::get_driver_caps().vendor_INTEL==false);
std::string parameters = "";

View File

@ -197,7 +197,7 @@ namespace vk
void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
{
glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program);
glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program, properties.has_lit_op, m_prog.redirected_textures != 0, properties.has_wpos_input);
}
void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
@ -251,8 +251,11 @@ void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
}
}
OS << " vec4 ssa = gl_FrontFacing ? vec4(1.) : vec4(-1.);\n";
OS << " vec4 wpos = get_wpos();\n";
if (m_parr.HasParam(PF_PARAM_IN, "vec4", "ssa"))
OS << " vec4 ssa = gl_FrontFacing ? vec4(1.) : vec4(-1.);\n";
if (properties.has_wpos_input)
OS << " vec4 wpos = get_wpos();\n";
bool two_sided_enabled = m_prog.front_back_color_enabled && (m_prog.back_color_diffuse_output || m_prog.back_color_specular_output);

View File

@ -185,7 +185,7 @@ void VKVertexDecompilerThread::insertOutputs(std::stringstream & OS, const std::
void VKVertexDecompilerThread::insertMainStart(std::stringstream & OS)
{
glsl::insert_glsl_legacy_function(OS, glsl::glsl_vertex_program);
glsl::insert_glsl_legacy_function(OS, glsl::glsl_vertex_program, properties.has_lit_op);
glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_rpirv);
std::string parameters = "";