glsl/fp/vp: Avoid shader clutter

- Do not emit helper subroutines into shaders unless they are actually used -- this makes the shaders easier to read and leaves less clutter in the disassembled SPIR-V
- glsl: Replace switch block with lookup table
2025-03-14 10:21:21 +00:00 · 2018-01-25 00:09:27 +03:00 · 2018-01-25 00:09:27 +03:00 · 33bcdd476c
commit 33bcdd476c
parent 2e04dceaf0
9 changed files with 192 additions and 123 deletions
--- a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp
+++ b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp
@ -40,15 +40,48 @@ void FragmentProgramDecompiler::SetDst(std::string code, bool append_mask)
 	{
 		if (dst.exp_tex)
 		{
-			//If dst.exp_tex really is _bx2 postfix, we need to unpack dynamic range
+			//Expand [0,1] to [-1, 1]. Confirmed by Castlevania: LOS
 			AddCode("//exp tex flag is set");
 			code = "((" + code + "- 0.5) * 2.)";
 		}

 		if (dst.saturate)
+		{
 			code = saturate(code);
-		else
-			code = ClampValue(code, dst.prec);
+		}
+		else if (dst.prec)
+		{
+			switch (dst.opcode)
+			{
+			case RSX_FP_OPCODE_NRM:
+			case RSX_FP_OPCODE_MAX:
+			case RSX_FP_OPCODE_MIN:
+			case RSX_FP_OPCODE_COS:
+			case RSX_FP_OPCODE_SIN:
+			case RSX_FP_OPCODE_REFL:
+			case RSX_FP_OPCODE_EX2:
+			case RSX_FP_OPCODE_FRC:
+			case RSX_FP_OPCODE_LIT:
+			case RSX_FP_OPCODE_LIF:
+			case RSX_FP_OPCODE_LRP:
+			case RSX_FP_OPCODE_LG2:
+				break;
+			case RSX_FP_OPCODE_MOV:
+				//NOTE: Sometimes varying inputs from VS are out of range so do not exempt any input types, unless fp16 (Naruto UNS)
+				if (dst.fp16 && src0.fp16 && src0.reg_type == RSX_FP_REGISTER_TYPE_TEMP)
+					break;
+			default:
+			{
+				//fp16 precsion flag on f32 register; ignore
+				if (dst.prec == 1 && !dst.fp16)
+					break;
+
+				//clamp value to allowed range
+				code = ClampValue(code, dst.prec);
+				break;
+			}
+			}
+		}
 	}

 	code += (append_mask ? "$m" : "");
@ -385,7 +418,10 @@ template<typename T> std::string FragmentProgramDecompiler::GetSRC(T src)

 				auto &reg = temp_registers[src.tmp_reg_index];
 				if (reg.requires_gather(xy_read, zw_read))
+				{
+					properties.has_gather_op = true;
 					AddCode(reg.gather_r());
+				}
 			}
 		}

@ -407,7 +443,10 @@ template<typename T> std::string FragmentProgramDecompiler::GetSRC(T src)

 		switch (dst.src_attr_reg_num)
 		{
-		case 0x00: ret += reg_table[0]; break;
+		case 0x00:
+			ret += reg_table[0];
+			properties.has_wpos_input = true;
+			break;
 		default:
 			if (dst.src_attr_reg_num < sizeof(reg_table) / sizeof(reg_table[0]))
 			{
@ -478,24 +517,28 @@ std::string FragmentProgramDecompiler::BuildCode()
 	//Insert global function definitions
 	insertGlobalFunctions(OS);

-	std::string float2 = getFloatTypeName(2);
-	std::string float4 = getFloatTypeName(4);
+	//Declare register gather/merge if needed
+	if (properties.has_gather_op)
+	{
+		std::string float2 = getFloatTypeName(2);
+		std::string float4 = getFloatTypeName(4);

-	OS << float4 << " gather(" << float4 << " _h0, " << float4 << " _h1)\n";
-	OS << "{\n";
-	OS << "	float x = uintBitsToFloat(packHalf2x16(_h0.xy));\n";
-	OS << "	float y = uintBitsToFloat(packHalf2x16(_h0.zw));\n";
-	OS << "	float z = uintBitsToFloat(packHalf2x16(_h1.xy));\n";
-	OS << "	float w = uintBitsToFloat(packHalf2x16(_h1.zw));\n";
-	OS << "	return " << float4 << "(x, y, z, w);\n";
-	OS << "}\n\n";
+		OS << float4 << " gather(" << float4 << " _h0, " << float4 << " _h1)\n";
+		OS << "{\n";
+		OS << "	float x = uintBitsToFloat(packHalf2x16(_h0.xy));\n";
+		OS << "	float y = uintBitsToFloat(packHalf2x16(_h0.zw));\n";
+		OS << "	float z = uintBitsToFloat(packHalf2x16(_h1.xy));\n";
+		OS << "	float w = uintBitsToFloat(packHalf2x16(_h1.zw));\n";
+		OS << "	return " << float4 << "(x, y, z, w);\n";
+		OS << "}\n\n";

-	OS << float2 << " gather(" << float4 << " _h)\n";
-	OS << "{\n";
-	OS << "	float x = uintBitsToFloat(packHalf2x16(_h.xy));\n";
-	OS << "	float y = uintBitsToFloat(packHalf2x16(_h.zw));\n";
-	OS << "	return " << float2 << "(x, y);\n";
-	OS << "}\n\n";
+		OS << float2 << " gather(" << float4 << " _h)\n";
+		OS << "{\n";
+		OS << "	float x = uintBitsToFloat(packHalf2x16(_h.xy));\n";
+		OS << "	float y = uintBitsToFloat(packHalf2x16(_h.zw));\n";
+		OS << "	return " << float2 << "(x, y);\n";
+		OS << "}\n\n";
+	}

 	insertMainStart(OS);
 	OS << main << std::endl;
@ -558,7 +601,10 @@ bool FragmentProgramDecompiler::handle_scb(u32 opcode)
 	case RSX_FP_OPCODE_EX2: SetDst("exp2($0.xxxx)"); return true;
 	case RSX_FP_OPCODE_FLR: SetDst("floor($0)"); return true;
 	case RSX_FP_OPCODE_FRC: SetDst(getFunction(FUNCTION::FUNCTION_FRACT)); return true;
-	case RSX_FP_OPCODE_LIT: SetDst("lit_legacy($0)"); return true;
+	case RSX_FP_OPCODE_LIT:
+		SetDst("lit_legacy($0)");
+		properties.has_lit_op = true;
+		return true;
 	case RSX_FP_OPCODE_LIF: SetDst(getFloatTypeName(4) + "(1.0, $0.y, ($0.y > 0 ? pow(2.0, $0.w) : 0.0), 1.0)"); return true;
 	case RSX_FP_OPCODE_LRP: SetDst(getFloatTypeName(4) + "($2 * (1 - $0) + $1 * $0)"); return true;
 	case RSX_FP_OPCODE_LG2: SetDst("log2(" + NotZeroPositive("$0.x") + ").xxxx"); return true;
--- a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.h
+++ b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.h
@ -219,7 +219,16 @@ protected:
 	/** insert end of main function (return value, output copy...)
 	 */
 	virtual void insertMainEnd(std::stringstream &OS) = 0;
+
 public:
+	struct
+	{
+		bool has_lit_op = false;
+		bool has_gather_op = false;
+		bool has_wpos_input = false;
+	}
+	properties;
+
 	ParamArray m_parr;
 	FragmentProgramDecompiler(const RSXFragmentProgram &prog, u32& size);
 	FragmentProgramDecompiler(const FragmentProgramDecompiler&) = delete;
--- a/rpcs3/Emu/RSX/Common/GLSLCommon.h
+++ b/rpcs3/Emu/RSX/Common/GLSLCommon.h
@ -281,31 +281,18 @@ namespace glsl
 		OS << "	if (desc.attribute_size == 0)\n";
 		OS << "	{\n";
 		OS << "		//default values\n";
-		OS << "		switch (location)\n";
-		OS << "		{\n";
-		OS << "		case 0:\n";
-		OS << "			//position\n";
-		OS << "			return vec4(0., 0., 0., 1.);\n";
-		OS << "		case 1:\n";
-		OS << "		case 2:\n";
-		OS << "			//weight, normals\n";
-		OS << "			return vec4(0.);\n";
-		OS << "		case 3:\n";
-		OS << "			//diffuse\n";
-		OS << "			return vec4(1.);\n";
-		OS << "		case 4:\n";
-		OS << "			//specular\n";
-		OS << "			return vec4(0.);\n";
-		OS << "		case 5:\n";
-		OS << "			//fog\n";
-		OS << "			return vec4(0.);\n";
-		OS << "		case 6:\n";
-		OS << "			//point size\n";
-		OS << "			return vec4(1.);\n";
-		OS << "		default:\n";
-		OS << "			//mostly just texture coordinates\n";
-		OS << "			return vec4(0.);\n";
-		OS << "		}\n";
+		OS << "		const vec4 defaults[] = \n";
+		OS << "		{	vec4(0., 0., 0., 1.), //position\n";
+		OS << "			vec4(0.), vec4(0.), //weight, normals\n";
+		OS << "			vec4(1.), //diffuse\n";
+		OS << "			vec4(0.), vec4(0.), //specular, fog\n";
+		OS << "			vec4(1.), //point size\n";
+		OS << "			vec4(0.), //in_7\n";
+		OS << "			//in_tc registers\n";
+		OS << "			vec4(0.), vec4(0.), vec4(0.), vec4(0.),\n";
+		OS << "			vec4(0.), vec4(0.), vec4(0.), vec4(0.)\n";
+		OS << "		};\n";
+		OS << "		return defaults[location];\n";
 		OS << "	}\n\n";
 		OS << "	int vertex_id = " << vertex_id_name << " - int(vertex_base_index);\n";
 		OS << "	if (desc.frequency == 0)\n";
@ -326,84 +313,95 @@ namespace glsl
 		OS << "}\n\n";
 	}

-	static void insert_glsl_legacy_function(std::ostream& OS, glsl::program_domain domain)
+	static void insert_glsl_legacy_function(std::ostream& OS, glsl::program_domain domain, bool require_lit_emulation, bool require_depth_conversion = false, bool require_wpos = false)
 	{
-		OS << "vec4 lit_legacy(vec4 val)";
-		OS << "{\n";
-		OS << "	vec4 clamped_val = val;\n";
-		OS << "	clamped_val.x = max(val.x, 0.);\n";
-		OS << "	clamped_val.y = max(val.y, 0.);\n";
-		OS << "	vec4 result;\n";
-		OS << "	result.x = 1.;\n";
-		OS << "	result.w = 1.;\n";
-		OS << "	result.y = clamped_val.x;\n";
-		OS << "	result.z = clamped_val.x > 0. ? exp(clamped_val.w * log(max(clamped_val.y, 0.0000000001))) : 0.;\n";
-		OS << "	return result;\n";
-		OS << "}\n\n";
-
-		OS << "vec4 apply_zclip_xform(vec4 pos, float near_plane, float far_plane)\n";
-		OS << "{\n";
-		OS << "	float d = pos.z / pos.w;\n";
-		OS << "	if (d < 0.f && d >= near_plane)\n";
-		OS << "		d = 0.f;\n"; //force clamp negative values
-		OS << "	else if (d > 1.f && d <= far_plane)\n";
-		OS << "		d = min(1., 0.99 + (0.01 * (pos.z - near_plane) / (far_plane - near_plane)));\n";
-		OS << "	else\n";
-		OS << "		return pos; //d = (0.99 * d);\n"; //range compression for normal values is disabled until a solution to ops comparing z is found
-		OS << "\n";
-		OS << "	pos.z = d * pos.w;\n";
-		OS << "	return pos;\n";
-		OS << "}\n\n";
+		if (require_lit_emulation)
+		{
+			OS << "vec4 lit_legacy(vec4 val)";
+			OS << "{\n";
+			OS << "	vec4 clamped_val = val;\n";
+			OS << "	clamped_val.x = max(val.x, 0.);\n";
+			OS << "	clamped_val.y = max(val.y, 0.);\n";
+			OS << "	vec4 result;\n";
+			OS << "	result.x = 1.;\n";
+			OS << "	result.w = 1.;\n";
+			OS << "	result.y = clamped_val.x;\n";
+			OS << "	result.z = clamped_val.x > 0. ? exp(clamped_val.w * log(max(clamped_val.y, 0.0000000001))) : 0.;\n";
+			OS << "	return result;\n";
+			OS << "}\n\n";
+		}

 		if (domain == glsl::program_domain::glsl_vertex_program)
+		{
+			OS << "vec4 apply_zclip_xform(vec4 pos, float near_plane, float far_plane)\n";
+			OS << "{\n";
+			OS << "	float d = pos.z / pos.w;\n";
+			OS << "	if (d < 0.f && d >= near_plane)\n";
+			OS << "		d = 0.f;\n"; //force clamp negative values
+			OS << "	else if (d > 1.f && d <= far_plane)\n";
+			OS << "		d = min(1., 0.99 + (0.01 * (pos.z - near_plane) / (far_plane - near_plane)));\n";
+			OS << "	else\n";
+			OS << "		return pos; //d = (0.99 * d);\n"; //range compression for normal values is disabled until a solution to ops comparing z is found
+			OS << "\n";
+			OS << "	pos.z = d * pos.w;\n";
+			OS << "	return pos;\n";
+			OS << "}\n\n";
+
 			return;
+		}

 		program_common::insert_compare_op(OS);

-		//NOTE: Memory layout is fetched as byteswapped BGRA [GBAR] (GOW collection, DS2, DeS)
-		//The A component (Z) is useless (should contain stencil8 or just 1)
-		OS << "vec4 decodeLinearDepth(float depth_value)\n";
-		OS << "{\n";
-		OS << "	uint value = uint(depth_value * 16777215);\n";
-		OS << "	uint b = (value & 0xff);\n";
-		OS << "	uint g = (value >> 8) & 0xff;\n";
-		OS << "	uint r = (value >> 16) & 0xff;\n";
-		OS << "	return vec4(float(g)/255., float(b)/255., 1., float(r)/255.);\n";
-		OS << "}\n\n";
+		if (require_depth_conversion)
+		{
+			//NOTE: Memory layout is fetched as byteswapped BGRA [GBAR] (GOW collection, DS2, DeS)
+			//The A component (Z) is useless (should contain stencil8 or just 1)
+			OS << "vec4 decodeLinearDepth(float depth_value)\n";
+			OS << "{\n";
+			OS << "	uint value = uint(depth_value * 16777215);\n";
+			OS << "	uint b = (value & 0xff);\n";
+			OS << "	uint g = (value >> 8) & 0xff;\n";
+			OS << "	uint r = (value >> 16) & 0xff;\n";
+			OS << "	return vec4(float(g)/255., float(b)/255., 1., float(r)/255.);\n";
+			OS << "}\n\n";

-		OS << "float read_value(vec4 src, uint remap_index)\n";
-		OS << "{\n";
-		OS << "	switch (remap_index)\n";
-		OS << "	{\n";
-		OS << "		case 0: return src.a;\n";
-		OS << "		case 1: return src.r;\n";
-		OS << "		case 2: return src.g;\n";
-		OS << "		case 3: return src.b;\n";
-		OS << "	}\n";
-		OS << "}\n\n";
+			OS << "float read_value(vec4 src, uint remap_index)\n";
+			OS << "{\n";
+			OS << "	switch (remap_index)\n";
+			OS << "	{\n";
+			OS << "		case 0: return src.a;\n";
+			OS << "		case 1: return src.r;\n";
+			OS << "		case 2: return src.g;\n";
+			OS << "		case 3: return src.b;\n";
+			OS << "	}\n";
+			OS << "}\n\n";

-		OS << "vec4 texture2DReconstruct(sampler2D tex, vec2 coord, float remap)\n";
-		OS << "{\n";
-		OS << "	vec4 result = decodeLinearDepth(texture(tex, coord.xy).r);\n";
-		OS << "	uint remap_vector = floatBitsToUint(remap) & 0xFF;\n";
-		OS << "	if (remap_vector == 0xE4) return result;\n\n";
-		OS << "	vec4 tmp;\n";
-		OS << "	uint remap_a = remap_vector & 0x3;\n";
-		OS << "	uint remap_r = (remap_vector >> 2) & 0x3;\n";
-		OS << "	uint remap_g = (remap_vector >> 4) & 0x3;\n";
-		OS << "	uint remap_b = (remap_vector >> 6) & 0x3;\n";
-		OS << "	tmp.a = read_value(result, remap_a);\n";
-		OS << "	tmp.r = read_value(result, remap_r);\n";
-		OS << "	tmp.g = read_value(result, remap_g);\n";
-		OS << "	tmp.b = read_value(result, remap_b);\n";
-		OS << "	return tmp;\n";
-		OS << "}\n\n";
+			OS << "vec4 texture2DReconstruct(sampler2D tex, vec2 coord, float remap)\n";
+			OS << "{\n";
+			OS << "	vec4 result = decodeLinearDepth(texture(tex, coord.xy).r);\n";
+			OS << "	uint remap_vector = floatBitsToUint(remap) & 0xFF;\n";
+			OS << "	if (remap_vector == 0xE4) return result;\n\n";
+			OS << "	vec4 tmp;\n";
+			OS << "	uint remap_a = remap_vector & 0x3;\n";
+			OS << "	uint remap_r = (remap_vector >> 2) & 0x3;\n";
+			OS << "	uint remap_g = (remap_vector >> 4) & 0x3;\n";
+			OS << "	uint remap_b = (remap_vector >> 6) & 0x3;\n";
+			OS << "	tmp.a = read_value(result, remap_a);\n";
+			OS << "	tmp.r = read_value(result, remap_r);\n";
+			OS << "	tmp.g = read_value(result, remap_g);\n";
+			OS << "	tmp.b = read_value(result, remap_b);\n";
+			OS << "	return tmp;\n";
+			OS << "}\n\n";
+		}

-		OS << "vec4 get_wpos()\n";
-		OS << "{\n";
-		OS << "	float abs_scale = abs(wpos_scale);\n";
-		OS << "	return (gl_FragCoord * vec4(abs_scale, wpos_scale, 1., 1.)) + vec4(0., wpos_bias, 0., 0.);\n";
-		OS << "}\n\n";
+		if (require_wpos)
+		{
+			OS << "vec4 get_wpos()\n";
+			OS << "{\n";
+			OS << "	float abs_scale = abs(wpos_scale);\n";
+			OS << "	return (gl_FragCoord * vec4(abs_scale, wpos_scale, 1., 1.)) + vec4(0., wpos_bias, 0., 0.);\n";
+			OS << "}\n\n";
+		}
 	}

 	static void insert_fog_declaration(std::ostream& OS)
--- a/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.cpp
+++ b/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.cpp
@ -638,7 +638,10 @@ std::string VertexProgramDecompiler::Decompile()
 		case RSX_SCA_OPCODE_RSQ: SetDSTSca("1. / sqrt(" + NotZeroPositive("$s.x") +").xxxx"); break;
 		case RSX_SCA_OPCODE_EXP: SetDSTSca("exp($s)"); break;
 		case RSX_SCA_OPCODE_LOG: SetDSTSca("log($s)"); break;
-		case RSX_SCA_OPCODE_LIT: SetDSTSca("lit_legacy($s)"); break;
+		case RSX_SCA_OPCODE_LIT:
+			SetDSTSca("lit_legacy($s)");
+			properties.has_lit_op = true;
+			break;
 		case RSX_SCA_OPCODE_BRA:
 		{
 			AddCode("$if ($cond) //BRA");
--- a/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.h
+++ b/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.h
@ -126,7 +126,14 @@ protected:
 	/** insert end of main function (return value, output copy...)
 	*/
 	virtual void insertMainEnd(std::stringstream &OS) = 0;
+
 public:
+	struct
+	{
+		bool has_lit_op = false;
+	}
+	properties;
+
 	VertexProgramDecompiler(const RSXVertexProgram& prog);
 	std::string Decompile();
 };
--- a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp
+++ b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp
@ -194,7 +194,7 @@ namespace

 void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
 {
-	glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program);
+	glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program, properties.has_lit_op, m_prog.redirected_textures != 0, properties.has_wpos_input);
 }

 void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
@ -248,8 +248,11 @@ void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
 		}
 	}

-	OS << "	vec4 ssa = gl_FrontFacing ? vec4(1.) : vec4(-1.);\n";
-	OS << "	vec4 wpos = get_wpos();\n";
+	if (m_parr.HasParam(PF_PARAM_IN, "vec4", "ssa"))
+		OS << "	vec4 ssa = gl_FrontFacing ? vec4(1.) : vec4(-1.);\n";
+
+	if (properties.has_wpos_input)
+		OS << "	vec4 wpos = get_wpos();\n";

 	for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM])
 	{
--- a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp
+++ b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp
@ -149,7 +149,7 @@ void GLVertexDecompilerThread::insertOutputs(std::stringstream & OS, const std::

 void GLVertexDecompilerThread::insertMainStart(std::stringstream & OS)
 {
-	insert_glsl_legacy_function(OS, glsl::glsl_vertex_program);
+	insert_glsl_legacy_function(OS, glsl::glsl_vertex_program, properties.has_lit_op);
 	glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_opengl4, gl::get_driver_caps().vendor_INTEL==false);

 	std::string parameters = "";
--- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp
+++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp
@ -197,7 +197,7 @@ namespace vk

 void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
 {
-	glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program);
+	glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program, properties.has_lit_op, m_prog.redirected_textures != 0, properties.has_wpos_input);
 }

 void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
@ -251,8 +251,11 @@ void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
 		}
 	}

-	OS << "	vec4 ssa = gl_FrontFacing ? vec4(1.) : vec4(-1.);\n";
-	OS << "	vec4 wpos = get_wpos();\n";
+	if (m_parr.HasParam(PF_PARAM_IN, "vec4", "ssa"))
+		OS << "	vec4 ssa = gl_FrontFacing ? vec4(1.) : vec4(-1.);\n";
+
+	if (properties.has_wpos_input)
+		OS << "	vec4 wpos = get_wpos();\n";

 	bool two_sided_enabled = m_prog.front_back_color_enabled && (m_prog.back_color_diffuse_output || m_prog.back_color_specular_output);

--- a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp
+++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp
@ -185,7 +185,7 @@ void VKVertexDecompilerThread::insertOutputs(std::stringstream & OS, const std::

 void VKVertexDecompilerThread::insertMainStart(std::stringstream & OS)
 {
-	glsl::insert_glsl_legacy_function(OS, glsl::glsl_vertex_program);
+	glsl::insert_glsl_legacy_function(OS, glsl::glsl_vertex_program, properties.has_lit_op);
 	glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_rpirv);

 	std::string parameters = "";