From eed32cf3a46138b3e2b65f3f79909c2dabe017ed Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 25 Aug 2019 17:23:46 +0300 Subject: [PATCH] rsx: Decompiler fixups and improvements - Fix sampling of the W component of 2D texture coordinates. W is actually HPOS.w and not 1. Z is, however, always 0. - Optimize register usage a bit. Disassembling compiled SPV shows that global declarations result in fewer ops than using inout modifiers. Modifiers generate extra mov instructions. --- .../RSX/Common/FragmentProgramDecompiler.cpp | 28 ++++-- .../RSX/Common/FragmentProgramDecompiler.h | 1 + rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp | 90 +++++++++---------- rpcs3/Emu/RSX/GL/GLVertexProgram.cpp | 61 ++++++------- rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp | 84 ++++++++--------- rpcs3/Emu/RSX/VK/VKVertexProgram.cpp | 60 ++++++------- 6 files changed, 160 insertions(+), 164 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp index c9ff52bc08..8c63d8bdd2 100644 --- a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp @@ -524,7 +524,7 @@ template std::string FragmentProgramDecompiler::GetSRC(T src) // NOTE: Hw testing showed the following: // 1. Reading from registers 1 and 2 (COL0 and COL1) is clamped to (0, 1) // 2. Reading from registers 4-12 (inclusive) is not clamped, but.. - // 3. If the texcoord control mask is enabled, the last 2 values are always 0 and 1! + // 3. If the texcoord control mask is enabled, the last 2 values are always 0 and hpos.w! const std::string reg_var = (dst.src_attr_reg_num < std::size(reg_table))?
reg_table[dst.src_attr_reg_num] : "unk"; bool insert = true; @@ -541,15 +541,30 @@ template std::string FragmentProgramDecompiler::GetSRC(T src) case 0x02: { // COL0, COL1 - ret += "_saturate(" + reg_var + ")"; - apply_precision_modifier = false; + if (!src2.use_index_reg) + { + ret += "_saturate(" + reg_var + ")"; + apply_precision_modifier = false; + } + else + { + // Raw access + ret += reg_var; + } break; } case 0x03: { // FOGC - // TODO: Confirm if precision modifiers affect this one - ret += reg_var; + if (!src2.use_index_reg) + { + ret += reg_var; + } + else + { + // Raw access + ret += "fog_c"; + } break; } case 0x4: @@ -567,7 +582,8 @@ template std::string FragmentProgramDecompiler::GetSRC(T src) // Texcoord mask seems to reset the last 2 arguments to 0 and 1 if set if (m_prog.texcoord_is_2d(dst.src_attr_reg_num - 4)) { - ret += getFloatTypeName(4) + "(" + reg_var + ".x, " + reg_var + ".y, 0., 1.)"; + ret += getFloatTypeName(4) + "(" + reg_var + ".x, " + reg_var + ".y, 0., in_w)"; + properties.has_w_access = true; } else { diff --git a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.h b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.h index 3a87cf3add..6e266352be 100644 --- a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.h +++ b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.h @@ -281,6 +281,7 @@ public: bool has_tex_op = false; bool has_divsq = false; bool has_clamp = false; + bool has_w_access = false; } properties; diff --git a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp index 5beca7413b..29a33c982b 100644 --- a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp @@ -202,34 +202,55 @@ void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS) if (properties.in_register_mask & in_fogc) glsl::insert_fog_declaration(OS); - const std::set output_values = + std::set output_registers; + if (m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) { - "r0", "r1", "r2", "r3", "r4", - 
"h0", "h2", "h4", "h6", "h8" - }; - - std::string parameters; - const auto half4 = getHalfTypeName(4); - for (auto ®_name : output_values) + output_registers = { "r0", "r2", "r3", "r4" }; + } + else { - const auto type = (reg_name[0] == 'r' || !device_props.has_native_half_support)? "vec4" : half4; - if (m_parr.HasParam(PF_PARAM_NONE, type, reg_name)) - { - if (parameters.length()) - parameters += ", "; - - parameters += "inout " + type + " " + reg_name; - } + output_registers = { "h0", "h4", "h6", "h8" }; } - OS << "void fs_main(" << parameters << ")\n"; + if (m_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) + { + output_registers.insert("r1"); + } + + std::string registers; + std::string reg_type; + const auto half4 = getHalfTypeName(4); + for (auto ®_name : output_registers) + { + const auto type = (reg_name[0] == 'r' || !device_props.has_native_half_support)? "vec4" : half4; + if (LIKELY(reg_type == type)) + { + registers += ", " + reg_name + " = " + type + "(0.)"; + } + else + { + if (!registers.empty()) + registers += ";\n"; + + registers += type + " " + reg_name + " = " + type + "(0.)"; + } + + reg_type = type; + } + + if (!registers.empty()) + { + OS << registers << ";\n"; + } + + OS << "void fs_main()\n"; OS << "{\n"; for (const ParamType& PT : m_parr.params[PF_PARAM_NONE]) { - for (const ParamItem& PI : PT.items) + for (const auto& PI : PT.items) { - if (output_values.find(PI.name) != output_values.end()) + if (output_registers.find(PI.name) != output_registers.end()) continue; OS << " " << PT.type << " " << PI.name; @@ -240,11 +261,8 @@ void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS) } } - if (m_parr.HasParam(PF_PARAM_IN, "vec4", "ssa")) - OS << " vec4 ssa = gl_FrontFacing ? vec4(1.) : vec4(-1.);\n"; - - if (properties.in_register_mask & in_wpos) - OS << " vec4 wpos = get_wpos();\n"; + if (properties.has_w_access) + OS << " float in_w = (1. 
/ gl_FragCoord.w);\n"; if (properties.in_register_mask & in_ssa) OS << " vec4 ssa = gl_FrontFacing ? vec4(1.) : vec4(-1.);\n"; @@ -267,34 +285,12 @@ void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS) void GLFragmentDecompilerThread::insertMainEnd(std::stringstream & OS) { - const std::set output_values = - { - "r0", "r1", "r2", "r3", "r4", - "h0", "h2", "h4", "h6", "h8" - }; - OS << "}\n\n"; OS << "void main()\n"; OS << "{\n"; - std::string parameters; - const auto half4 = getHalfTypeName(4); - - for (auto ®_name : output_values) - { - const std::string type = (reg_name[0] == 'r' || !device_props.has_native_half_support)? "vec4" : half4; - if (m_parr.HasParam(PF_PARAM_NONE, type, reg_name)) - { - if (parameters.length()) - parameters += ", "; - - parameters += reg_name; - OS << " " << type << " " << reg_name << " = " << type << "(0.);\n"; - } - } - - OS << "\n" << " fs_main(" + parameters + ");\n\n"; + OS << "\n" << " fs_main();\n\n"; glsl::insert_rop( OS, diff --git a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp index 1d841b084d..729f859033 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp @@ -129,20 +129,37 @@ void GLVertexDecompilerThread::insertMainStart(std::stringstream & OS) insert_glsl_legacy_function(OS, properties2); glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_opengl4, dev_caps.vendor_INTEL == false); - std::string parameters; - for (int i = 0; i < 16; ++i) + // Declare global registers with optional initialization + std::string registers; + if (ParamType *vec4Types = m_parr.SearchParam(PF_PARAM_OUT, "vec4")) { - std::string reg_name = "dst_reg" + std::to_string(i); - if (m_parr.HasParam(PF_PARAM_OUT, "vec4", reg_name)) + for (auto &PI : vec4Types->items) { - if (parameters.length()) - parameters += ", "; + if (registers.length()) + registers += ", "; + else + registers = "vec4 "; - parameters += "inout vec4 " + reg_name; + registers += PI.name; + + if 
(!PI.value.empty()) + { + printf("Value=%s\n", PI.value.c_str()); + // Simplify default initialization + if (PI.value == "vec4(0.0, 0.0, 0.0, 0.0)") + registers += " = vec4(0.)"; + else + registers += " = " + PI.value; + } } } - OS << "void vs_main(" << parameters << ")\n"; + if (!registers.empty()) + { + OS << registers << ";\n"; + } + + OS << "void vs_main()\n"; OS << "{\n"; //Declare temporary registers, ignoring those mapped to outputs @@ -177,33 +194,7 @@ void GLVertexDecompilerThread::insertMainEnd(std::stringstream & OS) OS << "void main ()\n"; OS << "{\n"; - std::string parameters; - - if (ParamType *vec4Types = m_parr.SearchParam(PF_PARAM_OUT, "vec4")) - { - for (int i = 0; i < 16; ++i) - { - std::string reg_name = "dst_reg" + std::to_string(i); - for (auto &PI : vec4Types->items) - { - if (reg_name == PI.name) - { - if (parameters.length()) - parameters += ", "; - - parameters += reg_name; - OS << " vec4 " << reg_name; - - if (!PI.value.empty()) - OS << "= " << PI.value; - - OS << ";\n"; - } - } - } - } - - OS << "\n" << " vs_main(" << parameters << ");\n\n"; + OS << "\n" << " vs_main();\n\n"; for (auto &i : reg_table) { diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp index c320c8e734..565969d20b 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp @@ -246,34 +246,55 @@ void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS) if (properties.in_register_mask & in_fogc) glsl::insert_fog_declaration(OS); - const std::set output_values = + std::set output_registers; + if (m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) { - "r0", "r1", "r2", "r3", "r4", - "h0", "h2", "h4", "h6", "h8" - }; - - std::string parameters; - const auto half4 = getHalfTypeName(4); - for (auto ®_name : output_values) + output_registers = { "r0", "r2", "r3", "r4" }; + } + else { - const auto type = (reg_name[0] == 'r' || !device_props.has_native_half_support)? 
"vec4" : half4; - if (m_parr.HasParam(PF_PARAM_NONE, type, reg_name)) - { - if (parameters.length()) - parameters += ", "; - - parameters += "inout " + type + " " + reg_name; - } + output_registers = { "h0", "h4", "h6", "h8" }; } - OS << "void fs_main(" << parameters << ")\n"; + if (m_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) + { + output_registers.insert("r1"); + } + + std::string registers; + std::string reg_type; + const auto half4 = getHalfTypeName(4); + for (auto ®_name : output_registers) + { + const auto type = (reg_name[0] == 'r' || !device_props.has_native_half_support)? "vec4" : half4; + if (LIKELY(reg_type == type)) + { + registers += ", " + reg_name + " = " + type + "(0.)"; + } + else + { + if (!registers.empty()) + registers += ";\n"; + + registers += type + " " + reg_name + " = " + type + "(0.)"; + } + + reg_type = type; + } + + if (!registers.empty()) + { + OS << registers << ";\n"; + } + + OS << "void fs_main()\n"; OS << "{\n"; for (const ParamType& PT : m_parr.params[PF_PARAM_NONE]) { for (const ParamItem& PI : PT.items) { - if (output_values.find(PI.name) != output_values.end()) + if (output_registers.find(PI.name) != output_registers.end()) continue; OS << " " << PT.type << " " << PI.name; @@ -284,6 +305,9 @@ void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS) } } + if (properties.has_w_access) + OS << " float in_w = (1. / gl_FragCoord.w);\n"; + if (properties.in_register_mask & in_ssa) OS << " vec4 ssa = gl_FrontFacing ? vec4(1.) 
: vec4(-1.);\n"; @@ -305,34 +329,12 @@ void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS) void VKFragmentDecompilerThread::insertMainEnd(std::stringstream & OS) { - const std::set output_values = - { - "r0", "r1", "r2", "r3", "r4", - "h0", "h2", "h4", "h6", "h8" - }; - OS << "}\n\n"; OS << "void main()\n"; OS << "{\n"; - std::string parameters; - const auto half4 = getHalfTypeName(4); - - for (auto ®_name : output_values) - { - const std::string type = (reg_name[0] == 'r' || !device_props.has_native_half_support)? "vec4" : half4; - if (m_parr.HasParam(PF_PARAM_NONE, type, reg_name)) - { - if (parameters.length()) - parameters += ", "; - - parameters += reg_name; - OS << " " << type << " " << reg_name << " = " << type << "(0.);\n"; - } - } - - OS << "\n" << " fs_main(" + parameters + ");\n\n"; + OS << "\n" << " fs_main();\n\n"; glsl::insert_rop( OS, diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp index c7bd2427b1..a9395f227b 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp @@ -177,20 +177,36 @@ void VKVertexDecompilerThread::insertMainStart(std::stringstream & OS) glsl::insert_glsl_legacy_function(OS, properties2); glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_spirv); - std::string parameters; - for (int i = 0; i < 16; ++i) + // Declare global registers with optional initialization + std::string registers; + if (ParamType *vec4Types = m_parr.SearchParam(PF_PARAM_OUT, "vec4")) { - std::string reg_name = "dst_reg" + std::to_string(i); - if (m_parr.HasParam(PF_PARAM_OUT, "vec4", reg_name)) + for (auto &PI : vec4Types->items) { - if (parameters.length()) - parameters += ", "; + if (registers.length()) + registers += ", "; + else + registers = "vec4 "; - parameters += "inout vec4 " + reg_name; + registers += PI.name; + + if (!PI.value.empty()) + { + // Simplify default initialization + if (PI.value == "vec4(0.0, 0.0, 0.0, 0.0)") + registers += " = vec4(0.)"; 
+ else + registers += " = " + PI.value; + } } } - OS << "void vs_main(" << parameters << ")\n"; + if (!registers.empty()) + { + OS << registers << ";\n"; + } + + OS << "void vs_main()\n"; OS << "{\n"; //Declare temporary registers, ignoring those mapped to outputs @@ -225,33 +241,7 @@ void VKVertexDecompilerThread::insertMainEnd(std::stringstream & OS) OS << "void main ()\n"; OS << "{\n"; - std::string parameters; - - if (ParamType *vec4Types = m_parr.SearchParam(PF_PARAM_OUT, "vec4")) - { - for (int i = 0; i < 16; ++i) - { - std::string reg_name = "dst_reg" + std::to_string(i); - for (auto &PI : vec4Types->items) - { - if (reg_name == PI.name) - { - if (parameters.length()) - parameters += ", "; - - parameters += reg_name; - OS << " vec4 " << reg_name; - - if (!PI.value.empty()) - OS << "= " << PI.value; - - OS << ";\n"; - } - } - } - } - - OS << "\n" << " vs_main(" << parameters << ");\n\n"; + OS << "\n" << " vs_main();\n\n"; for (auto &i : reg_table) {