rsx: Decompiler fixups and improvements

- Fix the W component produced when reading 2D texture coordinates.
  W is actually HPOS.w, not 1. Z, however, is always 0.
- Optimize register usage a bit
  Disassembling the compiled SPIR-V shows that global declarations result in fewer ops than using inout modifiers. The modifiers generate extra mov instructions.
This commit is contained in:
kd-11 2019-08-25 17:23:46 +03:00 committed by kd-11
parent 3e28e4b1e0
commit eed32cf3a4
6 changed files with 160 additions and 164 deletions

View File

@ -524,7 +524,7 @@ template<typename T> std::string FragmentProgramDecompiler::GetSRC(T src)
// NOTE: Hw testing showed the following: // NOTE: Hw testing showed the following:
// 1. Reading from registers 1 and 2 (COL0 and COL1) is clamped to (0, 1) // 1. Reading from registers 1 and 2 (COL0 and COL1) is clamped to (0, 1)
// 2. Reading from registers 4-12 (inclusive) is not clamped, but.. // 2. Reading from registers 4-12 (inclusive) is not clamped, but..
// 3. If the texcoord control mask is enabled, the last 2 values are always 0 and 1! // 3. If the texcoord control mask is enabled, the last 2 values are always 0 and hpos.w!
const std::string reg_var = (dst.src_attr_reg_num < std::size(reg_table))? reg_table[dst.src_attr_reg_num] : "unk"; const std::string reg_var = (dst.src_attr_reg_num < std::size(reg_table))? reg_table[dst.src_attr_reg_num] : "unk";
bool insert = true; bool insert = true;
@ -541,15 +541,30 @@ template<typename T> std::string FragmentProgramDecompiler::GetSRC(T src)
case 0x02: case 0x02:
{ {
// COL0, COL1 // COL0, COL1
ret += "_saturate(" + reg_var + ")"; if (!src2.use_index_reg)
apply_precision_modifier = false; {
ret += "_saturate(" + reg_var + ")";
apply_precision_modifier = false;
}
else
{
// Raw access
ret += reg_var;
}
break; break;
} }
case 0x03: case 0x03:
{ {
// FOGC // FOGC
// TODO: Confirm if precision modifiers affect this one if (!src2.use_index_reg)
ret += reg_var; {
ret += reg_var;
}
else
{
// Raw access
ret += "fog_c";
}
break; break;
} }
case 0x4: case 0x4:
@ -567,7 +582,8 @@ template<typename T> std::string FragmentProgramDecompiler::GetSRC(T src)
// Texcoord mask seems to reset the last 2 arguments to 0 and 1 if set // Texcoord mask seems to reset the last 2 arguments to 0 and 1 if set
if (m_prog.texcoord_is_2d(dst.src_attr_reg_num - 4)) if (m_prog.texcoord_is_2d(dst.src_attr_reg_num - 4))
{ {
ret += getFloatTypeName(4) + "(" + reg_var + ".x, " + reg_var + ".y, 0., 1.)"; ret += getFloatTypeName(4) + "(" + reg_var + ".x, " + reg_var + ".y, 0., in_w)";
properties.has_w_access = true;
} }
else else
{ {

View File

@ -281,6 +281,7 @@ public:
bool has_tex_op = false; bool has_tex_op = false;
bool has_divsq = false; bool has_divsq = false;
bool has_clamp = false; bool has_clamp = false;
bool has_w_access = false;
} }
properties; properties;

View File

@ -202,34 +202,55 @@ void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
if (properties.in_register_mask & in_fogc) if (properties.in_register_mask & in_fogc)
glsl::insert_fog_declaration(OS); glsl::insert_fog_declaration(OS);
const std::set<std::string> output_values = std::set<std::string> output_registers;
if (m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS)
{ {
"r0", "r1", "r2", "r3", "r4", output_registers = { "r0", "r2", "r3", "r4" };
"h0", "h2", "h4", "h6", "h8" }
}; else
std::string parameters;
const auto half4 = getHalfTypeName(4);
for (auto &reg_name : output_values)
{ {
const auto type = (reg_name[0] == 'r' || !device_props.has_native_half_support)? "vec4" : half4; output_registers = { "h0", "h4", "h6", "h8" };
if (m_parr.HasParam(PF_PARAM_NONE, type, reg_name))
{
if (parameters.length())
parameters += ", ";
parameters += "inout " + type + " " + reg_name;
}
} }
OS << "void fs_main(" << parameters << ")\n"; if (m_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT)
{
output_registers.insert("r1");
}
std::string registers;
std::string reg_type;
const auto half4 = getHalfTypeName(4);
for (auto &reg_name : output_registers)
{
const auto type = (reg_name[0] == 'r' || !device_props.has_native_half_support)? "vec4" : half4;
if (LIKELY(reg_type == type))
{
registers += ", " + reg_name + " = " + type + "(0.)";
}
else
{
if (!registers.empty())
registers += ";\n";
registers += type + " " + reg_name + " = " + type + "(0.)";
}
reg_type = type;
}
if (!registers.empty())
{
OS << registers << ";\n";
}
OS << "void fs_main()\n";
OS << "{\n"; OS << "{\n";
for (const ParamType& PT : m_parr.params[PF_PARAM_NONE]) for (const ParamType& PT : m_parr.params[PF_PARAM_NONE])
{ {
for (const ParamItem& PI : PT.items) for (const auto& PI : PT.items)
{ {
if (output_values.find(PI.name) != output_values.end()) if (output_registers.find(PI.name) != output_registers.end())
continue; continue;
OS << " " << PT.type << " " << PI.name; OS << " " << PT.type << " " << PI.name;
@ -240,11 +261,8 @@ void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
} }
} }
if (m_parr.HasParam(PF_PARAM_IN, "vec4", "ssa")) if (properties.has_w_access)
OS << " vec4 ssa = gl_FrontFacing ? vec4(1.) : vec4(-1.);\n"; OS << " float in_w = (1. / gl_FragCoord.w);\n";
if (properties.in_register_mask & in_wpos)
OS << " vec4 wpos = get_wpos();\n";
if (properties.in_register_mask & in_ssa) if (properties.in_register_mask & in_ssa)
OS << " vec4 ssa = gl_FrontFacing ? vec4(1.) : vec4(-1.);\n"; OS << " vec4 ssa = gl_FrontFacing ? vec4(1.) : vec4(-1.);\n";
@ -267,34 +285,12 @@ void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
void GLFragmentDecompilerThread::insertMainEnd(std::stringstream & OS) void GLFragmentDecompilerThread::insertMainEnd(std::stringstream & OS)
{ {
const std::set<std::string> output_values =
{
"r0", "r1", "r2", "r3", "r4",
"h0", "h2", "h4", "h6", "h8"
};
OS << "}\n\n"; OS << "}\n\n";
OS << "void main()\n"; OS << "void main()\n";
OS << "{\n"; OS << "{\n";
std::string parameters; OS << "\n" << " fs_main();\n\n";
const auto half4 = getHalfTypeName(4);
for (auto &reg_name : output_values)
{
const std::string type = (reg_name[0] == 'r' || !device_props.has_native_half_support)? "vec4" : half4;
if (m_parr.HasParam(PF_PARAM_NONE, type, reg_name))
{
if (parameters.length())
parameters += ", ";
parameters += reg_name;
OS << " " << type << " " << reg_name << " = " << type << "(0.);\n";
}
}
OS << "\n" << " fs_main(" + parameters + ");\n\n";
glsl::insert_rop( glsl::insert_rop(
OS, OS,

View File

@ -129,20 +129,37 @@ void GLVertexDecompilerThread::insertMainStart(std::stringstream & OS)
insert_glsl_legacy_function(OS, properties2); insert_glsl_legacy_function(OS, properties2);
glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_opengl4, dev_caps.vendor_INTEL == false); glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_opengl4, dev_caps.vendor_INTEL == false);
std::string parameters; // Declare global registers with optional initialization
for (int i = 0; i < 16; ++i) std::string registers;
if (ParamType *vec4Types = m_parr.SearchParam(PF_PARAM_OUT, "vec4"))
{ {
std::string reg_name = "dst_reg" + std::to_string(i); for (auto &PI : vec4Types->items)
if (m_parr.HasParam(PF_PARAM_OUT, "vec4", reg_name))
{ {
if (parameters.length()) if (registers.length())
parameters += ", "; registers += ", ";
else
registers = "vec4 ";
parameters += "inout vec4 " + reg_name; registers += PI.name;
if (!PI.value.empty())
{
printf("Value=%s\n", PI.value.c_str());
// Simplify default initialization
if (PI.value == "vec4(0.0, 0.0, 0.0, 0.0)")
registers += " = vec4(0.)";
else
registers += " = " + PI.value;
}
} }
} }
OS << "void vs_main(" << parameters << ")\n"; if (!registers.empty())
{
OS << registers << ";\n";
}
OS << "void vs_main()\n";
OS << "{\n"; OS << "{\n";
//Declare temporary registers, ignoring those mapped to outputs //Declare temporary registers, ignoring those mapped to outputs
@ -177,33 +194,7 @@ void GLVertexDecompilerThread::insertMainEnd(std::stringstream & OS)
OS << "void main ()\n"; OS << "void main ()\n";
OS << "{\n"; OS << "{\n";
std::string parameters; OS << "\n" << " vs_main();\n\n";
if (ParamType *vec4Types = m_parr.SearchParam(PF_PARAM_OUT, "vec4"))
{
for (int i = 0; i < 16; ++i)
{
std::string reg_name = "dst_reg" + std::to_string(i);
for (auto &PI : vec4Types->items)
{
if (reg_name == PI.name)
{
if (parameters.length())
parameters += ", ";
parameters += reg_name;
OS << " vec4 " << reg_name;
if (!PI.value.empty())
OS << "= " << PI.value;
OS << ";\n";
}
}
}
}
OS << "\n" << " vs_main(" << parameters << ");\n\n";
for (auto &i : reg_table) for (auto &i : reg_table)
{ {

View File

@ -246,34 +246,55 @@ void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
if (properties.in_register_mask & in_fogc) if (properties.in_register_mask & in_fogc)
glsl::insert_fog_declaration(OS); glsl::insert_fog_declaration(OS);
const std::set<std::string> output_values = std::set<std::string> output_registers;
if (m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS)
{ {
"r0", "r1", "r2", "r3", "r4", output_registers = { "r0", "r2", "r3", "r4" };
"h0", "h2", "h4", "h6", "h8" }
}; else
std::string parameters;
const auto half4 = getHalfTypeName(4);
for (auto &reg_name : output_values)
{ {
const auto type = (reg_name[0] == 'r' || !device_props.has_native_half_support)? "vec4" : half4; output_registers = { "h0", "h4", "h6", "h8" };
if (m_parr.HasParam(PF_PARAM_NONE, type, reg_name))
{
if (parameters.length())
parameters += ", ";
parameters += "inout " + type + " " + reg_name;
}
} }
OS << "void fs_main(" << parameters << ")\n"; if (m_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT)
{
output_registers.insert("r1");
}
std::string registers;
std::string reg_type;
const auto half4 = getHalfTypeName(4);
for (auto &reg_name : output_registers)
{
const auto type = (reg_name[0] == 'r' || !device_props.has_native_half_support)? "vec4" : half4;
if (LIKELY(reg_type == type))
{
registers += ", " + reg_name + " = " + type + "(0.)";
}
else
{
if (!registers.empty())
registers += ";\n";
registers += type + " " + reg_name + " = " + type + "(0.)";
}
reg_type = type;
}
if (!registers.empty())
{
OS << registers << ";\n";
}
OS << "void fs_main()\n";
OS << "{\n"; OS << "{\n";
for (const ParamType& PT : m_parr.params[PF_PARAM_NONE]) for (const ParamType& PT : m_parr.params[PF_PARAM_NONE])
{ {
for (const ParamItem& PI : PT.items) for (const ParamItem& PI : PT.items)
{ {
if (output_values.find(PI.name) != output_values.end()) if (output_registers.find(PI.name) != output_registers.end())
continue; continue;
OS << " " << PT.type << " " << PI.name; OS << " " << PT.type << " " << PI.name;
@ -284,6 +305,9 @@ void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
} }
} }
if (properties.has_w_access)
OS << " float in_w = (1. / gl_FragCoord.w);\n";
if (properties.in_register_mask & in_ssa) if (properties.in_register_mask & in_ssa)
OS << " vec4 ssa = gl_FrontFacing ? vec4(1.) : vec4(-1.);\n"; OS << " vec4 ssa = gl_FrontFacing ? vec4(1.) : vec4(-1.);\n";
@ -305,34 +329,12 @@ void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
void VKFragmentDecompilerThread::insertMainEnd(std::stringstream & OS) void VKFragmentDecompilerThread::insertMainEnd(std::stringstream & OS)
{ {
const std::set<std::string> output_values =
{
"r0", "r1", "r2", "r3", "r4",
"h0", "h2", "h4", "h6", "h8"
};
OS << "}\n\n"; OS << "}\n\n";
OS << "void main()\n"; OS << "void main()\n";
OS << "{\n"; OS << "{\n";
std::string parameters; OS << "\n" << " fs_main();\n\n";
const auto half4 = getHalfTypeName(4);
for (auto &reg_name : output_values)
{
const std::string type = (reg_name[0] == 'r' || !device_props.has_native_half_support)? "vec4" : half4;
if (m_parr.HasParam(PF_PARAM_NONE, type, reg_name))
{
if (parameters.length())
parameters += ", ";
parameters += reg_name;
OS << " " << type << " " << reg_name << " = " << type << "(0.);\n";
}
}
OS << "\n" << " fs_main(" + parameters + ");\n\n";
glsl::insert_rop( glsl::insert_rop(
OS, OS,

View File

@ -177,20 +177,36 @@ void VKVertexDecompilerThread::insertMainStart(std::stringstream & OS)
glsl::insert_glsl_legacy_function(OS, properties2); glsl::insert_glsl_legacy_function(OS, properties2);
glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_spirv); glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_spirv);
std::string parameters; // Declare global registers with optional initialization
for (int i = 0; i < 16; ++i) std::string registers;
if (ParamType *vec4Types = m_parr.SearchParam(PF_PARAM_OUT, "vec4"))
{ {
std::string reg_name = "dst_reg" + std::to_string(i); for (auto &PI : vec4Types->items)
if (m_parr.HasParam(PF_PARAM_OUT, "vec4", reg_name))
{ {
if (parameters.length()) if (registers.length())
parameters += ", "; registers += ", ";
else
registers = "vec4 ";
parameters += "inout vec4 " + reg_name; registers += PI.name;
if (!PI.value.empty())
{
// Simplify default initialization
if (PI.value == "vec4(0.0, 0.0, 0.0, 0.0)")
registers += " = vec4(0.)";
else
registers += " = " + PI.value;
}
} }
} }
OS << "void vs_main(" << parameters << ")\n"; if (!registers.empty())
{
OS << registers << ";\n";
}
OS << "void vs_main()\n";
OS << "{\n"; OS << "{\n";
//Declare temporary registers, ignoring those mapped to outputs //Declare temporary registers, ignoring those mapped to outputs
@ -225,33 +241,7 @@ void VKVertexDecompilerThread::insertMainEnd(std::stringstream & OS)
OS << "void main ()\n"; OS << "void main ()\n";
OS << "{\n"; OS << "{\n";
std::string parameters; OS << "\n" << " vs_main();\n\n";
if (ParamType *vec4Types = m_parr.SearchParam(PF_PARAM_OUT, "vec4"))
{
for (int i = 0; i < 16; ++i)
{
std::string reg_name = "dst_reg" + std::to_string(i);
for (auto &PI : vec4Types->items)
{
if (reg_name == PI.name)
{
if (parameters.length())
parameters += ", ";
parameters += reg_name;
OS << " vec4 " << reg_name;
if (!PI.value.empty())
OS << "= " << PI.value;
OS << ";\n";
}
}
}
}
OS << "\n" << " vs_main(" << parameters << ");\n\n";
for (auto &i : reg_table) for (auto &i : reg_table)
{ {