rsx: Reimplement conditional execution in shaders

- Per-channel conditional execution introduces RAW hazards all over the place
- It's cheaper to process both branches and select between the two
- Also improves ShaderVariable to support features such as match_size and taking complex variables as inputs
This commit is contained in:
kd-11 2018-11-20 22:41:08 +03:00 committed by kd-11
parent c75749f8ce
commit 696b91cb9b
6 changed files with 84 additions and 94 deletions

View File

@ -1,4 +1,4 @@
#include "stdafx.h"
#include "stdafx.h"
#include "Emu/Memory/vm.h"
#include "Emu/System.h"
@ -372,25 +372,9 @@ void FragmentProgramDecompiler::AddCodeCond(const std::string& dst, const std::s
return;
}
static const char f[4] = { 'x', 'y', 'z', 'w' };
std::string cond = GetRawCond();
ShaderVariable dst_var(dst);
dst_var.simplify();
//const char *c_mask = f;
if (dst_var.swizzles[0].length() == 1)
{
AddCode("if (" + cond + ".x) " + dst + " = " + src + ";");
}
else
{
for (int i = 0; i < dst_var.swizzles[0].length(); ++i)
{
AddCode("if (" + cond + "." + f[i] + ") " + dst + "." + f[i] + " = " + src + "." + f[i] + ";");
}
}
// NOTE: dst = _select(dst, src, cond) is equivalent to dst = cond? src : dst;
const auto cond = ShaderVariable(dst).match_size(GetRawCond());
AddCode(dst + " = _select(" + dst + ", " + src + ", " + cond + ");");
}
template<typename T> std::string FragmentProgramDecompiler::GetSRC(T src)

View File

@ -415,6 +415,8 @@ namespace glsl
static void insert_glsl_legacy_function(std::ostream& OS, glsl::program_domain domain, bool require_lit_emulation, bool require_depth_conversion = false, bool require_wpos = false, bool require_texture_ops = true)
{
OS << "#define _select mix\n\n";
if (require_lit_emulation)
{
OS <<

View File

@ -1,4 +1,4 @@
#pragma once
#pragma once
#include <string>
#include <vector>
@ -179,22 +179,30 @@ public:
{
// Separate 'double destination' variables 'X=Y=SRC'
std::string simple_var;
const auto pos = var.find('=');
const auto eq_pos = var.find('=');
if (pos != std::string::npos)
if (eq_pos != std::string::npos)
{
simple_var = var.substr(0, pos - 1);
simple_var = var.substr(0, eq_pos - 1);
}
else
{
simple_var = var;
}
const auto brace_pos = var.find_last_of(")");
std::string prefix;
if (brace_pos != std::string::npos)
{
prefix = simple_var.substr(0, brace_pos);
simple_var = simple_var.substr(brace_pos);
}
auto var_blocks = fmt::split(simple_var, { "." });
verify(HERE), (var_blocks.size() != 0);
name = var_blocks[0];
name = prefix + var_blocks[0];
if (var_blocks.size() == 1)
{
@ -263,6 +271,45 @@ public:
return name + "." + fmt::merge({ swizzles }, ".");
}
std::string match_size(const std::string& other_var) const
{
// Make other_var the same vector length as this var
ShaderVariable other(other_var);
const auto this_size = get_vector_size();
const auto other_size = other.get_vector_size();
if (LIKELY(this_size == other_size))
{
return other_var;
}
if (LIKELY(this_size < other_size))
{
switch (this_size)
{
case 0:
case 4:
return other_var;
case 1:
return other_var + ".x";
case 2:
return other_var + ".xy";
case 3:
return other_var + ".xyz";
default:
fmt::throw_exception("Unreachable" HERE);
}
}
else
{
auto remaining = this_size - other_size;
std::string result = other_var;
while (remaining--) result += "x";
return result;
}
}
};
struct vertex_reg_info

View File

@ -1,4 +1,4 @@
#include "stdafx.h"
#include "stdafx.h"
#include "Emu/System.h"
#include "VertexProgramDecompiler.h"
@ -240,18 +240,8 @@ std::string VertexProgramDecompiler::Format(const std::string& code)
return fmt::replace_all(code, repl_list);
}
std::string VertexProgramDecompiler::GetCond()
std::string VertexProgramDecompiler::GetRawCond()
{
enum
{
lt = 0x1,
eq = 0x2,
gt = 0x4,
};
if (d0.cond == 0) return "false";
if (d0.cond == (lt | gt | eq)) return "true";
static const COMPARE cond_string_table[(lt | gt | eq) + 1] =
{
COMPARE::FUNCTION_SLT, // "error"
@ -272,7 +262,15 @@ std::string VertexProgramDecompiler::GetCond()
swizzle += f[d0.mask_w];
swizzle = swizzle == "xyzw" ? "" : "." + swizzle;
return "any(" + compareFunction(cond_string_table[d0.cond], AddCondReg() + swizzle, getFloatTypeName(4) + "(0., 0., 0., 0.)" + swizzle) + ")";
return compareFunction(cond_string_table[d0.cond], AddCondReg() + swizzle, getFloatTypeName(4) + "(0., 0., 0., 0.)" + swizzle);
}
// Builds the scalar condition expression for the current instruction.
// Returns the literal "false" when d0.cond is 0, the literal "true" when all
// of the lt, gt and eq bits are set, and otherwise the per-component
// condition from GetRawCond() wrapped in "any(...)".
std::string VertexProgramDecompiler::GetCond()
{
	if (d0.cond == 0)
	{
		return "false";
	}

	if (d0.cond == (lt | gt | eq))
	{
		return "true";
	}

	std::string reduced = "any(";
	reduced += GetRawCond();
	reduced += ")";
	return reduced;
}
std::string VertexProgramDecompiler::GetOptionalBranchCond()
@ -292,7 +290,6 @@ void VertexProgramDecompiler::AddCodeCond(const std::string& dst, const std::str
gt = 0x4,
};
if (!d0.cond_test_enable || d0.cond == (lt | gt | eq))
{
AddCode(dst + " = " + src + ";");
@ -305,58 +302,9 @@ void VertexProgramDecompiler::AddCodeCond(const std::string& dst, const std::str
return;
}
static const COMPARE cond_string_table[(lt | gt | eq) + 1] =
{
COMPARE::FUNCTION_SLT, // "error"
COMPARE::FUNCTION_SLT,
COMPARE::FUNCTION_SEQ,
COMPARE::FUNCTION_SLE,
COMPARE::FUNCTION_SGT,
COMPARE::FUNCTION_SNE,
COMPARE::FUNCTION_SGE,
};
ShaderVariable dst_var(dst);
dst_var.simplify();
static const char f[4] = { 'x', 'y', 'z', 'w' };
const u32 mask_index[4] = { d0.mask_x, d0.mask_y, d0.mask_z, d0.mask_w };
auto get_masked_dst = [](const std::string& dest, const char mask)
{
const auto selector = std::string(".") + mask;
const auto pos = dest.find('=');
std::string result = dest + selector;
if (pos != std::string::npos)
{
result.insert(pos - 1, selector);
}
return result;
};
auto get_cond_func = [this, &mask_index](COMPARE op, int index)
{
// Condition reg check for single element (x,y,z,w)
const auto cond_mask = f[mask_index[index]];
return compareFunction(op, AddCondReg() + "." + cond_mask, "0.", true);
};
if (dst_var.swizzles[0].length() == 1)
{
const std::string cond = get_cond_func(cond_string_table[d0.cond], 0);
AddCode("if (" + cond + ") " + dst + " = " + src + ";");
}
else
{
for (int i = 0; i < dst_var.swizzles[0].length(); ++i)
{
const std::string cond = get_cond_func(cond_string_table[d0.cond], i);
AddCode("if (" + cond + ") " + get_masked_dst(dst, f[i]) + " = " + src + "." + f[i] + ";");
}
}
// NOTE: dst = _select(dst, src, cond) is equivalent to dst = cond? src : dst;
const auto cond = ShaderVariable(dst).match_size(GetRawCond());
AddCode(dst + " = _select(" + dst + ", " + src + ", " + cond + ");");
}
std::string VertexProgramDecompiler::AddAddrReg()

View File

@ -1,4 +1,4 @@
#pragma once
#pragma once
#include "Emu/RSX/RSXVertexProgram.h"
#include <vector>
#include <set>
@ -27,6 +27,13 @@ struct VertexProgramDecompiler
D3 d3;
SRC src[3];
// Bit flags that are OR-ed together and compared against d0.cond.
// Presumably less-than / equal / greater-than condition bits, going by the
// names - confirm against the D0 instruction layout.
enum
{
lt = 0x1,
eq = 0x2,
gt = 0x4,
};
struct FuncInfo
{
u32 offset;
@ -66,9 +73,9 @@ struct VertexProgramDecompiler
std::string GetDST(bool is_sca = false);
std::string GetSRC(const u32 n);
std::string GetTex();
std::string GetRawCond();
std::string GetCond();
std::string GetOptionalBranchCond(); //Conditional branch expression modified externally at runtime
std::string AddAddrMask();
std::string AddAddrReg();
std::string AddCondReg();
u32 GetAddr();

View File

@ -1,4 +1,4 @@
#ifdef _MSC_VER
#ifdef _MSC_VER
#include "stdafx.h"
#include "stdafx_d3d12.h"
#include "D3D12CommonDecompiler.h"
@ -114,6 +114,8 @@ std::string compareFunctionImp(COMPARE f, const std::string &Op0, const std::str
void insert_d3d12_legacy_function(std::ostream& OS, bool is_fragment_program)
{
OS << "#define _select lerp\n\n";
OS << "float4 lit_legacy(float4 val)";
OS << "{\n";
OS << " float4 clamped_val = val;\n";