diff --git a/rpcs3/Emu/CMakeLists.txt b/rpcs3/Emu/CMakeLists.txt index 96dfb1e995..f071cf3f8c 100644 --- a/rpcs3/Emu/CMakeLists.txt +++ b/rpcs3/Emu/CMakeLists.txt @@ -392,6 +392,7 @@ target_sources(rpcs3_emu PRIVATE RSX/rsx_utils.cpp RSX/Common/BufferUtils.cpp RSX/Common/FragmentProgramDecompiler.cpp + RSX/Common/GLSLCommon.cpp RSX/Common/ProgramStateCache.cpp RSX/Common/surface_store.cpp RSX/Common/TextureUtils.cpp diff --git a/rpcs3/Emu/RSX/Common/GLSLCommon.cpp b/rpcs3/Emu/RSX/Common/GLSLCommon.cpp new file mode 100644 index 0000000000..3c60f27769 --- /dev/null +++ b/rpcs3/Emu/RSX/Common/GLSLCommon.cpp @@ -0,0 +1,996 @@ +#include "stdafx.h" +#include "Utilities/StrFmt.h" + +#include "GLSLCommon.h" + +namespace program_common +{ + void insert_compare_op(std::ostream& OS, bool low_precision) + { + if (low_precision) + { + OS << + "int compare(const in float a, const in float b)\n" + "{\n" + " if (abs(a - b) < 0.000001) return 2;\n" + " return (a > b)? 4 : 1;\n" + "}\n\n" + + "bool comparison_passes(const in float a, const in float b, const in uint func)\n" + "{\n" + " if (func == 0) return false; // never\n" + " if (func == 7) return true; // always\n\n" + + " int op = compare(a, b);\n" + " switch (func)\n" + " {\n" + " case 1: return op == 1; // less\n" + " case 2: return op == 2; // equal\n" + " case 3: return op <= 2; // lequal\n" + " case 4: return op == 4; // greater\n" + " case 5: return op != 2; // nequal\n" + " case 6: return (op == 4 || op == 2); // gequal\n" + " }\n\n" + + " return false; // unreachable\n" + "}\n\n"; + } + else + { + OS << + "bool comparison_passes(const in float a, const in float b, const in uint func)\n" + "{\n" + " switch (func)\n" + " {\n" + " default:\n" + " case 0: return false; //never\n" + " case 1: return (a < b); //less\n" + " case 2: return (a == b); //equal\n" + " case 3: return (a <= b); //lequal\n" + " case 4: return (a > b); //greater\n" + " case 5: return (a != b); //nequal\n" + " case 6: return (a >= b); //gequal\n" + " case 7: return true; //always\n" + " }\n" + "}\n\n"; + } + } + + void insert_compare_op_vector(std::ostream& OS) + { + OS << + "bvec4 comparison_passes(const in vec4 a, const in vec4 b, const in uint func)\n" + "{\n" + " switch (func)\n" + " {\n" + " default:\n" + " case 0: return bvec4(false); //never\n" + " case 1: return lessThan(a, b); //less\n" + " case 2: return equal(a, b); //equal\n" + " case 3: return lessThanEqual(a, b); //lequal\n" + " case 4: return greaterThan(a, b); //greater\n" + " case 5: return notEqual(a, b); //nequal\n" + " case 6: return greaterThanEqual(a, b); //gequal\n" + " case 7: return bvec4(true); //always\n" + " }\n" + "}\n\n"; + } + + void insert_fog_declaration(std::ostream& OS, const std::string& wide_vector_type, const std::string& input_coord, bool declare) + { + std::string template_body; + + if (!declare) + template_body += "$T fetch_fog_value(const in uint mode)\n"; + else + template_body += "$T fetch_fog_value(const in uint mode, const in $T $I)\n"; + + template_body += + "{\n" + " $T result = $T($I.x, 0., 0., 0.);\n" + " switch(mode)\n" + " {\n" + " default:\n" + " return result;\n" + " case 0:\n" + " //linear\n" + " result.y = fog_param1 * $I.x + (fog_param0 - 1.);\n" + " break;\n" + " case 1:\n" + " //exponential\n" + " result.y = exp(11.084 * (fog_param1 * $I.x + fog_param0 - 1.5));\n" + " break;\n" + " case 2:\n" + " //exponential2\n" + " result.y = exp(-pow(4.709 * (fog_param1 * $I.x + fog_param0 - 1.5), 2.));\n" + " break;\n" + " case 3:\n" + " //exponential_abs\n" + " result.y = exp(11.084 * (fog_param1 * abs($I.x) + fog_param0 - 1.5));\n" + " break;\n" + " case 4:\n" + " //exponential2_abs\n" + " result.y = exp(-pow(4.709 * (fog_param1 * abs($I.x) + fog_param0 - 1.5), 2.));\n" + " break;\n" + " case 5:\n" + " //linear_abs\n" + " result.y = fog_param1 * abs($I.x) + (fog_param0 - 1.);\n" + " break;\n" + " }\n" + "\n" + " result.y = clamp(result.y, 0., 1.);\n" + " return result;\n" + "}\n\n"; + + std::pair replacements[] = + {std::make_pair("$T", wide_vector_type), + std::make_pair("$I", input_coord)}; + + OS << fmt::replace_all(template_body, replacements); + } +} + +namespace glsl +{ + std::string getFloatTypeNameImpl(usz elementCount) + { + switch (elementCount) + { + default: + abort(); + case 1: + return "float"; + case 2: + return "vec2"; + case 3: + return "vec3"; + case 4: + return "vec4"; + } + } + + std::string getHalfTypeNameImpl(usz elementCount) + { + switch (elementCount) + { + default: + abort(); + case 1: + return "float16_t"; + case 2: + return "f16vec2"; + case 3: + return "f16vec3"; + case 4: + return "f16vec4"; + } + } + + std::string compareFunctionImpl(COMPARE f, const std::string &Op0, const std::string &Op1, bool scalar) + { + if (scalar) + { + switch (f) + { + case COMPARE::FUNCTION_SEQ: + return Op0 + " == " + Op1; + case COMPARE::FUNCTION_SGE: + return Op0 + " >= " + Op1; + case COMPARE::FUNCTION_SGT: + return Op0 + " > " + Op1; + case COMPARE::FUNCTION_SLE: + return Op0 + " <= " + Op1; + case COMPARE::FUNCTION_SLT: + return Op0 + " < " + Op1; + case COMPARE::FUNCTION_SNE: + return Op0 + " != " + Op1; + } + } + else + { + switch (f) + { + case COMPARE::FUNCTION_SEQ: + return "equal(" + Op0 + ", " + Op1 + ")"; + case COMPARE::FUNCTION_SGE: + return "greaterThanEqual(" + Op0 + ", " + Op1 + ")"; + case COMPARE::FUNCTION_SGT: + return "greaterThan(" + Op0 + ", " + Op1 + ")"; + case COMPARE::FUNCTION_SLE: + return "lessThanEqual(" + Op0 + ", " + Op1 + ")"; + case COMPARE::FUNCTION_SLT: + return "lessThan(" + Op0 + ", " + Op1 + ")"; + case COMPARE::FUNCTION_SNE: + return "notEqual(" + Op0 + ", " + Op1 + ")"; + } + } + + fmt::throw_exception("Unknown compare function"); + } + + void insert_vertex_input_fetch(std::stringstream& OS, glsl_rules rules, bool glsl4_compliant) + { + std::string vertex_id_name = (rules != glsl_rules_spirv) ? "gl_VertexID" : "gl_VertexIndex"; + + //Actually decode a vertex attribute from a raw byte stream + OS << + "#define VTX_FMT_SNORM16 0\n" + "#define VTX_FMT_FLOAT32 1\n" + "#define VTX_FMT_FLOAT16 2\n" + "#define VTX_FMT_UNORM8 3\n" + "#define VTX_FMT_SINT16 4\n" + "#define VTX_FMT_COMP32 5\n" + "#define VTX_FMT_UINT8 6\n\n"; + + // For intel GPUs which cannot access vectors in indexed mode (driver bug? or glsl version too low?) + // Note: Tested on Mesa iris with HD 530 and compilant path works fine, may be a bug on Windows proprietary drivers + if (!glsl4_compliant) + { + OS << + "void mov(inout vec4 vector, const in int index, const in float scalar)\n" + "{\n" + " switch(index)\n" + " {\n" + " case 0: vector.x = scalar; return;\n" + " case 1: vector.y = scalar; return;\n" + " case 2: vector.z = scalar; return;\n" + " case 3: vector.w = scalar; return;\n" + " }\n" + "}\n\n" + + "uint ref(const in uvec4 vector, const in int index)\n" + "{\n" + " switch(index)\n" + " {\n" + " case 0: return vector.x;\n" + " case 1: return vector.y;\n" + " case 2: return vector.z;\n" + " case 3: return vector.w;\n" + " }\n" + "}\n\n"; + } + else + { + OS << + "#define mov(v, i, s) v[i] = s\n" + "#define ref(v, i) v[i]\n\n"; + } + + OS << + "struct attribute_desc\n" + "{\n" + " uint type;\n" + " uint attribute_size;\n" + " uint starting_offset;\n" + " uint stride;\n" + " uint frequency;\n" + " bool swap_bytes;\n" + " bool is_volatile;\n" + " bool modulo;\n" + "};\n\n" + + "uint gen_bits(const in uint x, const in uint y, const in uint z, const in uint w, const in bool swap)\n" + "{\n" + " return (swap) ?\n" + " _set_bits(_set_bits(_set_bits(w, z, 8, 8), y, 16, 8), x, 24, 8) :\n" + " _set_bits(_set_bits(_set_bits(x, y, 8, 8), z, 16, 8), w, 24, 8);\n" + "}\n\n" + + "uint gen_bits(const in uint x, const in uint y, const in bool swap)\n" + "{\n" + " return (swap)? _set_bits(y, x, 8, 8) : _set_bits(x, y, 8, 8);\n" + "}\n\n" + + // NOTE: (int(n) or int(n)) is broken on some NVIDIA and INTEL hardware when the sign bit is involved. + // See https://github.com/RPCS3/rpcs3/issues/8990 + "vec4 sext(const in ivec4 bits)\n" + "{\n" + " // convert raw 16 bit values into signed 32-bit float4 counterpart\n" + " bvec4 sign_check = lessThan(bits, ivec4(0x8000));\n" + " return _select(bits - 65536, bits, sign_check);\n" + "}\n\n" + + "float sext(const in int bits)\n" + "{\n" + " return (bits < 0x8000) ? float(bits) : float(bits - 65536); \n" + "}\n\n" + + "vec4 fetch_attribute(const in attribute_desc desc, const in int vertex_id, usamplerBuffer input_stream)\n" + "{\n" + " const int elem_size_table[] = { 2, 4, 2, 1, 2, 4, 1 };\n" + " const float scaling_table[] = { 32768., 1., 1., 255., 1., 32767., 1. };\n" + " const int elem_size = elem_size_table[desc.type];\n" + " const vec4 scale = scaling_table[desc.type].xxxx;\n\n" + + " uvec4 tmp, result = uvec4(0u);\n" + " vec4 ret;\n" + " int n, i = int((vertex_id * desc.stride) + desc.starting_offset);\n\n" + + " for (n = 0; n < desc.attribute_size; n++)\n" + " {\n" + " tmp.x = texelFetch(input_stream, i++).x;\n" + " if (elem_size == 2)\n" + " {\n" + " tmp.y = texelFetch(input_stream, i++).x;\n" + " tmp.x = gen_bits(tmp.x, tmp.y, desc.swap_bytes);\n" + " }\n" + " else if (elem_size == 4)\n" + " {\n" + " tmp.y = texelFetch(input_stream, i++).x;\n" + " tmp.z = texelFetch(input_stream, i++).x;\n" + " tmp.w = texelFetch(input_stream, i++).x;\n" + " tmp.x = gen_bits(tmp.x, tmp.y, tmp.z, tmp.w, desc.swap_bytes);\n" + " }\n\n" + + " mov(result, n, tmp.x);\n" + " }\n\n" + + " // Actual decoding step is done in vector space, outside the loop\n" + " if (desc.type == VTX_FMT_SNORM16 || desc.type == VTX_FMT_SINT16)\n" + " {\n" + " ret = sext(ivec4(result));\n" + " }\n" + " else if (desc.type == VTX_FMT_FLOAT32)\n" + " {\n" + " ret = uintBitsToFloat(result);\n" + " }\n" + " else if (desc.type == VTX_FMT_FLOAT16)\n" + " {\n" + " tmp.x = _set_bits(result.x, result.y, 16, 16);\n" + " tmp.y = _set_bits(result.z, result.w, 16, 16);\n" + " ret.xy = unpackHalf2x16(tmp.x);\n" + " ret.zw = unpackHalf2x16(tmp.y);\n" + " }\n" + " else if (desc.type == VTX_FMT_UINT8 || desc.type == VTX_FMT_UNORM8)\n" + " {\n" + " ret = vec4(desc.swap_bytes? result.wzyx : result);\n" + " }\n" + " else //if (desc.type == VTX_FMT_COMP32)\n" + " {\n" + " result = uvec4(_get_bits(result.x, 0, 11),\n" + " _get_bits(result.x, 11, 11),\n" + " _get_bits(result.x, 22, 10),\n" + " uint(scale.x));\n" + " ret = sext(ivec4(result) << ivec4(5, 5, 6, 0));\n" + " }\n\n" + + " if (desc.attribute_size < 4)\n" + " {\n" + " ret.w = scale.x;\n" + " }\n\n" + + " return ret / scale; \n" + "}\n\n" + + "attribute_desc fetch_desc(const in int location)\n" + "{\n" + " // Each descriptor is 64 bits wide\n" + " // [0-8] attribute stride\n" + " // [8-24] attribute divisor\n" + " // [24-27] attribute type\n" + " // [27-30] attribute size\n" + " // [30-31] reserved\n" + " // [32-60] starting offset\n" + " // [60-61] swap bytes flag\n" + " // [61-62] volatile flag\n" + " // [62-63] modulo enable flag\n\n"; + + if (rules == glsl_rules_opengl4) + { + // Data is packed into a ubo + OS << + " int block = (location >> 1);\n" + " int sub_block = (location & 1) << 1;\n" + " uvec2 attrib = uvec2(\n" + " ref(input_attributes_blob[block], sub_block + 0),\n" + " ref(input_attributes_blob[block], sub_block + 1));\n\n"; + } + else + { + // Fetch parameters streamed separately from draw parameters + OS << + " uvec2 attrib = texelFetch(vertex_layout_stream, location + int(layout_ptr_offset)).xy;\n\n"; + } + + OS << + " attribute_desc result;\n" + " result.stride = _get_bits(attrib.x, 0, 8);\n" + " result.frequency = _get_bits(attrib.x, 8, 16);\n" + " result.type = _get_bits(attrib.x, 24, 3);\n" + " result.attribute_size = _get_bits(attrib.x, 27, 3);\n" + " result.starting_offset = _get_bits(attrib.y, 0, 29);\n" + " result.swap_bytes = _test_bit(attrib.y, 29);\n" + " result.is_volatile = _test_bit(attrib.y, 30);\n" + " result.modulo = _test_bit(attrib.y, 31);\n" + " return result;\n" + "}\n\n" + + "vec4 read_location(const in int location)\n" + "{\n" + " attribute_desc desc = fetch_desc(location);\n" + " int vertex_id = " << vertex_id_name << " - int(vertex_base_index);\n" + " if (desc.frequency == 0)\n" + " {\n" + " vertex_id = 0;\n" + " }\n" + " else if (desc.modulo)\n" + " {\n" + " //if a vertex modifier is active; vertex_base must be 0 and is ignored\n" + " vertex_id = (" << vertex_id_name << " + int(vertex_index_offset)) % int(desc.frequency);\n" + " }\n" + " else\n" + " {\n" + " vertex_id /= int(desc.frequency); \n" + " }\n\n" + + " if (desc.is_volatile)\n" + " return fetch_attribute(desc, vertex_id, volatile_input_stream);\n" + " else\n" + " return fetch_attribute(desc, vertex_id, persistent_input_stream);\n" + "}\n\n"; + } + + void insert_rop_init(std::ostream& OS) + { + OS << + " if (_test_bit(rop_control, 9))\n" + " {\n" + " // Convert x,y to linear address\n" + " const ivec2 stipple_coord = ivec2(gl_FragCoord.xy) % ivec2(32, 32);\n" + " const int address = stipple_coord.y * 32 + stipple_coord.x;\n" + " const int bit_offset = (address & 31);\n" + " const int word_index = _get_bits(address, 7, 3);\n" + " const int sub_index = _get_bits(address, 5, 2);\n\n" + + " if (_test_bit(stipple_pattern[word_index][sub_index], bit_offset))\n" + " {\n" + " _kill();\n" + " }\n" + " }\n\n"; + } + + void insert_rop(std::ostream& OS, const shader_properties& props) + { + const std::string reg0 = props.fp32_outputs ? "r0" : "h0"; + const std::string reg1 = props.fp32_outputs ? "r2" : "h4"; + const std::string reg2 = props.fp32_outputs ? "r3" : "h6"; + const std::string reg3 = props.fp32_outputs ? "r4" : "h8"; + + //TODO: Implement all ROP options like CSAA and ALPHA_TO_ONE here + if (props.disable_early_discard) + { + OS << + " if (_fragment_discard)\n" + " {\n" + " discard;\n" + " }\n" + " else if (_get_bits(rop_control, 0, 8) != 0)\n"; + } + else + { + OS << " if (_get_bits(rop_control, 0, 8) != 0)\n"; + } + + OS << + " {\n" + " const bool alpha_test = _test_bit(rop_control, 0);\n" + " const uint alpha_func = _get_bits(rop_control, 16, 3);\n"; + + if (!props.fp32_outputs) + { + OS << " const bool srgb_convert = _test_bit(rop_control, 1);\n\n"; + } + + if (props.emulate_coverage_tests) + { + OS << " const bool a2c_enabled = _test_bit(rop_control, 4);\n"; + } + + OS << + " if (alpha_test && !comparison_passes(" << reg0 << ".a, alpha_ref, alpha_func))\n" + " {\n" + " discard;\n" + " }\n"; + + if (props.emulate_coverage_tests) + { + OS << + " else if (a2c_enabled && !coverage_test_passes(" << reg0 << ", rop_control >> 5))\n" + " {\n" + " discard;\n" + " }\n"; + } + + if (!props.fp32_outputs) + { + // Tested using NPUB90375; some shaders (32-bit output only?) do not obey srgb flags + if (props.supports_native_fp16) + { + OS << + " else if (srgb_convert)\n" + " {\n" + " " << reg0 << ".rgb = clamp16(linear_to_srgb(" << reg0 << ")).rgb;\n" + " " << reg1 << ".rgb = clamp16(linear_to_srgb(" << reg1 << ")).rgb;\n" + " " << reg2 << ".rgb = clamp16(linear_to_srgb(" << reg2 << ")).rgb;\n" + " " << reg3 << ".rgb = clamp16(linear_to_srgb(" << reg3 << ")).rgb;\n" + " }\n"; + } + else + { + OS << + " else if (srgb_convert)\n" + " {\n" + " " << reg0 << ".rgb = linear_to_srgb(" << reg0 << ").rgb;\n" + " " << reg1 << ".rgb = linear_to_srgb(" << reg1 << ").rgb;\n" + " " << reg2 << ".rgb = linear_to_srgb(" << reg2 << ").rgb;\n" + " " << reg3 << ".rgb = linear_to_srgb(" << reg3 << ").rgb;\n" + " }\n"; + } + } + + OS << + " }\n\n" + + " ocol0 = " << reg0 << ";\n" + " ocol1 = " << reg1 << ";\n" + " ocol2 = " << reg2 << ";\n" + " ocol3 = " << reg3 << ";\n\n"; + } + + void insert_glsl_legacy_function(std::ostream& OS, const shader_properties& props) + { + OS << "#define _select mix\n"; + OS << "#define _saturate(x) clamp(x, 0., 1.)\n"; + OS << "#define _get_bits(x, off, count) bitfieldExtract(x, off, count)\n"; + OS << "#define _set_bits(x, y, off, count) bitfieldInsert(x, y, off, count)\n"; + OS << "#define _test_bit(x, y) (_get_bits(x, y, 1) != 0)\n"; + OS << "#define _rand(seed) fract(sin(dot(seed.xy, vec2(12.9898f, 78.233f))) * 43758.5453f)\n\n"; + + if (props.domain == glsl::program_domain::glsl_fragment_program) + { + OS << "// Workaround for broken early discard in some drivers\n"; + + if (props.disable_early_discard) + { + OS << "bool _fragment_discard = false;\n"; + OS << "#define _kill() _fragment_discard = true\n\n"; + } + else + { + OS << "#define _kill() discard\n\n"; + } + + if (props.require_texture_ops) + { + OS << + // Declare special texture control flags + "#define GAMMA_R_MASK (1 << " << rsx::texture_control_bits::GAMMA_R << ")\n" + "#define GAMMA_G_MASK (1 << " << rsx::texture_control_bits::GAMMA_G << ")\n" + "#define GAMMA_B_MASK (1 << " << rsx::texture_control_bits::GAMMA_B << ")\n" + "#define GAMMA_A_MASK (1 << " << rsx::texture_control_bits::GAMMA_A << ")\n" + "#define EXPAND_R_MASK (1 << " << rsx::texture_control_bits::EXPAND_R << ")\n" + "#define EXPAND_G_MASK (1 << " << rsx::texture_control_bits::EXPAND_G << ")\n" + "#define EXPAND_B_MASK (1 << " << rsx::texture_control_bits::EXPAND_B << ")\n" + "#define EXPAND_A_MASK (1 << " << rsx::texture_control_bits::EXPAND_A << ")\n\n" + + "#define ALPHAKILL " << rsx::texture_control_bits::ALPHAKILL << "\n" + "#define RENORMALIZE " << rsx::texture_control_bits::RENORMALIZE << "\n" + "#define DEPTH_FLOAT " << rsx::texture_control_bits::DEPTH_FLOAT << "\n" + "#define GAMMA_CTRL_MASK (GAMMA_R_MASK|GAMMA_G_MASK|GAMMA_B_MASK|GAMMA_A_MASK)\n" + "#define SIGN_EXPAND_MASK (EXPAND_R_MASK|EXPAND_G_MASK|EXPAND_B_MASK|EXPAND_A_MASK)\n\n"; + } + } + + if (props.require_lit_emulation) + { + OS << + "vec4 lit_legacy(const in vec4 val)" + "{\n" + " vec4 clamped_val = val;\n" + " clamped_val.x = max(val.x, 0.);\n" + " clamped_val.y = max(val.y, 0.);\n" + " vec4 result;\n" + " result.x = 1.;\n" + " result.w = 1.;\n" + " result.y = clamped_val.x;\n" + " result.z = clamped_val.x > 0. ? exp(clamped_val.w * log(max(clamped_val.y, 0.0000000001))) : 0.;\n" + " return result;\n" + "}\n\n"; + } + + if (props.domain == glsl::program_domain::glsl_vertex_program && props.emulate_zclip_transform) + { + if (props.emulate_depth_clip_only) + { + // Declare rcp_precise. Requires f64 support in the drivers. + // This is required to handle precision drift during division for extended depth range. + OS << + "double rcp_precise(double x)\n" + "{\n" + " double scaled = x * 0.0009765625;\n" + " double inv = 1.0 / scaled;\n" + " return inv * 0.0009765625;\n" + "}\n" + "\n" + // Technically the depth value here is the 'final' depth that should be stored in the Z buffer. + // Forward mapping eqn is d' = d * (f - n) + n, where d' is the stored Z value (this) and d is the normalized API value. + "vec4 apply_zclip_xform(const in vec4 pos, const in float near_plane, const in float far_plane)\n" + "{\n" + " if (far_plane != 0.0)\n" + " {\n" + " double z_range = (far_plane > near_plane)? (far_plane - near_plane) : far_plane;\n" + " double inv_range = rcp_precise(z_range);\n" + " float d = float(pos.z * rcp_precise(pos.w));\n" + " float new_d = (d - near_plane) * float(inv_range);\n" + " return vec4(pos.x, pos.y, (new_d * pos.w), pos.w);\n" + " }\n" + " else\n" + " {\n" + " return pos;\n" // Only values where Z=0 can ever pass this clip + " }\n" + "}\n\n"; + } + else + { + OS << + "vec4 apply_zclip_xform(const in vec4 pos, const in float near_plane, const in float far_plane)\n" + "{\n" + " float d = float(pos.z / pos.w);\n" + " if (d < 0.f && d >= near_plane)\n" + " {\n" + " // Clamp\n" + " d = 0.f;\n" + " }\n" + " else if (d > 1.f && d <= far_plane)\n" + " {\n" + " // Compress Z and store towards highest end of the range\n" + " d = min(1., 0.99 + (0.01 * (pos.z - near_plane) / (far_plane - near_plane)));\n" + " }\n" + " else\n" + " {\n" + " return pos;\n" + " }\n" + "\n" + " return vec4(pos.x, pos.y, d * pos.w, pos.w);\n" + "}\n\n"; + } + + return; + } + + program_common::insert_compare_op(OS, props.low_precision_tests); + + if (props.emulate_coverage_tests) + { + // Purely stochastic + OS << + "bool coverage_test_passes(const in vec4 _sample, const in uint control)\n" + "{\n" + " if (!_test_bit(control, 0)) return false;\n" + "\n" + " float random = _rand(gl_FragCoord);\n" + " return (_sample.a > random);\n" + "}\n\n"; + } + + if (!props.fp32_outputs) + { + OS << + "vec4 linear_to_srgb(const in vec4 cl)\n" + "{\n" + " vec4 low = cl * 12.92;\n" + " vec4 high = 1.055 * pow(cl, vec4(1. / 2.4)) - 0.055;\n" + " bvec4 select = lessThan(cl, vec4(0.0031308));\n" + " return clamp(mix(high, low, select), 0., 1.);\n" + "}\n\n"; + } + + if (props.require_depth_conversion) + { + ensure(props.require_texture_ops); + + //NOTE: Memory layout is fetched as byteswapped BGRA [GBAR] (GOW collection, DS2, DeS) + //The A component (Z) is useless (should contain stencil8 or just 1) + OS << + "vec4 decode_depth24(const in float depth_value, const in bool depth_float)\n" + "{\n" + " uint value;\n" + " if (!depth_float)\n" + " value = uint(depth_value * 16777215.);\n" + " else\n" + " value = _get_bits(floatBitsToUint(depth_value), 7, 24);\n" + "\n" + " uint b = _get_bits(value, 0, 8);\n" + " uint g = _get_bits(value, 8, 8);\n" + " uint r = _get_bits(value, 16, 8);\n" + " return vec4(float(g)/255., float(b)/255., 1., float(r)/255.);\n" + "}\n\n" + + "vec4 remap_vector(const in vec4 color, const in uint remap)\n" + "{\n" + " vec4 result;\n" + " if (_get_bits(remap, 0, 8) == 0xE4)\n" + " {\n" + " result = color;\n" + " }\n" + " else\n" + " {\n" + " uvec4 remap_channel = uvec4(remap) >> uvec4(2, 4, 6, 0);\n" + " remap_channel &= 3;\n" + " remap_channel = (remap_channel + 3) % 4; // Map A-R-G-B to R-G-B-A\n\n" + + " // Generate remapped result\n" + " result.a = color[remap_channel.a];\n" + " result.r = color[remap_channel.r];\n" + " result.g = color[remap_channel.g];\n" + " result.b = color[remap_channel.b];\n" + " }\n\n" + + " if (_get_bits(remap, 8, 8) == 0xAA)\n" + " return result;\n\n" + + " uvec4 remap_select = uvec4(remap) >> uvec4(10, 12, 14, 8);\n" + " remap_select &= 3;\n" + " bvec4 choice = lessThan(remap_select, uvec4(2));\n" + " return _select(result, vec4(remap_select), choice);\n" + "}\n\n" + + "vec4 texture2DReconstruct(sampler2D tex, usampler2D stencil_tex, const in vec2 coord, const in uint remap, const in uint flags)\n" + "{\n" + " vec4 result = decode_depth24(texture(tex, coord.xy).r, _test_bit(flags, DEPTH_FLOAT));\n" + " result.z = float(texture(stencil_tex, coord.xy).x) / 255.f;\n\n" + + " if (remap == 0xAAE4)\n" + " return result;\n\n" + + " return remap_vector(result, remap);\n" + "}\n\n"; + } + + if (props.require_texture_ops) + { + OS << + +#ifdef __APPLE__ + "vec4 remap_vector(const in vec4 rgba, const in uint remap_bits)\n" + "{\n" + " uvec4 selector = (uvec4(remap_bits) >> uvec4(3, 6, 9, 0)) & 0x7;\n" + " bvec4 choice = greaterThan(selector, uvec4(1));\n" + "\n" + " vec4 direct = vec4(selector);\n" + " selector = min(selector - 2, selector);\n" + " vec4 indexed = vec4(rgba[selector.r], rgba[selector.g], rgba[selector.b], rgba[selector.a]);\n" + " return mix(direct, indexed, choice);\n" + "}\n\n" +#endif + "vec4 srgb_to_linear(const in vec4 cs)\n" + "{\n" + " vec4 a = cs / 12.92;\n" + " vec4 b = pow((cs + 0.055) / 1.055, vec4(2.4));\n" + " return _select(a, b, greaterThan(cs, vec4(0.04045)));\n" + "}\n\n" + + //TODO: Move all the texture read control operations here + "vec4 process_texel(in vec4 rgba, const in uint control_bits)\n" + "{\n" +#ifdef __APPLE__ + " uint remap_bits = (control_bits >> 16) & 0xFFFF;\n" + " if (remap_bits != 0x8D5) rgba = remap_vector(rgba, remap_bits);\n\n" +#endif + " if (control_bits == 0)\n" + " {\n" + " return rgba;\n" + " }\n" + "\n" + " if (_test_bit(control_bits, ALPHAKILL))\n" + " {\n" + " // Alphakill\n" + " if (rgba.a < 0.000001)\n" + " {\n" + " _kill();\n" + " return rgba;\n" + " }\n" + " }\n" + "\n" + " if (_test_bit(control_bits, RENORMALIZE))\n" + " {\n" + " // Renormalize to 8-bit (PS3) accuracy\n" + " rgba = floor(rgba * 255.);\n" + " rgba /= 255.;" + " }\n" + "\n" + " uvec4 mask;\n" + " vec4 convert;\n" + " uint op_mask = control_bits & SIGN_EXPAND_MASK;\n" + "\n" + " if (op_mask != 0)\n" + " {\n" + " // Expand to signed normalized\n" + " mask = uvec4(op_mask) & uvec4(EXPAND_R_MASK, EXPAND_G_MASK, EXPAND_B_MASK, EXPAND_A_MASK);\n" + " convert = (rgba * 2.f - 1.f);\n" + " rgba = _select(rgba, convert, notEqual(mask, uvec4(0)));\n" + " }\n" + "\n" + " op_mask = control_bits & GAMMA_CTRL_MASK;\n" + " if (op_mask != 0u)\n" + " {\n" + " // Gamma correction\n" + " mask = uvec4(op_mask) & uvec4(GAMMA_R_MASK, GAMMA_G_MASK, GAMMA_B_MASK, GAMMA_A_MASK);\n" + " convert = srgb_to_linear(rgba);\n" + " return _select(rgba, convert, notEqual(mask, uvec4(0)));\n" + " }\n" + "\n" + " return rgba;\n" + "}\n\n"; + + if (props.require_texture_expand) + { + OS << + "uint _texture_flag_override = 0;\n" + "#define _enable_texture_expand() _texture_flag_override = SIGN_EXPAND_MASK\n" + "#define _disable_texture_expand() _texture_flag_override = 0\n" + "#define TEX_FLAGS(index) (texture_parameters[index].flags | _texture_flag_override)\n"; + } + else + { + OS << + "#define TEX_FLAGS(index) texture_parameters[index].flags\n"; + } + + OS << + "#define TEX_NAME(index) tex##index\n" + "#define TEX_NAME_STENCIL(index) tex##index##_stencil\n\n" + + "#define TEX1D(index, coord1) process_texel(texture(TEX_NAME(index), coord1 * texture_parameters[index].scale.x), TEX_FLAGS(index))\n" + "#define TEX1D_BIAS(index, coord1, bias) process_texel(texture(TEX_NAME(index), coord1 * texture_parameters[index].scale.x, bias), TEX_FLAGS(index))\n" + "#define TEX1D_LOD(index, coord1, lod) process_texel(textureLod(TEX_NAME(index), coord1 * texture_parameters[index].scale.x, lod), TEX_FLAGS(index))\n" + "#define TEX1D_GRAD(index, coord1, dpdx, dpdy) process_texel(textureGrad(TEX_NAME(index), coord1 * texture_parameters[index].scale.x, dpdx, dpdy), TEX_FLAGS(index))\n" + "#define TEX1D_PROJ(index, coord2) process_texel(textureProj(TEX_NAME(index), coord2 * vec2(texture_parameters[index].scale.x, 1.)), TEX_FLAGS(index))\n" + + "#define TEX2D(index, coord2) process_texel(texture(TEX_NAME(index), coord2 * texture_parameters[index].scale), TEX_FLAGS(index))\n" + "#define TEX2D_BIAS(index, coord2, bias) process_texel(texture(TEX_NAME(index), coord2 * texture_parameters[index].scale, bias), TEX_FLAGS(index))\n" + "#define TEX2D_LOD(index, coord2, lod) process_texel(textureLod(TEX_NAME(index), coord2 * texture_parameters[index].scale, lod), TEX_FLAGS(index))\n" + "#define TEX2D_GRAD(index, coord2, dpdx, dpdy) process_texel(textureGrad(TEX_NAME(index), coord2 * texture_parameters[index].scale, dpdx, dpdy), TEX_FLAGS(index))\n" + "#define TEX2D_PROJ(index, coord4) process_texel(textureProj(TEX_NAME(index), coord4 * vec4(texture_parameters[index].scale, 1., 1.)), TEX_FLAGS(index))\n" + + "#define TEX2D_DEPTH_RGBA8(index, coord2) process_texel(texture2DReconstruct(TEX_NAME(index), TEX_NAME_STENCIL(index), coord2 * texture_parameters[index].scale, texture_parameters[index].remap, TEX_FLAGS(index)), TEX_FLAGS(index))\n"; + + if (props.emulate_shadow_compare) + { + OS << + "#define SHADOW_COORD(coord3, scale, flags) vec3(coord3.xy * scale, _test_bit(flags, DEPTH_FLOAT)? coord3.z : min(coord3.z, 1.0))\n" + "#define SHADOW_COORD_PROJ(coord4, scale, flags) vec4(coord4.xy * scale, _test_bit(flags, DEPTH_FLOAT)? coord4.z : min(coord4.z, coord4.w), coord4.w)\n" + "#define TEX2D_SHADOW(index, coord3) texture(TEX_NAME(index), SHADOW_COORD(coord3, texture_parameters[index].scale, TEX_FLAGS(index)))\n" + "#define TEX2D_SHADOWPROJ(index, coord4) textureProj(TEX_NAME(index), SHADOW_COORD_PROJ(coord4, texture_parameters[index].scale, TEX_FLAGS(index)))\n"; + } + else + { + OS << + "#define TEX2D_SHADOW(index, coord3) texture(TEX_NAME(index), coord3 * vec3(texture_parameters[index].scale, 1.))\n" + "#define TEX2D_SHADOWPROJ(index, coord4) textureProj(TEX_NAME(index), coord4 * vec4(texture_parameters[index].scale, 1., 1.))\n"; + } + + OS << + "#define TEX3D(index, coord3) process_texel(texture(TEX_NAME(index), coord3), TEX_FLAGS(index))\n" + "#define TEX3D_BIAS(index, coord3, bias) process_texel(texture(TEX_NAME(index), coord3, bias), TEX_FLAGS(index))\n" + "#define TEX3D_LOD(index, coord3, lod) process_texel(textureLod(TEX_NAME(index), coord3, lod), TEX_FLAGS(index))\n" + "#define TEX3D_GRAD(index, coord3, dpdx, dpdy) process_texel(textureGrad(TEX_NAME(index), coord3, dpdx, dpdy), TEX_FLAGS(index))\n" + "#define TEX3D_PROJ(index, coord4) process_texel(textureProj(TEX_NAME(index), coord4), TEX_FLAGS(index))\n\n"; + } + + if (props.require_wpos) + { + OS << + "vec4 get_wpos()\n" + "{\n" + " float abs_scale = abs(wpos_scale);\n" + " return (gl_FragCoord * vec4(abs_scale, wpos_scale, 1., 1.)) + vec4(0., wpos_bias, 0., 0.);\n" + "}\n\n"; + } + } + + void insert_fog_declaration(std::ostream& OS) + { + program_common::insert_fog_declaration(OS, "vec4", "fog_c"); + } + + std::string getFunctionImpl(FUNCTION f) + { + switch (f) + { + default: + abort(); + case FUNCTION::FUNCTION_DP2: + return "$Ty(dot($0.xy, $1.xy))"; + case FUNCTION::FUNCTION_DP2A: + return "$Ty(dot($0.xy, $1.xy) + $2.x)"; + case FUNCTION::FUNCTION_DP3: + return "$Ty(dot($0.xyz, $1.xyz))"; + case FUNCTION::FUNCTION_DP4: + return "$Ty(dot($0, $1))"; + case FUNCTION::FUNCTION_DPH: + return "$Ty(dot(vec4($0.xyz, 1.0), $1))"; + case FUNCTION::FUNCTION_SFL: + return "$Ty(0.)"; + case FUNCTION::FUNCTION_STR: + return "$Ty(1.)"; + case FUNCTION::FUNCTION_FRACT: + return "fract($0)"; + case FUNCTION::FUNCTION_REFL: + return "reflect($0, $1)"; + case FUNCTION::FUNCTION_TEXTURE_SAMPLE1D: + return "TEX1D($_i, $0.x)"; + case FUNCTION::FUNCTION_TEXTURE_SAMPLE1D_BIAS: + return "TEX1D_BIAS($_i, $0.x, $1.x)"; + case FUNCTION::FUNCTION_TEXTURE_SAMPLE1D_PROJ: + return "TEX1D_PROJ($_i, $0.xy)"; + case FUNCTION::FUNCTION_TEXTURE_SAMPLE1D_LOD: + return "TEX1D_LOD($_i, $0.x, $1.x)"; + case FUNCTION::FUNCTION_TEXTURE_SAMPLE1D_GRAD: + return "TEX1D_GRAD($_i, $0.x, $1.x, $2.x)"; + case FUNCTION::FUNCTION_TEXTURE_SAMPLE2D: + return "TEX2D($_i, $0.xy)"; + case FUNCTION::FUNCTION_TEXTURE_SAMPLE2D_BIAS: + return "TEX2D_BIAS($_i, $0.xy, $1.x)"; + case FUNCTION::FUNCTION_TEXTURE_SAMPLE2D_PROJ: + return "TEX2D_PROJ($_i, $0)"; + case FUNCTION::FUNCTION_TEXTURE_SAMPLE2D_LOD: + return "TEX2D_LOD($_i, $0.xy, $1.x)"; + case FUNCTION::FUNCTION_TEXTURE_SAMPLE2D_GRAD: + return "TEX2D_GRAD($_i, $0.xy, $1.xy, $2.xy)"; + case FUNCTION::FUNCTION_TEXTURE_SHADOW2D: + return "TEX2D_SHADOW($_i, $0.xyz)"; + case FUNCTION::FUNCTION_TEXTURE_SHADOW2D_PROJ: + return "TEX2D_SHADOWPROJ($_i, $0)"; + case FUNCTION::FUNCTION_TEXTURE_SAMPLECUBE: + return "TEX3D($_i, $0.xyz)"; + case FUNCTION::FUNCTION_TEXTURE_SAMPLECUBE_BIAS: + return "TEX3D_BIAS($_i, $0.xyz, $1.x)"; + case FUNCTION::FUNCTION_TEXTURE_SAMPLECUBE_PROJ: + return "TEX3D($_i, ($0.xyz / $0.w))"; + case FUNCTION::FUNCTION_TEXTURE_SAMPLECUBE_LOD: + return "TEX3D_LOD($_i, $0.xyz, $1.x)"; + case FUNCTION::FUNCTION_TEXTURE_SAMPLECUBE_GRAD: + return "TEX3D_GRAD($_i, $0.xyz, $1.xyz, $2.xyz)"; + case FUNCTION::FUNCTION_TEXTURE_SAMPLE3D: + return "TEX3D($_i, $0.xyz)"; + case FUNCTION::FUNCTION_TEXTURE_SAMPLE3D_BIAS: + return "TEX3D_BIAS($_i, $0.xyz, $1.x)"; + case FUNCTION::FUNCTION_TEXTURE_SAMPLE3D_PROJ: + return "TEX3D_PROJ($_i, $0)"; + case FUNCTION::FUNCTION_TEXTURE_SAMPLE3D_LOD: + return "TEX3D_LOD($_i, $0.xyz, $1.x)"; + case FUNCTION::FUNCTION_TEXTURE_SAMPLE3D_GRAD: + return "TEX3D_GRAD($_i, $0.xyz, $1.xyz, $2.xyz)"; + case FUNCTION::FUNCTION_DFDX: + return "dFdx($0)"; + case FUNCTION::FUNCTION_DFDY: + return "dFdy($0)"; + case FUNCTION::FUNCTION_VERTEX_TEXTURE_FETCH1D: + return "textureLod($t, $0.x, 0)"; + case FUNCTION::FUNCTION_VERTEX_TEXTURE_FETCH2D: + return "textureLod($t, $0.xy, 0)"; + case FUNCTION::FUNCTION_VERTEX_TEXTURE_FETCH3D: + case FUNCTION::FUNCTION_VERTEX_TEXTURE_FETCHCUBE: + return "textureLod($t, $0.xyz, 0)"; + case FUNCTION::FUNCTION_TEXTURE_SAMPLE2D_DEPTH_RGBA: + return "TEX2D_DEPTH_RGBA8($_i, $0.xy)"; + case FUNCTION::FUNCTION_TEXTURE_SAMPLE2D_DEPTH_RGBA_PROJ: + return "TEX2D_DEPTH_RGBA8($_i, ($0.xy / $0.w))"; + } + } + + void insert_subheader_block(std::ostream& OS) + { + // Global types and stuff + // Must be compatible with std140 packing rules + OS << + "struct sampler_info\n" + "{\n" + " vec2 scale;\n" + " uint remap;\n" + " uint flags;\n" + "};\n" + "\n"; + } +} diff --git a/rpcs3/Emu/RSX/Common/GLSLCommon.h b/rpcs3/Emu/RSX/Common/GLSLCommon.h index 20209c60a8..ff144d4a27 100644 --- a/rpcs3/Emu/RSX/Common/GLSLCommon.h +++ b/rpcs3/Emu/RSX/Common/GLSLCommon.h @@ -4,972 +4,46 @@ #include "GLSLTypes.h" #include "ShaderParam.h" -#include "Utilities/StrFmt.h" +namespace rsx +{ + // TODO: Move this somewhere else once more compilers are supported other than glsl + enum texture_control_bits + { + GAMMA_R = 0, + GAMMA_G, + GAMMA_B, + GAMMA_A, + ALPHAKILL, + RENORMALIZE, + EXPAND_A, + EXPAND_R, + EXPAND_G, + EXPAND_B, + DEPTH_FLOAT, + + GAMMA_CTRL_MASK = (1 << GAMMA_R) | (1 << GAMMA_G) | (1 << GAMMA_B) | (1 << GAMMA_A), + EXPAND_MASK = (1 << EXPAND_R) | (1 << EXPAND_G) | (1 << EXPAND_B) | (1 << EXPAND_A), + EXPAND_OFFSET = EXPAND_A + }; +} namespace program_common { - static void insert_compare_op(std::ostream& OS, bool low_precision) - { - if (low_precision) - { - OS << - "int compare(const in float a, const in float b)\n" - "{\n" - " if (abs(a - b) < 0.000001) return 2;\n" - " return (a > b)? 4 : 1;\n" - "}\n\n" - - "bool comparison_passes(const in float a, const in float b, const in uint func)\n" - "{\n" - " if (func == 0) return false; // never\n" - " if (func == 7) return true; // always\n\n" - - " int op = compare(a, b);\n" - " switch (func)\n" - " {\n" - " case 1: return op == 1; // less\n" - " case 2: return op == 2; // equal\n" - " case 3: return op <= 2; // lequal\n" - " case 4: return op == 4; // greater\n" - " case 5: return op != 2; // nequal\n" - " case 6: return (op == 4 || op == 2); // gequal\n" - " }\n\n" - - " return false; // unreachable\n" - "}\n\n"; - } - else - { - OS << - "bool comparison_passes(const in float a, const in float b, const in uint func)\n" - "{\n" - " switch (func)\n" - " {\n" - " default:\n" - " case 0: return false; //never\n" - " case 1: return (a < b); //less\n" - " case 2: return (a == b); //equal\n" - " case 3: return (a <= b); //lequal\n" - " case 4: return (a > b); //greater\n" - " case 5: return (a != b); //nequal\n" - " case 6: return (a >= b); //gequal\n" - " case 7: return true; //always\n" - " }\n" - "}\n\n"; - } - } - - static void insert_compare_op_vector(std::ostream& OS) - { - OS << - "bvec4 comparison_passes(const in vec4 a, const in vec4 b, const in uint func)\n" - "{\n" - " switch (func)\n" - " {\n" - " default:\n" - " case 0: return bvec4(false); //never\n" - " case 1: return lessThan(a, b); //less\n" - " case 2: return equal(a, b); //equal\n" - " case 3: return lessThanEqual(a, b); //lequal\n" - " case 4: return greaterThan(a, b); //greater\n" - " case 5: return notEqual(a, b); //nequal\n" - " case 6: return greaterThanEqual(a, b); //gequal\n" - " case 7: return bvec4(true); //always\n" - " }\n" - "}\n\n"; - } - - static void insert_fog_declaration(std::ostream& OS, const std::string& wide_vector_type, const std::string& input_coord, bool declare = false) - { - std::string template_body; - - if (!declare) - template_body += "$T fetch_fog_value(const in uint mode)\n"; - else - template_body += "$T fetch_fog_value(const in uint mode, const in $T $I)\n"; - - template_body += - "{\n" - " $T result = $T($I.x, 0., 0., 0.);\n" - " switch(mode)\n" - " {\n" - " default:\n" - " return result;\n" - " case 0:\n" - " //linear\n" - " result.y = fog_param1 * $I.x + (fog_param0 - 1.);\n" - " break;\n" - " case 1:\n" - " //exponential\n" - " result.y = exp(11.084 * (fog_param1 * $I.x + fog_param0 - 1.5));\n" - " break;\n" - " case 2:\n" - " //exponential2\n" - " result.y = exp(-pow(4.709 * (fog_param1 * $I.x + fog_param0 - 1.5), 2.));\n" - " break;\n" - " case 3:\n" - " //exponential_abs\n" - " result.y = exp(11.084 * (fog_param1 * abs($I.x) + fog_param0 - 1.5));\n" - " break;\n" - " case 4:\n" - " //exponential2_abs\n" - " result.y = exp(-pow(4.709 * (fog_param1 * abs($I.x) + fog_param0 - 1.5), 2.));\n" - " break;\n" - " case 5:\n" - " //linear_abs\n" - " result.y = fog_param1 * abs($I.x) + (fog_param0 - 1.);\n" - " break;\n" - " }\n" - "\n" - " result.y = clamp(result.y, 0., 1.);\n" - " return result;\n" - "}\n\n"; - - std::pair replacements[] = - {std::make_pair("$T", wide_vector_type), - std::make_pair("$I", input_coord)}; - - OS << fmt::replace_all(template_body, replacements); - } + void insert_compare_op(std::ostream& OS, bool low_precision); + void insert_compare_op_vector(std::ostream& OS); + void insert_fog_declaration(std::ostream& OS, const std::string& wide_vector_type, const std::string& input_coord, bool declare = false); } namespace glsl { - static std::string getFloatTypeNameImpl(usz elementCount) - { - switch (elementCount) - { - default: - abort(); - case 1: - return "float"; - case 2: - return "vec2"; - case 3: - return "vec3"; - case 4: - return "vec4"; - } - } - - static std::string getHalfTypeNameImpl(usz elementCount) - { - switch (elementCount) - { - default: - abort(); - case 1: - return "float16_t"; - case 2: - return "f16vec2"; - case 3: - return "f16vec3"; - case 4: - return "f16vec4"; - } - } - - static std::string compareFunctionImpl(COMPARE f, const std::string &Op0, const std::string &Op1, bool scalar = false) - { - if (scalar) - { - switch (f) - { - case COMPARE::FUNCTION_SEQ: - return Op0 + " == " + Op1; - case COMPARE::FUNCTION_SGE: - return Op0 + " >= " + Op1; - case COMPARE::FUNCTION_SGT: - return Op0 + " > " + Op1; - case COMPARE::FUNCTION_SLE: - return Op0 + " <= " + Op1; - case COMPARE::FUNCTION_SLT: - return Op0 + " < " + Op1; - case COMPARE::FUNCTION_SNE: - return Op0 + " != " + Op1; - } - } - else - { - switch (f) - { - case COMPARE::FUNCTION_SEQ: - return "equal(" + Op0 + ", " + Op1 + ")"; - case COMPARE::FUNCTION_SGE: - return "greaterThanEqual(" + Op0 + ", " + Op1 + ")"; - case COMPARE::FUNCTION_SGT: - return "greaterThan(" + Op0 + ", " + Op1 + ")"; - case COMPARE::FUNCTION_SLE: - return "lessThanEqual(" + Op0 + ", " + Op1 + ")"; - case COMPARE::FUNCTION_SLT: - return "lessThan(" + Op0 + ", " + Op1 + ")"; - case COMPARE::FUNCTION_SNE: - return "notEqual(" + Op0 + ", " + Op1 + ")"; - } - } - - fmt::throw_exception("Unknown compare function"); - } - - static void insert_vertex_input_fetch(std::stringstream& OS, glsl_rules rules, bool glsl4_compliant=true) - { - std::string vertex_id_name = (rules != glsl_rules_spirv) ? "gl_VertexID" : "gl_VertexIndex"; - - //Actually decode a vertex attribute from a raw byte stream - OS << - "#define VTX_FMT_SNORM16 0\n" - "#define VTX_FMT_FLOAT32 1\n" - "#define VTX_FMT_FLOAT16 2\n" - "#define VTX_FMT_UNORM8 3\n" - "#define VTX_FMT_SINT16 4\n" - "#define VTX_FMT_COMP32 5\n" - "#define VTX_FMT_UINT8 6\n\n"; - - // For intel GPUs which cannot access vectors in indexed mode (driver bug? or glsl version too low?) - // Note: Tested on Mesa iris with HD 530 and compilant path works fine, may be a bug on Windows proprietary drivers - if (!glsl4_compliant) - { - OS << - "void mov(inout vec4 vector, const in int index, const in float scalar)\n" - "{\n" - " switch(index)\n" - " {\n" - " case 0: vector.x = scalar; return;\n" - " case 1: vector.y = scalar; return;\n" - " case 2: vector.z = scalar; return;\n" - " case 3: vector.w = scalar; return;\n" - " }\n" - "}\n\n" - - "uint ref(const in uvec4 vector, const in int index)\n" - "{\n" - " switch(index)\n" - " {\n" - " case 0: return vector.x;\n" - " case 1: return vector.y;\n" - " case 2: return vector.z;\n" - " case 3: return vector.w;\n" - " }\n" - "}\n\n"; - } - else - { - OS << - "#define mov(v, i, s) v[i] = s\n" - "#define ref(v, i) v[i]\n\n"; - } - - OS << - "struct attribute_desc\n" - "{\n" - " uint type;\n" - " uint attribute_size;\n" - " uint starting_offset;\n" - " uint stride;\n" - " uint frequency;\n" - " bool swap_bytes;\n" - " bool is_volatile;\n" - " bool modulo;\n" - "};\n\n" - - "uint gen_bits(const in uint x, const in uint y, const in uint z, const in uint w, const in bool swap)\n" - "{\n" - " return (swap) ?\n" - " _set_bits(_set_bits(_set_bits(w, z, 8, 8), y, 16, 8), x, 24, 8) :\n" - " _set_bits(_set_bits(_set_bits(x, y, 8, 8), z, 16, 8), w, 24, 8);\n" - "}\n\n" - - "uint gen_bits(const in uint x, const in uint y, const in bool swap)\n" - "{\n" - " return (swap)? _set_bits(y, x, 8, 8) : _set_bits(x, y, 8, 8);\n" - "}\n\n" - - // NOTE: (int(n) or int(n)) is broken on some NVIDIA and INTEL hardware when the sign bit is involved. - // See https://github.com/RPCS3/rpcs3/issues/8990 - "vec4 sext(const in ivec4 bits)\n" - "{\n" - " // convert raw 16 bit values into signed 32-bit float4 counterpart\n" - " bvec4 sign_check = lessThan(bits, ivec4(0x8000));\n" - " return _select(bits - 65536, bits, sign_check);\n" - "}\n\n" - - "float sext(const in int bits)\n" - "{\n" - " return (bits < 0x8000) ? float(bits) : float(bits - 65536); \n" - "}\n\n" - - "vec4 fetch_attribute(const in attribute_desc desc, const in int vertex_id, usamplerBuffer input_stream)\n" - "{\n" - " const int elem_size_table[] = { 2, 4, 2, 1, 2, 4, 1 };\n" - " const float scaling_table[] = { 32768., 1., 1., 255., 1., 32767., 1. };\n" - " const int elem_size = elem_size_table[desc.type];\n" - " const vec4 scale = scaling_table[desc.type].xxxx;\n\n" - - " uvec4 tmp, result = uvec4(0u);\n" - " vec4 ret;\n" - " int n, i = int((vertex_id * desc.stride) + desc.starting_offset);\n\n" - - " for (n = 0; n < desc.attribute_size; n++)\n" - " {\n" - " tmp.x = texelFetch(input_stream, i++).x;\n" - " if (elem_size == 2)\n" - " {\n" - " tmp.y = texelFetch(input_stream, i++).x;\n" - " tmp.x = gen_bits(tmp.x, tmp.y, desc.swap_bytes);\n" - " }\n" - " else if (elem_size == 4)\n" - " {\n" - " tmp.y = texelFetch(input_stream, i++).x;\n" - " tmp.z = texelFetch(input_stream, i++).x;\n" - " tmp.w = texelFetch(input_stream, i++).x;\n" - " tmp.x = gen_bits(tmp.x, tmp.y, tmp.z, tmp.w, desc.swap_bytes);\n" - " }\n\n" - - " mov(result, n, tmp.x);\n" - " }\n\n" - - " // Actual decoding step is done in vector space, outside the loop\n" - " if (desc.type == VTX_FMT_SNORM16 || desc.type == VTX_FMT_SINT16)\n" - " {\n" - " ret = sext(ivec4(result));\n" - " }\n" - " else if (desc.type == VTX_FMT_FLOAT32)\n" - " {\n" - " ret = uintBitsToFloat(result);\n" - " }\n" - " else if (desc.type == VTX_FMT_FLOAT16)\n" - " {\n" - " tmp.x = _set_bits(result.x, result.y, 16, 16);\n" - " tmp.y = _set_bits(result.z, result.w, 16, 16);\n" - " ret.xy = unpackHalf2x16(tmp.x);\n" - " ret.zw = unpackHalf2x16(tmp.y);\n" - " }\n" - " else if (desc.type == VTX_FMT_UINT8 || desc.type == VTX_FMT_UNORM8)\n" - " {\n" - " ret = vec4(desc.swap_bytes? result.wzyx : result);\n" - " }\n" - " else //if (desc.type == VTX_FMT_COMP32)\n" - " {\n" - " result = uvec4(_get_bits(result.x, 0, 11),\n" - " _get_bits(result.x, 11, 11),\n" - " _get_bits(result.x, 22, 10),\n" - " uint(scale.x));\n" - " ret = sext(ivec4(result) << ivec4(5, 5, 6, 0));\n" - " }\n\n" - - " if (desc.attribute_size < 4)\n" - " {\n" - " ret.w = scale.x;\n" - " }\n\n" - - " return ret / scale; \n" - "}\n\n" - - "attribute_desc fetch_desc(const in int location)\n" - "{\n" - " // Each descriptor is 64 bits wide\n" - " // [0-8] attribute stride\n" - " // [8-24] attribute divisor\n" - " // [24-27] attribute type\n" - " // [27-30] attribute size\n" - " // [30-31] reserved\n" - " // [32-60] starting offset\n" - " // [60-61] swap bytes flag\n" - " // [61-62] volatile flag\n" - " // [62-63] modulo enable flag\n\n"; - - if (rules == glsl_rules_opengl4) - { - // Data is packed into a ubo - OS << - " int block = (location >> 1);\n" - " int sub_block = (location & 1) << 1;\n" - " uvec2 attrib = uvec2(\n" - " ref(input_attributes_blob[block], sub_block + 0),\n" - " ref(input_attributes_blob[block], sub_block + 1));\n\n"; - } - else - { - // Fetch parameters streamed separately from draw parameters - OS << - " uvec2 attrib = texelFetch(vertex_layout_stream, location + int(layout_ptr_offset)).xy;\n\n"; - } - - OS << - " attribute_desc result;\n" - " result.stride = _get_bits(attrib.x, 0, 8);\n" - " result.frequency = _get_bits(attrib.x, 8, 16);\n" - " result.type = _get_bits(attrib.x, 24, 3);\n" - " result.attribute_size = _get_bits(attrib.x, 27, 3);\n" - " result.starting_offset = _get_bits(attrib.y, 0, 29);\n" - " result.swap_bytes = _test_bit(attrib.y, 29);\n" - " result.is_volatile = _test_bit(attrib.y, 30);\n" - " result.modulo = _test_bit(attrib.y, 31);\n" - " return result;\n" - "}\n\n" - - "vec4 read_location(const in int location)\n" - "{\n" - " attribute_desc desc = fetch_desc(location);\n" - " int vertex_id = " << vertex_id_name << " - int(vertex_base_index);\n" - " if (desc.frequency == 0)\n" - " {\n" - " vertex_id = 0;\n" - " }\n" - " else if (desc.modulo)\n" - " {\n" - " //if a vertex modifier is active; vertex_base must be 0 and is ignored\n" - " vertex_id = (" << vertex_id_name << " + int(vertex_index_offset)) % int(desc.frequency);\n" - " }\n" - " else\n" - " {\n" - " vertex_id /= int(desc.frequency); \n" - " }\n\n" - - " if (desc.is_volatile)\n" - " return fetch_attribute(desc, vertex_id, volatile_input_stream);\n" - " else\n" - " return fetch_attribute(desc, vertex_id, persistent_input_stream);\n" - "}\n\n"; - } - - static void insert_rop_init(std::ostream& OS) - { - OS << - " if (_test_bit(rop_control, 9))\n" - " {\n" - " // Convert x,y to linear address\n" - " const ivec2 stipple_coord = ivec2(gl_FragCoord.xy) % ivec2(32, 32);\n" - " const int address = stipple_coord.y * 32 + stipple_coord.x;\n" - " const int bit_offset = (address & 31);\n" - " const int word_index = _get_bits(address, 7, 3);\n" - " const int sub_index = _get_bits(address, 5, 2);\n\n" - - " if (_test_bit(stipple_pattern[word_index][sub_index], bit_offset))\n" - " {\n" - " _kill();\n" - " }\n" - " }\n\n"; - } - - static void insert_rop(std::ostream& OS, const shader_properties& props) - { - const std::string reg0 = props.fp32_outputs ? "r0" : "h0"; - const std::string reg1 = props.fp32_outputs ? "r2" : "h4"; - const std::string reg2 = props.fp32_outputs ? "r3" : "h6"; - const std::string reg3 = props.fp32_outputs ? "r4" : "h8"; - - //TODO: Implement all ROP options like CSAA and ALPHA_TO_ONE here - if (props.disable_early_discard) - { - OS << - " if (_fragment_discard)\n" - " {\n" - " discard;\n" - " }\n" - " else if (_get_bits(rop_control, 0, 8) != 0)\n"; - } - else - { - OS << " if (_get_bits(rop_control, 0, 8) != 0)\n"; - } - - OS << - " {\n" - " const bool alpha_test = _test_bit(rop_control, 0);\n" - " const uint alpha_func = _get_bits(rop_control, 16, 3);\n"; - - if (!props.fp32_outputs) - { - OS << " const bool srgb_convert = _test_bit(rop_control, 1);\n\n"; - } - - if (props.emulate_coverage_tests) - { - OS << " const bool a2c_enabled = _test_bit(rop_control, 4);\n"; - } - - OS << - " if (alpha_test && !comparison_passes(" << reg0 << ".a, alpha_ref, alpha_func))\n" - " {\n" - " discard;\n" - " }\n"; - - if (props.emulate_coverage_tests) - { - OS << - " else if (a2c_enabled && !coverage_test_passes(" << reg0 << ", rop_control >> 5))\n" - " {\n" - " discard;\n" - " }\n"; - } - - if (!props.fp32_outputs) - { - // Tested using NPUB90375; some shaders (32-bit output only?) do not obey srgb flags - if (props.supports_native_fp16) - { - OS << - " else if (srgb_convert)\n" - " {\n" - " " << reg0 << ".rgb = clamp16(linear_to_srgb(" << reg0 << ")).rgb;\n" - " " << reg1 << ".rgb = clamp16(linear_to_srgb(" << reg1 << ")).rgb;\n" - " " << reg2 << ".rgb = clamp16(linear_to_srgb(" << reg2 << ")).rgb;\n" - " " << reg3 << ".rgb = clamp16(linear_to_srgb(" << reg3 << ")).rgb;\n" - " }\n"; - } - else - { - OS << - " else if (srgb_convert)\n" - " {\n" - " " << reg0 << ".rgb = linear_to_srgb(" << reg0 << ").rgb;\n" - " " << reg1 << ".rgb = linear_to_srgb(" << reg1 << ").rgb;\n" - " " << reg2 << ".rgb = linear_to_srgb(" << reg2 << ").rgb;\n" - " " << reg3 << ".rgb = linear_to_srgb(" << reg3 << ").rgb;\n" - " }\n"; - } - } - - OS << - " }\n\n" - - " ocol0 = " << reg0 << ";\n" - " ocol1 = " << reg1 << ";\n" - " ocol2 = " << reg2 << ";\n" - " ocol3 = " << reg3 << ";\n\n"; - } - - static void insert_glsl_legacy_function(std::ostream& OS, const shader_properties& props) - { - OS << "#define _select mix\n"; - OS << "#define _saturate(x) clamp(x, 0., 1.)\n"; - OS << "#define _get_bits(x, off, count) bitfieldExtract(x, off, count)\n"; - OS << "#define _set_bits(x, y, off, count) bitfieldInsert(x, y, off, count)\n"; - OS << "#define _test_bit(x, y) (_get_bits(x, y, 1) != 0)\n"; - OS << "#define _rand(seed) fract(sin(dot(seed.xy, vec2(12.9898f, 78.233f))) * 43758.5453f)\n\n"; - - if (props.domain == glsl::program_domain::glsl_fragment_program) - { - OS << "// Workaround for broken early discard in some drivers\n"; - - if (props.disable_early_discard) - { - OS << "bool _fragment_discard = false;\n"; - OS << "#define _kill() _fragment_discard = true\n\n"; - } - else - { - OS << "#define _kill() discard\n\n"; - } - } - - if (props.require_lit_emulation) - { - OS << - "vec4 lit_legacy(const in vec4 val)" - "{\n" - " vec4 clamped_val = val;\n" - " clamped_val.x = max(val.x, 0.);\n" - " clamped_val.y = max(val.y, 0.);\n" - " vec4 result;\n" - " result.x = 1.;\n" - " result.w = 1.;\n" - " result.y = clamped_val.x;\n" - " result.z = clamped_val.x > 0. ? exp(clamped_val.w * log(max(clamped_val.y, 0.0000000001))) : 0.;\n" - " return result;\n" - "}\n\n"; - } - - if (props.domain == glsl::program_domain::glsl_vertex_program && props.emulate_zclip_transform) - { - if (props.emulate_depth_clip_only) - { - // Declare rcp_precise. Requires f64 support in the drivers. - // This is required to handle precision drift during division for extended depth range. - OS << - "double rcp_precise(double x)\n" - "{\n" - " double scaled = x * 0.0009765625;\n" - " double inv = 1.0 / scaled;\n" - " return inv * 0.0009765625;\n" - "}\n" - "\n" - // Technically the depth value here is the 'final' depth that should be stored in the Z buffer. - // Forward mapping eqn is d' = d * (f - n) + n, where d' is the stored Z value (this) and d is the normalized API value. - "vec4 apply_zclip_xform(const in vec4 pos, const in float near_plane, const in float far_plane)\n" - "{\n" - " if (far_plane != 0.0)\n" - " {\n" - " double z_range = (far_plane > near_plane)? (far_plane - near_plane) : far_plane;\n" - " double inv_range = rcp_precise(z_range);\n" - " float d = float(pos.z * rcp_precise(pos.w));\n" - " float new_d = (d - near_plane) * float(inv_range);\n" - " return vec4(pos.x, pos.y, (new_d * pos.w), pos.w);\n" - " }\n" - " else\n" - " {\n" - " return pos;\n" // Only values where Z=0 can ever pass this clip - " }\n" - "}\n\n"; - } - else - { - OS << - "vec4 apply_zclip_xform(const in vec4 pos, const in float near_plane, const in float far_plane)\n" - "{\n" - " float d = float(pos.z / pos.w);\n" - " if (d < 0.f && d >= near_plane)\n" - " {\n" - " // Clamp\n" - " d = 0.f;\n" - " }\n" - " else if (d > 1.f && d <= far_plane)\n" - " {\n" - " // Compress Z and store towards highest end of the range\n" - " d = min(1., 0.99 + (0.01 * (pos.z - near_plane) / (far_plane - near_plane)));\n" - " }\n" - " else\n" - " {\n" - " return pos;\n" - " }\n" - "\n" - " return vec4(pos.x, pos.y, d * pos.w, pos.w);\n" - "}\n\n"; - } - - return; - } - - program_common::insert_compare_op(OS, props.low_precision_tests); - - if (props.emulate_coverage_tests) - { - // Purely stochastic - OS << - "bool coverage_test_passes(const in vec4 _sample, const in uint control)\n" - "{\n" - " if (!_test_bit(control, 0)) return false;\n" - "\n" - " float random = _rand(gl_FragCoord);\n" - " return (_sample.a > random);\n" - "}\n\n"; - } - - if (!props.fp32_outputs) - { - OS << - "vec4 linear_to_srgb(const in vec4 cl)\n" - "{\n" - " vec4 low = cl * 12.92;\n" - " vec4 high = 1.055 * pow(cl, vec4(1. / 2.4)) - 0.055;\n" - " bvec4 select = lessThan(cl, vec4(0.0031308));\n" - " return clamp(mix(high, low, select), 0., 1.);\n" - "}\n\n"; - } - - if (props.require_depth_conversion) - { - //NOTE: Memory layout is fetched as byteswapped BGRA [GBAR] (GOW collection, DS2, DeS) - //The A component (Z) is useless (should contain stencil8 or just 1) - OS << - "vec4 decode_depth24(const in float depth_value, const in uint depth_float)\n" - "{\n" - " uint value;\n" - " if (depth_float == 0)\n" - " value = uint(depth_value * 16777215.);\n" - " else\n" - " value = _get_bits(floatBitsToUint(depth_value), 7, 24);\n" - "\n" - " uint b = _get_bits(value, 0, 8);\n" - " uint g = _get_bits(value, 8, 8);\n" - " uint r = _get_bits(value, 16, 8);\n" - " return vec4(float(g)/255., float(b)/255., 1., float(r)/255.);\n" - "}\n\n" - - "vec4 remap_vector(const in vec4 color, const in uint remap)\n" - "{\n" - " vec4 result;\n" - " if (_get_bits(remap, 0, 8) == 0xE4)\n" - " {\n" - " result = color;\n" - " }\n" - " else\n" - " {\n" - " uvec4 remap_channel = uvec4(remap) >> uvec4(2, 4, 6, 0);\n" - " remap_channel &= 3;\n" - " remap_channel = (remap_channel + 3) % 4; // Map A-R-G-B to R-G-B-A\n\n" - - " // Generate remapped result\n" - " result.a = color[remap_channel.a];\n" - " result.r = color[remap_channel.r];\n" - " result.g = color[remap_channel.g];\n" - " result.b = color[remap_channel.b];\n" - " }\n\n" - - " if (_get_bits(remap, 8, 8) == 0xAA)\n" - " return result;\n\n" - - " uvec4 remap_select = uvec4(remap) >> uvec4(10, 12, 14, 8);\n" - " remap_select &= 3;\n" - " bvec4 choice = lessThan(remap_select, uvec4(2));\n" - " return _select(result, vec4(remap_select), choice);\n" - "}\n\n" - - "vec4 texture2DReconstruct(sampler2D tex, usampler2D stencil_tex, const in vec2 coord, const in uint remap)\n" - "{\n" - " vec4 result = decode_depth24(texture(tex, coord.xy).r, remap >> 16);\n" - " result.z = float(texture(stencil_tex, coord.xy).x) / 255.f;\n\n" - - " if (remap == 0xAAE4)\n" - " return result;\n\n" - - " return remap_vector(result, remap);\n" - "}\n\n"; - } - - if (props.require_texture_ops) - { - OS << - -#ifdef __APPLE__ - "vec4 remap_vector(const in vec4 rgba, const in uint remap_bits)\n" - "{\n" - " uvec4 selector = (uvec4(remap_bits) >> uvec4(3, 6, 9, 0)) & 0x7;\n" - " bvec4 choice = greaterThan(selector, uvec4(1));\n" - "\n" - " vec4 direct = vec4(selector);\n" - " selector = min(selector - 2, selector);\n" - " vec4 indexed = vec4(rgba[selector.r], rgba[selector.g], rgba[selector.b], rgba[selector.a]);\n" - " return mix(direct, indexed, choice);\n" - "}\n\n" -#endif - "vec4 srgb_to_linear(const in vec4 cs)\n" - "{\n" - " vec4 a = cs / 12.92;\n" - " vec4 b = pow((cs + 0.055) / 1.055, vec4(2.4));\n" - " return _select(a, b, greaterThan(cs, vec4(0.04045)));\n" - "}\n\n" - - //TODO: Move all the texture read control operations here - "vec4 process_texel(in vec4 rgba, const in uint control_bits)\n" - "{\n" -#ifdef __APPLE__ - " uint remap_bits = (control_bits >> 16) & 0xFFFF;\n" - " if (remap_bits != 0x8D5) rgba = remap_vector(rgba, remap_bits);\n\n" -#endif - " if (control_bits == 0)\n" - " {\n" - " return rgba;\n" - " }\n" - "\n" - " if (_test_bit(control_bits, 4))\n" - " {\n" - " // Alphakill\n" - " if (rgba.a < 0.000001)\n" - " {\n" - " _kill();\n" - " return rgba;\n" - " }\n" - " }\n" - "\n" - " if (_test_bit(control_bits, 5))\n" - " {\n" - " // Renormalize to 8-bit (PS3) accuracy\n" - " rgba = floor(rgba * 255.);\n" - " rgba /= 255.;" - " }\n" - "\n" - " uvec4 mask;\n" - " vec4 convert;\n" - " uint op_mask = control_bits & 0x3C0u;\n" - "\n" - " if (op_mask != 0)\n" - " {\n" - " // Expand to signed normalized\n" - " mask = uvec4(op_mask) & uvec4(0x80, 0x100, 0x200, 0x40);\n" - " convert = (rgba * 2.f - 1.f);\n" - " rgba = _select(rgba, convert, notEqual(mask, uvec4(0)));\n" - " }\n" - "\n" - " op_mask = control_bits & 0xFu;\n" - " if (op_mask != 0u)\n" - " {\n" - " // Gamma correction\n" - " mask = uvec4(op_mask) & uvec4(0x1, 0x2, 0x4, 0x8);\n" - " convert = srgb_to_linear(rgba);\n" - " return _select(rgba, convert, notEqual(mask, uvec4(0)));\n" - " }\n" - "\n" - " return rgba;\n" - "}\n\n"; - - if (props.require_texture_expand) - { - OS << - "uint _texture_flag_override = 0;\n" - "#define _enable_texture_expand() _texture_flag_override = 0x3C0\n" - "#define _disable_texture_expand() _texture_flag_override = 0\n" - "#define TEX_FLAGS(index) (texture_parameters[index].flags | _texture_flag_override)\n"; - } - else - { - OS << - "#define TEX_FLAGS(index) texture_parameters[index].flags\n"; - } - - OS << - "#define TEX_NAME(index) tex##index\n" - "#define TEX_NAME_STENCIL(index) tex##index##_stencil\n\n" - - "#define TEX1D(index, coord1) process_texel(texture(TEX_NAME(index), coord1 * texture_parameters[index].scale.x), TEX_FLAGS(index))\n" - "#define TEX1D_BIAS(index, coord1, bias) process_texel(texture(TEX_NAME(index), coord1 * texture_parameters[index].scale.x, bias), TEX_FLAGS(index))\n" - "#define TEX1D_LOD(index, coord1, lod) process_texel(textureLod(TEX_NAME(index), coord1 * texture_parameters[index].scale.x, lod), TEX_FLAGS(index))\n" - "#define TEX1D_GRAD(index, coord1, dpdx, dpdy) process_texel(textureGrad(TEX_NAME(index), coord1 * texture_parameters[index].scale.x, dpdx, dpdy), TEX_FLAGS(index))\n" - "#define TEX1D_PROJ(index, coord2) process_texel(textureProj(TEX_NAME(index), coord2 * vec2(texture_parameters[index].scale.x, 1.)), TEX_FLAGS(index))\n" - - "#define TEX2D(index, coord2) process_texel(texture(TEX_NAME(index), coord2 * texture_parameters[index].scale), TEX_FLAGS(index))\n" - "#define TEX2D_BIAS(index, coord2, bias) process_texel(texture(TEX_NAME(index), coord2 * texture_parameters[index].scale, bias), TEX_FLAGS(index))\n" - "#define TEX2D_LOD(index, coord2, lod) process_texel(textureLod(TEX_NAME(index), coord2 * texture_parameters[index].scale, lod), TEX_FLAGS(index))\n" - "#define TEX2D_GRAD(index, coord2, dpdx, dpdy) process_texel(textureGrad(TEX_NAME(index), coord2 * texture_parameters[index].scale, dpdx, dpdy), TEX_FLAGS(index))\n" - "#define TEX2D_PROJ(index, coord4) process_texel(textureProj(TEX_NAME(index), coord4 * vec4(texture_parameters[index].scale, 1., 1.)), TEX_FLAGS(index))\n" - - "#define TEX2D_DEPTH_RGBA8(index, coord2) process_texel(texture2DReconstruct(TEX_NAME(index), TEX_NAME_STENCIL(index), coord2 * texture_parameters[index].scale, texture_parameters[index].remap), TEX_FLAGS(index))\n"; - - if (props.emulate_shadow_compare) - { - OS << - "#define TEX2D_SHADOW(index, coord3) texture(TEX_NAME(index), vec3(coord3.xy * texture_parameters[index].scale, min(float(coord3.z), 1.)))\n" - "#define TEX2D_SHADOWPROJ(index, coord4) textureProj(TEX_NAME(index), vec4(coord4.xy, min(coord4.z, coord4.w), coord4.w) * vec4(texture_parameters[index].scale, 1., 1.))\n"; - } - else - { - OS << - "#define TEX2D_SHADOW(index, coord3) texture(TEX_NAME(index), coord3 * vec3(texture_parameters[index].scale, 1.))\n" - "#define TEX2D_SHADOWPROJ(index, coord4) textureProj(TEX_NAME(index), coord4 * vec4(texture_parameters[index].scale, 1., 1.))\n"; - } - - OS << - "#define TEX3D(index, coord3) process_texel(texture(TEX_NAME(index), coord3), TEX_FLAGS(index))\n" - "#define TEX3D_BIAS(index, coord3, bias) process_texel(texture(TEX_NAME(index), coord3, bias), TEX_FLAGS(index))\n" - "#define TEX3D_LOD(index, coord3, lod) process_texel(textureLod(TEX_NAME(index), coord3, lod), TEX_FLAGS(index))\n" - "#define TEX3D_GRAD(index, coord3, dpdx, dpdy) process_texel(textureGrad(TEX_NAME(index), coord3, dpdx, dpdy), TEX_FLAGS(index))\n" - "#define TEX3D_PROJ(index, coord4) process_texel(textureProj(TEX_NAME(index), coord4), TEX_FLAGS(index))\n\n"; - } - - if (props.require_wpos) - { - OS << - "vec4 get_wpos()\n" - "{\n" - " float abs_scale = abs(wpos_scale);\n" - " return (gl_FragCoord * vec4(abs_scale, wpos_scale, 1., 1.)) + vec4(0., wpos_bias, 0., 0.);\n" - "}\n\n"; - } - } - - static void insert_fog_declaration(std::ostream& OS) - { - program_common::insert_fog_declaration(OS, "vec4", "fog_c"); - } - - static std::string getFunctionImpl(FUNCTION f) - { - switch (f) - { - default: - abort(); - case FUNCTION::FUNCTION_DP2: - return "$Ty(dot($0.xy, $1.xy))"; - case FUNCTION::FUNCTION_DP2A: - return "$Ty(dot($0.xy, $1.xy) + $2.x)"; - case FUNCTION::FUNCTION_DP3: - return "$Ty(dot($0.xyz, $1.xyz))"; - case FUNCTION::FUNCTION_DP4: - return "$Ty(dot($0, $1))"; - case FUNCTION::FUNCTION_DPH: - return "$Ty(dot(vec4($0.xyz, 1.0), $1))"; - case FUNCTION::FUNCTION_SFL: - return "$Ty(0.)"; - case FUNCTION::FUNCTION_STR: - return "$Ty(1.)"; - case FUNCTION::FUNCTION_FRACT: - return "fract($0)"; - case FUNCTION::FUNCTION_REFL: - return "reflect($0, $1)"; - case FUNCTION::FUNCTION_TEXTURE_SAMPLE1D: - return "TEX1D($_i, $0.x)"; - case FUNCTION::FUNCTION_TEXTURE_SAMPLE1D_BIAS: - return "TEX1D_BIAS($_i, $0.x, $1.x)"; - case FUNCTION::FUNCTION_TEXTURE_SAMPLE1D_PROJ: - return "TEX1D_PROJ($_i, $0.xy)"; - case FUNCTION::FUNCTION_TEXTURE_SAMPLE1D_LOD: - return "TEX1D_LOD($_i, $0.x, $1.x)"; - case FUNCTION::FUNCTION_TEXTURE_SAMPLE1D_GRAD: - return "TEX1D_GRAD($_i, $0.x, $1.x, $2.x)"; - case FUNCTION::FUNCTION_TEXTURE_SAMPLE2D: - return "TEX2D($_i, $0.xy)"; - case FUNCTION::FUNCTION_TEXTURE_SAMPLE2D_BIAS: - return "TEX2D_BIAS($_i, $0.xy, $1.x)"; - case FUNCTION::FUNCTION_TEXTURE_SAMPLE2D_PROJ: - return "TEX2D_PROJ($_i, $0)"; - case FUNCTION::FUNCTION_TEXTURE_SAMPLE2D_LOD: - return "TEX2D_LOD($_i, $0.xy, $1.x)"; - case FUNCTION::FUNCTION_TEXTURE_SAMPLE2D_GRAD: - return "TEX2D_GRAD($_i, $0.xy, $1.xy, $2.xy)"; - case FUNCTION::FUNCTION_TEXTURE_SHADOW2D: - return "TEX2D_SHADOW($_i, $0.xyz)"; - case FUNCTION::FUNCTION_TEXTURE_SHADOW2D_PROJ: - return "TEX2D_SHADOWPROJ($_i, $0)"; - case FUNCTION::FUNCTION_TEXTURE_SAMPLECUBE: - return "TEX3D($_i, $0.xyz)"; - case FUNCTION::FUNCTION_TEXTURE_SAMPLECUBE_BIAS: - return "TEX3D_BIAS($_i, $0.xyz, $1.x)"; - case FUNCTION::FUNCTION_TEXTURE_SAMPLECUBE_PROJ: - return "TEX3D($_i, ($0.xyz / $0.w))"; - case FUNCTION::FUNCTION_TEXTURE_SAMPLECUBE_LOD: - return "TEX3D_LOD($_i, $0.xyz, $1.x)"; - case FUNCTION::FUNCTION_TEXTURE_SAMPLECUBE_GRAD: - return "TEX3D_GRAD($_i, $0.xyz, $1.xyz, $2.xyz)"; - case FUNCTION::FUNCTION_TEXTURE_SAMPLE3D: - return "TEX3D($_i, $0.xyz)"; - case FUNCTION::FUNCTION_TEXTURE_SAMPLE3D_BIAS: - return "TEX3D_BIAS($_i, $0.xyz, $1.x)"; - case FUNCTION::FUNCTION_TEXTURE_SAMPLE3D_PROJ: - return "TEX3D_PROJ($_i, $0)"; - case FUNCTION::FUNCTION_TEXTURE_SAMPLE3D_LOD: - return "TEX3D_LOD($_i, $0.xyz, $1.x)"; - case FUNCTION::FUNCTION_TEXTURE_SAMPLE3D_GRAD: - return "TEX3D_GRAD($_i, $0.xyz, $1.xyz, $2.xyz)"; - case FUNCTION::FUNCTION_DFDX: - return "dFdx($0)"; - case FUNCTION::FUNCTION_DFDY: - return "dFdy($0)"; - case FUNCTION::FUNCTION_VERTEX_TEXTURE_FETCH1D: - return "textureLod($t, $0.x, 0)"; - case FUNCTION::FUNCTION_VERTEX_TEXTURE_FETCH2D: - return "textureLod($t, $0.xy, 0)"; - case FUNCTION::FUNCTION_VERTEX_TEXTURE_FETCH3D: - case FUNCTION::FUNCTION_VERTEX_TEXTURE_FETCHCUBE: - return "textureLod($t, $0.xyz, 0)"; - case FUNCTION::FUNCTION_TEXTURE_SAMPLE2D_DEPTH_RGBA: - return "TEX2D_DEPTH_RGBA8($_i, $0.xy)"; - case FUNCTION::FUNCTION_TEXTURE_SAMPLE2D_DEPTH_RGBA_PROJ: - return "TEX2D_DEPTH_RGBA8($_i, ($0.xy / $0.w))"; - } - } - - static void insert_subheader_block(std::ostream& OS) - { - // Global types and stuff - // Must be compatible with std140 packing rules - OS << - "struct sampler_info\n" - "{\n" - " vec2 scale;\n" - " uint remap;\n" - " uint flags;\n" - "};\n" - "\n"; - } + std::string getFloatTypeNameImpl(usz elementCount); + std::string getHalfTypeNameImpl(usz elementCount); + std::string compareFunctionImpl(COMPARE f, const std::string &Op0, const std::string &Op1, bool scalar = false); + void insert_vertex_input_fetch(std::stringstream& OS, glsl_rules rules, bool glsl4_compliant=true); + void insert_rop_init(std::ostream& OS); + void insert_rop(std::ostream& OS, const shader_properties& props); + void insert_glsl_legacy_function(std::ostream& OS, const shader_properties& props); + void insert_fog_declaration(std::ostream& OS); + std::string getFunctionImpl(FUNCTION f); + void insert_subheader_block(std::ostream& OS); } diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index f98cc52bed..02c257ed66 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -4,6 +4,7 @@ #include "Emu/Cell/PPUCallback.h" #include "Common/BufferUtils.h" +#include "Common/GLSLCommon.h" #include "Common/texture_cache.h" #include "Common/surface_store.h" #include "Capture/rsx_capture.h" @@ -1853,7 +1854,7 @@ namespace rsx if (tex.alpha_kill_enabled()) { //alphakill can be ignored unless a valid comparison function is set - texture_control |= (1 << 4); + texture_control |= (1 << texture_control_bits::ALPHAKILL); } const u32 texaddr = rsx::get_address(tex.offset(), tex.location()); @@ -1865,37 +1866,34 @@ namespace rsx if (sampler_descriptors[i]->format_class != RSX_FORMAT_CLASS_COLOR) { - switch (format) + switch (sampler_descriptors[i]->format_class) { - case CELL_GCM_TEXTURE_X16: - { - // A simple way to quickly read DEPTH16 data without shadow comparison + case RSX_FORMAT_CLASS_DEPTH16_FLOAT: + case RSX_FORMAT_CLASS_DEPTH24_FLOAT_X8_PACK32: + texture_control |= (1 << texture_control_bits::DEPTH_FLOAT); + break; + default: break; } + + switch (format) + { case CELL_GCM_TEXTURE_A8R8G8B8: case CELL_GCM_TEXTURE_D8R8G8B8: { - // Reading depth data as XRGB8 is supported with in-shader conversion - // TODO: Optionally add support for 16-bit formats (not necessary since type casts are easy with that) - u32 control_bits = sampler_descriptors[i]->format_class == RSX_FORMAT_CLASS_DEPTH24_FLOAT_X8_PACK32? (1u << 16) : 0u; - control_bits |= tex.remap() & 0xFFFF; + // Emulate bitcast in shader current_fragment_program.redirected_textures |= (1 << i); - current_fragment_program.texture_scale[i][2] = std::bit_cast(control_bits); + const auto float_en = (sampler_descriptors[i]->format_class == RSX_FORMAT_CLASS_DEPTH24_FLOAT_X8_PACK32)? 1 : 0; + texture_control |= (float_en << texture_control_bits::DEPTH_FLOAT); break; } + case CELL_GCM_TEXTURE_X16: // A simple way to quickly read DEPTH16 data without shadow comparison case CELL_GCM_TEXTURE_DEPTH16: - case CELL_GCM_TEXTURE_DEPTH16_FLOAT: case CELL_GCM_TEXTURE_DEPTH24_D8: + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: { - const auto compare_mode = tex.zfunc(); - if (!tex.alpha_kill_enabled() && - compare_mode < rsx::comparison_function::always && - compare_mode > rsx::comparison_function::never) - { - current_fragment_program.shadow_textures |= (1 << i); - texture_control |= u32(tex.zfunc()) << 8; - } + // Supported formats, nothing to do break; } default: @@ -1912,7 +1910,7 @@ namespace rsx case CELL_GCM_TEXTURE_R5G5B5A1: case CELL_GCM_TEXTURE_R5G6B5: case CELL_GCM_TEXTURE_R6G5B5: - texture_control |= (1 << 5); + texture_control |= (1 << texture_control_bits::RENORMALIZE); break; default: break; @@ -1932,14 +1930,14 @@ namespace rsx const auto remap_ctrl = (tex.remap() >> 8) & 0xAA; if (remap_ctrl == 0xAA) { - argb8_convert |= (sign_convert & 0xFu) << 6; + argb8_convert |= (sign_convert & 0xFu) << texture_control_bits::EXPAND_OFFSET; } else { - if (remap_ctrl & 0x03) argb8_convert |= (sign_convert & 0x1u) << 6; - if (remap_ctrl & 0x0C) argb8_convert |= (sign_convert & 0x2u) << 6; - if (remap_ctrl & 0x30) argb8_convert |= (sign_convert & 0x4u) << 6; - if (remap_ctrl & 0xC0) argb8_convert |= (sign_convert & 0x8u) << 6; + if (remap_ctrl & 0x03) argb8_convert |= (sign_convert & 0x1u) << texture_control_bits::EXPAND_OFFSET; + if (remap_ctrl & 0x0C) argb8_convert |= (sign_convert & 0x2u) << texture_control_bits::EXPAND_OFFSET; + if (remap_ctrl & 0x30) argb8_convert |= (sign_convert & 0x4u) << texture_control_bits::EXPAND_OFFSET; + if (remap_ctrl & 0xC0) argb8_convert |= (sign_convert & 0x8u) << texture_control_bits::EXPAND_OFFSET; } } diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 64b3a803f1..ff0a3243a6 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -345,6 +345,7 @@ + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index 5f768d7cd3..85234f20a7 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -206,6 +206,9 @@ Emu\GPU\RSX\Common + + Emu\GPU\RSX\Common + Emu\GPU\RSX\Null