diff --git a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp index 0c907d1065..05754ca4a0 100644 --- a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp @@ -16,7 +16,6 @@ FragmentProgramDecompiler::FragmentProgramDecompiler(const RSXFragmentProgram &p m_size = 0; } - void FragmentProgramDecompiler::SetDst(std::string code, bool append_mask) { if (!src0.exec_if_eq && !src0.exec_if_gr && !src0.exec_if_lt) return; @@ -45,6 +44,20 @@ void FragmentProgramDecompiler::SetDst(std::string code, bool append_mask) code = "((" + code + "- 0.5) * 2.)"; } + if (dst.fp16 && device_props.has_native_half_support) + { + // Cast to native data type + if (dst.opcode == RSX_FP_OPCODE_NRM) + { + // Returns a 3-component vector as the result + code = ClampValue(code + ".xyzz", 1); + } + else + { + code = ClampValue(code, 1); + } + } + if (dst.saturate) { code = saturate(code); @@ -67,16 +80,20 @@ void FragmentProgramDecompiler::SetDst(std::string code, bool append_mask) case RSX_FP_OPCODE_LG2: break; case RSX_FP_OPCODE_MOV: - //NOTE: Sometimes varying inputs from VS are out of range so do not exempt any input types, unless fp16 (Naruto UNS) + // NOTE: Sometimes varying inputs from VS are out of range so do not exempt any input types, unless fp16 (Naruto UNS) if (dst.fp16 && src0.fp16 && src0.reg_type == RSX_FP_REGISTER_TYPE_TEMP) break; default: { - //fp16 precsion flag on f32 register; ignore + // fp16 precsion flag on f32 register; ignore if (dst.prec == 1 && !dst.fp16) break; - //clamp value to allowed range + // Native type already has fp16 clamped (input must have been cast) + if (dst.prec == 1 && dst.fp16 && device_props.has_native_half_support) + break; + + // clamp value to allowed range code = ClampValue(code, dst.prec); break; } @@ -100,7 +117,7 @@ void FragmentProgramDecompiler::SetDst(std::string code, bool append_mask) return; } - std::string dest = 
AddReg(dst.dest_reg, dst.fp16) + "$m"; + std::string dest = AddReg(dst.dest_reg, !!dst.fp16) + "$m"; AddCodeCond(Format(dest), code); //AddCode("$ifcond " + dest + code + (append_mask ? "$m;" : ";")); @@ -159,15 +176,20 @@ std::string FragmentProgramDecompiler::GetMask() return ret.empty() || strncmp(ret.c_str(), dst_mask, 4) == 0 ? "" : ("." + ret); } -std::string FragmentProgramDecompiler::AddReg(u32 index, int fp16) +std::string FragmentProgramDecompiler::AddReg(u32 index, bool fp16) { - return m_parr.AddParam(PF_PARAM_NONE, getFloatTypeName(4), std::string(fp16 ? "h" : "r") + std::to_string(index), getFloatTypeName(4) + "(0., 0., 0., 0.)"); + const std::string type_name = (fp16 && device_props.has_native_half_support)? getHalfTypeName(4) : getFloatTypeName(4); + const std::string reg_name = std::string(fp16 ? "h" : "r") + std::to_string(index); + + return m_parr.AddParam(PF_PARAM_NONE, type_name, reg_name, type_name + "(0., 0., 0., 0.)"); } -bool FragmentProgramDecompiler::HasReg(u32 index, int fp16) +bool FragmentProgramDecompiler::HasReg(u32 index, bool fp16) { - return m_parr.HasParam(PF_PARAM_NONE, getFloatTypeName(4), - std::string(fp16 ? "h" : "r") + std::to_string(index)); + const std::string type_name = (fp16 && device_props.has_native_half_support)? getHalfTypeName(4) : getFloatTypeName(4); + const std::string reg_name = std::string(fp16 ? 
"h" : "r") + std::to_string(index); + + return m_parr.HasParam(PF_PARAM_NONE, type_name, reg_name); } std::string FragmentProgramDecompiler::AddCond() @@ -177,22 +199,23 @@ std::string FragmentProgramDecompiler::AddCond() std::string FragmentProgramDecompiler::AddConst() { - std::string name = std::string("fc") + std::to_string(m_size + 4 * 4); - if (m_parr.HasParam(PF_PARAM_UNIFORM, getFloatTypeName(4), name)) + const std::string name = std::string("fc") + std::to_string(m_size + 4 * 4); + const std::string type = getFloatTypeName(4); + + if (m_parr.HasParam(PF_PARAM_UNIFORM, type, name)) { return name; } auto data = (be_t*) ((char*)m_prog.addr + m_size + 4 * u32{sizeof(u32)}); - m_offset = 2 * 4 * sizeof(u32); u32 x = GetData(data[0]); u32 y = GetData(data[1]); u32 z = GetData(data[2]); u32 w = GetData(data[3]); - return m_parr.AddParam(PF_PARAM_UNIFORM, getFloatTypeName(4), name, - std::string(getFloatTypeName(4) + "(") + std::to_string((float&)x) + ", " + std::to_string((float&)y) - + ", " + std::to_string((float&)z) + ", " + std::to_string((float&)w) + ")"); + + const auto var = fmt::format("%s(%f, %f, %f, %f)", type, (f32&)x, (f32&)y, (f32&)z, (f32&)w); + return m_parr.AddParam(PF_PARAM_UNIFORM, type, name, var); } std::string FragmentProgramDecompiler::AddTex() @@ -240,9 +263,9 @@ std::string FragmentProgramDecompiler::NotZeroPositive(const std::string& code) std::string FragmentProgramDecompiler::ClampValue(const std::string& code, u32 precision) { - //FP16 is expected to overflow a lot easier at 0+-65504 - //FP32 can still work up to 0+-3.4E38 - //See http://http.download.nvidia.com/developer/Papers/2005/FP_Specials/FP_Specials.pdf + // FP16 is expected to overflow a lot easier at 0+-65504 + // FP32 can still work up to 0+-3.4E38 + // See http://http.download.nvidia.com/developer/Papers/2005/FP_Specials/FP_Specials.pdf switch (precision) { @@ -405,7 +428,7 @@ template std::string FragmentProgramDecompiler::GetSRC(T src) } } - ret += 
AddReg(src.tmp_reg_index, src.fp16); + ret += AddReg(src.tmp_reg_index, !!src.fp16); break; case RSX_FP_REGISTER_TYPE_INPUT: @@ -472,7 +495,7 @@ template std::string FragmentProgramDecompiler::GetSRC(T src) if (strncmp(swizzle.c_str(), f, 4) != 0) ret += "." + swizzle; - //Warning: Modifier order matters. e.g neg should be applied after precision clamping (tested with Naruto UNS) + // Warning: Modifier order matters. e.g neg should be applied after precision clamping (tested with Naruto UNS) if (src.abs) ret = "abs(" + ret + ")"; if (apply_precision_modifier) ret = ClampValue(ret, src1.input_prec_mod); if (src.neg) ret = "-" + ret; @@ -485,7 +508,8 @@ std::string FragmentProgramDecompiler::BuildCode() // Shader validation // Shader must at least write to one output for the body to be considered valid - const std::string vec4_type = getFloatTypeName(4); + const bool fp16_out = !(m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS); + const std::string vec4_type = (fp16_out && device_props.has_native_half_support)? getHalfTypeName(4) : getFloatTypeName(4); const std::string init_value = vec4_type + "(0., 0., 0., 0.)"; std::array output_register_names; std::array ouput_register_indices = { 0, 2, 3, 4 }; @@ -502,7 +526,7 @@ std::string FragmentProgramDecompiler::BuildCode() // Add the color output registers. 
They are statically written to and have guaranteed initialization (except r1.z which == wpos.z) // This can be used instead of an explicit clear pass in some games (Motorstorm) - if (m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) + if (!fp16_out) { output_register_names = { "r0", "r2", "r3", "r4" }; } @@ -548,22 +572,32 @@ std::string FragmentProgramDecompiler::BuildCode() // Insert global function definitions insertGlobalFunctions(OS); - // Accurate float to half clamping (preserves IEEE-754 NaN) - OS << - "vec4 clamp16(vec4 x)\n" - "{\n" - " bvec4 sel = isnan(x);\n" - " vec4 clamped = clamp(x, -65504., +65504.);\n" - " if (!any(sel))\n" - " {\n" - " return clamped;\n" - " }\n\n" - " return _select(clamped, x, sel);\n" - "}\n\n" + if (!device_props.has_native_half_support) + { + // Accurate float to half clamping (preserves IEEE-754 NaN) + OS << + "vec4 clamp16(vec4 x)\n" + "{\n" + " bvec4 sel = isnan(x);\n" + " vec4 clamped = clamp(x, -65504., +65504.);\n" + " if (!any(sel))\n" + " {\n" + " return clamped;\n" + " }\n\n" + " return _select(clamped, x, sel);\n" + "}\n\n" - "vec3 clamp16(vec3 x){ return clamp16(x.xyzz).xyz; }\n" - "vec2 clamp16(vec2 x){ return clamp16(x.xyxy).xy; }\n" - "float clamp16(float x){ return isnan(x)? x : clamp(x, -65504., +65504.); }\n"; + "vec3 clamp16(vec3 x){ return clamp16(x.xyzz).xyz; }\n" + "vec2 clamp16(vec2 x){ return clamp16(x.xyxy).xy; }\n" + "float clamp16(float x){ return isnan(x)? 
x : clamp(x, -65504., +65504.); }\n\n"; + } + else + { + // Define raw casts from f32->f16 + const std::string half4 = getHalfTypeName(4); + OS << "#define clamp16(x) " << half4 << "(x)\n\n"; + } + // Declare register gather/merge if needed if (properties.has_gather_op) @@ -595,7 +629,7 @@ std::string FragmentProgramDecompiler::BuildCode() return OS.str(); } -bool FragmentProgramDecompiler::handle_sct(u32 opcode) +bool FragmentProgramDecompiler::handle_sct_scb(u32 opcode) { switch (opcode) { @@ -626,24 +660,9 @@ bool FragmentProgramDecompiler::handle_sct(u32 opcode) case RSX_FP_OPCODE_SLT: SetDst(getFloatTypeName(4) + "(" + compareFunction(COMPARE::FUNCTION_SLT, "$0", "$1") + ")"); return true; case RSX_FP_OPCODE_SNE: SetDst(getFloatTypeName(4) + "(" + compareFunction(COMPARE::FUNCTION_SNE, "$0", "$1") + ")"); return true; case RSX_FP_OPCODE_STR: SetDst(getFunction(FUNCTION::FUNCTION_STR)); return true; - } - return false; -} -bool FragmentProgramDecompiler::handle_scb(u32 opcode) -{ - switch (opcode) - { - case RSX_FP_OPCODE_ADD: SetDst("($0 + $1)"); return true; + // SCB-only ops case RSX_FP_OPCODE_COS: SetDst("cos($0.xxxx)"); return true; - case RSX_FP_OPCODE_DIV: SetDst("($0 / " + NotZero("$1.x") + ")"); return true; - // Note: DIVSQ is not IEEE compliant. sqrt(0, 0) is 0 (Super Puzzle Fighter II Turbo HD Remix). - // sqrt(x, 0) might be equal to some big value (in absolute) whose sign is sign(x) but it has to be proven. 
- case RSX_FP_OPCODE_DIVSQ: SetDst("($0 / sqrt(" + NotZeroPositive("$1.x") + "))"); return true; - case RSX_FP_OPCODE_DP2: SetDst(getFunction(FUNCTION::FUNCTION_DP2)); return true; - case RSX_FP_OPCODE_DP3: SetDst(getFunction(FUNCTION::FUNCTION_DP3)); return true; - case RSX_FP_OPCODE_DP4: SetDst(getFunction(FUNCTION::FUNCTION_DP4)); return true; - case RSX_FP_OPCODE_DP2A: SetDst(getFunction(FUNCTION::FUNCTION_DP2A)); return true; case RSX_FP_OPCODE_DST: SetDst("vec4(distance($0, $1))"); return true; case RSX_FP_OPCODE_REFL: SetDst(getFunction(FUNCTION::FUNCTION_REFL)); return true; case RSX_FP_OPCODE_EX2: SetDst("exp2($0.xxxx)"); return true; @@ -656,27 +675,14 @@ bool FragmentProgramDecompiler::handle_scb(u32 opcode) case RSX_FP_OPCODE_LIF: SetDst(getFloatTypeName(4) + "(1.0, $0.y, ($0.y > 0 ? pow(2.0, $0.w) : 0.0), 1.0)"); return true; case RSX_FP_OPCODE_LRP: SetDst(getFloatTypeName(4) + "($2 * (1 - $0) + $1 * $0)"); return true; case RSX_FP_OPCODE_LG2: SetDst("log2(" + NotZeroPositive("$0.x") + ").xxxx"); return true; - case RSX_FP_OPCODE_MAD: SetDst("($0 * $1 + $2)"); return true; - case RSX_FP_OPCODE_MAX: SetDst("max($0, $1)"); return true; - case RSX_FP_OPCODE_MIN: SetDst("min($0, $1)"); return true; - case RSX_FP_OPCODE_MOV: SetDst("$0"); return true; - case RSX_FP_OPCODE_MUL: SetDst("($0 * $1)"); return true; - //Pack operations. See https://www.khronos.org/registry/OpenGL/extensions/NV/NV_fragment_program.txt + //Pack operations. 
See https://www.khronos.org/registry/OpenGL/extensions/NV/NV_fragment_program.txt case RSX_FP_OPCODE_PK2: SetDst(getFloatTypeName(4) + "(uintBitsToFloat(packHalf2x16($0.xy)))"); return true; case RSX_FP_OPCODE_PK4: SetDst(getFloatTypeName(4) + "(uintBitsToFloat(packSnorm4x8($0)))"); return true; case RSX_FP_OPCODE_PK16: SetDst(getFloatTypeName(4) + "(uintBitsToFloat(packSnorm2x16($0.xy)))"); return true; case RSX_FP_OPCODE_PKG: - //Should be similar to PKB but with gamma correction, see description of PK4UBG in khronos page + //Should be similar to PKB but with gamma correction, see description of PK4UBG in khronos page case RSX_FP_OPCODE_PKB: SetDst(getFloatTypeName(4) + "(uintBitsToFloat(packUnorm4x8($0)))"); return true; - case RSX_FP_OPCODE_SEQ: SetDst(getFloatTypeName(4) + "(" + compareFunction(COMPARE::FUNCTION_SEQ, "$0", "$1") + ")"); return true; - case RSX_FP_OPCODE_SFL: SetDst(getFunction(FUNCTION::FUNCTION_SFL)); return true; - case RSX_FP_OPCODE_SGE: SetDst(getFloatTypeName(4) + "(" + compareFunction(COMPARE::FUNCTION_SGE, "$0", "$1") + ")"); return true; - case RSX_FP_OPCODE_SGT: SetDst(getFloatTypeName(4) + "(" + compareFunction(COMPARE::FUNCTION_SGT, "$0", "$1") + ")"); return true; case RSX_FP_OPCODE_SIN: SetDst("sin($0.xxxx)"); return true; - case RSX_FP_OPCODE_SLE: SetDst(getFloatTypeName(4) + "(" + compareFunction(COMPARE::FUNCTION_SLE, "$0", "$1") + ")"); return true; - case RSX_FP_OPCODE_SLT: SetDst(getFloatTypeName(4) + "(" + compareFunction(COMPARE::FUNCTION_SLT, "$0", "$1") + ")"); return true; - case RSX_FP_OPCODE_SNE: SetDst(getFloatTypeName(4) + "(" + compareFunction(COMPARE::FUNCTION_SNE, "$0", "$1") + ")"); return true; - case RSX_FP_OPCODE_STR: SetDst(getFunction(FUNCTION::FUNCTION_STR)); return true; } return false; } @@ -949,16 +955,14 @@ std::string FragmentProgramDecompiler::Decompile() default: int prev_force_unit = forced_unit; - //Some instructions do not respect forced unit - //Tested with Tales of Vesperia + // Some 
instructions do not respect forced unit + // Tested with Tales of Vesperia if (SIP()) break; if (handle_tex_srb(opcode)) break; - //FENCT/FENCB do not actually reject instructions if they dont match the forced unit - //Tested with Dark Souls II where the respecting FENCX instruction will result in empty luminance averaging shaders - //TODO: More research is needed to determine what real HW does - if (handle_sct(opcode)) break; - if (handle_scb(opcode)) break; + // FENCT/FENCB do not actually reject instructions if they dont match the forced unit + // Looks like they are optimization hints and not hard-coded forced paths + if (handle_sct_scb(opcode)) break; forced_unit = FORCE_NONE; LOG_ERROR(RSX, "Unknown/illegal instruction: 0x%x (forced unit %d)", opcode, prev_force_unit); diff --git a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.h b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.h index fd95663851..6afed73702 100644 --- a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.h +++ b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.h @@ -3,10 +3,119 @@ #include "Emu/RSX/RSXFragmentProgram.h" #include +// Helper for GPR occupancy tracking +struct temp_register +{ + bool aliased_r0 = false; + bool aliased_h0 = false; + bool aliased_h1 = false; + bool last_write_half[4] = { false, false, false, false }; + + u32 real_index = UINT32_MAX; + + u32 h0_writes = 0u; // Number of writes to the first 64-bits of the register + u32 h1_writes = 0u; // Number of writes to the last 64-bits of the register + + void tag(u32 index, bool half_register, bool x, bool y, bool z, bool w) + { + if (half_register) + { + if (index & 1) + { + if (x) last_write_half[2] = true; + if (y) last_write_half[2] = true; + if (z) last_write_half[3] = true; + if (w) last_write_half[3] = true; + + aliased_h1 = true; + h1_writes++; + } + else + { + if (x) last_write_half[0] = true; + if (y) last_write_half[0] = true; + if (z) last_write_half[1] = true; + if (w) last_write_half[1] = true; + + aliased_h0 = true; 
+ h0_writes++; + } + } + else + { + if (x) last_write_half[0] = false; + if (y) last_write_half[1] = false; + if (z) last_write_half[2] = false; + if (w) last_write_half[3] = false; + + aliased_r0 = true; + + h0_writes++; + h1_writes++; + } + + if (real_index == UINT32_MAX) + { + if (half_register) + real_index = index >> 1; + else + real_index = index; + } + } + + bool requires_gather(u8 channel) const + { + //Data fetched from the single precision register requires merging of the two half registers + verify(HERE), channel < 4; + if (aliased_h0 && channel < 2) + { + return last_write_half[channel]; + } + + if (aliased_h1 && channel > 1) + { + return last_write_half[channel]; + } + + return false; + } + + bool requires_split(u32 /*index*/) const + { + //Data fetched from any of the two half registers requires sync with the full register + if (!(last_write_half[0] || last_write_half[1]) && aliased_r0) + { + //r0 has been written to + //TODO: Check for specific elements in real32 register + return true; + } + + return false; + } + + std::string gather_r() + { + std::string h0 = "h" + std::to_string(real_index << 1); + std::string h1 = "h" + std::to_string(real_index << 1 | 1); + std::string reg = "r" + std::to_string(real_index); + std::string ret = "//Invalid gather"; + + if (aliased_h0 && aliased_h1) + ret = "(gather(" + h0 + ", " + h1 + "))"; + else if (aliased_h0) + ret = "(gather(" + h0 + "), " + reg + ".zw)"; + else if (aliased_h1) + ret = "(" + reg + ".xy, gather(" + h1 + "))"; + + return ret; + } +}; + /** * This class is used to translate RSX Fragment program to GLSL/HLSL code * Backend with text based shader can subclass this class and implement : * - virtual std::string getFloatTypeName(size_t elementCount) = 0; + * - virtual std::string getHalfTypeName(size_t elementCount) = 0; * - virtual std::string getFunction(enum class FUNCTION) = 0; * - virtual std::string saturate(const std::string &code) = 0; * - virtual std::string compareFunction(enum class 
COMPARE, const std::string &, const std::string &) = 0; @@ -19,113 +128,6 @@ */ class FragmentProgramDecompiler { - struct temp_register - { - bool aliased_r0 = false; - bool aliased_h0 = false; - bool aliased_h1 = false; - bool last_write_half[4] = { false, false, false, false }; - - u32 real_index = UINT32_MAX; - - u32 h0_writes = 0u; // Number of writes to the first 64-bits of the register - u32 h1_writes = 0u; // Number of writes to the last 64-bits of the register - - void tag(u32 index, bool half_register, bool x, bool y, bool z, bool w) - { - if (half_register) - { - if (index & 1) - { - if (x) last_write_half[2] = true; - if (y) last_write_half[2] = true; - if (z) last_write_half[3] = true; - if (w) last_write_half[3] = true; - - aliased_h1 = true; - h1_writes++; - } - else - { - if (x) last_write_half[0] = true; - if (y) last_write_half[0] = true; - if (z) last_write_half[1] = true; - if (w) last_write_half[1] = true; - - aliased_h0 = true; - h0_writes++; - } - } - else - { - if (x) last_write_half[0] = false; - if (y) last_write_half[1] = false; - if (z) last_write_half[2] = false; - if (w) last_write_half[3] = false; - - aliased_r0 = true; - - h0_writes++; - h1_writes++; - } - - if (real_index == UINT32_MAX) - { - if (half_register) - real_index = index >> 1; - else - real_index = index; - } - } - - bool requires_gather(u8 channel) const - { - //Data fetched from the single precision register requires merging of the two half registers - verify(HERE), channel < 4; - if (aliased_h0 && channel < 2) - { - return last_write_half[channel]; - } - - if (aliased_h1 && channel > 1) - { - return last_write_half[channel]; - } - - return false; - } - - bool requires_split(u32 /*index*/) const - { - //Data fetched from any of the two half registers requires sync with the full register - if (!(last_write_half[0] || last_write_half[1]) && aliased_r0) - { - //r0 has been written to - //TODO: Check for specific elements in real32 register - return true; - } - - return 
false; - } - - std::string gather_r() - { - std::string h0 = "h" + std::to_string(real_index << 1); - std::string h1 = "h" + std::to_string(real_index << 1 | 1); - std::string reg = "r" + std::to_string(real_index); - std::string ret = "//Invalid gather"; - - if (aliased_h0 && aliased_h1) - ret = "(gather(" + h0 + ", " + h1 + "))"; - else if (aliased_h0) - ret = "(gather(" + h0 + "), " + reg + ".zw)"; - else if (aliased_h1) - ret = "(" + reg + ".xy, gather(" + h1 + "))"; - - return ret; - } - }; - OPDEST dst; SRC0 src0; SRC1 src1; @@ -148,8 +150,8 @@ class FragmentProgramDecompiler void SetDst(std::string code, bool append_mask = true); void AddCode(const std::string& code); - std::string AddReg(u32 index, int fp16); - bool HasReg(u32 index, int fp16); + std::string AddReg(u32 index, bool fp16); + bool HasReg(u32 index, bool fp16); std::string AddCond(); std::string AddConst(); std::string AddTex(); @@ -184,18 +186,11 @@ class FragmentProgramDecompiler u32 GetData(const u32 d) const { return d << 16 | d >> 16; } /** - * Emits code if opcode is an SCT one and returns true, + * Emits code if opcode is an SCT/SCB one and returns true, * otherwise do nothing and return false. * NOTE: What does SCT means ??? */ - bool handle_sct(u32 opcode); - - /** - * Emits code if opcode is an SCB one and returns true, - * otherwise do nothing and return false. - * NOTE: What does SCB means ??? - */ - bool handle_scb(u32 opcode); + bool handle_sct_scb(u32 opcode); /** * Emits code if opcode is an TEX SRB one and returns true, @@ -203,6 +198,7 @@ class FragmentProgramDecompiler * NOTE: What does TEX SRB means ??? */ bool handle_tex_srb(u32 opcode); + protected: const RSXFragmentProgram &m_prog; u32 m_ctrl = 0; @@ -214,6 +210,10 @@ protected: */ virtual std::string getFloatTypeName(size_t elementCount) = 0; + /** returns the type name of half vectors. 
+ */ + virtual std::string getHalfTypeName(size_t elementCount) = 0; + /** returns string calling function where arguments are passed via * $0 $1 $2 substring. */ @@ -259,6 +259,12 @@ public: } properties; + struct + { + bool has_native_half_support = false; + } + device_props; + ParamArray m_parr; FragmentProgramDecompiler(const RSXFragmentProgram &prog, u32& size); FragmentProgramDecompiler(const FragmentProgramDecompiler&) = delete; diff --git a/rpcs3/Emu/RSX/Common/GLSLCommon.h b/rpcs3/Emu/RSX/Common/GLSLCommon.h index e42bbba919..4cf245dd75 100644 --- a/rpcs3/Emu/RSX/Common/GLSLCommon.h +++ b/rpcs3/Emu/RSX/Common/GLSLCommon.h @@ -111,6 +111,23 @@ namespace glsl } } + static std::string getHalfTypeNameImpl(size_t elementCount) + { + switch (elementCount) + { + default: + abort(); + case 1: + return "float16_t"; + case 2: + return "f16vec2"; + case 3: + return "f16vec3"; + case 4: + return "f16vec4"; + } + } + static std::string compareFunctionImpl(COMPARE f, const std::string &Op0, const std::string &Op1, bool scalar = false) { if (scalar) @@ -372,7 +389,7 @@ namespace glsl "}\n\n"; } - static void insert_rop(std::ostream& OS, bool _32_bit_exports) + static void insert_rop(std::ostream& OS, bool _32_bit_exports, bool native_half_support) { const std::string reg0 = _32_bit_exports ? "r0" : "h0"; const std::string reg1 = _32_bit_exports ? "r2" : "h4"; @@ -398,15 +415,29 @@ namespace glsl if (!_32_bit_exports) { - //Tested using NPUB90375; some shaders (32-bit output only?) do not obey srgb flags - OS << - " else if (srgb_convert)\n" - " {\n" - " " << reg0 << ".rgb = linear_to_srgb(" << reg0 << ").rgb;\n" - " " << reg1 << ".rgb = linear_to_srgb(" << reg1 << ").rgb;\n" - " " << reg2 << ".rgb = linear_to_srgb(" << reg2 << ").rgb;\n" - " " << reg3 << ".rgb = linear_to_srgb(" << reg3 << ").rgb;\n" - " }\n"; + // Tested using NPUB90375; some shaders (32-bit output only?) 
do not obey srgb flags + if (native_half_support) + { + OS << + " else if (srgb_convert)\n" + " {\n" + " " << reg0 << ".rgb = clamp16(linear_to_srgb(" << reg0 << ")).rgb;\n" + " " << reg1 << ".rgb = clamp16(linear_to_srgb(" << reg1 << ")).rgb;\n" + " " << reg2 << ".rgb = clamp16(linear_to_srgb(" << reg2 << ")).rgb;\n" + " " << reg3 << ".rgb = clamp16(linear_to_srgb(" << reg3 << ")).rgb;\n" + " }\n"; + } + else + { + OS << + " else if (srgb_convert)\n" + " {\n" + " " << reg0 << ".rgb = linear_to_srgb(" << reg0 << ").rgb;\n" + " " << reg1 << ".rgb = linear_to_srgb(" << reg1 << ").rgb;\n" + " " << reg2 << ".rgb = linear_to_srgb(" << reg2 << ").rgb;\n" + " " << reg3 << ".rgb = linear_to_srgb(" << reg3 << ").rgb;\n" + " }\n"; + } } OS << @@ -468,7 +499,7 @@ namespace glsl // Alpha lower than the real threshold (e.g 0.25 for 4 samples) gets a randomized chance to make it to the lowest transparency state // Helps to avoid A2C tested foliage disappearing in the distance OS << - "bool coverage_test_passes(inout vec4 _sample, uint control)\n" + "bool coverage_test_passes(/*inout*/in vec4 _sample, uint control)\n" "{\n" " if ((control & 0x1) == 0) return false;\n" "\n" diff --git a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp index c510ffe760..9cbc597afa 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp @@ -18,6 +18,11 @@ std::string D3D12FragmentDecompiler::getFloatTypeName(size_t elementCount) return getFloatTypeNameImp(elementCount); } +std::string D3D12FragmentDecompiler::getHalfTypeName(size_t elementCount) +{ + return getFloatTypeNameImp(elementCount); +} + std::string D3D12FragmentDecompiler::getFunction(enum class FUNCTION f) { return getFunctionImp(f); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.h b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.h index 1377f8ec06..4b4d699c1e 100644 --- 
a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.h @@ -9,6 +9,7 @@ class D3D12FragmentDecompiler : public FragmentProgramDecompiler { protected: virtual std::string getFloatTypeName(size_t elementCount) override; + virtual std::string getHalfTypeName(size_t elementCount) override; virtual std::string getFunction(enum class FUNCTION) override; virtual std::string saturate(const std::string &code) override; virtual std::string compareFunction(enum class COMPARE, const std::string &, const std::string &) override; diff --git a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp index 7b77b7108f..cd9e63bb8a 100644 --- a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp @@ -2,8 +2,8 @@ #include #include "Emu/Memory/vm.h" #include "Emu/System.h" +#include "GLHelpers.h" #include "GLFragmentProgram.h" -#include "../Common/ProgramStateCache.h" #include "GLCommonDecompiler.h" #include "../GCM.h" @@ -13,6 +13,11 @@ std::string GLFragmentDecompilerThread::getFloatTypeName(size_t elementCount) return glsl::getFloatTypeNameImpl(elementCount); } +std::string GLFragmentDecompilerThread::getHalfTypeName(size_t elementCount) +{ + return glsl::getHalfTypeNameImpl(elementCount); +} + std::string GLFragmentDecompilerThread::getFunction(FUNCTION f) { return glsl::getFunctionImpl(f); @@ -31,6 +36,19 @@ std::string GLFragmentDecompilerThread::compareFunction(COMPARE f, const std::st void GLFragmentDecompilerThread::insertHeader(std::stringstream & OS) { OS << "#version 430\n"; + + if (device_props.has_native_half_support) + { + const auto driver_caps = gl::get_driver_caps(); + if (driver_caps.NV_gpu_shader5_supported) + { + OS << "#extension GL_NV_gpu_shader5: require\n"; + } + else if (driver_caps.AMD_gpu_shader_half_float_supported) + { + OS << "#extension GL_AMD_gpu_shader_half_float: require\n"; + } + } } void 
GLFragmentDecompilerThread::insertInputs(std::stringstream & OS) @@ -92,9 +110,10 @@ void GLFragmentDecompilerThread::insertOutputs(std::stringstream & OS) { "ocol3", m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r4" : "h8" }, }; + const auto reg_type = (m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) ? "vec4" : getHalfTypeName(4); for (int i = 0; i < std::size(table); ++i) { - if (m_parr.HasParam(PF_PARAM_NONE, "vec4", table[i].second)) + if (m_parr.HasParam(PF_PARAM_NONE, reg_type, table[i].second)) OS << "layout(location=" << i << ") out vec4 " << table[i].first << ";\n"; } } @@ -206,14 +225,16 @@ void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS) }; std::string parameters = ""; + const auto half4 = getHalfTypeName(4); for (auto ®_name : output_values) { - if (m_parr.HasParam(PF_PARAM_NONE, "vec4", reg_name)) + const auto type = (reg_name[0] == 'r' || !device_props.has_native_half_support)? "vec4" : half4; + if (m_parr.HasParam(PF_PARAM_NONE, type, reg_name)) { if (parameters.length()) parameters += ", "; - parameters += "inout vec4 " + reg_name; + parameters += "inout " + type + " " + reg_name; } } @@ -307,21 +328,24 @@ void GLFragmentDecompilerThread::insertMainEnd(std::stringstream & OS) OS << "{\n"; std::string parameters = ""; + const auto half4 = getHalfTypeName(4); + for (auto ®_name : output_values) { - if (m_parr.HasParam(PF_PARAM_NONE, "vec4", reg_name)) + const std::string type = (reg_name[0] == 'r' || !device_props.has_native_half_support)? 
"vec4" : half4; + if (m_parr.HasParam(PF_PARAM_NONE, type, reg_name)) { if (parameters.length()) parameters += ", "; parameters += reg_name; - OS << " vec4 " << reg_name << " = vec4(0.);\n"; + OS << " " << type << " " << reg_name << " = " << type << "(0.);\n"; } } OS << "\n" << " fs_main(" + parameters + ");\n\n"; - glsl::insert_rop(OS, !!(m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS)); + glsl::insert_rop(OS, !!(m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS), device_props.has_native_half_support); if (m_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) { @@ -359,7 +383,15 @@ void GLFragmentProgram::Decompile(const RSXFragmentProgram& prog) { u32 size; GLFragmentDecompilerThread decompiler(shader, parr, prog, size); + + if (!g_cfg.video.disable_native_float16) + { + const auto driver_caps = gl::get_driver_caps(); + decompiler.device_props.has_native_half_support = driver_caps.NV_gpu_shader5_supported || driver_caps.AMD_gpu_shader_half_float_supported; + } + decompiler.Task(); + for (const ParamType& PT : decompiler.m_parr.params[PF_PARAM_UNIFORM]) { for (const ParamItem& PI : PT.items) diff --git a/rpcs3/Emu/RSX/GL/GLFragmentProgram.h b/rpcs3/Emu/RSX/GL/GLFragmentProgram.h index 9e65e2370f..9852697b9f 100644 --- a/rpcs3/Emu/RSX/GL/GLFragmentProgram.h +++ b/rpcs3/Emu/RSX/GL/GLFragmentProgram.h @@ -20,6 +20,7 @@ public: protected: virtual std::string getFloatTypeName(size_t elementCount) override; + virtual std::string getHalfTypeName(size_t elementCount) override; virtual std::string getFunction(FUNCTION) override; virtual std::string saturate(const std::string &code) override; virtual std::string compareFunction(COMPARE, const std::string&, const std::string&) override; diff --git a/rpcs3/Emu/RSX/GL/GLHelpers.h b/rpcs3/Emu/RSX/GL/GLHelpers.h index 560e348d9e..80c65127ea 100644 --- a/rpcs3/Emu/RSX/GL/GLHelpers.h +++ b/rpcs3/Emu/RSX/GL/GLHelpers.h @@ -102,15 +102,28 @@ namespace gl bool ARB_depth_buffer_float_supported = false; bool ARB_texture_barrier_supported = 
false; bool NV_texture_barrier_supported = false; + bool NV_gpu_shader5_supported = false; + bool AMD_gpu_shader_half_float_supported = false; bool initialized = false; - bool vendor_INTEL = false; //has broken GLSL compiler - bool vendor_AMD = false; //has broken ARB_multidraw - bool vendor_NVIDIA = false; //has NaN poisoning issues - bool vendor_MESA = false; //requires CLIENT_STORAGE bit set for streaming buffers + bool vendor_INTEL = false; // has broken GLSL compiler + bool vendor_AMD = false; // has broken ARB_multidraw + bool vendor_NVIDIA = false; // has NaN poisoning issues + bool vendor_MESA = false; // requires CLIENT_STORAGE bit set for streaming buffers + + bool check(const std::string& ext_name, const char* test) /* returns true when ext_name equals test, logging the match via LOG_NOTICE */ + { + if (ext_name == test) + { + LOG_NOTICE(RSX, "Extension %s is supported", ext_name); + return true; + } + + return false; + } void initialize() { - int find_count = 8; + int find_count = 10; /* decremented once per recognised extension; ten extension names are tested below */ int ext_count = 0; glGetIntegerv(GL_NUM_EXTENSIONS, &ext_count); @@ -120,64 +133,78 @@ namespace gl const std::string ext_name = reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, i)); - if (ext_name == "GL_ARB_shader_draw_parameters") + if (check(ext_name, "GL_ARB_shader_draw_parameters")) { ARB_shader_draw_parameters_supported = true; find_count--; continue; } - if (ext_name == "GL_EXT_direct_state_access") + if (check(ext_name, "GL_EXT_direct_state_access")) { EXT_dsa_supported = true; find_count--; continue; } - if (ext_name == "GL_ARB_direct_state_access") + if (check(ext_name, "GL_ARB_direct_state_access")) { ARB_dsa_supported = true; find_count--; continue; } - if (ext_name == "GL_ARB_buffer_storage") + if (check(ext_name, "GL_ARB_buffer_storage")) { ARB_buffer_storage_supported = true; find_count--; continue; } - if (ext_name == "GL_ARB_texture_buffer_object") + if (check(ext_name, "GL_ARB_texture_buffer_object")) { ARB_texture_buffer_supported = true; find_count--; continue; } - if (ext_name == "GL_ARB_depth_buffer_float") + if (check(ext_name, 
"GL_ARB_depth_buffer_float")) { ARB_depth_buffer_float_supported = true; find_count--; continue; } - if (ext_name == "GL_ARB_texture_barrier") + if (check(ext_name, "GL_ARB_texture_barrier")) { ARB_texture_barrier_supported = true; find_count--; continue; } - if (ext_name == "GL_NV_texture_barrier") + if (check(ext_name, "GL_NV_texture_barrier")) { NV_texture_barrier_supported = true; find_count--; continue; } + + if (check(ext_name, "GL_NV_gpu_shader5")) /* one of the two extensions the GL decompiler accepts for native half types */ + { + NV_gpu_shader5_supported = true; + find_count--; + continue; + } + + if (check(ext_name, "GL_AMD_gpu_shader_half_float")) /* the other extension accepted for native half types */ + { + AMD_gpu_shader_half_float_supported = true; + find_count--; + continue; + } } - //Workaround for intel drivers which have terrible capability reporting + // Workaround for intel drivers which have terrible capability reporting std::string vendor_string = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); if (!vendor_string.empty()) { diff --git a/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp index 523b11ea74..8c44888dae 100644 --- a/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp +++ b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp @@ -149,7 +149,7 @@ namespace vk shader_object.setEnvInput(glslang::EShSourceGlsl, lang, glslang::EShClientVulkan, 100); shader_object.setEnvClient(glslang::EShClientVulkan, glslang::EShTargetClientVersion::EShTargetVulkan_1_0); shader_object.setEnvTarget(glslang::EshTargetSpv, glslang::EShTargetLanguageVersion::EShTargetSpv_1_0); - + bool success = false; const char *shader_text = shader.data(); diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp index a3ad70180a..b1b78c054e 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp @@ -12,6 +12,11 @@ std::string VKFragmentDecompilerThread::getFloatTypeName(size_t elementCount) return glsl::getFloatTypeNameImpl(elementCount); } +std::string VKFragmentDecompilerThread::getHalfTypeName(size_t elementCount) +{ + 
return glsl::getHalfTypeNameImpl(elementCount); +} + std::string VKFragmentDecompilerThread::getFunction(FUNCTION f) { return glsl::getFunctionImpl(f); @@ -29,7 +34,16 @@ std::string VKFragmentDecompilerThread::compareFunction(COMPARE f, const std::st void VKFragmentDecompilerThread::insertHeader(std::stringstream & OS) { - OS << "#version 420\n"; + if (device_props.has_native_half_support) /* native half types: emit #version 450 plus the explicit float16 arithmetic extension */ + { + OS << "#version 450\n"; + OS << "#extension GL_KHX_shader_explicit_arithmetic_types_float16: enable\n"; + } + else + { + OS << "#version 420\n"; + } + OS << "#extension GL_ARB_separate_shader_objects: enable\n\n"; } @@ -93,9 +107,10 @@ void VKFragmentDecompilerThread::insertOutputs(std::stringstream & OS) //NOTE: We do not skip outputs, the only possible combinations are a(0), b(0), ab(0,1), abc(0,1,2), abcd(0,1,2,3) u8 output_index = 0; + const auto reg_type = ((m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) || !device_props.has_native_half_support) ? "vec4" : getHalfTypeName(4); /* AddReg only registers h-registers under the half type when native half support is on; otherwise they must be looked up as vec4, matching insertMainStart/insertMainEnd */ for (int i = 0; i < std::size(table); ++i) { - if (m_parr.HasParam(PF_PARAM_NONE, "vec4", table[i].second)) + if (m_parr.HasParam(PF_PARAM_NONE, reg_type, table[i].second)) { OS << "layout(location=" << std::to_string(output_index++) << ") " << "out vec4 " << table[i].first << ";\n"; vk_prog->output_color_masks[i] = UINT32_MAX; @@ -242,14 +257,16 @@ void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS) }; std::string parameters = ""; + const auto half4 = getHalfTypeName(4); for (auto &reg_name : output_values) { - if (m_parr.HasParam(PF_PARAM_NONE, "vec4", reg_name)) + const auto type = (reg_name[0] == 'r' || !device_props.has_native_half_support)? 
"vec4" : half4; + if (m_parr.HasParam(PF_PARAM_NONE, type, reg_name)) { if (parameters.length()) parameters += ", "; - parameters += "inout vec4 " + reg_name; + parameters += "inout " + type + " " + reg_name; /* pass the register with the same type name that HasParam matched */ } } @@ -346,21 +363,24 @@ void VKFragmentDecompilerThread::insertMainEnd(std::stringstream & OS) OS << "{\n"; std::string parameters = ""; + const auto half4 = getHalfTypeName(4); + for (auto &reg_name : output_values) { - if (m_parr.HasParam(PF_PARAM_NONE, "vec4", reg_name)) + const std::string type = (reg_name[0] == 'r' || !device_props.has_native_half_support)? "vec4" : half4; + if (m_parr.HasParam(PF_PARAM_NONE, type, reg_name)) { if (parameters.length()) parameters += ", "; parameters += reg_name; - OS << " vec4 " << reg_name << " = vec4(0.);\n"; + OS << " " << type << " " << reg_name << " = " << type << "(0.);\n"; } } OS << "\n" << " fs_main(" + parameters + ");\n\n"; - glsl::insert_rop(OS, !!(m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS)); + glsl::insert_rop(OS, !!(m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS), device_props.has_native_half_support); if (m_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) { @@ -400,6 +420,12 @@ void VKFragmentProgram::Decompile(const RSXFragmentProgram& prog) u32 size; std::string source; VKFragmentDecompilerThread decompiler(source, parr, prog, size, *this); + + if (!g_cfg.video.disable_native_float16) /* the user setting can veto native half-precision types */ + { + decompiler.device_props.has_native_half_support = vk::get_current_renderer()->get_shader_types_support().allow_float16; + } + decompiler.Task(); shader.create(::glsl::program_domain::glsl_fragment_program, source); diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.h b/rpcs3/Emu/RSX/VK/VKFragmentProgram.h index 6d75e021e8..839b19bf7b 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.h +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.h @@ -24,6 +24,7 @@ public: const std::vector& get_inputs() { return inputs; } protected: virtual std::string getFloatTypeName(size_t elementCount) override; + virtual std::string 
getHalfTypeName(size_t elementCount) override; virtual std::string getFunction(FUNCTION) override; virtual std::string saturate(const std::string &code) override; virtual std::string compareFunction(COMPARE, const std::string&, const std::string&) override; diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index dfcbcbe276..d8cc447284 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -201,6 +201,12 @@ namespace vk bool bgra8_linear; }; + struct gpu_shader_types_support /* per-device flags for native float16 / int8 shader types */ + { + bool allow_float16; + bool allow_int8; + }; + // Memory Allocator - base class class mem_allocator_base @@ -515,9 +521,32 @@ namespace vk physical_device *pgpu = nullptr; memory_type_mapping memory_map{}; gpu_formats_support m_formats_support{}; + gpu_shader_types_support m_shader_types_support{}; std::unique_ptr<mem_allocator_base> m_allocator; VkDevice dev = VK_NULL_HANDLE; + void get_physical_device_features(VkPhysicalDeviceFeatures& features) /* fills `features`; on the features2 path it also records float16/int8 support in m_shader_types_support */ + { + if (!vkGetPhysicalDeviceFeatures2) /* entry point evaluates to null: use the plain feature query and leave m_shader_types_support untouched */ + { + vkGetPhysicalDeviceFeatures(*pgpu, &features); + } + else + { + VkPhysicalDeviceFloat16Int8FeaturesKHR shader_support_info{}; + shader_support_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR; + + VkPhysicalDeviceFeatures2 features2; + features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; + features2.pNext = &shader_support_info; /* chain the float16/int8 struct so the query fills it in */ + vkGetPhysicalDeviceFeatures2(*pgpu, &features2); + + /* float16 is currently forced off; the queried value is left commented out */ m_shader_types_support.allow_float16 = false;//!!shader_support_info.shaderFloat16; + m_shader_types_support.allow_int8 = !!shader_support_info.shaderInt8; + features = features2.features; + } + } + public: render_device() {} @@ -549,7 +578,7 @@ namespace vk //2. DXT support //3. 
Indexable storage buffers VkPhysicalDeviceFeatures available_features; - vkGetPhysicalDeviceFeatures(*pgpu, &available_features); + get_physical_device_features(available_features); available_features.samplerAnisotropy = VK_TRUE; available_features.textureCompressionBC = VK_TRUE; @@ -566,6 +595,21 @@ namespace vk device.ppEnabledExtensionNames = requested_extensions; device.pEnabledFeatures = &available_features; + VkPhysicalDeviceFloat16Int8FeaturesKHR shader_support_info{}; /* declared outside the if so it is still alive when vkCreateDevice reads device.pNext */ + if (m_shader_types_support.allow_float16) + { + // Allow use of f16 type in shaders if possible + shader_support_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR; + shader_support_info.shaderFloat16 = VK_TRUE; + device.pNext = &shader_support_info; + + LOG_NOTICE(RSX, "GPU/driver supports float16 data types natively. Using native float16_t variables if possible."); + } + else + { + LOG_NOTICE(RSX, "GPU/driver lacks support for float16 data types. All float16_t arithmetic will be emulated with float32_t."); + } + CHECK_RESULT(vkCreateDevice(*pgpu, &device, nullptr, &dev)); memory_map = vk::get_memory_mapping(pdev); @@ -634,6 +678,11 @@ namespace vk return m_formats_support; } + const gpu_shader_types_support& get_shader_types_support() const /* read-only access to the shader type support flags stored in m_shader_types_support */ + { + return m_shader_types_support; + } + mem_allocator_base* get_allocator() const { return m_allocator.get(); diff --git a/rpcs3/Emu/RSX/VK/VulkanAPI.h b/rpcs3/Emu/RSX/VK/VulkanAPI.h index 235f5061e3..a7b1503e0e 100644 --- a/rpcs3/Emu/RSX/VK/VulkanAPI.h +++ b/rpcs3/Emu/RSX/VK/VulkanAPI.h @@ -14,6 +14,21 @@ #include "define_new_memleakdetect.h" #include "Utilities/types.h" +// TODO: Remove this fallback once distro packages catch up; Ubuntu is stuck at Vulkan headers 1.1.73 (bionic) and 1.1.82 (cosmic). +// TODO: Check whether the Travis build still uses the libvulkan-dev package. 
+#if VK_HEADER_VERSION < 95 /* local fallback declarations, compiled only when the installed Vulkan headers are older than version 95 */ + +typedef struct VkPhysicalDeviceFloat16Int8FeaturesKHR { + VkStructureType sType; + void* pNext; + VkBool32 shaderFloat16; + VkBool32 shaderInt8; +} VkPhysicalDeviceFloat16Int8FeaturesKHR; + +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR VkStructureType(1000082000) + +#endif /* VK_HEADER_VERSION < 95 */ + namespace vk { void init(); diff --git a/rpcs3/Emu/System.h b/rpcs3/Emu/System.h index 80405303e5..8d19ea9f7f 100644 --- a/rpcs3/Emu/System.h +++ b/rpcs3/Emu/System.h @@ -448,6 +448,7 @@ struct cfg_root : cfg::node cfg::_bool full_rgb_range_output{this, "Use full RGB output range", true}; // Video out dynamic range cfg::_bool disable_asynchronous_shader_compiler{this, "Disable Asynchronous Shader Compiler", false}; cfg::_bool strict_texture_flushing{this, "Strict Texture Flushing", false}; + cfg::_bool disable_native_float16{this, "Disable native float16 support", false}; /* when set, the GL and VK fragment decompilers skip the native-half capability query (has_native_half_support keeps its default) */ cfg::_int<1, 8> consequtive_frames_to_draw{this, "Consecutive Frames To Draw", 1}; cfg::_int<1, 8> consequtive_frames_to_skip{this, "Consecutive Frames To Skip", 1}; cfg::_int<50, 800> resolution_scale_percent{this, "Resolution Scale", 100};