From 650c1c64f1646503e3037af8c1ac2924a4b615e5 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Mon, 14 Aug 2017 15:45:50 +0300 Subject: [PATCH] gl: Workarounds for Intel GPUs which don't seem to be truly GL4 compliant --- rpcs3/Emu/RSX/Common/GLSLCommon.h | 81 ++++++++++++++-------------- rpcs3/Emu/RSX/GL/GLHelpers.h | 3 ++ rpcs3/Emu/RSX/GL/GLVertexProgram.cpp | 3 +- 3 files changed, 46 insertions(+), 41 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/GLSLCommon.h b/rpcs3/Emu/RSX/Common/GLSLCommon.h index a142b7dfc8..add643e2c0 100644 --- a/rpcs3/Emu/RSX/Common/GLSLCommon.h +++ b/rpcs3/Emu/RSX/Common/GLSLCommon.h @@ -54,7 +54,7 @@ namespace glsl fmt::throw_exception("Unknown compare function" HERE); } - static void insert_vertex_input_fetch(std::stringstream& OS, glsl_rules rules) + static void insert_vertex_input_fetch(std::stringstream& OS, glsl_rules rules, bool glsl4_compliant=true) { std::string vertex_id_name = (rules == glsl_rules_opengl4) ? "gl_VertexID" : "gl_VertexIndex"; @@ -92,23 +92,24 @@ namespace glsl OS << " return int(bits);\n"; OS << "}\n\n"; - /* TODO: For intel GPUs that seemingly cannot generate fp32 values from raw bits - OS << "float convert_to_f32(uint bits)\n"; - OS << "{\n"; - OS << " uint sign = (bits >> 31) & 1;\n"; - OS << " uint exp = (bits >> 23) & 0xff;\n"; - OS << " uint mantissa = bits & 0x7fffff;\n"; - OS << " float base = (sign != 0)? -1.f: 1.f;\n"; - OS << " base *= exp2(exp - 127);\n"; - OS << " float scale = 0.f;\n\n"; - OS << " for (int x = 0; x < 23; x++)\n"; - OS << " {\n"; - OS << " int inv = (22 - x);\n"; - OS << " if ((mantissa & (1 << inv)) == 0) continue;\n"; - OS << " scale += 1.f / pow(2.f, float(inv));\n"; - OS << " }\n"; - OS << " return base * scale;\n"; - OS << "}\n";*/ + //For intel GPUs which cannot access vectors in indexed mode (driver bug? or glsl version too low?)
+ if (!glsl4_compliant) + { + OS << "void mov(inout vec4 vector, in int index, in float scalar)\n"; + OS << "{\n"; + OS << " switch(index)\n"; + OS << " {\n"; + OS << " case 0: vector.x = scalar; return;\n"; + OS << " case 1: vector.y = scalar; return;\n"; + OS << " case 2: vector.z = scalar; return;\n"; + OS << " case 3: vector.w = scalar; return;\n"; + OS << " }\n"; + OS << "}\n"; + } + else + { + OS << "#define mov(v, i, s) v[i] = s\n"; + } OS << "#define get_s16(v, s) preserve_sign_s16(get_bits(v, s))\n\n"; @@ -129,43 +130,43 @@ namespace glsl OS << " {\n"; OS << " case 0:\n"; OS << " //signed normalized 16-bit\n"; - OS << " tmp[0] = texelFetch(input_stream, first_byte++).x;\n"; - OS << " tmp[1] = texelFetch(input_stream, first_byte++).x;\n"; - OS << " result[n] = get_s16(tmp.xy, desc.swap_bytes);\n"; - OS << " scale[n] = 32767.;\n"; + OS << " tmp.x = texelFetch(input_stream, first_byte++).x;\n"; + OS << " tmp.y = texelFetch(input_stream, first_byte++).x;\n"; + OS << " mov(result, n, get_s16(tmp.xy, desc.swap_bytes));\n"; + OS << " mov(scale, n, 32767.);\n"; OS << " break;\n"; OS << " case 1:\n"; OS << " //float\n"; - OS << " tmp[0] = texelFetch(input_stream, first_byte++).x;\n"; - OS << " tmp[1] = texelFetch(input_stream, first_byte++).x;\n"; - OS << " tmp[2] = texelFetch(input_stream, first_byte++).x;\n"; - OS << " tmp[3] = texelFetch(input_stream, first_byte++).x;\n"; - OS << " result[n] = uintBitsToFloat(get_bits(tmp, desc.swap_bytes));\n"; + OS << " tmp.x = texelFetch(input_stream, first_byte++).x;\n"; + OS << " tmp.y = texelFetch(input_stream, first_byte++).x;\n"; + OS << " tmp.z = texelFetch(input_stream, first_byte++).x;\n"; + OS << " tmp.w = texelFetch(input_stream, first_byte++).x;\n"; + OS << " mov(result, n, uintBitsToFloat(get_bits(tmp, desc.swap_bytes)));\n"; OS << " break;\n"; OS << " case 2:\n"; OS << " //half\n"; - OS << " tmp[0] = texelFetch(input_stream, first_byte++).x;\n"; - OS << " tmp[1] = texelFetch(input_stream, first_byte++).x;\n"; - 
OS << " result[n] = unpackHalf2x16(uint(get_bits(tmp.xy, desc.swap_bytes))).x;\n"; + OS << " tmp.x = texelFetch(input_stream, first_byte++).x;\n"; + OS << " tmp.y = texelFetch(input_stream, first_byte++).x;\n"; + OS << " mov(result, n, unpackHalf2x16(uint(get_bits(tmp.xy, desc.swap_bytes))).x);\n"; OS << " break;\n"; OS << " case 3:\n"; OS << " //unsigned byte\n"; - OS << " result[n] = texelFetch(input_stream, first_byte++).x;\n"; - OS << " scale[n] = 255.;\n"; + OS << " mov(result, n, texelFetch(input_stream, first_byte++).x);\n"; + OS << " mov(scale, n, 255.);\n"; OS << " reverse_order = (desc.swap_bytes != 0);\n"; OS << " break;\n"; OS << " case 4:\n"; OS << " //signed word\n"; - OS << " tmp[0] = texelFetch(input_stream, first_byte++).x;\n"; - OS << " tmp[1] = texelFetch(input_stream, first_byte++).x;\n"; - OS << " result[n] = get_s16(tmp.xy, desc.swap_bytes);\n"; + OS << " tmp.x = texelFetch(input_stream, first_byte++).x;\n"; + OS << " tmp.y = texelFetch(input_stream, first_byte++).x;\n"; + OS << " mov(result, n, get_s16(tmp.xy, desc.swap_bytes));\n"; OS << " break;\n"; OS << " case 5:\n"; OS << " //cmp\n"; - OS << " tmp[0] = texelFetch(input_stream, first_byte++).x;\n"; - OS << " tmp[1] = texelFetch(input_stream, first_byte++).x;\n"; - OS << " tmp[2] = texelFetch(input_stream, first_byte++).x;\n"; - OS << " tmp[3] = texelFetch(input_stream, first_byte++).x;\n"; + OS << " tmp.x = texelFetch(input_stream, first_byte++).x;\n"; + OS << " tmp.y = texelFetch(input_stream, first_byte++).x;\n"; + OS << " tmp.z = texelFetch(input_stream, first_byte++).x;\n"; + OS << " tmp.w = texelFetch(input_stream, first_byte++).x;\n"; OS << " bits = get_bits(tmp, desc.swap_bytes);\n"; OS << " result.x = preserve_sign_s16((bits & 0x7FF) << 5);\n"; OS << " result.y = preserve_sign_s16(((bits >> 11) & 0x7FF) << 5);\n"; @@ -175,7 +176,7 @@ namespace glsl OS << " break;\n"; OS << " case 6:\n"; OS << " //ub256\n"; - OS << " result[n] = float(texelFetch(input_stream, first_byte++).x);\n"; 
+ OS << " mov(result, n, float(texelFetch(input_stream, first_byte++).x));\n"; OS << " reverse_order = (desc.swap_bytes != 0);\n"; OS << " break;\n"; OS << " }\n"; diff --git a/rpcs3/Emu/RSX/GL/GLHelpers.h b/rpcs3/Emu/RSX/GL/GLHelpers.h index 4ad99e7dd0..192e0ba5d1 100644 --- a/rpcs3/Emu/RSX/GL/GLHelpers.h +++ b/rpcs3/Emu/RSX/GL/GLHelpers.h @@ -82,6 +82,7 @@ namespace gl bool ARB_texture_barrier_supported = false; bool NV_texture_barrier_supported = false; bool initialized = false; + bool vendor_INTEL = false; void initialize() { @@ -174,6 +175,8 @@ namespace gl glGetIntegerv(GL_MAJOR_VERSION, &version_major); glGetIntegerv(GL_MINOR_VERSION, &version_minor); + vendor_INTEL = true; + //Texture buffers moved into core at GL 3.3 if (version_major > 3 || (version_major == 3 && version_minor >= 3)) ARB_texture_buffer_supported = true; diff --git a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp index 3035234287..4425869ccf 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp @@ -3,6 +3,7 @@ #include "GLVertexProgram.h" #include "GLCommonDecompiler.h" +#include "GLHelpers.h" #include "../GCM.h" #include @@ -152,7 +153,7 @@ void GLVertexDecompilerThread::insertOutputs(std::stringstream & OS, const std:: void GLVertexDecompilerThread::insertMainStart(std::stringstream & OS) { insert_glsl_legacy_function(OS, glsl::glsl_vertex_program); - glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_opengl4); + glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_opengl4, gl::get_driver_caps().vendor_INTEL==false); std::string parameters = ""; for (int i = 0; i < 16; ++i)