From 650c1c64f1646503e3037af8c1ac2924a4b615e5 Mon Sep 17 00:00:00 2001
From: kd-11 <karokidii@gmail.com>
Date: Mon, 14 Aug 2017 15:45:50 +0300
Subject: [PATCH] gl: Workarounds for intel GPUs which dont seem to be truly
 GL4 compliant

---
 rpcs3/Emu/RSX/Common/GLSLCommon.h    | 81 ++++++++++++++--------------
 rpcs3/Emu/RSX/GL/GLHelpers.h         |  3 ++
 rpcs3/Emu/RSX/GL/GLVertexProgram.cpp |  3 +-
 3 files changed, 46 insertions(+), 41 deletions(-)

diff --git a/rpcs3/Emu/RSX/Common/GLSLCommon.h b/rpcs3/Emu/RSX/Common/GLSLCommon.h
index a142b7dfc8..add643e2c0 100644
--- a/rpcs3/Emu/RSX/Common/GLSLCommon.h
+++ b/rpcs3/Emu/RSX/Common/GLSLCommon.h
@@ -54,7 +54,7 @@ namespace glsl
 		fmt::throw_exception("Unknown compare function" HERE);
 	}
 
-	static void insert_vertex_input_fetch(std::stringstream& OS, glsl_rules rules)
+	static void insert_vertex_input_fetch(std::stringstream& OS, glsl_rules rules, bool glsl4_compliant=true)
 	{
 		std::string vertex_id_name = (rules == glsl_rules_opengl4) ? "gl_VertexID" : "gl_VertexIndex";
 
@@ -92,23 +92,24 @@ namespace glsl
 		OS << "	return int(bits);\n";
 		OS << "}\n\n";
 
-		/* TODO: For intel GPUs that seemingly cannot generate fp32 values from raw bits
-		OS << "float convert_to_f32(uint bits)\n";
-		OS << "{\n";
-		OS << "	uint sign = (bits >> 31) & 1;\n";
-		OS << "	uint exp = (bits >> 23) & 0xff;\n";
-		OS << "	uint mantissa = bits & 0x7fffff;\n";
-		OS << "	float base = (sign != 0)? -1.f: 1.f;\n";
-		OS << "	base *= exp2(exp - 127);\n";
-		OS << "	float scale = 0.f;\n\n";
-		OS << "	for (int x = 0; x < 23; x++)\n";
-		OS << "	{\n";
-		OS << "		int inv = (22 - x);\n";
-		OS << "		if ((mantissa & (1 << inv)) == 0) continue;\n";
-		OS << "		scale += 1.f / pow(2.f, float(inv));\n";
-		OS << "	}\n";
-		OS << "	return base * scale;\n";
-		OS << "}\n";*/
+		//For intel GPUs which cannot access vectors in indexed mode (driver bug? or glsl version too low?)
+		if (!glsl4_compliant)
+		{
+			OS << "void mov(inout vec4 vector, in int index, in float scalar)\n";
+			OS << "{\n";
+			OS << "	switch(index)\n";
+			OS << "	{\n";
+			OS << "		case 0: vector.x = scalar; return;\n";
+			OS << "		case 1: vector.y = scalar; return;\n";
+			OS << "		case 2: vector.z = scalar; return;\n";
+			OS << "		case 3: vector.w = scalar; return;\n";
+			OS << "	}\n";
+			OS << "}\n";
+		}
+		else
+		{
+			OS << "#define mov(v, i, s) v[i] = s\n";
+		}
 
 		OS << "#define get_s16(v, s) preserve_sign_s16(get_bits(v, s))\n\n";
 
@@ -129,43 +130,43 @@ namespace glsl
 		OS << "		{\n";
 		OS << "		case 0:\n";
 		OS << "			//signed normalized 16-bit\n";
-		OS << "			tmp[0] = texelFetch(input_stream, first_byte++).x;\n";
-		OS << "			tmp[1] = texelFetch(input_stream, first_byte++).x;\n";
-		OS << "			result[n] = get_s16(tmp.xy, desc.swap_bytes);\n";
-		OS << "			scale[n] = 32767.;\n";
+		OS << "			tmp.x = texelFetch(input_stream, first_byte++).x;\n";
+		OS << "			tmp.y = texelFetch(input_stream, first_byte++).x;\n";
+		OS << "			mov(result, n, get_s16(tmp.xy, desc.swap_bytes));\n";
+		OS << "			mov(scale, n, 32767.);\n";
 		OS << "			break;\n";
 		OS << "		case 1:\n";
 		OS << "			//float\n";
-		OS << "			tmp[0] = texelFetch(input_stream, first_byte++).x;\n";
-		OS << "			tmp[1] = texelFetch(input_stream, first_byte++).x;\n";
-		OS << "			tmp[2] = texelFetch(input_stream, first_byte++).x;\n";
-		OS << "			tmp[3] = texelFetch(input_stream, first_byte++).x;\n";
-		OS << "			result[n] = uintBitsToFloat(get_bits(tmp, desc.swap_bytes));\n";
+		OS << "			tmp.x = texelFetch(input_stream, first_byte++).x;\n";
+		OS << "			tmp.y = texelFetch(input_stream, first_byte++).x;\n";
+		OS << "			tmp.z = texelFetch(input_stream, first_byte++).x;\n";
+		OS << "			tmp.w = texelFetch(input_stream, first_byte++).x;\n";
+		OS << "			mov(result, n, uintBitsToFloat(get_bits(tmp, desc.swap_bytes)));\n";
 		OS << "			break;\n";
 		OS << "		case 2:\n";
 		OS << "			//half\n";
-		OS << "			tmp[0] = texelFetch(input_stream, first_byte++).x;\n";
-		OS << "			tmp[1] = texelFetch(input_stream, first_byte++).x;\n";
-		OS << "			result[n] = unpackHalf2x16(uint(get_bits(tmp.xy, desc.swap_bytes))).x;\n";
+		OS << "			tmp.x = texelFetch(input_stream, first_byte++).x;\n";
+		OS << "			tmp.y = texelFetch(input_stream, first_byte++).x;\n";
+		OS << "			mov(result, n, unpackHalf2x16(uint(get_bits(tmp.xy, desc.swap_bytes))).x);\n";
 		OS << "			break;\n";
 		OS << "		case 3:\n";
 		OS << "			//unsigned byte\n";
-		OS << "			result[n] = texelFetch(input_stream, first_byte++).x;\n";
-		OS << "			scale[n] = 255.;\n";
+		OS << "			mov(result, n, texelFetch(input_stream, first_byte++).x);\n";
+		OS << "			mov(scale, n, 255.);\n";
 		OS << "			reverse_order = (desc.swap_bytes != 0);\n";
 		OS << "			break;\n";
 		OS << "		case 4:\n";
 		OS << "			//signed word\n";
-		OS << "			tmp[0] = texelFetch(input_stream, first_byte++).x;\n";
-		OS << "			tmp[1] = texelFetch(input_stream, first_byte++).x;\n";
-		OS << "			result[n] = get_s16(tmp.xy, desc.swap_bytes);\n";
+		OS << "			tmp.x = texelFetch(input_stream, first_byte++).x;\n";
+		OS << "			tmp.y = texelFetch(input_stream, first_byte++).x;\n";
+		OS << "			mov(result, n, get_s16(tmp.xy, desc.swap_bytes));\n";
 		OS << "			break;\n";
 		OS << "		case 5:\n";
 		OS << "			//cmp\n";
-		OS << "			tmp[0] = texelFetch(input_stream, first_byte++).x;\n";
-		OS << "			tmp[1] = texelFetch(input_stream, first_byte++).x;\n";
-		OS << "			tmp[2] = texelFetch(input_stream, first_byte++).x;\n";
-		OS << "			tmp[3] = texelFetch(input_stream, first_byte++).x;\n";
+		OS << "			tmp.x = texelFetch(input_stream, first_byte++).x;\n";
+		OS << "			tmp.y = texelFetch(input_stream, first_byte++).x;\n";
+		OS << "			tmp.z = texelFetch(input_stream, first_byte++).x;\n";
+		OS << "			tmp.w = texelFetch(input_stream, first_byte++).x;\n";
 		OS << "			bits = get_bits(tmp, desc.swap_bytes);\n";
 		OS << "			result.x = preserve_sign_s16((bits & 0x7FF) << 5);\n";
 		OS << "			result.y = preserve_sign_s16(((bits >> 11) & 0x7FF) << 5);\n";
@@ -175,7 +176,7 @@ namespace glsl
 		OS << "			break;\n";
 		OS << "		case 6:\n";
 		OS << "			//ub256\n";
-		OS << "			result[n] = float(texelFetch(input_stream, first_byte++).x);\n";
+		OS << "			mov(result, n, float(texelFetch(input_stream, first_byte++).x));\n";
 		OS << "			reverse_order = (desc.swap_bytes != 0);\n";
 		OS << "			break;\n";
 		OS << "		}\n";
diff --git a/rpcs3/Emu/RSX/GL/GLHelpers.h b/rpcs3/Emu/RSX/GL/GLHelpers.h
index 4ad99e7dd0..192e0ba5d1 100644
--- a/rpcs3/Emu/RSX/GL/GLHelpers.h
+++ b/rpcs3/Emu/RSX/GL/GLHelpers.h
@@ -82,6 +82,7 @@ namespace gl
 		bool ARB_texture_barrier_supported = false;
 		bool NV_texture_barrier_supported = false;
 		bool initialized = false;
+		bool vendor_INTEL = false;
 
 		void initialize()
 		{
@@ -174,6 +175,8 @@ namespace gl
 				glGetIntegerv(GL_MAJOR_VERSION, &version_major);
 				glGetIntegerv(GL_MINOR_VERSION, &version_minor);
 
+				vendor_INTEL = true;
+
 				//Texture buffers moved into core at GL 3.3
 				if (version_major > 3 || (version_major == 3 && version_minor >= 3))
 					ARB_texture_buffer_supported = true;
diff --git a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp
index 3035234287..4425869ccf 100644
--- a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp
+++ b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp
@@ -3,6 +3,7 @@
 
 #include "GLVertexProgram.h"
 #include "GLCommonDecompiler.h"
+#include "GLHelpers.h"
 #include "../GCM.h"
 
 #include <algorithm>
@@ -152,7 +153,7 @@ void GLVertexDecompilerThread::insertOutputs(std::stringstream & OS, const std::
 void GLVertexDecompilerThread::insertMainStart(std::stringstream & OS)
 {
 	insert_glsl_legacy_function(OS, glsl::glsl_vertex_program);
-	glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_opengl4);
+	glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_opengl4, gl::get_driver_caps().vendor_INTEL==false);
 
 	std::string parameters = "";
 	for (int i = 0; i < 16; ++i)