From 5a4babb0fdbd13d87c0862bd6d90bdd9bfa59b40 Mon Sep 17 00:00:00 2001
From: DHrpcs3 <dh.rpcs3@gmail.com>
Date: Tue, 5 Jan 2016 18:26:44 +0200
Subject: [PATCH 01/13] Implemented basic tiled regions support

---
 rpcs3/Emu/RSX/RSXThread.cpp | 136 ++++++++++++++++++++++++++++++++++++
 rpcs3/Emu/RSX/RSXThread.h   |  14 ++++
 2 files changed, 150 insertions(+)

diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp
index 8c11798d31..d40b67cd9f 100644
--- a/rpcs3/Emu/RSX/RSXThread.cpp
+++ b/rpcs3/Emu/RSX/RSXThread.cpp
@@ -934,6 +934,108 @@ namespace rsx
 			return 1;
 		}
 	}
+	
+	void tiled_address::write(const void *src, u32 width, u32 height, u32 pitch)
+	{
+		if (!tile)
+		{
+			memcpy(ptr, src, height * pitch);
+			return;
+		}
+
+		u32 offset_x = base % tile->pitch;
+		u32 offset_y = base / tile->pitch;
+
+		switch (tile->comp)
+		{
+		case CELL_GCM_COMPMODE_C32_2X1:
+		case CELL_GCM_COMPMODE_DISABLED:
+			for (int y = 0; y < height; ++y)
+			{
+				memcpy(ptr + (offset_y + y) * tile->pitch + offset_x, (u8*)src + pitch * y, pitch);
+			}
+			break;
+			/*
+		case CELL_GCM_COMPMODE_C32_2X1:
+			for (u32 y = 0; y < height; ++y)
+			{
+				for (u32 x = 0; x < width; ++x)
+				{
+					u32 value = *(u32*)((u8*)src + pitch * y + x * sizeof(u32));
+
+					*(u32*)(ptr + (offset_y + y) * tile->pitch + offset_x + (x * 2 + 0) * sizeof(u32)) = value;
+					*(u32*)(ptr + (offset_y + y) * tile->pitch + offset_x + (x * 2 + 1) * sizeof(u32)) = value;
+				}
+			}
+			break;
+			*/
+		case CELL_GCM_COMPMODE_C32_2X2:
+			for (u32 y = 0; y < height; ++y)
+			{
+				for (u32 x = 0; x < width; ++x)
+				{
+					u32 value = *(u32*)((u8*)src + pitch * y + x * sizeof(u32));
+
+					*(u32*)(ptr + (offset_y + y * 2 + 0) * tile->pitch + offset_x + (x * 2 + 0) * sizeof(u32)) = value;
+					*(u32*)(ptr + (offset_y + y * 2 + 0) * tile->pitch + offset_x + (x * 2 + 1) * sizeof(u32)) = value;
+					*(u32*)(ptr + (offset_y + y * 2 + 1) * tile->pitch + offset_x + (x * 2 + 0) * sizeof(u32)) = value;
+					*(u32*)(ptr + (offset_y + y * 2 + 1) * tile->pitch + offset_x + (x * 2 + 1) * sizeof(u32)) = value;
+				}
+			}
+			break;
+		default:
+			throw;
+		}
+	}
+
+	void tiled_address::read(void *dst, u32 width, u32 height, u32 pitch)
+	{
+		if (!tile)
+		{
+			memcpy(dst, ptr, height * pitch);
+			return;
+		}
+
+		u32 offset_x = base % tile->pitch;
+		u32 offset_y = base / tile->pitch;
+
+		switch (tile->comp)
+		{
+		case CELL_GCM_COMPMODE_C32_2X1:
+		case CELL_GCM_COMPMODE_DISABLED:
+			for (int y = 0; y < height; ++y)
+			{
+				memcpy((u8*)dst + pitch * y, ptr + (offset_y + y) * tile->pitch + offset_x, pitch);
+			}
+			break;
+			/*
+		case CELL_GCM_COMPMODE_C32_2X1:
+			for (u32 y = 0; y < height; ++y)
+			{
+				for (u32 x = 0; x < width; ++x)
+				{
+					u32 value = *(u32*)(ptr + (offset_y + y) * tile->pitch + offset_x + (x * 2 + 0) * sizeof(u32));
+
+					*(u32*)((u8*)dst + pitch * y + x * sizeof(u32)) = value;
+				}
+			}
+			break;
+			*/
+		case CELL_GCM_COMPMODE_C32_2X2:
+			for (u32 y = 0; y < height; ++y)
+			{
+				for (u32 x = 0; x < width; ++x)
+				{
+					u32 value = *(u32*)(ptr + (offset_y + y * 2 + 0) * tile->pitch + offset_x + (x * 2 + 0) * sizeof(u32));
+
+					*(u32*)((u8*)dst + pitch * y + x * sizeof(u32)) = value;
+				}
+			}
+			break;
+		default:
+			throw;
+		}
+	}
 
 	void thread::load_vertex_data(u32 first, u32 count)
 	{
@@ -1341,6 +1443,40 @@ namespace rsx
 		start();
 	}
 
+	GcmTileInfo *thread::find_tile(u32 offset, u32 location)
+	{
+		for (GcmTileInfo &tile : tiles)
+		{
+			if (!tile.binded || tile.location != location)
+			{
+				continue;
+			}
+
+			if (offset >= tile.offset && offset < tile.offset + tile.size)
+			{
+				return &tile;
+			}
+		}
+
+		return nullptr;
+	}
+
+	tiled_address thread::get_tiled_address(u32 offset, u32 location)
+	{
+		u32 address = get_address(offset, location);
+
+		GcmTileInfo *tile = find_tile(offset, location);
+		u32 base = 0;
+		
+		if (tile)
+		{
+			base = offset - tile->offset;
+			address = get_address(tile->offset, location);
+		}
+
+		return{ address, base, tile, (u8*)vm::base(address) };
+	}
+
 	u32 thread::ReadIO32(u32 addr)
 	{
 		u32 value;
diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h
index b383233ff0..0cecb914ed 100644
--- a/rpcs3/Emu/RSX/RSXThread.h
+++ b/rpcs3/Emu/RSX/RSXThread.h
@@ -215,6 +215,17 @@ namespace rsx
 
 	u32 get_address(u32 offset, u32 location);
 
+	struct tiled_address
+	{
+		u32 address;
+		u32 base;
+		GcmTileInfo *tile;
+		u8 *ptr;
+
+		void write(const void *src, u32 width, u32 height, u32 pitch);
+		void read(void *dst, u32 width, u32 height, u32 pitch);
+	};
+
 	template<typename T>
 	void pad_texture(void* inputPixels, void* outputPixels, u16 inputWidth, u16 inputHeight, u16 outputWidth, u16 outputHeight) 
 	{
@@ -504,6 +515,9 @@ namespace rsx
 		void reset();
 		void init(const u32 ioAddress, const u32 ioSize, const u32 ctrlAddress, const u32 localAddress);
 
+		tiled_address get_tiled_address(u32 offset, u32 location);
+		GcmTileInfo *find_tile(u32 offset, u32 location);
+
 		u32 ReadIO32(u32 addr);
 		void WriteIO32(u32 addr, u32 value);
 	};

From 836d14c8afecc21cf360083d3b7415c8cb289125 Mon Sep 17 00:00:00 2001
From: DHrpcs3 <dh.rpcs3@gmail.com>
Date: Tue, 5 Jan 2016 18:42:54 +0200
Subject: [PATCH 02/13] rsx::pad_texture & rsx::convert_linear_swizzle moved to
 rsx_utils Added rsx::convert_scale_image & rsx::clip_image to rsx_utils

---
 rpcs3/Emu/RSX/RSXThread.cpp   |   1 +
 rpcs3/Emu/RSX/RSXThread.h     |  86 ----------------------------
 rpcs3/Emu/RSX/rsx_utils.cpp   |  45 +++++++++++++++
 rpcs3/Emu/RSX/rsx_utils.h     | 104 ++++++++++++++++++++++++++++++++++
 rpcs3/emucore.vcxproj         |   2 +
 rpcs3/emucore.vcxproj.filters |   6 ++
 6 files changed, 158 insertions(+), 86 deletions(-)
 create mode 100644 rpcs3/Emu/RSX/rsx_utils.cpp
 create mode 100644 rpcs3/Emu/RSX/rsx_utils.h

diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp
index d40b67cd9f..9903bd1ffd 100644
--- a/rpcs3/Emu/RSX/RSXThread.cpp
+++ b/rpcs3/Emu/RSX/RSXThread.cpp
@@ -10,6 +10,7 @@
 #include "Emu/SysCalls/lv2/sys_time.h"
 
 #include "Common/BufferUtils.h"
+#include "rsx_utils.h"
 
 extern "C"
 {
diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h
index 0cecb914ed..620d989d1f 100644
--- a/rpcs3/Emu/RSX/RSXThread.h
+++ b/rpcs3/Emu/RSX/RSXThread.h
@@ -226,92 +226,6 @@ namespace rsx
 		void read(void *dst, u32 width, u32 height, u32 pitch);
 	};
 
-	template<typename T>
-	void pad_texture(void* inputPixels, void* outputPixels, u16 inputWidth, u16 inputHeight, u16 outputWidth, u16 outputHeight) 
-	{
-		T *src, *dst;
-		src = static_cast<T*>(inputPixels);
-		dst = static_cast<T*>(outputPixels);
-
-		for (u16 h = 0; h < inputHeight; ++h)
-		{
-			const u32 padded_pos = h * outputWidth;
-			const u32 pos = h * inputWidth;
-			for (u16 w = 0; w < inputWidth; ++w)
-			{
-				dst[padded_pos + w] = src[pos + w];
-			}
-		}
-	}
-
-	/*   Note: What the ps3 calls swizzling in this case is actually z-ordering / morton ordering of pixels
-	*       - Input can be swizzled or linear, bool flag handles conversion to and from
-	*       - It will handle any width and height that are a power of 2, square or non square
-	*	 Restriction: It has mixed results if the height or width is not a power of 2
-	*/
-	template<typename T>
-	void convert_linear_swizzle(void* inputPixels, void* outputPixels, u16 width, u16 height, bool inputIsSwizzled)
-	{
-		u32 log2width, log2height;
-
-		log2width = log2(width);
-		log2height = log2(height);
-
-		// Max mask possible for square texture
-		u32 x_mask = 0x55555555;
-		u32 y_mask = 0xAAAAAAAA;
-
-		// We have to limit the masks to the lower of the two dimensions to allow for non-square textures
-		u32 limit_mask = (log2width < log2height) ? log2width : log2height;
-		// double the limit mask to account for bits in both x and y
-		limit_mask = 1 << (limit_mask << 1);
-
-		//x_mask, bits above limit are 1's for x-carry
-		x_mask = (x_mask | ~(limit_mask - 1));
-		//y_mask. bits above limit are 0'd, as we use a different method for y-carry over
-		y_mask = (y_mask & (limit_mask - 1));
-
-		u32 offs_y = 0;
-		u32 offs_x = 0;
-		u32 offs_x0 = 0; //total y-carry offset for x
-		u32 y_incr = limit_mask;
-
-		T *src, *dst;
-
-		if (!inputIsSwizzled)
-		{
-			for (int y = 0; y < height; ++y) 
-			{
-				src = static_cast<T*>(inputPixels) + y*width;
-				dst = static_cast<T*>(outputPixels) + offs_y;
-				offs_x = offs_x0;
-				for (int x = 0; x < width; ++x) 
-				{
-					dst[offs_x] = src[x];
-					offs_x = (offs_x - x_mask) & x_mask;
-				}
-				offs_y = (offs_y - y_mask) & y_mask;
-				if (offs_y == 0) offs_x0 += y_incr;
-			}
-		}
-		else 
-		{
-			for (int y = 0; y < height; ++y) 
-			{
-				src = static_cast<T*>(inputPixels) + offs_y;
-				dst = static_cast<T*>(outputPixels) + y*width;
-				offs_x = offs_x0;
-				for (int x = 0; x < width; ++x) 
-				{
-					dst[x] = src[offs_x];
-					offs_x = (offs_x - x_mask) & x_mask;
-				}
-				offs_y = (offs_y - y_mask) & y_mask;
-				if (offs_y == 0) offs_x0 += y_incr;
-			}
-		}
-	}
-
 	struct surface_info
 	{
 		u8 log2height;
diff --git a/rpcs3/Emu/RSX/rsx_utils.cpp b/rpcs3/Emu/RSX/rsx_utils.cpp
new file mode 100644
index 0000000000..25a9b5ecf2
--- /dev/null
+++ b/rpcs3/Emu/RSX/rsx_utils.cpp
@@ -0,0 +1,45 @@
+#include "stdafx.h"
+#include "rsx_utils.h"
+
+extern "C"
+{
+#include "libswscale/swscale.h"
+}
+
+namespace rsx
+{
+	void convert_scale_image(u8 *dst, AVPixelFormat dst_format, int dst_width, int dst_height, int dst_pitch,
+		const u8 *src, AVPixelFormat src_format, int src_width, int src_height, int src_pitch, int src_slice_h, bool bilinear)
+	{
+		std::unique_ptr<SwsContext, void(*)(SwsContext*)> sws(sws_getContext(src_width, src_height, src_format,
+			dst_width, dst_height, dst_format, bilinear ? SWS_FAST_BILINEAR : SWS_POINT, NULL, NULL, NULL), sws_freeContext);
+
+		sws_scale(sws.get(), &src, &src_pitch, 0, src_slice_h, &dst, &dst_pitch);
+	}
+
+	void convert_scale_image(std::unique_ptr<u8[]>& dst, AVPixelFormat dst_format, int dst_width, int dst_height, int dst_pitch,
+		const u8 *src, AVPixelFormat src_format, int src_width, int src_height, int src_pitch, int src_slice_h, bool bilinear)
+	{
+		dst.reset(new u8[dst_pitch * dst_height]);
+		convert_scale_image(dst.get(), dst_format, dst_width, dst_height, dst_pitch,
+			src, src_format, src_width, src_height, src_pitch, src_slice_h, bilinear);
+	}
+
+	void clip_image(u8 *dst, const u8 *src, int clip_x, int clip_y, int clip_w, int clip_h, int bpp, int src_pitch, int dst_pitch)
+	{
+		for (int y = 0; y < clip_h; ++y)
+		{
+			u8 *dst_row = dst + y * dst_pitch;
+			const u8 *src_row = src + (y + clip_y) * src_pitch + clip_x * bpp;
+
+			std::memmove(dst_row, src_row, clip_w * bpp);
+		}
+	}
+
+	void clip_image(std::unique_ptr<u8[]>& dst, const u8 *src,
+		int clip_x, int clip_y, int clip_w, int clip_h, int bpp, int src_pitch, int dst_pitch)
+	{
+		dst.reset(new u8[clip_h * dst_pitch]);
+		clip_image(dst.get(), src, clip_x, clip_y, clip_w, clip_h, bpp, src_pitch, dst_pitch);
+	}
+}
diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h
new file mode 100644
index 0000000000..33ad93be5f
--- /dev/null
+++ b/rpcs3/Emu/RSX/rsx_utils.h
@@ -0,0 +1,104 @@
+#pragma once
+
+extern "C"
+{
+#include <libavutil/pixfmt.h>
+}
+
+namespace rsx
+{
+	template<typename T>
+	void pad_texture(void* inputPixels, void* outputPixels, u16 inputWidth, u16 inputHeight, u16 outputWidth, u16 outputHeight)
+	{
+		T *src, *dst;
+		src = static_cast<T*>(inputPixels);
+		dst = static_cast<T*>(outputPixels);
+
+		for (u16 h = 0; h < inputHeight; ++h)
+		{
+			const u32 padded_pos = h * outputWidth;
+			const u32 pos = h * inputWidth;
+			for (u16 w = 0; w < inputWidth; ++w)
+			{
+				dst[padded_pos + w] = src[pos + w];
+			}
+		}
+	}
+
+	/*   Note: What the ps3 calls swizzling in this case is actually z-ordering / morton ordering of pixels
+	*       - Input can be swizzled or linear, bool flag handles conversion to and from
+	*       - It will handle any width and height that are a power of 2, square or non square
+	*	 Restriction: It has mixed results if the height or width is not a power of 2
+	*/
+	template<typename T>
+	void convert_linear_swizzle(void* inputPixels, void* outputPixels, u16 width, u16 height, bool inputIsSwizzled)
+	{
+		u32 log2width, log2height;
+
+		log2width = log2(width);
+		log2height = log2(height);
+
+		// Max mask possible for square texture
+		u32 x_mask = 0x55555555;
+		u32 y_mask = 0xAAAAAAAA;
+
+		// We have to limit the masks to the lower of the two dimensions to allow for non-square textures
+		u32 limit_mask = (log2width < log2height) ? log2width : log2height;
+		// double the limit mask to account for bits in both x and y
+		limit_mask = 1 << (limit_mask << 1);
+
+		//x_mask, bits above limit are 1's for x-carry
+		x_mask = (x_mask | ~(limit_mask - 1));
+		//y_mask. bits above limit are 0'd, as we use a different method for y-carry over
+		y_mask = (y_mask & (limit_mask - 1));
+
+		u32 offs_y = 0;
+		u32 offs_x = 0;
+		u32 offs_x0 = 0; //total y-carry offset for x
+		u32 y_incr = limit_mask;
+
+		T *src, *dst;
+
+		if (!inputIsSwizzled)
+		{
+			for (int y = 0; y < height; ++y)
+			{
+				src = static_cast<T*>(inputPixels) + y*width;
+				dst = static_cast<T*>(outputPixels) + offs_y;
+				offs_x = offs_x0;
+				for (int x = 0; x < width; ++x)
+				{
+					dst[offs_x] = src[x];
+					offs_x = (offs_x - x_mask) & x_mask;
+				}
+				offs_y = (offs_y - y_mask) & y_mask;
+				if (offs_y == 0) offs_x0 += y_incr;
+			}
+		}
+		else
+		{
+			for (int y = 0; y < height; ++y)
+			{
+				src = static_cast<T*>(inputPixels) + offs_y;
+				dst = static_cast<T*>(outputPixels) + y*width;
+				offs_x = offs_x0;
+				for (int x = 0; x < width; ++x)
+				{
+					dst[x] = src[offs_x];
+					offs_x = (offs_x - x_mask) & x_mask;
+				}
+				offs_y = (offs_y - y_mask) & y_mask;
+				if (offs_y == 0) offs_x0 += y_incr;
+			}
+		}
+	}
+
+	void convert_scale_image(u8 *dst, AVPixelFormat dst_format, int dst_width, int dst_height, int dst_pitch,
+		const u8 *src, AVPixelFormat src_format, int src_width, int src_height, int src_pitch, int src_slice_h, bool bilinear);
+
+	void convert_scale_image(std::unique_ptr<u8[]>& dst, AVPixelFormat dst_format, int dst_width, int dst_height, int dst_pitch,
+		const u8 *src, AVPixelFormat src_format, int src_width, int src_height, int src_pitch, int src_slice_h, bool bilinear);
+
+	void clip_image(u8 *dst, const u8 *src, int clip_x, int clip_y, int clip_w, int clip_h, int bpp, int src_pitch, int dst_pitch);
+	void clip_image(std::unique_ptr<u8[]>& dst, const u8 *src, int clip_x, int clip_y, int clip_w, int clip_h, int bpp, int src_pitch, int dst_pitch);
+}
diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj
index ddfa06f7bb..a9d013b1f7 100644
--- a/rpcs3/emucore.vcxproj
+++ b/rpcs3/emucore.vcxproj
@@ -129,6 +129,7 @@
     <ClCompile Include="Emu\RSX\Common\VertexProgramDecompiler.cpp" />
     <ClCompile Include="Emu\RSX\GCM.cpp" />
     <ClCompile Include="Emu\RSX\Null\NullGSRender.cpp" />
+    <ClCompile Include="Emu\RSX\rsx_utils.cpp" />
     <ClCompile Include="Emu\state.cpp" />
     <ClCompile Include="Emu\SysCalls\lv2\sys_dbg.cpp" />
     <ClCompile Include="Emu\SysCalls\lv2\sys_fs.cpp" />
@@ -574,6 +575,7 @@
     <ClInclude Include="Emu\Memory\vm_ptr.h" />
     <ClInclude Include="Emu\Memory\vm_ref.h" />
     <ClInclude Include="Emu\Memory\vm_var.h" />
+    <ClInclude Include="Emu\RSX\rsx_utils.h" />
     <ClInclude Include="Emu\state.h" />
     <ClInclude Include="Emu\SysCalls\Callback.h" />
     <ClInclude Include="Emu\SysCalls\CB_FUNC.h" />
diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters
index 30963300e8..c0cdafaaf1 100644
--- a/rpcs3/emucore.vcxproj.filters
+++ b/rpcs3/emucore.vcxproj.filters
@@ -927,6 +927,9 @@
     <ClCompile Include="Emu\RSX\GCM.cpp">
       <Filter>Emu\GPU\RSX</Filter>
     </ClCompile>
+    <ClCompile Include="Emu\RSX\rsx_utils.cpp">
+      <Filter>Emu\GPU\RSX</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="Crypto\aes.h">
@@ -1773,5 +1776,8 @@
     <ClInclude Include="..\Utilities\BitField.h">
       <Filter>Utilities</Filter>
     </ClInclude>
+    <ClInclude Include="Emu\RSX\rsx_utils.h">
+      <Filter>Emu\GPU\RSX</Filter>
+    </ClInclude>
   </ItemGroup>
 </Project>
\ No newline at end of file

From 11ccc498fd02f8bbb1f38dc600179016681f7be2 Mon Sep 17 00:00:00 2001
From: DHrpcs3 <dh.rpcs3@gmail.com>
Date: Tue, 5 Jan 2016 18:46:28 +0200
Subject: [PATCH 03/13] Reimplemented nv3089::image_in

---
 rpcs3/Emu/RSX/RSXThread.cpp | 264 ++++++++++++++++++++----------------
 1 file changed, 148 insertions(+), 116 deletions(-)

diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp
index 9903bd1ffd..c68ec9421e 100644
--- a/rpcs3/Emu/RSX/RSXThread.cpp
+++ b/rpcs3/Emu/RSX/RSXThread.cpp
@@ -12,11 +12,6 @@
 #include "Common/BufferUtils.h"
 #include "rsx_utils.h"
 
-extern "C"
-{
-#include "libswscale/swscale.h"
-}
-
 #define CMD_DEBUG 0
 
 bool user_asked_for_frame_capture = false;
@@ -354,34 +349,36 @@ namespace rsx
 	{
 		never_inline void image_in(u32 arg)
 		{
-			const u16 src_height = method_registers[NV3089_IMAGE_IN_SIZE] >> 16;
-			const u16 src_pitch = method_registers[NV3089_IMAGE_IN_FORMAT];
-			const u8 src_origin = method_registers[NV3089_IMAGE_IN_FORMAT] >> 16;
-			const u8 src_inter = method_registers[NV3089_IMAGE_IN_FORMAT] >> 24;
-			const u32 src_color_format = method_registers[NV3089_SET_COLOR_FORMAT];
-			const u32 operation = method_registers[NV3089_SET_OPERATION];
+			u32 operation = method_registers[NV3089_SET_OPERATION];
 
-			const u16 out_w = method_registers[NV3089_IMAGE_OUT_SIZE];
-			const u16 out_h = method_registers[NV3089_IMAGE_OUT_SIZE] >> 16;
+			u32 clip_x = method_registers[NV3089_CLIP_POINT] & 0xffff;
+			u32 clip_y = method_registers[NV3089_CLIP_POINT] >> 16;
+			u32 clip_w = method_registers[NV3089_CLIP_SIZE] & 0xffff;
+			u32 clip_h = method_registers[NV3089_CLIP_SIZE] >> 16;
 
-			// handle weird RSX quirk, doesn't report less than 16 pixels width in some cases 
-			u16 src_width = method_registers[NV3089_IMAGE_IN_SIZE];
-			if (src_width == 16 && out_w < 16 && method_registers[NV3089_DS_DX] == (1 << 20))
+			u32 out_x = method_registers[NV3089_IMAGE_OUT_POINT] & 0xffff;
+			u32 out_y = method_registers[NV3089_IMAGE_OUT_POINT] >> 16;
+			u32 out_w = method_registers[NV3089_IMAGE_OUT_SIZE] & 0xffff;
+			u32 out_h = method_registers[NV3089_IMAGE_OUT_SIZE] >> 16;
+
+			u16 in_w = method_registers[NV3089_IMAGE_IN_SIZE];
+			u16 in_h = method_registers[NV3089_IMAGE_IN_SIZE] >> 16;
+			u16 in_pitch = method_registers[NV3089_IMAGE_IN_FORMAT];
+			u8 in_origin = method_registers[NV3089_IMAGE_IN_FORMAT] >> 16;
+			u8 in_inter = method_registers[NV3089_IMAGE_IN_FORMAT] >> 24;
+			u32 src_color_format = method_registers[NV3089_SET_COLOR_FORMAT];
+
+			f32 in_x = (method_registers[NV3089_IMAGE_IN] & 0xffff) / 16.f;
+			f32 in_y = (method_registers[NV3089_IMAGE_IN] >> 16) / 16.f;
+
+			if (in_origin != CELL_GCM_TRANSFER_ORIGIN_CORNER)
 			{
-				src_width = out_w;
+				LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown origin (%d)", in_origin);
 			}
 
-			const u16 u = method_registers[NV3089_IMAGE_IN]; // inX (currently ignored)
-			const u16 v = method_registers[NV3089_IMAGE_IN] >> 16; // inY (currently ignored)
-
-			if (src_origin != CELL_GCM_TRANSFER_ORIGIN_CORNER)
+			if (in_inter != CELL_GCM_TRANSFER_INTERPOLATOR_ZOH && in_inter != CELL_GCM_TRANSFER_INTERPOLATOR_FOH)
 			{
-				LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown origin (%d)", src_origin);
-			}
-
-			if (src_inter != CELL_GCM_TRANSFER_INTERPOLATOR_ZOH && src_inter != CELL_GCM_TRANSFER_INTERPOLATOR_FOH)
-			{
-				LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown inter (%d)", src_inter);
+				LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown inter (%d)", in_inter);
 			}
 
 			if (operation != CELL_GCM_TRANSFER_OPERATION_SRCCOPY)
@@ -395,6 +392,8 @@ namespace rsx
 			u32 dst_offset;
 			u32 dst_dma = 0;
 			u16 dst_color_format;
+			u32 out_pitch = 0;
+			u32 out_aligment = 64;
 
 			switch (method_registers[NV3089_SET_CONTEXT_SURFACE])
 			{
@@ -402,6 +401,8 @@ namespace rsx
 				dst_dma = method_registers[NV3062_SET_CONTEXT_DMA_IMAGE_DESTIN];
 				dst_offset = method_registers[NV3062_SET_OFFSET_DESTIN];
 				dst_color_format = method_registers[NV3062_SET_COLOR_FORMAT];
+				out_pitch = method_registers[NV3062_SET_PITCH] >> 16;
+				out_aligment = method_registers[NV3062_SET_PITCH] & 0xffff;
 				break;
 
 			case CELL_GCM_CONTEXT_SWIZZLE2D:
@@ -412,19 +413,39 @@ namespace rsx
 
 			default:
 				LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown m_context_surface (0x%x)", method_registers[NV3089_SET_CONTEXT_SURFACE]);
-				break;
-			}
-
-			if (!dst_dma)
-			{
-				LOG_ERROR(RSX, "dst_dma not set");
 				return;
 			}
 
-			LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: src = 0x%x, dst = 0x%x", src_offset, dst_offset);
+			u32 src_address = get_address(src_offset, src_dma);
+			u32 dst_address = get_address(dst_offset, dst_dma);
 
-			u8* pixels_src = vm::_ptr<u8>(get_address(src_offset, src_dma));
-			u8* pixels_dst = vm::_ptr<u8>(get_address(dst_offset, dst_dma));
+			u32 in_bpp = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? 2 : 4; // bytes per pixel
+			u32 out_bpp = dst_color_format == CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 ? 2 : 4;
+
+			if (out_pitch == 0)
+			{
+				out_pitch = out_bpp * out_w;
+			}
+
+			if (in_pitch == 0)
+			{
+				in_pitch = in_bpp * in_w;
+			}
+
+			if (clip_w > out_w)
+			{
+				clip_w = out_w;
+			}
+
+			if (clip_h > out_h)
+			{
+				clip_h = out_h;
+			}
+
+			//LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: src = 0x%x, dst = 0x%x", src_address, dst_address);
+
+			u8* pixels_src = vm::_ptr<u8>(src_address);
+			u8* pixels_dst = vm::_ptr<u8>(dst_address);
 
 			if (dst_color_format != CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 &&
 				dst_color_format != CELL_GCM_TRANSFER_SURFACE_FORMAT_A8R8G8B8)
@@ -438,38 +459,103 @@ namespace rsx
 				LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown src_color_format (%d)", src_color_format);
 			}
 
-			LOG_WARNING(RSX, "NV3089_IMAGE_IN_SIZE: SIZE=0x%08x, pitch=0x%x, offset=0x%x, scaleX=%f, scaleY=%f, CLIP_SIZE=0x%08x, OUT_SIZE=0x%08x",
-				method_registers[NV3089_IMAGE_IN_SIZE], src_pitch, src_offset, double(1 << 20) / (method_registers[NV3089_DS_DX]), double(1 << 20) / (method_registers[NV3089_DT_DY]),
-				method_registers[NV3089_CLIP_SIZE], method_registers[NV3089_IMAGE_OUT_SIZE]);
-
-			const u32 in_bpp = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? 2 : 4; // bytes per pixel
-			const u32 out_bpp = dst_color_format == CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 ? 2 : 4;
+			//LOG_WARNING(RSX, "NV3089_IMAGE_IN_SIZE: SIZE=0x%08x, pitch=0x%x, offset=0x%x, scaleX=%f, scaleY=%f, CLIP_SIZE=0x%08x, OUT_SIZE=0x%08x",
+			//	method_registers[NV3089_IMAGE_IN_SIZE], in_pitch, src_offset, double(1 << 20) / (method_registers[NV3089_DS_DX]), double(1 << 20) / (method_registers[NV3089_DT_DY]),
+			//	method_registers[NV3089_CLIP_SIZE], method_registers[NV3089_IMAGE_OUT_SIZE]);
 
 			std::unique_ptr<u8[]> temp1, temp2;
 
-			// resize/convert if necessary
-			if (in_bpp != out_bpp && src_width != out_w && src_height != out_h)
+			AVPixelFormat in_format = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB;
+			AVPixelFormat out_format = dst_color_format == CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB;
+
+			u32 out_offset = out_x * out_bpp + out_pitch * out_y;
+
+			bool need_clip = method_registers[NV3089_CLIP_SIZE] != method_registers[NV3089_IMAGE_IN_SIZE] || method_registers[NV3089_CLIP_POINT];
+			bool need_convert = out_format != in_format || out_w != in_w || out_h != in_h;
+
+			u32 slice_h = (u32)(clip_h * (method_registers[NV3089_DS_DX] / 1048576.f));
+
+			if (slice_h)
 			{
-				temp1.reset(new u8[out_bpp * out_w * out_h]);
-
-				AVPixelFormat in_format = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB;
-				AVPixelFormat out_format = dst_color_format == CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB;
-
-				std::unique_ptr<SwsContext, void(*)(SwsContext*)> sws(sws_getContext(src_width, src_height, in_format, out_w, out_h, out_format,
-					src_inter ? SWS_FAST_BILINEAR : SWS_POINT, NULL, NULL, NULL), sws_freeContext);
-
-				int in_line = in_bpp * src_width;
-				u8* out_ptr = temp1.get();
-				int out_line = out_bpp * out_w;
-
-				sws_scale(sws.get(), &pixels_src, &in_line, 0, src_height, &out_ptr, &out_line);
-
-				pixels_src = out_ptr; // use resized image as a source
+				if (clip_h < out_h)
+				{
+					--slice_h;
+				}
+			}
+			else
+			{
+				slice_h = clip_h;
 			}
 
-			// Not sure if swizzle should be after clipping or not
-			if (method_registers[NV3089_SET_CONTEXT_SURFACE] == CELL_GCM_CONTEXT_SWIZZLE2D)
+			if (method_registers[NV3089_SET_CONTEXT_SURFACE] != CELL_GCM_CONTEXT_SWIZZLE2D)
 			{
+				if (need_convert || need_clip)
+				{
+					if (need_clip)
+					{
+						if (need_convert)
+						{
+							convert_scale_image(temp1, out_format, out_w, out_h, out_pitch,
+								pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false);
+
+							clip_image(pixels_dst + out_offset, temp1.get(), clip_x, clip_y, clip_w, clip_h, out_bpp, out_pitch, out_pitch);
+						}
+						else
+						{
+							clip_image(pixels_dst + out_offset, pixels_src, clip_x, clip_y, clip_w, clip_h, out_bpp, in_pitch, out_pitch);
+						}
+					}
+					else
+					{
+						convert_scale_image(pixels_dst + out_offset, out_format, out_w, out_h, out_pitch,
+							pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false);
+					}
+				}
+				else
+				{
+					if (out_pitch != in_pitch || out_pitch != out_bpp * out_w)
+					{
+						for (u32 y = 0; y < out_h; ++y)
+						{
+							u8 *dst = pixels_dst + out_x * out_bpp + out_pitch * (y + out_y);
+							u8 *src = pixels_src + in_pitch * y;
+
+							std::memmove(dst, src, out_w * out_bpp);
+						}
+					}
+					else
+					{
+						std::memmove(pixels_dst + out_offset, pixels_src, out_pitch * out_h);
+					}
+				}
+			}
+			else
+			{
+				if (need_convert || need_clip)
+				{
+					if (need_clip)
+					{
+						if (need_convert)
+						{
+							convert_scale_image(temp1, out_format, out_w, out_h, out_pitch,
+								pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false);
+
+							clip_image(temp2, temp1.get(), clip_x, clip_y, clip_w, clip_h, out_bpp, out_pitch, out_pitch);
+						}
+						else
+						{
+							clip_image(temp2, pixels_src, clip_x, clip_y, clip_w, clip_h, out_bpp, in_pitch, out_pitch);
+						}
+					}
+					else
+					{
+						convert_scale_image(temp2, out_format, out_w, out_h, out_pitch,
+							pixels_src, in_format, in_w, in_h, in_pitch, clip_h, in_inter ? true : false);
+					}
+
+					pixels_src = temp2.get();
+				}
+
 				u8 sw_width_log2 = method_registers[NV309E_SET_FORMAT] >> 16;
 				u8 sw_height_log2 = method_registers[NV309E_SET_FORMAT] >> 24;
 
@@ -480,8 +566,6 @@ namespace rsx
 				u16 sw_width = 1 << sw_width_log2;
 				u16 sw_height = 1 << sw_height_log2;
 
-				std::unique_ptr<u8[]> sw_temp;
-
 				temp2.reset(new u8[out_bpp * sw_width * sw_height]);
 
 				u8* linear_pixels = pixels_src;
@@ -490,7 +574,7 @@ namespace rsx
 				// Check and pad texture out if we are given non square texture for swizzle to be correct
 				if (sw_width != out_w || sw_height != out_h)
 				{
-					sw_temp.reset(new u8[out_bpp * sw_width * sw_height]);
+					std::unique_ptr<u8[]> sw_temp(new u8[out_bpp * sw_width * sw_height]);
 
 					switch (out_bpp)
 					{
@@ -521,59 +605,7 @@ namespace rsx
 					break;
 				}
 
-				//pixels_src = swizzled_pixels;
-
-				// TODO: Handle Clipping/Image out when swizzled
 				std::memcpy(pixels_dst, swizzled_pixels, out_bpp * sw_width * sw_height);
-				return;
-			}
-
-			// clip if necessary
-			if (method_registers[NV3089_CLIP_SIZE] != method_registers[NV3089_IMAGE_OUT_SIZE] ||
-				method_registers[NV3089_CLIP_POINT] || method_registers[NV3089_IMAGE_OUT_POINT])
-			{
-				// Note: There are cases currently where the if statement above is true, but this for loop doesn't hit, leading to nothing getting copied to pixels_dst
-				// Currently it seems needed to avoid some errors/crashes
-				for (s32 y = (method_registers[NV3089_CLIP_POINT] >> 16), dst_y = (method_registers[NV3089_IMAGE_OUT_POINT] >> 16); y < out_h; y++, dst_y++)
-				{
-					if (dst_y >= 0 && dst_y < method_registers[NV3089_IMAGE_OUT_SIZE] >> 16)
-					{
-						// destination line
-						u8* dst_line = pixels_dst + dst_y * out_bpp * (method_registers[NV3089_IMAGE_OUT_SIZE] & 0xffff)
-							+ std::min<s32>(std::max<s32>(method_registers[NV3089_IMAGE_OUT_POINT] & 0xffff, 0), method_registers[NV3089_IMAGE_OUT_SIZE] & 0xffff);
-
-						size_t dst_max = std::min<s32>(
-							std::max<s32>((s32)(method_registers[NV3089_IMAGE_OUT_SIZE] & 0xffff) - (method_registers[NV3089_IMAGE_OUT_POINT] & 0xffff), 0),
-							method_registers[NV3089_IMAGE_OUT_SIZE] & 0xffff) * out_bpp;
-
-						if (y >= 0 && y < std::min<s32>(method_registers[NV3089_CLIP_SIZE] >> 16, out_h))
-						{
-							// source line
-							u8* src_line = pixels_src + y * out_bpp * out_w +
-								std::min<s32>(std::max<s32>(method_registers[NV3089_CLIP_POINT] & 0xffff, 0), method_registers[NV3089_CLIP_SIZE] & 0xffff);
-							size_t src_max = std::min<s32>(
-								std::max<s32>((s32)(method_registers[NV3089_CLIP_SIZE] & 0xffff) - (method_registers[NV3089_CLIP_POINT] & 0xffff), 0),
-								method_registers[NV3089_CLIP_SIZE] & 0xffff) * out_bpp;
-
-							std::pair<u8*, size_t>
-								z0 = { src_line + 0, std::min<size_t>(dst_max, std::max<s64>(0, method_registers[NV3089_CLIP_POINT] & 0xffff)) },
-								d0 = { src_line + z0.second, std::min<size_t>(dst_max - z0.second, src_max) },
-								z1 = { src_line + d0.second, dst_max - z0.second - d0.second };
-
-							std::memset(z0.first, 0, z0.second);
-							std::memcpy(d0.first, src_line, d0.second);
-							std::memset(z1.first, 0, z1.second);
-						}
-						else
-						{
-							std::memset(dst_line, 0, dst_max);
-						}
-					}
-				}
-			}
-			else
-			{
-				std::memcpy(pixels_dst, pixels_src, out_w * out_h * out_bpp);
 			}
 		}
 	}

From 509bbddac1c7dc58311eceabc0860c080d19f600 Mon Sep 17 00:00:00 2001
From: DHrpcs3 <dh.rpcs3@gmail.com>
Date: Tue, 5 Jan 2016 22:27:41 +0200
Subject: [PATCH 04/13] OpenGL renderer: use pitch as image row length Fixed
 rsx_utils code style

---
 rpcs3/Emu/RSX/GL/GLGSRender.cpp     |  33 +++++++-
 rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp | 115 ++++++++++++++++++----------
 rpcs3/Emu/RSX/rsx_utils.h           |  49 +++++++-----
 3 files changed, 132 insertions(+), 65 deletions(-)

diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp
index 46edb5f7e5..7fb1ed6811 100644
--- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp
+++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp
@@ -948,6 +948,14 @@ static const u32 mr_color_dma[rsx::limits::color_buffers_count] =
 	NV4097_SET_CONTEXT_DMA_COLOR_D
 };
 
+static const u32 mr_color_pitch[rsx::limits::color_buffers_count] =
+{
+	NV4097_SET_SURFACE_PITCH_A,
+	NV4097_SET_SURFACE_PITCH_B,
+	NV4097_SET_SURFACE_PITCH_C,
+	NV4097_SET_SURFACE_PITCH_D
+};
+
 void GLGSRender::read_buffers()
 {
 	if (!draw_fbo)
@@ -963,7 +971,16 @@ void GLGSRender::read_buffers()
 		{
 			for (int i = index; i < index + count; ++i)
 			{
-				u32 color_address = rsx::get_address(rsx::method_registers[mr_color_offset[i]], rsx::method_registers[mr_color_dma[i]]);
+				u32 offset = rsx::method_registers[mr_color_offset[i]];
+				u32 location = rsx::method_registers[mr_color_dma[i]];
+				u32 pitch = rsx::method_registers[mr_color_pitch[i]];
+
+				if (pitch <= 64)
+					continue;
+
+				m_draw_tex_color[i].pixel_unpack_settings().row_length(pitch / (color_format.channel_size * color_format.channel_count));
+
+				u32 color_address = rsx::get_address(offset, location);
 				__glcheck m_draw_tex_color[i].copy_from(vm::base(color_address), color_format.format, color_format.type);
 			}
 		};
@@ -1065,7 +1082,16 @@ void GLGSRender::write_buffers()
 
 				//}, gl::buffer::access::read);
 
-				u32 color_address = rsx::get_address(rsx::method_registers[mr_color_offset[i]], rsx::method_registers[mr_color_dma[i]]);
+				u32 offset = rsx::method_registers[mr_color_offset[i]];
+				u32 location = rsx::method_registers[mr_color_dma[i]];
+				u32 pitch = rsx::method_registers[mr_color_pitch[i]];
+
+				if (pitch <= 64)
+					continue;
+
+				m_draw_tex_color[i].pixel_pack_settings().row_length(pitch / (color_format.channel_size * color_format.channel_count));
+
+				u32 color_address = rsx::get_address(offset, location);
 				__glcheck m_draw_tex_color[i].copy_to(vm::base(color_address), color_format.format, color_format.type);
 			}
 		};
@@ -1173,8 +1199,7 @@ void GLGSRender::flip(int buffer)
 				.type(gl::texture::type::uint_8_8_8_8)
 				.format(gl::texture::format::bgra);
 
-			m_flip_tex_color.pixel_unpack_settings().aligment(1);
-			m_flip_tex_color.pixel_pack_settings().aligment(1);
+			m_flip_tex_color.pixel_unpack_settings().aligment(1).row_length(buffer_pitch);
 
 			__glcheck m_flip_fbo.recreate();
 			__glcheck m_flip_fbo.color = m_flip_tex_color;
diff --git a/rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp b/rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp
index 6653eace01..baad5818f7 100644
--- a/rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp
+++ b/rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp
@@ -4,11 +4,45 @@
 #include "../GCM.h"
 #include "../RSXThread.h"
 #include "../RSXTexture.h"
+#include "../rsx_utils.h"
 
 namespace rsx
 {
 	namespace gl
 	{
+		static const int gl_tex_min_filter[] =
+		{
+			GL_NEAREST, // unused
+			GL_NEAREST,
+			GL_LINEAR,
+			GL_NEAREST_MIPMAP_NEAREST,
+			GL_LINEAR_MIPMAP_NEAREST,
+			GL_NEAREST_MIPMAP_LINEAR,
+			GL_LINEAR_MIPMAP_LINEAR,
+			GL_NEAREST, // CELL_GCM_TEXTURE_CONVOLUTION_MIN
+		};
+
+		static const int gl_tex_mag_filter[] =
+		{
+			GL_NEAREST, // unused
+			GL_NEAREST,
+			GL_LINEAR,
+			GL_NEAREST, // unused
+			GL_LINEAR  // CELL_GCM_TEXTURE_CONVOLUTION_MAG
+		};
+
+		static const int gl_tex_zfunc[] =
+		{
+			GL_NEVER,
+			GL_LESS,
+			GL_EQUAL,
+			GL_LEQUAL,
+			GL_GREATER,
+			GL_NOTEQUAL,
+			GL_GEQUAL,
+			GL_ALWAYS,
+		};
+
 		void texture::create()
 		{
 			if (m_id)
@@ -59,7 +93,9 @@ namespace rsx
 		void texture::init(rsx::texture& tex)
 		{
 			if (!m_id)
+			{
 				create();
+			}
 
 			bind();
 
@@ -69,8 +105,10 @@ namespace rsx
 
 			//TODO: safe init
 
-			int format = tex.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
-			bool is_swizzled = !(tex.format() & CELL_GCM_TEXTURE_LN);
+			u32 full_format = tex.format();
+
+			u32 format = full_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
+			bool is_swizzled = ~full_format & CELL_GCM_TEXTURE_LN;
 
 			const u8* pixels = vm::ps3::_ptr<u8>(texaddr);
 			u8 *unswizzledPixels;
@@ -78,10 +116,16 @@ namespace rsx
 			// NOTE: This must be in ARGB order in all forms below.
 			const GLint *glRemap = glRemapStandard;
 
+			::gl::pixel_pack_settings().apply();
+			::gl::pixel_unpack_settings().apply();
+
+			u32 pitch = tex.pitch();
+
 			switch (format)
 			{
 			case CELL_GCM_TEXTURE_B8: // One 8-bit fixed-point number
 			{
+				glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch);
 				glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_BLUE, GL_UNSIGNED_BYTE, pixels);
 
 				static const GLint swizzleMaskB8[] = { GL_BLUE, GL_BLUE, GL_BLUE, GL_BLUE };
@@ -92,6 +136,7 @@ namespace rsx
 			case CELL_GCM_TEXTURE_A1R5G5B5:
 			{
 				glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE);
+				glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 2);
 
 				// TODO: texture swizzling
 				glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV, pixels);
@@ -101,6 +146,7 @@ namespace rsx
 
 			case CELL_GCM_TEXTURE_A4R4G4B4:
 			{
+				glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 2);
 				glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4, pixels);
 
 				// We read it in as R4G4B4A4, so we need to remap each component.
@@ -111,6 +157,7 @@ namespace rsx
 
 			case CELL_GCM_TEXTURE_R5G6B5:
 			{
+				glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 2);
 				glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE);
 				glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, tex.width(), tex.height(), 0, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, pixels);
 				glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_FALSE);
@@ -119,6 +166,8 @@ namespace rsx
 
 			case CELL_GCM_TEXTURE_A8R8G8B8:
 			{
+				glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 4);
+
 				if (is_swizzled)
 				{
 					u32 *src, *dst;
@@ -167,6 +216,7 @@ namespace rsx
 
 			case CELL_GCM_TEXTURE_G8B8:
 			{
+				glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 2);
 				glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_RG, GL_UNSIGNED_BYTE, pixels);
 
 				static const GLint swizzleMaskG8B8[] = { GL_RED, GL_GREEN, GL_RED, GL_GREEN };
@@ -176,6 +226,8 @@ namespace rsx
 
 			case CELL_GCM_TEXTURE_R6G5B5:
 			{
+				glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 2);
+
 				// TODO: Probably need to actually unswizzle if is_swizzled.
 				const u32 numPixels = tex.width() * tex.height();
 				unswizzledPixels = (u8 *)malloc(numPixels * 4);
@@ -196,30 +248,36 @@ namespace rsx
 
 			case CELL_GCM_TEXTURE_DEPTH24_D8: //  24-bit unsigned fixed-point number and 8 bits of garbage
 			{
+				glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 4);
+
 				glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT24, tex.width(), tex.height(), 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_BYTE, pixels);
 				break;
 			}
 
 			case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: // 24-bit unsigned float and 8 bits of garbage
 			{
+				glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 4);
 				glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT24, tex.width(), tex.height(), 0, GL_DEPTH_COMPONENT, GL_FLOAT, pixels);
 				break;
 			}
 
 			case CELL_GCM_TEXTURE_DEPTH16: // 16-bit unsigned fixed-point number
 			{
+				glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 2);
 				glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT16, tex.width(), tex.height(), 0, GL_DEPTH_COMPONENT, GL_SHORT, pixels);
 				break;
 			}
 
 			case CELL_GCM_TEXTURE_DEPTH16_FLOAT: // 16-bit unsigned float
 			{
+				glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 2);
 				glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT16, tex.width(), tex.height(), 0, GL_DEPTH_COMPONENT, GL_FLOAT, pixels);
 				break;
 			}
 
 			case CELL_GCM_TEXTURE_X16: // A 16-bit fixed-point number
 			{
+				glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 2);
 				glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE);
 				glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_RED, GL_UNSIGNED_SHORT, pixels);
 				glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_FALSE);
@@ -231,6 +289,7 @@ namespace rsx
 
 			case CELL_GCM_TEXTURE_Y16_X16: // Two 16-bit fixed-point numbers
 			{
+				glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 4);
 				glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE);
 				glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_RG, GL_UNSIGNED_SHORT, pixels);
 				glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_FALSE);
@@ -241,6 +300,7 @@ namespace rsx
 
 			case CELL_GCM_TEXTURE_R5G5B5A1:
 			{
+				glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 2);
 				glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE);
 				glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1, pixels);
 				glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_FALSE);
@@ -249,6 +309,7 @@ namespace rsx
 
 			case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: // Four fp16 values
 			{
+				glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 8);
 				glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE);
 				glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_RGBA, GL_HALF_FLOAT, pixels);
 				glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_FALSE);
@@ -257,12 +318,16 @@ namespace rsx
 
 			case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: // Four fp32 values
 			{
-				glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_BGRA, GL_FLOAT, pixels);
+				glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 16);
+				glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE);
+				glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_RGBA, GL_FLOAT, pixels);
+				glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_FALSE);
 				break;
 			}
 
 			case CELL_GCM_TEXTURE_X32_FLOAT: // One 32-bit floating-point number
 			{
+				glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 4);
 				glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_RED, GL_FLOAT, pixels);
 
 				static const GLint swizzleMaskX32_FLOAT[] = { GL_RED, GL_ONE, GL_ONE, GL_ONE };
@@ -272,9 +337,9 @@ namespace rsx
 
 			case CELL_GCM_TEXTURE_D1R5G5B5:
 			{
+				glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 2);
 				glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE);
 
-
 				// TODO: Texture swizzling
 				glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV, pixels);
 
@@ -287,6 +352,7 @@ namespace rsx
 
 			case CELL_GCM_TEXTURE_D8R8G8B8: // 8 bits of garbage and three unsigned 8-bit fixed-point numbers
 			{
+				glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 4);
 				glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8, pixels);
 
 				static const GLint swizzleMaskX32_D8R8G8B8[] = { GL_ONE, GL_RED, GL_GREEN, GL_BLUE };
@@ -297,6 +363,7 @@ namespace rsx
 
 			case CELL_GCM_TEXTURE_Y16_X16_FLOAT: // Two fp16 values
 			{
+				glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 4);
 				glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE);
 				glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_RG, GL_HALF_FLOAT, pixels);
 				glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_FALSE);
@@ -308,6 +375,8 @@ namespace rsx
 
 			case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
 			{
+				glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 4);
+
 				const u32 numPixels = tex.width() * tex.height();
 				unswizzledPixels = (u8 *)malloc(numPixels * 4);
 				// TODO: Speed.
@@ -332,6 +401,8 @@ namespace rsx
 
 			case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
 			{
+				glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 4);
+
 				const u32 numPixels = tex.width() * tex.height();
 				unswizzledPixels = (u8 *)malloc(numPixels * 4);
 				// TODO: Speed.
@@ -385,18 +456,6 @@ namespace rsx
 				glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_B, glRemap[3]);
 			}
 
-			static const int gl_tex_zfunc[] =
-			{
-				GL_NEVER,
-				GL_LESS,
-				GL_EQUAL,
-				GL_LEQUAL,
-				GL_GREATER,
-				GL_NOTEQUAL,
-				GL_GEQUAL,
-				GL_ALWAYS,
-			};
-
 			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, gl_wrap(tex.wrap_s()));
 			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, gl_wrap(tex.wrap_t()));
 			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_R, gl_wrap(tex.wrap_r()));
@@ -407,34 +466,10 @@ namespace rsx
 			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_LOD, (tex.min_lod() >> 8));
 			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LOD, (tex.max_lod() >> 8));
 
-
-
-			static const int gl_tex_min_filter[] =
-			{
-				GL_NEAREST, // unused
-				GL_NEAREST,
-				GL_LINEAR,
-				GL_NEAREST_MIPMAP_NEAREST,
-				GL_LINEAR_MIPMAP_NEAREST,
-				GL_NEAREST_MIPMAP_LINEAR,
-				GL_LINEAR_MIPMAP_LINEAR,
-				GL_NEAREST, // CELL_GCM_TEXTURE_CONVOLUTION_MIN
-			};
-
-			static const int gl_tex_mag_filter[] = {
-				GL_NEAREST, // unused
-				GL_NEAREST,
-				GL_LINEAR,
-				GL_NEAREST, // unused
-				GL_LINEAR  // CELL_GCM_TEXTURE_CONVOLUTION_MAG
-			};
-
 			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, gl_tex_min_filter[tex.min_filter()]);
 			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, gl_tex_mag_filter[tex.mag_filter()]);
 			glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, max_aniso(tex.max_aniso()));
 
-			//Unbind();
-
 			if (is_swizzled && format == CELL_GCM_TEXTURE_A8R8G8B8)
 			{
 				free(unswizzledPixels);
diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h
index 33ad93be5f..7fdff4d2dd 100644
--- a/rpcs3/Emu/RSX/rsx_utils.h
+++ b/rpcs3/Emu/RSX/rsx_utils.h
@@ -8,17 +8,16 @@ extern "C"
 namespace rsx
 {
 	template<typename T>
-	void pad_texture(void* inputPixels, void* outputPixels, u16 inputWidth, u16 inputHeight, u16 outputWidth, u16 outputHeight)
+	void pad_texture(void* input_pixels, void* output_pixels, u16 input_width, u16 input_height, u16 output_width, u16 output_height)
 	{
-		T *src, *dst;
-		src = static_cast<T*>(inputPixels);
-		dst = static_cast<T*>(outputPixels);
+		T *src = static_cast<T*>(input_pixels);
+		T *dst = static_cast<T*>(output_pixels);
 
 		for (u16 h = 0; h < inputHeight; ++h)
 		{
-			const u32 padded_pos = h * outputWidth;
-			const u32 pos = h * inputWidth;
-			for (u16 w = 0; w < inputWidth; ++w)
+			const u32 padded_pos = h * output_width;
+			const u32 pos = h * input_width;
+			for (u16 w = 0; w < input_width; ++w)
 			{
 				dst[padded_pos + w] = src[pos + w];
 			}
@@ -31,12 +30,10 @@ namespace rsx
 	*	 Restriction: It has mixed results if the height or width is not a power of 2
 	*/
 	template<typename T>
-	void convert_linear_swizzle(void* inputPixels, void* outputPixels, u16 width, u16 height, bool inputIsSwizzled)
+	void convert_linear_swizzle(void* input_pixels, void* output_pixels, u16 width, u16 height, bool input_is_swizzled)
 	{
-		u32 log2width, log2height;
-
-		log2width = log2(width);
-		log2height = log2(height);
+		u32 log2width = log2(width);
+		u32 log2height = log2(height);
 
 		// Max mask possible for square texture
 		u32 x_mask = 0x55555555;
@@ -57,38 +54,48 @@ namespace rsx
 		u32 offs_x0 = 0; //total y-carry offset for x
 		u32 y_incr = limit_mask;
 
-		T *src, *dst;
-
-		if (!inputIsSwizzled)
+		if (!input_is_swizzled)
 		{
 			for (int y = 0; y < height; ++y)
 			{
-				src = static_cast<T*>(inputPixels) + y*width;
-				dst = static_cast<T*>(outputPixels) + offs_y;
+				T *src = static_cast<T*>(input_pixels) + y * width;
+				T *dst = static_cast<T*>(output_pixels) + offs_y;
 				offs_x = offs_x0;
+
 				for (int x = 0; x < width; ++x)
 				{
 					dst[offs_x] = src[x];
 					offs_x = (offs_x - x_mask) & x_mask;
 				}
+
 				offs_y = (offs_y - y_mask) & y_mask;
-				if (offs_y == 0) offs_x0 += y_incr;
+
+				if (offs_y == 0)
+				{
+					offs_x0 += y_incr;
+				}
 			}
 		}
 		else
 		{
 			for (int y = 0; y < height; ++y)
 			{
-				src = static_cast<T*>(inputPixels) + offs_y;
-				dst = static_cast<T*>(outputPixels) + y*width;
+				T *src = static_cast<T*>(input_pixels) + offs_y;
+				T *dst = static_cast<T*>(output_pixels) + y * width;
 				offs_x = offs_x0;
+
 				for (int x = 0; x < width; ++x)
 				{
 					dst[x] = src[offs_x];
 					offs_x = (offs_x - x_mask) & x_mask;
 				}
+
 				offs_y = (offs_y - y_mask) & y_mask;
-				if (offs_y == 0) offs_x0 += y_incr;
+
+				if (offs_y == 0)
+				{
+					offs_x0 += y_incr;
+				}
 			}
 		}
 	}

From dbccf5fbad69dc9fd12cf35518662a79821eb61d Mon Sep 17 00:00:00 2001
From: DHrpcs3 <dh.rpcs3@gmail.com>
Date: Tue, 5 Jan 2016 22:38:18 +0200
Subject: [PATCH 05/13] gl: fixed multiple textures binding

---
 rpcs3/Emu/RSX/GL/GLGSRender.cpp     | 6 ++++--
 rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp | 4 ++--
 rpcs3/Emu/RSX/GL/rsx_gl_texture.h   | 2 +-
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp
index 7fb1ed6811..1e553ce533 100644
--- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp
+++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp
@@ -325,13 +325,15 @@ void GLGSRender::end()
 	for (int i = 0; i < rsx::limits::textures_count; ++i)
 	{
 		if (!textures[i].enabled())
+		{
 			continue;
+		}
 
 		int location;
 		if (m_program->uniforms.has_location("tex" + std::to_string(i), &location))
 		{
-			__glcheck m_gl_textures[i].init(textures[i]);
-			__glcheck m_program->uniforms.texture(location, i, gl::texture_view(gl::texture::target::texture2D, m_gl_textures[i].id()));
+			__glcheck m_gl_textures[i].init(i, textures[i]);
+			glProgramUniform1i(m_program->id(), location, i);
 		}
 	}
 
diff --git a/rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp b/rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp
index baad5818f7..132378acdd 100644
--- a/rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp
+++ b/rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp
@@ -51,7 +51,6 @@ namespace rsx
 			}
 
 			glGenTextures(1, &m_id);
-			bind();
 		}
 
 		int texture::gl_wrap(int wrap)
@@ -90,13 +89,14 @@ namespace rsx
 			return 1.0f;
 		}
 
-		void texture::init(rsx::texture& tex)
+		void texture::init(int index, rsx::texture& tex)
 		{
 			if (!m_id)
 			{
 				create();
 			}
 
+			glActiveTexture(GL_TEXTURE0 + index);
 			bind();
 
 			const u32 texaddr = rsx::get_address(tex.offset(), tex.location());
diff --git a/rpcs3/Emu/RSX/GL/rsx_gl_texture.h b/rpcs3/Emu/RSX/GL/rsx_gl_texture.h
index ace0b86c2e..1b66efa05a 100644
--- a/rpcs3/Emu/RSX/GL/rsx_gl_texture.h
+++ b/rpcs3/Emu/RSX/GL/rsx_gl_texture.h
@@ -34,7 +34,7 @@ namespace rsx
 				return (v << 2) | (v >> 4);
 			}
 
-			void init(rsx::texture& tex);
+			void init(int index, rsx::texture& tex);
 			void bind();
 			void unbind();
 			void remove();

From ba12c489ec65e6070e78fcea5bc9ef719c2969b9 Mon Sep 17 00:00:00 2001
From: DHrpcs3 <dh.rpcs3@gmail.com>
Date: Tue, 5 Jan 2016 22:55:43 +0200
Subject: [PATCH 06/13] gl: using tiled region for read/write color buffers and
 flip gl: fixed flip buffer row length compilation fixes

---
 rpcs3/Emu/RSX/Common/TextureUtils.cpp |  1 +
 rpcs3/Emu/RSX/GL/GLGSRender.cpp       | 67 ++++++++++++++++++++++++---
 rpcs3/Emu/RSX/RSXThread.cpp           |  6 +--
 rpcs3/Emu/RSX/RSXThread.h             |  4 +-
 rpcs3/Emu/RSX/rsx_utils.h             |  2 +-
 5 files changed, 67 insertions(+), 13 deletions(-)

diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.cpp b/rpcs3/Emu/RSX/Common/TextureUtils.cpp
index 99a68b1d09..a54297475b 100644
--- a/rpcs3/Emu/RSX/Common/TextureUtils.cpp
+++ b/rpcs3/Emu/RSX/Common/TextureUtils.cpp
@@ -2,6 +2,7 @@
 #include "Emu/Memory/vm.h"
 #include "TextureUtils.h"
 #include "../RSXThread.h"
+#include "../rsx_utils.h"
 
 
 #define MAX2(a, b) ((a) > (b)) ? (a) : (b)
diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp
index 1e553ce533..7dab5f83e3 100644
--- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp
+++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp
@@ -971,6 +971,10 @@ void GLGSRender::read_buffers()
 
 		auto read_color_buffers = [&](int index, int count)
 		{
+			u32 width = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16;
+			u32 height = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16;
+
+
 			for (int i = index; i < index + count; ++i)
 			{
 				u32 offset = rsx::method_registers[mr_color_offset[i]];
@@ -982,8 +986,19 @@ void GLGSRender::read_buffers()
 
 				m_draw_tex_color[i].pixel_unpack_settings().row_length(pitch / (color_format.channel_size * color_format.channel_count));
 
-				u32 color_address = rsx::get_address(offset, location);
-				__glcheck m_draw_tex_color[i].copy_from(vm::base(color_address), color_format.format, color_format.type);
+				rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf);
+
+				if (!color_buffer.tile)
+				{
+					__glcheck m_draw_tex_color[i].copy_from(color_buffer.ptr, color_format.format, color_format.type);
+				}
+				else
+				{
+					std::unique_ptr<u8[]> buffer(new u8[pitch * height]);
+					color_buffer.read(buffer.get(), width, height, pitch);
+
+					__glcheck m_draw_tex_color[i].copy_from(buffer.get(), color_format.format, color_format.type);
+				}
 			}
 		};
 
@@ -1016,6 +1031,12 @@ void GLGSRender::read_buffers()
 
 	if (rpcs3::state.config.rsx.opengl.read_depth_buffer)
 	{
+		//TODO: use pitch
+		u32 pitch = rsx::method_registers[NV4097_SET_SURFACE_PITCH_Z];
+
+		if (pitch <= 64)
+			return;
+
 		auto depth_format = surface_depth_format_to_gl(m_surface.depth_format);
 
 		int pixel_size = m_surface.depth_format == CELL_GCM_SURFACE_Z16 ? 2 : 4;
@@ -1065,6 +1086,9 @@ void GLGSRender::write_buffers()
 
 		auto write_color_buffers = [&](int index, int count)
 		{
+			u32 width = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16;
+			u32 height = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16;
+
 			for (int i = index; i < index + count; ++i)
 			{
 				//TODO: swizzle
@@ -1093,8 +1117,20 @@ void GLGSRender::write_buffers()
 
 				m_draw_tex_color[i].pixel_pack_settings().row_length(pitch / (color_format.channel_size * color_format.channel_count));
 
-				u32 color_address = rsx::get_address(offset, location);
-				__glcheck m_draw_tex_color[i].copy_to(vm::base(color_address), color_format.format, color_format.type);
+				rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf);
+
+				if (!color_buffer.tile)
+				{
+					__glcheck m_draw_tex_color[i].copy_to(color_buffer.ptr, color_format.format, color_format.type);
+				}
+				else
+				{
+					std::unique_ptr<u8[]> buffer(new u8[pitch * height]);
+
+					__glcheck m_draw_tex_color[i].copy_to(buffer.get(), color_format.format, color_format.type);
+
+					color_buffer.write(buffer.get(), width, height, pitch);
+				}
 			}
 		};
 
@@ -1127,6 +1163,12 @@ void GLGSRender::write_buffers()
 
 	if (rpcs3::state.config.rsx.opengl.write_depth_buffer)
 	{
+		//TODO: use pitch
+		u32 pitch = rsx::method_registers[NV4097_SET_SURFACE_PITCH_Z];
+
+		if (pitch <= 64)
+			return;
+
 		auto depth_format = surface_depth_format_to_gl(m_surface.depth_format);
 
 		gl::buffer pbo_depth;
@@ -1169,7 +1211,9 @@ void GLGSRender::flip(int buffer)
 	u32 buffer_width = gcm_buffers[buffer].width;
 	u32 buffer_height = gcm_buffers[buffer].height;
 	u32 buffer_pitch = gcm_buffers[buffer].pitch;
-	u32 buffer_address = rsx::get_address(gcm_buffers[buffer].offset, CELL_GCM_LOCATION_LOCAL);
+
+	rsx::tiled_region buffer_region = get_tiled_address(gcm_buffers[buffer].offset, CELL_GCM_LOCATION_LOCAL);
+
 	bool skip_read = false;
 
 	if (draw_fbo && !rpcs3::state.config.rsx.opengl.write_color_buffers)
@@ -1201,7 +1245,7 @@ void GLGSRender::flip(int buffer)
 				.type(gl::texture::type::uint_8_8_8_8)
 				.format(gl::texture::format::bgra);
 
-			m_flip_tex_color.pixel_unpack_settings().aligment(1).row_length(buffer_pitch);
+			m_flip_tex_color.pixel_unpack_settings().aligment(1).row_length(buffer_pitch / 4);
 
 			__glcheck m_flip_fbo.recreate();
 			__glcheck m_flip_fbo.color = m_flip_tex_color;
@@ -1218,7 +1262,16 @@ void GLGSRender::flip(int buffer)
 		glDisable(GL_LOGIC_OP);
 		glDisable(GL_CULL_FACE);
 
-		__glcheck m_flip_tex_color.copy_from(vm::base(buffer_address), gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8);
+		if (buffer_region.tile)
+		{
+			std::unique_ptr<u8> temp(new u8[buffer_height * buffer_pitch]);
+			buffer_region.read(temp.get(), buffer_width, buffer_height, buffer_pitch);
+			__glcheck m_flip_tex_color.copy_from(temp.get(), gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8);
+		}
+		else
+		{
+			__glcheck m_flip_tex_color.copy_from(buffer_region.ptr, gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8);
+		}
 	}
 
 	areai screen_area = coordi({}, { (int)buffer_width, (int)buffer_height });
diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp
index c68ec9421e..d7846797e3 100644
--- a/rpcs3/Emu/RSX/RSXThread.cpp
+++ b/rpcs3/Emu/RSX/RSXThread.cpp
@@ -968,7 +968,7 @@ namespace rsx
 		}
 	}
 	
-	void tiled_address::write(const void *src, u32 width, u32 height, u32 pitch)
+	void tiled_region::write(const void *src, u32 width, u32 height, u32 pitch)
 	{
 		if (!tile)
 		{
@@ -1021,7 +1021,7 @@ namespace rsx
 		}
 	}
 
-	void tiled_address::read(void *dst, u32 width, u32 height, u32 pitch)
+	void tiled_region::read(void *dst, u32 width, u32 height, u32 pitch)
 	{
 		if (!tile)
 		{
@@ -1494,7 +1494,7 @@ namespace rsx
 		return nullptr;
 	}
 
-	tiled_address thread::get_tiled_address(u32 offset, u32 location)
+	tiled_region thread::get_tiled_address(u32 offset, u32 location)
 	{
 		u32 address = get_address(offset, location);
 
diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h
index 620d989d1f..e4537aa13f 100644
--- a/rpcs3/Emu/RSX/RSXThread.h
+++ b/rpcs3/Emu/RSX/RSXThread.h
@@ -215,7 +215,7 @@ namespace rsx
 
 	u32 get_address(u32 offset, u32 location);
 
-	struct tiled_address
+	struct tiled_region
 	{
 		u32 address;
 		u32 base;
@@ -429,7 +429,7 @@ namespace rsx
 		void reset();
 		void init(const u32 ioAddress, const u32 ioSize, const u32 ctrlAddress, const u32 localAddress);
 
-		tiled_address get_tiled_address(u32 offset, u32 location);
+		tiled_region get_tiled_address(u32 offset, u32 location);
 		GcmTileInfo *find_tile(u32 offset, u32 location);
 
 		u32 ReadIO32(u32 addr);
diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h
index 7fdff4d2dd..bb8b08ed45 100644
--- a/rpcs3/Emu/RSX/rsx_utils.h
+++ b/rpcs3/Emu/RSX/rsx_utils.h
@@ -13,7 +13,7 @@ namespace rsx
 		T *src = static_cast<T*>(input_pixels);
 		T *dst = static_cast<T*>(output_pixels);
 
-		for (u16 h = 0; h < inputHeight; ++h)
+		for (u16 h = 0; h < input_height; ++h)
 		{
 			const u32 padded_pos = h * output_width;
 			const u32 pos = h * input_width;

From 48919330d7a57cb97d1283c7365c0f23d7c20068 Mon Sep 17 00:00:00 2001
From: DHrpcs3 <dh.rpcs3@gmail.com>
Date: Tue, 5 Jan 2016 23:29:49 +0200
Subject: [PATCH 07/13] rsx methods moved from rsx thread

---
 rpcs3/Emu/RSX/Common/BufferUtils.cpp          |   1 +
 rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp           |   1 +
 rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp         |   1 +
 rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp    |   1 +
 rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp |   1 +
 rpcs3/Emu/RSX/GL/GLGSRender.cpp               |   1 +
 rpcs3/Emu/RSX/RSXTexture.cpp                  |   1 +
 rpcs3/Emu/RSX/RSXThread.cpp                   | 831 +-----------------
 rpcs3/Emu/RSX/RSXThread.h                     |  63 --
 rpcs3/emucore.vcxproj                         |   2 +
 rpcs3/emucore.vcxproj.filters                 |  12 +-
 11 files changed, 19 insertions(+), 896 deletions(-)

diff --git a/rpcs3/Emu/RSX/Common/BufferUtils.cpp b/rpcs3/Emu/RSX/Common/BufferUtils.cpp
index af3b0a9684..591da7b89b 100644
--- a/rpcs3/Emu/RSX/Common/BufferUtils.cpp
+++ b/rpcs3/Emu/RSX/Common/BufferUtils.cpp
@@ -1,5 +1,6 @@
 #include "stdafx.h"
 #include "BufferUtils.h"
+#include "../rsx_methods.h"
 
 #define MIN2(x, y) ((x) < (y)) ? (x) : (y)
 #define MAX2(x, y) ((x) > (y)) ? (x) : (y)
diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp
index 28bda5f5a5..fc9c3b23a7 100644
--- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp
+++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp
@@ -6,6 +6,7 @@
 #include "d3dx12.h"
 #include "../Common/BufferUtils.h"
 #include "D3D12Formats.h"
+#include "../rsx_methods.h"
 
 namespace
 {
diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp
index dd8629fb7f..f6d50187d1 100644
--- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp
+++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp
@@ -10,6 +10,7 @@
 #include <d3d11on12.h>
 #include "Emu/state.h"
 #include "D3D12Formats.h"
+#include "../rsx_methods.h"
 
 PFN_D3D12_CREATE_DEVICE wrapD3D12CreateDevice;
 PFN_D3D12_GET_DEBUG_INTERFACE wrapD3D12GetDebugInterface;
diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp
index 89392c0b96..477b05acf5 100644
--- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp
+++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp
@@ -5,6 +5,7 @@
 #include "D3D12GSRender.h"
 #include "Emu/state.h"
 #include "D3D12Formats.h"
+#include "../rsx_methods.h"
 
 #define TO_STRING(x) #x
 
diff --git a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp
index 95f50089bd..c940631c59 100644
--- a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp
+++ b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp
@@ -7,6 +7,7 @@
 #include "Emu/System.h"
 #include "Emu/state.h"
 #include "Emu/RSX/GSRender.h"
+#include "../rsx_methods.h"
 
 #include "D3D12.h"
 #include "D3D12GSRender.h"
diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp
index 7dab5f83e3..95f5ac3348 100644
--- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp
+++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp
@@ -4,6 +4,7 @@
 #include "Emu/System.h"
 #include "Emu/state.h"
 #include "GLGSRender.h"
+#include "../rsx_methods.h"
 
 #define DUMP_VERTEX_DATA 0
 
diff --git a/rpcs3/Emu/RSX/RSXTexture.cpp b/rpcs3/Emu/RSX/RSXTexture.cpp
index 017b4e9146..a6eafa09b6 100644
--- a/rpcs3/Emu/RSX/RSXTexture.cpp
+++ b/rpcs3/Emu/RSX/RSXTexture.cpp
@@ -2,6 +2,7 @@
 #include "Emu/Memory/Memory.h"
 #include "RSXThread.h"
 #include "RSXTexture.h"
+#include "rsx_methods.h"
 
 namespace rsx
 {
diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp
index d7846797e3..fd5e3f8554 100644
--- a/rpcs3/Emu/RSX/RSXThread.cpp
+++ b/rpcs3/Emu/RSX/RSXThread.cpp
@@ -10,7 +10,7 @@
 #include "Emu/SysCalls/lv2/sys_time.h"
 
 #include "Common/BufferUtils.h"
-#include "rsx_utils.h"
+#include "rsx_methods.h"
 
 #define CMD_DEBUG 0
 
@@ -19,835 +19,6 @@ frame_capture_data frame_debug;
 
 namespace rsx
 {
-	using rsx_method_t = void(*)(thread*, u32);
-
-	u32 method_registers[0x10000 >> 2];
-	rsx_method_t methods[0x10000 >> 2]{};
-
-	template<typename Type> struct vertex_data_type_from_element_type;
-	template<> struct vertex_data_type_from_element_type<float> { enum { type = CELL_GCM_VERTEX_F }; };
-	template<> struct vertex_data_type_from_element_type<f16> { enum { type = CELL_GCM_VERTEX_SF }; };
-	template<> struct vertex_data_type_from_element_type<u8> { enum { type = CELL_GCM_VERTEX_UB }; };
-	template<> struct vertex_data_type_from_element_type<u16> { enum { type = CELL_GCM_VERTEX_S1 }; };
-
-	namespace nv406e
-	{
-		force_inline void set_reference(thread* rsx, u32 arg)
-		{
-			rsx->ctrl->ref.exchange(arg);
-		}
-
-		force_inline void semaphore_acquire(thread* rsx, u32 arg)
-		{
-			//TODO: dma
-			while (vm::read32(rsx->label_addr + method_registers[NV406E_SEMAPHORE_OFFSET]) != arg)
-			{
-				if (Emu.IsStopped())
-					break;
-
-				std::this_thread::sleep_for(std::chrono::milliseconds(1));
-			}
-		}
-
-		force_inline void semaphore_release(thread* rsx, u32 arg)
-		{
-			//TODO: dma
-			vm::write32(rsx->label_addr + method_registers[NV406E_SEMAPHORE_OFFSET], arg);
-		}
-	}
-
-	namespace nv4097
-	{
-		force_inline void texture_read_semaphore_release(thread* rsx, u32 arg)
-		{
-			//TODO: dma
-			vm::write32(rsx->label_addr + method_registers[NV4097_SET_SEMAPHORE_OFFSET], arg);
-		}
-
-		force_inline void back_end_write_semaphore_release(thread* rsx, u32 arg)
-		{
-			//TODO: dma
-			vm::write32(rsx->label_addr + method_registers[NV4097_SET_SEMAPHORE_OFFSET],
-				(arg & 0xff00ff00) | ((arg & 0xff) << 16) | ((arg >> 16) & 0xff));
-		}
-
-		//fire only when all data passed to rsx cmd buffer
-		template<u32 id, u32 index, int count, typename type>
-		force_inline void set_vertex_data_impl(thread* rsx, u32 arg)
-		{
-			static const size_t element_size = (count * sizeof(type));
-			static const size_t element_size_in_words = element_size / sizeof(u32);
-
-			auto& info = rsx->register_vertex_info[index];
-
-			info.type = vertex_data_type_from_element_type<type>::type;
-			info.size = count;
-			info.frequency = 0;
-			info.stride = 0;
-
-			auto& entry = rsx->register_vertex_data[index];
-
-			//find begin of data
-			size_t begin = id + index * element_size_in_words;
-
-			size_t position = 0;//entry.size();
-			entry.resize(position + element_size);
-
-			memcpy(entry.data() + position, method_registers + begin, element_size);
-		}
-
-		template<u32 index>
-		struct set_vertex_data4ub_m
-		{
-			force_inline static void impl(thread* rsx, u32 arg)
-			{
-				set_vertex_data_impl<NV4097_SET_VERTEX_DATA4UB_M, index, 4, u8>(rsx, arg);
-			}
-		};
-
-		template<u32 index>
-		struct set_vertex_data1f_m
-		{
-			force_inline static void impl(thread* rsx, u32 arg)
-			{
-				set_vertex_data_impl<NV4097_SET_VERTEX_DATA1F_M, index, 1, f32>(rsx, arg);
-			}
-		};
-
-		template<u32 index>
-		struct set_vertex_data2f_m
-		{
-			force_inline static void impl(thread* rsx, u32 arg)
-			{
-				set_vertex_data_impl<NV4097_SET_VERTEX_DATA2F_M, index, 2, f32>(rsx, arg);
-			}
-		};
-
-		template<u32 index>
-		struct set_vertex_data3f_m
-		{
-			force_inline static void impl(thread* rsx, u32 arg)
-			{
-				set_vertex_data_impl<NV4097_SET_VERTEX_DATA3F_M, index, 3, f32>(rsx, arg);
-			}
-		};
-
-		template<u32 index>
-		struct set_vertex_data4f_m
-		{
-			force_inline static void impl(thread* rsx, u32 arg)
-			{
-				set_vertex_data_impl<NV4097_SET_VERTEX_DATA4F_M, index, 4, f32>(rsx, arg);
-			}
-		};
-
-		template<u32 index>
-		struct set_vertex_data2s_m
-		{
-			force_inline static void impl(thread* rsx, u32 arg)
-			{
-				set_vertex_data_impl<NV4097_SET_VERTEX_DATA2S_M, index, 2, u16>(rsx, arg);
-			}
-		};
-
-		template<u32 index>
-		struct set_vertex_data4s_m
-		{
-			force_inline static void impl(thread* rsx, u32 arg)
-			{
-				set_vertex_data_impl<NV4097_SET_VERTEX_DATA4S_M, index, 4, u16>(rsx, arg);
-			}
-		};
-
-		template<u32 index>
-		struct set_vertex_data_array_format
-		{
-			force_inline static void impl(thread* rsx, u32 arg)
-			{
-				auto& info = rsx->vertex_arrays_info[index];
-				info.unpack_array(arg);
-			}
-		};
-
-		force_inline void draw_arrays(thread* rsx, u32 arg)
-		{
-			rsx->draw_command = thread::Draw_command::draw_command_array;
-			u32 first = arg & 0xffffff;
-			u32 count = (arg >> 24) + 1;
-
-			rsx->load_vertex_data(first, count);
-		}
-
-		force_inline void draw_index_array(thread* rsx, u32 arg)
-		{
-			rsx->draw_command = thread::Draw_command::draw_command_indexed;
-			u32 first = arg & 0xffffff;
-			u32 count = (arg >> 24) + 1;
-
-			rsx->load_vertex_data(first, count);
-			rsx->load_vertex_index_data(first, count);
-		}
-
-		force_inline void draw_inline_array(thread* rsx, u32 arg)
-		{
-			rsx->draw_command = thread::Draw_command::draw_command_inlined_array;
-			rsx->draw_inline_vertex_array = true;
-			rsx->inline_vertex_array.push_back(arg);
-		}
-
-		template<u32 index>
-		struct set_transform_constant
-		{
-			force_inline static void impl(thread* rsxthr, u32 arg)
-			{
-				u32 load = method_registers[NV4097_SET_TRANSFORM_CONSTANT_LOAD];
-
-				static const size_t count = 4;
-				static const size_t size = count * sizeof(f32);
-
-				size_t reg = index / 4;
-				size_t subreg = index % 4;
-
-				memcpy(rsxthr->transform_constants[load + reg].rgba + subreg, method_registers + NV4097_SET_TRANSFORM_CONSTANT + reg * count + subreg, sizeof(f32));
-			}
-		};
-
-		template<u32 index>
-		struct set_transform_program
-		{
-			force_inline static void impl(thread* rsx, u32 arg)
-			{
-				u32& load = method_registers[NV4097_SET_TRANSFORM_PROGRAM_LOAD];
-
-				static const size_t count = 4;
-				static const size_t size = count * sizeof(u32);
-
-				memcpy(rsx->transform_program + load++ * count, method_registers + NV4097_SET_TRANSFORM_PROGRAM + index * count, size);
-			}
-		};
-
-		force_inline void set_begin_end(thread* rsx, u32 arg)
-		{
-			if (arg)
-			{
-				rsx->draw_inline_vertex_array = false;
-				rsx->inline_vertex_array.clear();
-				rsx->begin();
-				return;
-			}
-
-			if (!rsx->vertex_draw_count)
-			{
-				bool has_array = false;
-
-				for (int i = 0; i < rsx::limits::vertex_count; ++i)
-				{
-					if (rsx->vertex_arrays_info[i].size > 0)
-					{
-						has_array = true;
-						break;
-					}
-				}
-
-				if (!has_array)
-				{
-					u32 min_count = ~0;
-
-					for (int i = 0; i < rsx::limits::vertex_count; ++i)
-					{
-						if (!rsx->register_vertex_info[i].size)
-							continue;
-
-						u32 count = u32(rsx->register_vertex_data[i].size()) /
-							rsx::get_vertex_type_size(rsx->register_vertex_info[i].type) * rsx->register_vertex_info[i].size;
-
-						if (count < min_count)
-							min_count = count;
-					}
-
-					if (min_count && min_count < ~0)
-					{
-						rsx->vertex_draw_count = min_count;
-					}
-				}
-			}
-
-			rsx->end();
-			rsx->vertex_draw_count = 0;
-		}
-
-		force_inline void get_report(thread* rsx, u32 arg)
-		{
-			u8 type = arg >> 24;
-			u32 offset = arg & 0xffffff;
-
-			//TODO: use DMA
-			vm::ptr<CellGcmReportData> result = { rsx->local_mem_addr + offset, vm::addr };
-
-			result->timer = rsx->timestamp();
-
-			switch (type)
-			{
-			case CELL_GCM_ZPASS_PIXEL_CNT:
-			case CELL_GCM_ZCULL_STATS:
-			case CELL_GCM_ZCULL_STATS1:
-			case CELL_GCM_ZCULL_STATS2:
-			case CELL_GCM_ZCULL_STATS3:
-				result->value = 0;
-				LOG_WARNING(RSX, "NV4097_GET_REPORT: Unimplemented type %d", type);
-				break;
-
-			default:
-				result->value = 0;
-				LOG_ERROR(RSX, "NV4097_GET_REPORT: Bad type %d", type);
-				break;
-			}
-
-			//result->padding = 0;
-		}
-
-		force_inline void clear_report_value(thread* rsx, u32 arg)
-		{
-			switch (arg)
-			{
-			case CELL_GCM_ZPASS_PIXEL_CNT:
-				LOG_WARNING(RSX, "TODO: NV4097_CLEAR_REPORT_VALUE: ZPASS_PIXEL_CNT");
-				break;
-			case CELL_GCM_ZCULL_STATS:
-				LOG_WARNING(RSX, "TODO: NV4097_CLEAR_REPORT_VALUE: ZCULL_STATS");
-				break;
-			default:
-				LOG_ERROR(RSX, "NV4097_CLEAR_REPORT_VALUE: Bad type: %d", arg);
-				break;
-			}
-		}
-	}
-
-	namespace nv308a
-	{
-		template<u32 index>
-		struct color
-		{
-			force_inline static void impl(u32 arg)
-			{
-				u32 point = method_registers[NV308A_POINT];
-				u16 x = point;
-				u16 y = point >> 16;
-
-				if (y)
-				{
-					LOG_ERROR(RSX, "%s: y is not null (0x%x)", __FUNCTION__, y);
-				}
-
-				u32 address = get_address(method_registers[NV3062_SET_OFFSET_DESTIN] + (x << 2) + index * 4, method_registers[NV3062_SET_CONTEXT_DMA_IMAGE_DESTIN]);
-				vm::write32(address, arg);
-			}
-		};
-	}
-
-	namespace nv3089
-	{
-		never_inline void image_in(u32 arg)
-		{
-			u32 operation = method_registers[NV3089_SET_OPERATION];
-
-			u32 clip_x = method_registers[NV3089_CLIP_POINT] & 0xffff;
-			u32 clip_y = method_registers[NV3089_CLIP_POINT] >> 16;
-			u32 clip_w = method_registers[NV3089_CLIP_SIZE] & 0xffff;
-			u32 clip_h = method_registers[NV3089_CLIP_SIZE] >> 16;
-
-			u32 out_x = method_registers[NV3089_IMAGE_OUT_POINT] & 0xffff;
-			u32 out_y = method_registers[NV3089_IMAGE_OUT_POINT] >> 16;
-			u32 out_w = method_registers[NV3089_IMAGE_OUT_SIZE] & 0xffff;
-			u32 out_h = method_registers[NV3089_IMAGE_OUT_SIZE] >> 16;
-
-			u16 in_w = method_registers[NV3089_IMAGE_IN_SIZE];
-			u16 in_h = method_registers[NV3089_IMAGE_IN_SIZE] >> 16;
-			u16 in_pitch = method_registers[NV3089_IMAGE_IN_FORMAT];
-			u8 in_origin = method_registers[NV3089_IMAGE_IN_FORMAT] >> 16;
-			u8 in_inter = method_registers[NV3089_IMAGE_IN_FORMAT] >> 24;
-			u32 src_color_format = method_registers[NV3089_SET_COLOR_FORMAT];
-
-			f32 in_x = (method_registers[NV3089_IMAGE_IN] & 0xffff) / 16.f;
-			f32 in_y = (method_registers[NV3089_IMAGE_IN] >> 16) / 16.f;
-
-			if (in_origin != CELL_GCM_TRANSFER_ORIGIN_CORNER)
-			{
-				LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown origin (%d)", in_origin);
-			}
-
-			if (in_inter != CELL_GCM_TRANSFER_INTERPOLATOR_ZOH && in_inter != CELL_GCM_TRANSFER_INTERPOLATOR_FOH)
-			{
-				LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown inter (%d)", in_inter);
-			}
-
-			if (operation != CELL_GCM_TRANSFER_OPERATION_SRCCOPY)
-			{
-				LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown operation (%d)", operation);
-			}
-
-			const u32 src_offset = method_registers[NV3089_IMAGE_IN_OFFSET];
-			const u32 src_dma = method_registers[NV3089_SET_CONTEXT_DMA_IMAGE];
-
-			u32 dst_offset;
-			u32 dst_dma = 0;
-			u16 dst_color_format;
-			u32 out_pitch = 0;
-			u32 out_aligment = 64;
-
-			switch (method_registers[NV3089_SET_CONTEXT_SURFACE])
-			{
-			case CELL_GCM_CONTEXT_SURFACE2D:
-				dst_dma = method_registers[NV3062_SET_CONTEXT_DMA_IMAGE_DESTIN];
-				dst_offset = method_registers[NV3062_SET_OFFSET_DESTIN];
-				dst_color_format = method_registers[NV3062_SET_COLOR_FORMAT];
-				out_pitch = method_registers[NV3062_SET_PITCH] >> 16;
-				out_aligment = method_registers[NV3062_SET_PITCH] & 0xffff;
-				break;
-
-			case CELL_GCM_CONTEXT_SWIZZLE2D:
-				dst_dma = method_registers[NV309E_SET_CONTEXT_DMA_IMAGE];
-				dst_offset = method_registers[NV309E_SET_OFFSET];
-				dst_color_format = method_registers[NV309E_SET_FORMAT];
-				break;
-
-			default:
-				LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown m_context_surface (0x%x)", method_registers[NV3089_SET_CONTEXT_SURFACE]);
-				return;
-			}
-
-			u32 src_address = get_address(src_offset, src_dma);
-			u32 dst_address = get_address(dst_offset, dst_dma);
-
-			u32 in_bpp = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? 2 : 4; // bytes per pixel
-			u32 out_bpp = dst_color_format == CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 ? 2 : 4;
-
-			if (out_pitch == 0)
-			{
-				out_pitch = out_bpp * out_w;
-			}
-
-			if (in_pitch == 0)
-			{
-				in_pitch = in_bpp * in_w;
-			}
-
-			if (clip_w > out_w)
-			{
-				clip_w = out_w;
-			}
-
-			if (clip_h > out_h)
-			{
-				clip_h = out_h;
-			}
-
-			//LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: src = 0x%x, dst = 0x%x", src_address, dst_address);
-
-			u8* pixels_src = vm::_ptr<u8>(src_address);
-			u8* pixels_dst = vm::_ptr<u8>(dst_address);
-
-			if (dst_color_format != CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 &&
-				dst_color_format != CELL_GCM_TRANSFER_SURFACE_FORMAT_A8R8G8B8)
-			{
-				LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown dst_color_format (%d)", dst_color_format);
-			}
-
-			if (src_color_format != CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 &&
-				src_color_format != CELL_GCM_TRANSFER_SCALE_FORMAT_A8R8G8B8)
-			{
-				LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown src_color_format (%d)", src_color_format);
-			}
-
-			//LOG_WARNING(RSX, "NV3089_IMAGE_IN_SIZE: SIZE=0x%08x, pitch=0x%x, offset=0x%x, scaleX=%f, scaleY=%f, CLIP_SIZE=0x%08x, OUT_SIZE=0x%08x",
-			//	method_registers[NV3089_IMAGE_IN_SIZE], in_pitch, src_offset, double(1 << 20) / (method_registers[NV3089_DS_DX]), double(1 << 20) / (method_registers[NV3089_DT_DY]),
-			//	method_registers[NV3089_CLIP_SIZE], method_registers[NV3089_IMAGE_OUT_SIZE]);
-
-			std::unique_ptr<u8[]> temp1, temp2;
-
-			AVPixelFormat in_format = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB;
-			AVPixelFormat out_format = dst_color_format == CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB;
-
-			u32 out_offset = out_x * out_bpp + out_pitch * out_y;
-
-			bool need_clip = method_registers[NV3089_CLIP_SIZE] != method_registers[NV3089_IMAGE_IN_SIZE] || method_registers[NV3089_CLIP_POINT];
-			bool need_convert = out_format != in_format || out_w != in_w || out_h != in_h;
-
-			u32 slice_h = (u32)(clip_h * (method_registers[NV3089_DS_DX] / 1048576.f));
-
-			if (slice_h)
-			{
-				if (clip_h < out_h)
-				{
-					--slice_h;
-				}
-			}
-			else
-			{
-				slice_h = clip_h;
-			}
-
-			if (method_registers[NV3089_SET_CONTEXT_SURFACE] != CELL_GCM_CONTEXT_SWIZZLE2D)
-			{
-				if (need_convert || need_clip)
-				{
-					if (need_clip)
-					{
-						if (need_convert)
-						{
-							convert_scale_image(temp1, out_format, out_w, out_h, out_pitch,
-								pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false);
-
-							clip_image(pixels_dst + out_offset, temp1.get(), clip_x, clip_y, clip_w, clip_h, out_bpp, out_pitch, out_pitch);
-						}
-						else
-						{
-							clip_image(pixels_dst + out_offset, pixels_src, clip_x, clip_y, clip_w, clip_h, out_bpp, in_pitch, out_pitch);
-						}
-					}
-					else
-					{
-						convert_scale_image(pixels_dst + out_offset, out_format, out_w, out_h, out_pitch,
-							pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false);
-					}
-				}
-				else
-				{
-					if (out_pitch != in_pitch || out_pitch != out_bpp * out_w)
-					{
-						for (u32 y = 0; y < out_h; ++y)
-						{
-							u8 *dst = pixels_dst + out_x * out_bpp + out_pitch * (y + out_y);
-							u8 *src = pixels_src + in_pitch * y;
-
-							std::memmove(dst, src, out_w * out_bpp);
-						}
-					}
-					else
-					{
-						std::memmove(pixels_dst + out_offset, pixels_src, out_pitch * out_h);
-					}
-				}
-			}
-			else
-			{
-				if (need_convert || need_clip)
-				{
-					if (need_clip)
-					{
-						if (need_convert)
-						{
-							convert_scale_image(temp1, out_format, out_w, out_h, out_pitch,
-								pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false);
-
-							clip_image(temp2, temp1.get(), clip_x, clip_y, clip_w, clip_h, out_bpp, out_pitch, out_pitch);
-						}
-						else
-						{
-							clip_image(temp2, pixels_src, clip_x, clip_y, clip_w, clip_h, out_bpp, in_pitch, out_pitch);
-						}
-					}
-					else
-					{
-						convert_scale_image(temp2, out_format, out_w, out_h, out_pitch,
-							pixels_src, in_format, in_w, in_h, in_pitch, clip_h, in_inter ? true : false);
-					}
-
-					pixels_src = temp2.get();
-				}
-
-				u8 sw_width_log2 = method_registers[NV309E_SET_FORMAT] >> 16;
-				u8 sw_height_log2 = method_registers[NV309E_SET_FORMAT] >> 24;
-
-				// 0 indicates height of 1 pixel
-				sw_height_log2 = sw_height_log2 == 0 ? 1 : sw_height_log2;
-
-				// swizzle based on destination size
-				u16 sw_width = 1 << sw_width_log2;
-				u16 sw_height = 1 << sw_height_log2;
-
-				temp2.reset(new u8[out_bpp * sw_width * sw_height]);
-
-				u8* linear_pixels = pixels_src;
-				u8* swizzled_pixels = temp2.get();
-
-				// Check and pad texture out if we are given non square texture for swizzle to be correct
-				if (sw_width != out_w || sw_height != out_h)
-				{
-					std::unique_ptr<u8[]> sw_temp(new u8[out_bpp * sw_width * sw_height]);
-
-					switch (out_bpp)
-					{
-					case 1:
-						pad_texture<u8>(linear_pixels, sw_temp.get(), out_w, out_h, sw_width, sw_height);
-						break;
-					case 2:
-						pad_texture<u16>(linear_pixels, sw_temp.get(), out_w, out_h, sw_width, sw_height);
-						break;
-					case 4:
-						pad_texture<u32>(linear_pixels, sw_temp.get(), out_w, out_h, sw_width, sw_height);
-						break;
-					}
-
-					linear_pixels = sw_temp.get();
-				}
-
-				switch (out_bpp)
-				{
-				case 1:
-					convert_linear_swizzle<u8>(linear_pixels, swizzled_pixels, sw_width, sw_height, false);
-					break;
-				case 2:
-					convert_linear_swizzle<u16>(linear_pixels, swizzled_pixels, sw_width, sw_height, false);
-					break;
-				case 4:
-					convert_linear_swizzle<u32>(linear_pixels, swizzled_pixels, sw_width, sw_height, false);
-					break;
-				}
-
-				std::memcpy(pixels_dst, swizzled_pixels, out_bpp * sw_width * sw_height);
-			}
-		}
-	}
-
-	namespace nv0039
-	{
-		force_inline void buffer_notify(u32 arg)
-		{
-			const u32 inPitch = method_registers[NV0039_PITCH_IN];
-			const u32 outPitch = method_registers[NV0039_PITCH_OUT];
-			const u32 lineLength = method_registers[NV0039_LINE_LENGTH_IN];
-			const u32 lineCount = method_registers[NV0039_LINE_COUNT];
-			const u8 outFormat = method_registers[NV0039_FORMAT] >> 8;
-			const u8 inFormat = method_registers[NV0039_FORMAT];
-			const u32 notify = arg;
-
-			// The existing GCM commands use only the value 0x1 for inFormat and outFormat
-			if (inFormat != 0x01 || outFormat != 0x01)
-			{
-				LOG_ERROR(RSX, "NV0039_OFFSET_IN: Unsupported format: inFormat=%d, outFormat=%d", inFormat, outFormat);
-			}
-
-			if (lineCount == 1 && !inPitch && !outPitch && !notify)
-			{
-				std::memcpy(
-					vm::base(get_address(method_registers[NV0039_OFFSET_OUT], method_registers[NV0039_SET_CONTEXT_DMA_BUFFER_OUT])),
-					vm::base(get_address(method_registers[NV0039_OFFSET_IN], method_registers[NV0039_SET_CONTEXT_DMA_BUFFER_IN])),
-					lineLength);
-			}
-			else
-			{
-				LOG_ERROR(RSX, "NV0039_OFFSET_IN: bad offset(in=0x%x, out=0x%x), pitch(in=0x%x, out=0x%x), line(len=0x%x, cnt=0x%x), fmt(in=0x%x, out=0x%x), notify=0x%x",
-					method_registers[NV0039_OFFSET_IN], method_registers[NV0039_OFFSET_OUT], inPitch, outPitch, lineLength, lineCount, inFormat, outFormat, notify);
-			}
-		}
-	}
-
-	void flip_command(thread* rsx, u32 arg)
-	{
-		if (user_asked_for_frame_capture)
-		{
-			rsx->capture_current_frame = true;
-			user_asked_for_frame_capture = false;
-			frame_debug.reset();
-		}
-		else if (rsx->capture_current_frame)
-		{
-			rsx->capture_current_frame = false;
-			Emu.Pause();
-		}
-
-		rsx->gcm_current_buffer = arg;
-		rsx->flip(arg);
-		// After each flip PS3 system is executing a routine that changes registers value to some default.
-		// Some game use this default state (SH3).
-		rsx->reset();
-
-		rsx->last_flip_time = get_system_time() - 1000000;
-		rsx->gcm_current_buffer = arg;
-		rsx->flip_status = 0;
-
-		if (rsx->flip_handler)
-		{
-			Emu.GetCallbackManager().Async([func = rsx->flip_handler](PPUThread& ppu)
-			{
-				func(ppu, 1);
-			});
-		}
-
-		rsx->sem_flip.post_and_wait();
-
-		//sync
-		double limit;
-		switch (rpcs3::state.config.rsx.frame_limit.value())
-		{
-		case rsx_frame_limit::_50: limit = 50.; break;
-		case rsx_frame_limit::_59_94: limit = 59.94; break;
-		case rsx_frame_limit::_30: limit = 30.; break;
-		case rsx_frame_limit::_60: limit = 60.; break;
-		case rsx_frame_limit::Auto: limit = rsx->fps_limit; break; //TODO
-
-		case rsx_frame_limit::Off:
-		default:
-			return;
-		}
-
-		std::this_thread::sleep_for(std::chrono::milliseconds((s64)(1000.0 / limit - rsx->timer_sync.GetElapsedTimeInMilliSec())));
-		rsx->timer_sync.Start();
-		rsx->local_transform_constants.clear();
-	}
-
-	void user_command(thread* rsx, u32 arg)
-	{
-		if (rsx->user_handler)
-		{
-			Emu.GetCallbackManager().Async([func = rsx->user_handler, arg](PPUThread& ppu)
-			{
-				func(ppu, arg);
-			});
-		}
-		else
-		{
-			throw EXCEPTION("User handler not set");
-		}
-	}
-
-	struct __rsx_methods_t
-	{
-		using rsx_impl_method_t = void(*)(u32);
-
-		template<rsx_method_t impl_func>
-		force_inline static void call_impl_func(thread *rsx, u32 arg)
-		{
-			impl_func(rsx, arg);
-		}
-
-		template<rsx_impl_method_t impl_func>
-		force_inline static void call_impl_func(thread *rsx, u32 arg)
-		{
-			impl_func(arg);
-		}
-
-		template<int id, typename T, T impl_func>
-		static void wrapper(thread *rsx, u32 arg)
-		{
-			// try process using gpu
-			if (rsx->do_method(id, arg))
-			{
-				if (rsx->capture_current_frame && id == NV4097_CLEAR_SURFACE)
-					rsx->capture_frame("clear");
-				return;
-			}
-
-			// not handled by renderer
-			// try process using cpu
-			if (impl_func != nullptr)
-				call_impl_func<impl_func>(rsx, arg);
-		}
-
-		template<int id, int step, int count, template<u32> class T, int index = 0>
-		struct bind_range_impl_t
-		{
-			force_inline static void impl()
-			{
-				bind_range_impl_t<id + step, step, count, T, index + 1>::impl();
-				bind<id, T<index>::impl>();
-			}
-		};
-
-		template<int id, int step, int count, template<u32> class T>
-		struct bind_range_impl_t<id, step, count, T, count>
-		{
-			force_inline static void impl()
-			{
-			}
-		};
-
-		template<int id, int step, int count, template<u32> class T, int index = 0>
-		force_inline static void bind_range()
-		{
-			bind_range_impl_t<id, step, count, T, index>::impl();
-		}
-
-		[[noreturn]] never_inline static void bind_redefinition_error(int id)
-		{
-			throw EXCEPTION("RSX method implementation redefinition (0x%04x)", id);
-		}
-
-		template<int id, typename T, T impl_func>
-		static void bind_impl()
-		{
-			if (methods[id])
-			{
-				bind_redefinition_error(id);
-			}
-
-			methods[id] = wrapper<id, T, impl_func>;
-		}
-
-		template<int id, typename T, T impl_func>
-		static void bind_cpu_only_impl()
-		{
-			if (methods[id])
-			{
-				bind_redefinition_error(id);
-			}
-
-			methods[id] = call_impl_func<impl_func>;
-		}
-
-		template<int id, rsx_impl_method_t impl_func>       static void bind() { bind_impl<id, rsx_impl_method_t, impl_func>(); }
-		template<int id, rsx_method_t impl_func = nullptr>  static void bind() { bind_impl<id, rsx_method_t, impl_func>(); }
-
-		//do not try process on gpu
-		template<int id, rsx_impl_method_t impl_func>      static void bind_cpu_only() { bind_cpu_only_impl<id, rsx_impl_method_t, impl_func>(); }
-		//do not try process on gpu
-		template<int id, rsx_method_t impl_func = nullptr> static void bind_cpu_only() { bind_cpu_only_impl<id, rsx_method_t, impl_func>(); }
-
-		__rsx_methods_t()
-		{
-			// NV406E
-			bind_cpu_only<NV406E_SET_REFERENCE, nv406e::set_reference>();
-			bind<NV406E_SEMAPHORE_ACQUIRE, nv406e::semaphore_acquire>();
-			bind<NV406E_SEMAPHORE_RELEASE, nv406e::semaphore_release>();
-
-			// NV4097
-			bind<NV4097_TEXTURE_READ_SEMAPHORE_RELEASE, nv4097::texture_read_semaphore_release>();
-			bind<NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE, nv4097::back_end_write_semaphore_release>();
-			bind<NV4097_SET_BEGIN_END, nv4097::set_begin_end>();
-			bind<NV4097_CLEAR_SURFACE>();
-			bind<NV4097_DRAW_ARRAYS, nv4097::draw_arrays>();
-			bind<NV4097_DRAW_INDEX_ARRAY, nv4097::draw_index_array>();
-			bind<NV4097_INLINE_ARRAY, nv4097::draw_inline_array>();
-			bind_range<NV4097_SET_VERTEX_DATA_ARRAY_FORMAT, 1, 16, nv4097::set_vertex_data_array_format>();
-			bind_range<NV4097_SET_VERTEX_DATA4UB_M, 1, 16, nv4097::set_vertex_data4ub_m>();
-			bind_range<NV4097_SET_VERTEX_DATA1F_M, 1, 16, nv4097::set_vertex_data1f_m>();
-			bind_range<NV4097_SET_VERTEX_DATA2F_M + 1, 2, 16, nv4097::set_vertex_data2f_m>();
-			bind_range<NV4097_SET_VERTEX_DATA3F_M + 2, 3, 16, nv4097::set_vertex_data3f_m>();
-			bind_range<NV4097_SET_VERTEX_DATA4F_M + 3, 4, 16, nv4097::set_vertex_data4f_m>();
-			bind_range<NV4097_SET_VERTEX_DATA2S_M, 1, 16, nv4097::set_vertex_data2s_m>();
-			bind_range<NV4097_SET_VERTEX_DATA4S_M + 1, 2, 16, nv4097::set_vertex_data4s_m>();
-			bind_range<NV4097_SET_TRANSFORM_CONSTANT, 1, 32, nv4097::set_transform_constant>();
-			bind_range<NV4097_SET_TRANSFORM_PROGRAM + 3, 4, 128, nv4097::set_transform_program>();
-			bind_cpu_only<NV4097_GET_REPORT, nv4097::get_report>();
-			bind_cpu_only<NV4097_CLEAR_REPORT_VALUE, nv4097::clear_report_value>();
-
-			//NV308A
-			bind_range<NV308A_COLOR, 1, 256, nv308a::color>();
-			bind_range<NV308A_COLOR + 256, 1, 512, nv308a::color, 256>();
-
-			//NV3089
-			bind<NV3089_IMAGE_IN, nv3089::image_in>();
-
-			//NV0039
-			bind<NV0039_BUFFER_NOTIFY, nv0039::buffer_notify>();
-
-			// custom methods
-			bind_cpu_only<GCM_FLIP_COMMAND, flip_command>();
-			bind_cpu_only<GCM_SET_USER_COMMAND, user_command>();
-		}
-	} __rsx_methods;
-
 	std::string shaders_cache::path_to_root()
 	{
 		return fs::get_executable_dir() + "data/";
diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h
index e4537aa13f..600719b90a 100644
--- a/rpcs3/Emu/RSX/RSXThread.h
+++ b/rpcs3/Emu/RSX/RSXThread.h
@@ -148,69 +148,6 @@ namespace rsx
 		static std::string path_to_root();
 	};
 
-	//TODO
-	union alignas(4) method_registers_t
-	{
-		u8 _u8[0x10000];
-		u32 _u32[0x10000 >> 2];
-/*
-		struct alignas(4)
-		{
-			u8 pad[NV4097_SET_TEXTURE_OFFSET - 4];
-
-			struct alignas(4) texture_t
-			{
-				u32 offset;
-
-				union format_t
-				{
-					u32 _u32;
-
-					struct
-					{
-						u32: 1;
-						u32 location : 1;
-						u32 cubemap : 1;
-						u32 border_type : 1;
-						u32 dimension : 4;
-						u32 format : 8;
-						u32 mipmap : 16;
-					};
-				} format;
-
-				union address_t
-				{
-					u32 _u32;
-
-					struct
-					{
-						u32 wrap_s : 4;
-						u32 aniso_bias : 4;
-						u32 wrap_t : 4;
-						u32 unsigned_remap : 4;
-						u32 wrap_r : 4;
-						u32 gamma : 4;
-						u32 signed_remap : 4;
-						u32 zfunc : 4;
-					};
-				} address;
-
-				u32 control0;
-				u32 control1;
-				u32 filter;
-				u32 image_rect;
-				u32 border_color;
-			} textures[limits::textures_count];
-		};
-*/
-		u32& operator[](int index)
-		{
-			return _u32[index >> 2];
-		}
-	};
-
-	extern u32 method_registers[0x10000 >> 2];
-
 	u32 get_vertex_type_size(u32 type);
 
 	u32 get_address(u32 offset, u32 location);
diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj
index a9d013b1f7..b7481ccaf9 100644
--- a/rpcs3/emucore.vcxproj
+++ b/rpcs3/emucore.vcxproj
@@ -129,6 +129,7 @@
     <ClCompile Include="Emu\RSX\Common\VertexProgramDecompiler.cpp" />
     <ClCompile Include="Emu\RSX\GCM.cpp" />
     <ClCompile Include="Emu\RSX\Null\NullGSRender.cpp" />
+    <ClCompile Include="Emu\RSX\rsx_methods.cpp" />
     <ClCompile Include="Emu\RSX\rsx_utils.cpp" />
     <ClCompile Include="Emu\state.cpp" />
     <ClCompile Include="Emu\SysCalls\lv2\sys_dbg.cpp" />
@@ -575,6 +576,7 @@
     <ClInclude Include="Emu\Memory\vm_ptr.h" />
     <ClInclude Include="Emu\Memory\vm_ref.h" />
     <ClInclude Include="Emu\Memory\vm_var.h" />
+    <ClInclude Include="Emu\RSX\rsx_methods.h" />
     <ClInclude Include="Emu\RSX\rsx_utils.h" />
     <ClInclude Include="Emu\state.h" />
     <ClInclude Include="Emu\SysCalls\Callback.h" />
diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters
index c0cdafaaf1..659e72fb71 100644
--- a/rpcs3/emucore.vcxproj.filters
+++ b/rpcs3/emucore.vcxproj.filters
@@ -377,9 +377,6 @@
     <ClCompile Include="Loader\TRP.cpp">
       <Filter>Loader</Filter>
     </ClCompile>
-    <ClCompile Include="stdafx.cpp">
-      <Filter>Source Files</Filter>
-    </ClCompile>
     <ClCompile Include="..\Utilities\StrFmt.cpp">
       <Filter>Utilities</Filter>
     </ClCompile>
@@ -930,6 +927,12 @@
     <ClCompile Include="Emu\RSX\rsx_utils.cpp">
       <Filter>Emu\GPU\RSX</Filter>
     </ClCompile>
+    <ClCompile Include="Emu\RSX\rsx_methods.cpp">
+      <Filter>Emu\GPU\RSX</Filter>
+    </ClCompile>
+    <ClCompile Include="stdafx.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="Crypto\aes.h">
@@ -1779,5 +1782,8 @@
     <ClInclude Include="Emu\RSX\rsx_utils.h">
       <Filter>Emu\GPU\RSX</Filter>
     </ClInclude>
+    <ClInclude Include="Emu\RSX\rsx_methods.h">
+      <Filter>Emu\GPU\RSX</Filter>
+    </ClInclude>
   </ItemGroup>
 </Project>
\ No newline at end of file

From c1be0cf3bf0e208823be259c41122a0060f7528c Mon Sep 17 00:00:00 2001
From: DHrpcs3 <dh.rpcs3@gmail.com>
Date: Tue, 5 Jan 2016 23:32:25 +0200
Subject: [PATCH 08/13] Added missed files

---
 rpcs3/Emu/RSX/rsx_methods.cpp | 839 ++++++++++++++++++++++++++++++++++
 rpcs3/Emu/RSX/rsx_methods.h   |  69 +++
 2 files changed, 908 insertions(+)
 create mode 100644 rpcs3/Emu/RSX/rsx_methods.cpp
 create mode 100644 rpcs3/Emu/RSX/rsx_methods.h

diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp
new file mode 100644
index 0000000000..7bbc95723f
--- /dev/null
+++ b/rpcs3/Emu/RSX/rsx_methods.cpp
@@ -0,0 +1,839 @@
+#include "stdafx.h"
+#include "rsx_methods.h"
+#include "RSXThread.h"
+#include "Emu/Memory/Memory.h"
+#include "Emu/System.h"
+#include "Emu/state.h"
+#include "rsx_utils.h"
+#include "Emu/SysCalls/Callback.h"
+#include "Emu/SysCalls/CB_FUNC.h"
+
+namespace rsx
+{
+	u32 method_registers[0x10000 >> 2];
+	rsx_method_t methods[0x10000 >> 2]{};
+
+	template<typename Type> struct vertex_data_type_from_element_type;
+	template<> struct vertex_data_type_from_element_type<float> { enum { type = CELL_GCM_VERTEX_F }; };
+	template<> struct vertex_data_type_from_element_type<f16> { enum { type = CELL_GCM_VERTEX_SF }; };
+	template<> struct vertex_data_type_from_element_type<u8> { enum { type = CELL_GCM_VERTEX_UB }; };
+	template<> struct vertex_data_type_from_element_type<u16> { enum { type = CELL_GCM_VERTEX_S1 }; };
+
+	namespace nv406e
+	{
+		force_inline void set_reference(thread* rsx, u32 arg)
+		{
+			rsx->ctrl->ref.exchange(arg);
+		}
+
+		force_inline void semaphore_acquire(thread* rsx, u32 arg)
+		{
+			//TODO: dma
+			while (vm::ps3::read32(rsx->label_addr + method_registers[NV406E_SEMAPHORE_OFFSET]) != arg)
+			{
+				if (Emu.IsStopped())
+					break;
+
+				std::this_thread::sleep_for(std::chrono::milliseconds(1));
+			}
+		}
+
+		force_inline void semaphore_release(thread* rsx, u32 arg)
+		{
+			//TODO: dma
+			vm::ps3::write32(rsx->label_addr + method_registers[NV406E_SEMAPHORE_OFFSET], arg);
+		}
+	}
+
+	namespace nv4097
+	{
+		force_inline void texture_read_semaphore_release(thread* rsx, u32 arg)
+		{
+			//TODO: dma
+			vm::ps3::write32(rsx->label_addr + method_registers[NV4097_SET_SEMAPHORE_OFFSET], arg);
+		}
+
+		force_inline void back_end_write_semaphore_release(thread* rsx, u32 arg)
+		{
+			//TODO: dma
+			vm::ps3::write32(rsx->label_addr + method_registers[NV4097_SET_SEMAPHORE_OFFSET],
+				(arg & 0xff00ff00) | ((arg & 0xff) << 16) | ((arg >> 16) & 0xff));
+		}
+
+		//fire only when all data passed to rsx cmd buffer
+		template<u32 id, u32 index, int count, typename type>
+		force_inline void set_vertex_data_impl(thread* rsx, u32 arg)
+		{
+			static const size_t element_size = (count * sizeof(type));
+			static const size_t element_size_in_words = element_size / sizeof(u32);
+
+			auto& info = rsx->register_vertex_info[index];
+
+			info.type = vertex_data_type_from_element_type<type>::type;
+			info.size = count;
+			info.frequency = 0;
+			info.stride = 0;
+
+			auto& entry = rsx->register_vertex_data[index];
+
+			//find begin of data
+			size_t begin = id + index * element_size_in_words;
+
+			size_t position = 0;//entry.size();
+			entry.resize(position + element_size);
+
+			memcpy(entry.data() + position, method_registers + begin, element_size);
+		}
+
+		template<u32 index>
+		struct set_vertex_data4ub_m
+		{
+			force_inline static void impl(thread* rsx, u32 arg)
+			{
+				set_vertex_data_impl<NV4097_SET_VERTEX_DATA4UB_M, index, 4, u8>(rsx, arg);
+			}
+		};
+
+		template<u32 index>
+		struct set_vertex_data1f_m
+		{
+			force_inline static void impl(thread* rsx, u32 arg)
+			{
+				set_vertex_data_impl<NV4097_SET_VERTEX_DATA1F_M, index, 1, f32>(rsx, arg);
+			}
+		};
+
+		template<u32 index>
+		struct set_vertex_data2f_m
+		{
+			force_inline static void impl(thread* rsx, u32 arg)
+			{
+				set_vertex_data_impl<NV4097_SET_VERTEX_DATA2F_M, index, 2, f32>(rsx, arg);
+			}
+		};
+
+		template<u32 index>
+		struct set_vertex_data3f_m
+		{
+			force_inline static void impl(thread* rsx, u32 arg)
+			{
+				set_vertex_data_impl<NV4097_SET_VERTEX_DATA3F_M, index, 3, f32>(rsx, arg);
+			}
+		};
+
+		template<u32 index>
+		struct set_vertex_data4f_m
+		{
+			force_inline static void impl(thread* rsx, u32 arg)
+			{
+				set_vertex_data_impl<NV4097_SET_VERTEX_DATA4F_M, index, 4, f32>(rsx, arg);
+			}
+		};
+
+		template<u32 index>
+		struct set_vertex_data2s_m
+		{
+			force_inline static void impl(thread* rsx, u32 arg)
+			{
+				set_vertex_data_impl<NV4097_SET_VERTEX_DATA2S_M, index, 2, u16>(rsx, arg);
+			}
+		};
+
+		template<u32 index>
+		struct set_vertex_data4s_m
+		{
+			force_inline static void impl(thread* rsx, u32 arg)
+			{
+				set_vertex_data_impl<NV4097_SET_VERTEX_DATA4S_M, index, 4, u16>(rsx, arg);
+			}
+		};
+
+		template<u32 index>
+		struct set_vertex_data_array_format
+		{
+			force_inline static void impl(thread* rsx, u32 arg)
+			{
+				auto& info = rsx->vertex_arrays_info[index];
+				info.unpack_array(arg);
+			}
+		};
+
+		force_inline void draw_arrays(thread* rsx, u32 arg)
+		{
+			rsx->draw_command = thread::Draw_command::draw_command_array;
+			u32 first = arg & 0xffffff;
+			u32 count = (arg >> 24) + 1;
+
+			rsx->load_vertex_data(first, count);
+		}
+
+		force_inline void draw_index_array(thread* rsx, u32 arg)
+		{
+			rsx->draw_command = thread::Draw_command::draw_command_indexed;
+			u32 first = arg & 0xffffff;
+			u32 count = (arg >> 24) + 1;
+
+			rsx->load_vertex_data(first, count);
+			rsx->load_vertex_index_data(first, count);
+		}
+
+		force_inline void draw_inline_array(thread* rsx, u32 arg)
+		{
+			rsx->draw_command = thread::Draw_command::draw_command_inlined_array;
+			rsx->draw_inline_vertex_array = true;
+			rsx->inline_vertex_array.push_back(arg);
+		}
+
+		template<u32 index>
+		struct set_transform_constant
+		{
+			force_inline static void impl(thread* rsxthr, u32 arg)
+			{
+				u32 load = method_registers[NV4097_SET_TRANSFORM_CONSTANT_LOAD];
+
+				static const size_t count = 4;
+				static const size_t size = count * sizeof(f32);
+
+				size_t reg = index / 4;
+				size_t subreg = index % 4;
+
+				memcpy(rsxthr->transform_constants[load + reg].rgba + subreg, method_registers + NV4097_SET_TRANSFORM_CONSTANT + reg * count + subreg, sizeof(f32));
+			}
+		};
+
+		template<u32 index>
+		struct set_transform_program
+		{
+			force_inline static void impl(thread* rsx, u32 arg)
+			{
+				u32& load = method_registers[NV4097_SET_TRANSFORM_PROGRAM_LOAD];
+
+				static const size_t count = 4;
+				static const size_t size = count * sizeof(u32);
+
+				memcpy(rsx->transform_program + load++ * count, method_registers + NV4097_SET_TRANSFORM_PROGRAM + index * count, size);
+			}
+		};
+
+		force_inline void set_begin_end(thread* rsx, u32 arg)
+		{
+			if (arg)
+			{
+				rsx->draw_inline_vertex_array = false;
+				rsx->inline_vertex_array.clear();
+				rsx->begin();
+				return;
+			}
+
+			if (!rsx->vertex_draw_count)
+			{
+				bool has_array = false;
+
+				for (int i = 0; i < rsx::limits::vertex_count; ++i)
+				{
+					if (rsx->vertex_arrays_info[i].size > 0)
+					{
+						has_array = true;
+						break;
+					}
+				}
+
+				if (!has_array)
+				{
+					u32 min_count = ~0;
+
+					for (int i = 0; i < rsx::limits::vertex_count; ++i)
+					{
+						if (!rsx->register_vertex_info[i].size)
+							continue;
+
+						u32 count = u32(rsx->register_vertex_data[i].size()) /
+							rsx::get_vertex_type_size(rsx->register_vertex_info[i].type) * rsx->register_vertex_info[i].size;
+
+						if (count < min_count)
+							min_count = count;
+					}
+
+					if (min_count && min_count < ~0)
+					{
+						rsx->vertex_draw_count = min_count;
+					}
+				}
+			}
+
+			rsx->end();
+			rsx->vertex_draw_count = 0;
+		}
+
+		force_inline void get_report(thread* rsx, u32 arg)
+		{
+			u8 type = arg >> 24;
+			u32 offset = arg & 0xffffff;
+
+			//TODO: use DMA
+			vm::ps3::ptr<CellGcmReportData> result = { rsx->local_mem_addr + offset, vm::addr };
+
+			result->timer = rsx->timestamp();
+
+			switch (type)
+			{
+			case CELL_GCM_ZPASS_PIXEL_CNT:
+			case CELL_GCM_ZCULL_STATS:
+			case CELL_GCM_ZCULL_STATS1:
+			case CELL_GCM_ZCULL_STATS2:
+			case CELL_GCM_ZCULL_STATS3:
+				result->value = 0;
+				LOG_WARNING(RSX, "NV4097_GET_REPORT: Unimplemented type %d", type);
+				break;
+
+			default:
+				result->value = 0;
+				LOG_ERROR(RSX, "NV4097_GET_REPORT: Bad type %d", type);
+				break;
+			}
+
+			//result->padding = 0;
+		}
+
+		force_inline void clear_report_value(thread* rsx, u32 arg)
+		{
+			switch (arg)
+			{
+			case CELL_GCM_ZPASS_PIXEL_CNT:
+				LOG_WARNING(RSX, "TODO: NV4097_CLEAR_REPORT_VALUE: ZPASS_PIXEL_CNT");
+				break;
+			case CELL_GCM_ZCULL_STATS:
+				LOG_WARNING(RSX, "TODO: NV4097_CLEAR_REPORT_VALUE: ZCULL_STATS");
+				break;
+			default:
+				LOG_ERROR(RSX, "NV4097_CLEAR_REPORT_VALUE: Bad type: %d", arg);
+				break;
+			}
+		}
+	}
+
+	namespace nv308a
+	{
+		template<u32 index>
+		struct color
+		{
+			force_inline static void impl(u32 arg)
+			{
+				u32 point = method_registers[NV308A_POINT];
+				u16 x = point;
+				u16 y = point >> 16;
+
+				if (y)
+				{
+					LOG_ERROR(RSX, "%s: y is not null (0x%x)", __FUNCTION__, y);
+				}
+
+				u32 address = get_address(method_registers[NV3062_SET_OFFSET_DESTIN] + (x << 2) + index * 4, method_registers[NV3062_SET_CONTEXT_DMA_IMAGE_DESTIN]);
+				vm::ps3::write32(address, arg);
+			}
+		};
+	}
+
+	namespace nv3089
+	{
+		never_inline void image_in(u32 arg)
+		{
+			u32 operation = method_registers[NV3089_SET_OPERATION];
+
+			u32 clip_x = method_registers[NV3089_CLIP_POINT] & 0xffff;
+			u32 clip_y = method_registers[NV3089_CLIP_POINT] >> 16;
+			u32 clip_w = method_registers[NV3089_CLIP_SIZE] & 0xffff;
+			u32 clip_h = method_registers[NV3089_CLIP_SIZE] >> 16;
+
+			u32 out_x = method_registers[NV3089_IMAGE_OUT_POINT] & 0xffff;
+			u32 out_y = method_registers[NV3089_IMAGE_OUT_POINT] >> 16;
+			u32 out_w = method_registers[NV3089_IMAGE_OUT_SIZE] & 0xffff;
+			u32 out_h = method_registers[NV3089_IMAGE_OUT_SIZE] >> 16;
+
+			u16 in_w = method_registers[NV3089_IMAGE_IN_SIZE];
+			u16 in_h = method_registers[NV3089_IMAGE_IN_SIZE] >> 16;
+			u16 in_pitch = method_registers[NV3089_IMAGE_IN_FORMAT];
+			u8 in_origin = method_registers[NV3089_IMAGE_IN_FORMAT] >> 16;
+			u8 in_inter = method_registers[NV3089_IMAGE_IN_FORMAT] >> 24;
+			u32 src_color_format = method_registers[NV3089_SET_COLOR_FORMAT];
+
+			f32 in_x = (method_registers[NV3089_IMAGE_IN] & 0xffff) / 16.f;
+			f32 in_y = (method_registers[NV3089_IMAGE_IN] >> 16) / 16.f;
+
+			if (in_origin != CELL_GCM_TRANSFER_ORIGIN_CORNER)
+			{
+				LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown origin (%d)", in_origin);
+			}
+
+			if (in_inter != CELL_GCM_TRANSFER_INTERPOLATOR_ZOH && in_inter != CELL_GCM_TRANSFER_INTERPOLATOR_FOH)
+			{
+				LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown inter (%d)", in_inter);
+			}
+
+			if (operation != CELL_GCM_TRANSFER_OPERATION_SRCCOPY)
+			{
+				LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown operation (%d)", operation);
+			}
+
+			const u32 src_offset = method_registers[NV3089_IMAGE_IN_OFFSET];
+			const u32 src_dma = method_registers[NV3089_SET_CONTEXT_DMA_IMAGE];
+
+			u32 dst_offset;
+			u32 dst_dma = 0;
+			u16 dst_color_format;
+			u32 out_pitch = 0;
+			u32 out_aligment = 64;
+
+			switch (method_registers[NV3089_SET_CONTEXT_SURFACE])
+			{
+			case CELL_GCM_CONTEXT_SURFACE2D:
+				dst_dma = method_registers[NV3062_SET_CONTEXT_DMA_IMAGE_DESTIN];
+				dst_offset = method_registers[NV3062_SET_OFFSET_DESTIN];
+				dst_color_format = method_registers[NV3062_SET_COLOR_FORMAT];
+				out_pitch = method_registers[NV3062_SET_PITCH] >> 16;
+				out_aligment = method_registers[NV3062_SET_PITCH] & 0xffff;
+				break;
+
+			case CELL_GCM_CONTEXT_SWIZZLE2D:
+				dst_dma = method_registers[NV309E_SET_CONTEXT_DMA_IMAGE];
+				dst_offset = method_registers[NV309E_SET_OFFSET];
+				dst_color_format = method_registers[NV309E_SET_FORMAT];
+				break;
+
+			default:
+				LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown m_context_surface (0x%x)", method_registers[NV3089_SET_CONTEXT_SURFACE]);
+				return;
+			}
+
+			u32 src_address = get_address(src_offset, src_dma);
+			u32 dst_address = get_address(dst_offset, dst_dma);
+
+			u32 in_bpp = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? 2 : 4; // bytes per pixel
+			u32 out_bpp = dst_color_format == CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 ? 2 : 4;
+
+			if (out_pitch == 0)
+			{
+				out_pitch = out_bpp * out_w;
+			}
+
+			if (in_pitch == 0)
+			{
+				in_pitch = in_bpp * in_w;
+			}
+
+			if (clip_w > out_w)
+			{
+				clip_w = out_w;
+			}
+
+			if (clip_h > out_h)
+			{
+				clip_h = out_h;
+			}
+
+			//LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: src = 0x%x, dst = 0x%x", src_address, dst_address);
+
+			u8* pixels_src = vm::ps3::_ptr<u8>(src_address);
+			u8* pixels_dst = vm::ps3::_ptr<u8>(dst_address);
+
+			if (dst_color_format != CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 &&
+				dst_color_format != CELL_GCM_TRANSFER_SURFACE_FORMAT_A8R8G8B8)
+			{
+				LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown dst_color_format (%d)", dst_color_format);
+			}
+
+			if (src_color_format != CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 &&
+				src_color_format != CELL_GCM_TRANSFER_SCALE_FORMAT_A8R8G8B8)
+			{
+				LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown src_color_format (%d)", src_color_format);
+			}
+
+			//LOG_WARNING(RSX, "NV3089_IMAGE_IN_SIZE: SIZE=0x%08x, pitch=0x%x, offset=0x%x, scaleX=%f, scaleY=%f, CLIP_SIZE=0x%08x, OUT_SIZE=0x%08x",
+			//	method_registers[NV3089_IMAGE_IN_SIZE], in_pitch, src_offset, double(1 << 20) / (method_registers[NV3089_DS_DX]), double(1 << 20) / (method_registers[NV3089_DT_DY]),
+			//	method_registers[NV3089_CLIP_SIZE], method_registers[NV3089_IMAGE_OUT_SIZE]);
+
+			std::unique_ptr<u8[]> temp1, temp2;
+
+			AVPixelFormat in_format = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB;
+			AVPixelFormat out_format = dst_color_format == CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB;
+
+			u32 out_offset = out_x * out_bpp + out_pitch * out_y;
+
+			bool need_clip = method_registers[NV3089_CLIP_SIZE] != method_registers[NV3089_IMAGE_IN_SIZE] || method_registers[NV3089_CLIP_POINT];
+			bool need_convert = out_format != in_format || out_w != in_w || out_h != in_h;
+
+			u32 slice_h = (u32)(clip_h * (method_registers[NV3089_DS_DX] / 1048576.f));
+
+			if (slice_h)
+			{
+				if (clip_h < out_h)
+				{
+					--slice_h;
+				}
+			}
+			else
+			{
+				slice_h = clip_h;
+			}
+
+			if (method_registers[NV3089_SET_CONTEXT_SURFACE] != CELL_GCM_CONTEXT_SWIZZLE2D)
+			{
+				if (need_convert || need_clip)
+				{
+					if (need_clip)
+					{
+						if (need_convert)
+						{
+							convert_scale_image(temp1, out_format, out_w, out_h, out_pitch,
+								pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false);
+
+							clip_image(pixels_dst + out_offset, temp1.get(), clip_x, clip_y, clip_w, clip_h, out_bpp, out_pitch, out_pitch);
+						}
+						else
+						{
+							clip_image(pixels_dst + out_offset, pixels_src, clip_x, clip_y, clip_w, clip_h, out_bpp, in_pitch, out_pitch);
+						}
+					}
+					else
+					{
+						convert_scale_image(pixels_dst + out_offset, out_format, out_w, out_h, out_pitch,
+							pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false);
+					}
+				}
+				else
+				{
+					if (out_pitch != in_pitch || out_pitch != out_bpp * out_w)
+					{
+						for (u32 y = 0; y < out_h; ++y)
+						{
+							u8 *dst = pixels_dst + out_x * out_bpp + out_pitch * (y + out_y);
+							u8 *src = pixels_src + in_pitch * y;
+
+							std::memmove(dst, src, out_w * out_bpp);
+						}
+					}
+					else
+					{
+						std::memmove(pixels_dst + out_offset, pixels_src, out_pitch * out_h);
+					}
+				}
+			}
+			else
+			{
+				if (need_convert || need_clip)
+				{
+					if (need_clip)
+					{
+						if (need_convert)
+						{
+							convert_scale_image(temp1, out_format, out_w, out_h, out_pitch,
+								pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false);
+
+							clip_image(temp2, temp1.get(), clip_x, clip_y, clip_w, clip_h, out_bpp, out_pitch, out_pitch);
+						}
+						else
+						{
+							clip_image(temp2, pixels_src, clip_x, clip_y, clip_w, clip_h, out_bpp, in_pitch, out_pitch);
+						}
+					}
+					else
+					{
+						convert_scale_image(temp2, out_format, out_w, out_h, out_pitch,
+							pixels_src, in_format, in_w, in_h, in_pitch, clip_h, in_inter ? true : false);
+					}
+
+					pixels_src = temp2.get();
+				}
+
+				u8 sw_width_log2 = method_registers[NV309E_SET_FORMAT] >> 16;
+				u8 sw_height_log2 = method_registers[NV309E_SET_FORMAT] >> 24;
+
+				// 0 indicates height of 1 pixel
+				sw_height_log2 = sw_height_log2 == 0 ? 1 : sw_height_log2;
+
+				// swizzle based on destination size
+				u16 sw_width = 1 << sw_width_log2;
+				u16 sw_height = 1 << sw_height_log2;
+
+				temp2.reset(new u8[out_bpp * sw_width * sw_height]);
+
+				u8* linear_pixels = pixels_src;
+				u8* swizzled_pixels = temp2.get();
+
+				// Check and pad texture out if we are given non square texture for swizzle to be correct
+				if (sw_width != out_w || sw_height != out_h)
+				{
+					std::unique_ptr<u8[]> sw_temp(new u8[out_bpp * sw_width * sw_height]);
+
+					switch (out_bpp)
+					{
+					case 1:
+						pad_texture<u8>(linear_pixels, sw_temp.get(), out_w, out_h, sw_width, sw_height);
+						break;
+					case 2:
+						pad_texture<u16>(linear_pixels, sw_temp.get(), out_w, out_h, sw_width, sw_height);
+						break;
+					case 4:
+						pad_texture<u32>(linear_pixels, sw_temp.get(), out_w, out_h, sw_width, sw_height);
+						break;
+					}
+
+					linear_pixels = sw_temp.get();
+				}
+
+				switch (out_bpp)
+				{
+				case 1:
+					convert_linear_swizzle<u8>(linear_pixels, swizzled_pixels, sw_width, sw_height, false);
+					break;
+				case 2:
+					convert_linear_swizzle<u16>(linear_pixels, swizzled_pixels, sw_width, sw_height, false);
+					break;
+				case 4:
+					convert_linear_swizzle<u32>(linear_pixels, swizzled_pixels, sw_width, sw_height, false);
+					break;
+				}
+
+				std::memcpy(pixels_dst, swizzled_pixels, out_bpp * sw_width * sw_height);
+			}
+		}
+	}
+
+	namespace nv0039
+	{
+		force_inline void buffer_notify(u32 arg)
+		{
+			const u32 inPitch = method_registers[NV0039_PITCH_IN];
+			const u32 outPitch = method_registers[NV0039_PITCH_OUT];
+			const u32 lineLength = method_registers[NV0039_LINE_LENGTH_IN];
+			const u32 lineCount = method_registers[NV0039_LINE_COUNT];
+			const u8 outFormat = method_registers[NV0039_FORMAT] >> 8;
+			const u8 inFormat = method_registers[NV0039_FORMAT];
+			const u32 notify = arg;
+
+			// The existing GCM commands use only the value 0x1 for inFormat and outFormat
+			if (inFormat != 0x01 || outFormat != 0x01)
+			{
+				LOG_ERROR(RSX, "NV0039_OFFSET_IN: Unsupported format: inFormat=%d, outFormat=%d", inFormat, outFormat);
+			}
+
+			if (lineCount == 1 && !inPitch && !outPitch && !notify)
+			{
+				std::memcpy(
+					vm::base(get_address(method_registers[NV0039_OFFSET_OUT], method_registers[NV0039_SET_CONTEXT_DMA_BUFFER_OUT])),
+					vm::base(get_address(method_registers[NV0039_OFFSET_IN], method_registers[NV0039_SET_CONTEXT_DMA_BUFFER_IN])),
+					lineLength);
+			}
+			else
+			{
+				LOG_ERROR(RSX, "NV0039_OFFSET_IN: bad offset(in=0x%x, out=0x%x), pitch(in=0x%x, out=0x%x), line(len=0x%x, cnt=0x%x), fmt(in=0x%x, out=0x%x), notify=0x%x",
+					method_registers[NV0039_OFFSET_IN], method_registers[NV0039_OFFSET_OUT], inPitch, outPitch, lineLength, lineCount, inFormat, outFormat, notify);
+			}
+		}
+	}
+
+	void flip_command(thread* rsx, u32 arg)
+	{
+		if (user_asked_for_frame_capture)
+		{
+			rsx->capture_current_frame = true;
+			user_asked_for_frame_capture = false;
+			frame_debug.reset();
+		}
+		else if (rsx->capture_current_frame)
+		{
+			rsx->capture_current_frame = false;
+			Emu.Pause();
+		}
+
+		rsx->gcm_current_buffer = arg;
+		rsx->flip(arg);
+		// After each flip PS3 system is executing a routine that changes registers value to some default.
+		// Some game use this default state (SH3).
+		rsx->reset();
+
+		rsx->last_flip_time = get_system_time() - 1000000;
+		rsx->gcm_current_buffer = arg;
+		rsx->flip_status = 0;
+
+		if (rsx->flip_handler)
+		{
+			Emu.GetCallbackManager().Async([func = rsx->flip_handler](PPUThread& ppu)
+			{
+				func(ppu, 1);
+			});
+		}
+
+		rsx->sem_flip.post_and_wait();
+
+		//sync
+		double limit;
+		switch (rpcs3::state.config.rsx.frame_limit.value())
+		{
+		case rsx_frame_limit::_50: limit = 50.; break;
+		case rsx_frame_limit::_59_94: limit = 59.94; break;
+		case rsx_frame_limit::_30: limit = 30.; break;
+		case rsx_frame_limit::_60: limit = 60.; break;
+		case rsx_frame_limit::Auto: limit = rsx->fps_limit; break; //TODO
+
+		case rsx_frame_limit::Off:
+		default:
+			return;
+		}
+
+		std::this_thread::sleep_for(std::chrono::milliseconds((s64)(1000.0 / limit - rsx->timer_sync.GetElapsedTimeInMilliSec())));
+		rsx->timer_sync.Start();
+		rsx->local_transform_constants.clear();
+	}
+
+	void user_command(thread* rsx, u32 arg)
+	{
+		if (rsx->user_handler)
+		{
+			Emu.GetCallbackManager().Async([func = rsx->user_handler, arg](PPUThread& ppu)
+			{
+				func(ppu, arg);
+			});
+		}
+		else
+		{
+			throw EXCEPTION("User handler not set");
+		}
+	}
+
+	struct __rsx_methods_t
+	{
+		using rsx_impl_method_t = void(*)(u32);
+
+		template<rsx_method_t impl_func>
+		force_inline static void call_impl_func(thread *rsx, u32 arg)
+		{
+			impl_func(rsx, arg);
+		}
+
+		template<rsx_impl_method_t impl_func>
+		force_inline static void call_impl_func(thread *rsx, u32 arg)
+		{
+			impl_func(arg);
+		}
+
+		template<int id, typename T, T impl_func>
+		static void wrapper(thread *rsx, u32 arg)
+		{
+			// try process using gpu
+			if (rsx->do_method(id, arg))
+			{
+				if (rsx->capture_current_frame && id == NV4097_CLEAR_SURFACE)
+					rsx->capture_frame("clear");
+				return;
+			}
+
+			// not handled by renderer
+			// try process using cpu
+			if (impl_func != nullptr)
+				call_impl_func<impl_func>(rsx, arg);
+		}
+
+		template<int id, int step, int count, template<u32> class T, int index = 0>
+		struct bind_range_impl_t
+		{
+			force_inline static void impl()
+			{
+				bind_range_impl_t<id + step, step, count, T, index + 1>::impl();
+				bind<id, T<index>::impl>();
+			}
+		};
+
+		template<int id, int step, int count, template<u32> class T>
+		struct bind_range_impl_t<id, step, count, T, count>
+		{
+			force_inline static void impl()
+			{
+			}
+		};
+
+		template<int id, int step, int count, template<u32> class T, int index = 0>
+		force_inline static void bind_range()
+		{
+			bind_range_impl_t<id, step, count, T, index>::impl();
+		}
+
+		[[noreturn]] never_inline static void bind_redefinition_error(int id)
+		{
+			throw EXCEPTION("RSX method implementation redefinition (0x%04x)", id);
+		}
+
+		template<int id, typename T, T impl_func>
+		static void bind_impl()
+		{
+			if (methods[id])
+			{
+				bind_redefinition_error(id);
+			}
+
+			methods[id] = wrapper<id, T, impl_func>;
+		}
+
+		template<int id, typename T, T impl_func>
+		static void bind_cpu_only_impl()
+		{
+			if (methods[id])
+			{
+				bind_redefinition_error(id);
+			}
+
+			methods[id] = call_impl_func<impl_func>;
+		}
+
+		template<int id, rsx_impl_method_t impl_func>       static void bind() { bind_impl<id, rsx_impl_method_t, impl_func>(); }
+		template<int id, rsx_method_t impl_func = nullptr>  static void bind() { bind_impl<id, rsx_method_t, impl_func>(); }
+
+		//do not try process on gpu
+		template<int id, rsx_impl_method_t impl_func>      static void bind_cpu_only() { bind_cpu_only_impl<id, rsx_impl_method_t, impl_func>(); }
+		//do not try process on gpu
+		template<int id, rsx_method_t impl_func = nullptr> static void bind_cpu_only() { bind_cpu_only_impl<id, rsx_method_t, impl_func>(); }
+
+		__rsx_methods_t()
+		{
+			// NV406E
+			bind_cpu_only<NV406E_SET_REFERENCE, nv406e::set_reference>();
+			bind<NV406E_SEMAPHORE_ACQUIRE, nv406e::semaphore_acquire>();
+			bind<NV406E_SEMAPHORE_RELEASE, nv406e::semaphore_release>();
+
+			// NV4097
+			bind<NV4097_TEXTURE_READ_SEMAPHORE_RELEASE, nv4097::texture_read_semaphore_release>();
+			bind<NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE, nv4097::back_end_write_semaphore_release>();
+			bind<NV4097_SET_BEGIN_END, nv4097::set_begin_end>();
+			bind<NV4097_CLEAR_SURFACE>();
+			bind<NV4097_DRAW_ARRAYS, nv4097::draw_arrays>();
+			bind<NV4097_DRAW_INDEX_ARRAY, nv4097::draw_index_array>();
+			bind<NV4097_INLINE_ARRAY, nv4097::draw_inline_array>();
+			bind_range<NV4097_SET_VERTEX_DATA_ARRAY_FORMAT, 1, 16, nv4097::set_vertex_data_array_format>();
+			bind_range<NV4097_SET_VERTEX_DATA4UB_M, 1, 16, nv4097::set_vertex_data4ub_m>();
+			bind_range<NV4097_SET_VERTEX_DATA1F_M, 1, 16, nv4097::set_vertex_data1f_m>();
+			bind_range<NV4097_SET_VERTEX_DATA2F_M + 1, 2, 16, nv4097::set_vertex_data2f_m>();
+			bind_range<NV4097_SET_VERTEX_DATA3F_M + 2, 3, 16, nv4097::set_vertex_data3f_m>();
+			bind_range<NV4097_SET_VERTEX_DATA4F_M + 3, 4, 16, nv4097::set_vertex_data4f_m>();
+			bind_range<NV4097_SET_VERTEX_DATA2S_M, 1, 16, nv4097::set_vertex_data2s_m>();
+			bind_range<NV4097_SET_VERTEX_DATA4S_M + 1, 2, 16, nv4097::set_vertex_data4s_m>();
+			bind_range<NV4097_SET_TRANSFORM_CONSTANT, 1, 32, nv4097::set_transform_constant>();
+			bind_range<NV4097_SET_TRANSFORM_PROGRAM + 3, 4, 128, nv4097::set_transform_program>();
+			bind_cpu_only<NV4097_GET_REPORT, nv4097::get_report>();
+			bind_cpu_only<NV4097_CLEAR_REPORT_VALUE, nv4097::clear_report_value>();
+
+			//NV308A
+			bind_range<NV308A_COLOR, 1, 256, nv308a::color>();
+			bind_range<NV308A_COLOR + 256, 1, 512, nv308a::color, 256>();
+
+			//NV3089
+			bind<NV3089_IMAGE_IN, nv3089::image_in>();
+
+			//NV0039
+			bind<NV0039_BUFFER_NOTIFY, nv0039::buffer_notify>();
+
+			// custom methods
+			bind_cpu_only<GCM_FLIP_COMMAND, flip_command>();
+			bind_cpu_only<GCM_SET_USER_COMMAND, user_command>();
+		}
+	} __rsx_methods;
+}
diff --git a/rpcs3/Emu/RSX/rsx_methods.h b/rpcs3/Emu/RSX/rsx_methods.h
new file mode 100644
index 0000000000..ef34418dee
--- /dev/null
+++ b/rpcs3/Emu/RSX/rsx_methods.h
@@ -0,0 +1,69 @@
+#pragma once
+
+namespace rsx
+{
+	//TODO
+	union alignas(4) method_registers_t
+	{
+		u8 _u8[0x10000];
+		u32 _u32[0x10000 >> 2];
+		/*
+		struct alignas(4)
+		{
+		u8 pad[NV4097_SET_TEXTURE_OFFSET - 4];
+
+		struct alignas(4) texture_t
+		{
+		u32 offset;
+
+		union format_t
+		{
+		u32 _u32;
+
+		struct
+		{
+		u32: 1;
+		u32 location : 1;
+		u32 cubemap : 1;
+		u32 border_type : 1;
+		u32 dimension : 4;
+		u32 format : 8;
+		u32 mipmap : 16;
+		};
+		} format;
+
+		union address_t
+		{
+		u32 _u32;
+
+		struct
+		{
+		u32 wrap_s : 4;
+		u32 aniso_bias : 4;
+		u32 wrap_t : 4;
+		u32 unsigned_remap : 4;
+		u32 wrap_r : 4;
+		u32 gamma : 4;
+		u32 signed_remap : 4;
+		u32 zfunc : 4;
+		};
+		} address;
+
+		u32 control0;
+		u32 control1;
+		u32 filter;
+		u32 image_rect;
+		u32 border_color;
+		} textures[limits::textures_count];
+		};
+		*/
+		u32& operator[](int index)
+		{
+			return _u32[index >> 2];
+		}
+	};
+
+	using rsx_method_t = void(*)(class thread*, u32);
+	extern u32 method_registers[0x10000 >> 2];
+	extern rsx_method_t methods[0x10000 >> 2];
+}

From 3ac9e0933fad212c11f47e29ad7b601061ce64e1 Mon Sep 17 00:00:00 2001
From: DHrpcs3 <dh.rpcs3@gmail.com>
Date: Wed, 6 Jan 2016 01:15:35 +0200
Subject: [PATCH 09/13] gl: fixed nv4097_clear_surface & front face selection
 added window shader and clip plane constants to GCM.h

---
 rpcs3/Emu/RSX/GCM.h             | 12 ++++-
 rpcs3/Emu/RSX/GL/GLGSRender.cpp | 89 ++++++++++++++++++---------------
 rpcs3/Emu/RSX/GL/GLGSRender.h   |  4 +-
 3 files changed, 61 insertions(+), 44 deletions(-)

diff --git a/rpcs3/Emu/RSX/GCM.h b/rpcs3/Emu/RSX/GCM.h
index 156b69837c..c83a009943 100644
--- a/rpcs3/Emu/RSX/GCM.h
+++ b/rpcs3/Emu/RSX/GCM.h
@@ -451,7 +451,17 @@ enum
 	CELL_GCM_POLYGON_MODE_FILL = 0x1B02,
 
 	CELL_GCM_TRUE = 1,
-	CELL_GCM_FALSE = 0
+	CELL_GCM_FALSE = 0,
+
+	CELL_GCM_WINDOW_ORIGIN_TOP = 0,
+	CELL_GCM_WINDOW_ORIGIN_BOTTOM = 1,
+
+	CELL_GCM_WINDOW_PIXEL_CENTER_HALF = 0,
+	CELL_GCM_WINDOW_PIXEL_CENTER_INTEGER = 1,
+
+	CELL_GCM_USER_CLIP_PLANE_DISABLE = 0,
+	CELL_GCM_USER_CLIP_PLANE_ENABLE_LT = 1,
+	CELL_GCM_USER_CLIP_PLANE_ENABLE_GE = 2,
 };
 
 enum
diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp
index 95f5ac3348..937afc4146 100644
--- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp
+++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp
@@ -65,24 +65,6 @@ void GLGSRender::begin()
 	__glcheck glDepthMask(rsx::method_registers[NV4097_SET_DEPTH_MASK]);
 	__glcheck glStencilMask(rsx::method_registers[NV4097_SET_STENCIL_MASK]);
 
-	int viewport_x = int(rsx::method_registers[NV4097_SET_VIEWPORT_HORIZONTAL] & 0xffff);
-	int viewport_y = int(rsx::method_registers[NV4097_SET_VIEWPORT_VERTICAL] & 0xffff);
-	int viewport_w = int(rsx::method_registers[NV4097_SET_VIEWPORT_HORIZONTAL] >> 16);
-	int viewport_h = int(rsx::method_registers[NV4097_SET_VIEWPORT_VERTICAL] >> 16);
-	glViewport(viewport_x, viewport_y, viewport_w, viewport_h);
-
-	//scissor test is always enabled
-	glEnable(GL_SCISSOR_TEST);
-
-	u32 scissor_horizontal = rsx::method_registers[NV4097_SET_SCISSOR_HORIZONTAL];
-	u32 scissor_vertical = rsx::method_registers[NV4097_SET_SCISSOR_VERTICAL];
-	u16 scissor_x = scissor_horizontal;
-	u16 scissor_w = scissor_horizontal >> 16;
-	u16 scissor_y = scissor_vertical;
-	u16 scissor_h = scissor_vertical >> 16;
-
-	__glcheck glScissor(scissor_x, scissor_y, scissor_w, scissor_h);
-
 	if (__glcheck enable(rsx::method_registers[NV4097_SET_DEPTH_TEST_ENABLE], GL_DEPTH_TEST))
 	{
 		__glcheck glDepthFunc(rsx::method_registers[NV4097_SET_DEPTH_FUNC]);
@@ -236,9 +218,10 @@ void GLGSRender::begin()
 	if (__glcheck enable(rsx::method_registers[NV4097_SET_CULL_FACE_ENABLE], GL_CULL_FACE))
 	{
 		__glcheck glCullFace(rsx::method_registers[NV4097_SET_CULL_FACE]);
-		__glcheck glFrontFace(rsx::method_registers[NV4097_SET_FRONT_FACE]);
 	}
 
+	__glcheck glFrontFace(rsx::method_registers[NV4097_SET_FRONT_FACE] ^ 1);
+
 	__glcheck enable(rsx::method_registers[NV4097_SET_POLY_SMOOTH_ENABLE], GL_POLYGON_SMOOTH);
 
 	//NV4097_SET_COLOR_KEY_COLOR
@@ -480,6 +463,44 @@ void GLGSRender::end()
 	rsx::thread::end();
 }
 
+void GLGSRender::set_viewport()
+{
+	u32 viewport_horizontal = rsx::method_registers[NV4097_SET_VIEWPORT_HORIZONTAL];
+	u32 viewport_vertical = rsx::method_registers[NV4097_SET_VIEWPORT_VERTICAL];
+
+	u16 viewport_x = viewport_horizontal & 0xffff;
+	u16 viewport_y = viewport_vertical & 0xffff;
+	u16 viewport_w = viewport_horizontal >> 16;
+	u16 viewport_h = viewport_vertical >> 16;
+
+	u32 scissor_horizontal = rsx::method_registers[NV4097_SET_SCISSOR_HORIZONTAL];
+	u32 scissor_vertical = rsx::method_registers[NV4097_SET_SCISSOR_VERTICAL];
+	u16 scissor_x = scissor_horizontal;
+	u16 scissor_w = scissor_horizontal >> 16;
+	u16 scissor_y = scissor_vertical;
+	u16 scissor_h = scissor_vertical >> 16;
+
+	u32 shader_window = rsx::method_registers[NV4097_SET_SHADER_WINDOW];
+
+	u8 shader_window_origin = (shader_window >> 12) & 0xf;
+
+	//TODO
+	if (true || shader_window_origin == CELL_GCM_WINDOW_ORIGIN_BOTTOM)
+	{
+		__glcheck glViewport(viewport_x, viewport_y, viewport_w, viewport_h);
+		__glcheck glScissor(scissor_x, scissor_y, scissor_w, scissor_h);
+	}
+	else
+	{
+		u16 shader_window_height = shader_window & 0xfff;
+
+		__glcheck glViewport(viewport_x, shader_window_height - viewport_y - viewport_h - 1, viewport_w, viewport_h);
+		__glcheck glScissor(scissor_x, shader_window_height - scissor_y - scissor_h - 1, scissor_w, scissor_h);
+	}
+
+	glEnable(GL_SCISSOR_TEST);
+}
+
 void GLGSRender::on_init_thread()
 {
 	GSRender::on_init_thread();
@@ -558,9 +579,6 @@ void nv4097_clear_surface(u32 arg, GLGSRender* renderer)
 		return;
 	}
 
-	renderer->draw_fbo.bind();
-	glEnable(GL_SCISSOR_TEST);
-
 	/*
 	u16 clear_x = rsx::method_registers[NV4097_SET_CLEAR_RECT_HORIZONTAL];
 	u16 clear_y = rsx::method_registers[NV4097_SET_CLEAR_RECT_VERTICAL];
@@ -569,14 +587,8 @@ void nv4097_clear_surface(u32 arg, GLGSRender* renderer)
 	glScissor(clear_x, clear_y, clear_w, clear_h);
 	*/
 
-	u32 scissor_horizontal = rsx::method_registers[NV4097_SET_SCISSOR_HORIZONTAL];
-	u32 scissor_vertical = rsx::method_registers[NV4097_SET_SCISSOR_VERTICAL];
-	u16 scissor_x = scissor_horizontal;
-	u16 scissor_w = scissor_horizontal >> 16;
-	u16 scissor_y = scissor_vertical;
-	u16 scissor_h = scissor_vertical >> 16;
-
-	glScissor(scissor_x, scissor_y, scissor_w, scissor_h);
+	renderer->init_buffers(true);
+	renderer->draw_fbo.bind();
 
 	GLbitfield mask = 0;
 
@@ -616,8 +628,8 @@ void nv4097_clear_surface(u32 arg, GLGSRender* renderer)
 		mask |= GLenum(gl::buffers::color);
 	}
 
-	renderer->clear_surface_buffers = (gl::buffers)mask;
-	renderer->draw_fbo.clear((gl::buffers)mask);
+	glClear(mask);
+	renderer->write_buffers();
 }
 
 using rsx_method_impl_t = void(*)(u32, GLGSRender*);
@@ -811,7 +823,7 @@ std::pair<gl::texture::type, gl::texture::format> surface_depth_format_to_gl(int
 	}
 }
 
-void GLGSRender::init_buffers()
+void GLGSRender::init_buffers(bool skip_reading)
 {
 	u32 surface_format = rsx::method_registers[NV4097_SET_SURFACE_FORMAT];
 
@@ -893,11 +905,13 @@ void GLGSRender::init_buffers()
 		__glcheck m_draw_tex_depth_stencil.pixel_unpack_settings().aligment(1);
 	}
 
-	if (clear_surface_buffers == gl::buffers::none)
+	if (!skip_reading)
 	{
 		read_buffers();
 	}
 
+	set_viewport();
+
 	switch (rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET])
 	{
 	case CELL_GCM_SURFACE_TARGET_NONE: break;
@@ -926,13 +940,6 @@ void GLGSRender::init_buffers()
 		LOG_ERROR(RSX, "Bad surface color target: %d", rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET]);
 		break;
 	}
-
-	if (clear_surface_buffers != gl::buffers::none)
-	{
-		//draw_fbo.clear(clear_surface_buffers);
-
-		clear_surface_buffers = gl::buffers::none;
-	}
 }
 
 static const u32 mr_color_offset[rsx::limits::color_buffers_count] =
diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h
index b49d52a569..ec7babfe86 100644
--- a/rpcs3/Emu/RSX/GL/GLGSRender.h
+++ b/rpcs3/Emu/RSX/GL/GLGSRender.h
@@ -24,7 +24,6 @@ private:
 
 public:
 	gl::fbo draw_fbo;
-	gl::buffers clear_surface_buffers = gl::buffers::none;
 
 private:
 	GLProgramBuffer m_prog_buffer;
@@ -53,9 +52,10 @@ private:
 
 public:
 	bool load_program();
-	void init_buffers();
+	void init_buffers(bool skip_reading = false);
 	void read_buffers();
 	void write_buffers();
+	void set_viewport();
 
 protected:
 	void begin() override;

From 8912b9880e18a33cfa0d3ae6f00d0f26536e5655 Mon Sep 17 00:00:00 2001
From: DHrpcs3 <dh.rpcs3@gmail.com>
Date: Wed, 6 Jan 2016 01:25:17 +0200
Subject: [PATCH 10/13] rsx: initialize vertex textures on reset

---
 rpcs3/Emu/RSX/RSXTexture.cpp | 23 -----------------------
 rpcs3/Emu/RSX/RSXTexture.h   |  6 ------
 rpcs3/Emu/RSX/RSXThread.cpp  |  7 +++++++
 3 files changed, 7 insertions(+), 29 deletions(-)

diff --git a/rpcs3/Emu/RSX/RSXTexture.cpp b/rpcs3/Emu/RSX/RSXTexture.cpp
index a6eafa09b6..9ad938f6d6 100644
--- a/rpcs3/Emu/RSX/RSXTexture.cpp
+++ b/rpcs3/Emu/RSX/RSXTexture.cpp
@@ -279,24 +279,6 @@ namespace rsx
 		return ((method_registers[NV4097_SET_VERTEX_TEXTURE_FORMAT + (m_index * 8)] >> 16) & 0xffff);
 	}
 
-	u8 vertex_texture::wrap_s() const
-	{
-		return 1;
-		//return ((method_registers[NV4097_SET_VERTEX_TEXTURE_ADDRESS + (m_index * 8)]) & 0xf);
-	}
-
-	u8 vertex_texture::wrap_t() const
-	{
-		return 1;
-		//return ((method_registers[NV4097_SET_VERTEX_TEXTURE_ADDRESS + (m_index * 8)] >> 8) & 0xf);
-	}
-
-	u8 vertex_texture::wrap_r() const
-	{
-		return 1;
-		//return ((method_registers[NV4097_SET_VERTEX_TEXTURE_ADDRESS + (m_index * 8)] >> 16) & 0xf);
-	}
-
 	u8 vertex_texture::unsigned_remap() const
 	{
 		return ((method_registers[NV4097_SET_VERTEX_TEXTURE_ADDRESS + (m_index * 8)] >> 12) & 0xf);
@@ -347,11 +329,6 @@ namespace rsx
 		return ((method_registers[NV4097_SET_VERTEX_TEXTURE_CONTROL0 + (m_index * 8)] >> 2) & 0x1);
 	}
 
-	u32 vertex_texture::remap() const
-	{
-		return 0 | (1 << 2) | (2 << 4) | (3 << 6);//(method_registers[NV4097_SET_VERTEX_TEXTURE_CONTROL1 + (m_index * 8)]);
-	}
-
 	u16 vertex_texture::bias() const
 	{
 		return ((method_registers[NV4097_SET_VERTEX_TEXTURE_FILTER + (m_index * 8)]) & 0x1fff);
diff --git a/rpcs3/Emu/RSX/RSXTexture.h b/rpcs3/Emu/RSX/RSXTexture.h
index c5d2a5a4bb..a258006c95 100644
--- a/rpcs3/Emu/RSX/RSXTexture.h
+++ b/rpcs3/Emu/RSX/RSXTexture.h
@@ -86,9 +86,6 @@ namespace rsx
 		u16  mipmap() const;
 
 		// Address
-		u8 wrap_s() const;
-		u8 wrap_t() const;
-		u8 wrap_r() const;
 		u8 unsigned_remap() const;
 		u8 zfunc() const;
 		u8 gamma() const;
@@ -102,9 +99,6 @@ namespace rsx
 		u8   max_aniso() const;
 		bool alpha_kill_enabled() const;
 
-		// Control1
-		u32 remap() const;
-
 		// Filter
 		u16 bias() const;
 		u8  min_filter() const;
diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp
index fd5e3f8554..7d4babcd01 100644
--- a/rpcs3/Emu/RSX/RSXThread.cpp
+++ b/rpcs3/Emu/RSX/RSXThread.cpp
@@ -624,13 +624,20 @@ namespace rsx
 
 		// Reset vertex attrib array
 		for (int i = 0; i < limits::vertex_count; i++)
+		{
 			vertex_arrays_info[i].size = 0;
+		}
 
 		// Construct Textures
 		for (int i = 0; i < limits::textures_count; i++)
 		{
 			textures[i].init(i);
 		}
+
+		for (int i = 0; i < limits::vertex_textures_count; i++)
+		{
+			vertex_textures[i].init(i);
+		}
 	}
 
 	void thread::init(const u32 ioAddress, const u32 ioSize, const u32 ctrlAddress, const u32 localAddress)

From f7e787958ece6bb0045fbc20e0cb0b3c8c325c8e Mon Sep 17 00:00:00 2001
From: DHrpcs3 <dh.rpcs3@gmail.com>
Date: Wed, 6 Jan 2016 02:24:57 +0200
Subject: [PATCH 11/13] fixed nv3089::image_in scale value

---
 rpcs3/Emu/RSX/RSXThread.cpp   |  5 +++++
 rpcs3/Emu/RSX/rsx_methods.cpp | 19 ++++++++++++++-----
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp
index 7d4babcd01..d3a016caa9 100644
--- a/rpcs3/Emu/RSX/RSXThread.cpp
+++ b/rpcs3/Emu/RSX/RSXThread.cpp
@@ -350,13 +350,18 @@ namespace rsx
 	void thread::end()
 	{
 		vertex_index_array.clear();
+
 		for (auto &vertex_array : vertex_arrays)
+		{
 			vertex_array.clear();
+		}
 
 		transform_constants.clear();
 
 		if (capture_current_frame)
+		{
 			capture_frame("Draw " + std::to_string(vertex_draw_count));
+		}
 	}
 
 	void thread::on_task()
diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp
index 7bbc95723f..0c65d73769 100644
--- a/rpcs3/Emu/RSX/rsx_methods.cpp
+++ b/rpcs3/Emu/RSX/rsx_methods.cpp
@@ -459,10 +459,19 @@ namespace rsx
 
 			u32 out_offset = out_x * out_bpp + out_pitch * out_y;
 
-			bool need_clip = method_registers[NV3089_CLIP_SIZE] != method_registers[NV3089_IMAGE_IN_SIZE] || method_registers[NV3089_CLIP_POINT];
-			bool need_convert = out_format != in_format || out_w != in_w || out_h != in_h;
+			f32 scale_x = 1048576.f / method_registers[NV3089_DS_DX];
+			f32 scale_y = 1048576.f / method_registers[NV3089_DT_DY];
 
-			u32 slice_h = (u32)(clip_h * (method_registers[NV3089_DS_DX] / 1048576.f));
+			u32 slice_h = (u32)(clip_h * (1.0 / scale_y));
+
+			u32 convert_w = (u32)(scale_x * in_w);
+			u32 convert_h = (u32)(scale_y * in_h);
+
+			bool need_clip =
+				method_registers[NV3089_CLIP_SIZE] != method_registers[NV3089_IMAGE_IN_SIZE] ||
+				method_registers[NV3089_CLIP_POINT] || convert_w != out_w || convert_h != out_h;
+
+			bool need_convert = out_format != in_format || scale_x != 1.0 || scale_y != 1.0;
 
 			if (slice_h)
 			{
@@ -484,7 +493,7 @@ namespace rsx
 					{
 						if (need_convert)
 						{
-							convert_scale_image(temp1, out_format, out_w, out_h, out_pitch,
+							convert_scale_image(temp1, out_format, convert_w, convert_h, out_pitch,
 								pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false);
 
 							clip_image(pixels_dst + out_offset, temp1.get(), clip_x, clip_y, clip_w, clip_h, out_bpp, out_pitch, out_pitch);
@@ -526,7 +535,7 @@ namespace rsx
 					{
 						if (need_convert)
 						{
-							convert_scale_image(temp1, out_format, out_w, out_h, out_pitch,
+							convert_scale_image(temp1, out_format, convert_w, convert_h, out_pitch,
 								pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false);
 
 							clip_image(temp2, temp1.get(), clip_x, clip_y, clip_w, clip_h, out_bpp, out_pitch, out_pitch);

From 6406cece578942e9e61b71776b16483a61b14653 Mon Sep 17 00:00:00 2001
From: DHrpcs3 <dh.rpcs3@gmail.com>
Date: Wed, 6 Jan 2016 03:37:38 +0200
Subject: [PATCH 12/13] nv3089::image_in: fixed reading from tiled regions

---
 rpcs3/Emu/RSX/rsx_methods.cpp | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp
index 0c65d73769..6870c1830c 100644
--- a/rpcs3/Emu/RSX/rsx_methods.cpp
+++ b/rpcs3/Emu/RSX/rsx_methods.cpp
@@ -336,7 +336,7 @@ namespace rsx
 
 	namespace nv3089
 	{
-		never_inline void image_in(u32 arg)
+		never_inline void image_in(thread *rsx, u32 arg)
 		{
 			u32 operation = method_registers[NV3089_SET_OPERATION];
 
@@ -405,7 +405,7 @@ namespace rsx
 				return;
 			}
 
-			u32 src_address = get_address(src_offset, src_dma);
+			tiled_region src_region = rsx->get_tiled_address(src_offset, src_dma & 0xf);//get_address(src_offset, src_dma);
 			u32 dst_address = get_address(dst_offset, dst_dma);
 
 			u32 in_bpp = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? 2 : 4; // bytes per pixel
@@ -433,7 +433,7 @@ namespace rsx
 
 			//LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: src = 0x%x, dst = 0x%x", src_address, dst_address);
 
-			u8* pixels_src = vm::ps3::_ptr<u8>(src_address);
+			u8* pixels_src = src_region.tile ? src_region.ptr + src_region.base : src_region.ptr;
 			u8* pixels_dst = vm::ps3::_ptr<u8>(dst_address);
 
 			if (dst_color_format != CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 &&
@@ -462,8 +462,6 @@ namespace rsx
 			f32 scale_x = 1048576.f / method_registers[NV3089_DS_DX];
 			f32 scale_y = 1048576.f / method_registers[NV3089_DT_DY];
 
-			u32 slice_h = (u32)(clip_h * (1.0 / scale_y));
-
 			u32 convert_w = (u32)(scale_x * in_w);
 			u32 convert_h = (u32)(scale_y * in_h);
 
@@ -473,16 +471,22 @@ namespace rsx
 
 			bool need_convert = out_format != in_format || scale_x != 1.0 || scale_y != 1.0;
 
-			if (slice_h)
+			u32 slice_h = clip_h;
+
+			if (src_region.tile)
 			{
-				if (clip_h < out_h)
+				if (src_region.tile->comp == CELL_GCM_COMPMODE_C32_2X2)
 				{
-					--slice_h;
+					slice_h *= 2;
+				}
+
+				u32 size = slice_h * in_pitch;
+
+				if (size > src_region.tile->size - src_region.base)
+				{
+					u32 diff = size - (src_region.tile->size - src_region.base);
+					slice_h -= diff / in_pitch + (diff % in_pitch ? 1 : 0);
 				}
-			}
-			else
-			{
-				slice_h = clip_h;
 			}
 
 			if (method_registers[NV3089_SET_CONTEXT_SURFACE] != CELL_GCM_CONTEXT_SWIZZLE2D)

From e9560da4e2bcafad4b988a5ccc3d1fe2b619f64e Mon Sep 17 00:00:00 2001
From: DHrpcs3 <dh.rpcs3@gmail.com>
Date: Wed, 6 Jan 2016 13:47:05 +0200
Subject: [PATCH 13/13] nv3089::image_in: use in_x/in_y & out_x/out_y

---
 rpcs3/Emu/RSX/rsx_methods.cpp | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp
index 6870c1830c..3529418999 100644
--- a/rpcs3/Emu/RSX/rsx_methods.cpp
+++ b/rpcs3/Emu/RSX/rsx_methods.cpp
@@ -405,12 +405,15 @@ namespace rsx
 				return;
 			}
 
-			tiled_region src_region = rsx->get_tiled_address(src_offset, src_dma & 0xf);//get_address(src_offset, src_dma);
-			u32 dst_address = get_address(dst_offset, dst_dma);
-
 			u32 in_bpp = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? 2 : 4; // bytes per pixel
 			u32 out_bpp = dst_color_format == CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 ? 2 : 4;
 
+			u32 in_offset = u32(in_x * in_bpp + in_pitch * in_y);
+			u32 out_offset = out_x * out_bpp + out_pitch * out_y;
+
+			tiled_region src_region = rsx->get_tiled_address(src_offset + in_offset, src_dma & 0xf);//get_address(src_offset, src_dma);
+			u32 dst_address = get_address(dst_offset + out_offset, dst_dma);
+
 			if (out_pitch == 0)
 			{
 				out_pitch = out_bpp * out_w;
@@ -434,7 +437,7 @@ namespace rsx
 			//LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: src = 0x%x, dst = 0x%x", src_address, dst_address);
 
 			u8* pixels_src = src_region.tile ? src_region.ptr + src_region.base : src_region.ptr;
-			u8* pixels_dst = vm::ps3::_ptr<u8>(dst_address);
+			u8* pixels_dst = vm::ps3::_ptr<u8>(dst_address + out_offset);
 
 			if (dst_color_format != CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 &&
 				dst_color_format != CELL_GCM_TRANSFER_SURFACE_FORMAT_A8R8G8B8)
@@ -457,8 +460,6 @@ namespace rsx
 			AVPixelFormat in_format = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB;
 			AVPixelFormat out_format = dst_color_format == CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB;
 
-			u32 out_offset = out_x * out_bpp + out_pitch * out_y;
-
 			f32 scale_x = 1048576.f / method_registers[NV3089_DS_DX];
 			f32 scale_y = 1048576.f / method_registers[NV3089_DT_DY];
 
@@ -519,7 +520,7 @@ namespace rsx
 					{
 						for (u32 y = 0; y < out_h; ++y)
 						{
-							u8 *dst = pixels_dst + out_x * out_bpp + out_pitch * (y + out_y);
+							u8 *dst = pixels_dst + out_pitch * y;
 							u8 *src = pixels_src + in_pitch * y;
 
 							std::memmove(dst, src, out_w * out_bpp);