Merge pull request #1239 from vlj/d3d12

D3d12: Various factorisations and fixes
2025-03-13 07:14:49 +00:00 · 2015-10-03 20:04:53 +03:00 · 2015-10-03 20:04:53 +03:00 · 55ca625371
commit 55ca625371
parent f898ebff3f 81546d357c
15 changed files with 999 additions and 867 deletions
--- a/rpcs3/Emu/RSX/Common/BufferUtils.cpp
+++ b/rpcs3/Emu/RSX/Common/BufferUtils.cpp
@ -0,0 +1,242 @@
+#include "stdafx.h"
+#include "BufferUtils.h"
+
+
+// Function-like min/max helpers.
+// The whole expansion is wrapped in parentheses so that operators surrounding the
+// call site (e.g. `1 + MIN2(a, b)`) cannot bind to the ternary's condition.
+// Both arguments may be evaluated twice: do not pass expressions with side effects.
+#define MIN2(x, y) (((x) < (y)) ? (x) : (y))
+#define MAX2(x, y) (((x) > (y)) ? (x) : (y))
+
+
+/**
+ * Tell whether two inclusive [first, second] ranges share at least one position.
+ */
+inline
+bool overlaps(const std::pair<size_t, size_t> &range1, const std::pair<size_t, size_t> &range2)
+{
+	// Two closed intervals intersect exactly when each one starts no later than the other ends.
+	const bool firstStartsInTime = range1.first <= range2.second;
+	const bool secondStartsInTime = range2.first <= range1.second;
+	return firstStartsInTime && secondStartsInTime;
+}
+
+/**
+ * Group the enabled vertex attributes (the first 32 entries of m_vertex_data) into buffers.
+ * An attribute joins an already known buffer when their byte ranges overlap and they use the
+ * same stride; otherwise it starts a new buffer.
+ * vertex_data_size[i] is the size in bytes of the data of attribute i, base_offset is added
+ * to every attribute address.
+ */
+std::vector<VertexBufferFormat> FormatVertexData(const RSXVertexData *m_vertex_data, size_t *vertex_data_size, size_t base_offset)
+{
+	std::vector<VertexBufferFormat> Result;
+	for (size_t attribute = 0; attribute < 32; ++attribute)
+	{
+		const RSXVertexData &attributeData = m_vertex_data[attribute];
+		if (!attributeData.IsEnabled())
+			continue;
+
+		// Number of vertices that fit in the data of this attribute.
+		const size_t vertexCount = vertex_data_size[attribute] / (attributeData.size * attributeData.GetTypeSize());
+		const size_t attributeStride = attributeData.stride;
+		const size_t componentSize = attributeData.GetTypeSize();
+
+		// Inclusive byte range: first byte of the first vertex up to the last byte of the last vertex.
+		// NOTE(review): vertexCount - 1 wraps around if vertexCount is 0 - confirm callers never pass that.
+		const size_t firstByte = attributeData.addr + base_offset;
+		const size_t lastByte = attributeData.addr + base_offset + componentSize * attributeData.size + (vertexCount - 1) * attributeStride - 1;
+		const std::pair<size_t, size_t> attributeRange = std::make_pair(firstByte, lastByte);
+
+		// Look for the first known buffer this attribute is interleaved with.
+		VertexBufferFormat *mergeTarget = nullptr;
+		for (size_t k = 0; k < Result.size(); ++k)
+		{
+			if (Result[k].stride == attributeStride && overlaps(Result[k].range, attributeRange))
+			{
+				mergeTarget = &Result[k];
+				break;
+			}
+		}
+
+		if (mergeTarget == nullptr)
+		{
+			Result.push_back(VertexBufferFormat{ attributeRange, std::vector<size_t>{ attribute }, vertexCount, attributeStride });
+			continue;
+		}
+
+		// Grow the existing buffer so that it also covers the new attribute.
+		mergeTarget->range.first = MIN2(mergeTarget->range.first, attributeRange.first);
+		mergeTarget->range.second = MAX2(mergeTarget->range.second, attributeRange.second);
+		mergeTarget->elementCount = MAX2(mergeTarget->elementCount, vertexCount);
+		mergeTarget->attributeId.push_back(attribute);
+	}
+	return Result;
+}
+
+/**
+ * Copy the attributes grouped in vbf into bufferMap, one vertex at a time.
+ *
+ * vbf         buffer description (range, attribute ids, vertex count, stride).
+ * vertexData  attribute array indexed by the ids stored in vbf.attributeId.
+ * baseOffset  offset added to every attribute address.
+ * bufferMap   destination; an attribute is written at its distance from vbf.range.first.
+ *
+ * 8-bit components are copied as is, 16-bit and 32-bit components are byte-swapped.
+ * Components of any other size are not written.
+ */
+void uploadVertexData(const VertexBufferFormat &vbf, const RSXVertexData *vertexData, size_t baseOffset, void* bufferMap)
+{
+	// The counter is a size_t like vbf.elementCount: the previous int counter mixed
+	// signed and unsigned operands in the loop condition.
+	for (size_t vertex = 0; vertex < vbf.elementCount; vertex++)
+	{
+		for (size_t attributeId : vbf.attributeId)
+		{
+			const RSXVertexData &attribute = vertexData[attributeId];
+			if (!attribute.addr)
+			{
+				// No address: the attribute carries its own data array, copied verbatim.
+				// NOTE(review): the copy always lands at the start of bufferMap and is repeated
+				// for every vertex - confirm this is intended.
+				memcpy(bufferMap, attribute.data.data(), attribute.data.size());
+				continue;
+			}
+			size_t offset = (size_t)attribute.addr + baseOffset - vbf.range.first;
+			size_t tsize = attribute.GetTypeSize();
+			size_t size = attribute.size;
+			auto src = vm::get_ptr<const u8>(attribute.addr + (u32)baseOffset + (u32)(vbf.stride * vertex));
+			char* dst = (char*)bufferMap + offset + vbf.stride * vertex;
+
+			switch (tsize)
+			{
+			case 1:
+			{
+				memcpy(dst, src, size);
+				break;
+			}
+
+			case 2:
+			{
+				const u16* c_src = (const u16*)src;
+				u16* c_dst = (u16*)dst;
+				for (u32 j = 0; j < size; ++j) *c_dst++ = _byteswap_ushort(*c_src++);
+				break;
+			}
+
+			case 4:
+			{
+				const u32* c_src = (const u32*)src;
+				u32* c_dst = (u32*)dst;
+				for (u32 j = 0; j < size; ++j) *c_dst++ = _byteswap_ulong(*c_src++);
+				break;
+			}
+			}
+		}
+	}
+}
+
+/**
+ * Expand an indexed triangle fan into an indexed triangle list.
+ *
+ * dst         receives 3 * (indexCount - 2) indices of type IndexType.
+ * src         holds indexCount indices of type IndexType; src[0] is the fan centre.
+ * indexCount  number of indices in src; nothing is written when it is below 3.
+ */
+template<typename IndexType, typename DstType, typename SrcType>
+void expandIndexedTriangleFan(DstType *dst, const SrcType *src, size_t indexCount)
+{
+	// A fan needs at least three indices. Without this guard `indexCount - 2` wraps
+	// around (size_t is unsigned) for 0 or 1 index and the loop overruns both buffers.
+	if (indexCount < 3)
+		return;
+
+	IndexType *typedDst = reinterpret_cast<IndexType *>(dst);
+	const IndexType *typedSrc = reinterpret_cast<const IndexType *>(src);
+	const size_t triangleCount = indexCount - 2;
+	for (size_t i = 0; i < triangleCount; i++)
+	{
+		// Triangle i is (centre, previous rim vertex, current rim vertex).
+		typedDst[3 * i] = typedSrc[0];
+		typedDst[3 * i + 1] = typedSrc[i + 1];
+		typedDst[3 * i + 2] = typedSrc[i + 2];
+	}
+}
+
+/**
+ * Expand indexed quads into an indexed triangle list.
+ * Every complete group of 4 source indices yields 6 destination indices; a trailing
+ * incomplete group is ignored.
+ */
+template<typename IndexType, typename DstType, typename SrcType>
+void expandIndexedQuads(DstType *dst, const SrcType *src, size_t indexCount)
+{
+	IndexType *output = reinterpret_cast<IndexType *>(dst);
+	const IndexType *input = reinterpret_cast<const IndexType *>(src);
+	const size_t quadCount = indexCount / 4;
+	for (size_t quad = 0; quad < quadCount; ++quad)
+	{
+		const IndexType *corner = input + 4 * quad;
+		IndexType *triangles = output + 6 * quad;
+		// Triangle (0, 1, 2) ...
+		triangles[0] = corner[0];
+		triangles[1] = corner[1];
+		triangles[2] = corner[2];
+		// ... followed by triangle (2, 3, 0).
+		triangles[3] = corner[2];
+		triangles[4] = corner[3];
+		triangles[5] = corner[0];
+	}
+}
+
+// Triangle fans and quads are the only draw modes reported as needing emulation;
+// every other value, including unknown ones, is reported as native.
+bool isNativePrimitiveMode(unsigned m_draw_mode)
+{
+	const bool isEmulated =
+		m_draw_mode == CELL_GCM_PRIMITIVE_TRIANGLE_FAN ||
+		m_draw_mode == CELL_GCM_PRIMITIVE_QUADS;
+	return !isEmulated;
+}
+
+/**
+ * Number of indices needed to draw initial_index_count elements in m_draw_mode.
+ * Native modes keep their count. A triangle fan becomes (count - 2) triangles and
+ * every group of 4 quad indices becomes 6 indices.
+ */
+size_t getIndexCount(unsigned m_draw_mode, unsigned initial_index_count)
+{
+	if (isNativePrimitiveMode(m_draw_mode))
+		return initial_index_count;
+
+	// Widen before multiplying so that large counts do not overflow 32 bits.
+	const size_t count = initial_index_count;
+
+	switch (m_draw_mode)
+	{
+	case CELL_GCM_PRIMITIVE_TRIANGLE_FAN:
+		// Fewer than 3 indices make no triangle; `count - 2` would wrap around otherwise.
+		if (count < 3)
+			return 0;
+		return (count - 2) * 3;
+	case CELL_GCM_PRIMITIVE_QUADS:
+		return (6 * count) / 4;
+	default:
+		return 0;
+	}
+}
+
+
+/**
+ * Fill bufferMap with the indices needed to draw element_count elements in m_draw_mode.
+ *
+ * indexBuffer != nullptr: native modes are copied unchanged, triangle fans and quads are
+ * expanded to triangle lists using the index width selected by index_type (abort() on an
+ * unknown index_type).
+ * indexBuffer == nullptr: 16-bit indices are generated for triangle fans and quads; other
+ * modes write nothing.
+ *
+ * Fans with fewer than 3 elements write nothing.
+ */
+void uploadIndexData(unsigned m_draw_mode, unsigned index_type, void* indexBuffer, void* bufferMap, unsigned element_count)
+{
+	if (indexBuffer != nullptr)
+	{
+		switch (m_draw_mode)
+		{
+		case CELL_GCM_PRIMITIVE_POINTS:
+		case CELL_GCM_PRIMITIVE_LINES:
+		case CELL_GCM_PRIMITIVE_LINE_LOOP:
+		case CELL_GCM_PRIMITIVE_LINE_STRIP:
+		case CELL_GCM_PRIMITIVE_TRIANGLES:
+		case CELL_GCM_PRIMITIVE_TRIANGLE_STRIP:
+		case CELL_GCM_PRIMITIVE_QUAD_STRIP:
+		case CELL_GCM_PRIMITIVE_POLYGON:
+		{
+			size_t indexSize = (index_type == CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32) ? 4 : 2;
+			memcpy(bufferMap, indexBuffer, indexSize * element_count);
+			return;
+		}
+		case CELL_GCM_PRIMITIVE_TRIANGLE_FAN:
+			switch (index_type)
+			{
+			case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32:
+				// The expansion computes element_count - 2, which wraps around below 2 elements.
+				if (element_count >= 3)
+					expandIndexedTriangleFan<unsigned int>(bufferMap, indexBuffer, element_count);
+				return;
+			case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16:
+				if (element_count >= 3)
+					expandIndexedTriangleFan<unsigned short>(bufferMap, indexBuffer, element_count);
+				return;
+			default:
+				abort();
+				return;
+			}
+		case CELL_GCM_PRIMITIVE_QUADS:
+			switch (index_type)
+			{
+			case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32:
+				expandIndexedQuads<unsigned int>(bufferMap, indexBuffer, element_count);
+				return;
+			case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16:
+				expandIndexedQuads<unsigned short>(bufferMap, indexBuffer, element_count);
+				return;
+			default:
+				abort();
+				return;
+			}
+		}
+	}
+	else
+	{
+		// NOTE(review): generated indices are 16 bits wide, so vertex numbers above 65535
+		// are truncated - confirm callers never draw that many vertices unindexed.
+		unsigned short *typedDst = static_cast<unsigned short *>(bufferMap);
+		switch (m_draw_mode)
+		{
+		case CELL_GCM_PRIMITIVE_TRIANGLE_FAN:
+			// Unsigned element_count - 2 wraps around below 2 elements and would overrun bufferMap.
+			if (element_count < 3)
+				return;
+			for (unsigned i = 0; i < (element_count - 2); i++)
+			{
+				typedDst[3 * i] = 0;
+				typedDst[3 * i + 1] = static_cast<unsigned short>(i + 1);
+				typedDst[3 * i + 2] = static_cast<unsigned short>(i + 2);
+			}
+			return;
+		case CELL_GCM_PRIMITIVE_QUADS:
+			for (unsigned i = 0; i < element_count / 4; i++)
+			{
+				// First triangle
+				typedDst[6 * i] = static_cast<unsigned short>(4 * i);
+				typedDst[6 * i + 1] = static_cast<unsigned short>(4 * i + 1);
+				typedDst[6 * i + 2] = static_cast<unsigned short>(4 * i + 2);
+				// Second triangle
+				typedDst[6 * i + 3] = static_cast<unsigned short>(4 * i + 2);
+				typedDst[6 * i + 4] = static_cast<unsigned short>(4 * i + 3);
+				typedDst[6 * i + 5] = static_cast<unsigned short>(4 * i);
+			}
+			return;
+		}
+	}
+}
--- a/rpcs3/Emu/RSX/Common/BufferUtils.h
+++ b/rpcs3/Emu/RSX/Common/BufferUtils.h
@ -0,0 +1,40 @@
+#pragma once
+#include <vector>
+#include "Emu/Memory/vm.h"
+#include "../RSXThread.h"
+
+
+struct VertexBufferFormat // Describes one vertex buffer and the attributes stored in it.
+{
+	std::pair<size_t, size_t> range; // Inclusive [first, last] byte range covered by the buffer.
+	std::vector<size_t> attributeId; // Indices of the vertex attributes stored in this buffer.
+	size_t elementCount; // Vertex count; the largest count among the merged attributes.
+	size_t stride; // Stride shared by every attribute of the buffer.
+};
+
+
+/*
+ * Detect buffers containing interleaved vertex attributes: enabled attributes whose
+ * byte ranges overlap and which share the same stride are grouped in a single buffer.
+ * This minimizes memory upload size.
+ * vertex_data_size holds the size in bytes of the data of every attribute and
+ * base_offset is added to every attribute address.
+ */
+std::vector<VertexBufferFormat> FormatVertexData(const RSXVertexData *m_vertex_data, size_t *vertex_data_size, size_t base_offset);
+
+/*
+ * Write the vertex attributes listed in vbf to bufferMap, byte-swapping 16-bit and
+ * 32-bit components.
+ */
+void uploadVertexData(const VertexBufferFormat &vbf, const RSXVertexData *vertexData, size_t baseOffset, void* bufferMap);
+
+/*
+ * Returns false if the primitive mode is not supported and needs to be emulated
+ * (using an index buffer), true otherwise.
+ */
+bool isNativePrimitiveMode(unsigned m_draw_mode);
+
+/*
+ * Returns the index count required by an emulated primitive mode, otherwise returns
+ * initial_index_count.
+ */
+size_t getIndexCount(unsigned m_draw_mode, unsigned initial_index_count);
+
+/*
+ * Write index information to bufferMap.
+ * When indexBuffer is null, indices are generated for the emulated primitive modes.
+ */
+void uploadIndexData(unsigned m_draw_mode, unsigned index_type, void* indexBuffer, void* bufferMap, unsigned element_count);
--- a/rpcs3/Emu/RSX/Common/TextureUtils.cpp
+++ b/rpcs3/Emu/RSX/Common/TextureUtils.cpp
@ -0,0 +1,544 @@
+#include "stdafx.h"
+#include "Emu/Memory/vm.h"
+#include "TextureUtils.h"
+#include "../RSXThread.h"
+#include "Utilities/Log.h"
+
+
+// Function-like max helper. The expansion is fully parenthesised so that operators
+// surrounding the call site (e.g. `MAX2(a, b) - 1`) cannot bind to one branch of the
+// ternary. Both arguments may be evaluated twice.
+#define MAX2(a, b) (((a) > (b)) ? (a) : (b))
+
+// Interleave the low bits of x, y and z (in that order, lowest bit first) into one index.
+// An axis contributes as many bits as its log2 size; once an axis has run out of bits the
+// remaining axes keep alternating.
+unsigned LinearToSwizzleAddress(unsigned x, unsigned y, unsigned z, unsigned log2_width, unsigned log2_height, unsigned log2_depth)
+{
+	unsigned coordinate[3] = { x, y, z };
+	unsigned bitsLeft[3] = { log2_width, log2_height, log2_depth };
+
+	unsigned swizzled = 0;
+	unsigned nextBit = 0;
+	while ((bitsLeft[0] | bitsLeft[1] | bitsLeft[2]) != 0)
+	{
+		for (int axis = 0; axis < 3; ++axis)
+		{
+			if (bitsLeft[axis] == 0)
+				continue;
+
+			swizzled |= (coordinate[axis] & 0x01) << nextBit;
+			coordinate[axis] >>= 1;
+			++nextBit;
+			--bitsLeft[axis];
+		}
+	}
+	return swizzled;
+}
+
+
+/**
+* Copy every mipmap level of a linear texture from src to dst, row by row.
+* Source rows of every level are read using the row size of the top level.
+* Destination rows are padded to a 256 byte pitch and every level starts on a
+* 512 byte boundary. Returns the placement of each level inside dst.
+*/
+inline std::vector<MipmapLevelInfo>
+writeTexelsGeneric(const char *src, char *dst, size_t widthInBlock, size_t heightInBlock, size_t blockSize, size_t mipmapCount)
+{
+	std::vector<MipmapLevelInfo> Result;
+	const size_t srcRowSize = widthInBlock * blockSize;
+	size_t dstCursor = 0, srcCursor = 0;
+	size_t levelWidth = widthInBlock, levelHeight = heightInBlock;
+	for (size_t level = 0; level < mipmapCount; ++level)
+	{
+		const size_t copiedBytesPerRow = levelWidth * blockSize;
+		const size_t dstRowPitch = align(levelWidth * blockSize, 256);
+
+		MipmapLevelInfo levelInfo = {};
+		levelInfo.offset = dstCursor;
+		levelInfo.height = levelHeight;
+		levelInfo.width = levelWidth;
+		levelInfo.rowPitch = dstRowPitch;
+		Result.push_back(levelInfo);
+
+		const char *levelSrc = src + srcCursor;
+		char *levelDst = dst + dstCursor;
+		for (size_t row = 0; row < levelHeight; ++row)
+		{
+			memcpy(levelDst + row * dstRowPitch, levelSrc + row * srcRowSize, copiedBytesPerRow);
+		}
+
+		// Move to the next level: destination levels are 512 byte aligned.
+		dstCursor += levelHeight * dstRowPitch;
+		dstCursor = align(dstCursor, 512);
+		srcCursor += levelHeight * srcRowSize;
+		levelHeight = MAX2(levelHeight / 2, 1);
+		levelWidth = MAX2(levelWidth / 2, 1);
+	}
+	return Result;
+}
+
+/**
+* Write every mipmap level of a swizzled texture made of 32-bit texels to dst in
+* linear order. Destination rows are padded to a 256 byte pitch.
+* Returns the placement of each level inside dst.
+*
+* NOTE(review): unlike writeTexelsGeneric the offset of a level is not rounded up to
+* 512 bytes - confirm whether the consumer of MipmapLevelInfo::offset requires it.
+*/
+inline std::vector<MipmapLevelInfo>
+writeTexelsSwizzled(const char *src, char *dst, size_t widthInBlock, size_t heightInBlock, size_t blockSize, size_t mipmapCount)
+{
+	// Exact integer floor(log2(value)); the previous logf() quotient depended on float
+	// rounding and was undefined for a value of 0.
+	auto floorLog2 = [](size_t value) -> unsigned
+	{
+		unsigned result = 0;
+		while (value > 1)
+		{
+			value >>= 1;
+			++result;
+		}
+		return result;
+	};
+
+	std::vector<MipmapLevelInfo> Result;
+	size_t offsetInDst = 0, offsetInSrc = 0;
+	size_t currentHeight = heightInBlock, currentWidth = widthInBlock;
+	for (unsigned mipLevel = 0; mipLevel < mipmapCount; mipLevel++)
+	{
+		size_t rowPitch = align(currentWidth * blockSize, 256);
+
+		MipmapLevelInfo currentMipmapLevelInfo = {};
+		currentMipmapLevelInfo.offset = offsetInDst;
+		currentMipmapLevelInfo.height = currentHeight;
+		currentMipmapLevelInfo.width = currentWidth;
+		currentMipmapLevelInfo.rowPitch = rowPitch;
+		Result.push_back(currentMipmapLevelInfo);
+
+		// Both offsets are byte counts: they must be applied before the cast to u32*,
+		// otherwise pointer arithmetic multiplies them by 4 for every level but the first.
+		const u32 *castedSrc = reinterpret_cast<const u32 *>(src + offsetInSrc);
+		u32 *castedDst = reinterpret_cast<u32 *>(dst + offsetInDst);
+
+		const unsigned log2width = floorLog2(currentWidth);
+		const unsigned log2height = floorLog2(currentHeight);
+
+		for (size_t row = 0; row < currentHeight; row++)
+			for (size_t j = 0; j < currentWidth; j++)
+				castedDst[(row * rowPitch / 4) + j] = castedSrc[LinearToSwizzleAddress((unsigned)j, (unsigned)row, 0, log2width, log2height, 0)];
+
+		offsetInDst += currentHeight * rowPitch;
+		offsetInSrc += currentHeight * widthInBlock * blockSize;
+		currentHeight = MAX2(currentHeight / 2, 1);
+		currentWidth = MAX2(currentWidth / 2, 1);
+	}
+	return Result;
+}
+
+
+/**
+* Copy every mipmap level of a block compressed (DXTCn) texture from src to dst.
+* Source levels are tightly packed; destination rows of blocks are padded to a
+* 256 byte pitch and every level starts on a 512 byte boundary.
+* The returned width and height of a level are expressed in pixels.
+*/
+inline std::vector<MipmapLevelInfo>
+writeCompressedTexel(const char *src, char *dst, size_t widthInBlock, size_t blockWidth, size_t heightInBlock, size_t blockHeight, size_t blockSize, size_t mipmapCount)
+{
+	std::vector<MipmapLevelInfo> Result;
+	size_t dstCursor = 0, srcCursor = 0;
+	size_t blocksPerRow = widthInBlock, blockRows = heightInBlock;
+	for (size_t level = 0; level < mipmapCount; ++level)
+	{
+		const size_t srcRowSize = blocksPerRow * blockSize;
+		const size_t dstRowPitch = align(blocksPerRow * blockSize, 256);
+
+		MipmapLevelInfo levelInfo = {};
+		levelInfo.offset = dstCursor;
+		levelInfo.height = blockRows * blockHeight;
+		levelInfo.width = blocksPerRow * blockWidth;
+		levelInfo.rowPitch = dstRowPitch;
+		Result.push_back(levelInfo);
+
+		const char *levelSrc = src + srcCursor;
+		char *levelDst = dst + dstCursor;
+		for (size_t row = 0; row < blockRows; ++row)
+		{
+			memcpy(levelDst + row * dstRowPitch, levelSrc + row * srcRowSize, srcRowSize);
+		}
+
+		// Move to the next level: destination levels are 512 byte aligned.
+		dstCursor += blockRows * dstRowPitch;
+		dstCursor = align(dstCursor, 512);
+		srcCursor += blockRows * srcRowSize;
+		blockRows = MAX2(blockRows / 2, 1);
+		blocksPerRow = MAX2(blocksPerRow / 2, 1);
+	}
+	return Result;
+}
+
+
+/**
+* Write every mipmap level of a swizzled texture made of 16-bit texels to dst in
+* linear order. Destination rows are padded to a 256 byte pitch.
+* Returns the placement of each level inside dst.
+*
+* NOTE(review): unlike writeTexelsGeneric the offset of a level is not rounded up to
+* 512 bytes - confirm whether the consumer of MipmapLevelInfo::offset requires it.
+*/
+inline std::vector<MipmapLevelInfo>
+write16bTexelsSwizzled(const char *src, char *dst, size_t widthInBlock, size_t heightInBlock, size_t blockSize, size_t mipmapCount)
+{
+	// Exact integer floor(log2(value)); the previous logf() quotient depended on float
+	// rounding and was undefined for a value of 0.
+	auto floorLog2 = [](size_t value) -> unsigned
+	{
+		unsigned result = 0;
+		while (value > 1)
+		{
+			value >>= 1;
+			++result;
+		}
+		return result;
+	};
+
+	std::vector<MipmapLevelInfo> Result;
+	size_t offsetInDst = 0, offsetInSrc = 0;
+	size_t currentHeight = heightInBlock, currentWidth = widthInBlock;
+	for (unsigned mipLevel = 0; mipLevel < mipmapCount; mipLevel++)
+	{
+		size_t rowPitch = align(currentWidth * blockSize, 256);
+
+		MipmapLevelInfo currentMipmapLevelInfo = {};
+		currentMipmapLevelInfo.offset = offsetInDst;
+		currentMipmapLevelInfo.height = currentHeight;
+		currentMipmapLevelInfo.width = currentWidth;
+		currentMipmapLevelInfo.rowPitch = rowPitch;
+		Result.push_back(currentMipmapLevelInfo);
+
+		// Both offsets are byte counts: they must be applied before the cast to u16*,
+		// otherwise pointer arithmetic multiplies them by 2 for every level but the first.
+		const u16 *castedSrc = reinterpret_cast<const u16 *>(src + offsetInSrc);
+		u16 *castedDst = reinterpret_cast<u16 *>(dst + offsetInDst);
+
+		const unsigned log2width = floorLog2(currentWidth);
+		const unsigned log2height = floorLog2(currentHeight);
+
+		for (size_t row = 0; row < currentHeight; row++)
+			for (size_t j = 0; j < currentWidth; j++)
+				castedDst[(row * rowPitch / 2) + j] = castedSrc[LinearToSwizzleAddress((unsigned)j, (unsigned)row, 0, log2width, log2height, 0)];
+
+		offsetInDst += currentHeight * rowPitch;
+		offsetInSrc += currentHeight * widthInBlock * blockSize;
+		currentHeight = MAX2(currentHeight / 2, 1);
+		currentWidth = MAX2(currentWidth / 2, 1);
+	}
+	return Result;
+}
+
+/**
+* Write every mipmap level of a linear texture made of 16-bit texels to dst,
+* swapping the two bytes of every texel.
+* Source rows of every level are read using the row size of the top level;
+* destination rows are padded to a 256 byte pitch.
+* Returns the placement of each level inside dst.
+*/
+inline std::vector<MipmapLevelInfo>
+write16bTexelsGeneric(const char *src, char *dst, size_t widthInBlock, size_t heightInBlock, size_t blockSize, size_t mipmapCount)
+{
+	std::vector<MipmapLevelInfo> Result;
+	size_t offsetInDst = 0, offsetInSrc = 0;
+	size_t currentHeight = heightInBlock, currentWidth = widthInBlock;
+	size_t srcPitch = widthInBlock * blockSize;
+	for (unsigned mipLevel = 0; mipLevel < mipmapCount; mipLevel++)
+	{
+		size_t rowPitch = align(currentWidth * blockSize, 256);
+
+		MipmapLevelInfo currentMipmapLevelInfo = {};
+		currentMipmapLevelInfo.offset = offsetInDst;
+		currentMipmapLevelInfo.height = currentHeight;
+		currentMipmapLevelInfo.width = currentWidth;
+		currentMipmapLevelInfo.rowPitch = rowPitch;
+		Result.push_back(currentMipmapLevelInfo);
+
+		unsigned short *castedDst = reinterpret_cast<unsigned short *>(dst);
+		const unsigned short *castedSrc = reinterpret_cast<const unsigned short *>(src);
+
+		// Only the rows of the current level are copied. Looping up to heightInBlock for
+		// every level read and wrote rows that do not belong to the smaller levels.
+		for (size_t row = 0; row < currentHeight; row++)
+			for (size_t j = 0; j < currentWidth; j++)
+			{
+				u16 tmp = castedSrc[offsetInSrc / 2 + row * srcPitch / 2 + j];
+				castedDst[offsetInDst / 2 + row * rowPitch / 2 + j] = static_cast<unsigned short>((tmp >> 8) | (tmp << 8));
+			}
+
+		offsetInDst += currentHeight * rowPitch;
+		offsetInSrc += currentHeight * widthInBlock * blockSize;
+		currentHeight = MAX2(currentHeight / 2, 1);
+		currentWidth = MAX2(currentWidth / 2, 1);
+	}
+	return Result;
+}
+
+/**
+* Write every mipmap level of a linear texture whose texels are made of four 16-bit
+* components to dst, swapping the two bytes of every component.
+* Source rows of every level are read using the row size of the top level;
+* destination rows are padded to a 256 byte pitch.
+* Returns the placement of each level inside dst.
+*/
+inline std::vector<MipmapLevelInfo>
+write16bX4TexelsGeneric(const char *src, char *dst, size_t widthInBlock, size_t heightInBlock, size_t blockSize, size_t mipmapCount)
+{
+	std::vector<MipmapLevelInfo> Result;
+	size_t offsetInDst = 0, offsetInSrc = 0;
+	size_t currentHeight = heightInBlock, currentWidth = widthInBlock;
+	size_t srcPitch = widthInBlock * blockSize;
+	for (unsigned mipLevel = 0; mipLevel < mipmapCount; mipLevel++)
+	{
+		size_t rowPitch = align(currentWidth * blockSize, 256);
+
+		MipmapLevelInfo currentMipmapLevelInfo = {};
+		currentMipmapLevelInfo.offset = offsetInDst;
+		currentMipmapLevelInfo.height = currentHeight;
+		currentMipmapLevelInfo.width = currentWidth;
+		currentMipmapLevelInfo.rowPitch = rowPitch;
+		Result.push_back(currentMipmapLevelInfo);
+
+		unsigned short *castedDst = reinterpret_cast<unsigned short *>(dst);
+		const unsigned short *castedSrc = reinterpret_cast<const unsigned short *>(src);
+
+		// Only the rows of the current level are copied. Looping up to heightInBlock for
+		// every level read and wrote rows that do not belong to the smaller levels.
+		for (size_t row = 0; row < currentHeight; row++)
+			for (size_t j = 0; j < currentWidth * 4; j++) // 4 components per texel
+			{
+				u16 tmp = castedSrc[offsetInSrc / 2 + row * srcPitch / 2 + j];
+				castedDst[offsetInDst / 2 + row * rowPitch / 2 + j] = static_cast<unsigned short>((tmp >> 8) | (tmp << 8));
+			}
+
+		offsetInDst += currentHeight * rowPitch;
+		offsetInSrc += currentHeight * widthInBlock * blockSize;
+		currentHeight = MAX2(currentHeight / 2, 1);
+		currentWidth = MAX2(currentWidth / 2, 1);
+	}
+	return Result;
+}
+
+
+/**
+* Look up the block layout of a texture format (CELL_GCM_TEXTURE_LN / CELL_GCM_TEXTURE_UN
+* bits already removed): size in bytes of a block and its dimensions in pixels.
+*
+* Returns false for formats that are not implemented. In that case an error is logged and
+* the outputs are set to a 4 byte, 1x1 pixel block so that callers never compute with
+* uninitialised values.
+*/
+static bool getTextureBlockLayout(int format, size_t &blockSizeInByte, size_t &blockWidthInPixel, size_t &blockHeightInPixel)
+{
+	// Most formats store one texel per block.
+	blockWidthInPixel = 1;
+	blockHeightInPixel = 1;
+
+	switch (format)
+	{
+	case CELL_GCM_TEXTURE_COMPRESSED_HILO8:
+	case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8:
+	case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
+	case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
+	default:
+		LOG_ERROR(RSX, "Unimplemented Texture format : %x", format);
+		blockSizeInByte = 4; // arbitrary but defined fallback
+		return false;
+
+	case CELL_GCM_TEXTURE_B8:
+		blockSizeInByte = 1;
+		return true;
+
+	case CELL_GCM_TEXTURE_A1R5G5B5:
+	case CELL_GCM_TEXTURE_A4R4G4B4:
+	case CELL_GCM_TEXTURE_R5G6B5:
+	case CELL_GCM_TEXTURE_G8B8:
+	case CELL_GCM_TEXTURE_DEPTH16:
+	case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
+	case CELL_GCM_TEXTURE_X16:
+	case CELL_GCM_TEXTURE_R5G5B5A1:
+	case CELL_GCM_TEXTURE_D1R5G5B5:
+		blockSizeInByte = 2;
+		return true;
+
+	case CELL_GCM_TEXTURE_A8R8G8B8:
+	case CELL_GCM_TEXTURE_R6G5B5: // Not native
+	case CELL_GCM_TEXTURE_DEPTH24_D8:
+	case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT:
+	case CELL_GCM_TEXTURE_Y16_X16:
+	case CELL_GCM_TEXTURE_X32_FLOAT:
+	case CELL_GCM_TEXTURE_Y16_X16_FLOAT:
+	case CELL_GCM_TEXTURE_D8R8G8B8:
+		blockSizeInByte = 4;
+		return true;
+
+	case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
+		blockSizeInByte = 8;
+		return true;
+
+	case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT:
+		blockSizeInByte = 16;
+		return true;
+
+	case CELL_GCM_TEXTURE_COMPRESSED_DXT1:
+		blockSizeInByte = 8;
+		blockWidthInPixel = 4, blockHeightInPixel = 4;
+		return true;
+
+	case CELL_GCM_TEXTURE_COMPRESSED_DXT23:
+	case CELL_GCM_TEXTURE_COMPRESSED_DXT45:
+		blockSizeInByte = 16;
+		blockWidthInPixel = 4, blockHeightInPixel = 4;
+		return true;
+
+	case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
+	case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
+		blockSizeInByte = 4;
+		blockWidthInPixel = 2, blockHeightInPixel = 2;
+		return true;
+	}
+}
+
+/**
+* Size in bytes of the buffer needed to store the texture with every row of blocks padded
+* to rowPitchAlignement. The size of the top level is doubled to leave room for the
+* mipmap levels.
+*/
+size_t getPlacedTextureStorageSpace(const RSXTexture &texture, size_t rowPitchAlignement)
+{
+	size_t w = texture.GetWidth(), h = texture.GetHeight();
+	int format = texture.GetFormat() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
+
+	size_t blockSizeInByte, blockWidthInPixel, blockHeightInPixel;
+	getTextureBlockLayout(format, blockSizeInByte, blockWidthInPixel, blockHeightInPixel);
+
+	// Round up: a partially covered block still has to be stored.
+	size_t heightInBlocks = (h + blockHeightInPixel - 1) / blockHeightInPixel;
+	size_t widthInBlocks = (w + blockWidthInPixel - 1) / blockWidthInPixel;
+
+	size_t rowPitch = align(blockSizeInByte * widthInBlocks, rowPitchAlignement);
+
+	return rowPitch * heightInBlocks * 2; // * 2 for mipmap levels
+}
+
+/**
+* Read the texture from guest memory and write all its mipmap levels to textureData using
+* the writer matching its format. Returns the placement of every level inside textureData.
+*
+* NOTE(review): rowPitchAlignement is not forwarded to the writers, which all use a
+* 256 byte row pitch alignment - confirm callers always pass 256.
+*/
+std::vector<MipmapLevelInfo> uploadPlacedTexture(const RSXTexture &texture, size_t rowPitchAlignement, void* textureData)
+{
+	size_t w = texture.GetWidth(), h = texture.GetHeight();
+
+	int format = texture.GetFormat() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
+
+	size_t blockSizeInByte, blockWidthInPixel, blockHeightInPixel;
+	getTextureBlockLayout(format, blockSizeInByte, blockWidthInPixel, blockHeightInPixel);
+
+	size_t heightInBlocks = (h + blockHeightInPixel - 1) / blockHeightInPixel;
+	size_t widthInBlocks = (w + blockWidthInPixel - 1) / blockWidthInPixel;
+
+	const u32 texaddr = GetAddress(texture.GetOffset(), texture.GetLocation());
+	auto pixels = vm::get_ptr<const u8>(texaddr);
+	// The texture is swizzled unless the linear (LN) bit of the raw format is set.
+	bool is_swizzled = !(texture.GetFormat() & CELL_GCM_TEXTURE_LN);
+	switch (format)
+	{
+	case CELL_GCM_TEXTURE_A8R8G8B8:
+		if (is_swizzled)
+			return writeTexelsSwizzled((char*)pixels, (char*)textureData, w, h, 4, texture.GetMipmap());
+		else
+			return writeTexelsGeneric((char*)pixels, (char*)textureData, w, h, 4, texture.GetMipmap());
+	case CELL_GCM_TEXTURE_A1R5G5B5:
+	case CELL_GCM_TEXTURE_A4R4G4B4:
+	case CELL_GCM_TEXTURE_R5G6B5:
+		if (is_swizzled)
+			return write16bTexelsSwizzled((char*)pixels, (char*)textureData, w, h, 2, texture.GetMipmap());
+		else
+			return write16bTexelsGeneric((char*)pixels, (char*)textureData, w, h, 2, texture.GetMipmap());
+	case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
+		return write16bX4TexelsGeneric((char*)pixels, (char*)textureData, w, h, 8, texture.GetMipmap());
+	case CELL_GCM_TEXTURE_COMPRESSED_DXT1:
+	case CELL_GCM_TEXTURE_COMPRESSED_DXT23:
+	case CELL_GCM_TEXTURE_COMPRESSED_DXT45:
+		return writeCompressedTexel((char*)pixels, (char*)textureData, widthInBlocks, blockWidthInPixel, heightInBlocks, blockHeightInPixel, blockSizeInByte, texture.GetMipmap());
+	default:
+		return writeTexelsGeneric((char*)pixels, (char*)textureData, w, h, blockSizeInByte, texture.GetMipmap());
+	}
+
+}
--- a/rpcs3/Emu/RSX/Common/TextureUtils.h
+++ b/rpcs3/Emu/RSX/Common/TextureUtils.h
@ -0,0 +1,26 @@
+#pragma once
+#include "../RSXTexture.h"
+#include <vector>
+
+struct MipmapLevelInfo // Placement of one mipmap level inside the destination buffer.
+{
+	size_t offset; // Byte offset of the level from the start of the destination buffer.
+	size_t width; // Width of the level (in pixels for block compressed textures).
+	size_t height; // Height of the level (in pixels for block compressed textures).
+	size_t rowPitch; // Distance in bytes between two consecutive rows of the level.
+};
+
+unsigned LinearToSwizzleAddress(unsigned x, unsigned y, unsigned z, unsigned log2_width, unsigned log2_height, unsigned log2_depth); // Interleaves the low bits of x, y and z into a single swizzled index.
+
+/**
+* Get the size needed to store the texture in a linear fashion.
+* Storage is assumed to use a rowPitchAlignement boundary for every row of texture.
+* The size of the top level is doubled to leave room for the mipmap levels.
+*/
+size_t getPlacedTextureStorageSpace(const RSXTexture &texture, size_t rowPitchAlignement);
+
+/**
+* Write texture data to textureData and return the placement of every mipmap level.
+* Data are not packed: rows are stored using an aligned row pitch.
+* NOTE(review): the writers in TextureUtils.cpp use a fixed 256 byte row pitch alignment
+* instead of rowPitchAlignement, and only the linear and compressed writers round the
+* offset of every mipmap level up (to 512 bytes) - confirm this matches the callers.
+*/
+std::vector<MipmapLevelInfo> uploadPlacedTexture(const RSXTexture &texture, size_t rowPitchAlignement, void* textureData);
--- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp
+++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp
@ -5,6 +5,7 @@

 #include "D3D12GSRender.h"
 #include "d3dx12.h"
+#include "../Common/BufferUtils.h"

 const int g_vertexCount = 32;

@ -90,14 +91,6 @@ DXGI_FORMAT getFormat(u8 type, u8 size)
 	}
 }

-struct VertexBufferFormat
-{
-	std::pair<size_t, size_t> range;
-	std::vector<size_t> attributeId;
-	size_t elementCount;
-	size_t stride;
-};
-
 static
 std::vector<D3D12_INPUT_ELEMENT_DESC> getIALayout(ID3D12Device *device, const std::vector<VertexBufferFormat> &vertexBufferFormat, const RSXVertexData *m_vertex_data, size_t baseOffset)
 {
@ -122,89 +115,9 @@ std::vector<D3D12_INPUT_ELEMENT_DESC> getIALayout(ID3D12Device *device, const st
 	return result;
 }

-template<typename IndexType, typename DstType, typename SrcType>
-void expandIndexedTriangleFan(DstType *dst, const SrcType *src, size_t indexCount)
-{
-	IndexType *typedDst = reinterpret_cast<IndexType *>(dst);
-	const IndexType *typedSrc = reinterpret_cast<const IndexType *>(src);
-	for (unsigned i = 0; i < indexCount - 2; i++)
-	{
-		typedDst[3 * i] = typedSrc[0];
-		typedDst[3 * i + 1] = typedSrc[i + 2 - 1];
-		typedDst[3 * i + 2] = typedSrc[i + 2];
-	}
-}
-
-template<typename IndexType, typename DstType, typename SrcType>
-void expandIndexedQuads(DstType *dst, const SrcType *src, size_t indexCount)
-{
-	IndexType *typedDst = reinterpret_cast<IndexType *>(dst);
-	const IndexType *typedSrc = reinterpret_cast<const IndexType *>(src);
-	for (unsigned i = 0; i < indexCount / 4; i++)
-	{
-		// First triangle
-		typedDst[6 * i] = typedSrc[4 * i];
-		typedDst[6 * i + 1] = typedSrc[4 * i + 1];
-		typedDst[6 * i + 2] = typedSrc[4 * i + 2];
-		// Second triangle
-		typedDst[6 * i + 3] = typedSrc[4 * i + 2];
-		typedDst[6 * i + 4] = typedSrc[4 * i + 3];
-		typedDst[6 * i + 5] = typedSrc[4 * i];
-	}
-}
-
-
 // D3D12GS member handling buffers


-
-#define MIN2(x, y) ((x) < (y)) ? (x) : (y)
-#define MAX2(x, y) ((x) > (y)) ? (x) : (y)
-
-static
-bool overlaps(const std::pair<size_t, size_t> &range1, const std::pair<size_t, size_t> &range2)
-{
-	return !(range1.second < range2.first || range2.second < range1.first);
-}
-
-static
-std::vector<VertexBufferFormat> FormatVertexData(const RSXVertexData *m_vertex_data, size_t base_offset)
-{
-	std::vector<VertexBufferFormat> Result;
-	for (size_t i = 0; i < 32; ++i)
-	{
-		const RSXVertexData &vertexData = m_vertex_data[i];
-		if (!vertexData.IsEnabled()) continue;
-
-		size_t elementCount = vertexData.data.size() / (vertexData.size * vertexData.GetTypeSize());
-		// If there is a single element, stride is 0, use the size of element instead
-		size_t stride = vertexData.stride;
-		size_t elementSize = vertexData.GetTypeSize();
-		std::pair<size_t, size_t> range = std::make_pair(vertexData.addr + base_offset, vertexData.addr + base_offset + elementSize * vertexData.size + (elementCount - 1) * stride - 1);
-		bool isMerged = false;
-
-		for (VertexBufferFormat &vbf : Result)
-		{
-			if (overlaps(vbf.range, range) && vbf.stride == stride)
-			{
-				// Extend buffer if necessary
-				vbf.range.first = MIN2(vbf.range.first, range.first);
-				vbf.range.second = MAX2(vbf.range.second, range.second);
-				vbf.elementCount = MAX2(vbf.elementCount, elementCount);
-
-				vbf.attributeId.push_back(i);
-				isMerged = true;
-				break;
-			}
-		}
-		if (isMerged)
-			continue;
-		VertexBufferFormat newRange = { range, std::vector<size_t>{ i }, elementCount, stride };
-		Result.emplace_back(newRange);
-	}
-	return Result;
-}
-
 /**
 * Suballocate a new vertex buffer with attributes from vbf using vertexIndexHeap as storage heap.
 */
@ -221,67 +134,15 @@ D3D12_GPU_VIRTUAL_ADDRESS createVertexBuffer(const VertexBufferFormat &vbf, cons
 	void *buffer;
 	ThrowIfFailed(vertexIndexHeap.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize), (void**)&buffer));
 	void *bufferMap = (char*)buffer + heapOffset;
-	for (int vertex = 0; vertex < vbf.elementCount; vertex++)
-	{
-		for (size_t attributeId : vbf.attributeId)
-		{
-			if (!vertexData[attributeId].addr)
-			{
-				memcpy(bufferMap, vertexData[attributeId].data.data(), vertexData[attributeId].data.size());
-				continue;
-			}
-			size_t offset = (size_t)vertexData[attributeId].addr + baseOffset - vbf.range.first;
-			size_t tsize = vertexData[attributeId].GetTypeSize();
-			size_t size = vertexData[attributeId].size;
-			auto src = vm::get_ptr<const u8>(vertexData[attributeId].addr + (u32)baseOffset + (u32)vbf.stride * vertex);
-			char* dst = (char*)bufferMap + offset + vbf.stride * vertex;
-
-			switch (tsize)
-			{
-			case 1:
-			{
-				memcpy(dst, src, size);
-				break;
-			}
-
-			case 2:
-			{
-				const u16* c_src = (const u16*)src;
-				u16* c_dst = (u16*)dst;
-				for (u32 j = 0; j < size; ++j) *c_dst++ = _byteswap_ushort(*c_src++);
-				break;
-			}
-
-			case 4:
-			{
-				const u32* c_src = (const u32*)src;
-				u32* c_dst = (u32*)dst;
-				for (u32 j = 0; j < size; ++j) *c_dst++ = _byteswap_ulong(*c_src++);
-				break;
-			}
-			}
-		}
-	}
-
+	uploadVertexData(vbf, vertexData, baseOffset, bufferMap);
 	vertexIndexHeap.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize));
 	return vertexIndexHeap.m_heap->GetGPUVirtualAddress() + heapOffset;
 }

-static bool
-isContained(const std::vector<std::pair<u32, u32> > &ranges, const std::pair<u32, u32> &range)
-{
-	for (auto &r : ranges)
-	{
-		if (r == range)
-			return true;
-	}
-	return false;
-}
-
 std::vector<D3D12_VERTEX_BUFFER_VIEW> D3D12GSRender::UploadVertexBuffers(bool indexed_draw)
 {
 	std::vector<D3D12_VERTEX_BUFFER_VIEW> result;
-	const std::vector<VertexBufferFormat> &vertexBufferFormat = FormatVertexData(m_vertex_data, m_vertex_data_base_offset);
+	const std::vector<VertexBufferFormat> &vertexBufferFormat = FormatVertexData(m_vertex_data, m_vertexBufferSize, m_vertex_data_base_offset);
 	m_IASet = getIALayout(m_device.Get(), vertexBufferFormat, m_vertex_data, m_vertex_data_base_offset);

 	const u32 data_offset = indexed_draw ? 0 : m_draw_array_first;
@ -310,29 +171,9 @@ std::vector<D3D12_VERTEX_BUFFER_VIEW> D3D12GSRender::UploadVertexBuffers(bool in
 D3D12_INDEX_BUFFER_VIEW D3D12GSRender::uploadIndexBuffers(bool indexed_draw)
 {
 	D3D12_INDEX_BUFFER_VIEW indexBufferView = {};
-	// Only handle quads and triangle fan now
-	bool forcedIndexBuffer = false;
-	switch (m_draw_mode - 1)
-	{
-	default:
-	case GL_POINTS:
-	case GL_LINES:
-	case GL_LINE_LOOP:
-	case GL_LINE_STRIP:
-	case GL_TRIANGLES:
-	case GL_TRIANGLE_STRIP:
-	case GL_QUAD_STRIP:
-	case GL_POLYGON:
-		forcedIndexBuffer = false;
-		break;
-	case GL_TRIANGLE_FAN:
-	case GL_QUADS:
-		forcedIndexBuffer = true;
-		break;
-	}

 	// No need for index buffer
-	if (!indexed_draw && !forcedIndexBuffer)
+	if (!indexed_draw && isNativePrimitiveMode(m_draw_mode))
 	{
 		m_renderingInfo.m_indexed = false;
 		m_renderingInfo.m_count = m_draw_array_count;
@ -366,35 +207,10 @@ D3D12_INDEX_BUFFER_VIEW D3D12GSRender::uploadIndexBuffers(bool indexed_draw)
 	}

 	// Index count
-	if (indexed_draw && !forcedIndexBuffer)
-		m_renderingInfo.m_count = m_indexed_array.m_data.size() / indexSize;
-	else if (indexed_draw && forcedIndexBuffer)
-	{
-		switch (m_draw_mode - 1)
-		{
-		case GL_TRIANGLE_FAN:
-			m_renderingInfo.m_count = (m_indexed_array.m_data.size() - 2) * 3;
-			break;
-		case GL_QUADS:
-			m_renderingInfo.m_count = 6 * m_indexed_array.m_data.size() / (4 * indexSize);
-			break;
-		}
-	}
-	else
-	{
-		switch (m_draw_mode - 1)
-		{
-		case GL_TRIANGLE_FAN:
-			m_renderingInfo.m_count = (m_draw_array_count - 2) * 3;
-			break;
-		case GL_QUADS:
-			m_renderingInfo.m_count = m_draw_array_count * 6 / 4;
-			break;
-		}
-	}
+	m_renderingInfo.m_count = getIndexCount(m_draw_mode, indexed_draw ? (u32)(m_indexed_array.m_data.size() / indexSize) : m_draw_array_count);

 	// Base vertex
-	if (!indexed_draw && forcedIndexBuffer)
+	if (!indexed_draw && !isNativePrimitiveMode(m_draw_mode)) // same case as the old forcedIndexBuffer: non-indexed draw of a non-native primitive mode
 		m_renderingInfo.m_baseVertex = m_draw_array_first;
 	else
 		m_renderingInfo.m_baseVertex = 0;
@ -408,70 +224,9 @@ D3D12_INDEX_BUFFER_VIEW D3D12GSRender::uploadIndexBuffers(bool indexed_draw)
 	void *buffer;
 	ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize), (void**)&buffer));
 	void *bufferMap = (char*)buffer + heapOffset;
-	if (indexed_draw && !forcedIndexBuffer)
-		streamBuffer(bufferMap, m_indexed_array.m_data.data(), subBufferSize);
-	else if (indexed_draw && forcedIndexBuffer)
-	{
-		// Only quads supported now
-		switch (m_draw_mode - 1)
-		{
-		case GL_TRIANGLE_FAN:
-			switch (m_indexed_array.m_type)
-			{
-			case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32:
-				expandIndexedTriangleFan<unsigned int>(bufferMap, m_indexed_array.m_data.data(), m_indexed_array.m_data.size() / 4);
-				break;
-			case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16:
-				expandIndexedTriangleFan<unsigned short>(bufferMap, m_indexed_array.m_data.data(), m_indexed_array.m_data.size() / 2);
-				break;
-			}
-			break;
-		case GL_QUADS:
-			switch (m_indexed_array.m_type)
-			{
-			case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32:
-				expandIndexedQuads<unsigned int>(bufferMap, m_indexed_array.m_data.data(), m_indexed_array.m_data.size() / 4);
-				break;
-			case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16:
-				expandIndexedQuads<unsigned short>(bufferMap, m_indexed_array.m_data.data(), m_indexed_array.m_data.size() / 2);
-				break;
-			}
-			break;
-		}
-	}
-	else
-	{
-		unsigned short *typedDst = static_cast<unsigned short *>(bufferMap);
-		switch (m_draw_mode - 1)
-		{
-		case GL_TRIANGLE_FAN:
-			for (unsigned i = 0; i < (m_draw_array_count - 2); i++)
-			{
-				typedDst[3 * i] = 0;
-				typedDst[3 * i + 1] = i + 2 - 1;
-				typedDst[3 * i + 2] = i + 2;
-			}
-			break;
-		case GL_QUADS:
-			for (unsigned i = 0; i < m_draw_array_count / 4; i++)
-			{
-				// First triangle
-				typedDst[6 * i] = 4 * i;
-				typedDst[6 * i + 1] = 4 * i + 1;
-				typedDst[6 * i + 2] = 4 * i + 2;
-				// Second triangle
-				typedDst[6 * i + 3] = 4 * i + 2;
-				typedDst[6 * i + 4] = 4 * i + 3;
-				typedDst[6 * i + 5] = 4 * i;
-			}
-			break;
-		}
-
-	}
+	uploadIndexData(m_draw_mode, m_indexed_array.m_type, indexed_draw ? m_indexed_array.m_data.data() : nullptr, bufferMap, indexed_draw ? (u32)(m_indexed_array.m_data.size() / indexSize) : m_draw_array_count);
 	m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize));
-
 	m_timers.m_bufferUploadSize += subBufferSize;
-
 	indexBufferView.SizeInBytes = (UINT)subBufferSize;
 	indexBufferView.BufferLocation = m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heapOffset;
 	return indexBufferView;
@ -505,22 +260,20 @@ void D3D12GSRender::setScaleOffset()

 	// Scale offset buffer
 	// Separate constant buffer
-	D3D12_RANGE range = { heapOffset, heapOffset + 256 };
-
 	void *scaleOffsetMap;
-	ThrowIfFailed(m_constantsData.m_heap->Map(0, &range, &scaleOffsetMap));
+	ThrowIfFailed(m_constantsData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + 256), &scaleOffsetMap));
 	streamToBuffer((char*)scaleOffsetMap + heapOffset, scaleOffsetMat, 16 * sizeof(float));
 	int isAlphaTested = m_set_alpha_test;
 	memcpy((char*)scaleOffsetMap + heapOffset + 16 * sizeof(float), &isAlphaTested, sizeof(int));
 	memcpy((char*)scaleOffsetMap + heapOffset + 17 * sizeof(float), &m_alpha_ref, sizeof(float));
-	m_constantsData.m_heap->Unmap(0, &range);
+	m_constantsData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + 256));

 	D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {};
 	constantBufferViewDesc.BufferLocation = m_constantsData.m_heap->GetGPUVirtualAddress() + heapOffset;
 	constantBufferViewDesc.SizeInBytes = (UINT)256;
-	D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_scaleOffsetDescriptorHeap->GetCPUDescriptorHandleForHeapStart();
-	Handle.ptr += getCurrentResourceStorage().m_currentScaleOffsetBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
-	m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle);
+	m_device->CreateConstantBufferView(&constantBufferViewDesc,
+		CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_scaleOffsetDescriptorHeap->GetCPUDescriptorHandleForHeapStart())
+		.Offset((INT)getCurrentResourceStorage().m_currentScaleOffsetBufferIndex, g_descriptorStrideSRVCBVUAV));
 }

 void D3D12GSRender::FillVertexShaderConstantsBuffer()
@ -536,10 +289,8 @@ void D3D12GSRender::FillVertexShaderConstantsBuffer()
 	assert(m_constantsData.canAlloc(bufferSize));
 	size_t heapOffset = m_constantsData.alloc(bufferSize);

-	D3D12_RANGE range = { heapOffset, heapOffset + bufferSize };
-
 	void *constantsBufferMap;
-	ThrowIfFailed(m_constantsData.m_heap->Map(0, &range, &constantsBufferMap));
+	ThrowIfFailed(m_constantsData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + bufferSize), &constantsBufferMap));
 	for (const auto &vertexConstants : m_vertexConstants)
 	{
 		float data[4] = {
@ -550,14 +301,14 @@ void D3D12GSRender::FillVertexShaderConstantsBuffer()
 		};
 		streamToBuffer((char*)constantsBufferMap + heapOffset + vertexConstants.first, data, 4 * sizeof(float));
 	}
-	m_constantsData.m_heap->Unmap(0, &range);
+	m_constantsData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + bufferSize));

 	D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {};
 	constantBufferViewDesc.BufferLocation = m_constantsData.m_heap->GetGPUVirtualAddress() + heapOffset;
 	constantBufferViewDesc.SizeInBytes = (UINT)bufferSize;
-	D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart();
-	Handle.ptr += getCurrentResourceStorage().m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
-	m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle);
+	m_device->CreateConstantBufferView(&constantBufferViewDesc,
+		CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart())
+		.Offset((INT)getCurrentResourceStorage().m_constantsBufferIndex, g_descriptorStrideSRVCBVUAV));
 }

 void D3D12GSRender::FillPixelShaderConstantsBuffer()
@ -571,11 +322,9 @@ void D3D12GSRender::FillPixelShaderConstantsBuffer()
 	assert(m_constantsData.canAlloc(bufferSize));
 	size_t heapOffset = m_constantsData.alloc(bufferSize);

-	D3D12_RANGE range = { heapOffset, heapOffset + bufferSize };
-
 	size_t offset = 0;
 	void *constantsBufferMap;
-	ThrowIfFailed(m_constantsData.m_heap->Map(0, &range, &constantsBufferMap));
+	ThrowIfFailed(m_constantsData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + bufferSize), &constantsBufferMap));
 	for (size_t offsetInFP : fragmentOffset)
 	{
 		u32 vector[4];
@ -613,14 +362,14 @@ void D3D12GSRender::FillPixelShaderConstantsBuffer()
 		streamToBuffer((char*)constantsBufferMap + heapOffset + offset, vector, 4 * sizeof(u32));
 		offset += 4 * sizeof(u32);
 	}
-	m_constantsData.m_heap->Unmap(0, &range);
+	m_constantsData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + bufferSize));

 	D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {};
 	constantBufferViewDesc.BufferLocation = m_constantsData.m_heap->GetGPUVirtualAddress() + heapOffset;
 	constantBufferViewDesc.SizeInBytes = (UINT)bufferSize;
-	D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart();
-	Handle.ptr += getCurrentResourceStorage().m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
-	m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle);
+	m_device->CreateConstantBufferView(&constantBufferViewDesc,
+		CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart())
+		.Offset((INT)getCurrentResourceStorage().m_constantsBufferIndex, g_descriptorStrideSRVCBVUAV));
 }


--- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp
+++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp
@ -7,12 +7,15 @@
 #include <thread>
 #include <chrono>
 #include "d3dx12.h"
+#include <d3d11on12.h>

 PFN_D3D12_CREATE_DEVICE wrapD3D12CreateDevice;
 PFN_D3D12_GET_DEBUG_INTERFACE wrapD3D12GetDebugInterface;
 PFN_D3D12_SERIALIZE_ROOT_SIGNATURE wrapD3D12SerializeRootSignature;
+PFN_D3D11ON12_CREATE_DEVICE wrapD3D11On12CreateDevice;

 static HMODULE D3D12Module;
+static HMODULE D3D11Module;

 static void loadD3D12FunctionPointers()
 {
@ -20,11 +23,14 @@ static void loadD3D12FunctionPointers()
 	wrapD3D12CreateDevice = (PFN_D3D12_CREATE_DEVICE)GetProcAddress(D3D12Module, "D3D12CreateDevice");
 	wrapD3D12GetDebugInterface = (PFN_D3D12_GET_DEBUG_INTERFACE)GetProcAddress(D3D12Module, "D3D12GetDebugInterface");
 	wrapD3D12SerializeRootSignature = (PFN_D3D12_SERIALIZE_ROOT_SIGNATURE)GetProcAddress(D3D12Module, "D3D12SerializeRootSignature");
+	D3D11Module = LoadLibrary(L"d3d11.dll");
+	wrapD3D11On12CreateDevice = (PFN_D3D11ON12_CREATE_DEVICE)GetProcAddress(D3D11Module, "D3D11On12CreateDevice");
 }

 static void unloadD3D12FunctionPointers()
 {
 	FreeLibrary(D3D12Module);
+	FreeLibrary(D3D11Module);
 }

 GetGSFrameCb2 GetGSFrame = nullptr;
@ -95,6 +101,8 @@ void D3D12GSRender::ResourceStorage::WaitAndClean()

 void D3D12GSRender::ResourceStorage::Release()
 {
+	for (auto tmp : m_dirtyTextures)
+		tmp->Release();
 	// NOTE: Should be released only after gfx pipeline last command has been finished.
 	CloseHandle(m_frameFinishedHandle);
 }
@ -224,49 +232,28 @@ D3D12GSRender::D3D12GSRender()
 	// Common root signatures
 	for (unsigned textureCount = 0; textureCount < 17; textureCount++)
 	{
-		D3D12_DESCRIPTOR_RANGE descriptorRange[4] = {};
-		// Scale Offset data
-		descriptorRange[0].BaseShaderRegister = 0;
-		descriptorRange[0].NumDescriptors = 1;
-		descriptorRange[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
-		// Constants
-		descriptorRange[1].BaseShaderRegister = 1;
-		descriptorRange[1].NumDescriptors = 2;
-		descriptorRange[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
-		// Textures
-		descriptorRange[2].BaseShaderRegister = 0;
-		descriptorRange[2].NumDescriptors = textureCount;
-		descriptorRange[2].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
-		// Samplers
-		descriptorRange[3].BaseShaderRegister = 0;
-		descriptorRange[3].NumDescriptors = textureCount;
-		descriptorRange[3].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER;
-		D3D12_ROOT_PARAMETER RP[4] = {};
-		RP[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
-		RP[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
-		RP[0].DescriptorTable.pDescriptorRanges = &descriptorRange[0];
-		RP[0].DescriptorTable.NumDescriptorRanges = 1;
-		RP[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
-		RP[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
-		RP[1].DescriptorTable.pDescriptorRanges = &descriptorRange[1];
-		RP[1].DescriptorTable.NumDescriptorRanges = 1;
-		RP[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
-		RP[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
-		RP[2].DescriptorTable.pDescriptorRanges = &descriptorRange[2];
-		RP[2].DescriptorTable.NumDescriptorRanges = 1;
-		RP[3].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
-		RP[3].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
-		RP[3].DescriptorTable.pDescriptorRanges = &descriptorRange[3];
-		RP[3].DescriptorTable.NumDescriptorRanges = 1;
-
-		D3D12_ROOT_SIGNATURE_DESC rootSignatureDesc = {};
-		rootSignatureDesc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT;
-		rootSignatureDesc.NumParameters = (textureCount > 0) ? 4 : 2;
-		rootSignatureDesc.pParameters = RP;
+		CD3DX12_DESCRIPTOR_RANGE descriptorRange[] =
+		{
+			// Scale Offset data
+			CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 1, 0),
+			// Constants
+			CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 2, 1),
+			// Textures
+			CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, textureCount, 0),
+			// Samplers
+			CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, textureCount, 0),
+		};
+		CD3DX12_ROOT_PARAMETER RP[4];
+		RP[0].InitAsDescriptorTable(1, &descriptorRange[0]);
+		RP[1].InitAsDescriptorTable(1, &descriptorRange[1]);
+		RP[2].InitAsDescriptorTable(1, &descriptorRange[2]);
+		RP[3].InitAsDescriptorTable(1, &descriptorRange[3]);

 		Microsoft::WRL::ComPtr<ID3DBlob> rootSignatureBlob;
 		Microsoft::WRL::ComPtr<ID3DBlob> errorBlob;
-		ThrowIfFailed(wrapD3D12SerializeRootSignature(&rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob));
+		ThrowIfFailed(wrapD3D12SerializeRootSignature(
+			&CD3DX12_ROOT_SIGNATURE_DESC((textureCount > 0) ? 4 : 2, RP, 0, 0, D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT),
+			D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob));

 		m_device->CreateRootSignature(0,
 			rootSignatureBlob->GetBufferPointer(),
@ -299,7 +286,7 @@ D3D12GSRender::D3D12GSRender()

 	m_constantsData.Init(m_device.Get(), 1024 * 1024 * 64, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE);
 	m_vertexIndexData.Init(m_device.Get(), 1024 * 1024 * 384, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE);
-	m_textureUploadData.Init(m_device.Get(), 1024 * 1024 * 256, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS);
+	m_textureUploadData.Init(m_device.Get(), 1024 * 1024 * 256, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE);

 	if (Ini.GSOverlay.GetValue())
 		InitD2DStructures();
@ -475,7 +462,6 @@ void D3D12GSRender::Draw()
 	std::chrono::time_point<std::chrono::system_clock> vertexIndexDurationStart = std::chrono::system_clock::now();

 	// Init vertex count
-	// TODO: Very hackish, clean this
 	if (m_indexed_array.m_count)
 	{
 		for (u32 i = 0; i < m_vertex_count; ++i)
@ -484,7 +470,7 @@ void D3D12GSRender::Draw()
 			if (!m_vertex_data[i].addr) continue;

 			const u32 tsize = m_vertex_data[i].GetTypeSize();
-			m_vertex_data[i].data.resize((m_indexed_array.index_min + m_indexed_array.index_max - m_indexed_array.index_min + 1) * tsize * m_vertex_data[i].size);
+			m_vertexBufferSize[i] = (m_indexed_array.index_min + m_indexed_array.index_max - m_indexed_array.index_min + 1) * tsize * m_vertex_data[i].size;
 		}
 	}
 	else
@ -495,7 +481,7 @@ void D3D12GSRender::Draw()
 			if (!m_vertex_data[i].addr) continue;

 			const u32 tsize = m_vertex_data[i].GetTypeSize();
-			m_vertex_data[i].data.resize((m_draw_array_first + m_draw_array_count) * tsize * m_vertex_data[i].size);
+			m_vertexBufferSize[i] = (m_draw_array_first + m_draw_array_count) * tsize * m_vertex_data[i].size;
 		}
 	}

@ -727,7 +713,7 @@ void D3D12GSRender::Flip()

 		size_t w = 0, h = 0, rowPitch = 0;

-		ID3D12Resource *stagingTexture;
+		size_t offset = 0;
 		if (m_read_buffer)
 		{
 			CellGcmDisplayInfo* buffers = vm::get_ptr<CellGcmDisplayInfo>(m_gcm_buffers_addr);
@ -741,21 +727,13 @@ void D3D12GSRender::Flip()
 			assert(m_textureUploadData.canAlloc(textureSize));
 			size_t heapOffset = m_textureUploadData.alloc(textureSize);

-			ThrowIfFailed(m_device->CreatePlacedResource(
-				m_textureUploadData.m_heap,
-				heapOffset,
-				&CD3DX12_RESOURCE_DESC::Buffer(textureSize),
-				D3D12_RESOURCE_STATE_GENERIC_READ,
-				nullptr,
-				IID_PPV_ARGS(&stagingTexture)
-				));
-			getCurrentResourceStorage().m_singleFrameLifetimeResources.push_back(stagingTexture);
-
-			void *dstBuffer;
-			ThrowIfFailed(stagingTexture->Map(0, nullptr, &dstBuffer));
+			void *buffer;
+			ThrowIfFailed(m_textureUploadData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + textureSize), &buffer));
+			void *dstBuffer = (char*)buffer + heapOffset;
 			for (unsigned row = 0; row < h; row++)
 				memcpy((char*)dstBuffer + row * rowPitch, (char*)src_buffer + row * w * 4, w * 4);
-			stagingTexture->Unmap(0, nullptr);
+			m_textureUploadData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + textureSize));
+			offset = heapOffset;
 		}

 		ThrowIfFailed(
@ -769,7 +747,7 @@ void D3D12GSRender::Flip()
 				)
 			);
 		getCurrentResourceStorage().m_commandList->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(storage.m_RAMFramebuffer.Get(), 0), 0, 0, 0,
-			&CD3DX12_TEXTURE_COPY_LOCATION(stagingTexture, { 0, { DXGI_FORMAT_R8G8B8A8_UNORM, (UINT)w, (UINT)h, 1, (UINT)rowPitch} }), nullptr);
+			&CD3DX12_TEXTURE_COPY_LOCATION(m_textureUploadData.m_heap, { offset, { DXGI_FORMAT_R8G8B8A8_UNORM, (UINT)w, (UINT)h, 1, (UINT)rowPitch} }), nullptr);

 		getCurrentResourceStorage().m_commandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(storage.m_RAMFramebuffer.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_GENERIC_READ));
 		resourceToFlip = storage.m_RAMFramebuffer.Get();
@ -860,6 +838,10 @@ void D3D12GSRender::Flip()
 	if(Ini.GSOverlay.GetValue())
 		renderOverlay();

+	ResetTimer();
+
+	std::chrono::time_point<std::chrono::system_clock> flipStart = std::chrono::system_clock::now();
+
 	ThrowIfFailed(m_swapChain->Present(Ini.GSVSyncEnable.GetValue() ? 1 : 0, 0));
 	// Add an event signaling queue completion

@ -890,15 +872,20 @@ void D3D12GSRender::Flip()
 	ResourceStorage &newStorage = getCurrentResourceStorage();

 	newStorage.WaitAndClean();
-	m_constantsData.m_getPos.store(newStorage.m_getPosConstantsHeap, std::memory_order_release);
-	m_vertexIndexData.m_getPos.store(newStorage.m_getPosVertexIndexHeap, std::memory_order_release);
-	m_textureUploadData.m_getPos.store(newStorage.m_getPosTextureUploadHeap, std::memory_order_release);
-	m_readbackResources.m_getPos.store(newStorage.m_getPosReadbackHeap, std::memory_order_release);
-	m_UAVHeap.m_getPos.store(newStorage.m_getPosUAVHeap, std::memory_order_release);
+	if (newStorage.m_inUse)
+	{
+		m_constantsData.m_getPos = newStorage.m_getPosConstantsHeap;
+		m_vertexIndexData.m_getPos = newStorage.m_getPosVertexIndexHeap;
+		m_textureUploadData.m_getPos = newStorage.m_getPosTextureUploadHeap;
+		m_readbackResources.m_getPos = newStorage.m_getPosReadbackHeap;
+		m_UAVHeap.m_getPos = newStorage.m_getPosUAVHeap;
+	}

 	m_frame->Flip(nullptr);

-	ResetTimer();
+
+	std::chrono::time_point<std::chrono::system_clock> flipEnd = std::chrono::system_clock::now();
+	m_timers.m_flipDuration += std::chrono::duration_cast<std::chrono::microseconds>(flipEnd - flipStart).count();
 }

 void D3D12GSRender::ResetTimer()
@ -911,6 +898,7 @@ void D3D12GSRender::ResetTimer()
 	m_timers.m_programLoadDuration = 0;
 	m_timers.m_constantsDuration = 0;
 	m_timers.m_textureDuration = 0;
+	m_timers.m_flipDuration = 0;
 }

 D3D12GSRender::ResourceStorage& D3D12GSRender::getCurrentResourceStorage()
--- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h
+++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h
@ -119,14 +119,14 @@ struct DataHeap
 	T *m_heap;
 	size_t m_size;
 	size_t m_putPos; // Start of free space
-	std::atomic<size_t> m_getPos; // End of free space
+	size_t m_getPos; // End of free space

 	void Init(ID3D12Device *device, size_t heapSize, D3D12_HEAP_TYPE type, D3D12_HEAP_FLAGS flags)
 	{
 		m_size = heapSize;
 		m_heap = InitHeap<T>::Init(device, heapSize, type, flags);
 		m_putPos = 0;
-		m_getPos = m_size - 1;
+		m_getPos = heapSize - 1;
 	}

 	/**
@ -135,7 +135,7 @@ struct DataHeap
 	bool canAlloc(size_t size) const
 	{
 		size_t allocSize = align(size, Alignment);
-		size_t currentGetPos = m_getPos.load();
+		size_t currentGetPos = m_getPos;
 		if (m_putPos + allocSize < m_size)
 		{
 			// range before get
@ -232,6 +232,8 @@ private:
 	PipelineStateObjectCache m_cachePSO;
 	std::pair<ID3D12PipelineState *, size_t> *m_PSO;

+	size_t m_vertexBufferSize[32];
+
 	struct
 	{
 		size_t m_drawCallDuration;
@ -242,6 +244,7 @@ private:
 		size_t m_programLoadDuration;
 		size_t m_constantsDuration;
 		size_t m_textureDuration;
+		size_t m_flipDuration;
 	} m_timers;

 	void ResetTimer();
@ -332,7 +335,7 @@ private:
 	// Vertex storage
 	DataHeap<ID3D12Resource, 65536> m_vertexIndexData;
 	// Texture storage
-	DataHeap<ID3D12Heap, 65536> m_textureUploadData;
+	DataHeap<ID3D12Resource, 65536> m_textureUploadData;
 	DataHeap<ID3D12Heap, 65536> m_UAVHeap;
 	DataHeap<ID3D12Heap, 65536> m_readbackResources;

--- a/rpcs3/Emu/RSX/D3D12/D3D12Overlay.cpp
+++ b/rpcs3/Emu/RSX/D3D12/D3D12Overlay.cpp
@ -22,11 +22,12 @@ ComPtr<ID2D1SolidColorBrush> m_textBrush;

 #pragma comment (lib, "d2d1.lib")
 #pragma comment (lib, "dwrite.lib")
-#pragma comment (lib, "d3d11.lib")
+
+extern PFN_D3D11ON12_CREATE_DEVICE wrapD3D11On12CreateDevice;

 void D3D12GSRender::InitD2DStructures()
 {
-	D3D11On12CreateDevice(
+	wrapD3D11On12CreateDevice(
 		m_device.Get(),
 		D3D11_CREATE_DEVICE_BGRA_SUPPORT,
 		nullptr,
@ -129,6 +130,7 @@ void D3D12GSRender::renderOverlay()
 	std::wstring constantDuration = L"Constants : " + std::to_wstring(m_timers.m_constantsDuration) + L" us (" + std::to_wstring(100.f * constantsPercent) + L" %)";
 	float rttPercent = (float)m_timers.m_rttDuration / (float)m_timers.m_drawCallDuration;
 	std::wstring rttDuration = L"RTT : " + std::to_wstring(m_timers.m_rttDuration) + L" us (" + std::to_wstring(100.f * rttPercent) + L" %)";
+	std::wstring flipDuration = L"Flip : " + std::to_wstring(m_timers.m_flipDuration) + L" us";

 	std::wstring count = L"Draw count : " + std::to_wstring(m_timers.m_drawCallCount);

@ -195,6 +197,13 @@ void D3D12GSRender::renderOverlay()
 		&D2D1::RectF(0, 98, rtSize.width, rtSize.height),
 		m_textBrush.Get()
 		);
+	m_d2dDeviceContext->DrawTextW(
+		flipDuration.c_str(),
+		(UINT32)flipDuration.size(),
+		m_textFormat.Get(),
+		&D2D1::RectF(0, 112, rtSize.width, rtSize.height),
+		m_textBrush.Get()
+		);
 	m_d2dDeviceContext->EndDraw();

 	// Release our wrapped render target resource. Releasing 
--- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp
+++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp
@ -2,40 +2,9 @@
 #if defined(DX12_SUPPORT)
 #include "D3D12GSRender.h"
 #include "d3dx12.h"
+#include "../Common/TextureUtils.h"
 // For clarity this code deals with texture but belongs to D3D12GSRender class

-
-static
-u32 LinearToSwizzleAddress(u32 x, u32 y, u32 z, u32 log2_width, u32 log2_height, u32 log2_depth)
-{
-	u32 offset = 0;
-	u32 shift_count = 0;
-	while (log2_width | log2_height | log2_depth) {
-		if (log2_width)
-		{
-			offset |= (x & 0x01) << shift_count;
-			x >>= 1;
-			++shift_count;
-			--log2_width;
-		}
-		if (log2_height)
-		{
-			offset |= (y & 0x01) << shift_count;
-			y >>= 1;
-			++shift_count;
-			--log2_height;
-		}
-		if (log2_depth)
-		{
-			offset |= (z & 0x01) << shift_count;
-			z >>= 1;
-			++shift_count;
-			--log2_depth;
-		}
-	}
-	return offset;
-}
-
 static
 D3D12_COMPARISON_FUNC getSamplerCompFunc[] =
 {
@ -151,239 +120,6 @@ D3D12_SAMPLER_DESC getSamplerDesc(const RSXTexture &texture)
 	return samplerDesc;
 }

-struct MipmapLevelInfo
-{
-	size_t offset;
-	size_t width;
-	size_t height;
-	size_t rowPitch;
-};
-
-#define MAX2(a, b) ((a) > (b)) ? (a) : (b)
-
-/**
- * Write data, assume src pixels are packed but not mipmaplevel
- */
-static std::vector<MipmapLevelInfo>
-writeTexelsGeneric(const char *src, char *dst, size_t widthInBlock, size_t heightInBlock, size_t blockSize, size_t mipmapCount)
-{
-	std::vector<MipmapLevelInfo> Result;
-	size_t offsetInDst = 0, offsetInSrc = 0;
-	size_t currentHeight = heightInBlock, currentWidth = widthInBlock;
-	for (unsigned mipLevel = 0; mipLevel < mipmapCount; mipLevel++)
-	{
-		size_t rowPitch = align(currentWidth * blockSize, 256);
-
-		MipmapLevelInfo currentMipmapLevelInfo = {};
-		currentMipmapLevelInfo.offset = offsetInDst;
-		currentMipmapLevelInfo.height = currentHeight;
-		currentMipmapLevelInfo.width = currentWidth;
-		currentMipmapLevelInfo.rowPitch = rowPitch;
-		Result.push_back(currentMipmapLevelInfo);
-
-		for (unsigned row = 0; row < currentHeight; row++)
-			memcpy((char*)dst + offsetInDst + row * rowPitch, (char*)src + offsetInSrc + row * widthInBlock * blockSize, currentWidth * blockSize);
-
-		offsetInDst += currentHeight * rowPitch;
-		offsetInDst = align(offsetInDst, 512);
-		offsetInSrc += currentHeight * widthInBlock * blockSize;
-		currentHeight = MAX2(currentHeight / 2, 1);
-		currentWidth = MAX2(currentWidth / 2, 1);
-	}
-	return Result;
-}
-
-/**
-* Write data, assume src pixels are swizzled and but not mipmaplevel
-*/
-static std::vector<MipmapLevelInfo>
-writeTexelsSwizzled(const char *src, char *dst, size_t widthInBlock, size_t heightInBlock, size_t blockSize, size_t mipmapCount)
-{
-	std::vector<MipmapLevelInfo> Result;
-	size_t offsetInDst = 0, offsetInSrc = 0;
-	size_t currentHeight = heightInBlock, currentWidth = widthInBlock;
-	for (unsigned mipLevel = 0; mipLevel < mipmapCount; mipLevel++)
-	{
-		size_t rowPitch = align(currentWidth * blockSize, 256);
-
-		MipmapLevelInfo currentMipmapLevelInfo = {};
-		currentMipmapLevelInfo.offset = offsetInDst;
-		currentMipmapLevelInfo.height = currentHeight;
-		currentMipmapLevelInfo.width = currentWidth;
-		currentMipmapLevelInfo.rowPitch = rowPitch;
-		Result.push_back(currentMipmapLevelInfo);
-
-		u32 *castedSrc, *castedDst;
-		u32 log2width, log2height;
-
-		castedSrc = (u32*)src + offsetInSrc;
-		castedDst = (u32*)dst + offsetInDst;
-
-		log2width = (u32)(logf((float)currentWidth) / logf(2.f));
-		log2height = (u32)(logf((float)currentHeight) / logf(2.f));
-
-		for (int row = 0; row < currentHeight; row++)
-			for (int j = 0; j < currentWidth; j++)
-				castedDst[(row * rowPitch / 4) + j] = castedSrc[LinearToSwizzleAddress(j, row, 0, log2width, log2height, 0)];
-
-		offsetInDst += currentHeight * rowPitch;
-		offsetInSrc += currentHeight * widthInBlock * blockSize;
-		currentHeight = MAX2(currentHeight / 2, 1);
-		currentWidth = MAX2(currentWidth / 2, 1);
-	}
-	return Result;
-}
-
-
-/**
-* Write data, assume compressed (DXTCn) format
-*/
-static std::vector<MipmapLevelInfo>
-writeCompressedTexel(const char *src, char *dst, size_t widthInBlock, size_t blockWidth, size_t heightInBlock, size_t blockHeight, size_t blockSize, size_t mipmapCount)
-{
-	std::vector<MipmapLevelInfo> Result;
-	size_t offsetInDst = 0, offsetInSrc = 0;
-	size_t currentHeight = heightInBlock, currentWidth = widthInBlock;
-	for (unsigned mipLevel = 0; mipLevel < mipmapCount; mipLevel++)
-	{
-		size_t rowPitch = align(currentWidth * blockSize, 256);
-
-		MipmapLevelInfo currentMipmapLevelInfo = {};
-		currentMipmapLevelInfo.offset = offsetInDst;
-		currentMipmapLevelInfo.height = currentHeight * blockHeight;
-		currentMipmapLevelInfo.width = currentWidth * blockWidth;
-		currentMipmapLevelInfo.rowPitch = rowPitch;
-		Result.push_back(currentMipmapLevelInfo);
-
-		for (unsigned row = 0; row < currentHeight; row++)
-			memcpy((char*)dst + offsetInDst + row * rowPitch, (char*)src + offsetInSrc + row * currentWidth * blockSize, currentWidth * blockSize);
-
-		offsetInDst += currentHeight * rowPitch;
-		offsetInDst = align(offsetInDst, 512);
-		offsetInSrc += currentHeight * currentWidth * blockSize;
-		currentHeight = MAX2(currentHeight / 2, 1);
-		currentWidth = MAX2(currentWidth / 2, 1);
-	}
-	return Result;
-}
-
-
-/**
-* Write 16 bytes pixel textures, assume src pixels are swizzled and but not mipmaplevel
-*/
-static std::vector<MipmapLevelInfo>
-write16bTexelsSwizzled(const char *src, char *dst, size_t widthInBlock, size_t heightInBlock, size_t blockSize, size_t mipmapCount)
-{
-	std::vector<MipmapLevelInfo> Result;
-	size_t offsetInDst = 0, offsetInSrc = 0;
-	size_t currentHeight = heightInBlock, currentWidth = widthInBlock;
-	for (unsigned mipLevel = 0; mipLevel < mipmapCount; mipLevel++)
-	{
-		size_t rowPitch = align(currentWidth * blockSize, 256);
-
-		MipmapLevelInfo currentMipmapLevelInfo = {};
-		currentMipmapLevelInfo.offset = offsetInDst;
-		currentMipmapLevelInfo.height = currentHeight;
-		currentMipmapLevelInfo.width = currentWidth;
-		currentMipmapLevelInfo.rowPitch = rowPitch;
-		Result.push_back(currentMipmapLevelInfo);
-
-		u16 *castedSrc, *castedDst;
-		u16 log2width, log2height;
-
-		castedSrc = (u16*)src + offsetInSrc;
-		castedDst = (u16*)dst + offsetInDst;
-
-		log2width = (u32)(logf((float)currentWidth) / logf(2.f));
-		log2height = (u32)(logf((float)currentHeight) / logf(2.f));
-
-		for (int row = 0; row < currentHeight; row++)
-			for (int j = 0; j < currentWidth; j++)
-				castedDst[(row * rowPitch / 2) + j] = castedSrc[LinearToSwizzleAddress(j, row, 0, log2width, log2height, 0)];
-
-		offsetInDst += currentHeight * rowPitch;
-		offsetInSrc += currentHeight * widthInBlock * blockSize;
-		currentHeight = MAX2(currentHeight / 2, 1);
-		currentWidth = MAX2(currentWidth / 2, 1);
-	}
-	return Result;
-}
-
-/**
-* Write 16 bytes pixel textures, assume src pixels are packed but not mipmaplevel
-*/
-static std::vector<MipmapLevelInfo>
-write16bTexelsGeneric(const char *src, char *dst, size_t widthInBlock, size_t heightInBlock, size_t blockSize, size_t mipmapCount)
-{
-	std::vector<MipmapLevelInfo> Result;
-	size_t offsetInDst = 0, offsetInSrc = 0;
-	size_t currentHeight = heightInBlock, currentWidth = widthInBlock;
-	size_t srcPitch = widthInBlock * blockSize;
-	for (unsigned mipLevel = 0; mipLevel < mipmapCount; mipLevel++)
-	{
-		size_t rowPitch = align(currentWidth * blockSize, 256);
-
-		MipmapLevelInfo currentMipmapLevelInfo = {};
-		currentMipmapLevelInfo.offset = offsetInDst;
-		currentMipmapLevelInfo.height = currentHeight;
-		currentMipmapLevelInfo.width = currentWidth;
-		currentMipmapLevelInfo.rowPitch = rowPitch;
-		Result.push_back(currentMipmapLevelInfo);
-
-		unsigned short *castedDst = (unsigned short *)dst, *castedSrc = (unsigned short *)src;
-
-		for (unsigned row = 0; row < heightInBlock; row++)
-			for (int j = 0; j < currentWidth; j++)
-			{
-				u16 tmp = castedSrc[offsetInSrc / 2 + row * srcPitch / 2 + j];
-				castedDst[offsetInDst / 2 + row * rowPitch / 2 + j] = (tmp >> 8) | (tmp << 8);
-			}
-
-		offsetInDst += currentHeight * rowPitch;
-		offsetInSrc += currentHeight * widthInBlock * blockSize;
-		currentHeight = MAX2(currentHeight / 2, 1);
-		currentWidth = MAX2(currentWidth / 2, 1);
-	}
-	return Result;
-}
-
-/**
-* Write 16 bytes pixel textures, assume src pixels are packed but not mipmaplevel
-*/
-static std::vector<MipmapLevelInfo>
-write16bX4TexelsGeneric(const char *src, char *dst, size_t widthInBlock, size_t heightInBlock, size_t blockSize, size_t mipmapCount)
-{
-	std::vector<MipmapLevelInfo> Result;
-	size_t offsetInDst = 0, offsetInSrc = 0;
-	size_t currentHeight = heightInBlock, currentWidth = widthInBlock;
-	size_t srcPitch = widthInBlock * blockSize;
-	for (unsigned mipLevel = 0; mipLevel < mipmapCount; mipLevel++)
-	{
-		size_t rowPitch = align(currentWidth * blockSize, 256);
-
-		MipmapLevelInfo currentMipmapLevelInfo = {};
-		currentMipmapLevelInfo.offset = offsetInDst;
-		currentMipmapLevelInfo.height = currentHeight;
-		currentMipmapLevelInfo.width = currentWidth;
-		currentMipmapLevelInfo.rowPitch = rowPitch;
-		Result.push_back(currentMipmapLevelInfo);
-
-		unsigned short *castedDst = (unsigned short *)dst, *castedSrc = (unsigned short *)src;
-
-		for (unsigned row = 0; row < heightInBlock; row++)
-			for (int j = 0; j < currentWidth * 4; j++)
-			{
-				u16 tmp = castedSrc[offsetInSrc / 2 + row * srcPitch / 2 + j];
-				castedDst[offsetInDst / 2 + row * rowPitch / 2 + j] = (tmp >> 8) | (tmp << 8);
-			}
-
-		offsetInDst += currentHeight * rowPitch;
-		offsetInSrc += currentHeight * widthInBlock * blockSize;
-		currentHeight = MAX2(currentHeight / 2, 1);
-		currentWidth = MAX2(currentWidth / 2, 1);
-	}
-	return Result;
-}

 /**
 * Create a texture residing in default heap and generate uploads commands in commandList,
@@ -394,221 +130,23 @@ ID3D12Resource *uploadSingleTexture(
 	const RSXTexture &texture,
 	ID3D12Device *device,
 	ID3D12GraphicsCommandList *commandList,
-	DataHeap<ID3D12Heap, 65536> &textureBuffersHeap,
-	std::vector<ComPtr<ID3D12Resource> > &stagingRamTexture)
+	DataHeap<ID3D12Resource, 65536> &textureBuffersHeap)
 {
 	ID3D12Resource *vramTexture;
 	size_t w = texture.GetWidth(), h = texture.GetHeight();

-	size_t blockSizeInByte, blockWidthInPixel, blockHeightInPixel;
 	int format = texture.GetFormat() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
 	DXGI_FORMAT dxgiFormat = getTextureDXGIFormat(format);

-	const u32 texaddr = GetAddress(texture.GetOffset(), texture.GetLocation());
-
-	bool is_swizzled = !(texture.GetFormat() & CELL_GCM_TEXTURE_LN);
-	size_t srcPitch;
-	switch (format)
-	{
-	case CELL_GCM_TEXTURE_COMPRESSED_HILO8:
-	case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8:
-	case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
-	case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
-	default:
-		LOG_ERROR(RSX, "Unimplemented Texture format : %x", format);
-		break;
-	case CELL_GCM_TEXTURE_B8:
-		blockSizeInByte = 1;
-		blockWidthInPixel = 1, blockHeightInPixel = 1;
-		srcPitch = w;
-		break;
-	case CELL_GCM_TEXTURE_A1R5G5B5:
-		blockSizeInByte = 2;
-		blockHeightInPixel = 1, blockWidthInPixel = 1;
-		srcPitch = w * 2;
-		break;
-	case CELL_GCM_TEXTURE_A4R4G4B4:
-		blockSizeInByte = 2;
-		blockWidthInPixel = 1, blockHeightInPixel = 1;
-		srcPitch = w * 2;
-		break;
-	case CELL_GCM_TEXTURE_R5G6B5:
-		blockSizeInByte = 2;
-		blockWidthInPixel = 1, blockHeightInPixel = 1;
-		srcPitch = w * 2;
-		break;
-	case CELL_GCM_TEXTURE_A8R8G8B8:
-		blockSizeInByte = 4;
-		blockWidthInPixel = 1, blockHeightInPixel = 1;
-		srcPitch = w * 4;
-		break;
-	case CELL_GCM_TEXTURE_COMPRESSED_DXT1:
-		blockSizeInByte = 8;
-		blockWidthInPixel = 4, blockHeightInPixel = 4;
-		srcPitch = w * 2;
-		break;
-	case CELL_GCM_TEXTURE_COMPRESSED_DXT23:
-		blockSizeInByte = 16;
-		blockWidthInPixel = 4, blockHeightInPixel = 4;
-		srcPitch = w * 4;
-		break;
-	case CELL_GCM_TEXTURE_COMPRESSED_DXT45:
-		blockSizeInByte = 16;
-		blockWidthInPixel = 4, blockHeightInPixel = 4;
-		srcPitch = w * 4;
-		break;
-	case CELL_GCM_TEXTURE_G8B8:
-		blockSizeInByte = 2;
-		blockWidthInPixel = 1, blockHeightInPixel = 1;
-		srcPitch = w * 2;
-		break;
-	case CELL_GCM_TEXTURE_R6G5B5:
-		// Not native
-		blockSizeInByte = 4;
-		blockWidthInPixel = 1, blockHeightInPixel = 1;
-		srcPitch = w * 2;
-		break;
-	case CELL_GCM_TEXTURE_DEPTH24_D8:
-		blockSizeInByte = 4;
-		blockWidthInPixel = 1, blockHeightInPixel = 1;
-		srcPitch = w * 4;
-		break;
-	case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT:
-		blockSizeInByte = 4;
-		blockWidthInPixel = 1, blockHeightInPixel = 1;
-		srcPitch = w * 4;
-		break;
-	case CELL_GCM_TEXTURE_DEPTH16:
-		blockSizeInByte = 2;
-		blockWidthInPixel = 1, blockHeightInPixel = 1;
-		srcPitch = w * 2;
-		break;
-	case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
-		blockSizeInByte = 2;
-		blockWidthInPixel = 1, blockHeightInPixel = 1;
-		srcPitch = w * 2;
-		break;
-	case CELL_GCM_TEXTURE_X16:
-		blockSizeInByte = 2;
-		blockWidthInPixel = 1, blockHeightInPixel = 1;
-		srcPitch = w * 2;
-		break;
-	case CELL_GCM_TEXTURE_Y16_X16:
-		blockSizeInByte = 4;
-		blockWidthInPixel = 1, blockHeightInPixel = 1;
-		srcPitch = w * 4;
-		break;
-	case CELL_GCM_TEXTURE_R5G5B5A1:
-		blockSizeInByte = 2;
-		blockWidthInPixel = 1, blockHeightInPixel = 1;
-		srcPitch = w * 2;
-		break;
-	case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
-		blockSizeInByte = 8;
-		blockWidthInPixel = 1, blockHeightInPixel = 1;
-		srcPitch = w * 8;
-		break;
-	case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT:
-		blockSizeInByte = 16;
-		blockWidthInPixel = 1, blockHeightInPixel = 1;
-		srcPitch = w * 16;
-		break;
-	case CELL_GCM_TEXTURE_X32_FLOAT:
-		blockSizeInByte = 4;
-		blockWidthInPixel = 1, blockHeightInPixel = 1;
-		srcPitch = w * 4;
-		break;
-	case CELL_GCM_TEXTURE_D1R5G5B5:
-		blockSizeInByte = 2;
-		blockWidthInPixel = 1, blockHeightInPixel = 1;
-		srcPitch = w * 2;
-		break;
-	case CELL_GCM_TEXTURE_Y16_X16_FLOAT:
-		blockSizeInByte = 4;
-		blockWidthInPixel = 1, blockHeightInPixel = 1;
-		srcPitch = w * 4;
-		break;
-	case CELL_GCM_TEXTURE_D8R8G8B8:
-		blockSizeInByte = 4;
-		blockWidthInPixel = 1, blockHeightInPixel = 1;
-		srcPitch = w * 4;
-		break;
-	case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
-		blockSizeInByte = 4;
-		blockWidthInPixel = 2, blockHeightInPixel = 2;
-		srcPitch = w * 4;
-		break;
-	case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
-		blockSizeInByte = 4;
-		blockWidthInPixel = 2, blockHeightInPixel = 2;
-		srcPitch = w * 4;
-		break;
-	}
-
-	size_t heightInBlocks = (h + blockHeightInPixel - 1) / blockHeightInPixel;
-	size_t widthInBlocks = (w + blockWidthInPixel - 1) / blockWidthInPixel;
-	// Multiple of 256
-	size_t rowPitch = align(blockSizeInByte * widthInBlocks, 256);
-
-	ComPtr<ID3D12Resource> Texture;
-	size_t textureSize = rowPitch * heightInBlocks * 2; // * 4 for mipmap levels
+	size_t textureSize = getPlacedTextureStorageSpace(texture, 256);
 	assert(textureBuffersHeap.canAlloc(textureSize));
 	size_t heapOffset = textureBuffersHeap.alloc(textureSize);

-	ThrowIfFailed(device->CreatePlacedResource(
-		textureBuffersHeap.m_heap,
-		heapOffset,
-		&CD3DX12_RESOURCE_DESC::Buffer(textureSize),
-		D3D12_RESOURCE_STATE_GENERIC_READ,
-		nullptr,
-		IID_PPV_ARGS(Texture.GetAddressOf())
-		));
-	stagingRamTexture.push_back(Texture);
-
-	auto pixels = vm::get_ptr<const u8>(texaddr);
-	void *textureData;
-	ThrowIfFailed(Texture->Map(0, nullptr, (void**)&textureData));
-	std::vector<MipmapLevelInfo> mipInfos;
-
-	switch (format)
-	{
-	case CELL_GCM_TEXTURE_A8R8G8B8:
-	{
-		if (is_swizzled)
-			mipInfos = writeTexelsSwizzled((char*)pixels, (char*)textureData, w, h, 4, texture.GetMipmap());
-		else
-			mipInfos = writeTexelsGeneric((char*)pixels, (char*)textureData, w, h, 4, texture.GetMipmap());
-		break;
-	}
-	case CELL_GCM_TEXTURE_A1R5G5B5:
-	case CELL_GCM_TEXTURE_A4R4G4B4:
-	case CELL_GCM_TEXTURE_R5G6B5:
-	{
-		if (is_swizzled)
-			mipInfos = write16bTexelsSwizzled((char*)pixels, (char*)textureData, w, h, 2, texture.GetMipmap());
-		else
-			mipInfos = write16bTexelsGeneric((char*)pixels, (char*)textureData, w, h, 2, texture.GetMipmap());
-		break;
-	}
-	case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
-	{
-		mipInfos = write16bX4TexelsGeneric((char*)pixels, (char*)textureData, w, h, 8, texture.GetMipmap());
-		break;
-	}
-	case CELL_GCM_TEXTURE_COMPRESSED_DXT1:
-	case CELL_GCM_TEXTURE_COMPRESSED_DXT23:
-	case CELL_GCM_TEXTURE_COMPRESSED_DXT45:
-	{
-		mipInfos = writeCompressedTexel((char*)pixels, (char*)textureData, widthInBlocks, blockWidthInPixel, heightInBlocks, blockHeightInPixel, blockSizeInByte, texture.GetMipmap());
-		break;
-	}
-	default:
-	{
-		mipInfos = writeTexelsGeneric((char*)pixels, (char*)textureData, w, h, blockSizeInByte, texture.GetMipmap());
-		break;
-	}
-	}
-	Texture->Unmap(0, nullptr);
+	void *buffer;
+	ThrowIfFailed(textureBuffersHeap.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + textureSize), &buffer));
+	void *textureData = (char*)buffer + heapOffset;
+	std::vector<MipmapLevelInfo> mipInfos = uploadPlacedTexture(texture, 256, textureData);
+	textureBuffersHeap.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + textureSize));

 	D3D12_RESOURCE_DESC texturedesc = CD3DX12_RESOURCE_DESC::Tex2D(dxgiFormat, (UINT)w, (UINT)h, 1, texture.GetMipmap());
 	textureSize = device->GetResourceAllocationInfo(0, 1, &texturedesc).SizeInBytes;
@@ -626,7 +164,7 @@ ID3D12Resource *uploadSingleTexture(
 	for (const MipmapLevelInfo mli : mipInfos)
 	{
 		commandList->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(vramTexture, (UINT)miplevel), 0, 0, 0,
-			&CD3DX12_TEXTURE_COPY_LOCATION(Texture.Get(), { mli.offset, { dxgiFormat, (UINT)mli.width, (UINT)mli.height, 1, (UINT)mli.rowPitch } }), nullptr);
+			&CD3DX12_TEXTURE_COPY_LOCATION(textureBuffersHeap.m_heap, { heapOffset + mli.offset, { dxgiFormat, (UINT)mli.width, (UINT)mli.height, 1, (UINT)mli.rowPitch } }), nullptr);
 		miplevel++;
 	}

@@ -738,7 +276,7 @@ size_t D3D12GSRender::UploadTextures(ID3D12GraphicsCommandList *cmdlist)
 		}
 		else
 		{
-			vramTexture = uploadSingleTexture(m_textures[i], m_device.Get(), cmdlist, m_textureUploadData, getCurrentResourceStorage().m_singleFrameLifetimeResources);
+			vramTexture = uploadSingleTexture(m_textures[i], m_device.Get(), cmdlist, m_textureUploadData);
 			m_texturesCache[texaddr] = vramTexture;

 			u32 s = (u32)align(getTextureSize(m_textures[i]), 4096);
@@ -881,7 +419,7 @@ size_t D3D12GSRender::UploadTextures(ID3D12GraphicsCommandList *cmdlist)
 		}
 		m_device->CreateSampler(&getSamplerDesc(m_textures[i]),
 			CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_samplerDescriptorHeap[getCurrentResourceStorage().m_samplerDescriptorHeapIndex]->GetCPUDescriptorHandleForHeapStart())
-			.Offset((UINT)getCurrentResourceStorage().m_currentSamplerIndex + (UINT)usedTexture, g_descriptorStrideSRVCBVUAV));
+			.Offset((UINT)getCurrentResourceStorage().m_currentSamplerIndex + (UINT)usedTexture, g_descriptorStrideSamplers));

 		usedTexture++;
 	}
--- a/rpcs3/Emu/RSX/D3D12/D3D12Utils.cpp
+++ b/rpcs3/Emu/RSX/D3D12/D3D12Utils.cpp
@@ -36,28 +36,20 @@ std::pair<ID3DBlob *, ID3DBlob *> compileF32toU8CS()
 		const char *tmp = (const char*)errorBlob->GetBufferPointer();
 		LOG_ERROR(RSX, tmp);
 	}
-	D3D12_DESCRIPTOR_RANGE descriptorRange[2] = {};
-	// Textures
-	descriptorRange[0].BaseShaderRegister = 0;
-	descriptorRange[0].NumDescriptors = 1;
-	descriptorRange[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
-	descriptorRange[1].BaseShaderRegister = 0;
-	descriptorRange[1].NumDescriptors = 1;
-	descriptorRange[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
-	descriptorRange[1].OffsetInDescriptorsFromTableStart = 1;
-	D3D12_ROOT_PARAMETER RP[2] = {};
-	RP[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
-	RP[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
-	RP[0].DescriptorTable.pDescriptorRanges = &descriptorRange[0];
-	RP[0].DescriptorTable.NumDescriptorRanges = 2;
+	CD3DX12_DESCRIPTOR_RANGE descriptorRange[] =
+	{
+		// Textures
+		CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0),
+		// UAV (same descriptor heap)
+		CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 0, 1),
+	};

-	D3D12_ROOT_SIGNATURE_DESC rootSignatureDesc = {};
-	rootSignatureDesc.NumParameters = 1;
-	rootSignatureDesc.pParameters = RP;
+	CD3DX12_ROOT_PARAMETER RP;
+	RP.InitAsDescriptorTable(2, &descriptorRange[0]);

 	ID3DBlob *rootSignatureBlob;

-	hr = wrapD3D12SerializeRootSignature(&rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob);
+	hr = wrapD3D12SerializeRootSignature(&CD3DX12_ROOT_SIGNATURE_DESC(1, &RP), D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob);
 	if (hr != S_OK)
 	{
 		const char *tmp = (const char*)errorBlob->GetBufferPointer();
--- a/rpcs3/Emu/RSX/GCM.h
+++ b/rpcs3/Emu/RSX/GCM.h
@@ -260,6 +260,21 @@ enum

 };

+// GCM Primitive
+enum
+{
+	CELL_GCM_PRIMITIVE_POINTS                  = 1,
+	CELL_GCM_PRIMITIVE_LINES                   = 2,
+	CELL_GCM_PRIMITIVE_LINE_LOOP               = 3,
+	CELL_GCM_PRIMITIVE_LINE_STRIP              = 4,
+	CELL_GCM_PRIMITIVE_TRIANGLES               = 5,
+	CELL_GCM_PRIMITIVE_TRIANGLE_STRIP          = 6,
+	CELL_GCM_PRIMITIVE_TRIANGLE_FAN            = 7,
+	CELL_GCM_PRIMITIVE_QUADS                   = 8,
+	CELL_GCM_PRIMITIVE_QUAD_STRIP              = 9,
+	CELL_GCM_PRIMITIVE_POLYGON                 = 10,
+};
+
 // GCM Reports
 enum
 {
--- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp
+++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp
@@ -6,6 +6,7 @@
 #include "Emu/Memory/Memory.h"
 #include "Emu/System.h"
 #include "GLGSRender.h"
+#include "../Common/TextureUtils.h"

 #define CMD_DEBUG 0
 #define DUMP_VERTEX_DATA 0
@@ -2152,34 +2153,4 @@ void GLGSRender::semaphorePGRAPHBackendRelease(u32 offset, u32 value)
 void GLGSRender::semaphorePFIFOAcquire(u32 offset, u32 value)
 {

-}
-
-u32 LinearToSwizzleAddress(u32 x, u32 y, u32 z, u32 log2_width, u32 log2_height, u32 log2_depth)
-{
-	u32 offset = 0;
-	u32 shift_count = 0;
-	while (log2_width | log2_height | log2_depth){
-		if (log2_width)
-		{
-			offset |= (x & 0x01) << shift_count;
-			x >>= 1;
-			++shift_count;
-			--log2_width;
-		}
-		if (log2_height)
-		{
-			offset |= (y & 0x01) << shift_count;
-			y >>= 1;
-			++shift_count;
-			--log2_height;
-		}
-		if (log2_depth)
-		{
-			offset |= (z & 0x01) << shift_count;
-			z >>= 1;
-			++shift_count;
-			--log2_depth;
-		}
-	}
-	return offset;
-}
+}
--- a/rpcs3/Emu/RSX/GL/GLGSRender.h
+++ b/rpcs3/Emu/RSX/GL/GLGSRender.h
@@ -18,7 +18,6 @@
 extern GLenum g_last_gl_error;
 void printGlError(GLenum err, const char* situation);
 void printGlError(GLenum err, const std::string& situation);
-u32 LinearToSwizzleAddress(u32 x, u32 y, u32 z, u32 log2_width, u32 log2_height, u32 log2_depth);


 class GLTexture
--- a/rpcs3/emucore.vcxproj
+++ b/rpcs3/emucore.vcxproj
@@ -60,8 +60,10 @@
    <ClCompile Include="Emu\IdManager.cpp" />
    <ClCompile Include="Emu\RSX\CgBinaryFragmentProgram.cpp" />
    <ClCompile Include="Emu\RSX\CgBinaryVertexProgram.cpp" />
+    <ClCompile Include="Emu\RSX\Common\BufferUtils.cpp" />
    <ClCompile Include="Emu\RSX\Common\FragmentProgramDecompiler.cpp" />
    <ClCompile Include="Emu\RSX\Common\ShaderParam.cpp" />
+    <ClCompile Include="Emu\RSX\Common\TextureUtils.cpp" />
    <ClCompile Include="Emu\RSX\Common\VertexProgramDecompiler.cpp" />
    <ClCompile Include="Emu\RSX\D3D12\D3D12Buffer.cpp" />
    <ClCompile Include="Emu\RSX\D3D12\D3D12FragmentProgramDecompiler.cpp" />
@@ -529,9 +531,11 @@
    <ClInclude Include="Emu\Memory\Memory.h" />
    <ClInclude Include="Emu\Memory\MemoryBlock.h" />
    <ClInclude Include="Emu\RSX\CgBinaryProgram.h" />
+    <ClInclude Include="Emu\RSX\Common\BufferUtils.h" />
    <ClInclude Include="Emu\RSX\Common\FragmentProgramDecompiler.h" />
    <ClInclude Include="Emu\RSX\Common\ProgramStateCache.h" />
    <ClInclude Include="Emu\RSX\Common\ShaderParam.h" />
+    <ClInclude Include="Emu\RSX\Common\TextureUtils.h" />
    <ClInclude Include="Emu\RSX\Common\VertexProgramDecompiler.h" />
    <ClInclude Include="Emu\RSX\D3D12\D3D12.h" />
    <ClInclude Include="Emu\RSX\D3D12\D3D12Buffer.h" />
--- a/rpcs3/emucore.vcxproj.filters
+++ b/rpcs3/emucore.vcxproj.filters
@@ -989,6 +989,12 @@
    <ClCompile Include="..\Utilities\SharedMutex.cpp">
      <Filter>Utilities</Filter>
    </ClCompile>
+    <ClCompile Include="Emu\RSX\Common\TextureUtils.cpp">
+      <Filter>Emu\GPU\RSX\Common</Filter>
+    </ClCompile>
+    <ClCompile Include="Emu\RSX\Common\BufferUtils.cpp">
+      <Filter>Emu\GPU\RSX\Common</Filter>
+    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="Crypto\aes.h">
@@ -1882,5 +1888,11 @@
    <ClInclude Include="Emu\RSX\D3D12\d3dx12.h">
      <Filter>Emu\GPU\RSX\D3D12</Filter>
    </ClInclude>
+    <ClInclude Include="Emu\RSX\Common\TextureUtils.h">
+      <Filter>Emu\GPU\RSX\Common</Filter>
+    </ClInclude>
+    <ClInclude Include="Emu\RSX\Common\BufferUtils.h">
+      <Filter>Emu\GPU\RSX\Common</Filter>
+    </ClInclude>
  </ItemGroup>
 </Project>