1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2025-03-14 01:27:00 +00:00

Merge pull request from vlj/d3d12

D3d12: Various factorisations and fixes
This commit is contained in:
Raul Tambre 2015-10-03 20:04:53 +03:00
commit 55ca625371
15 changed files with 999 additions and 867 deletions

@ -0,0 +1,242 @@
#include "stdafx.h"
#include "BufferUtils.h"
// Minimum / maximum of two expressions.
// The whole expansion is wrapped in parentheses so the macros can be used inside
// a larger expression (e.g. `2 * MAX2(a, b)`) without the surrounding operators
// binding to the comparison instead of the selected value.
// Each argument may be evaluated twice: do not pass expressions with side effects.
#define MIN2(x, y) (((x) < (y)) ? (x) : (y))
#define MAX2(x, y) (((x) > (y)) ? (x) : (y))
/**
 * Tell whether two closed intervals [first, second] share at least one value.
 */
inline
bool overlaps(const std::pair<size_t, size_t> &range1, const std::pair<size_t, size_t> &range2)
{
	const bool firstStartsBeforeSecondEnds = range1.first <= range2.second;
	const bool secondStartsBeforeFirstEnds = range2.first <= range1.second;
	return firstStartsBeforeSecondEnds && secondStartsBeforeFirstEnds;
}
std::vector<VertexBufferFormat> FormatVertexData(const RSXVertexData *m_vertex_data, size_t *vertex_data_size, size_t base_offset)
{
std::vector<VertexBufferFormat> Result;
for (size_t i = 0; i < 32; ++i)
{
const RSXVertexData &vertexData = m_vertex_data[i];
if (!vertexData.IsEnabled()) continue;
size_t elementCount = vertex_data_size[i] / (vertexData.size * vertexData.GetTypeSize());
// If there is a single element, stride is 0, use the size of element instead
size_t stride = vertexData.stride;
size_t elementSize = vertexData.GetTypeSize();
std::pair<size_t, size_t> range = std::make_pair(vertexData.addr + base_offset, vertexData.addr + base_offset + elementSize * vertexData.size + (elementCount - 1) * stride - 1);
bool isMerged = false;
for (VertexBufferFormat &vbf : Result)
{
if (overlaps(vbf.range, range) && vbf.stride == stride)
{
// Extend buffer if necessary
vbf.range.first = MIN2(vbf.range.first, range.first);
vbf.range.second = MAX2(vbf.range.second, range.second);
vbf.elementCount = MAX2(vbf.elementCount, elementCount);
vbf.attributeId.push_back(i);
isMerged = true;
break;
}
}
if (isMerged)
continue;
VertexBufferFormat newRange = { range, std::vector<size_t>{ i }, elementCount, stride };
Result.emplace_back(newRange);
}
return Result;
}
void uploadVertexData(const VertexBufferFormat &vbf, const RSXVertexData *vertexData, size_t baseOffset, void* bufferMap)
{
for (int vertex = 0; vertex < vbf.elementCount; vertex++)
{
for (size_t attributeId : vbf.attributeId)
{
if (!vertexData[attributeId].addr)
{
memcpy(bufferMap, vertexData[attributeId].data.data(), vertexData[attributeId].data.size());
continue;
}
size_t offset = (size_t)vertexData[attributeId].addr + baseOffset - vbf.range.first;
size_t tsize = vertexData[attributeId].GetTypeSize();
size_t size = vertexData[attributeId].size;
auto src = vm::get_ptr<const u8>(vertexData[attributeId].addr + (u32)baseOffset + (u32)vbf.stride * vertex);
char* dst = (char*)bufferMap + offset + vbf.stride * vertex;
switch (tsize)
{
case 1:
{
memcpy(dst, src, size);
break;
}
case 2:
{
const u16* c_src = (const u16*)src;
u16* c_dst = (u16*)dst;
for (u32 j = 0; j < size; ++j) *c_dst++ = _byteswap_ushort(*c_src++);
break;
}
case 4:
{
const u32* c_src = (const u32*)src;
u32* c_dst = (u32*)dst;
for (u32 j = 0; j < size; ++j) *c_dst++ = _byteswap_ulong(*c_src++);
break;
}
}
}
}
}
/**
 * Expand a triangle fan index list into a triangle list.
 * For indexCount >= 3 this writes (indexCount - 2) * 3 indices to dst: each
 * triangle is (src[0], src[i + 1], src[i + 2]).
 * dst and src are reinterpreted as arrays of IndexType.
 * Fewer than three indices do not form a triangle: nothing is written.
 */
template<typename IndexType, typename DstType, typename SrcType>
void expandIndexedTriangleFan(DstType *dst, const SrcType *src, size_t indexCount)
{
	// Guard first: `indexCount - 2` would wrap around for 0 or 1 indices and
	// turn the loop below into an out-of-bounds walk.
	if (indexCount < 3)
		return;
	IndexType *typedDst = reinterpret_cast<IndexType *>(dst);
	const IndexType *typedSrc = reinterpret_cast<const IndexType *>(src);
	const size_t triangleCount = indexCount - 2;
	for (size_t i = 0; i < triangleCount; i++)
	{
		typedDst[3 * i] = typedSrc[0];
		typedDst[3 * i + 1] = typedSrc[i + 1];
		typedDst[3 * i + 2] = typedSrc[i + 2];
	}
}
/**
 * Expand a quad index list into a triangle list.
 * Every complete group of four source indices (a, b, c, d) produces the six
 * indices (a, b, c, c, d, a); a trailing incomplete group is ignored.
 * dst and src are reinterpreted as arrays of IndexType.
 */
template<typename IndexType, typename DstType, typename SrcType>
void expandIndexedQuads(DstType *dst, const SrcType *src, size_t indexCount)
{
	IndexType *out = reinterpret_cast<IndexType *>(dst);
	const IndexType *quad = reinterpret_cast<const IndexType *>(src);
	for (size_t quadsLeft = indexCount / 4; quadsLeft != 0; --quadsLeft)
	{
		// First triangle
		out[0] = quad[0];
		out[1] = quad[1];
		out[2] = quad[2];
		// Second triangle
		out[3] = quad[2];
		out[4] = quad[3];
		out[5] = quad[0];
		out += 6;
		quad += 4;
	}
}
// Only quads and triangle fans are emulated for now; every other draw mode
// (including unknown values) is reported as native.
bool isNativePrimitiveMode(unsigned m_draw_mode)
{
	const bool needsEmulation =
		m_draw_mode == CELL_GCM_PRIMITIVE_TRIANGLE_FAN ||
		m_draw_mode == CELL_GCM_PRIMITIVE_QUADS;
	return !needsEmulation;
}
/**
 * Number of indices needed to draw initial_index_count vertices/indices in m_draw_mode.
 * Native modes return initial_index_count unchanged.
 * Triangle fans become (count - 2) triangles, or none when count < 3.
 * Quads become two triangles per complete group of four; a trailing
 * incomplete group is dropped, matching what the quad expansion writes.
 */
size_t getIndexCount(unsigned m_draw_mode, unsigned initial_index_count)
{
	// Index count
	if (isNativePrimitiveMode(m_draw_mode))
		return initial_index_count;

	// Work in size_t so that the multiplications below cannot wrap in 32 bits.
	const size_t count = initial_index_count;
	switch (m_draw_mode)
	{
	case CELL_GCM_PRIMITIVE_TRIANGLE_FAN:
		// `count - 2` would wrap around for 0 or 1 indices.
		return (count < 3) ? 0 : (count - 2) * 3;
	case CELL_GCM_PRIMITIVE_QUADS:
		// Divide first: only complete quads are expanded into indices.
		return (count / 4) * 6;
	default:
		return 0;
	}
}
/**
 * Write the index data for a draw call to bufferMap.
 *
 * With a source index buffer (indexBuffer != nullptr):
 *  - the listed native modes copy element_count indices verbatim (4 bytes each
 *    for CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32, 2 bytes otherwise);
 *  - triangle fans and quads are expanded into triangle lists; an index type
 *    other than the 16 or 32 bit one aborts.
 * Without a source index buffer, 16-bit indices are generated for triangle fans
 * and quads; other modes write nothing.
 *
 * Triangle fans with fewer than three elements produce no indices.
 */
void uploadIndexData(unsigned m_draw_mode, unsigned index_type, void* indexBuffer, void* bufferMap, unsigned element_count)
{
	if (indexBuffer != nullptr)
	{
		switch (m_draw_mode)
		{
		case CELL_GCM_PRIMITIVE_POINTS:
		case CELL_GCM_PRIMITIVE_LINES:
		case CELL_GCM_PRIMITIVE_LINE_LOOP:
		case CELL_GCM_PRIMITIVE_LINE_STRIP:
		case CELL_GCM_PRIMITIVE_TRIANGLES:
		case CELL_GCM_PRIMITIVE_TRIANGLE_STRIP:
		case CELL_GCM_PRIMITIVE_QUAD_STRIP:
		case CELL_GCM_PRIMITIVE_POLYGON:
		{
			size_t indexSize = (index_type == CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32) ? 4 : 2;
			memcpy(bufferMap, indexBuffer, indexSize * element_count);
			return;
		}
		case CELL_GCM_PRIMITIVE_TRIANGLE_FAN:
			switch (index_type)
			{
			case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32:
				// A fan shorter than 3 indices has no triangle; skipping it also
				// keeps the expansion from computing `count - 2` on 0 or 1.
				if (element_count >= 3)
					expandIndexedTriangleFan<unsigned int>(bufferMap, indexBuffer, element_count);
				return;
			case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16:
				if (element_count >= 3)
					expandIndexedTriangleFan<unsigned short>(bufferMap, indexBuffer, element_count);
				return;
			default:
				abort();
				return;
			}
		case CELL_GCM_PRIMITIVE_QUADS:
			switch (index_type)
			{
			case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32:
				expandIndexedQuads<unsigned int>(bufferMap, indexBuffer, element_count);
				return;
			case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16:
				expandIndexedQuads<unsigned short>(bufferMap, indexBuffer, element_count);
				return;
			default:
				abort();
				return;
			}
		}
	}
	else
	{
		// NOTE(review): generated indices are 16 bits wide, so vertex numbers
		// above 65535 are truncated - confirm callers never draw that many.
		unsigned short *typedDst = static_cast<unsigned short *>(bufferMap);
		switch (m_draw_mode)
		{
		case CELL_GCM_PRIMITIVE_TRIANGLE_FAN:
			// `i + 2 < element_count` instead of `i < element_count - 2`:
			// the subtraction wraps around when element_count is 0 or 1.
			for (unsigned i = 0; i + 2 < element_count; i++)
			{
				typedDst[3 * i] = 0;
				typedDst[3 * i + 1] = i + 1;
				typedDst[3 * i + 2] = i + 2;
			}
			return;
		case CELL_GCM_PRIMITIVE_QUADS:
			for (unsigned i = 0; i < element_count / 4; i++)
			{
				// First triangle
				typedDst[6 * i] = 4 * i;
				typedDst[6 * i + 1] = 4 * i + 1;
				typedDst[6 * i + 2] = 4 * i + 2;
				// Second triangle
				typedDst[6 * i + 3] = 4 * i + 2;
				typedDst[6 * i + 4] = 4 * i + 3;
				typedDst[6 * i + 5] = 4 * i;
			}
			return;
		}
	}
}

@ -0,0 +1,40 @@
#pragma once
#include <vector>
#include "Emu/Memory/vm.h"
#include "../RSXThread.h"
/*
* Description of one vertex buffer built by FormatVertexData: a byte range
* shared by one or more vertex attributes that use the same stride.
*/
struct VertexBufferFormat
{
// Inclusive [first, last] byte range covered by the attributes of this buffer.
std::pair<size_t, size_t> range;
// Indices (into the vertex data array) of the attributes stored in this buffer.
std::vector<size_t> attributeId;
// Largest element count among the attributes of this buffer.
size_t elementCount;
// Stride shared by every attribute of this buffer.
size_t stride;
};
/*
* Detect buffer containing interleaved vertex attribute.
* This minimizes memory upload size.
*/
std::vector<VertexBufferFormat> FormatVertexData(const RSXVertexData *m_vertex_data, size_t *vertex_data_size, size_t base_offset);
/*
* Write vertex attributes to bufferMap, swapping data as required.
*/
void uploadVertexData(const VertexBufferFormat &vbf, const RSXVertexData *vertexData, size_t baseOffset, void* bufferMap);
/*
* Returns false if the primitive mode is not natively supported and needs to be emulated (using an index buffer); returns true otherwise.
*/
bool isNativePrimitiveMode(unsigned m_draw_mode);
/*
* Returns the adjusted index count for an emulated primitive mode; otherwise returns initial_index_count unchanged.
*/
size_t getIndexCount(unsigned m_draw_mode, unsigned initial_index_count);
/*
* Write index information to bufferMap
*/
void uploadIndexData(unsigned m_draw_mode, unsigned index_type, void* indexBuffer, void* bufferMap, unsigned element_count);

@ -0,0 +1,544 @@
#include "stdafx.h"
#include "Emu/Memory/vm.h"
#include "TextureUtils.h"
#include "../RSXThread.h"
#include "Utilities/Log.h"
// Maximum of two expressions.
// The whole expansion is wrapped in parentheses so the macro can be used inside
// a larger expression (e.g. `2 * MAX2(a, b)`) without the surrounding operators
// binding to the comparison instead of the selected value.
// Each argument may be evaluated twice: do not pass expressions with side effects.
#define MAX2(a, b) (((a) > (b)) ? (a) : (b))
/**
 * Interleave the low bits of x, y and z into a single offset.
 * On every round one bit is taken from x, then y, then z (lowest bit first),
 * skipping any coordinate whose bit budget (log2_width / log2_height /
 * log2_depth) is already used up. Rounds continue until all budgets reach zero.
 */
unsigned LinearToSwizzleAddress(unsigned x, unsigned y, unsigned z, unsigned log2_width, unsigned log2_height, unsigned log2_depth)
{
	unsigned swizzled = 0;
	unsigned nextBit = 0;

	// Moves the lowest remaining bit of one coordinate into the next free
	// output bit, as long as that coordinate still has bits to contribute.
	auto takeBit = [&swizzled, &nextBit](unsigned &coordinate, unsigned &bitsLeft)
	{
		if (bitsLeft == 0)
			return;
		swizzled |= (coordinate & 0x01) << nextBit;
		++nextBit;
		coordinate >>= 1;
		--bitsLeft;
	};

	while (log2_width != 0 || log2_height != 0 || log2_depth != 0)
	{
		takeBit(x, log2_width);
		takeBit(y, log2_height);
		takeBit(z, log2_depth);
	}
	return swizzled;
}
/**
* Write data, assume src pixels are packed but not mipmaplevel
*/
inline std::vector<MipmapLevelInfo>
writeTexelsGeneric(const char *src, char *dst, size_t widthInBlock, size_t heightInBlock, size_t blockSize, size_t mipmapCount)
{
std::vector<MipmapLevelInfo> Result;
size_t offsetInDst = 0, offsetInSrc = 0;
size_t currentHeight = heightInBlock, currentWidth = widthInBlock;
for (unsigned mipLevel = 0; mipLevel < mipmapCount; mipLevel++)
{
size_t rowPitch = align(currentWidth * blockSize, 256);
MipmapLevelInfo currentMipmapLevelInfo = {};
currentMipmapLevelInfo.offset = offsetInDst;
currentMipmapLevelInfo.height = currentHeight;
currentMipmapLevelInfo.width = currentWidth;
currentMipmapLevelInfo.rowPitch = rowPitch;
Result.push_back(currentMipmapLevelInfo);
for (unsigned row = 0; row < currentHeight; row++)
memcpy((char*)dst + offsetInDst + row * rowPitch, (char*)src + offsetInSrc + row * widthInBlock * blockSize, currentWidth * blockSize);
offsetInDst += currentHeight * rowPitch;
offsetInDst = align(offsetInDst, 512);
offsetInSrc += currentHeight * widthInBlock * blockSize;
currentHeight = MAX2(currentHeight / 2, 1);
currentWidth = MAX2(currentWidth / 2, 1);
}
return Result;
}
/**
* Write data, assume src pixels are swizzled and but not mipmaplevel
*/
inline std::vector<MipmapLevelInfo>
writeTexelsSwizzled(const char *src, char *dst, size_t widthInBlock, size_t heightInBlock, size_t blockSize, size_t mipmapCount)
{
std::vector<MipmapLevelInfo> Result;
size_t offsetInDst = 0, offsetInSrc = 0;
size_t currentHeight = heightInBlock, currentWidth = widthInBlock;
for (unsigned mipLevel = 0; mipLevel < mipmapCount; mipLevel++)
{
size_t rowPitch = align(currentWidth * blockSize, 256);
MipmapLevelInfo currentMipmapLevelInfo = {};
currentMipmapLevelInfo.offset = offsetInDst;
currentMipmapLevelInfo.height = currentHeight;
currentMipmapLevelInfo.width = currentWidth;
currentMipmapLevelInfo.rowPitch = rowPitch;
Result.push_back(currentMipmapLevelInfo);
u32 *castedSrc, *castedDst;
u32 log2width, log2height;
castedSrc = (u32*)src + offsetInSrc;
castedDst = (u32*)dst + offsetInDst;
log2width = (u32)(logf((float)currentWidth) / logf(2.f));
log2height = (u32)(logf((float)currentHeight) / logf(2.f));
for (int row = 0; row < currentHeight; row++)
for (int j = 0; j < currentWidth; j++)
castedDst[(row * rowPitch / 4) + j] = castedSrc[LinearToSwizzleAddress(j, row, 0, log2width, log2height, 0)];
offsetInDst += currentHeight * rowPitch;
offsetInSrc += currentHeight * widthInBlock * blockSize;
currentHeight = MAX2(currentHeight / 2, 1);
currentWidth = MAX2(currentWidth / 2, 1);
}
return Result;
}
/**
* Write data, assume compressed (DXTCn) format
*/
inline std::vector<MipmapLevelInfo>
writeCompressedTexel(const char *src, char *dst, size_t widthInBlock, size_t blockWidth, size_t heightInBlock, size_t blockHeight, size_t blockSize, size_t mipmapCount)
{
std::vector<MipmapLevelInfo> Result;
size_t offsetInDst = 0, offsetInSrc = 0;
size_t currentHeight = heightInBlock, currentWidth = widthInBlock;
for (unsigned mipLevel = 0; mipLevel < mipmapCount; mipLevel++)
{
size_t rowPitch = align(currentWidth * blockSize, 256);
MipmapLevelInfo currentMipmapLevelInfo = {};
currentMipmapLevelInfo.offset = offsetInDst;
currentMipmapLevelInfo.height = currentHeight * blockHeight;
currentMipmapLevelInfo.width = currentWidth * blockWidth;
currentMipmapLevelInfo.rowPitch = rowPitch;
Result.push_back(currentMipmapLevelInfo);
for (unsigned row = 0; row < currentHeight; row++)
memcpy((char*)dst + offsetInDst + row * rowPitch, (char*)src + offsetInSrc + row * currentWidth * blockSize, currentWidth * blockSize);
offsetInDst += currentHeight * rowPitch;
offsetInDst = align(offsetInDst, 512);
offsetInSrc += currentHeight * currentWidth * blockSize;
currentHeight = MAX2(currentHeight / 2, 1);
currentWidth = MAX2(currentWidth / 2, 1);
}
return Result;
}
/**
* Write 16 bytes pixel textures, assume src pixels are swizzled and but not mipmaplevel
*/
inline std::vector<MipmapLevelInfo>
write16bTexelsSwizzled(const char *src, char *dst, size_t widthInBlock, size_t heightInBlock, size_t blockSize, size_t mipmapCount)
{
std::vector<MipmapLevelInfo> Result;
size_t offsetInDst = 0, offsetInSrc = 0;
size_t currentHeight = heightInBlock, currentWidth = widthInBlock;
for (unsigned mipLevel = 0; mipLevel < mipmapCount; mipLevel++)
{
size_t rowPitch = align(currentWidth * blockSize, 256);
MipmapLevelInfo currentMipmapLevelInfo = {};
currentMipmapLevelInfo.offset = offsetInDst;
currentMipmapLevelInfo.height = currentHeight;
currentMipmapLevelInfo.width = currentWidth;
currentMipmapLevelInfo.rowPitch = rowPitch;
Result.push_back(currentMipmapLevelInfo);
u16 *castedSrc, *castedDst;
u16 log2width, log2height;
castedSrc = (u16*)src + offsetInSrc;
castedDst = (u16*)dst + offsetInDst;
log2width = (u32)(logf((float)currentWidth) / logf(2.f));
log2height = (u32)(logf((float)currentHeight) / logf(2.f));
for (int row = 0; row < currentHeight; row++)
for (int j = 0; j < currentWidth; j++)
castedDst[(row * rowPitch / 2) + j] = castedSrc[LinearToSwizzleAddress(j, row, 0, log2width, log2height, 0)];
offsetInDst += currentHeight * rowPitch;
offsetInSrc += currentHeight * widthInBlock * blockSize;
currentHeight = MAX2(currentHeight / 2, 1);
currentWidth = MAX2(currentWidth / 2, 1);
}
return Result;
}
/**
 * Write 16-bit texels from a linear source, swapping the two bytes of each texel.
 * Source rows of every level are read with the pitch of the base level
 * (widthInBlock * blockSize). Destination rows are placed on a 256-byte aligned pitch.
 * Returns the placement (offset, size, row pitch) of each level in dst.
 */
inline std::vector<MipmapLevelInfo>
write16bTexelsGeneric(const char *src, char *dst, size_t widthInBlock, size_t heightInBlock, size_t blockSize, size_t mipmapCount)
{
	std::vector<MipmapLevelInfo> Result;
	size_t offsetInDst = 0, offsetInSrc = 0;
	size_t currentHeight = heightInBlock, currentWidth = widthInBlock;
	size_t srcPitch = widthInBlock * blockSize;
	for (unsigned mipLevel = 0; mipLevel < mipmapCount; mipLevel++)
	{
		size_t rowPitch = align(currentWidth * blockSize, 256);
		MipmapLevelInfo currentMipmapLevelInfo = {};
		currentMipmapLevelInfo.offset = offsetInDst;
		currentMipmapLevelInfo.height = currentHeight;
		currentMipmapLevelInfo.width = currentWidth;
		currentMipmapLevelInfo.rowPitch = rowPitch;
		Result.push_back(currentMipmapLevelInfo);

		unsigned short *castedDst = (unsigned short *)dst;
		const unsigned short *castedSrc = (const unsigned short *)src;
		// Iterate over the rows of the current level only: using the base
		// level height here would read and write past the smaller levels.
		for (size_t row = 0; row < currentHeight; row++)
			for (size_t j = 0; j < currentWidth; j++)
			{
				u16 tmp = castedSrc[offsetInSrc / 2 + row * srcPitch / 2 + j];
				castedDst[offsetInDst / 2 + row * rowPitch / 2 + j] = (tmp >> 8) | (tmp << 8);
			}

		offsetInDst += currentHeight * rowPitch;
		offsetInSrc += currentHeight * widthInBlock * blockSize;
		currentHeight = MAX2(currentHeight / 2, 1);
		currentWidth = MAX2(currentWidth / 2, 1);
	}
	return Result;
}
/**
 * Write texels made of four 16-bit components from a linear source, swapping
 * the two bytes of every component.
 * Source rows of every level are read with the pitch of the base level
 * (widthInBlock * blockSize). Destination rows are placed on a 256-byte aligned pitch.
 * Returns the placement (offset, size, row pitch) of each level in dst.
 */
inline std::vector<MipmapLevelInfo>
write16bX4TexelsGeneric(const char *src, char *dst, size_t widthInBlock, size_t heightInBlock, size_t blockSize, size_t mipmapCount)
{
	std::vector<MipmapLevelInfo> Result;
	size_t offsetInDst = 0, offsetInSrc = 0;
	size_t currentHeight = heightInBlock, currentWidth = widthInBlock;
	size_t srcPitch = widthInBlock * blockSize;
	for (unsigned mipLevel = 0; mipLevel < mipmapCount; mipLevel++)
	{
		size_t rowPitch = align(currentWidth * blockSize, 256);
		MipmapLevelInfo currentMipmapLevelInfo = {};
		currentMipmapLevelInfo.offset = offsetInDst;
		currentMipmapLevelInfo.height = currentHeight;
		currentMipmapLevelInfo.width = currentWidth;
		currentMipmapLevelInfo.rowPitch = rowPitch;
		Result.push_back(currentMipmapLevelInfo);

		unsigned short *castedDst = (unsigned short *)dst;
		const unsigned short *castedSrc = (const unsigned short *)src;
		// Iterate over the rows of the current level only: using the base
		// level height here would read and write past the smaller levels.
		for (size_t row = 0; row < currentHeight; row++)
			for (size_t j = 0; j < currentWidth * 4; j++) // four components per texel
			{
				u16 tmp = castedSrc[offsetInSrc / 2 + row * srcPitch / 2 + j];
				castedDst[offsetInDst / 2 + row * rowPitch / 2 + j] = (tmp >> 8) | (tmp << 8);
			}

		offsetInDst += currentHeight * rowPitch;
		offsetInSrc += currentHeight * widthInBlock * blockSize;
		currentHeight = MAX2(currentHeight / 2, 1);
		currentWidth = MAX2(currentWidth / 2, 1);
	}
	return Result;
}
/**
 * Compute the number of bytes reserved to store a texture whose rows are
 * aligned to rowPitchAlignement: aligned row pitch * height in blocks, doubled
 * to leave room for the mipmap levels.
 * Unimplemented formats are logged and sized as 4 bytes per texel.
 */
size_t getPlacedTextureStorageSpace(const RSXTexture &texture, size_t rowPitchAlignement)
{
	size_t w = texture.GetWidth(), h = texture.GetHeight();
	// Defaults used when the format is not implemented, so that the size
	// computation below never reads uninitialised values.
	size_t blockSizeInByte = 4, blockWidthInPixel = 1, blockHeightInPixel = 1;
	int format = texture.GetFormat() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
	switch (format)
	{
	case CELL_GCM_TEXTURE_COMPRESSED_HILO8:
	case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8:
	case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
	case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
	default:
		LOG_ERROR(RSX, "Unimplemented Texture format : %x", format);
		break;

	// 1 byte per texel
	case CELL_GCM_TEXTURE_B8:
		blockSizeInByte = 1;
		blockWidthInPixel = 1, blockHeightInPixel = 1;
		break;

	// 2 bytes per texel
	case CELL_GCM_TEXTURE_A1R5G5B5:
	case CELL_GCM_TEXTURE_A4R4G4B4:
	case CELL_GCM_TEXTURE_R5G6B5:
	case CELL_GCM_TEXTURE_G8B8:
	case CELL_GCM_TEXTURE_DEPTH16:
	case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
	case CELL_GCM_TEXTURE_X16:
	case CELL_GCM_TEXTURE_R5G5B5A1:
	case CELL_GCM_TEXTURE_D1R5G5B5:
		blockSizeInByte = 2;
		blockWidthInPixel = 1, blockHeightInPixel = 1;
		break;

	// 4 bytes per texel (R6G5B5 is not native)
	case CELL_GCM_TEXTURE_A8R8G8B8:
	case CELL_GCM_TEXTURE_R6G5B5:
	case CELL_GCM_TEXTURE_DEPTH24_D8:
	case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT:
	case CELL_GCM_TEXTURE_Y16_X16:
	case CELL_GCM_TEXTURE_X32_FLOAT:
	case CELL_GCM_TEXTURE_Y16_X16_FLOAT:
	case CELL_GCM_TEXTURE_D8R8G8B8:
		blockSizeInByte = 4;
		blockWidthInPixel = 1, blockHeightInPixel = 1;
		break;

	// 8 bytes per texel
	case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
		blockSizeInByte = 8;
		blockWidthInPixel = 1, blockHeightInPixel = 1;
		break;

	// 16 bytes per texel
	case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT:
		blockSizeInByte = 16;
		blockWidthInPixel = 1, blockHeightInPixel = 1;
		break;

	// 4x4 blocks
	case CELL_GCM_TEXTURE_COMPRESSED_DXT1:
		blockSizeInByte = 8;
		blockWidthInPixel = 4, blockHeightInPixel = 4;
		break;
	case CELL_GCM_TEXTURE_COMPRESSED_DXT23:
	case CELL_GCM_TEXTURE_COMPRESSED_DXT45:
		blockSizeInByte = 16;
		blockWidthInPixel = 4, blockHeightInPixel = 4;
		break;

	// 2x2 blocks
	// NOTE(review): `format` has the LN/UN bits cleared, so these two labels
	// can only match if the constants do not carry those bits - confirm.
	case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
	case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
		blockSizeInByte = 4;
		blockWidthInPixel = 2, blockHeightInPixel = 2;
		break;
	}

	// Round up so that partially covered blocks are counted.
	size_t heightInBlocks = (h + blockHeightInPixel - 1) / blockHeightInPixel;
	size_t widthInBlocks = (w + blockWidthInPixel - 1) / blockWidthInPixel;
	size_t rowPitch = align(blockSizeInByte * widthInBlocks, rowPitchAlignement);
	return rowPitch * heightInBlocks * 2; // * 2 for mipmap levels
}
/**
 * Write every mipmap level of texture to textureData and return the placement
 * of each level.
 * The writer is chosen from the texture format and from whether the
 * CELL_GCM_TEXTURE_LN bit is set (linear) or not (swizzled).
 * Unimplemented formats are logged and copied as 4 bytes per texel.
 * NOTE(review): rowPitchAlignement is not used here; the writers use fixed
 * alignments - confirm callers pass a matching value.
 */
std::vector<MipmapLevelInfo> uploadPlacedTexture(const RSXTexture &texture, size_t rowPitchAlignement, void* textureData)
{
	size_t w = texture.GetWidth(), h = texture.GetHeight();
	int format = texture.GetFormat() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
	// Defaults used when the format is not implemented, so that the code below
	// never reads uninitialised values.
	size_t blockSizeInByte = 4, blockWidthInPixel = 1, blockHeightInPixel = 1;
	switch (format)
	{
	case CELL_GCM_TEXTURE_COMPRESSED_HILO8:
	case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8:
	case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
	case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
	default:
		LOG_ERROR(RSX, "Unimplemented Texture format : %x", format);
		break;

	// 1 byte per texel
	case CELL_GCM_TEXTURE_B8:
		blockSizeInByte = 1;
		blockWidthInPixel = 1, blockHeightInPixel = 1;
		break;

	// 2 bytes per texel
	case CELL_GCM_TEXTURE_A1R5G5B5:
	case CELL_GCM_TEXTURE_A4R4G4B4:
	case CELL_GCM_TEXTURE_R5G6B5:
	case CELL_GCM_TEXTURE_G8B8:
	case CELL_GCM_TEXTURE_DEPTH16:
	case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
	case CELL_GCM_TEXTURE_X16:
	case CELL_GCM_TEXTURE_R5G5B5A1:
	case CELL_GCM_TEXTURE_D1R5G5B5:
		blockSizeInByte = 2;
		blockWidthInPixel = 1, blockHeightInPixel = 1;
		break;

	// 4 bytes per texel (R6G5B5 is not native)
	case CELL_GCM_TEXTURE_A8R8G8B8:
	case CELL_GCM_TEXTURE_R6G5B5:
	case CELL_GCM_TEXTURE_DEPTH24_D8:
	case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT:
	case CELL_GCM_TEXTURE_Y16_X16:
	case CELL_GCM_TEXTURE_X32_FLOAT:
	case CELL_GCM_TEXTURE_Y16_X16_FLOAT:
	case CELL_GCM_TEXTURE_D8R8G8B8:
		blockSizeInByte = 4;
		blockWidthInPixel = 1, blockHeightInPixel = 1;
		break;

	// 8 bytes per texel
	case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
		blockSizeInByte = 8;
		blockWidthInPixel = 1, blockHeightInPixel = 1;
		break;

	// 16 bytes per texel
	case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT:
		blockSizeInByte = 16;
		blockWidthInPixel = 1, blockHeightInPixel = 1;
		break;

	// 4x4 blocks
	case CELL_GCM_TEXTURE_COMPRESSED_DXT1:
		blockSizeInByte = 8;
		blockWidthInPixel = 4, blockHeightInPixel = 4;
		break;
	case CELL_GCM_TEXTURE_COMPRESSED_DXT23:
	case CELL_GCM_TEXTURE_COMPRESSED_DXT45:
		blockSizeInByte = 16;
		blockWidthInPixel = 4, blockHeightInPixel = 4;
		break;

	// 2x2 blocks
	// NOTE(review): `format` has the LN/UN bits cleared, so these two labels
	// can only match if the constants do not carry those bits - confirm.
	case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
	case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
		blockSizeInByte = 4;
		blockWidthInPixel = 2, blockHeightInPixel = 2;
		break;
	}

	// Round up so that partially covered blocks are counted.
	size_t heightInBlocks = (h + blockHeightInPixel - 1) / blockHeightInPixel;
	size_t widthInBlocks = (w + blockWidthInPixel - 1) / blockWidthInPixel;

	const u32 texaddr = GetAddress(texture.GetOffset(), texture.GetLocation());
	auto pixels = vm::get_ptr<const u8>(texaddr);
	bool is_swizzled = !(texture.GetFormat() & CELL_GCM_TEXTURE_LN);
	switch (format)
	{
	case CELL_GCM_TEXTURE_A8R8G8B8:
		if (is_swizzled)
			return writeTexelsSwizzled((char*)pixels, (char*)textureData, w, h, 4, texture.GetMipmap());
		else
			return writeTexelsGeneric((char*)pixels, (char*)textureData, w, h, 4, texture.GetMipmap());
	case CELL_GCM_TEXTURE_A1R5G5B5:
	case CELL_GCM_TEXTURE_A4R4G4B4:
	case CELL_GCM_TEXTURE_R5G6B5:
		if (is_swizzled)
			return write16bTexelsSwizzled((char*)pixels, (char*)textureData, w, h, 2, texture.GetMipmap());
		else
			return write16bTexelsGeneric((char*)pixels, (char*)textureData, w, h, 2, texture.GetMipmap());
	case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
		return write16bX4TexelsGeneric((char*)pixels, (char*)textureData, w, h, 8, texture.GetMipmap());
	case CELL_GCM_TEXTURE_COMPRESSED_DXT1:
	case CELL_GCM_TEXTURE_COMPRESSED_DXT23:
	case CELL_GCM_TEXTURE_COMPRESSED_DXT45:
		return writeCompressedTexel((char*)pixels, (char*)textureData, widthInBlocks, blockWidthInPixel, heightInBlocks, blockHeightInPixel, blockSizeInByte, texture.GetMipmap());
	default:
		return writeTexelsGeneric((char*)pixels, (char*)textureData, w, h, blockSizeInByte, texture.GetMipmap());
	}
}

@ -0,0 +1,26 @@
#pragma once
#include "../RSXTexture.h"
#include <vector>
/**
 * Placement of one mipmap level inside the buffer filled by uploadPlacedTexture.
 */
struct MipmapLevelInfo
{
// Byte offset of the level from the start of the destination buffer.
size_t offset;
// Level width; for compressed formats this is width in blocks * block width.
size_t width;
// Level height; for compressed formats this is height in blocks * block height.
size_t height;
// Distance in bytes between two consecutive rows of the level in the destination buffer.
size_t rowPitch;
};
unsigned LinearToSwizzleAddress(unsigned x, unsigned y, unsigned z, unsigned log2_width, unsigned log2_height, unsigned log2_depth);
/**
* Get size to store texture in a linear fashion.
* Storage is assumed to use a rowPitchAlignement boundary for every row of texture.
*/
size_t getPlacedTextureStorageSpace(const RSXTexture &texture, size_t rowPitchAlignement);
/**
* Write texture data to textureData.
* Data are not packed, they are stored per rows using rowPitchAlignement.
* Similarly, offset for every mipmaplevel is aligned to rowPitchAlignement boundary.
*/
std::vector<MipmapLevelInfo> uploadPlacedTexture(const RSXTexture &texture, size_t rowPitchAlignement, void* textureData);

@ -5,6 +5,7 @@
#include "D3D12GSRender.h" #include "D3D12GSRender.h"
#include "d3dx12.h" #include "d3dx12.h"
#include "../Common/BufferUtils.h"
const int g_vertexCount = 32; const int g_vertexCount = 32;
@ -90,14 +91,6 @@ DXGI_FORMAT getFormat(u8 type, u8 size)
} }
} }
struct VertexBufferFormat
{
std::pair<size_t, size_t> range;
std::vector<size_t> attributeId;
size_t elementCount;
size_t stride;
};
static static
std::vector<D3D12_INPUT_ELEMENT_DESC> getIALayout(ID3D12Device *device, const std::vector<VertexBufferFormat> &vertexBufferFormat, const RSXVertexData *m_vertex_data, size_t baseOffset) std::vector<D3D12_INPUT_ELEMENT_DESC> getIALayout(ID3D12Device *device, const std::vector<VertexBufferFormat> &vertexBufferFormat, const RSXVertexData *m_vertex_data, size_t baseOffset)
{ {
@ -122,89 +115,9 @@ std::vector<D3D12_INPUT_ELEMENT_DESC> getIALayout(ID3D12Device *device, const st
return result; return result;
} }
template<typename IndexType, typename DstType, typename SrcType>
void expandIndexedTriangleFan(DstType *dst, const SrcType *src, size_t indexCount)
{
IndexType *typedDst = reinterpret_cast<IndexType *>(dst);
const IndexType *typedSrc = reinterpret_cast<const IndexType *>(src);
for (unsigned i = 0; i < indexCount - 2; i++)
{
typedDst[3 * i] = typedSrc[0];
typedDst[3 * i + 1] = typedSrc[i + 2 - 1];
typedDst[3 * i + 2] = typedSrc[i + 2];
}
}
template<typename IndexType, typename DstType, typename SrcType>
void expandIndexedQuads(DstType *dst, const SrcType *src, size_t indexCount)
{
IndexType *typedDst = reinterpret_cast<IndexType *>(dst);
const IndexType *typedSrc = reinterpret_cast<const IndexType *>(src);
for (unsigned i = 0; i < indexCount / 4; i++)
{
// First triangle
typedDst[6 * i] = typedSrc[4 * i];
typedDst[6 * i + 1] = typedSrc[4 * i + 1];
typedDst[6 * i + 2] = typedSrc[4 * i + 2];
// Second triangle
typedDst[6 * i + 3] = typedSrc[4 * i + 2];
typedDst[6 * i + 4] = typedSrc[4 * i + 3];
typedDst[6 * i + 5] = typedSrc[4 * i];
}
}
// D3D12GS member handling buffers // D3D12GS member handling buffers
#define MIN2(x, y) ((x) < (y)) ? (x) : (y)
#define MAX2(x, y) ((x) > (y)) ? (x) : (y)
static
bool overlaps(const std::pair<size_t, size_t> &range1, const std::pair<size_t, size_t> &range2)
{
return !(range1.second < range2.first || range2.second < range1.first);
}
static
std::vector<VertexBufferFormat> FormatVertexData(const RSXVertexData *m_vertex_data, size_t base_offset)
{
std::vector<VertexBufferFormat> Result;
for (size_t i = 0; i < 32; ++i)
{
const RSXVertexData &vertexData = m_vertex_data[i];
if (!vertexData.IsEnabled()) continue;
size_t elementCount = vertexData.data.size() / (vertexData.size * vertexData.GetTypeSize());
// If there is a single element, stride is 0, use the size of element instead
size_t stride = vertexData.stride;
size_t elementSize = vertexData.GetTypeSize();
std::pair<size_t, size_t> range = std::make_pair(vertexData.addr + base_offset, vertexData.addr + base_offset + elementSize * vertexData.size + (elementCount - 1) * stride - 1);
bool isMerged = false;
for (VertexBufferFormat &vbf : Result)
{
if (overlaps(vbf.range, range) && vbf.stride == stride)
{
// Extend buffer if necessary
vbf.range.first = MIN2(vbf.range.first, range.first);
vbf.range.second = MAX2(vbf.range.second, range.second);
vbf.elementCount = MAX2(vbf.elementCount, elementCount);
vbf.attributeId.push_back(i);
isMerged = true;
break;
}
}
if (isMerged)
continue;
VertexBufferFormat newRange = { range, std::vector<size_t>{ i }, elementCount, stride };
Result.emplace_back(newRange);
}
return Result;
}
/** /**
* Suballocate a new vertex buffer with attributes from vbf using vertexIndexHeap as storage heap. * Suballocate a new vertex buffer with attributes from vbf using vertexIndexHeap as storage heap.
*/ */
@ -221,67 +134,15 @@ D3D12_GPU_VIRTUAL_ADDRESS createVertexBuffer(const VertexBufferFormat &vbf, cons
void *buffer; void *buffer;
ThrowIfFailed(vertexIndexHeap.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize), (void**)&buffer)); ThrowIfFailed(vertexIndexHeap.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize), (void**)&buffer));
void *bufferMap = (char*)buffer + heapOffset; void *bufferMap = (char*)buffer + heapOffset;
for (int vertex = 0; vertex < vbf.elementCount; vertex++) uploadVertexData(vbf, vertexData, baseOffset, bufferMap);
{
for (size_t attributeId : vbf.attributeId)
{
if (!vertexData[attributeId].addr)
{
memcpy(bufferMap, vertexData[attributeId].data.data(), vertexData[attributeId].data.size());
continue;
}
size_t offset = (size_t)vertexData[attributeId].addr + baseOffset - vbf.range.first;
size_t tsize = vertexData[attributeId].GetTypeSize();
size_t size = vertexData[attributeId].size;
auto src = vm::get_ptr<const u8>(vertexData[attributeId].addr + (u32)baseOffset + (u32)vbf.stride * vertex);
char* dst = (char*)bufferMap + offset + vbf.stride * vertex;
switch (tsize)
{
case 1:
{
memcpy(dst, src, size);
break;
}
case 2:
{
const u16* c_src = (const u16*)src;
u16* c_dst = (u16*)dst;
for (u32 j = 0; j < size; ++j) *c_dst++ = _byteswap_ushort(*c_src++);
break;
}
case 4:
{
const u32* c_src = (const u32*)src;
u32* c_dst = (u32*)dst;
for (u32 j = 0; j < size; ++j) *c_dst++ = _byteswap_ulong(*c_src++);
break;
}
}
}
}
vertexIndexHeap.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize)); vertexIndexHeap.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize));
return vertexIndexHeap.m_heap->GetGPUVirtualAddress() + heapOffset; return vertexIndexHeap.m_heap->GetGPUVirtualAddress() + heapOffset;
} }
static bool
isContained(const std::vector<std::pair<u32, u32> > &ranges, const std::pair<u32, u32> &range)
{
for (auto &r : ranges)
{
if (r == range)
return true;
}
return false;
}
std::vector<D3D12_VERTEX_BUFFER_VIEW> D3D12GSRender::UploadVertexBuffers(bool indexed_draw) std::vector<D3D12_VERTEX_BUFFER_VIEW> D3D12GSRender::UploadVertexBuffers(bool indexed_draw)
{ {
std::vector<D3D12_VERTEX_BUFFER_VIEW> result; std::vector<D3D12_VERTEX_BUFFER_VIEW> result;
const std::vector<VertexBufferFormat> &vertexBufferFormat = FormatVertexData(m_vertex_data, m_vertex_data_base_offset); const std::vector<VertexBufferFormat> &vertexBufferFormat = FormatVertexData(m_vertex_data, m_vertexBufferSize, m_vertex_data_base_offset);
m_IASet = getIALayout(m_device.Get(), vertexBufferFormat, m_vertex_data, m_vertex_data_base_offset); m_IASet = getIALayout(m_device.Get(), vertexBufferFormat, m_vertex_data, m_vertex_data_base_offset);
const u32 data_offset = indexed_draw ? 0 : m_draw_array_first; const u32 data_offset = indexed_draw ? 0 : m_draw_array_first;
@ -310,29 +171,9 @@ std::vector<D3D12_VERTEX_BUFFER_VIEW> D3D12GSRender::UploadVertexBuffers(bool in
D3D12_INDEX_BUFFER_VIEW D3D12GSRender::uploadIndexBuffers(bool indexed_draw) D3D12_INDEX_BUFFER_VIEW D3D12GSRender::uploadIndexBuffers(bool indexed_draw)
{ {
D3D12_INDEX_BUFFER_VIEW indexBufferView = {}; D3D12_INDEX_BUFFER_VIEW indexBufferView = {};
// Only handle quads and triangle fan now
bool forcedIndexBuffer = false;
switch (m_draw_mode - 1)
{
default:
case GL_POINTS:
case GL_LINES:
case GL_LINE_LOOP:
case GL_LINE_STRIP:
case GL_TRIANGLES:
case GL_TRIANGLE_STRIP:
case GL_QUAD_STRIP:
case GL_POLYGON:
forcedIndexBuffer = false;
break;
case GL_TRIANGLE_FAN:
case GL_QUADS:
forcedIndexBuffer = true;
break;
}
// No need for index buffer // No need for index buffer
if (!indexed_draw && !forcedIndexBuffer) if (!indexed_draw && isNativePrimitiveMode(m_draw_mode))
{ {
m_renderingInfo.m_indexed = false; m_renderingInfo.m_indexed = false;
m_renderingInfo.m_count = m_draw_array_count; m_renderingInfo.m_count = m_draw_array_count;
@ -366,35 +207,10 @@ D3D12_INDEX_BUFFER_VIEW D3D12GSRender::uploadIndexBuffers(bool indexed_draw)
} }
// Index count // Index count
if (indexed_draw && !forcedIndexBuffer) m_renderingInfo.m_count = getIndexCount(m_draw_mode, indexed_draw ? (u32)(m_indexed_array.m_data.size() / indexSize) : m_draw_array_count);
m_renderingInfo.m_count = m_indexed_array.m_data.size() / indexSize;
else if (indexed_draw && forcedIndexBuffer)
{
switch (m_draw_mode - 1)
{
case GL_TRIANGLE_FAN:
m_renderingInfo.m_count = (m_indexed_array.m_data.size() - 2) * 3;
break;
case GL_QUADS:
m_renderingInfo.m_count = 6 * m_indexed_array.m_data.size() / (4 * indexSize);
break;
}
}
else
{
switch (m_draw_mode - 1)
{
case GL_TRIANGLE_FAN:
m_renderingInfo.m_count = (m_draw_array_count - 2) * 3;
break;
case GL_QUADS:
m_renderingInfo.m_count = m_draw_array_count * 6 / 4;
break;
}
}
// Base vertex // Base vertex
if (!indexed_draw && forcedIndexBuffer) if (!indexed_draw && isNativePrimitiveMode(m_draw_mode))
m_renderingInfo.m_baseVertex = m_draw_array_first; m_renderingInfo.m_baseVertex = m_draw_array_first;
else else
m_renderingInfo.m_baseVertex = 0; m_renderingInfo.m_baseVertex = 0;
@ -408,70 +224,9 @@ D3D12_INDEX_BUFFER_VIEW D3D12GSRender::uploadIndexBuffers(bool indexed_draw)
void *buffer; void *buffer;
ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize), (void**)&buffer)); ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize), (void**)&buffer));
void *bufferMap = (char*)buffer + heapOffset; void *bufferMap = (char*)buffer + heapOffset;
if (indexed_draw && !forcedIndexBuffer) uploadIndexData(m_draw_mode, m_indexed_array.m_type, indexed_draw ? m_indexed_array.m_data.data() : nullptr, bufferMap, indexed_draw ? (u32)(m_indexed_array.m_data.size() / indexSize) : m_draw_array_count);
streamBuffer(bufferMap, m_indexed_array.m_data.data(), subBufferSize);
else if (indexed_draw && forcedIndexBuffer)
{
// Only quads supported now
switch (m_draw_mode - 1)
{
case GL_TRIANGLE_FAN:
switch (m_indexed_array.m_type)
{
case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32:
expandIndexedTriangleFan<unsigned int>(bufferMap, m_indexed_array.m_data.data(), m_indexed_array.m_data.size() / 4);
break;
case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16:
expandIndexedTriangleFan<unsigned short>(bufferMap, m_indexed_array.m_data.data(), m_indexed_array.m_data.size() / 2);
break;
}
break;
case GL_QUADS:
switch (m_indexed_array.m_type)
{
case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32:
expandIndexedQuads<unsigned int>(bufferMap, m_indexed_array.m_data.data(), m_indexed_array.m_data.size() / 4);
break;
case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16:
expandIndexedQuads<unsigned short>(bufferMap, m_indexed_array.m_data.data(), m_indexed_array.m_data.size() / 2);
break;
}
break;
}
}
else
{
unsigned short *typedDst = static_cast<unsigned short *>(bufferMap);
switch (m_draw_mode - 1)
{
case GL_TRIANGLE_FAN:
for (unsigned i = 0; i < (m_draw_array_count - 2); i++)
{
typedDst[3 * i] = 0;
typedDst[3 * i + 1] = i + 2 - 1;
typedDst[3 * i + 2] = i + 2;
}
break;
case GL_QUADS:
for (unsigned i = 0; i < m_draw_array_count / 4; i++)
{
// First triangle
typedDst[6 * i] = 4 * i;
typedDst[6 * i + 1] = 4 * i + 1;
typedDst[6 * i + 2] = 4 * i + 2;
// Second triangle
typedDst[6 * i + 3] = 4 * i + 2;
typedDst[6 * i + 4] = 4 * i + 3;
typedDst[6 * i + 5] = 4 * i;
}
break;
}
}
m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize)); m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize));
m_timers.m_bufferUploadSize += subBufferSize; m_timers.m_bufferUploadSize += subBufferSize;
indexBufferView.SizeInBytes = (UINT)subBufferSize; indexBufferView.SizeInBytes = (UINT)subBufferSize;
indexBufferView.BufferLocation = m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heapOffset; indexBufferView.BufferLocation = m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heapOffset;
return indexBufferView; return indexBufferView;
@ -505,22 +260,20 @@ void D3D12GSRender::setScaleOffset()
// Scale offset buffer // Scale offset buffer
// Separate constant buffer // Separate constant buffer
D3D12_RANGE range = { heapOffset, heapOffset + 256 };
void *scaleOffsetMap; void *scaleOffsetMap;
ThrowIfFailed(m_constantsData.m_heap->Map(0, &range, &scaleOffsetMap)); ThrowIfFailed(m_constantsData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + 256), &scaleOffsetMap));
streamToBuffer((char*)scaleOffsetMap + heapOffset, scaleOffsetMat, 16 * sizeof(float)); streamToBuffer((char*)scaleOffsetMap + heapOffset, scaleOffsetMat, 16 * sizeof(float));
int isAlphaTested = m_set_alpha_test; int isAlphaTested = m_set_alpha_test;
memcpy((char*)scaleOffsetMap + heapOffset + 16 * sizeof(float), &isAlphaTested, sizeof(int)); memcpy((char*)scaleOffsetMap + heapOffset + 16 * sizeof(float), &isAlphaTested, sizeof(int));
memcpy((char*)scaleOffsetMap + heapOffset + 17 * sizeof(float), &m_alpha_ref, sizeof(float)); memcpy((char*)scaleOffsetMap + heapOffset + 17 * sizeof(float), &m_alpha_ref, sizeof(float));
m_constantsData.m_heap->Unmap(0, &range); m_constantsData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + 256));
D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {};
constantBufferViewDesc.BufferLocation = m_constantsData.m_heap->GetGPUVirtualAddress() + heapOffset; constantBufferViewDesc.BufferLocation = m_constantsData.m_heap->GetGPUVirtualAddress() + heapOffset;
constantBufferViewDesc.SizeInBytes = (UINT)256; constantBufferViewDesc.SizeInBytes = (UINT)256;
D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_scaleOffsetDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); m_device->CreateConstantBufferView(&constantBufferViewDesc,
Handle.ptr += getCurrentResourceStorage().m_currentScaleOffsetBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_scaleOffsetDescriptorHeap->GetCPUDescriptorHandleForHeapStart())
m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); .Offset((INT)getCurrentResourceStorage().m_currentScaleOffsetBufferIndex, g_descriptorStrideSRVCBVUAV));
} }
void D3D12GSRender::FillVertexShaderConstantsBuffer() void D3D12GSRender::FillVertexShaderConstantsBuffer()
@ -536,10 +289,8 @@ void D3D12GSRender::FillVertexShaderConstantsBuffer()
assert(m_constantsData.canAlloc(bufferSize)); assert(m_constantsData.canAlloc(bufferSize));
size_t heapOffset = m_constantsData.alloc(bufferSize); size_t heapOffset = m_constantsData.alloc(bufferSize);
D3D12_RANGE range = { heapOffset, heapOffset + bufferSize };
void *constantsBufferMap; void *constantsBufferMap;
ThrowIfFailed(m_constantsData.m_heap->Map(0, &range, &constantsBufferMap)); ThrowIfFailed(m_constantsData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + bufferSize), &constantsBufferMap));
for (const auto &vertexConstants : m_vertexConstants) for (const auto &vertexConstants : m_vertexConstants)
{ {
float data[4] = { float data[4] = {
@ -550,14 +301,14 @@ void D3D12GSRender::FillVertexShaderConstantsBuffer()
}; };
streamToBuffer((char*)constantsBufferMap + heapOffset + vertexConstants.first, data, 4 * sizeof(float)); streamToBuffer((char*)constantsBufferMap + heapOffset + vertexConstants.first, data, 4 * sizeof(float));
} }
m_constantsData.m_heap->Unmap(0, &range); m_constantsData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + bufferSize));
D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {};
constantBufferViewDesc.BufferLocation = m_constantsData.m_heap->GetGPUVirtualAddress() + heapOffset; constantBufferViewDesc.BufferLocation = m_constantsData.m_heap->GetGPUVirtualAddress() + heapOffset;
constantBufferViewDesc.SizeInBytes = (UINT)bufferSize; constantBufferViewDesc.SizeInBytes = (UINT)bufferSize;
D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); m_device->CreateConstantBufferView(&constantBufferViewDesc,
Handle.ptr += getCurrentResourceStorage().m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart())
m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); .Offset((INT)getCurrentResourceStorage().m_constantsBufferIndex, g_descriptorStrideSRVCBVUAV));
} }
void D3D12GSRender::FillPixelShaderConstantsBuffer() void D3D12GSRender::FillPixelShaderConstantsBuffer()
@ -571,11 +322,9 @@ void D3D12GSRender::FillPixelShaderConstantsBuffer()
assert(m_constantsData.canAlloc(bufferSize)); assert(m_constantsData.canAlloc(bufferSize));
size_t heapOffset = m_constantsData.alloc(bufferSize); size_t heapOffset = m_constantsData.alloc(bufferSize);
D3D12_RANGE range = { heapOffset, heapOffset + bufferSize };
size_t offset = 0; size_t offset = 0;
void *constantsBufferMap; void *constantsBufferMap;
ThrowIfFailed(m_constantsData.m_heap->Map(0, &range, &constantsBufferMap)); ThrowIfFailed(m_constantsData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + bufferSize), &constantsBufferMap));
for (size_t offsetInFP : fragmentOffset) for (size_t offsetInFP : fragmentOffset)
{ {
u32 vector[4]; u32 vector[4];
@ -613,14 +362,14 @@ void D3D12GSRender::FillPixelShaderConstantsBuffer()
streamToBuffer((char*)constantsBufferMap + heapOffset + offset, vector, 4 * sizeof(u32)); streamToBuffer((char*)constantsBufferMap + heapOffset + offset, vector, 4 * sizeof(u32));
offset += 4 * sizeof(u32); offset += 4 * sizeof(u32);
} }
m_constantsData.m_heap->Unmap(0, &range); m_constantsData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + bufferSize));
D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {}; D3D12_CONSTANT_BUFFER_VIEW_DESC constantBufferViewDesc = {};
constantBufferViewDesc.BufferLocation = m_constantsData.m_heap->GetGPUVirtualAddress() + heapOffset; constantBufferViewDesc.BufferLocation = m_constantsData.m_heap->GetGPUVirtualAddress() + heapOffset;
constantBufferViewDesc.SizeInBytes = (UINT)bufferSize; constantBufferViewDesc.SizeInBytes = (UINT)bufferSize;
D3D12_CPU_DESCRIPTOR_HANDLE Handle = getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart(); m_device->CreateConstantBufferView(&constantBufferViewDesc,
Handle.ptr += getCurrentResourceStorage().m_constantsBufferIndex * m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart())
m_device->CreateConstantBufferView(&constantBufferViewDesc, Handle); .Offset((INT)getCurrentResourceStorage().m_constantsBufferIndex, g_descriptorStrideSRVCBVUAV));
} }

@ -7,12 +7,15 @@
#include <thread> #include <thread>
#include <chrono> #include <chrono>
#include "d3dx12.h" #include "d3dx12.h"
#include <d3d11on12.h>
PFN_D3D12_CREATE_DEVICE wrapD3D12CreateDevice; PFN_D3D12_CREATE_DEVICE wrapD3D12CreateDevice;
PFN_D3D12_GET_DEBUG_INTERFACE wrapD3D12GetDebugInterface; PFN_D3D12_GET_DEBUG_INTERFACE wrapD3D12GetDebugInterface;
PFN_D3D12_SERIALIZE_ROOT_SIGNATURE wrapD3D12SerializeRootSignature; PFN_D3D12_SERIALIZE_ROOT_SIGNATURE wrapD3D12SerializeRootSignature;
PFN_D3D11ON12_CREATE_DEVICE wrapD3D11On12CreateDevice;
static HMODULE D3D12Module; static HMODULE D3D12Module;
static HMODULE D3D11Module;
static void loadD3D12FunctionPointers() static void loadD3D12FunctionPointers()
{ {
@ -20,11 +23,14 @@ static void loadD3D12FunctionPointers()
wrapD3D12CreateDevice = (PFN_D3D12_CREATE_DEVICE)GetProcAddress(D3D12Module, "D3D12CreateDevice"); wrapD3D12CreateDevice = (PFN_D3D12_CREATE_DEVICE)GetProcAddress(D3D12Module, "D3D12CreateDevice");
wrapD3D12GetDebugInterface = (PFN_D3D12_GET_DEBUG_INTERFACE)GetProcAddress(D3D12Module, "D3D12GetDebugInterface"); wrapD3D12GetDebugInterface = (PFN_D3D12_GET_DEBUG_INTERFACE)GetProcAddress(D3D12Module, "D3D12GetDebugInterface");
wrapD3D12SerializeRootSignature = (PFN_D3D12_SERIALIZE_ROOT_SIGNATURE)GetProcAddress(D3D12Module, "D3D12SerializeRootSignature"); wrapD3D12SerializeRootSignature = (PFN_D3D12_SERIALIZE_ROOT_SIGNATURE)GetProcAddress(D3D12Module, "D3D12SerializeRootSignature");
D3D11Module = LoadLibrary(L"d3d11.dll");
wrapD3D11On12CreateDevice = (PFN_D3D11ON12_CREATE_DEVICE)GetProcAddress(D3D11Module, "D3D11On12CreateDevice");
} }
static void unloadD3D12FunctionPointers() static void unloadD3D12FunctionPointers()
{ {
FreeLibrary(D3D12Module); FreeLibrary(D3D12Module);
FreeLibrary(D3D11Module);
} }
GetGSFrameCb2 GetGSFrame = nullptr; GetGSFrameCb2 GetGSFrame = nullptr;
@ -95,6 +101,8 @@ void D3D12GSRender::ResourceStorage::WaitAndClean()
void D3D12GSRender::ResourceStorage::Release() void D3D12GSRender::ResourceStorage::Release()
{ {
for (auto tmp : m_dirtyTextures)
tmp->Release();
// NOTE: Should be released only after gfx pipeline last command has been finished. // NOTE: Should be released only after gfx pipeline last command has been finished.
CloseHandle(m_frameFinishedHandle); CloseHandle(m_frameFinishedHandle);
} }
@ -224,49 +232,28 @@ D3D12GSRender::D3D12GSRender()
// Common root signatures // Common root signatures
for (unsigned textureCount = 0; textureCount < 17; textureCount++) for (unsigned textureCount = 0; textureCount < 17; textureCount++)
{ {
D3D12_DESCRIPTOR_RANGE descriptorRange[4] = {}; CD3DX12_DESCRIPTOR_RANGE descriptorRange[] =
{
// Scale Offset data // Scale Offset data
descriptorRange[0].BaseShaderRegister = 0; CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 1, 0),
descriptorRange[0].NumDescriptors = 1;
descriptorRange[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
// Constants // Constants
descriptorRange[1].BaseShaderRegister = 1; CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 2, 1),
descriptorRange[1].NumDescriptors = 2;
descriptorRange[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
// Textures // Textures
descriptorRange[2].BaseShaderRegister = 0; CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, textureCount, 0),
descriptorRange[2].NumDescriptors = textureCount;
descriptorRange[2].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
// Samplers // Samplers
descriptorRange[3].BaseShaderRegister = 0; CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, textureCount, 0),
descriptorRange[3].NumDescriptors = textureCount; };
descriptorRange[3].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; CD3DX12_ROOT_PARAMETER RP[4];
D3D12_ROOT_PARAMETER RP[4] = {}; RP[0].InitAsDescriptorTable(1, &descriptorRange[0]);
RP[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; RP[1].InitAsDescriptorTable(1, &descriptorRange[1]);
RP[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; RP[2].InitAsDescriptorTable(1, &descriptorRange[2]);
RP[0].DescriptorTable.pDescriptorRanges = &descriptorRange[0]; RP[3].InitAsDescriptorTable(1, &descriptorRange[3]);
RP[0].DescriptorTable.NumDescriptorRanges = 1;
RP[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
RP[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
RP[1].DescriptorTable.pDescriptorRanges = &descriptorRange[1];
RP[1].DescriptorTable.NumDescriptorRanges = 1;
RP[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
RP[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
RP[2].DescriptorTable.pDescriptorRanges = &descriptorRange[2];
RP[2].DescriptorTable.NumDescriptorRanges = 1;
RP[3].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
RP[3].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
RP[3].DescriptorTable.pDescriptorRanges = &descriptorRange[3];
RP[3].DescriptorTable.NumDescriptorRanges = 1;
D3D12_ROOT_SIGNATURE_DESC rootSignatureDesc = {};
rootSignatureDesc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT;
rootSignatureDesc.NumParameters = (textureCount > 0) ? 4 : 2;
rootSignatureDesc.pParameters = RP;
Microsoft::WRL::ComPtr<ID3DBlob> rootSignatureBlob; Microsoft::WRL::ComPtr<ID3DBlob> rootSignatureBlob;
Microsoft::WRL::ComPtr<ID3DBlob> errorBlob; Microsoft::WRL::ComPtr<ID3DBlob> errorBlob;
ThrowIfFailed(wrapD3D12SerializeRootSignature(&rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob)); ThrowIfFailed(wrapD3D12SerializeRootSignature(
&CD3DX12_ROOT_SIGNATURE_DESC((textureCount > 0) ? 4 : 2, RP, 0, 0, D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT),
D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob));
m_device->CreateRootSignature(0, m_device->CreateRootSignature(0,
rootSignatureBlob->GetBufferPointer(), rootSignatureBlob->GetBufferPointer(),
@ -299,7 +286,7 @@ D3D12GSRender::D3D12GSRender()
m_constantsData.Init(m_device.Get(), 1024 * 1024 * 64, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE); m_constantsData.Init(m_device.Get(), 1024 * 1024 * 64, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE);
m_vertexIndexData.Init(m_device.Get(), 1024 * 1024 * 384, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE); m_vertexIndexData.Init(m_device.Get(), 1024 * 1024 * 384, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE);
m_textureUploadData.Init(m_device.Get(), 1024 * 1024 * 256, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS); m_textureUploadData.Init(m_device.Get(), 1024 * 1024 * 256, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE);
if (Ini.GSOverlay.GetValue()) if (Ini.GSOverlay.GetValue())
InitD2DStructures(); InitD2DStructures();
@ -475,7 +462,6 @@ void D3D12GSRender::Draw()
std::chrono::time_point<std::chrono::system_clock> vertexIndexDurationStart = std::chrono::system_clock::now(); std::chrono::time_point<std::chrono::system_clock> vertexIndexDurationStart = std::chrono::system_clock::now();
// Init vertex count // Init vertex count
// TODO: Very hackish, clean this
if (m_indexed_array.m_count) if (m_indexed_array.m_count)
{ {
for (u32 i = 0; i < m_vertex_count; ++i) for (u32 i = 0; i < m_vertex_count; ++i)
@ -484,7 +470,7 @@ void D3D12GSRender::Draw()
if (!m_vertex_data[i].addr) continue; if (!m_vertex_data[i].addr) continue;
const u32 tsize = m_vertex_data[i].GetTypeSize(); const u32 tsize = m_vertex_data[i].GetTypeSize();
m_vertex_data[i].data.resize((m_indexed_array.index_min + m_indexed_array.index_max - m_indexed_array.index_min + 1) * tsize * m_vertex_data[i].size); m_vertexBufferSize[i] = (m_indexed_array.index_min + m_indexed_array.index_max - m_indexed_array.index_min + 1) * tsize * m_vertex_data[i].size;
} }
} }
else else
@ -495,7 +481,7 @@ void D3D12GSRender::Draw()
if (!m_vertex_data[i].addr) continue; if (!m_vertex_data[i].addr) continue;
const u32 tsize = m_vertex_data[i].GetTypeSize(); const u32 tsize = m_vertex_data[i].GetTypeSize();
m_vertex_data[i].data.resize((m_draw_array_first + m_draw_array_count) * tsize * m_vertex_data[i].size); m_vertexBufferSize[i] = (m_draw_array_first + m_draw_array_count) * tsize * m_vertex_data[i].size;
} }
} }
@ -727,7 +713,7 @@ void D3D12GSRender::Flip()
size_t w = 0, h = 0, rowPitch = 0; size_t w = 0, h = 0, rowPitch = 0;
ID3D12Resource *stagingTexture; size_t offset = 0;
if (m_read_buffer) if (m_read_buffer)
{ {
CellGcmDisplayInfo* buffers = vm::get_ptr<CellGcmDisplayInfo>(m_gcm_buffers_addr); CellGcmDisplayInfo* buffers = vm::get_ptr<CellGcmDisplayInfo>(m_gcm_buffers_addr);
@ -741,21 +727,13 @@ void D3D12GSRender::Flip()
assert(m_textureUploadData.canAlloc(textureSize)); assert(m_textureUploadData.canAlloc(textureSize));
size_t heapOffset = m_textureUploadData.alloc(textureSize); size_t heapOffset = m_textureUploadData.alloc(textureSize);
ThrowIfFailed(m_device->CreatePlacedResource( void *buffer;
m_textureUploadData.m_heap, ThrowIfFailed(m_textureUploadData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + textureSize), &buffer));
heapOffset, void *dstBuffer = (char*)buffer + heapOffset;
&CD3DX12_RESOURCE_DESC::Buffer(textureSize),
D3D12_RESOURCE_STATE_GENERIC_READ,
nullptr,
IID_PPV_ARGS(&stagingTexture)
));
getCurrentResourceStorage().m_singleFrameLifetimeResources.push_back(stagingTexture);
void *dstBuffer;
ThrowIfFailed(stagingTexture->Map(0, nullptr, &dstBuffer));
for (unsigned row = 0; row < h; row++) for (unsigned row = 0; row < h; row++)
memcpy((char*)dstBuffer + row * rowPitch, (char*)src_buffer + row * w * 4, w * 4); memcpy((char*)dstBuffer + row * rowPitch, (char*)src_buffer + row * w * 4, w * 4);
stagingTexture->Unmap(0, nullptr); m_textureUploadData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + textureSize));
offset = heapOffset;
} }
ThrowIfFailed( ThrowIfFailed(
@ -769,7 +747,7 @@ void D3D12GSRender::Flip()
) )
); );
getCurrentResourceStorage().m_commandList->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(storage.m_RAMFramebuffer.Get(), 0), 0, 0, 0, getCurrentResourceStorage().m_commandList->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(storage.m_RAMFramebuffer.Get(), 0), 0, 0, 0,
&CD3DX12_TEXTURE_COPY_LOCATION(stagingTexture, { 0, { DXGI_FORMAT_R8G8B8A8_UNORM, (UINT)w, (UINT)h, 1, (UINT)rowPitch} }), nullptr); &CD3DX12_TEXTURE_COPY_LOCATION(m_textureUploadData.m_heap, { offset, { DXGI_FORMAT_R8G8B8A8_UNORM, (UINT)w, (UINT)h, 1, (UINT)rowPitch} }), nullptr);
getCurrentResourceStorage().m_commandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(storage.m_RAMFramebuffer.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_GENERIC_READ)); getCurrentResourceStorage().m_commandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(storage.m_RAMFramebuffer.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_GENERIC_READ));
resourceToFlip = storage.m_RAMFramebuffer.Get(); resourceToFlip = storage.m_RAMFramebuffer.Get();
@ -860,6 +838,10 @@ void D3D12GSRender::Flip()
if(Ini.GSOverlay.GetValue()) if(Ini.GSOverlay.GetValue())
renderOverlay(); renderOverlay();
ResetTimer();
std::chrono::time_point<std::chrono::system_clock> flipStart = std::chrono::system_clock::now();
ThrowIfFailed(m_swapChain->Present(Ini.GSVSyncEnable.GetValue() ? 1 : 0, 0)); ThrowIfFailed(m_swapChain->Present(Ini.GSVSyncEnable.GetValue() ? 1 : 0, 0));
// Add an event signaling queue completion // Add an event signaling queue completion
@ -890,15 +872,20 @@ void D3D12GSRender::Flip()
ResourceStorage &newStorage = getCurrentResourceStorage(); ResourceStorage &newStorage = getCurrentResourceStorage();
newStorage.WaitAndClean(); newStorage.WaitAndClean();
m_constantsData.m_getPos.store(newStorage.m_getPosConstantsHeap, std::memory_order_release); if (newStorage.m_inUse)
m_vertexIndexData.m_getPos.store(newStorage.m_getPosVertexIndexHeap, std::memory_order_release); {
m_textureUploadData.m_getPos.store(newStorage.m_getPosTextureUploadHeap, std::memory_order_release); m_constantsData.m_getPos = newStorage.m_getPosConstantsHeap;
m_readbackResources.m_getPos.store(newStorage.m_getPosReadbackHeap, std::memory_order_release); m_vertexIndexData.m_getPos = newStorage.m_getPosVertexIndexHeap;
m_UAVHeap.m_getPos.store(newStorage.m_getPosUAVHeap, std::memory_order_release); m_textureUploadData.m_getPos = newStorage.m_getPosTextureUploadHeap;
m_readbackResources.m_getPos = newStorage.m_getPosReadbackHeap;
m_UAVHeap.m_getPos = newStorage.m_getPosUAVHeap;
}
m_frame->Flip(nullptr); m_frame->Flip(nullptr);
ResetTimer();
std::chrono::time_point<std::chrono::system_clock> flipEnd = std::chrono::system_clock::now();
m_timers.m_flipDuration += std::chrono::duration_cast<std::chrono::microseconds>(flipEnd - flipStart).count();
} }
void D3D12GSRender::ResetTimer() void D3D12GSRender::ResetTimer()
@ -911,6 +898,7 @@ void D3D12GSRender::ResetTimer()
m_timers.m_programLoadDuration = 0; m_timers.m_programLoadDuration = 0;
m_timers.m_constantsDuration = 0; m_timers.m_constantsDuration = 0;
m_timers.m_textureDuration = 0; m_timers.m_textureDuration = 0;
m_timers.m_flipDuration = 0;
} }
D3D12GSRender::ResourceStorage& D3D12GSRender::getCurrentResourceStorage() D3D12GSRender::ResourceStorage& D3D12GSRender::getCurrentResourceStorage()

@ -119,14 +119,14 @@ struct DataHeap
T *m_heap; T *m_heap;
size_t m_size; size_t m_size;
size_t m_putPos; // Start of free space size_t m_putPos; // Start of free space
std::atomic<size_t> m_getPos; // End of free space size_t m_getPos; // End of free space
void Init(ID3D12Device *device, size_t heapSize, D3D12_HEAP_TYPE type, D3D12_HEAP_FLAGS flags) void Init(ID3D12Device *device, size_t heapSize, D3D12_HEAP_TYPE type, D3D12_HEAP_FLAGS flags)
{ {
m_size = heapSize; m_size = heapSize;
m_heap = InitHeap<T>::Init(device, heapSize, type, flags); m_heap = InitHeap<T>::Init(device, heapSize, type, flags);
m_putPos = 0; m_putPos = 0;
m_getPos = m_size - 1; m_getPos = heapSize - 1;
} }
/** /**
@ -135,7 +135,7 @@ struct DataHeap
bool canAlloc(size_t size) const bool canAlloc(size_t size) const
{ {
size_t allocSize = align(size, Alignment); size_t allocSize = align(size, Alignment);
size_t currentGetPos = m_getPos.load(); size_t currentGetPos = m_getPos;
if (m_putPos + allocSize < m_size) if (m_putPos + allocSize < m_size)
{ {
// range before get // range before get
@ -232,6 +232,8 @@ private:
PipelineStateObjectCache m_cachePSO; PipelineStateObjectCache m_cachePSO;
std::pair<ID3D12PipelineState *, size_t> *m_PSO; std::pair<ID3D12PipelineState *, size_t> *m_PSO;
size_t m_vertexBufferSize[32];
struct struct
{ {
size_t m_drawCallDuration; size_t m_drawCallDuration;
@ -242,6 +244,7 @@ private:
size_t m_programLoadDuration; size_t m_programLoadDuration;
size_t m_constantsDuration; size_t m_constantsDuration;
size_t m_textureDuration; size_t m_textureDuration;
size_t m_flipDuration;
} m_timers; } m_timers;
void ResetTimer(); void ResetTimer();
@ -332,7 +335,7 @@ private:
// Vertex storage // Vertex storage
DataHeap<ID3D12Resource, 65536> m_vertexIndexData; DataHeap<ID3D12Resource, 65536> m_vertexIndexData;
// Texture storage // Texture storage
DataHeap<ID3D12Heap, 65536> m_textureUploadData; DataHeap<ID3D12Resource, 65536> m_textureUploadData;
DataHeap<ID3D12Heap, 65536> m_UAVHeap; DataHeap<ID3D12Heap, 65536> m_UAVHeap;
DataHeap<ID3D12Heap, 65536> m_readbackResources; DataHeap<ID3D12Heap, 65536> m_readbackResources;

@ -22,11 +22,12 @@ ComPtr<ID2D1SolidColorBrush> m_textBrush;
#pragma comment (lib, "d2d1.lib") #pragma comment (lib, "d2d1.lib")
#pragma comment (lib, "dwrite.lib") #pragma comment (lib, "dwrite.lib")
#pragma comment (lib, "d3d11.lib")
extern PFN_D3D11ON12_CREATE_DEVICE wrapD3D11On12CreateDevice;
void D3D12GSRender::InitD2DStructures() void D3D12GSRender::InitD2DStructures()
{ {
D3D11On12CreateDevice( wrapD3D11On12CreateDevice(
m_device.Get(), m_device.Get(),
D3D11_CREATE_DEVICE_BGRA_SUPPORT, D3D11_CREATE_DEVICE_BGRA_SUPPORT,
nullptr, nullptr,
@ -129,6 +130,7 @@ void D3D12GSRender::renderOverlay()
std::wstring constantDuration = L"Constants : " + std::to_wstring(m_timers.m_constantsDuration) + L" us (" + std::to_wstring(100.f * constantsPercent) + L" %)"; std::wstring constantDuration = L"Constants : " + std::to_wstring(m_timers.m_constantsDuration) + L" us (" + std::to_wstring(100.f * constantsPercent) + L" %)";
float rttPercent = (float)m_timers.m_rttDuration / (float)m_timers.m_drawCallDuration; float rttPercent = (float)m_timers.m_rttDuration / (float)m_timers.m_drawCallDuration;
std::wstring rttDuration = L"RTT : " + std::to_wstring(m_timers.m_rttDuration) + L" us (" + std::to_wstring(100.f * rttPercent) + L" %)"; std::wstring rttDuration = L"RTT : " + std::to_wstring(m_timers.m_rttDuration) + L" us (" + std::to_wstring(100.f * rttPercent) + L" %)";
std::wstring flipDuration = L"Flip : " + std::to_wstring(m_timers.m_flipDuration) + L" us";
std::wstring count = L"Draw count : " + std::to_wstring(m_timers.m_drawCallCount); std::wstring count = L"Draw count : " + std::to_wstring(m_timers.m_drawCallCount);
@ -195,6 +197,13 @@ void D3D12GSRender::renderOverlay()
&D2D1::RectF(0, 98, rtSize.width, rtSize.height), &D2D1::RectF(0, 98, rtSize.width, rtSize.height),
m_textBrush.Get() m_textBrush.Get()
); );
m_d2dDeviceContext->DrawTextW(
flipDuration.c_str(),
(UINT32)flipDuration.size(),
m_textFormat.Get(),
&D2D1::RectF(0, 112, rtSize.width, rtSize.height),
m_textBrush.Get()
);
m_d2dDeviceContext->EndDraw(); m_d2dDeviceContext->EndDraw();
// Release our wrapped render target resource. Releasing // Release our wrapped render target resource. Releasing

@ -2,40 +2,9 @@
#if defined(DX12_SUPPORT) #if defined(DX12_SUPPORT)
#include "D3D12GSRender.h" #include "D3D12GSRender.h"
#include "d3dx12.h" #include "d3dx12.h"
#include "../Common/TextureUtils.h"
// For clarity this code deals with texture but belongs to D3D12GSRender class // For clarity this code deals with texture but belongs to D3D12GSRender class
/**
 * Interleave the low bits of x, y and z into a single swizzled offset.
 * Bits are consumed lowest first, in x, y, z order; a coordinate stops
 * contributing once its log2 extent has been used up.
 */
static
u32 LinearToSwizzleAddress(u32 x, u32 y, u32 z, u32 log2_width, u32 log2_height, u32 log2_depth)
{
	u32 swizzled = 0;
	u32 bitPos = 0;

	// Moves the lowest bit of `coord` to the next free position of the result,
	// as long as that axis still has bits left to give.
	auto takeBit = [&swizzled, &bitPos](u32 &coord, u32 &bitsLeft)
	{
		if (bitsLeft == 0)
			return;
		swizzled |= (coord & 0x01) << bitPos;
		coord >>= 1;
		++bitPos;
		--bitsLeft;
	};

	while (log2_width != 0 || log2_height != 0 || log2_depth != 0)
	{
		takeBit(x, log2_width);
		takeBit(y, log2_height);
		takeBit(z, log2_depth);
	}
	return swizzled;
}
static static
D3D12_COMPARISON_FUNC getSamplerCompFunc[] = D3D12_COMPARISON_FUNC getSamplerCompFunc[] =
{ {
@ -151,239 +120,6 @@ D3D12_SAMPLER_DESC getSamplerDesc(const RSXTexture &texture)
return samplerDesc; return samplerDesc;
} }
/**
 * Placement of one mipmap level inside the destination buffer, as recorded
 * by the writeTexels* / writeCompressedTexel helpers (one entry per level).
 */
struct MipmapLevelInfo
{
// Position in the destination buffer at which this level starts (the writers' running offsetInDst).
size_t offset;
// Level width as tracked by the writer (block count in writeTexelsGeneric / writeTexelsSwizzled).
size_t width;
// Level height as tracked by the writer; writeCompressedTexel stores it multiplied by the block height.
size_t height;
// Size in bytes of one destination row: width * blockSize rounded up with align(..., 256).
size_t rowPitch;
};
// Larger of two values. The whole expansion is parenthesised so the macro can be
// embedded in a larger expression (e.g. `1 + MAX2(a, b)`) without the surrounding
// operators binding to the condition of the ?: operator.
// Each argument may be evaluated twice: do not pass expressions with side effects.
#define MAX2(a, b) (((a) > (b)) ? (a) : (b))
/**
* Write data, assume src pixels are packed but not mipmaplevel
*/
static std::vector<MipmapLevelInfo>
writeTexelsGeneric(const char *src, char *dst, size_t widthInBlock, size_t heightInBlock, size_t blockSize, size_t mipmapCount)
{
std::vector<MipmapLevelInfo> Result;
size_t offsetInDst = 0, offsetInSrc = 0;
size_t currentHeight = heightInBlock, currentWidth = widthInBlock;
for (unsigned mipLevel = 0; mipLevel < mipmapCount; mipLevel++)
{
size_t rowPitch = align(currentWidth * blockSize, 256);
MipmapLevelInfo currentMipmapLevelInfo = {};
currentMipmapLevelInfo.offset = offsetInDst;
currentMipmapLevelInfo.height = currentHeight;
currentMipmapLevelInfo.width = currentWidth;
currentMipmapLevelInfo.rowPitch = rowPitch;
Result.push_back(currentMipmapLevelInfo);
for (unsigned row = 0; row < currentHeight; row++)
memcpy((char*)dst + offsetInDst + row * rowPitch, (char*)src + offsetInSrc + row * widthInBlock * blockSize, currentWidth * blockSize);
offsetInDst += currentHeight * rowPitch;
offsetInDst = align(offsetInDst, 512);
offsetInSrc += currentHeight * widthInBlock * blockSize;
currentHeight = MAX2(currentHeight / 2, 1);
currentWidth = MAX2(currentWidth / 2, 1);
}
return Result;
}
/**
* Write data, assume src pixels are swizzled and but not mipmaplevel
*/
static std::vector<MipmapLevelInfo>
writeTexelsSwizzled(const char *src, char *dst, size_t widthInBlock, size_t heightInBlock, size_t blockSize, size_t mipmapCount)
{
std::vector<MipmapLevelInfo> Result;
size_t offsetInDst = 0, offsetInSrc = 0;
size_t currentHeight = heightInBlock, currentWidth = widthInBlock;
for (unsigned mipLevel = 0; mipLevel < mipmapCount; mipLevel++)
{
size_t rowPitch = align(currentWidth * blockSize, 256);
MipmapLevelInfo currentMipmapLevelInfo = {};
currentMipmapLevelInfo.offset = offsetInDst;
currentMipmapLevelInfo.height = currentHeight;
currentMipmapLevelInfo.width = currentWidth;
currentMipmapLevelInfo.rowPitch = rowPitch;
Result.push_back(currentMipmapLevelInfo);
u32 *castedSrc, *castedDst;
u32 log2width, log2height;
castedSrc = (u32*)src + offsetInSrc;
castedDst = (u32*)dst + offsetInDst;
log2width = (u32)(logf((float)currentWidth) / logf(2.f));
log2height = (u32)(logf((float)currentHeight) / logf(2.f));
for (int row = 0; row < currentHeight; row++)
for (int j = 0; j < currentWidth; j++)
castedDst[(row * rowPitch / 4) + j] = castedSrc[LinearToSwizzleAddress(j, row, 0, log2width, log2height, 0)];
offsetInDst += currentHeight * rowPitch;
offsetInSrc += currentHeight * widthInBlock * blockSize;
currentHeight = MAX2(currentHeight / 2, 1);
currentWidth = MAX2(currentWidth / 2, 1);
}
return Result;
}
/**
* Write data, assume compressed (DXTCn) format
*/
static std::vector<MipmapLevelInfo>
writeCompressedTexel(const char *src, char *dst, size_t widthInBlock, size_t blockWidth, size_t heightInBlock, size_t blockHeight, size_t blockSize, size_t mipmapCount)
{
std::vector<MipmapLevelInfo> Result;
size_t offsetInDst = 0, offsetInSrc = 0;
size_t currentHeight = heightInBlock, currentWidth = widthInBlock;
for (unsigned mipLevel = 0; mipLevel < mipmapCount; mipLevel++)
{
size_t rowPitch = align(currentWidth * blockSize, 256);
MipmapLevelInfo currentMipmapLevelInfo = {};
currentMipmapLevelInfo.offset = offsetInDst;
currentMipmapLevelInfo.height = currentHeight * blockHeight;
currentMipmapLevelInfo.width = currentWidth * blockWidth;
currentMipmapLevelInfo.rowPitch = rowPitch;
Result.push_back(currentMipmapLevelInfo);
for (unsigned row = 0; row < currentHeight; row++)
memcpy((char*)dst + offsetInDst + row * rowPitch, (char*)src + offsetInSrc + row * currentWidth * blockSize, currentWidth * blockSize);
offsetInDst += currentHeight * rowPitch;
offsetInDst = align(offsetInDst, 512);
offsetInSrc += currentHeight * currentWidth * blockSize;
currentHeight = MAX2(currentHeight / 2, 1);
currentWidth = MAX2(currentWidth / 2, 1);
}
return Result;
}
/**
* Write 16 bytes pixel textures, assume src pixels are swizzled and but not mipmaplevel
*/
static std::vector<MipmapLevelInfo>
write16bTexelsSwizzled(const char *src, char *dst, size_t widthInBlock, size_t heightInBlock, size_t blockSize, size_t mipmapCount)
{
std::vector<MipmapLevelInfo> Result;
size_t offsetInDst = 0, offsetInSrc = 0;
size_t currentHeight = heightInBlock, currentWidth = widthInBlock;
for (unsigned mipLevel = 0; mipLevel < mipmapCount; mipLevel++)
{
size_t rowPitch = align(currentWidth * blockSize, 256);
MipmapLevelInfo currentMipmapLevelInfo = {};
currentMipmapLevelInfo.offset = offsetInDst;
currentMipmapLevelInfo.height = currentHeight;
currentMipmapLevelInfo.width = currentWidth;
currentMipmapLevelInfo.rowPitch = rowPitch;
Result.push_back(currentMipmapLevelInfo);
u16 *castedSrc, *castedDst;
u16 log2width, log2height;
castedSrc = (u16*)src + offsetInSrc;
castedDst = (u16*)dst + offsetInDst;
log2width = (u32)(logf((float)currentWidth) / logf(2.f));
log2height = (u32)(logf((float)currentHeight) / logf(2.f));
for (int row = 0; row < currentHeight; row++)
for (int j = 0; j < currentWidth; j++)
castedDst[(row * rowPitch / 2) + j] = castedSrc[LinearToSwizzleAddress(j, row, 0, log2width, log2height, 0)];
offsetInDst += currentHeight * rowPitch;
offsetInSrc += currentHeight * widthInBlock * blockSize;
currentHeight = MAX2(currentHeight / 2, 1);
currentWidth = MAX2(currentWidth / 2, 1);
}
return Result;
}
/**
 * Copy a chain of mipmap levels made of 16-bit texels from src to dst,
 * swapping the two bytes of every texel on the way.
 *
 * Source rows keep the level-0 pitch (widthInBlock * blockSize) at every level.
 * Destination rows are spaced by align(levelWidth * blockSize, 256) bytes and each
 * level starts at an offset rounded with align(..., 512), matching the layout
 * produced by writeTexelsGeneric.
 *
 * @return one MipmapLevelInfo per level, describing where it was written in dst.
 */
static std::vector<MipmapLevelInfo>
write16bTexelsGeneric(const char *src, char *dst, size_t widthInBlock, size_t heightInBlock, size_t blockSize, size_t mipmapCount)
{
	std::vector<MipmapLevelInfo> Result;
	size_t offsetInDst = 0, offsetInSrc = 0;
	size_t currentHeight = heightInBlock, currentWidth = widthInBlock;
	size_t srcPitch = widthInBlock * blockSize;
	for (unsigned mipLevel = 0; mipLevel < mipmapCount; mipLevel++)
	{
		size_t rowPitch = align(currentWidth * blockSize, 256);

		MipmapLevelInfo currentMipmapLevelInfo = {};
		currentMipmapLevelInfo.offset = offsetInDst;
		currentMipmapLevelInfo.height = currentHeight;
		currentMipmapLevelInfo.width = currentWidth;
		currentMipmapLevelInfo.rowPitch = rowPitch;
		Result.push_back(currentMipmapLevelInfo);

		unsigned short *castedDst = (unsigned short *)dst;
		const unsigned short *castedSrc = (const unsigned short *)src;

		// Walk the rows of the *current* level only; iterating heightInBlock rows
		// for every level would read and write past the smaller mip levels.
		for (size_t row = 0; row < currentHeight; row++)
			for (size_t j = 0; j < currentWidth; j++)
			{
				// Offsets and pitches are in bytes, indices are in 16-bit units.
				u16 tmp = castedSrc[offsetInSrc / 2 + row * srcPitch / 2 + j];
				castedDst[offsetInDst / 2 + row * rowPitch / 2 + j] = (u16)((tmp >> 8) | (tmp << 8));
			}

		offsetInDst += currentHeight * rowPitch;
		// Keep every level on a 512 byte boundary, like the other writers do.
		offsetInDst = align(offsetInDst, 512);
		offsetInSrc += currentHeight * widthInBlock * blockSize;
		currentHeight = MAX2(currentHeight / 2, 1);
		currentWidth = MAX2(currentWidth / 2, 1);
	}
	return Result;
}
/**
 * Copy a chain of mipmap levels whose texels are four 16-bit components each
 * from src to dst, swapping the two bytes of every component on the way.
 *
 * Source rows keep the level-0 pitch (widthInBlock * blockSize) at every level.
 * Destination rows are spaced by align(levelWidth * blockSize, 256) bytes and each
 * level starts at an offset rounded with align(..., 512), matching the layout
 * produced by writeTexelsGeneric.
 *
 * @return one MipmapLevelInfo per level, describing where it was written in dst.
 */
static std::vector<MipmapLevelInfo>
write16bX4TexelsGeneric(const char *src, char *dst, size_t widthInBlock, size_t heightInBlock, size_t blockSize, size_t mipmapCount)
{
	std::vector<MipmapLevelInfo> Result;
	size_t offsetInDst = 0, offsetInSrc = 0;
	size_t currentHeight = heightInBlock, currentWidth = widthInBlock;
	size_t srcPitch = widthInBlock * blockSize;
	for (unsigned mipLevel = 0; mipLevel < mipmapCount; mipLevel++)
	{
		size_t rowPitch = align(currentWidth * blockSize, 256);

		MipmapLevelInfo currentMipmapLevelInfo = {};
		currentMipmapLevelInfo.offset = offsetInDst;
		currentMipmapLevelInfo.height = currentHeight;
		currentMipmapLevelInfo.width = currentWidth;
		currentMipmapLevelInfo.rowPitch = rowPitch;
		Result.push_back(currentMipmapLevelInfo);

		unsigned short *castedDst = (unsigned short *)dst;
		const unsigned short *castedSrc = (const unsigned short *)src;

		// Walk the rows of the *current* level only; iterating heightInBlock rows
		// for every level would read and write past the smaller mip levels.
		for (size_t row = 0; row < currentHeight; row++)
			// Four 16-bit components per texel.
			for (size_t j = 0; j < currentWidth * 4; j++)
			{
				// Offsets and pitches are in bytes, indices are in 16-bit units.
				u16 tmp = castedSrc[offsetInSrc / 2 + row * srcPitch / 2 + j];
				castedDst[offsetInDst / 2 + row * rowPitch / 2 + j] = (u16)((tmp >> 8) | (tmp << 8));
			}

		offsetInDst += currentHeight * rowPitch;
		// Keep every level on a 512 byte boundary, like the other writers do.
		offsetInDst = align(offsetInDst, 512);
		offsetInSrc += currentHeight * widthInBlock * blockSize;
		currentHeight = MAX2(currentHeight / 2, 1);
		currentWidth = MAX2(currentWidth / 2, 1);
	}
	return Result;
}
/** /**
* Create a texture residing in default heap and generate uploads commands in commandList, * Create a texture residing in default heap and generate uploads commands in commandList,
@ -394,221 +130,23 @@ ID3D12Resource *uploadSingleTexture(
const RSXTexture &texture, const RSXTexture &texture,
ID3D12Device *device, ID3D12Device *device,
ID3D12GraphicsCommandList *commandList, ID3D12GraphicsCommandList *commandList,
DataHeap<ID3D12Heap, 65536> &textureBuffersHeap, DataHeap<ID3D12Resource, 65536> &textureBuffersHeap)
std::vector<ComPtr<ID3D12Resource> > &stagingRamTexture)
{ {
ID3D12Resource *vramTexture; ID3D12Resource *vramTexture;
size_t w = texture.GetWidth(), h = texture.GetHeight(); size_t w = texture.GetWidth(), h = texture.GetHeight();
size_t blockSizeInByte, blockWidthInPixel, blockHeightInPixel;
int format = texture.GetFormat() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); int format = texture.GetFormat() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
DXGI_FORMAT dxgiFormat = getTextureDXGIFormat(format); DXGI_FORMAT dxgiFormat = getTextureDXGIFormat(format);
const u32 texaddr = GetAddress(texture.GetOffset(), texture.GetLocation()); size_t textureSize = getPlacedTextureStorageSpace(texture, 256);
bool is_swizzled = !(texture.GetFormat() & CELL_GCM_TEXTURE_LN);
size_t srcPitch;
switch (format)
{
case CELL_GCM_TEXTURE_COMPRESSED_HILO8:
case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8:
case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
default:
LOG_ERROR(RSX, "Unimplemented Texture format : %x", format);
break;
case CELL_GCM_TEXTURE_B8:
blockSizeInByte = 1;
blockWidthInPixel = 1, blockHeightInPixel = 1;
srcPitch = w;
break;
case CELL_GCM_TEXTURE_A1R5G5B5:
blockSizeInByte = 2;
blockHeightInPixel = 1, blockWidthInPixel = 1;
srcPitch = w * 2;
break;
case CELL_GCM_TEXTURE_A4R4G4B4:
blockSizeInByte = 2;
blockWidthInPixel = 1, blockHeightInPixel = 1;
srcPitch = w * 2;
break;
case CELL_GCM_TEXTURE_R5G6B5:
blockSizeInByte = 2;
blockWidthInPixel = 1, blockHeightInPixel = 1;
srcPitch = w * 2;
break;
case CELL_GCM_TEXTURE_A8R8G8B8:
blockSizeInByte = 4;
blockWidthInPixel = 1, blockHeightInPixel = 1;
srcPitch = w * 4;
break;
case CELL_GCM_TEXTURE_COMPRESSED_DXT1:
blockSizeInByte = 8;
blockWidthInPixel = 4, blockHeightInPixel = 4;
srcPitch = w * 2;
break;
case CELL_GCM_TEXTURE_COMPRESSED_DXT23:
blockSizeInByte = 16;
blockWidthInPixel = 4, blockHeightInPixel = 4;
srcPitch = w * 4;
break;
case CELL_GCM_TEXTURE_COMPRESSED_DXT45:
blockSizeInByte = 16;
blockWidthInPixel = 4, blockHeightInPixel = 4;
srcPitch = w * 4;
break;
case CELL_GCM_TEXTURE_G8B8:
blockSizeInByte = 2;
blockWidthInPixel = 1, blockHeightInPixel = 1;
srcPitch = w * 2;
break;
case CELL_GCM_TEXTURE_R6G5B5:
// Not native
blockSizeInByte = 4;
blockWidthInPixel = 1, blockHeightInPixel = 1;
srcPitch = w * 2;
break;
case CELL_GCM_TEXTURE_DEPTH24_D8:
blockSizeInByte = 4;
blockWidthInPixel = 1, blockHeightInPixel = 1;
srcPitch = w * 4;
break;
case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT:
blockSizeInByte = 4;
blockWidthInPixel = 1, blockHeightInPixel = 1;
srcPitch = w * 4;
break;
case CELL_GCM_TEXTURE_DEPTH16:
blockSizeInByte = 2;
blockWidthInPixel = 1, blockHeightInPixel = 1;
srcPitch = w * 2;
break;
case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
blockSizeInByte = 2;
blockWidthInPixel = 1, blockHeightInPixel = 1;
srcPitch = w * 2;
break;
case CELL_GCM_TEXTURE_X16:
blockSizeInByte = 2;
blockWidthInPixel = 1, blockHeightInPixel = 1;
srcPitch = w * 2;
break;
case CELL_GCM_TEXTURE_Y16_X16:
blockSizeInByte = 4;
blockWidthInPixel = 1, blockHeightInPixel = 1;
srcPitch = w * 4;
break;
case CELL_GCM_TEXTURE_R5G5B5A1:
blockSizeInByte = 2;
blockWidthInPixel = 1, blockHeightInPixel = 1;
srcPitch = w * 2;
break;
case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
blockSizeInByte = 8;
blockWidthInPixel = 1, blockHeightInPixel = 1;
srcPitch = w * 8;
break;
case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT:
blockSizeInByte = 16;
blockWidthInPixel = 1, blockHeightInPixel = 1;
srcPitch = w * 16;
break;
case CELL_GCM_TEXTURE_X32_FLOAT:
blockSizeInByte = 4;
blockWidthInPixel = 1, blockHeightInPixel = 1;
srcPitch = w * 4;
break;
case CELL_GCM_TEXTURE_D1R5G5B5:
blockSizeInByte = 2;
blockWidthInPixel = 1, blockHeightInPixel = 1;
srcPitch = w * 2;
break;
case CELL_GCM_TEXTURE_Y16_X16_FLOAT:
blockSizeInByte = 4;
blockWidthInPixel = 1, blockHeightInPixel = 1;
srcPitch = w * 4;
break;
case CELL_GCM_TEXTURE_D8R8G8B8:
blockSizeInByte = 4;
blockWidthInPixel = 1, blockHeightInPixel = 1;
srcPitch = w * 4;
break;
case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
blockSizeInByte = 4;
blockWidthInPixel = 2, blockHeightInPixel = 2;
srcPitch = w * 4;
break;
case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
blockSizeInByte = 4;
blockWidthInPixel = 2, blockHeightInPixel = 2;
srcPitch = w * 4;
break;
}
size_t heightInBlocks = (h + blockHeightInPixel - 1) / blockHeightInPixel;
size_t widthInBlocks = (w + blockWidthInPixel - 1) / blockWidthInPixel;
// Multiple of 256
size_t rowPitch = align(blockSizeInByte * widthInBlocks, 256);
ComPtr<ID3D12Resource> Texture;
size_t textureSize = rowPitch * heightInBlocks * 2; // * 4 for mipmap levels
assert(textureBuffersHeap.canAlloc(textureSize)); assert(textureBuffersHeap.canAlloc(textureSize));
size_t heapOffset = textureBuffersHeap.alloc(textureSize); size_t heapOffset = textureBuffersHeap.alloc(textureSize);
ThrowIfFailed(device->CreatePlacedResource( void *buffer;
textureBuffersHeap.m_heap, ThrowIfFailed(textureBuffersHeap.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + textureSize), &buffer));
heapOffset, void *textureData = (char*)buffer + heapOffset;
&CD3DX12_RESOURCE_DESC::Buffer(textureSize), std::vector<MipmapLevelInfo> mipInfos = uploadPlacedTexture(texture, 256, textureData);
D3D12_RESOURCE_STATE_GENERIC_READ, textureBuffersHeap.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + textureSize));
nullptr,
IID_PPV_ARGS(Texture.GetAddressOf())
));
stagingRamTexture.push_back(Texture);
auto pixels = vm::get_ptr<const u8>(texaddr);
void *textureData;
ThrowIfFailed(Texture->Map(0, nullptr, (void**)&textureData));
std::vector<MipmapLevelInfo> mipInfos;
switch (format)
{
case CELL_GCM_TEXTURE_A8R8G8B8:
{
if (is_swizzled)
mipInfos = writeTexelsSwizzled((char*)pixels, (char*)textureData, w, h, 4, texture.GetMipmap());
else
mipInfos = writeTexelsGeneric((char*)pixels, (char*)textureData, w, h, 4, texture.GetMipmap());
break;
}
case CELL_GCM_TEXTURE_A1R5G5B5:
case CELL_GCM_TEXTURE_A4R4G4B4:
case CELL_GCM_TEXTURE_R5G6B5:
{
if (is_swizzled)
mipInfos = write16bTexelsSwizzled((char*)pixels, (char*)textureData, w, h, 2, texture.GetMipmap());
else
mipInfos = write16bTexelsGeneric((char*)pixels, (char*)textureData, w, h, 2, texture.GetMipmap());
break;
}
case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
{
mipInfos = write16bX4TexelsGeneric((char*)pixels, (char*)textureData, w, h, 8, texture.GetMipmap());
break;
}
case CELL_GCM_TEXTURE_COMPRESSED_DXT1:
case CELL_GCM_TEXTURE_COMPRESSED_DXT23:
case CELL_GCM_TEXTURE_COMPRESSED_DXT45:
{
mipInfos = writeCompressedTexel((char*)pixels, (char*)textureData, widthInBlocks, blockWidthInPixel, heightInBlocks, blockHeightInPixel, blockSizeInByte, texture.GetMipmap());
break;
}
default:
{
mipInfos = writeTexelsGeneric((char*)pixels, (char*)textureData, w, h, blockSizeInByte, texture.GetMipmap());
break;
}
}
Texture->Unmap(0, nullptr);
D3D12_RESOURCE_DESC texturedesc = CD3DX12_RESOURCE_DESC::Tex2D(dxgiFormat, (UINT)w, (UINT)h, 1, texture.GetMipmap()); D3D12_RESOURCE_DESC texturedesc = CD3DX12_RESOURCE_DESC::Tex2D(dxgiFormat, (UINT)w, (UINT)h, 1, texture.GetMipmap());
textureSize = device->GetResourceAllocationInfo(0, 1, &texturedesc).SizeInBytes; textureSize = device->GetResourceAllocationInfo(0, 1, &texturedesc).SizeInBytes;
@ -626,7 +164,7 @@ ID3D12Resource *uploadSingleTexture(
for (const MipmapLevelInfo mli : mipInfos) for (const MipmapLevelInfo mli : mipInfos)
{ {
commandList->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(vramTexture, (UINT)miplevel), 0, 0, 0, commandList->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(vramTexture, (UINT)miplevel), 0, 0, 0,
&CD3DX12_TEXTURE_COPY_LOCATION(Texture.Get(), { mli.offset, { dxgiFormat, (UINT)mli.width, (UINT)mli.height, 1, (UINT)mli.rowPitch } }), nullptr); &CD3DX12_TEXTURE_COPY_LOCATION(textureBuffersHeap.m_heap, { heapOffset + mli.offset, { dxgiFormat, (UINT)mli.width, (UINT)mli.height, 1, (UINT)mli.rowPitch } }), nullptr);
miplevel++; miplevel++;
} }
@ -738,7 +276,7 @@ size_t D3D12GSRender::UploadTextures(ID3D12GraphicsCommandList *cmdlist)
} }
else else
{ {
vramTexture = uploadSingleTexture(m_textures[i], m_device.Get(), cmdlist, m_textureUploadData, getCurrentResourceStorage().m_singleFrameLifetimeResources); vramTexture = uploadSingleTexture(m_textures[i], m_device.Get(), cmdlist, m_textureUploadData);
m_texturesCache[texaddr] = vramTexture; m_texturesCache[texaddr] = vramTexture;
u32 s = (u32)align(getTextureSize(m_textures[i]), 4096); u32 s = (u32)align(getTextureSize(m_textures[i]), 4096);
@ -881,7 +419,7 @@ size_t D3D12GSRender::UploadTextures(ID3D12GraphicsCommandList *cmdlist)
} }
m_device->CreateSampler(&getSamplerDesc(m_textures[i]), m_device->CreateSampler(&getSamplerDesc(m_textures[i]),
CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_samplerDescriptorHeap[getCurrentResourceStorage().m_samplerDescriptorHeapIndex]->GetCPUDescriptorHandleForHeapStart()) CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_samplerDescriptorHeap[getCurrentResourceStorage().m_samplerDescriptorHeapIndex]->GetCPUDescriptorHandleForHeapStart())
.Offset((UINT)getCurrentResourceStorage().m_currentSamplerIndex + (UINT)usedTexture, g_descriptorStrideSRVCBVUAV)); .Offset((UINT)getCurrentResourceStorage().m_currentSamplerIndex + (UINT)usedTexture, g_descriptorStrideSamplers));
usedTexture++; usedTexture++;
} }

@ -36,28 +36,20 @@ std::pair<ID3DBlob *, ID3DBlob *> compileF32toU8CS()
const char *tmp = (const char*)errorBlob->GetBufferPointer(); const char *tmp = (const char*)errorBlob->GetBufferPointer();
LOG_ERROR(RSX, tmp); LOG_ERROR(RSX, tmp);
} }
D3D12_DESCRIPTOR_RANGE descriptorRange[2] = {}; CD3DX12_DESCRIPTOR_RANGE descriptorRange[] =
{
// Textures // Textures
descriptorRange[0].BaseShaderRegister = 0; CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0),
descriptorRange[0].NumDescriptors = 1; // UAV (same descriptor heap)
descriptorRange[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 0, 1),
descriptorRange[1].BaseShaderRegister = 0; };
descriptorRange[1].NumDescriptors = 1;
descriptorRange[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
descriptorRange[1].OffsetInDescriptorsFromTableStart = 1;
D3D12_ROOT_PARAMETER RP[2] = {};
RP[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
RP[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
RP[0].DescriptorTable.pDescriptorRanges = &descriptorRange[0];
RP[0].DescriptorTable.NumDescriptorRanges = 2;
D3D12_ROOT_SIGNATURE_DESC rootSignatureDesc = {}; CD3DX12_ROOT_PARAMETER RP;
rootSignatureDesc.NumParameters = 1; RP.InitAsDescriptorTable(2, &descriptorRange[0]);
rootSignatureDesc.pParameters = RP;
ID3DBlob *rootSignatureBlob; ID3DBlob *rootSignatureBlob;
hr = wrapD3D12SerializeRootSignature(&rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob); hr = wrapD3D12SerializeRootSignature(&CD3DX12_ROOT_SIGNATURE_DESC(1, &RP), D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob);
if (hr != S_OK) if (hr != S_OK)
{ {
const char *tmp = (const char*)errorBlob->GetBufferPointer(); const char *tmp = (const char*)errorBlob->GetBufferPointer();

@ -260,6 +260,21 @@ enum
}; };
// GCM Primitive
// Identifiers for the primitive types, numbered consecutively from 1 to 10.
// The enum is unnamed, so the enumerators are used directly as integer constants.
enum
{
CELL_GCM_PRIMITIVE_POINTS = 1,
CELL_GCM_PRIMITIVE_LINES = 2,
CELL_GCM_PRIMITIVE_LINE_LOOP = 3,
CELL_GCM_PRIMITIVE_LINE_STRIP = 4,
CELL_GCM_PRIMITIVE_TRIANGLES = 5,
CELL_GCM_PRIMITIVE_TRIANGLE_STRIP = 6,
CELL_GCM_PRIMITIVE_TRIANGLE_FAN = 7,
CELL_GCM_PRIMITIVE_QUADS = 8,
CELL_GCM_PRIMITIVE_QUAD_STRIP = 9,
CELL_GCM_PRIMITIVE_POLYGON = 10,
};
// GCM Reports // GCM Reports
enum enum
{ {

@ -6,6 +6,7 @@
#include "Emu/Memory/Memory.h" #include "Emu/Memory/Memory.h"
#include "Emu/System.h" #include "Emu/System.h"
#include "GLGSRender.h" #include "GLGSRender.h"
#include "../Common/TextureUtils.h"
#define CMD_DEBUG 0 #define CMD_DEBUG 0
#define DUMP_VERTEX_DATA 0 #define DUMP_VERTEX_DATA 0
@ -2153,33 +2154,3 @@ void GLGSRender::semaphorePFIFOAcquire(u32 offset, u32 value)
{ {
} }
/**
 * Interleave the low bits of x, y and z into a single swizzled index.
 *
 * Bits are emitted from least to most significant in round-robin order
 * x, y, z. A coordinate contributes log2_width / log2_height / log2_depth bits
 * respectively and drops out of the rotation once its bits are used up, so the
 * remaining coordinates fill the higher positions.
 */
u32 LinearToSwizzleAddress(u32 x, u32 y, u32 z, u32 log2_width, u32 log2_height, u32 log2_depth)
{
	u32 coord[3] = { x, y, z };
	u32 bitsLeft[3] = { log2_width, log2_height, log2_depth };
	u32 swizzled = 0;
	u32 nextBit = 0;

	while (bitsLeft[0] != 0 || bitsLeft[1] != 0 || bitsLeft[2] != 0)
	{
		for (int axis = 0; axis < 3; ++axis)
		{
			if (bitsLeft[axis] == 0)
				continue;

			// Move the lowest remaining bit of this coordinate to the next output position.
			swizzled |= (coord[axis] & 0x01) << nextBit;
			coord[axis] >>= 1;
			++nextBit;
			--bitsLeft[axis];
		}
	}
	return swizzled;
}

@ -18,7 +18,6 @@
extern GLenum g_last_gl_error; extern GLenum g_last_gl_error;
void printGlError(GLenum err, const char* situation); void printGlError(GLenum err, const char* situation);
void printGlError(GLenum err, const std::string& situation); void printGlError(GLenum err, const std::string& situation);
u32 LinearToSwizzleAddress(u32 x, u32 y, u32 z, u32 log2_width, u32 log2_height, u32 log2_depth);
class GLTexture class GLTexture

@ -60,8 +60,10 @@
<ClCompile Include="Emu\IdManager.cpp" /> <ClCompile Include="Emu\IdManager.cpp" />
<ClCompile Include="Emu\RSX\CgBinaryFragmentProgram.cpp" /> <ClCompile Include="Emu\RSX\CgBinaryFragmentProgram.cpp" />
<ClCompile Include="Emu\RSX\CgBinaryVertexProgram.cpp" /> <ClCompile Include="Emu\RSX\CgBinaryVertexProgram.cpp" />
<ClCompile Include="Emu\RSX\Common\BufferUtils.cpp" />
<ClCompile Include="Emu\RSX\Common\FragmentProgramDecompiler.cpp" /> <ClCompile Include="Emu\RSX\Common\FragmentProgramDecompiler.cpp" />
<ClCompile Include="Emu\RSX\Common\ShaderParam.cpp" /> <ClCompile Include="Emu\RSX\Common\ShaderParam.cpp" />
<ClCompile Include="Emu\RSX\Common\TextureUtils.cpp" />
<ClCompile Include="Emu\RSX\Common\VertexProgramDecompiler.cpp" /> <ClCompile Include="Emu\RSX\Common\VertexProgramDecompiler.cpp" />
<ClCompile Include="Emu\RSX\D3D12\D3D12Buffer.cpp" /> <ClCompile Include="Emu\RSX\D3D12\D3D12Buffer.cpp" />
<ClCompile Include="Emu\RSX\D3D12\D3D12FragmentProgramDecompiler.cpp" /> <ClCompile Include="Emu\RSX\D3D12\D3D12FragmentProgramDecompiler.cpp" />
@ -529,9 +531,11 @@
<ClInclude Include="Emu\Memory\Memory.h" /> <ClInclude Include="Emu\Memory\Memory.h" />
<ClInclude Include="Emu\Memory\MemoryBlock.h" /> <ClInclude Include="Emu\Memory\MemoryBlock.h" />
<ClInclude Include="Emu\RSX\CgBinaryProgram.h" /> <ClInclude Include="Emu\RSX\CgBinaryProgram.h" />
<ClInclude Include="Emu\RSX\Common\BufferUtils.h" />
<ClInclude Include="Emu\RSX\Common\FragmentProgramDecompiler.h" /> <ClInclude Include="Emu\RSX\Common\FragmentProgramDecompiler.h" />
<ClInclude Include="Emu\RSX\Common\ProgramStateCache.h" /> <ClInclude Include="Emu\RSX\Common\ProgramStateCache.h" />
<ClInclude Include="Emu\RSX\Common\ShaderParam.h" /> <ClInclude Include="Emu\RSX\Common\ShaderParam.h" />
<ClInclude Include="Emu\RSX\Common\TextureUtils.h" />
<ClInclude Include="Emu\RSX\Common\VertexProgramDecompiler.h" /> <ClInclude Include="Emu\RSX\Common\VertexProgramDecompiler.h" />
<ClInclude Include="Emu\RSX\D3D12\D3D12.h" /> <ClInclude Include="Emu\RSX\D3D12\D3D12.h" />
<ClInclude Include="Emu\RSX\D3D12\D3D12Buffer.h" /> <ClInclude Include="Emu\RSX\D3D12\D3D12Buffer.h" />

@ -989,6 +989,12 @@
<ClCompile Include="..\Utilities\SharedMutex.cpp"> <ClCompile Include="..\Utilities\SharedMutex.cpp">
<Filter>Utilities</Filter> <Filter>Utilities</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="Emu\RSX\Common\TextureUtils.cpp">
<Filter>Emu\GPU\RSX\Common</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\Common\BufferUtils.cpp">
<Filter>Emu\GPU\RSX\Common</Filter>
</ClCompile>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="Crypto\aes.h"> <ClInclude Include="Crypto\aes.h">
@ -1882,5 +1888,11 @@
<ClInclude Include="Emu\RSX\D3D12\d3dx12.h"> <ClInclude Include="Emu\RSX\D3D12\d3dx12.h">
<Filter>Emu\GPU\RSX\D3D12</Filter> <Filter>Emu\GPU\RSX\D3D12</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="Emu\RSX\Common\TextureUtils.h">
<Filter>Emu\GPU\RSX\Common</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\Common\BufferUtils.h">
<Filter>Emu\GPU\RSX\Common</Filter>
</ClInclude>
</ItemGroup> </ItemGroup>
</Project> </Project>