Merge pull request #1257 from vlj/d3d12

D3d12: Fixes and speed improvements.
This commit is contained in:
Raul Tambre 2015-10-15 18:38:01 +03:00
commit b3db9255e4
15 changed files with 231 additions and 218 deletions

View File

@ -1,6 +1,6 @@
#include "stdafx.h"
#include "BufferUtils.h"
#include "Utilities/Log.h"
// Minimum / maximum of two values.
// The whole conditional expression is wrapped in parentheses so the macros
// compose correctly inside larger expressions (e.g. `MIN2(a, b) + 1` or
// `2 * MAX2(a, b)`); without the outer parentheses the surrounding operators
// would bind to only one branch of the ternary.
// NOTE: each argument may be evaluated twice, so avoid arguments with side effects.
#define MIN2(x, y) (((x) < (y)) ? (x) : (y))
#define MAX2(x, y) (((x) > (y)) ? (x) : (y))
@ -55,50 +55,56 @@ std::vector<VertexBufferFormat> FormatVertexData(const rsx::data_array_format_in
return Result;
}
void uploadVertexData(const VertexBufferFormat &vbf, const rsx::data_array_format_info *vertex_array_desc, const std::vector<u8> *vertex_data, size_t baseOffset, void* bufferMap)
void write_vertex_array_data_to_buffer(void *buffer, u32 first, u32 count, size_t index, const rsx::data_array_format_info &vertex_array_desc)
{
for (int vertex = 0; vertex < vbf.elementCount; vertex++)
assert(vertex_array_desc.array);
if (vertex_array_desc.frequency > 1)
LOG_ERROR(RSX, "%s: frequency is not null (%d, index=%d)", __FUNCTION__, vertex_array_desc.frequency, index);
u32 offset = rsx::method_registers[NV4097_SET_VERTEX_DATA_ARRAY_OFFSET + index];
u32 address = rsx::get_address(offset & 0x7fffffff, offset >> 31);
u32 type_size = rsx::get_vertex_type_size(vertex_array_desc.type);
u32 element_size = type_size * vertex_array_desc.size;
u32 base_offset = rsx::method_registers[NV4097_SET_VERTEX_DATA_BASE_OFFSET];
u32 base_index = rsx::method_registers[NV4097_SET_VERTEX_DATA_BASE_INDEX];
for (u32 i = 0; i < count; ++i)
{
for (size_t attributeId : vbf.attributeId)
auto src = vm::ps3::_ptr<const u8>(address + base_offset + vertex_array_desc.stride * (first + i + base_index));
u8* dst = (u8*)buffer + i * element_size;
switch (type_size)
{
u32 addrRegVal = rsx::method_registers[NV4097_SET_VERTEX_DATA_ARRAY_OFFSET + attributeId];
u32 addr = rsx::get_address(addrRegVal & 0x7fffffff, addrRegVal >> 31);
case 1:
memcpy(dst, src, vertex_array_desc.size);
break;
if (!vertex_array_desc[attributeId].array)
{
memcpy(bufferMap, vertex_data[attributeId].data(), vertex_data[attributeId].size());
continue;
}
size_t offset = (size_t)addr + baseOffset - vbf.range.first;
size_t tsize = rsx::get_vertex_type_size(vertex_array_desc[attributeId].type);
size_t size = vertex_array_desc[attributeId].size;
auto src = vm::ps3::_ptr<const u8>(addr + (u32)baseOffset + (u32)vbf.stride * vertex);
char* dst = (char*)bufferMap + offset + vbf.stride * vertex;
case 2:
{
auto* c_src = (const be_t<u16>*)src;
u16* c_dst = (u16*)dst;
switch (tsize)
for (u32 j = 0; j < vertex_array_desc.size; ++j)
{
case 1:
{
memcpy(dst, src, size);
break;
*c_dst++ = *c_src++;
}
break;
}
case 2:
{
const u16* c_src = (const u16*)src;
u16* c_dst = (u16*)dst;
for (u32 j = 0; j < size; ++j) *c_dst++ = _byteswap_ushort(*c_src++);
break;
}
case 4:
{
auto* c_src = (const be_t<u32>*)src;
u32* c_dst = (u32*)dst;
case 4:
for (u32 j = 0; j < vertex_array_desc.size; ++j)
{
const u32* c_src = (const u32*)src;
u32* c_dst = (u32*)dst;
for (u32 j = 0; j < size; ++j) *c_dst++ = _byteswap_ulong(*c_src++);
break;
}
*c_dst++ = *c_src++;
}
break;
}
}
}
}

View File

@ -20,9 +20,9 @@ struct VertexBufferFormat
std::vector<VertexBufferFormat> FormatVertexData(const rsx::data_array_format_info *vertex_array_desc, const std::vector<u8> *vertex_data, size_t *vertex_data_size, size_t base_offset);
/*
* Write vertex attributes to bufferMap, swapping data as required.
* Write `count` vertices of vertex attribute array `index` into `buffer`, starting at vertex `first` and using the layout described by vertex_array_desc.
*/
void uploadVertexData(const VertexBufferFormat &vbf, const rsx::data_array_format_info *vertex_array_desc, const std::vector<u8> *vertex_data, size_t baseOffset, void* bufferMap);
void write_vertex_array_data_to_buffer(void *buffer, u32 first, u32 count, size_t index, const rsx::data_array_format_info &vertex_array_desc);
/*
* If the primitive mode is not supported and needs to be emulated (using an index buffer), returns false.

View File

@ -232,16 +232,15 @@ std::string VertexProgramDecompiler::GetCond()
if (d0.cond == 0) return "false";
if (d0.cond == (lt | gt | eq)) return "true";
static const char* cond_string_table[(lt | gt | eq) + 1] =
static const COMPARE cond_string_table[(lt | gt | eq) + 1] =
{
"error",
"lessThan",
"equal",
"lessThanEqual",
"greaterThan",
"notEqual",
"greaterThanEqual",
"error"
COMPARE::FUNCTION_SLT, // "error"
COMPARE::FUNCTION_SLT,
COMPARE::FUNCTION_SEQ,
COMPARE::FUNCTION_SLE,
COMPARE::FUNCTION_SGT,
COMPARE::FUNCTION_SNE,
COMPARE::FUNCTION_SGE,
};
static const char f[4] = { 'x', 'y', 'z', 'w' };
@ -253,8 +252,7 @@ std::string VertexProgramDecompiler::GetCond()
swizzle += f[d0.mask_w];
swizzle = swizzle == "xyzw" ? "" : "." + swizzle;
return fmt::format("any(%s(cc%d%s, vec4(0.0)%s))", cond_string_table[d0.cond], d0.cond_reg_sel_1, swizzle.c_str(), swizzle.c_str());
return "any(" + compareFunction(cond_string_table[d0.cond], "cc" + std::to_string(d0.cond_reg_sel_1), getFloatTypeName(4) + "(0., 0., 0., 0.)" + swizzle) + ")";
}
void VertexProgramDecompiler::AddCodeCond(const std::string& dst, const std::string& src)
@ -330,7 +328,7 @@ std::string VertexProgramDecompiler::AddAddrMask()
std::string VertexProgramDecompiler::AddAddrReg()
{
static const char f[] = { 'x', 'y', 'z', 'w' };
return m_parr.AddParam(PF_PARAM_NONE, "ivec4", "a" + std::to_string(d0.addr_reg_sel_1), "ivec4(0)") + AddAddrMask();
return m_parr.AddParam(PF_PARAM_NONE, getFloatTypeName(4), "a" + std::to_string(d0.addr_reg_sel_1), getFloatTypeName(4) + "(0, 0, 0, 0)") + AddAddrMask();
}
u32 VertexProgramDecompiler::GetAddr()
@ -659,7 +657,7 @@ std::string VertexProgramDecompiler::Decompile()
case RSX_VEC_OPCODE_MAX: SetDSTVec("max($0, $1)"); break;
case RSX_VEC_OPCODE_SLT: SetDSTVec(getFloatTypeName(4) + "(" + compareFunction(COMPARE::FUNCTION_SLT, "$0", "$1") + ")"); break;
case RSX_VEC_OPCODE_SGE: SetDSTVec(getFloatTypeName(4) + "(" + compareFunction(COMPARE::FUNCTION_SGE, "$0", "$1") + ")"); break;
case RSX_VEC_OPCODE_ARL: AddCode("$ifcond $a = ivec4($0)$am;"); break;
case RSX_VEC_OPCODE_ARL: AddCode("$ifcond $a = " + getIntTypeName(4) + "($0)$am;"); break;
case RSX_VEC_OPCODE_FRC: SetDSTVec(getFunction(FUNCTION::FUNCTION_FRACT)); break;
case RSX_VEC_OPCODE_FLR: SetDSTVec("floor($0)"); break;
case RSX_VEC_OPCODE_SEQ: SetDSTVec(getFloatTypeName(4) + "(" + compareFunction(COMPARE::FUNCTION_SEQ, "$0", "$1") + ")"); break;

View File

@ -80,6 +80,10 @@ protected:
*/
virtual std::string getFloatTypeName(size_t elementCount) = 0;
/** returns the type name of int vectors.
*/
virtual std::string getIntTypeName(size_t elementCount) = 0;
/** returns string calling function where arguments are passed via
* $0 $1 $2 substring.
*/

View File

@ -91,32 +91,6 @@ DXGI_FORMAT getFormat(u8 type, u8 size)
}
}
static
std::vector<D3D12_INPUT_ELEMENT_DESC> getIALayout(const rsx::data_array_format_info *vertex_info, const std::vector<u8> *vertex_data)
{
std::vector<D3D12_INPUT_ELEMENT_DESC> result;
size_t inputSlot = 0;
for (size_t index = 0; index < rsx::limits::vertex_count; index++)
{
const auto &info = vertex_info[index];
if (!info.size)
continue;
D3D12_INPUT_ELEMENT_DESC IAElement = {};
IAElement.SemanticName = "TEXCOORD";
IAElement.SemanticIndex = (UINT)index;
IAElement.InputSlot = (UINT)inputSlot++;
IAElement.Format = getFormat(info.type - 1, info.size);
IAElement.AlignedByteOffset = 0;
IAElement.InputSlotClass = info.array ? D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA : D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;
IAElement.InstanceDataStepRate = 0;
result.push_back(IAElement);
}
return result;
}
// D3D12GS member handling buffers
@ -138,29 +112,105 @@ D3D12_GPU_VIRTUAL_ADDRESS createVertexBuffer(const rsx::data_array_format_info &
return vertexIndexHeap.m_heap->GetGPUVirtualAddress() + heapOffset;
}
std::vector<D3D12_VERTEX_BUFFER_VIEW> D3D12GSRender::UploadVertexBuffers(bool indexed_draw)
void D3D12GSRender::load_vertex_data(u32 first, u32 count)
{
u32 m_vertex_data_base_offset = rsx::method_registers[NV4097_SET_VERTEX_DATA_BASE_OFFSET];
std::vector<D3D12_VERTEX_BUFFER_VIEW> result;
m_IASet = getIALayout(vertex_arrays_info, vertex_arrays);
m_first_count_pairs.emplace_back(std::make_pair(first, count));
vertex_draw_count += count;
}
void D3D12GSRender::upload_vertex_attributes()
{
m_vertex_buffer_views.clear();
m_IASet.clear();
size_t inputSlot = 0;
// First array attribute
for (int index = 0; index < rsx::limits::vertex_count; ++index)
{
const auto &info = vertex_arrays_info[index];
if (!info.size)
if (!info.array) // disabled or not a vertex array
continue;
D3D12_GPU_VIRTUAL_ADDRESS virtualAddress = createVertexBuffer(info, vertex_arrays[index], m_device.Get(), m_vertexIndexData);
u32 type_size = rsx::get_vertex_type_size(info.type);
u32 element_size = type_size * info.size;
size_t subBufferSize = element_size * vertex_draw_count;
assert(m_vertexIndexData.canAlloc(subBufferSize));
size_t heapOffset = m_vertexIndexData.alloc(subBufferSize);
void *buffer;
ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize), (void**)&buffer));
void *bufferMap = (char*)buffer + heapOffset;
for (const auto &range : m_first_count_pairs)
{
write_vertex_array_data_to_buffer(bufferMap, range.first, range.second, index, info);
bufferMap = (char*)bufferMap + range.second * element_size;
}
m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize));
D3D12_VERTEX_BUFFER_VIEW vertexBufferView = {};
vertexBufferView.BufferLocation = virtualAddress;
vertexBufferView.SizeInBytes = (UINT)vertex_arrays[index].size();
vertexBufferView.StrideInBytes = (UINT)rsx::get_vertex_type_size(info.type) * info.size;
result.push_back(vertexBufferView);
vertexBufferView.BufferLocation = m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heapOffset;
vertexBufferView.SizeInBytes = (UINT)subBufferSize;
vertexBufferView.StrideInBytes = (UINT)element_size;
m_vertex_buffer_views.push_back(vertexBufferView);
m_timers.m_bufferUploadSize += subBufferSize;
D3D12_INPUT_ELEMENT_DESC IAElement = {};
IAElement.SemanticName = "TEXCOORD";
IAElement.SemanticIndex = (UINT)index;
IAElement.InputSlot = (UINT)inputSlot++;
IAElement.Format = getFormat(info.type - 1, info.size);
IAElement.AlignedByteOffset = 0;
IAElement.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
IAElement.InstanceDataStepRate = 0;
m_IASet.push_back(IAElement);
}
return result;
// Now immediate vertex buffer
for (int index = 0; index < rsx::limits::vertex_count; ++index)
{
const auto &info = vertex_arrays_info[index];
if (info.array)
continue;
if (!info.size) // disabled
continue;
auto &data = vertex_arrays[index];
u32 type_size = rsx::get_vertex_type_size(info.type);
u32 element_size = type_size * info.size;
size_t subBufferSize = data.size();
assert(m_vertexIndexData.canAlloc(subBufferSize));
size_t heapOffset = m_vertexIndexData.alloc(subBufferSize);
void *buffer;
ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize), (void**)&buffer));
void *bufferMap = (char*)buffer + heapOffset;
memcpy(bufferMap, data.data(), data.size());
m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize));
D3D12_VERTEX_BUFFER_VIEW vertexBufferView = {};
vertexBufferView.BufferLocation = m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heapOffset;
vertexBufferView.SizeInBytes = (UINT)subBufferSize;
vertexBufferView.StrideInBytes = (UINT)element_size;
m_vertex_buffer_views.push_back(vertexBufferView);
D3D12_INPUT_ELEMENT_DESC IAElement = {};
IAElement.SemanticName = "TEXCOORD";
IAElement.SemanticIndex = (UINT)index;
IAElement.InputSlot = (UINT)inputSlot++;
IAElement.Format = getFormat(info.type - 1, info.size);
IAElement.AlignedByteOffset = 0;
IAElement.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;
IAElement.InstanceDataStepRate = 1;
m_IASet.push_back(IAElement);
}
m_first_count_pairs.clear();
}
D3D12_INDEX_BUFFER_VIEW D3D12GSRender::uploadIndexBuffers(bool indexed_draw)
@ -229,7 +279,7 @@ D3D12_INDEX_BUFFER_VIEW D3D12GSRender::uploadIndexBuffers(bool indexed_draw)
return indexBufferView;
}
void D3D12GSRender::setScaleOffset()
void D3D12GSRender::setScaleOffset(size_t descriptorIndex)
{
float scaleOffsetMat[16] =
{
@ -273,11 +323,11 @@ void D3D12GSRender::setScaleOffset()
constantBufferViewDesc.BufferLocation = m_constantsData.m_heap->GetGPUVirtualAddress() + heapOffset;
constantBufferViewDesc.SizeInBytes = (UINT)256;
m_device->CreateConstantBufferView(&constantBufferViewDesc,
CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_scaleOffsetDescriptorHeap->GetCPUDescriptorHandleForHeapStart())
.Offset((INT)getCurrentResourceStorage().m_currentScaleOffsetBufferIndex, g_descriptorStrideSRVCBVUAV));
CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_descriptorsHeap->GetCPUDescriptorHandleForHeapStart())
.Offset((INT)descriptorIndex, g_descriptorStrideSRVCBVUAV));
}
void D3D12GSRender::FillVertexShaderConstantsBuffer()
void D3D12GSRender::FillVertexShaderConstantsBuffer(size_t descriptorIndex)
{
for (const auto &entry : transform_constants)
local_transform_constants[entry.first] = entry.second;
@ -305,11 +355,11 @@ void D3D12GSRender::FillVertexShaderConstantsBuffer()
constantBufferViewDesc.BufferLocation = m_constantsData.m_heap->GetGPUVirtualAddress() + heapOffset;
constantBufferViewDesc.SizeInBytes = (UINT)bufferSize;
m_device->CreateConstantBufferView(&constantBufferViewDesc,
CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart())
.Offset((INT)getCurrentResourceStorage().m_constantsBufferIndex, g_descriptorStrideSRVCBVUAV));
CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_descriptorsHeap->GetCPUDescriptorHandleForHeapStart())
.Offset((INT)descriptorIndex, g_descriptorStrideSRVCBVUAV));
}
void D3D12GSRender::FillPixelShaderConstantsBuffer()
void D3D12GSRender::FillPixelShaderConstantsBuffer(size_t descriptorIndex)
{
// Get constant from fragment program
const std::vector<size_t> &fragmentOffset = m_cachePSO.getFragmentConstantOffsetsCache(&fragment_program);
@ -366,8 +416,8 @@ void D3D12GSRender::FillPixelShaderConstantsBuffer()
constantBufferViewDesc.BufferLocation = m_constantsData.m_heap->GetGPUVirtualAddress() + heapOffset;
constantBufferViewDesc.SizeInBytes = (UINT)bufferSize;
m_device->CreateConstantBufferView(&constantBufferViewDesc,
CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetCPUDescriptorHandleForHeapStart())
.Offset((INT)getCurrentResourceStorage().m_constantsBufferIndex, g_descriptorStrideSRVCBVUAV));
CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_descriptorsHeap->GetCPUDescriptorHandleForHeapStart())
.Offset((INT)descriptorIndex, g_descriptorStrideSRVCBVUAV));
}

View File

@ -36,9 +36,7 @@ static void unloadD3D12FunctionPointers()
void D3D12GSRender::ResourceStorage::Reset()
{
m_constantsBufferIndex = 0;
m_currentScaleOffsetBufferIndex = 0;
m_currentTextureIndex = 0;
m_descriptorsHeapIndex = 0;
m_currentSamplerIndex = 0;
m_samplerDescriptorHeapIndex = 0;
@ -65,9 +63,7 @@ void D3D12GSRender::ResourceStorage::Init(ID3D12Device *device)
ThrowIfFailed(m_commandList->Close());
D3D12_DESCRIPTOR_HEAP_DESC descriptorHeapDesc = { D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 10000, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE };
ThrowIfFailed(device->CreateDescriptorHeap(&descriptorHeapDesc, IID_PPV_ARGS(&m_constantsBufferDescriptorsHeap)));
ThrowIfFailed(device->CreateDescriptorHeap(&descriptorHeapDesc, IID_PPV_ARGS(&m_scaleOffsetDescriptorHeap)));
ThrowIfFailed(device->CreateDescriptorHeap(&descriptorHeapDesc, IID_PPV_ARGS(&m_textureDescriptorsHeap)));
ThrowIfFailed(device->CreateDescriptorHeap(&descriptorHeapDesc, IID_PPV_ARGS(&m_descriptorsHeap)));
D3D12_DESCRIPTOR_HEAP_DESC samplerHeapDesc = { D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER , 2048, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE };
ThrowIfFailed(device->CreateDescriptorHeap(&samplerHeapDesc, IID_PPV_ARGS(&m_samplerDescriptorHeap[0])));
@ -205,16 +201,14 @@ D3D12GSRender::D3D12GSRender()
// Samplers
CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, textureCount, 0),
};
CD3DX12_ROOT_PARAMETER RP[4];
RP[0].InitAsDescriptorTable(1, &descriptorRange[0]);
RP[1].InitAsDescriptorTable(1, &descriptorRange[1]);
RP[2].InitAsDescriptorTable(1, &descriptorRange[2]);
RP[3].InitAsDescriptorTable(1, &descriptorRange[3]);
CD3DX12_ROOT_PARAMETER RP[2];
RP[0].InitAsDescriptorTable((textureCount > 0) ? 3 : 2, &descriptorRange[0]);
RP[1].InitAsDescriptorTable(1, &descriptorRange[3]);
Microsoft::WRL::ComPtr<ID3DBlob> rootSignatureBlob;
Microsoft::WRL::ComPtr<ID3DBlob> errorBlob;
ThrowIfFailed(wrapD3D12SerializeRootSignature(
&CD3DX12_ROOT_SIGNATURE_DESC((textureCount > 0) ? 4 : 2, RP, 0, 0, D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT),
&CD3DX12_ROOT_SIGNATURE_DESC((textureCount > 0) ? 2 : 1, RP, 0, 0, D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT),
D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob));
m_device->CreateRootSignature(0,
@ -247,8 +241,8 @@ D3D12GSRender::D3D12GSRender()
m_rtts.Init(m_device.Get());
m_constantsData.Init(m_device.Get(), 1024 * 1024 * 64, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE);
m_vertexIndexData.Init(m_device.Get(), 1024 * 1024 * 384, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE);
m_textureUploadData.Init(m_device.Get(), 1024 * 1024 * 256, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE);
m_vertexIndexData.Init(m_device.Get(), 1024 * 1024 * 256, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE);
m_textureUploadData.Init(m_device.Get(), 1024 * 1024 * 512, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE);
if (Ini.GSOverlay.GetValue())
InitD2DStructures();
@ -417,9 +411,9 @@ void D3D12GSRender::end()
if (!vertex_index_array.empty() || vertex_draw_count)
{
const std::vector<D3D12_VERTEX_BUFFER_VIEW> &vertexBufferViews = UploadVertexBuffers(!vertex_index_array.empty());
upload_vertex_attributes();
const D3D12_INDEX_BUFFER_VIEW &indexBufferView = uploadIndexBuffers(!vertex_index_array.empty());
getCurrentResourceStorage().m_commandList->IASetVertexBuffers(0, (UINT)vertexBufferViews.size(), vertexBufferViews.data());
getCurrentResourceStorage().m_commandList->IASetVertexBuffers(0, (UINT)m_vertex_buffer_views.size(), m_vertex_buffer_views.data());
if (m_renderingInfo.m_indexed)
getCurrentResourceStorage().m_commandList->IASetIndexBuffer(&indexBufferView);
}
@ -442,26 +436,11 @@ void D3D12GSRender::end()
std::chrono::time_point<std::chrono::system_clock> constantsDurationStart = std::chrono::system_clock::now();
size_t currentDescriptorIndex = getCurrentResourceStorage().m_descriptorsHeapIndex;
// Constants
setScaleOffset();
getCurrentResourceStorage().m_commandList->SetDescriptorHeaps(1, getCurrentResourceStorage().m_scaleOffsetDescriptorHeap.GetAddressOf());
getCurrentResourceStorage().m_commandList->SetGraphicsRootDescriptorTable(0,
CD3DX12_GPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_scaleOffsetDescriptorHeap->GetGPUDescriptorHandleForHeapStart())
.Offset((INT)getCurrentResourceStorage().m_currentScaleOffsetBufferIndex, g_descriptorStrideSRVCBVUAV)
);
getCurrentResourceStorage().m_currentScaleOffsetBufferIndex++;
size_t currentBufferIndex = getCurrentResourceStorage().m_constantsBufferIndex;
FillVertexShaderConstantsBuffer();
getCurrentResourceStorage().m_constantsBufferIndex++;
FillPixelShaderConstantsBuffer();
getCurrentResourceStorage().m_constantsBufferIndex++;
getCurrentResourceStorage().m_commandList->SetDescriptorHeaps(1, getCurrentResourceStorage().m_constantsBufferDescriptorsHeap.GetAddressOf());
getCurrentResourceStorage().m_commandList->SetGraphicsRootDescriptorTable(1,
CD3DX12_GPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_constantsBufferDescriptorsHeap->GetGPUDescriptorHandleForHeapStart())
.Offset((INT)currentBufferIndex, g_descriptorStrideSRVCBVUAV)
);
setScaleOffset(currentDescriptorIndex);
FillVertexShaderConstantsBuffer(currentDescriptorIndex + 1);
FillPixelShaderConstantsBuffer(currentDescriptorIndex + 2);
std::chrono::time_point<std::chrono::system_clock> constantsDurationEnd = std::chrono::system_clock::now();
m_timers.m_constantsDuration += std::chrono::duration_cast<std::chrono::microseconds>(constantsDurationEnd - constantsDurationStart).count();
@ -471,7 +450,7 @@ void D3D12GSRender::end()
std::chrono::time_point<std::chrono::system_clock> textureDurationStart = std::chrono::system_clock::now();
if (m_PSO->second > 0)
{
size_t usedTexture = UploadTextures(getCurrentResourceStorage().m_commandList.Get());
size_t usedTexture = UploadTextures(getCurrentResourceStorage().m_commandList.Get(), currentDescriptorIndex + 3);
// Fill empty slots
for (; usedTexture < m_PSO->second; usedTexture++)
@ -486,8 +465,8 @@ void D3D12GSRender::end()
D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0,
D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0);
m_device->CreateShaderResourceView(m_dummyTexture, &srvDesc,
CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_textureDescriptorsHeap->GetCPUDescriptorHandleForHeapStart())
.Offset((INT)getCurrentResourceStorage().m_currentTextureIndex + (INT)usedTexture, g_descriptorStrideSRVCBVUAV)
CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_descriptorsHeap->GetCPUDescriptorHandleForHeapStart())
.Offset((INT)currentDescriptorIndex + 3 + (INT)usedTexture, g_descriptorStrideSRVCBVUAV)
);
D3D12_SAMPLER_DESC samplerDesc = {};
@ -501,21 +480,35 @@ void D3D12GSRender::end()
);
}
getCurrentResourceStorage().m_commandList->SetDescriptorHeaps(1, getCurrentResourceStorage().m_textureDescriptorsHeap.GetAddressOf());
getCurrentResourceStorage().m_commandList->SetGraphicsRootDescriptorTable(2,
CD3DX12_GPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_textureDescriptorsHeap->GetGPUDescriptorHandleForHeapStart())
.Offset((INT)getCurrentResourceStorage().m_currentTextureIndex, g_descriptorStrideSRVCBVUAV)
);
ID3D12DescriptorHeap *descriptors[] =
{
getCurrentResourceStorage().m_descriptorsHeap.Get(),
getCurrentResourceStorage().m_samplerDescriptorHeap[getCurrentResourceStorage().m_samplerDescriptorHeapIndex].Get(),
};
getCurrentResourceStorage().m_commandList->SetDescriptorHeaps(2, descriptors);
getCurrentResourceStorage().m_commandList->SetDescriptorHeaps(1, getCurrentResourceStorage().m_samplerDescriptorHeap[getCurrentResourceStorage().m_samplerDescriptorHeapIndex].GetAddressOf());
getCurrentResourceStorage().m_commandList->SetGraphicsRootDescriptorTable(3,
getCurrentResourceStorage().m_commandList->SetGraphicsRootDescriptorTable(0,
CD3DX12_GPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_descriptorsHeap->GetGPUDescriptorHandleForHeapStart())
.Offset((INT)currentDescriptorIndex, g_descriptorStrideSRVCBVUAV)
);
getCurrentResourceStorage().m_commandList->SetGraphicsRootDescriptorTable(1,
CD3DX12_GPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_samplerDescriptorHeap[getCurrentResourceStorage().m_samplerDescriptorHeapIndex]->GetGPUDescriptorHandleForHeapStart())
.Offset((INT)getCurrentResourceStorage().m_currentSamplerIndex, g_descriptorStrideSamplers)
);
getCurrentResourceStorage().m_currentTextureIndex += usedTexture;
getCurrentResourceStorage().m_currentSamplerIndex += usedTexture;
getCurrentResourceStorage().m_descriptorsHeapIndex += usedTexture + 3;
}
else
{
getCurrentResourceStorage().m_commandList->SetDescriptorHeaps(1, getCurrentResourceStorage().m_descriptorsHeap.GetAddressOf());
getCurrentResourceStorage().m_commandList->SetGraphicsRootDescriptorTable(0,
CD3DX12_GPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_descriptorsHeap->GetGPUDescriptorHandleForHeapStart())
.Offset((INT)currentDescriptorIndex, g_descriptorStrideSRVCBVUAV)
);
getCurrentResourceStorage().m_descriptorsHeapIndex += 3;
}
std::chrono::time_point<std::chrono::system_clock> textureDurationEnd = std::chrono::system_clock::now();
m_timers.m_textureDuration += std::chrono::duration_cast<std::chrono::microseconds>(textureDurationEnd - textureDurationStart).count();
@ -651,9 +644,6 @@ void D3D12GSRender::flip(int buffer)
if (false)
{
CellGcmDisplayInfo* buffers;// = vm::ps3::_ptr<CellGcmDisplayInfo>(m_gcm_buffers_addr);
u32 addr = rsx::get_address(buffers[gcm_current_buffer].offset, CELL_GCM_LOCATION_LOCAL);
w = buffers[gcm_current_buffer].width;
h = buffers[gcm_current_buffer].height;
u32 addr = rsx::get_address(gcm_buffers[gcm_current_buffer].offset, CELL_GCM_LOCATION_LOCAL);
w = gcm_buffers[gcm_current_buffer].width;
h = gcm_buffers[gcm_current_buffer].height;

View File

@ -374,15 +374,11 @@ private:
ComPtr<ID3D12CommandAllocator> m_commandAllocator;
ComPtr<ID3D12GraphicsCommandList> m_commandList;
// Constants storage
ComPtr<ID3D12DescriptorHeap> m_constantsBufferDescriptorsHeap;
size_t m_constantsBufferIndex;
ComPtr<ID3D12DescriptorHeap> m_scaleOffsetDescriptorHeap;
size_t m_currentScaleOffsetBufferIndex;
// Descriptor heap
ComPtr<ID3D12DescriptorHeap> m_descriptorsHeap;
size_t m_descriptorsHeapIndex;
// Texture storage
ComPtr<ID3D12DescriptorHeap> m_textureDescriptorsHeap;
size_t m_currentTextureIndex;
// Sampler heap
ComPtr<ID3D12DescriptorHeap> m_samplerDescriptorHeap[2];
size_t m_samplerDescriptorHeapIndex;
size_t m_currentSamplerIndex;
@ -416,7 +412,7 @@ private:
// Constants storage
DataHeap<ID3D12Resource, 256> m_constantsData;
// Vertex storage
DataHeap<ID3D12Resource, 65536> m_vertexIndexData;
DataHeap<ID3D12Resource, 256> m_vertexIndexData;
// Texture storage
DataHeap<ID3D12Resource, 65536> m_textureUploadData;
DataHeap<ID3D12Heap, 65536> m_UAVHeap;
@ -432,6 +428,7 @@ private:
RenderTargets m_rtts;
std::vector<D3D12_INPUT_ELEMENT_DESC> m_IASet;
std::vector<D3D12_VERTEX_BUFFER_VIEW> m_vertex_buffer_views;
INT g_descriptorStrideSRVCBVUAV;
INT g_descriptorStrideDSV;
@ -466,12 +463,11 @@ private:
bool LoadProgram();
std::vector<std::pair<u32, u32> > m_first_count_pairs;
/**
* Create as little vertex buffer as possible to hold all vertex info (in upload heap),
* create corresponding IA layout that can be used for load program and
* returns a vector of vertex buffer view that can be passed to IASetVertexBufferView().
* Upload all vertex attributes whose (first, count) ranges were previously accumulated.
*/
std::vector<D3D12_VERTEX_BUFFER_VIEW> UploadVertexBuffers(bool indexed_draw = false);
void upload_vertex_attributes();
/**
* Create index buffer for indexed rendering and non-native primitive formats if needed, and
@ -481,16 +477,16 @@ private:
D3D12_INDEX_BUFFER_VIEW uploadIndexBuffers(bool indexed_draw = false);
void setScaleOffset();
void FillVertexShaderConstantsBuffer();
void FillPixelShaderConstantsBuffer();
void setScaleOffset(size_t descriptorIndex);
void FillVertexShaderConstantsBuffer(size_t descriptorIndex);
void FillPixelShaderConstantsBuffer(size_t descriptorIndex);
/**
* Fetch all textures recorded in the state in the render target cache and in the texture cache.
* If a texture is not cached, populate cmdlist with uploads command.
* Create necessary resource view/sampler descriptors in the per frame storage struct.
* Returns the number of textures uploaded.
*/
size_t UploadTextures(ID3D12GraphicsCommandList *cmdlist);
size_t UploadTextures(ID3D12GraphicsCommandList *cmdlist, size_t descriptorIndex);
/**
* Creates render target if necessary.
@ -511,6 +507,8 @@ protected:
virtual bool domethod(u32 cmd, u32 arg) override;
virtual void end() override;
virtual void flip(int buffer) override;
virtual void load_vertex_data(u32 first, u32 count) override;
};
#endif

View File

@ -135,7 +135,7 @@ struct D3D12Traits
// TODO: This shouldn't use current dir
std::string filename = "./FragmentProgram" + std::to_string(ID) + ".hlsl";
fs::file(filename, o_write | o_create | o_trunc).write(shader.c_str(), shader.size());
fs::file(filename, fom::write | fom::create | fom::trunc).write(shader.c_str(), shader.size());
fragmentProgramData.id = (u32)ID;
}
@ -148,7 +148,7 @@ struct D3D12Traits
// TODO: This shouldn't use current dir
std::string filename = "./VertexProgram" + std::to_string(ID) + ".hlsl";
fs::file(filename, o_write | o_create | o_trunc).write(shaderCode.c_str(), shaderCode.size());
fs::file(filename, fom::write | fom::create | fom::trunc).write(shaderCode.c_str(), shaderCode.size());
vertexProgramData.id = (u32)ID;
}

View File

@ -284,7 +284,7 @@ size_t getTextureSize(const rsx::texture &texture)
}
}
size_t D3D12GSRender::UploadTextures(ID3D12GraphicsCommandList *cmdlist)
size_t D3D12GSRender::UploadTextures(ID3D12GraphicsCommandList *cmdlist, size_t descriptorIndex)
{
size_t usedTexture = 0;
@ -452,7 +452,8 @@ size_t D3D12GSRender::UploadTextures(ID3D12GraphicsCommandList *cmdlist)
}
m_device->CreateShaderResourceView(vramTexture, &srvDesc,
CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_textureDescriptorsHeap->GetCPUDescriptorHandleForHeapStart()).Offset((UINT)getCurrentResourceStorage().m_currentTextureIndex + (UINT)usedTexture, g_descriptorStrideSRVCBVUAV));
CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_descriptorsHeap->GetCPUDescriptorHandleForHeapStart())
.Offset((UINT)descriptorIndex + (UINT)usedTexture, g_descriptorStrideSRVCBVUAV));
if (getCurrentResourceStorage().m_currentSamplerIndex + 16 > 2048)
{

View File

@ -11,6 +11,11 @@ std::string D3D12VertexProgramDecompiler::getFloatTypeName(size_t elementCount)
return getFloatTypeNameImp(elementCount);
}
// Returns the HLSL type name of an int vector with elementCount components:
// "int", "int2", "int3" or "int4".
// Any other count falls back to "int4", which is what this function used to
// return for every input, so existing getIntTypeName(4) callers are unaffected.
std::string D3D12VertexProgramDecompiler::getIntTypeName(size_t elementCount)
{
	switch (elementCount)
	{
	case 1: return "int";
	case 2: return "int2";
	case 3: return "int3";
	default: return "int4";
	}
}
std::string D3D12VertexProgramDecompiler::getFunction(enum class FUNCTION f)
{
return getFunctionImp(f);
@ -141,7 +146,7 @@ void D3D12VertexProgramDecompiler::insertMainStart(std::stringstream & OS)
void D3D12VertexProgramDecompiler::insertMainEnd(std::stringstream & OS)
{
OS << " PixelInput Out;" << std::endl;
OS << " PixelInput Out = (PixelInput)0;" << std::endl;
// Declare inside main function
for (auto &i : reg_table)
{

View File

@ -8,6 +8,7 @@ struct D3D12VertexProgramDecompiler : public VertexProgramDecompiler
{
protected:
virtual std::string getFloatTypeName(size_t elementCount) override;
std::string getIntTypeName(size_t elementCount) override;
virtual std::string getFunction(enum class FUNCTION) override;
virtual std::string compareFunction(enum class COMPARE, const std::string &, const std::string &) override;

View File

@ -10,6 +10,12 @@ std::string GLVertexDecompilerThread::getFloatTypeName(size_t elementCount)
return getFloatTypeNameImpl(elementCount);
}
// Returns the GLSL type name of an int vector with elementCount components:
// "int", "ivec2", "ivec3" or "ivec4".
// Any other count falls back to "ivec4", which is what this function used to
// return for every input, so existing getIntTypeName(4) callers are unaffected.
std::string GLVertexDecompilerThread::getIntTypeName(size_t elementCount)
{
	switch (elementCount)
	{
	case 1: return "int";
	case 2: return "ivec2";
	case 3: return "ivec3";
	default: return "ivec4";
	}
}
std::string GLVertexDecompilerThread::getFunction(FUNCTION f)
{
return getFunctionImpl(f);

View File

@ -9,6 +9,7 @@ struct GLVertexDecompilerThread : public VertexProgramDecompiler
std::string &m_shader;
protected:
virtual std::string getFloatTypeName(size_t elementCount) override;
std::string getIntTypeName(size_t elementCount) override;
virtual std::string getFunction(FUNCTION) override;
virtual std::string compareFunction(COMPARE, const std::string&, const std::string&) override;

View File

@ -10,6 +10,8 @@
#include "Emu/SysCalls/CB_FUNC.h"
#include "Emu/SysCalls/lv2/sys_time.h"
#include "Common/BufferUtils.h"
#include "Utilities/types.h"
extern "C"
@ -823,68 +825,19 @@ namespace rsx
for (int index = 0; index < limits::vertex_count; ++index)
{
auto &info = vertex_arrays_info[index];
const auto &info = vertex_arrays_info[index];
if (!info.array) // disabled or not a vertex array
{
continue;
}
auto &data = vertex_arrays[index];
if (info.frequency > 1)
{
LOG_ERROR(RSX, "%s: frequency is not null (%d, index=%d)", __FUNCTION__, info.frequency, index);
}
u32 offset = method_registers[NV4097_SET_VERTEX_DATA_ARRAY_OFFSET + index];
u32 address = get_address(offset & 0x7fffffff, offset >> 31);
u32 type_size = get_vertex_type_size(info.type);
u32 element_size = type_size * info.size;
u32 dst_position = (u32)data.size();
data.resize(dst_position + count * element_size);
u32 base_offset = method_registers[NV4097_SET_VERTEX_DATA_BASE_OFFSET];
u32 base_index = method_registers[NV4097_SET_VERTEX_DATA_BASE_INDEX];
for (u32 i = 0; i < count; ++i)
{
const u8* src = vm::_ptr<u8>(address + base_offset + info.stride * (first + i + base_index));
u8* dst = data.data() + dst_position + i * element_size;
switch (type_size)
{
case 1:
memcpy(dst, src, info.size);
break;
case 2:
{
auto* c_src = (const be_t<u16>*)src;
u16* c_dst = (u16*)dst;
for (u32 j = 0; j < info.size; ++j)
{
*c_dst++ = *c_src++;
}
break;
}
case 4:
{
auto* c_src = (const be_t<u32>*)src;
u32* c_dst = (u32*)dst;
for (u32 j = 0; j < info.size; ++j)
{
*c_dst++ = *c_src++;
}
break;
}
}
}
write_vertex_array_data_to_buffer(data.data() + dst_position, first, count, index, info);
}
}

View File

@ -163,7 +163,7 @@ namespace rsx
u32 transform_program[512 * 4] = {};
void load_vertex_data(u32 first, u32 count);
virtual void load_vertex_data(u32 first, u32 count);
void load_vertex_index_data(u32 first, u32 count);
public: