rsx: Use range intersection to speed up xform constants patching

This commit is contained in:
kd-11 2024-04-28 18:10:36 +03:00 committed by kd-11
parent b0375d9c9a
commit 0d0fc23f9e
2 changed files with 34 additions and 4 deletions

View File

@ -999,11 +999,18 @@ void GLGSRender::update_vertex_env(const gl::vertex_upload_info& upload_info)
void GLGSRender::patch_transform_constants(rsx::context* ctx, u32 index, u32 count) void GLGSRender::patch_transform_constants(rsx::context* ctx, u32 index, u32 count)
{ {
if (!m_vertex_prog)
{
// Shouldn't be reachable, but handle it correctly anyway
m_graphics_state |= rsx::pipeline_state::transform_constants_dirty;
return;
}
std::pair<u32, u32> data_range {}; std::pair<u32, u32> data_range {};
void* data_source = nullptr; void* data_source = nullptr;
const auto bound_range = m_transform_constants_buffer->bound_range(); const auto bound_range = m_transform_constants_buffer->bound_range();
if (!m_vertex_prog || m_vertex_prog->has_indexed_constants) if (m_vertex_prog->has_indexed_constants)
{ {
// We're working with a full range. We can do a direct patch in this case since no index translation is required. // We're working with a full range. We can do a direct patch in this case since no index translation is required.
const auto byte_count = count * 16; const auto byte_count = count * 16;
@ -1012,6 +1019,14 @@ void GLGSRender::patch_transform_constants(rsx::context* ctx, u32 index, u32 cou
data_range = { bound_range.first + byte_offset, byte_count}; data_range = { bound_range.first + byte_offset, byte_count};
data_source = &REGS(ctx)->transform_constants[index]; data_source = &REGS(ctx)->transform_constants[index];
} }
else if (auto xform_id = m_vertex_prog->TranslateConstantsRange(index, count); xform_id >= 0)
{
const auto write_offset = xform_id * 16;
const auto byte_count = count * 16;
data_range = { bound_range.first + write_offset, byte_count };
data_source = &REGS(ctx)->transform_constants[index];
}
else else
{ {
auto allocate_mem = [&](usz size) -> std::pair<void*, usz> auto allocate_mem = [&](usz size) -> std::pair<void*, usz>

View File

@ -2354,11 +2354,18 @@ void VKGSRender::update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_
void VKGSRender::patch_transform_constants(rsx::context* ctx, u32 index, u32 count) void VKGSRender::patch_transform_constants(rsx::context* ctx, u32 index, u32 count)
{ {
if (!m_vertex_prog)
{
// Shouldn't be reachable, but handle it correctly anyway
m_graphics_state |= rsx::pipeline_state::transform_constants_dirty;
return;
}
// Hot-patching transform constants mid-draw (instanced draw) // Hot-patching transform constants mid-draw (instanced draw)
std::pair<VkDeviceSize, VkDeviceSize> data_range; std::pair<VkDeviceSize, VkDeviceSize> data_range;
void* data_source = nullptr; void* data_source = nullptr;
if (!m_vertex_prog || m_vertex_prog->has_indexed_constants) if (m_vertex_prog->has_indexed_constants)
{ {
// We're working with a full range. We can do a direct patch in this case since no index translation is required. // We're working with a full range. We can do a direct patch in this case since no index translation is required.
const auto byte_count = count * 16; const auto byte_count = count * 16;
@ -2367,6 +2374,14 @@ void VKGSRender::patch_transform_constants(rsx::context* ctx, u32 index, u32 cou
data_range = { m_vertex_constants_buffer_info.offset + byte_offset, byte_count }; data_range = { m_vertex_constants_buffer_info.offset + byte_offset, byte_count };
data_source = &REGS(ctx)->transform_constants[index]; data_source = &REGS(ctx)->transform_constants[index];
} }
else if (auto xform_id = m_vertex_prog->TranslateConstantsRange(index, count); xform_id >= 0)
{
const auto write_offset = xform_id * 16;
const auto byte_count = count * 16;
data_range = { m_vertex_constants_buffer_info.offset + write_offset, byte_count };
data_source = &REGS(ctx)->transform_constants[index];
}
else else
{ {
// Indexed. This is a bit trickier. Use scratchpad to avoid UAF // Indexed. This is a bit trickier. Use scratchpad to avoid UAF
@ -2390,7 +2405,7 @@ void VKGSRender::patch_transform_constants(rsx::context* ctx, u32 index, u32 cou
data_range.first, data_range.first,
data_range.second, data_range.second,
VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_MEMORY_READ_BIT, VK_ACCESS_TRANSFER_WRITE_BIT); VK_ACCESS_UNIFORM_READ_BIT, VK_ACCESS_TRANSFER_WRITE_BIT);
vkCmdUpdateBuffer( vkCmdUpdateBuffer(
*m_current_command_buffer, *m_current_command_buffer,
@ -2405,7 +2420,7 @@ void VKGSRender::patch_transform_constants(rsx::context* ctx, u32 index, u32 cou
data_range.first, data_range.first,
data_range.second, data_range.second,
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_MEMORY_READ_BIT); VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT);
} }
void VKGSRender::init_buffers(rsx::framebuffer_creation_context context, bool) void VKGSRender::init_buffers(rsx::framebuffer_creation_context context, bool)