vk: Add explicit sync when doing copy_image_to_buffer

Authored by kd-11 on 2023-06-26 23:01:09 +03:00; committed by kd-11
parent 3e8f9cbbf2
commit 942dbbc8fa
3 changed files with 62 additions and 17 deletions

File 1 of 3

@@ -37,6 +37,18 @@ namespace vk
 		heap_changed = 4,
 	};
 
+	struct image_readback_options_t
+	{
+		bool swap_bytes = false;
+
+		struct
+		{
+			u64 offset = 0;
+			u64 length = 0;
+			operator bool() const { return length != 0; }
+		} sync_region {};
+	};
+
 	const vk::render_device *get_current_renderer();
 	void set_current_renderer(const vk::render_device &device);

@@ -74,7 +86,7 @@ namespace vk
 		VkImageAspectFlags flags, vk::data_heap &upload_heap, u32 heap_align, rsx::flags32_t image_setup_flags);
 
 	// Other texture management helpers
-	void copy_image_to_buffer(const vk::command_buffer& cmd, const vk::image* src, const vk::buffer* dst, const VkBufferImageCopy& region, bool swap_bytes = false);
+	void copy_image_to_buffer(const vk::command_buffer& cmd, const vk::image* src, const vk::buffer* dst, const VkBufferImageCopy& region, const image_readback_options_t& options = {});
 	void copy_buffer_to_image(const vk::command_buffer& cmd, const vk::buffer* src, const vk::image* dst, const VkBufferImageCopy& region);
 	u64 calculate_working_buffer_size(u64 base_size, VkImageAspectFlags aspect);
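
For context, the new options struct lets a caller ask copy_image_to_buffer to emit its own post-transfer barrier over the buffer range it writes. A minimal usage sketch, assuming the call site already holds a command buffer, source image, destination buffer and copy region; every name below other than the vk:: API is a placeholder, not taken from the repository:

// Sketch only: request an implicit post-transfer barrier over the copied range.
vk::image_readback_options_t options{};
options.swap_bytes = false;            // keep the source byte order
options.sync_region =
{
	.offset = dst_offset_in_buffer,    // first byte the copy writes (placeholder)
	.length = copy_length_in_bytes     // size of the written range (placeholder)
};
vk::copy_image_to_buffer(cmd, src_image, dst_buffer, region, options);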

File 2 of 3

@@ -566,11 +566,14 @@ namespace vk
 			}
 		};
 
-		vk::copy_image_to_buffer(cmd, source, dest, region);
-		vk::insert_buffer_memory_barrier(cmd,
-			dest->value, src_offset_in_buffer, max_copy_length,
-			VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
-			VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
+		// inject post-transfer barrier
+		image_readback_options_t options{};
+		options.sync_region =
+		{
+			.offset = src_offset_in_buffer,
+			.length = max_copy_length
+		};
+		vk::copy_image_to_buffer(cmd, source, dest, region, options);
 
 		if (dest != bo)
 		{
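
With this change the call site no longer issues the transfer-to-transfer barrier itself; it passes the same offset and length through sync_region and lets copy_image_to_buffer inject the sync. Assuming vk::insert_buffer_memory_barrier is a thin wrapper over a Vulkan buffer memory barrier and that cmd converts to a VkCommandBuffer (neither verified against the helper's source), the injected sync corresponds roughly to:

// Rough raw-Vulkan equivalent of the injected transfer->transfer sync (sketch).
VkBufferMemoryBarrier barrier{};
barrier.sType               = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
barrier.srcAccessMask       = VK_ACCESS_TRANSFER_WRITE_BIT; // the image->buffer copy
barrier.dstAccessMask       = VK_ACCESS_TRANSFER_READ_BIT;  // the follow-up buffer read
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.buffer              = dest->value;                  // VkBuffer handle
barrier.offset              = src_offset_in_buffer;
barrier.size                = max_copy_length;

vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
	0, 0, nullptr, 1, &barrier, 0, nullptr);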

File 3 of 3

@@ -46,7 +46,12 @@ namespace vk
 		}
 	}
 
-	void copy_image_to_buffer(const vk::command_buffer& cmd, const vk::image* src, const vk::buffer* dst, const VkBufferImageCopy& region, bool swap_bytes)
+	void copy_image_to_buffer(
+		const vk::command_buffer& cmd,
+		const vk::image* src,
+		const vk::buffer* dst,
+		const VkBufferImageCopy& region,
+		const image_readback_options_t& options)
 	{
 		// Always validate
 		ensure(src->current_layout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL || src->current_layout == VK_IMAGE_LAYOUT_GENERAL);
@@ -63,8 +68,17 @@ namespace vk
 		{
 		default:
 		{
-			ensure(!swap_bytes); // "Implicit byteswap option not supported for speficied format"
+			ensure(!options.swap_bytes); // "Implicit byteswap option not supported for speficied format"
 			vkCmdCopyImageToBuffer(cmd, src->value, src->current_layout, dst->value, 1, &region);
+
+			if (options.sync_region)
+			{
+				// Post-Transfer barrier
+				vk::insert_buffer_memory_barrier(cmd, dst->value,
+					options.sync_region.offset, options.sync_region.length,
+					VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+					VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
+			}
 			break;
 		}
 		case VK_FORMAT_D32_SFLOAT:
@@ -95,7 +109,7 @@ namespace vk
 				VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
 
 			// 3. Do conversion with byteswap [D32->D16F]
-			if (!swap_bytes) [[likely]]
+			if (!options.swap_bytes) [[likely]]
 			{
 				auto job = vk::get_compute_task<vk::cs_fconvert_task<f32, f16>>();
 				job->run(cmd, dst, z32_offset, packed32_length, data_offset);
@@ -106,10 +120,18 @@ namespace vk
 				job->run(cmd, dst, z32_offset, packed32_length, data_offset);
 			}
 
-			// 4. Post-compute barrier
-			vk::insert_buffer_memory_barrier(cmd, dst->value, region.bufferOffset, packed16_length,
-				VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
-				VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
+			if (options.sync_region)
+			{
+				u64 sync_end = options.sync_region.offset + options.sync_region.length;
+				u64 write_end = region.bufferOffset + packed16_length;
+				const u64 sync_offset = std::min<u64>(region.bufferOffset, options.sync_region.offset);
+				const u64 sync_length = std::max<u64>(sync_end, write_end) - sync_offset;
+
+				// 4. Post-compute barrier
+				vk::insert_buffer_memory_barrier(cmd, dst->value, sync_offset, sync_length,
+					VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+					VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
+			}
 			break;
 		}
 		case VK_FORMAT_D24_UNORM_S8_UINT:
@@ -141,7 +163,7 @@ namespace vk
 
 			// 2. Interleave the separated data blocks with a compute job
 			vk::cs_interleave_task *job;
-			if (!swap_bytes) [[likely]]
+			if (!options.swap_bytes) [[likely]]
 			{
 				if (src->format() == VK_FORMAT_D24_UNORM_S8_UINT)
 				{
@@ -178,9 +200,17 @@ namespace vk
 			job->run(cmd, dst, data_offset, packed_length, z_offset, s_offset);
 
-			vk::insert_buffer_memory_barrier(cmd, dst->value, region.bufferOffset, packed_length,
-				VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
-				VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
+			if (options.sync_region)
+			{
+				u64 sync_end = options.sync_region.offset + options.sync_region.length;
+				u64 write_end = region.bufferOffset + packed_length;
+				const u64 sync_offset = std::min<u64>(region.bufferOffset, options.sync_region.offset);
+				const u64 sync_length = std::max<u64>(sync_end, write_end) - sync_offset;
+
+				vk::insert_buffer_memory_barrier(cmd, dst->value, sync_offset, sync_length,
+					VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+					VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
+			}
 			break;
 		}
 		}
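
Both compute paths above widen the barrier so a single sync covers the range the shader wrote and the range the caller requested via sync_region. The same union, restated as a hypothetical standalone helper that is not part of the patch (it assumes the repository's u64 alias and <algorithm>):

// Hypothetical helper restating the range-union logic used in the two compute paths.
struct range_t { u64 offset; u64 length; };

static range_t merge_sync_range(const range_t& written, const range_t& requested)
{
	const u64 begin = std::min<u64>(written.offset, requested.offset);
	const u64 end   = std::max<u64>(written.offset + written.length,
	                                requested.offset + requested.length);
	return { begin, end - begin };
}

// Example: written = { 0x1000, 0x200 }, requested = { 0x1100, 0x400 }
// -> { 0x1000, 0x500 }, spanning both ranges with one barrier.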