diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h
index 43135ea53d..3b87ba46da 100644
--- a/rpcs3/Emu/RSX/VK/VKHelpers.h
+++ b/rpcs3/Emu/RSX/VK/VKHelpers.h
@@ -37,6 +37,18 @@ namespace vk
 		heap_changed = 4,
 	};
 
+	struct image_readback_options_t
+	{
+		bool swap_bytes = false;
+		struct
+		{
+			u64 offset = 0;
+			u64 length = 0;
+
+			operator bool() const { return length != 0; }
+		} sync_region {};
+	};
+
 	const vk::render_device *get_current_renderer();
 	void set_current_renderer(const vk::render_device &device);
 
@@ -74,7 +86,7 @@ namespace vk
 		VkImageAspectFlags flags, vk::data_heap &upload_heap, u32 heap_align, rsx::flags32_t image_setup_flags);
 
 	// Other texture management helpers
-	void copy_image_to_buffer(const vk::command_buffer& cmd, const vk::image* src, const vk::buffer* dst, const VkBufferImageCopy& region, bool swap_bytes = false);
+	void copy_image_to_buffer(const vk::command_buffer& cmd, const vk::image* src, const vk::buffer* dst, const VkBufferImageCopy& region, const image_readback_options_t& options = {});
 	void copy_buffer_to_image(const vk::command_buffer& cmd, const vk::buffer* src, const vk::image* dst, const VkBufferImageCopy& region);
 
 	u64 calculate_working_buffer_size(u64 base_size, VkImageAspectFlags aspect);
diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h
index e1dfe0142d..8e5598f040 100644
--- a/rpcs3/Emu/RSX/VK/VKRenderTargets.h
+++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h
@@ -566,11 +566,14 @@ namespace vk
 				}
 			};
 
-			vk::copy_image_to_buffer(cmd, source, dest, region);
-			vk::insert_buffer_memory_barrier(cmd,
-				dest->value, src_offset_in_buffer, max_copy_length,
-				VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
-				VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
+			// inject post-transfer barrier
+			image_readback_options_t options{};
+			options.sync_region =
+			{
+				.offset = src_offset_in_buffer,
+				.length = max_copy_length
+			};
+			vk::copy_image_to_buffer(cmd, source, dest, region, options);
 
 			if (dest != bo)
 			{
diff --git a/rpcs3/Emu/RSX/VK/VKTexture.cpp b/rpcs3/Emu/RSX/VK/VKTexture.cpp
index 555c0eb567..349697ffd7 100644
--- a/rpcs3/Emu/RSX/VK/VKTexture.cpp
+++ b/rpcs3/Emu/RSX/VK/VKTexture.cpp
@@ -46,7 +46,12 @@ namespace vk
 		}
 	}
 
-	void copy_image_to_buffer(const vk::command_buffer& cmd, const vk::image* src, const vk::buffer* dst, const VkBufferImageCopy& region, bool swap_bytes)
+	void copy_image_to_buffer(
+		const vk::command_buffer& cmd,
+		const vk::image* src,
+		const vk::buffer* dst,
+		const VkBufferImageCopy& region,
+		const image_readback_options_t& options)
 	{
 		// Always validate
 		ensure(src->current_layout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL || src->current_layout == VK_IMAGE_LAYOUT_GENERAL);
@@ -63,8 +68,17 @@ namespace vk
 		{
 		default:
 		{
-			ensure(!swap_bytes); // "Implicit byteswap option not supported for speficied format"
+			ensure(!options.swap_bytes); // "Implicit byteswap option not supported for speficied format"
 			vkCmdCopyImageToBuffer(cmd, src->value, src->current_layout, dst->value, 1, &region);
+
+			if (options.sync_region)
+			{
+				// Post-Transfer barrier
+				vk::insert_buffer_memory_barrier(cmd, dst->value,
+					options.sync_region.offset, options.sync_region.length,
+					VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+					VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
+			}
 			break;
 		}
 		case VK_FORMAT_D32_SFLOAT:
@@ -95,7 +109,7 @@ namespace vk
 				VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
 
 			// 3. Do conversion with byteswap [D32->D16F]
-			if (!swap_bytes) [[likely]]
+			if (!options.swap_bytes) [[likely]]
 			{
 				auto job = vk::get_compute_task>();
 				job->run(cmd, dst, z32_offset, packed32_length, data_offset);
@@ -106,10 +120,18 @@ namespace vk
 				job->run(cmd, dst, z32_offset, packed32_length, data_offset);
 			}
 
-			// 4. Post-compute barrier
-			vk::insert_buffer_memory_barrier(cmd, dst->value, region.bufferOffset, packed16_length,
-				VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
-				VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
+			if (options.sync_region)
+			{
+				u64 sync_end = options.sync_region.offset + options.sync_region.length;
+				u64 write_end = region.bufferOffset + packed16_length;
+				const u64 sync_offset = std::min(region.bufferOffset, options.sync_region.offset);
+				const u64 sync_length = std::max(sync_end, write_end) - sync_offset;
+
+				// 4. Post-compute barrier
+				vk::insert_buffer_memory_barrier(cmd, dst->value, sync_offset, sync_length,
+					VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+					VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
+			}
 			break;
 		}
 		case VK_FORMAT_D24_UNORM_S8_UINT:
@@ -141,7 +163,7 @@ namespace vk
 
 			// 2. Interleave the separated data blocks with a compute job
 			vk::cs_interleave_task *job;
-			if (!swap_bytes) [[likely]]
+			if (!options.swap_bytes) [[likely]]
 			{
 				if (src->format() == VK_FORMAT_D24_UNORM_S8_UINT)
 				{
@@ -178,9 +200,17 @@ namespace vk
 
 			job->run(cmd, dst, data_offset, packed_length, z_offset, s_offset);
 
-			vk::insert_buffer_memory_barrier(cmd, dst->value, region.bufferOffset, packed_length,
-				VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
-				VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
+			if (options.sync_region)
+			{
+				u64 sync_end = options.sync_region.offset + options.sync_region.length;
+				u64 write_end = region.bufferOffset + packed_length;
+				const u64 sync_offset = std::min(region.bufferOffset, options.sync_region.offset);
+				const u64 sync_length = std::max(sync_end, write_end) - sync_offset;
+
+				vk::insert_buffer_memory_barrier(cmd, dst->value, sync_offset, sync_length,
+					VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+					VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
+			}
 			break;
 		}
 	}
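For orientation, a caller-side sketch of the reworked interface follows (not part of the patch; cmd, src_image, readback_buffer, width, height and copy_length are placeholder names for objects and sizes the caller already owns). Leaving sync_region.length at zero keeps the old behaviour of emitting no barrier; a non-zero length asks copy_image_to_buffer to insert the post-transfer or post-compute barrier itself, which is what the VKRenderTargets.h hunk above now relies on.

// Hypothetical caller; mirrors the usage introduced in VKRenderTargets.h.
VkBufferImageCopy region {};
region.imageSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, /*mipLevel*/ 0, /*baseArrayLayer*/ 0, /*layerCount*/ 1 };
region.imageExtent = { width, height, 1 };

vk::image_readback_options_t options {};
options.swap_bytes = false;   // no implicit byteswap for this format
options.sync_region =         // non-zero length => helper injects the barrier itself
{
	.offset = region.bufferOffset,
	.length = copy_length
};
vk::copy_image_to_buffer(cmd, src_image, readback_buffer, region, options);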