From a1ab6c28c1d420f5eb66878e8b5b82fdb3b21572 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Tue, 29 Dec 2020 23:16:21 +0300 Subject: [PATCH] vk/rsx: Fix some more bugs --- rpcs3/Emu/RSX/Common/TextureUtils.cpp | 15 +++---- rpcs3/Emu/RSX/VK/VKTexture.cpp | 56 +++++++++++++++++++-------- 2 files changed, 48 insertions(+), 23 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.cpp b/rpcs3/Emu/RSX/Common/TextureUtils.cpp index f855e22357..83371433f8 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.cpp +++ b/rpcs3/Emu/RSX/Common/TextureUtils.cpp @@ -735,15 +735,15 @@ namespace rsx { if (word_size == 1) { - if (caps.supports_zero_copy) + if (is_swizzled) + { + copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block); + } + else if (caps.supports_zero_copy) { result.require_upload = true; result.deferred_cmds = build_transfer_cmds(src_layout.data.data(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); } - else if (is_swizzled) - { - copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block); - } else { copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); @@ -751,6 +751,9 @@ namespace rsx } else { + result.element_size = word_size; + result.block_length = words_per_block; + bool require_cpu_swizzle = !caps.supports_hw_deswizzle; bool require_cpu_byteswap = !caps.supports_byteswap; @@ -768,9 +771,7 @@ namespace rsx if (!require_cpu_byteswap && !require_cpu_swizzle) { - result.require_deswizzle = is_swizzled; result.require_swap = true; - result.element_size = word_size; if (caps.supports_zero_copy) { diff --git a/rpcs3/Emu/RSX/VK/VKTexture.cpp b/rpcs3/Emu/RSX/VK/VKTexture.cpp index aa2239fe88..d748b39bec 100644 --- a/rpcs3/Emu/RSX/VK/VKTexture.cpp +++ b/rpcs3/Emu/RSX/VK/VKTexture.cpp @@ -809,7 +809,7 @@ namespace vk u32 block_in_pixel = rsx::get_format_block_size_in_texel(format); u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format); - rsx::texture_uploader_capabilities caps{ true, false, true, true, heap_align }; + rsx::texture_uploader_capabilities caps{ .alignment = heap_align }; rsx::texture_memory_info opt{}; bool check_caps = true; @@ -817,13 +817,14 @@ namespace vk u32 scratch_offset = 0; u32 row_pitch, image_linear_size; + vk::buffer* upload_buffer = nullptr; + usz offset_in_upload_buffer = 0; + std::vector copy_regions; std::vector buffer_copies; + std::vector> upload_commands; copy_regions.reserve(subresource_layout.size()); - VkBuffer read_buffer = upload_heap.heap->value; - VkDeviceSize offset_in_read_buffer = 0; - if (vk::is_renderpass_open(cmd)) { vk::end_renderpass(cmd); @@ -854,14 +855,16 @@ namespace vk image_linear_size = row_pitch * layout.height_in_block * layout.depth; // Map with extra padding bytes in case of realignment - usz offset_in_buffer = upload_heap.alloc<512>(image_linear_size + 8); - void* mapped_buffer = upload_heap.map(offset_in_buffer, image_linear_size + 8); + offset_in_upload_buffer = upload_heap.alloc<512>(image_linear_size + 8); + void* mapped_buffer = upload_heap.map(offset_in_upload_buffer, image_linear_size + 8); // Only do GPU-side conversion if occupancy is good if (check_caps) { caps.supports_byteswap = (image_linear_size >= 1024); caps.supports_hw_deswizzle = caps.supports_byteswap; + caps.supports_zero_copy = caps.supports_byteswap; + caps.supports_vtc_decoding = false; check_caps = false; } @@ -871,7 +874,7 @@ namespace vk copy_regions.push_back({}); auto& copy_info = copy_regions.back(); - copy_info.bufferOffset = offset_in_buffer; + copy_info.bufferOffset = offset_in_upload_buffer; copy_info.imageExtent.height = layout.height_in_texel; copy_info.imageExtent.width = layout.width_in_texel; copy_info.imageExtent.depth = layout.depth; @@ -881,6 +884,8 @@ namespace vk copy_info.imageSubresource.mipLevel = layout.level; copy_info.bufferRowLength = std::max(block_in_pixel * row_pitch / block_size_in_bytes, layout.width_in_texel); + upload_buffer = upload_heap.heap.get(); + if (opt.require_upload) { ensure(!opt.deferred_cmds.empty()); @@ -900,12 +905,11 @@ namespace vk } auto dma_mapping = vk::map_dma(cmd, static_cast(src_address), static_cast(data_length)); - vk::load_dma(src_address, data_length); + vk::load_dma(::narrow(src_address), data_length); - read_buffer = dma_mapping.second->value; - offset_in_read_buffer = dma_mapping.first; - - copy_info.bufferOffset = offset_in_read_buffer; + upload_buffer = dma_mapping.second; + offset_in_upload_buffer = dma_mapping.first; + copy_info.bufferOffset = offset_in_upload_buffer; } if (opt.require_swap || opt.require_deswizzle || requires_depth_processing) @@ -929,7 +933,7 @@ namespace vk { buffer_copies.push_back({}); auto& copy = buffer_copies.back(); - copy.srcOffset = uptr(copy_cmd.dst) + offset_in_read_buffer; + copy.srcOffset = uptr(copy_cmd.dst) + offset_in_upload_buffer; copy.dstOffset = scratch_offset; copy.size = copy_cmd.length; } @@ -938,7 +942,7 @@ namespace vk { buffer_copies.push_back({}); auto& copy = buffer_copies.back(); - copy.srcOffset = offset_in_buffer; + copy.srcOffset = offset_in_upload_buffer; copy.dstOffset = scratch_offset; copy.size = image_linear_size; } @@ -951,15 +955,26 @@ namespace vk } else if (opt.require_upload) { + if (upload_commands.empty() || upload_buffer->value != upload_commands.back().first) + { + upload_commands.emplace_back(upload_buffer->value, 1); + } + else + { + upload_commands.back().second++; + } + copy_info.bufferRowLength = std::max(block_in_pixel * layout.pitch_in_block, layout.width_in_texel); } } + ensure(upload_buffer); + if (opt.require_swap || opt.require_deswizzle || requires_depth_processing) { ensure(scratch_buf); - vkCmdCopyBuffer(cmd, read_buffer, scratch_buf->value, static_cast(buffer_copies.size()), buffer_copies.data()); + vkCmdCopyBuffer(cmd, upload_buffer->value, scratch_buf->value, static_cast(buffer_copies.size()), buffer_copies.data()); insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, scratch_offset, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT); @@ -1005,9 +1020,18 @@ namespace vk vkCmdCopyBufferToImage(cmd, scratch_buf->value, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, static_cast(copy_regions.size()), copy_regions.data()); } + else if (opt.require_upload) + { + auto region_ptr = copy_regions.data(); + for (const auto& op : upload_commands) + { + vkCmdCopyBufferToImage(cmd, op.first, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, op.second, region_ptr); + region_ptr += op.second; + } + } else { - vkCmdCopyBufferToImage(cmd, read_buffer, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, static_cast(copy_regions.size()), copy_regions.data()); + vkCmdCopyBufferToImage(cmd, upload_buffer->value, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, static_cast(copy_regions.size()), copy_regions.data()); } }