vk/rsx: Fix some more bugs

kd-11 authored on 2020-12-29 23:16:21 +03:00; committed by kd-11
parent 59e46f09b7
commit a1ab6c28c1
2 changed files with 48 additions and 23 deletions


@@ -735,15 +735,15 @@ namespace rsx
 {
     if (word_size == 1)
     {
-        if (caps.supports_zero_copy)
+        if (is_swizzled)
+        {
+            copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u8>(dst_buffer), as_const_span<const u8>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
+        }
+        else if (caps.supports_zero_copy)
         {
             result.require_upload = true;
             result.deferred_cmds = build_transfer_cmds(src_layout.data.data(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
         }
-        else if (is_swizzled)
-        {
-            copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround<u8>(dst_buffer), as_const_span<const u8>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
-        }
         else
         {
             copy_unmodified_block::copy_mipmap_level(as_span_workaround<u8>(dst_buffer), as_const_span<const u8>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
@@ -751,6 +751,9 @@ namespace rsx
     }
     else
     {
+        result.element_size = word_size;
+        result.block_length = words_per_block;
+
         bool require_cpu_swizzle = !caps.supports_hw_deswizzle;
         bool require_cpu_byteswap = !caps.supports_byteswap;
@@ -768,9 +771,7 @@ namespace rsx
         if (!require_cpu_byteswap && !require_cpu_swizzle)
         {
-            result.require_deswizzle = is_swizzled;
             result.require_swap = true;
-            result.element_size = word_size;
 
             if (caps.supports_zero_copy)
             {
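The reordering in the first hunk is the substance of this file's fix: the zero-copy branch only defers a raw transfer of the source rows and never deswizzles, so for byte-per-block formats a swizzled texture has to be routed to the CPU deswizzler before the zero-copy capability is even considered. A minimal, self-contained sketch of the corrected branch priority, with hypothetical names rather than the RPCS3 helpers:

#include <cstdio>

// Illustrative stand-ins; the real code dispatches to copy_unmodified_block_swizzled,
// build_transfer_cmds or copy_unmodified_block respectively.
enum class u8_upload_path { cpu_deswizzle, deferred_zero_copy, linear_copy };

u8_upload_path select_u8_upload_path(bool is_swizzled, bool supports_zero_copy)
{
    if (is_swizzled)
    {
        // Checked first: the deferred transfer copies raw bytes and would hand
        // swizzled texel data straight to the GPU.
        return u8_upload_path::cpu_deswizzle;
    }

    if (supports_zero_copy)
    {
        return u8_upload_path::deferred_zero_copy;
    }

    return u8_upload_path::linear_copy;
}

int main()
{
    // Swizzled data wins over zero-copy, matching the reordered branch above.
    std::printf("%d\n", static_cast<int>(select_u8_upload_path(true, true)));  // 0 = cpu_deswizzle
    std::printf("%d\n", static_cast<int>(select_u8_upload_path(false, true))); // 1 = deferred_zero_copy
}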


@@ -809,7 +809,7 @@ namespace vk
     u32 block_in_pixel = rsx::get_format_block_size_in_texel(format);
     u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format);
 
-    rsx::texture_uploader_capabilities caps{ true, false, true, true, heap_align };
+    rsx::texture_uploader_capabilities caps{ .alignment = heap_align };
     rsx::texture_memory_info opt{};
     bool check_caps = true;
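The caps change in this hunk swaps a positional aggregate initializer for a C++20 designated initializer: members that are not named keep their default member initializers, and only the alignment is taken from heap_align (the remaining capabilities are now derived per-upload further down). A small sketch of the idiom, using an illustrative field set rather than the exact texture_uploader_capabilities definition:

// Hypothetical approximation of the capabilities struct; the defaults here are
// for illustration only.
struct uploader_caps
{
    bool supports_byteswap = true;
    bool supports_hw_deswizzle = false;
    bool supports_zero_copy = false;
    bool supports_vtc_decoding = true;
    unsigned alignment = 256;
};

int main()
{
    // C++20 designated initializer: only .alignment is overridden, every other
    // member takes its in-class default, so the initializer stays correct even
    // if fields are added to the struct later.
    uploader_caps caps { .alignment = 512 };
    return (caps.alignment == 512 && !caps.supports_hw_deswizzle) ? 0 : 1;
}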
@@ -817,13 +817,14 @@ namespace vk
     u32 scratch_offset = 0;
     u32 row_pitch, image_linear_size;
+    vk::buffer* upload_buffer = nullptr;
+    usz offset_in_upload_buffer = 0;
 
     std::vector<VkBufferImageCopy> copy_regions;
     std::vector<VkBufferCopy> buffer_copies;
+    std::vector<std::pair<VkBuffer, u32>> upload_commands;
     copy_regions.reserve(subresource_layout.size());
 
-    VkBuffer read_buffer = upload_heap.heap->value;
-    VkDeviceSize offset_in_read_buffer = 0;
-
     if (vk::is_renderpass_open(cmd))
     {
         vk::end_renderpass(cmd);
@@ -854,14 +855,16 @@ namespace vk
         image_linear_size = row_pitch * layout.height_in_block * layout.depth;
 
         // Map with extra padding bytes in case of realignment
-        usz offset_in_buffer = upload_heap.alloc<512>(image_linear_size + 8);
-        void* mapped_buffer = upload_heap.map(offset_in_buffer, image_linear_size + 8);
+        offset_in_upload_buffer = upload_heap.alloc<512>(image_linear_size + 8);
+        void* mapped_buffer = upload_heap.map(offset_in_upload_buffer, image_linear_size + 8);
 
         // Only do GPU-side conversion if occupancy is good
         if (check_caps)
         {
             caps.supports_byteswap = (image_linear_size >= 1024);
             caps.supports_hw_deswizzle = caps.supports_byteswap;
+            caps.supports_zero_copy = caps.supports_byteswap;
+            caps.supports_vtc_decoding = false;
             check_caps = false;
         }
@@ -871,7 +874,7 @@ namespace vk
         copy_regions.push_back({});
         auto& copy_info = copy_regions.back();
-        copy_info.bufferOffset = offset_in_buffer;
+        copy_info.bufferOffset = offset_in_upload_buffer;
         copy_info.imageExtent.height = layout.height_in_texel;
         copy_info.imageExtent.width = layout.width_in_texel;
         copy_info.imageExtent.depth = layout.depth;
@@ -881,6 +884,8 @@ namespace vk
         copy_info.imageSubresource.mipLevel = layout.level;
         copy_info.bufferRowLength = std::max<u32>(block_in_pixel * row_pitch / block_size_in_bytes, layout.width_in_texel);
 
+        upload_buffer = upload_heap.heap.get();
+
         if (opt.require_upload)
         {
             ensure(!opt.deferred_cmds.empty());
@@ -900,12 +905,11 @@ namespace vk
             }
 
             auto dma_mapping = vk::map_dma(cmd, static_cast<u32>(src_address), static_cast<u32>(data_length));
-            vk::load_dma(src_address, data_length);
+            vk::load_dma(::narrow<u32>(src_address), data_length);
 
-            read_buffer = dma_mapping.second->value;
-            offset_in_read_buffer = dma_mapping.first;
-
-            copy_info.bufferOffset = offset_in_read_buffer;
+            upload_buffer = dma_mapping.second;
+            offset_in_upload_buffer = dma_mapping.first;
+            copy_info.bufferOffset = offset_in_upload_buffer;
         }
 
         if (opt.require_swap || opt.require_deswizzle || requires_depth_processing)
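The load_dma change in this hunk makes the conversion of src_address to 32 bits explicit. Judging by its use here, ::narrow<u32> is a checked narrowing cast in the spirit of gsl::narrow rather than a plain static_cast; the helper below is a hypothetical illustration of that pattern, not the RPCS3 utility:

#include <cstdint>
#include <stdexcept>

// Hypothetical checked narrowing helper (illustration only): convert, then
// verify the round trip so truncation is reported instead of silently wrapping.
template <typename To, typename From>
To checked_narrow(From value)
{
    const To narrowed = static_cast<To>(value);
    if (static_cast<From>(narrowed) != value)
    {
        throw std::runtime_error("narrowing conversion lost data");
    }
    return narrowed;
}

int main()
{
    const std::uint64_t src_address = 0xC0010000ull;               // fits in u32
    const std::uint32_t addr32 = checked_narrow<std::uint32_t>(src_address);
    // checked_narrow<std::uint32_t>(0x1'0000'0000ull);            // would throw
    return addr32 == 0xC0010000u ? 0 : 1;
}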
@@ -929,7 +933,7 @@ namespace vk
             {
                 buffer_copies.push_back({});
                 auto& copy = buffer_copies.back();
-                copy.srcOffset = uptr(copy_cmd.dst) + offset_in_read_buffer;
+                copy.srcOffset = uptr(copy_cmd.dst) + offset_in_upload_buffer;
                 copy.dstOffset = scratch_offset;
                 copy.size = copy_cmd.length;
             }
@@ -938,7 +942,7 @@ namespace vk
             {
                 buffer_copies.push_back({});
                 auto& copy = buffer_copies.back();
-                copy.srcOffset = offset_in_buffer;
+                copy.srcOffset = offset_in_upload_buffer;
                 copy.dstOffset = scratch_offset;
                 copy.size = image_linear_size;
             }
@@ -951,15 +955,26 @@ namespace vk
         }
         else if (opt.require_upload)
         {
+            if (upload_commands.empty() || upload_buffer->value != upload_commands.back().first)
+            {
+                upload_commands.emplace_back(upload_buffer->value, 1);
+            }
+            else
+            {
+                upload_commands.back().second++;
+            }
+
             copy_info.bufferRowLength = std::max<u32>(block_in_pixel * layout.pitch_in_block, layout.width_in_texel);
         }
     }
 
+    ensure(upload_buffer);
+
     if (opt.require_swap || opt.require_deswizzle || requires_depth_processing)
     {
         ensure(scratch_buf);
-        vkCmdCopyBuffer(cmd, read_buffer, scratch_buf->value, static_cast<u32>(buffer_copies.size()), buffer_copies.data());
+        vkCmdCopyBuffer(cmd, upload_buffer->value, scratch_buf->value, static_cast<u32>(buffer_copies.size()), buffer_copies.data());
 
         insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, scratch_offset, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
             VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
@@ -1005,9 +1020,18 @@ namespace vk
         vkCmdCopyBufferToImage(cmd, scratch_buf->value, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, static_cast<u32>(copy_regions.size()), copy_regions.data());
     }
+    else if (opt.require_upload)
+    {
+        auto region_ptr = copy_regions.data();
+        for (const auto& op : upload_commands)
+        {
+            vkCmdCopyBufferToImage(cmd, op.first, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, op.second, region_ptr);
+            region_ptr += op.second;
+        }
+    }
     else
     {
-        vkCmdCopyBufferToImage(cmd, read_buffer, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, static_cast<u32>(copy_regions.size()), copy_regions.data());
+        vkCmdCopyBufferToImage(cmd, upload_buffer->value, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, static_cast<u32>(copy_regions.size()), copy_regions.data());
     }
 }
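The upload_commands bookkeeping added in the last two hunks is a run-length batching pattern: while regions are recorded, consecutive entries that read from the same source VkBuffer are folded into one (buffer, count) pair, and at submit time each pair becomes a single vkCmdCopyBufferToImage call over a contiguous slice of copy_regions. A self-contained sketch of the same grouping on plain data, with stand-in types and no Vulkan calls:

#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

// Stand-ins for VkBuffer handles and VkBufferImageCopy entries.
using buffer_handle = std::uint64_t;
struct copy_region { std::uint32_t mip_level; };

int main()
{
    // Source buffer for each mip: mips 0-1 from the staging heap, mip 2 from a
    // DMA-mapped buffer, mip 3 from the staging heap again.
    const std::vector<buffer_handle> sources = { 0xA, 0xA, 0xB, 0xA };

    std::vector<copy_region> regions;
    std::vector<std::pair<buffer_handle, std::uint32_t>> upload_commands;

    for (std::uint32_t mip = 0; mip < sources.size(); ++mip)
    {
        regions.push_back({ mip });

        // Same rule as the diff: extend the current run while the source
        // buffer is unchanged, otherwise open a new (buffer, count) entry.
        if (upload_commands.empty() || sources[mip] != upload_commands.back().first)
        {
            upload_commands.emplace_back(sources[mip], 1);
        }
        else
        {
            upload_commands.back().second++;
        }
    }

    // Replay: one "copy" per run, each covering a contiguous slice of regions,
    // mirroring the vkCmdCopyBufferToImage loop in the final hunk.
    const copy_region* region_ptr = regions.data();
    for (const auto& [buffer, count] : upload_commands)
    {
        std::printf("copy %u region(s) from buffer %#llx starting at mip %u\n",
            count, static_cast<unsigned long long>(buffer), region_ptr->mip_level);
        region_ptr += count;
    }

    return 0;
}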