vk: Implement copy-to-buffer and copy-from-buffer for depth_stencil formats
- Allows D24S8 and D32S8 transport via typeless channels
- Allows uploading and downloading D24S8 data easily
- TODO: Implement optional byteswapping to fix flushed readbacks with
the same method
This commit is contained in:
kd-11 2019-04-02 15:16:52 +03:00 committed by kd-11
parent 366e4c2422
commit 0f7af391d7
4 changed files with 344 additions and 70 deletions

View File

@ -1528,14 +1528,14 @@ namespace rsx
{
case CELL_GCM_TEXTURE_X16:
{
// NOP, a simple way to quickly read DEPTH16 data without shadow comparison
// A simple way to quickly read DEPTH16 data without shadow comparison
break;
}
case CELL_GCM_TEXTURE_A8R8G8B8:
case CELL_GCM_TEXTURE_D8R8G8B8:
case CELL_GCM_TEXTURE_A4R4G4B4: //TODO
case CELL_GCM_TEXTURE_R5G6B5: //TODO
{
// Reading depth data as XRGB8 is supported with in-shader conversion
// TODO: Optionally add support for 16-bit formats (not necessary since type casts are easy with that)
u32 remap = tex.remap();
result.redirected_textures |= (1 << i);
result.texture_scale[i][2] = (f32&)remap;

View File

@ -10,6 +10,7 @@ namespace vk
std::string m_src;
vk::glsl::shader m_shader;
std::unique_ptr<vk::glsl::program> m_program;
std::unique_ptr<vk::buffer> m_param_buffer;
vk::descriptor_pool m_descriptor_pool;
VkDescriptorSet m_descriptor_set = nullptr;
@ -19,20 +20,22 @@ namespace vk
bool initialized = false;
bool unroll_loops = true;
bool uniform_inputs = false;
u32 optimal_group_size = 1;
u32 optimal_kernel_size = 1;
void init_descriptors()
{
VkDescriptorPoolSize descriptor_pool_sizes[1] =
VkDescriptorPoolSize descriptor_pool_sizes[2] =
{
{ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_MAX_COMPUTE_TASKS },
{ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_MAX_COMPUTE_TASKS }
};
//Reserve descriptor pools
m_descriptor_pool.create(*get_current_renderer(), descriptor_pool_sizes, 1);
std::vector<VkDescriptorSetLayoutBinding> bindings(1);
std::vector<VkDescriptorSetLayoutBinding> bindings(2);
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
bindings[0].descriptorCount = 1;
@ -40,10 +43,16 @@ namespace vk
bindings[0].binding = 0;
bindings[0].pImmutableSamplers = nullptr;
bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[1].descriptorCount = 1;
bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
bindings[1].binding = 1;
bindings[1].pImmutableSamplers = nullptr;
VkDescriptorSetLayoutCreateInfo infos = {};
infos.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
infos.pBindings = bindings.data();
infos.bindingCount = (u32)bindings.size();
infos.bindingCount = uniform_inputs? 2u : 1u;
CHECK_RESULT(vkCreateDescriptorSetLayout(*get_current_renderer(), &infos, nullptr, &m_descriptor_layout));
@ -88,6 +97,7 @@ namespace vk
{
m_shader.destroy();
m_program.reset();
m_param_buffer.reset();
vkDestroyDescriptorSetLayout(*get_current_renderer(), m_descriptor_layout, nullptr);
vkDestroyPipelineLayout(*get_current_renderer(), m_pipeline_layout, nullptr);
@ -162,11 +172,32 @@ namespace vk
struct cs_shuffle_base : compute_task
{
vk::buffer* m_data;
const vk::buffer* m_data;
u32 m_data_offset = 0;
u32 m_data_length = 0;
u32 kernel_size = 1;
std::string variables, work_kernel, loop_advance, suffix;
// Installs the default GLSL fragments used when the shader source is assembled.
// Derived tasks can overwrite any of these members before calling build().
cs_shuffle_base()
{
    // Default per-element operation: load a dword, pass it through the
    // conversion function substituted for %f, and store it back in place.
    work_kernel =
        " value = data[index];\n"
        " data[index] = %f(value);\n";

    // Step to the next dword handled by this invocation.
    loop_advance = " index++;\n";

    // Closes the shader's main() body.
    suffix = "}\n";
}
void build(const char* function_name, u32 _kernel_size = 0)
{
// Initialize to allow detecting optimal settings
@ -178,7 +209,8 @@ namespace vk
{
"#version 430\n"
"layout(local_size_x=%ws, local_size_y=1, local_size_z=1) in;\n"
"layout(std430, set=0, binding=0) buffer ssbo{ uint data[]; };\n\n"
"layout(std430, set=0, binding=0) buffer ssbo{ uint data[]; };\n"
"%ub"
"\n"
"#define KERNEL_SIZE %ks\n"
"\n"
@ -188,38 +220,27 @@ namespace vk
"#define bswap_u16_u32(bits) (bits & 0xFFFF) << 16 | (bits & 0xFFFF0000) >> 16\n"
"\n"
"// Depth format conversions\n"
"#define d24x8_to_f32(bits) floatBitsToUint(float(bits >> 8) / 16777214.f)\n"
"#define d24_to_f32(bits) floatBitsToUint(float(bits) / 16777215.f)\n"
"#define f32_to_d24(bits) uint(uintBitsToFloat(bits) * 16777215.f)\n"
"#define d24x8_to_f32(bits) d24_to_f32(bits >> 8)\n"
"#define d24x8_to_d24x8_swapped(bits) (bits & 0xFF00) | (bits & 0xFF0000) >> 16 | (bits & 0xFF) << 16\n"
"#define f32_to_d24x8_swapped(bits) d24x8_to_d24x8_swapped(uint(uintBitsToFloat(bits) * 16777214.f))\n"
"#define f32_to_d24x8_swapped(bits) d24x8_to_d24x8_swapped(f32_to_d24(bits))\n"
"\n"
"void main()\n"
"{\n"
" uint index = gl_GlobalInvocationID.x * KERNEL_SIZE;\n"
" uint value;\n"
" %vars"
"\n"
};
std::string work_kernel =
{
" value = data[index];\n"
" data[index] = %f(value);\n"
};
std::string loop_advance =
{
" index++;\n"
};
const std::string suffix =
{
"}\n"
};
const std::pair<std::string, std::string> syntax_replace[] =
{
{ "%ws", std::to_string(optimal_group_size) },
{ "%ks", std::to_string(kernel_size) },
{ "%f", function_name }
{ "%vars", variables },
{ "%f", function_name },
{ "%ub", uniform_inputs? "layout(std140, set=0, binding=1) uniform ubo{ uvec4 params[16]; };\n" : "" },
};
m_src = fmt::replace_all(m_src, syntax_replace);
@ -262,9 +283,29 @@ namespace vk
// Binds the working storage buffer at binding 0 and, for tasks that take
// uniform inputs, the parameter buffer at binding 1.
void bind_resources() override
{
    m_program->bind_buffer({ m_data->value, m_data_offset, m_data_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set);

    if (!uniform_inputs)
    {
        // Nothing else to bind for tasks without a parameter block
        return;
    }

    // The parameter buffer must already exist before it can be bound
    verify(HERE), m_param_buffer, m_param_buffer->value != VK_NULL_HANDLE;
    m_program->bind_buffer({ m_param_buffer->value, 0, 256 }, 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, m_descriptor_set);
}
void run(VkCommandBuffer cmd, vk::buffer* data, u32 data_length, u32 data_offset = 0)
// Records an update of the task's parameter (uniform) buffer.
//
// cmd    - command buffer that receives the vkCmdUpdateBuffer command
// params - pointer to 'count' 32-bit values, written starting at offset 0
// count  - number of 32-bit values; must be non-zero and fit in the 256-byte buffer
//
// The parameter buffer is created lazily on first use. Only valid for tasks
// that were built with uniform_inputs enabled.
void set_parameters(VkCommandBuffer cmd, const u32* params, u8 count)
{
    verify(HERE), uniform_inputs;

    // The buffer is 256 bytes, matching the 256-byte range bound at binding 1.
    // A u8 count can describe up to 1020 bytes, so reject anything that would
    // overrun the buffer. vkCmdUpdateBuffer also requires a non-zero data size.
    constexpr u32 param_buffer_size = 256;
    verify(HERE), params != nullptr, count > 0, (count * sizeof(u32)) <= param_buffer_size;

    if (!m_param_buffer)
    {
        auto pdev = vk::get_current_renderer();
        m_param_buffer = std::make_unique<vk::buffer>(*pdev, param_buffer_size, pdev->get_memory_mapping().host_visible_coherent,
            VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0);
    }

    // NOTE(review): this is a transfer write recorded into the command stream; confirm that the
    // dispatch path orders it against the compute shader's uniform read (and against earlier
    // dispatches that still read the previous parameters).
    vkCmdUpdateBuffer(cmd, m_param_buffer->value, 0, count * sizeof(u32), params);
}
void run(VkCommandBuffer cmd, const vk::buffer* data, u32 data_length, u32 data_offset = 0)
{
m_data = data;
m_data_offset = data_offset;
@ -274,7 +315,7 @@ namespace vk
const auto num_bytes_to_process = align(data_length, num_bytes_per_invocation);
const auto num_invocations = num_bytes_to_process / num_bytes_per_invocation;
if (num_bytes_to_process > data->size())
if ((num_bytes_to_process + data_offset) > data->size())
{
// Technically robust buffer access should keep the driver from crashing in OOB situations
LOG_ERROR(RSX, "Inadequate buffer length submitted for a compute operation."
@ -339,6 +380,134 @@ namespace vk
}
};
// NOTE: D24S8 layout has the stencil in the MSB! It's actually S8|D24|S8|D24 starting at offset 0
struct cs_interleave_task : cs_shuffle_base
{
u32 m_ssbo_length = 0;
cs_interleave_task()
{
uniform_inputs = true;
variables =
{
" uint block_length = params[0].x >> 2;\n"
" uint z_offset = params[0].y >> 2;\n"
" uint s_offset = params[0].z >> 2;\n"
" uint depth;\n"
" uint stencil;\n"
" uint stencil_shift;\n"
" uint stencil_offset;\n"
};
}
void bind_resources() override
{
m_program->bind_buffer({ m_data->value, m_data_offset, m_ssbo_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set);
if (uniform_inputs)
{
verify(HERE), m_param_buffer;
m_program->bind_buffer({ m_param_buffer->value, 0, 256 }, 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, m_descriptor_set);
}
}
void run(VkCommandBuffer cmd, const vk::buffer* data, u32 data_offset, u32 data_length, u32 zeta_offset, u32 stencil_offset)
{
u32 parameters[3] = { data_length, zeta_offset - data_offset, stencil_offset - data_offset };
set_parameters(cmd, parameters, 3);
m_ssbo_length = stencil_offset + (data_length / 4) - data_offset;
cs_shuffle_base::run(cmd, data, data_length, data_offset);
}
};
// Gathers separate depth and stencil banks into packed 32-bit texels.
// Each output dword is (depth << 8) | stencil, where depth is the low 24 bits
// of the matching depth-bank dword and stencil is one byte of the stencil bank
// (four stencil values per dword).
struct cs_gather_d24x8 : cs_interleave_task
{
    cs_gather_d24x8()
    {
        work_kernel =
            // Skip indices beyond the packed block
            " if (index >= block_length)\n"
            " return;\n"
            "\n"
            // Low 24 bits of the depth bank entry
            " depth = data[index + z_offset] & 0x00FFFFFF;\n"
            // Locate this texel's byte inside the stencil bank
            " stencil_offset = (index / 4);\n"
            " stencil_shift = (index % 4) * 8;\n"
            " stencil = data[stencil_offset + s_offset];\n"
            " stencil = (stencil >> stencil_shift) & 0xFF;\n"
            // Pack and store in place
            " value = (depth << 8) | stencil;\n"
            " data[index] = value;\n";

        // The kernel is self-contained, so no conversion function name is substituted
        cs_shuffle_base::build("");
    }
};
// Gathers a float depth bank and a byte stencil bank into packed 32-bit texels.
// Depth-bank entries are converted with the shader's f32_to_d24 macro, then
// packed as (depth << 8) | stencil with one stencil byte per texel.
struct cs_gather_d32x8 : cs_interleave_task
{
    cs_gather_d32x8()
    {
        work_kernel =
            // Skip indices beyond the packed block
            " if (index >= block_length)\n"
            " return;\n"
            "\n"
            // Convert the float depth bits to a 24-bit integer depth
            " depth = f32_to_d24(data[index + z_offset]);\n"
            // Locate this texel's byte inside the stencil bank
            " stencil_offset = (index / 4);\n"
            " stencil_shift = (index % 4) * 8;\n"
            " stencil = data[stencil_offset + s_offset];\n"
            " stencil = (stencil >> stencil_shift) & 0xFF;\n"
            // Pack and store in place
            " value = (depth << 8) | stencil;\n"
            " data[index] = value;\n";

        // The kernel is self-contained, so no conversion function name is substituted
        cs_shuffle_base::build("");
    }
};
// Scatters packed 32-bit texels into separate depth and stencil banks.
// The upper 24 bits of each packed dword go to the depth bank; the low byte is
// merged into the stencil bank, which stores four stencil values per dword.
//
// The stencil bank must be zeroed before the task runs because values are
// OR-ed into it.
struct cs_scatter_d24x8 : cs_interleave_task
{
    cs_scatter_d24x8()
    {
        work_kernel =
        {
            " if (index >= block_length)\n"
            " return;\n"
            "\n"
            " value = data[index];\n"
            " data[index + z_offset] = (value >> 8);\n"
            " stencil_offset = (index / 4);\n"
            " stencil_shift = (index % 4) * 8;\n"
            " stencil = (value & 0xFF) << stencil_shift;\n"
            // Four neighbouring texels share one stencil dword and may be handled by
            // different invocations. A plain read-modify-write can lose updates, so
            // the merge has to be atomic.
            " atomicOr(data[stencil_offset + s_offset], stencil);\n"
        };

        // The kernel is self-contained, so no conversion function name is substituted
        cs_shuffle_base::build("");
    }
};
// Scatters packed 32-bit texels into a float depth bank and a byte stencil bank.
// The upper 24 bits of each packed dword are converted with the shader's
// d24_to_f32 macro and written to the depth bank; the low byte is merged into
// the stencil bank, which stores four stencil values per dword.
//
// The stencil bank must be zeroed before the task runs because values are
// OR-ed into it.
struct cs_scatter_d32x8 : cs_interleave_task
{
    cs_scatter_d32x8()
    {
        work_kernel =
        {
            " if (index >= block_length)\n"
            " return;\n"
            "\n"
            " value = data[index];\n"
            " data[index + z_offset] = d24_to_f32(value >> 8);\n"
            " stencil_offset = (index / 4);\n"
            " stencil_shift = (index % 4) * 8;\n"
            " stencil = (value & 0xFF) << stencil_shift;\n"
            // Four neighbouring texels share one stencil dword and may be handled by
            // different invocations. A plain read-modify-write can lose updates, so
            // the merge has to be atomic.
            " atomicOr(data[stencil_offset + s_offset], stencil);\n"
        };

        // The kernel is self-contained, so no conversion function name is substituted
        cs_shuffle_base::build("");
    }
};
// TODO: Replace with a proper manager
extern std::unordered_map<u32, std::unique_ptr<vk::compute_task>> g_compute_tasks;

View File

@ -146,6 +146,9 @@ namespace vk
void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout, const VkImageSubresourceRange& range);
void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout);
void copy_image_to_buffer(VkCommandBuffer cmd, const vk::image* src, const vk::buffer* dst, const VkBufferImageCopy& region);
void copy_buffer_to_image(VkCommandBuffer cmd, const vk::buffer* src, const vk::image* dst, const VkBufferImageCopy& region);
void copy_image_typeless(const command_buffer &cmd, const image *src, const image *dst, const areai& src_rect, const areai& dst_rect,
u32 mipmaps, VkImageAspectFlags src_aspect, VkImageAspectFlags dst_aspect,
VkImageAspectFlags src_transfer_mask = 0xFF, VkImageAspectFlags dst_transfer_mask = 0xFF);

View File

@ -106,6 +106,127 @@ namespace vk
fmt::throw_exception("Unknown vkFormat 0x%x" HERE, (u32)format);
}
// Records a copy of an image region into a buffer.
//
// For most formats this is a plain vkCmdCopyImageToBuffer. For combined
// depth-stencil formats (D24S8 / D32S8) the two aspects are first copied into
// separate banks placed after the output block, then a compute job interleaves
// them into packed 32-bit texels at region.bufferOffset.
//
// Buffer layout used by the depth-stencil path (all inside 'dst'):
//   [bufferOffset, +packed_length)   packed output
//   [z_offset,     +in_depth_size)   depth bank, 256-byte aligned
//   [s_offset,     +in_stencil_size) stencil bank, 256-byte aligned
//
// cmd    - command buffer to record into
// src    - source image; its current_layout is used for the transfer
// dst    - destination buffer; must be large enough for the layout above
// region - copy description; for depth-stencil formats the aspect mask must be DEPTH | STENCIL
void copy_image_to_buffer(VkCommandBuffer cmd, const vk::image* src, const vk::buffer* dst, const VkBufferImageCopy& region)
{
    switch (src->format())
    {
    default:
    {
        vkCmdCopyImageToBuffer(cmd, src->value, src->current_layout, dst->value, 1, &region);
        break;
    }
    case VK_FORMAT_D24_UNORM_S8_UINT:
    case VK_FORMAT_D32_SFLOAT_S8_UINT:
    {
        // Parentheses are required: '==' binds tighter than '|', so without them the
        // expression is always non-zero and the check can never fail.
        verify(HERE), region.imageSubresource.aspectMask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);

        const u32 out_w = region.bufferRowLength? region.bufferRowLength : region.imageExtent.width;
        const u32 out_h = region.bufferImageHeight? region.bufferImageHeight : region.imageExtent.height;
        const u32 packed_length = out_w * out_h * 4;
        const u32 in_depth_size = packed_length;
        const u32 in_stencil_size = out_w * out_h;

        const VkDeviceSize z_offset = align<VkDeviceSize>(region.bufferOffset + packed_length, 256);
        const VkDeviceSize s_offset = align<VkDeviceSize>(z_offset + in_depth_size, 256);

        // The end of the working area must account for the alignment padding in front of each bank
        const VkDeviceSize allocation_end = s_offset + in_stencil_size;
        verify(HERE), dst->size() >= allocation_end;

        // 1. Copy the depth and stencil blocks to separate banks
        VkBufferImageCopy sub_regions[2];
        sub_regions[0] = sub_regions[1] = region;
        sub_regions[0].bufferOffset = z_offset;
        sub_regions[0].imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
        sub_regions[1].bufferOffset = s_offset;
        sub_regions[1].imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT;
        vkCmdCopyImageToBuffer(cmd, src->value, src->current_layout, dst->value, 2, sub_regions);

        // 2. Interleave the separated data blocks with a compute job
        vk::cs_interleave_task *job;
        if (src->format() == VK_FORMAT_D24_UNORM_S8_UINT)
        {
            job = vk::get_compute_task<vk::cs_gather_d24x8>();
        }
        else
        {
            job = vk::get_compute_task<vk::cs_gather_d32x8>();
        }

        // Make both banks (including the padding between them) visible to the compute job
        vk::insert_buffer_memory_barrier(cmd, dst->value, z_offset, allocation_end - z_offset,
            VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
            VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);

        job->run(cmd, dst, static_cast<u32>(region.bufferOffset), packed_length, static_cast<u32>(z_offset), static_cast<u32>(s_offset));

        // Make the packed result visible to subsequent transfer reads
        vk::insert_buffer_memory_barrier(cmd, dst->value, region.bufferOffset, packed_length,
            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
            VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
        break;
    }
    }
}
// Records a copy of buffer contents into an image region.
//
// For most formats this is a plain vkCmdCopyBufferToImage. For combined
// depth-stencil formats (D24S8 / D32S8) the packed 32-bit texels found at
// region.bufferOffset are first scattered by a compute job into separate depth
// and stencil banks placed after the packed block, and the two banks are then
// copied into the matching image aspects.
//
// Buffer layout used by the depth-stencil path (all inside 'src'):
//   [bufferOffset, +packed_length)     packed input
//   [z_offset,     +in_depth_size)     depth bank, 256-byte aligned
//   [s_offset,     +stencil_bank_size) stencil bank, 256-byte aligned
//
// cmd    - command buffer to record into
// src    - source buffer; the area after the packed block is used as scratch space
// dst    - destination image; its current_layout is used for the transfer
// region - copy description
void copy_buffer_to_image(VkCommandBuffer cmd, const vk::buffer* src, const vk::image* dst, const VkBufferImageCopy& region)
{
    switch (dst->format())
    {
    default:
    {
        vkCmdCopyBufferToImage(cmd, src->value, dst->value, dst->current_layout, 1, &region);
        break;
    }
    case VK_FORMAT_D24_UNORM_S8_UINT:
    case VK_FORMAT_D32_SFLOAT_S8_UINT:
    {
        const u32 out_w = region.bufferRowLength? region.bufferRowLength : region.imageExtent.width;
        const u32 out_h = region.bufferImageHeight? region.bufferImageHeight : region.imageExtent.height;
        const u32 packed_length = out_w * out_h * 4;
        const u32 in_depth_size = packed_length;
        const u32 in_stencil_size = out_w * out_h;

        // vkCmdFillBuffer requires a size that is a multiple of 4, and the scatter job
        // updates the stencil bank one dword at a time, so round the bank up to whole dwords.
        const u32 stencil_bank_size = align<u32>(in_stencil_size, 4);

        const VkDeviceSize z_offset = align<VkDeviceSize>(region.bufferOffset + packed_length, 256);
        const VkDeviceSize s_offset = align<VkDeviceSize>(z_offset + in_depth_size, 256);

        // The end of the working area must account for the alignment padding in front of each bank
        const VkDeviceSize allocation_end = s_offset + stencil_bank_size;
        verify(HERE), src->size() >= allocation_end;

        // Zero out the stencil block; the scatter job ORs stencil bytes into it
        vkCmdFillBuffer(cmd, src->value, s_offset, stencil_bank_size, 0);

        vk::insert_buffer_memory_barrier(cmd, src->value, s_offset, stencil_bank_size,
            VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
            VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);

        // 1. Scatter the interleaved data into separate depth and stencil blocks
        vk::cs_interleave_task *job;
        if (dst->format() == VK_FORMAT_D24_UNORM_S8_UINT)
        {
            job = vk::get_compute_task<vk::cs_scatter_d24x8>();
        }
        else
        {
            job = vk::get_compute_task<vk::cs_scatter_d32x8>();
        }

        job->run(cmd, src, static_cast<u32>(region.bufferOffset), packed_length, static_cast<u32>(z_offset), static_cast<u32>(s_offset));

        // Make both banks (including the padding between them) visible to the transfer below
        vk::insert_buffer_memory_barrier(cmd, src->value, z_offset, allocation_end - z_offset,
            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
            VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);

        // 2. Copy the separated blocks into the target
        VkBufferImageCopy sub_regions[2];
        sub_regions[0] = sub_regions[1] = region;
        sub_regions[0].bufferOffset = z_offset;
        sub_regions[0].imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
        sub_regions[1].bufferOffset = s_offset;
        sub_regions[1].imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT;
        vkCmdCopyBufferToImage(cmd, src->value, dst->value, dst->current_layout, 2, sub_regions);
        break;
    }
    }
}
void copy_image_typeless(const vk::command_buffer& cmd, const vk::image* src, const vk::image* dst, const areai& src_rect, const areai& dst_rect,
u32 mipmaps, VkImageAspectFlags src_aspect, VkImageAspectFlags dst_aspect, VkImageAspectFlags src_transfer_mask, VkImageAspectFlags dst_transfer_mask)
{
@ -138,7 +259,7 @@ namespace vk
for (u32 mip_level = 0; mip_level < mipmaps; ++mip_level)
{
vkCmdCopyImageToBuffer(cmd, src->value, preferred_src_format, scratch_buf->value, 1, &src_copy);
vk::copy_image_to_buffer(cmd, src, scratch_buf, src_copy);
const auto src_convert = get_format_convert_flags(src->info.format);
const auto dst_convert = get_format_convert_flags(dst->info.format);
@ -187,7 +308,7 @@ namespace vk
}
}
vkCmdCopyBufferToImage(cmd, scratch_buf->value, dst->value, preferred_dst_format, 1, &dst_copy);
vk::copy_buffer_to_image(cmd, scratch_buf, dst, dst_copy);
src_copy.imageSubresource.mipLevel++;
dst_copy.imageSubresource.mipLevel++;
@ -438,9 +559,6 @@ namespace vk
u32 block_in_pixel = get_format_block_size_in_texel(format);
u8 block_size_in_bytes = get_format_block_size_in_bytes(format);
//TODO: Depth and stencil transfer together
flags &= ~(VK_IMAGE_ASPECT_STENCIL_BIT);
for (const rsx_subresource_layout &layout : subresource_layout)
{
u32 row_pitch = align(layout.width_in_block * block_size_in_bytes, 256);
@ -449,29 +567,26 @@ namespace vk
//Map with extra padding bytes in case of realignment
size_t offset_in_buffer = upload_heap.alloc<512>(image_linear_size + 8);
void *mapped_buffer = upload_heap.map(offset_in_buffer, image_linear_size + 8);
void *dst = mapped_buffer;
VkBuffer buffer_handle = upload_heap.heap->value;
if (dst_image->info.format == VK_FORMAT_D24_UNORM_S8_UINT)
{
//Misalign intentionally to skip the first stencil byte in D24S8 data
//Ensures the real depth data is dword aligned
//Skip leading dword when writing to texture
offset_in_buffer += 4;
dst = (char*)(mapped_buffer) + 4 - 1;
}
gsl::span<gsl::byte> mapped{ (gsl::byte*)dst, ::narrow<int>(image_linear_size) };
gsl::span<gsl::byte> mapped{ (gsl::byte*)mapped_buffer, ::narrow<int>(image_linear_size) };
upload_texture_subresource(mapped, layout, format, is_swizzled, false, 256);
upload_heap.unmap();
if (dst_image->info.format == VK_FORMAT_D32_SFLOAT_S8_UINT)
{
// Run GPU compute task to convert the D24x8 to FP32
// NOTE: On commandbuffer submission, the HOST_WRITE to ALL_COMMANDS barrier is implicitly inserted according to spec
// No need to add another explicit barrier unless a driver bug is found
VkBufferImageCopy copy_info = {};
copy_info.bufferOffset = offset_in_buffer;
copy_info.imageExtent.height = layout.height_in_block * block_in_pixel;
copy_info.imageExtent.width = layout.width_in_block * block_in_pixel;
copy_info.imageExtent.depth = layout.depth;
copy_info.imageSubresource.aspectMask = flags;
copy_info.imageSubresource.layerCount = 1;
copy_info.imageSubresource.baseArrayLayer = mipmap_level / mipmap_count;
copy_info.imageSubresource.mipLevel = mipmap_level % mipmap_count;
copy_info.bufferRowLength = block_in_pixel * row_pitch / block_size_in_bytes;
if (dst_image->info.format == VK_FORMAT_D24_UNORM_S8_UINT ||
dst_image->info.format == VK_FORMAT_D32_SFLOAT_S8_UINT)
{
// Executing GPU tasks on host_visible RAM is awful, copy to device-local buffer instead
auto scratch_buf = vk::get_scratch_buffer();
@ -485,27 +600,14 @@ namespace vk
insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, image_linear_size, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
vk::get_compute_task<vk::cs_shuffle_d24x8_f32>()->run(cmd, upload_heap.heap.get(), image_linear_size, (u32)offset_in_buffer);
insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, image_linear_size, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
buffer_handle = scratch_buf->value;
offset_in_buffer = 0;
copy_info.bufferOffset = 0;
vk::copy_buffer_to_image(cmd, scratch_buf, dst_image, copy_info);
}
else
{
vkCmdCopyBufferToImage(cmd, buffer_handle, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy_info);
}
VkBufferImageCopy copy_info = {};
copy_info.bufferOffset = offset_in_buffer;
copy_info.imageExtent.height = layout.height_in_block * block_in_pixel;
copy_info.imageExtent.width = layout.width_in_block * block_in_pixel;
copy_info.imageExtent.depth = layout.depth;
copy_info.imageSubresource.aspectMask = flags;
copy_info.imageSubresource.layerCount = 1;
copy_info.imageSubresource.baseArrayLayer = mipmap_level / mipmap_count;
copy_info.imageSubresource.mipLevel = mipmap_level % mipmap_count;
copy_info.bufferRowLength = block_in_pixel * row_pitch / block_size_in_bytes;
vkCmdCopyBufferToImage(cmd, buffer_handle, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy_info);
mipmap_level++;
}
}