Mirror of https://github.com/RPCS3/rpcs3.git
vk: Implement copy-to-buffer and copy-from-buffer for depth_stencil formats
- Allows D24S8 and D32S8 transport via typeless channels
- Allows uploading and downloading D24S8 data easily
- TODO: Implement optional byteswapping to fix flushed readbacks with the same method
parent 366e4c2422
commit 0f7af391d7
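As a quick orientation, the sketch below shows roughly how a caller would use the two helpers this commit introduces. It is not part of the diff: the signatures match the declarations added to the header further down, while the surrounding objects (command buffer, image, buffer, dimensions, the readback_depth_stencil name) are assumptions supplied for illustration and rely on the project's own vk:: types.

// Hypothetical usage sketch of the new depth-stencil transport helpers (not part of the diff).
// vk::copy_image_to_buffer packs D24S8/D32S8 into dwords at region.bufferOffset, borrowing the
// tail of the buffer as scratch space for the separated depth and stencil banks, so the buffer
// must be sized for the packed block plus both banks.
static void readback_depth_stencil(VkCommandBuffer cmd, const vk::image* ds_image,
                                   const vk::buffer* readback, u32 width, u32 height)
{
    VkBufferImageCopy region = {};
    region.imageExtent = { width, height, 1 };
    region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
    region.imageSubresource.layerCount = 1;

    vk::copy_image_to_buffer(cmd, ds_image, readback, region);    // download (gather path)
    // vk::copy_buffer_to_image(cmd, readback, ds_image, region); // upload (scatter path), same region
}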
@@ -1528,14 +1528,14 @@ namespace rsx
{
case CELL_GCM_TEXTURE_X16:
{
// NOP, a simple way to quickly read DEPTH16 data without shadow comparison
// A simple way to quickly read DEPTH16 data without shadow comparison
break;
}
case CELL_GCM_TEXTURE_A8R8G8B8:
case CELL_GCM_TEXTURE_D8R8G8B8:
case CELL_GCM_TEXTURE_A4R4G4B4: //TODO
case CELL_GCM_TEXTURE_R5G6B5: //TODO
{
// Reading depth data as XRGB8 is supported with in-shader conversion
// TODO: Optionally add support for 16-bit formats (not necessary since type casts are easy with that)
u32 remap = tex.remap();
result.redirected_textures |= (1 << i);
result.texture_scale[i][2] = (f32&)remap;
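For context, the (f32&)remap cast above stores the raw 32-bit remap control word in a float uniform slot without any numeric conversion; the shader can then recover the integer with floatBitsToUint() on that component. A minimal, purely illustrative sketch of the same bit-preserving store (std::memcpy is the portable equivalent of the reference cast):

#include <cstdint>
#include <cstring>

// Illustrative only: bit-preserving u32 -> f32 pass-through, mirroring (f32&)remap above.
inline float pack_bits_as_float(std::uint32_t bits)
{
    float f;
    std::memcpy(&f, &bits, sizeof(f)); // copy the bit pattern, no numeric conversion
    return f;
}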
@@ -10,6 +10,7 @@ namespace vk
std::string m_src;
vk::glsl::shader m_shader;
std::unique_ptr<vk::glsl::program> m_program;
std::unique_ptr<vk::buffer> m_param_buffer;

vk::descriptor_pool m_descriptor_pool;
VkDescriptorSet m_descriptor_set = nullptr;
@@ -19,20 +20,22 @@ namespace vk

bool initialized = false;
bool unroll_loops = true;
bool uniform_inputs = false;
u32 optimal_group_size = 1;
u32 optimal_kernel_size = 1;

void init_descriptors()
{
VkDescriptorPoolSize descriptor_pool_sizes[1] =
VkDescriptorPoolSize descriptor_pool_sizes[2] =
{
{ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_MAX_COMPUTE_TASKS },
{ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_MAX_COMPUTE_TASKS }
};

//Reserve descriptor pools
m_descriptor_pool.create(*get_current_renderer(), descriptor_pool_sizes, 1);

std::vector<VkDescriptorSetLayoutBinding> bindings(1);
std::vector<VkDescriptorSetLayoutBinding> bindings(2);

bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
bindings[0].descriptorCount = 1;
@@ -40,10 +43,16 @@ namespace vk
bindings[0].binding = 0;
bindings[0].pImmutableSamplers = nullptr;

bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[1].descriptorCount = 1;
bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
bindings[1].binding = 1;
bindings[1].pImmutableSamplers = nullptr;

VkDescriptorSetLayoutCreateInfo infos = {};
infos.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
infos.pBindings = bindings.data();
infos.bindingCount = (u32)bindings.size();
infos.bindingCount = uniform_inputs? 2u : 1u;

CHECK_RESULT(vkCreateDescriptorSetLayout(*get_current_renderer(), &infos, nullptr, &m_descriptor_layout));
@@ -88,6 +97,7 @@ namespace vk
{
m_shader.destroy();
m_program.reset();
m_param_buffer.reset();

vkDestroyDescriptorSetLayout(*get_current_renderer(), m_descriptor_layout, nullptr);
vkDestroyPipelineLayout(*get_current_renderer(), m_pipeline_layout, nullptr);
@@ -162,11 +172,32 @@ namespace vk

struct cs_shuffle_base : compute_task
{
vk::buffer* m_data;
const vk::buffer* m_data;
u32 m_data_offset = 0;
u32 m_data_length = 0;
u32 kernel_size = 1;

std::string variables, work_kernel, loop_advance, suffix;

cs_shuffle_base()
{
work_kernel =
{
" value = data[index];\n"
" data[index] = %f(value);\n"
};

loop_advance =
{
" index++;\n"
};

suffix =
{
"}\n"
};
}

void build(const char* function_name, u32 _kernel_size = 0)
{
// Initialize to allow detecting optimal settings
@@ -178,7 +209,8 @@ namespace vk
{
"#version 430\n"
"layout(local_size_x=%ws, local_size_y=1, local_size_z=1) in;\n"
"layout(std430, set=0, binding=0) buffer ssbo{ uint data[]; };\n\n"
"layout(std430, set=0, binding=0) buffer ssbo{ uint data[]; };\n"
"%ub"
"\n"
"#define KERNEL_SIZE %ks\n"
"\n"
@@ -188,38 +220,27 @@ namespace vk
"#define bswap_u16_u32(bits) (bits & 0xFFFF) << 16 | (bits & 0xFFFF0000) >> 16\n"
"\n"
"// Depth format conversions\n"
"#define d24x8_to_f32(bits) floatBitsToUint(float(bits >> 8) / 16777214.f)\n"
"#define d24_to_f32(bits) floatBitsToUint(float(bits) / 16777215.f)\n"
"#define f32_to_d24(bits) uint(uintBitsToFloat(bits) * 16777215.f)\n"
"#define d24x8_to_f32(bits) d24_to_f32(bits >> 8)\n"
"#define d24x8_to_d24x8_swapped(bits) (bits & 0xFF00) | (bits & 0xFF0000) >> 16 | (bits & 0xFF) << 16\n"
"#define f32_to_d24x8_swapped(bits) d24x8_to_d24x8_swapped(uint(uintBitsToFloat(bits) * 16777214.f))\n"
"#define f32_to_d24x8_swapped(bits) d24x8_to_d24x8_swapped(f32_to_d24(bits))\n"
"\n"
"void main()\n"
"{\n"
" uint index = gl_GlobalInvocationID.x * KERNEL_SIZE;\n"
" uint value;\n"
" %vars"
"\n"
};

std::string work_kernel =
{
" value = data[index];\n"
" data[index] = %f(value);\n"
};

std::string loop_advance =
{
" index++;\n"
};

const std::string suffix =
{
"}\n"
};

const std::pair<std::string, std::string> syntax_replace[] =
{
{ "%ws", std::to_string(optimal_group_size) },
{ "%ks", std::to_string(kernel_size) },
{ "%f", function_name }
{ "%vars", variables },
{ "%f", function_name },
{ "%ub", uniform_inputs? "layout(std140, set=0, binding=1) uniform ubo{ uvec4 params[16]; };\n" : "" },
};

m_src = fmt::replace_all(m_src, syntax_replace);
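A CPU-side restatement of the new d24_to_f32 / f32_to_d24 macros may help when sanity-checking the round trip. It is illustrative only: the GLSL versions additionally pass through floatBitsToUint()/uintBitsToFloat() because the SSBO is declared as an array of uint. The constant 16777215 is 2^24 - 1, the largest raw D24 value.

#include <cstdint>

// Illustrative CPU mirror of the shader macros above (no uint bit-casting needed here).
inline float d24_to_f32(std::uint32_t d24)
{
    return static_cast<float>(d24) / 16777215.f;   // 0x000000 -> 0.0f, 0xFFFFFF -> 1.0f
}

inline std::uint32_t f32_to_d24(float depth)
{
    return static_cast<std::uint32_t>(depth * 16777215.f);
}
// The round trip is approximate: the divide/multiply pair plus the truncating cast can be
// off by one unit for some inputs.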
@@ -262,9 +283,29 @@ namespace vk
void bind_resources() override
{
m_program->bind_buffer({ m_data->value, m_data_offset, m_data_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set);

if (uniform_inputs)
{
verify(HERE), m_param_buffer, m_param_buffer->value != VK_NULL_HANDLE;
m_program->bind_buffer({ m_param_buffer->value, 0, 256 }, 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, m_descriptor_set);
}
}

void run(VkCommandBuffer cmd, vk::buffer* data, u32 data_length, u32 data_offset = 0)
void set_parameters(VkCommandBuffer cmd, const u32* params, u8 count)
{
verify(HERE), uniform_inputs;

if (!m_param_buffer)
{
auto pdev = vk::get_current_renderer();
m_param_buffer = std::make_unique<vk::buffer>(*pdev, 256, pdev->get_memory_mapping().host_visible_coherent,
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0);
}

vkCmdUpdateBuffer(cmd, m_param_buffer->value, 0, count * sizeof(u32), params);
}

void run(VkCommandBuffer cmd, const vk::buffer* data, u32 data_length, u32 data_offset = 0)
{
m_data = data;
m_data_offset = data_offset;
@@ -274,7 +315,7 @@ namespace vk
const auto num_bytes_to_process = align(data_length, num_bytes_per_invocation);
const auto num_invocations = num_bytes_to_process / num_bytes_per_invocation;

if (num_bytes_to_process > data->size())
if ((num_bytes_to_process + data_offset) > data->size())
{
// Technically robust buffer access should keep the driver from crashing in OOB situations
LOG_ERROR(RSX, "Inadequate buffer length submitted for a compute operation."
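For a feel of the dispatch sizing in run() above, here is a worked example. It assumes num_bytes_per_invocation is optimal_group_size * kernel_size * 4 bytes (one dword per kernel iteration per thread); that definition sits outside this hunk, so treat the constant as illustrative rather than the exact upstream expression.

#include <cstdint>

constexpr std::uint32_t align_to(std::uint32_t value, std::uint32_t alignment)
{
    return (value + alignment - 1) / alignment * alignment;
}

// e.g. optimal_group_size = 64, kernel_size = 1 -> 256 bytes handled per dispatched workgroup
constexpr std::uint32_t num_bytes_per_invocation = 64 * 1 * 4;
constexpr std::uint32_t data_length          = 1000;                                             // not a multiple of 256
constexpr std::uint32_t num_bytes_to_process = align_to(data_length, num_bytes_per_invocation);  // 1024
constexpr std::uint32_t num_invocations      = num_bytes_to_process / num_bytes_per_invocation;  // 4

static_assert(num_bytes_to_process == 1024 && num_invocations == 4);
// The guard above therefore requires data_offset + 1024 <= data->size(): the last workgroup
// touches bytes past data_length, up to the aligned boundary.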
@@ -339,6 +380,134 @@ namespace vk
}
};
// NOTE: D24S8 layout has the stencil in the MSB! It's actually S8|D24|S8|D24 starting at offset 0
struct cs_interleave_task : cs_shuffle_base
{
u32 m_ssbo_length = 0;

cs_interleave_task()
{
uniform_inputs = true;

variables =
{
" uint block_length = params[0].x >> 2;\n"
" uint z_offset = params[0].y >> 2;\n"
" uint s_offset = params[0].z >> 2;\n"
" uint depth;\n"
" uint stencil;\n"
" uint stencil_shift;\n"
" uint stencil_offset;\n"
};
}

void bind_resources() override
{
m_program->bind_buffer({ m_data->value, m_data_offset, m_ssbo_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set);

if (uniform_inputs)
{
verify(HERE), m_param_buffer;
m_program->bind_buffer({ m_param_buffer->value, 0, 256 }, 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, m_descriptor_set);
}
}

void run(VkCommandBuffer cmd, const vk::buffer* data, u32 data_offset, u32 data_length, u32 zeta_offset, u32 stencil_offset)
{
u32 parameters[3] = { data_length, zeta_offset - data_offset, stencil_offset - data_offset };
set_parameters(cmd, parameters, 3);

m_ssbo_length = stencil_offset + (data_length / 4) - data_offset;
cs_shuffle_base::run(cmd, data, data_length, data_offset);
}
};

struct cs_gather_d24x8 : cs_interleave_task
{
cs_gather_d24x8()
{
work_kernel =
{
" if (index >= block_length)\n"
" return;\n"
"\n"
" depth = data[index + z_offset] & 0x00FFFFFF;\n"
" stencil_offset = (index / 4);\n"
" stencil_shift = (index % 4) * 8;\n"
" stencil = data[stencil_offset + s_offset];\n"
" stencil = (stencil >> stencil_shift) & 0xFF;\n"
" value = (depth << 8) | stencil;\n"
" data[index] = value;\n"
};

cs_shuffle_base::build("");
}
};
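Read sequentially, the work_kernel above interleaves the separated banks one output dword at a time. A single-threaded CPU restatement of the same gather, with illustrative names, purely for reference:

#include <cstdint>
#include <vector>

// Mirrors cs_gather_d24x8: interleave a separated D24 bank and S8 bank into packed D24S8 dwords.
// `data` is the buffer viewed as 32-bit words; z_offset/s_offset are dword offsets of the banks,
// i.e. what the shader derives from params[0].yz >> 2.
void gather_d24x8_cpu(std::vector<std::uint32_t>& data, std::uint32_t block_length,
                      std::uint32_t z_offset, std::uint32_t s_offset)
{
    for (std::uint32_t index = 0; index < block_length; ++index)
    {
        const std::uint32_t depth   = data[index + z_offset] & 0x00FFFFFF;   // D24 in the low bits of the depth bank
        const std::uint32_t packed  = data[s_offset + (index / 4)];          // four stencil bytes per dword
        const std::uint32_t stencil = (packed >> ((index % 4) * 8)) & 0xFF;
        data[index] = (depth << 8) | stencil;                                // depth in bits 8..31, stencil in bits 0..7
    }
}

cs_gather_d32x8 below differs only in converting the 32-bit float depth to D24 first via f32_to_d24().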
struct cs_gather_d32x8 : cs_interleave_task
{
cs_gather_d32x8()
{
work_kernel =
{
" if (index >= block_length)\n"
" return;\n"
"\n"
" depth = f32_to_d24(data[index + z_offset]);\n"
" stencil_offset = (index / 4);\n"
" stencil_shift = (index % 4) * 8;\n"
" stencil = data[stencil_offset + s_offset];\n"
" stencil = (stencil >> stencil_shift) & 0xFF;\n"
" value = (depth << 8) | stencil;\n"
" data[index] = value;\n"
};

cs_shuffle_base::build("");
}
};

struct cs_scatter_d24x8 : cs_interleave_task
{
cs_scatter_d24x8()
{
work_kernel =
{
" if (index >= block_length)\n"
" return;\n"
"\n"
" value = data[index];\n"
" data[index + z_offset] = (value >> 8);\n"
" stencil_offset = (index / 4);\n"
" stencil_shift = (index % 4) * 8;\n"
" stencil = (value & 0xFF) << stencil_shift;\n"
" data[stencil_offset + s_offset] |= stencil;\n"
};

cs_shuffle_base::build("");
}
};

struct cs_scatter_d32x8 : cs_interleave_task
{
cs_scatter_d32x8()
{
work_kernel =
{
" if (index >= block_length)\n"
" return;\n"
"\n"
" value = data[index];\n"
" data[index + z_offset] = d24_to_f32(value >> 8);\n"
" stencil_offset = (index / 4);\n"
" stencil_shift = (index % 4) * 8;\n"
" stencil = (value & 0xFF) << stencil_shift;\n"
" data[stencil_offset + s_offset] |= stencil;\n"
};

cs_shuffle_base::build("");
}
};
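The scatter kernels invert the gather. A single-threaded CPU restatement of cs_scatter_d24x8, with illustrative names, for reference:

#include <cstdint>
#include <vector>

// Mirrors cs_scatter_d24x8: split packed D24S8 dwords back into a separate depth bank and a
// stencil bank. The stencil bank must be zero-filled beforehand (copy_buffer_to_image below
// does this with vkCmdFillBuffer) because stencil bytes are OR-ed into shared dwords.
void scatter_d24x8_cpu(std::vector<std::uint32_t>& data, std::uint32_t block_length,
                       std::uint32_t z_offset, std::uint32_t s_offset)
{
    for (std::uint32_t index = 0; index < block_length; ++index)
    {
        const std::uint32_t value = data[index];                                  // packed texel
        data[index + z_offset] = value >> 8;                                      // depth bank gets the D24 value
        data[s_offset + (index / 4)] |= (value & 0xFF) << ((index % 4) * 8);      // stencil byte into its slot
    }
}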
// TODO: Replace with a proper manager
extern std::unordered_map<u32, std::unique_ptr<vk::compute_task>> g_compute_tasks;
@@ -146,6 +146,9 @@ namespace vk
void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout, const VkImageSubresourceRange& range);
void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout);

void copy_image_to_buffer(VkCommandBuffer cmd, const vk::image* src, const vk::buffer* dst, const VkBufferImageCopy& region);
void copy_buffer_to_image(VkCommandBuffer cmd, const vk::buffer* src, const vk::image* dst, const VkBufferImageCopy& region);

void copy_image_typeless(const command_buffer &cmd, const image *src, const image *dst, const areai& src_rect, const areai& dst_rect,
u32 mipmaps, VkImageAspectFlags src_aspect, VkImageAspectFlags dst_aspect,
VkImageAspectFlags src_transfer_mask = 0xFF, VkImageAspectFlags dst_transfer_mask = 0xFF);
@@ -106,6 +106,127 @@ namespace vk
fmt::throw_exception("Unknown vkFormat 0x%x" HERE, (u32)format);
}

void copy_image_to_buffer(VkCommandBuffer cmd, const vk::image* src, const vk::buffer* dst, const VkBufferImageCopy& region)
{
switch (src->format())
{
default:
{
vkCmdCopyImageToBuffer(cmd, src->value, src->current_layout, dst->value, 1, &region);
break;
}
case VK_FORMAT_D24_UNORM_S8_UINT:
case VK_FORMAT_D32_SFLOAT_S8_UINT:
{
verify(HERE), region.imageSubresource.aspectMask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
const u32 out_w = region.bufferRowLength? region.bufferRowLength : region.imageExtent.width;
const u32 out_h = region.bufferImageHeight? region.bufferImageHeight : region.imageExtent.height;
const u32 packed_length = out_w * out_h * 4;
const u32 in_depth_size = packed_length;
const u32 in_stencil_size = out_w * out_h;

const u32 allocation_end = region.bufferOffset + packed_length + in_depth_size + in_stencil_size;
verify(HERE), dst->size() >= allocation_end;

const VkDeviceSize z_offset = align<VkDeviceSize>(region.bufferOffset + packed_length, 256);
const VkDeviceSize s_offset = align<VkDeviceSize>(z_offset + in_depth_size, 256);

// 1. Copy the depth and stencil blocks to separate banks
VkBufferImageCopy sub_regions[2];
sub_regions[0] = sub_regions[1] = region;
sub_regions[0].bufferOffset = z_offset;
sub_regions[0].imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
sub_regions[1].bufferOffset = s_offset;
sub_regions[1].imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT;
vkCmdCopyImageToBuffer(cmd, src->value, src->current_layout, dst->value, 2, sub_regions);

// 2. Interleave the separated data blocks with a compute job
vk::cs_interleave_task *job;
if (src->format() == VK_FORMAT_D24_UNORM_S8_UINT)
{
job = vk::get_compute_task<vk::cs_gather_d24x8>();
}
else
{
job = vk::get_compute_task<vk::cs_gather_d32x8>();
}

vk::insert_buffer_memory_barrier(cmd, dst->value, z_offset, in_depth_size + in_stencil_size,
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);

job->run(cmd, dst, (u32)region.bufferOffset, packed_length, z_offset, s_offset);

vk::insert_buffer_memory_barrier(cmd, dst->value, region.bufferOffset, packed_length,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
break;
}
}
}
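To make the offset math above concrete, here are the numbers for a hypothetical 256x256 D24S8 readback with region.bufferOffset = 0 (illustrative only, not part of the diff):

#include <cstdint>

constexpr std::uint32_t align256(std::uint32_t v) { return (v + 255u) & ~255u; }

constexpr std::uint32_t out_w = 256, out_h = 256;                              // illustrative surface size
constexpr std::uint32_t packed_length   = out_w * out_h * 4;                   // 262144: final interleaved block at bufferOffset 0
constexpr std::uint32_t in_depth_size   = packed_length;                       // separated 32-bit depth bank
constexpr std::uint32_t in_stencil_size = out_w * out_h;                       // 65536: separated stencil bytes
constexpr std::uint32_t z_offset        = align256(0 + packed_length);         // 262144
constexpr std::uint32_t s_offset        = align256(z_offset + in_depth_size);  // 524288
constexpr std::uint32_t allocation_end  = 0 + packed_length + in_depth_size + in_stencil_size;

static_assert(z_offset == 262144 && s_offset == 524288 && allocation_end == 589824);
// The gather job is launched with parameters { packed_length, z_offset, s_offset } relative to
// bufferOffset; the shader divides the offsets by 4 (params[0].yz >> 2) to address dwords.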
void copy_buffer_to_image(VkCommandBuffer cmd, const vk::buffer* src, const vk::image* dst, const VkBufferImageCopy& region)
{
switch (dst->format())
{
default:
{
vkCmdCopyBufferToImage(cmd, src->value, dst->value, dst->current_layout, 1, &region);
break;
}
case VK_FORMAT_D24_UNORM_S8_UINT:
case VK_FORMAT_D32_SFLOAT_S8_UINT:
{
const u32 out_w = region.bufferRowLength? region.bufferRowLength : region.imageExtent.width;
const u32 out_h = region.bufferImageHeight? region.bufferImageHeight : region.imageExtent.height;
const u32 packed_length = out_w * out_h * 4;
const u32 in_depth_size = packed_length;
const u32 in_stencil_size = out_w * out_h;

const u32 allocation_end = region.bufferOffset + packed_length + in_depth_size + in_stencil_size;
verify(HERE), src->size() >= allocation_end;

const VkDeviceSize z_offset = align<VkDeviceSize>(region.bufferOffset + packed_length, 256);
const VkDeviceSize s_offset = align<VkDeviceSize>(z_offset + in_depth_size, 256);

// Zero out the stencil block
vkCmdFillBuffer(cmd, src->value, s_offset, in_stencil_size, 0);

vk::insert_buffer_memory_barrier(cmd, src->value, s_offset, in_stencil_size,
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);

// 1. Scatter the interleaved data into separate depth and stencil blocks
vk::cs_interleave_task *job;
if (dst->format() == VK_FORMAT_D24_UNORM_S8_UINT)
{
job = vk::get_compute_task<vk::cs_scatter_d24x8>();
}
else
{
job = vk::get_compute_task<vk::cs_scatter_d32x8>();
}

job->run(cmd, src, (u32)region.bufferOffset, packed_length, z_offset, s_offset);

vk::insert_buffer_memory_barrier(cmd, src->value, z_offset, in_depth_size + in_stencil_size,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);

// 2. Copy the separated blocks into the target
VkBufferImageCopy sub_regions[2];
sub_regions[0] = sub_regions[1] = region;
sub_regions[0].bufferOffset = z_offset;
sub_regions[0].imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
sub_regions[1].bufferOffset = s_offset;
sub_regions[1].imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT;
vkCmdCopyBufferToImage(cmd, src->value, dst->value, dst->current_layout, 2, sub_regions);
break;
}
}
}
void copy_image_typeless(const vk::command_buffer& cmd, const vk::image* src, const vk::image* dst, const areai& src_rect, const areai& dst_rect,
u32 mipmaps, VkImageAspectFlags src_aspect, VkImageAspectFlags dst_aspect, VkImageAspectFlags src_transfer_mask, VkImageAspectFlags dst_transfer_mask)
{
@@ -138,7 +259,7 @@ namespace vk

for (u32 mip_level = 0; mip_level < mipmaps; ++mip_level)
{
vkCmdCopyImageToBuffer(cmd, src->value, preferred_src_format, scratch_buf->value, 1, &src_copy);
vk::copy_image_to_buffer(cmd, src, scratch_buf, src_copy);

const auto src_convert = get_format_convert_flags(src->info.format);
const auto dst_convert = get_format_convert_flags(dst->info.format);
@@ -187,7 +308,7 @@ namespace vk
}
}

vkCmdCopyBufferToImage(cmd, scratch_buf->value, dst->value, preferred_dst_format, 1, &dst_copy);
vk::copy_buffer_to_image(cmd, scratch_buf, dst, dst_copy);

src_copy.imageSubresource.mipLevel++;
dst_copy.imageSubresource.mipLevel++;
@@ -438,9 +559,6 @@ namespace vk
u32 block_in_pixel = get_format_block_size_in_texel(format);
u8 block_size_in_bytes = get_format_block_size_in_bytes(format);

//TODO: Depth and stencil transfer together
flags &= ~(VK_IMAGE_ASPECT_STENCIL_BIT);

for (const rsx_subresource_layout &layout : subresource_layout)
{
u32 row_pitch = align(layout.width_in_block * block_size_in_bytes, 256);
@@ -449,29 +567,26 @@ namespace vk
//Map with extra padding bytes in case of realignment
size_t offset_in_buffer = upload_heap.alloc<512>(image_linear_size + 8);
void *mapped_buffer = upload_heap.map(offset_in_buffer, image_linear_size + 8);
void *dst = mapped_buffer;
VkBuffer buffer_handle = upload_heap.heap->value;

if (dst_image->info.format == VK_FORMAT_D24_UNORM_S8_UINT)
{
//Misalign intentionally to skip the first stencil byte in D24S8 data
//Ensures the real depth data is dword aligned

//Skip leading dword when writing to texture
offset_in_buffer += 4;
dst = (char*)(mapped_buffer) + 4 - 1;
}

gsl::span<gsl::byte> mapped{ (gsl::byte*)dst, ::narrow<int>(image_linear_size) };
gsl::span<gsl::byte> mapped{ (gsl::byte*)mapped_buffer, ::narrow<int>(image_linear_size) };
upload_texture_subresource(mapped, layout, format, is_swizzled, false, 256);
upload_heap.unmap();

if (dst_image->info.format == VK_FORMAT_D32_SFLOAT_S8_UINT)
{
// Run GPU compute task to convert the D24x8 to FP32
// NOTE: On commandbuffer submission, the HOST_WRITE to ALL_COMMANDS barrier is implicitly inserted according to spec
// No need to add another explicit barrier unless a driver bug is found
VkBufferImageCopy copy_info = {};
copy_info.bufferOffset = offset_in_buffer;
copy_info.imageExtent.height = layout.height_in_block * block_in_pixel;
copy_info.imageExtent.width = layout.width_in_block * block_in_pixel;
copy_info.imageExtent.depth = layout.depth;
copy_info.imageSubresource.aspectMask = flags;
copy_info.imageSubresource.layerCount = 1;
copy_info.imageSubresource.baseArrayLayer = mipmap_level / mipmap_count;
copy_info.imageSubresource.mipLevel = mipmap_level % mipmap_count;
copy_info.bufferRowLength = block_in_pixel * row_pitch / block_size_in_bytes;

if (dst_image->info.format == VK_FORMAT_D24_UNORM_S8_UINT ||
dst_image->info.format == VK_FORMAT_D32_SFLOAT_S8_UINT)
{
// Executing GPU tasks on host_visible RAM is awful, copy to device-local buffer instead
auto scratch_buf = vk::get_scratch_buffer();

@@ -485,27 +600,14 @@ namespace vk
insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, image_linear_size, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);

vk::get_compute_task<vk::cs_shuffle_d24x8_f32>()->run(cmd, upload_heap.heap.get(), image_linear_size, (u32)offset_in_buffer);

insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, image_linear_size, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);

buffer_handle = scratch_buf->value;
offset_in_buffer = 0;
copy_info.bufferOffset = 0;
vk::copy_buffer_to_image(cmd, scratch_buf, dst_image, copy_info);
}
else
{
vkCmdCopyBufferToImage(cmd, buffer_handle, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy_info);
}

VkBufferImageCopy copy_info = {};
copy_info.bufferOffset = offset_in_buffer;
copy_info.imageExtent.height = layout.height_in_block * block_in_pixel;
copy_info.imageExtent.width = layout.width_in_block * block_in_pixel;
copy_info.imageExtent.depth = layout.depth;
copy_info.imageSubresource.aspectMask = flags;
copy_info.imageSubresource.layerCount = 1;
copy_info.imageSubresource.baseArrayLayer = mipmap_level / mipmap_count;
copy_info.imageSubresource.mipLevel = mipmap_level % mipmap_count;
copy_info.bufferRowLength = block_in_pixel * row_pitch / block_size_in_bytes;

vkCmdCopyBufferToImage(cmd, buffer_handle, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy_info);
mipmap_level++;
}
}
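As a closing example of the row-pitch handling in this upload path, here are the numbers for a hypothetical 100-texel-wide RGBA8 subresource (block_in_pixel = 1, block_size_in_bytes = 4); the values are illustrative only:

#include <cstdint>

constexpr std::uint32_t align256(std::uint32_t v) { return (v + 255u) & ~255u; }

constexpr std::uint32_t width_in_block      = 100;   // illustrative values
constexpr std::uint32_t block_size_in_bytes = 4;
constexpr std::uint32_t block_in_pixel      = 1;

constexpr std::uint32_t row_pitch         = align256(width_in_block * block_size_in_bytes);    // 400 -> 512 bytes
constexpr std::uint32_t buffer_row_length = block_in_pixel * row_pitch / block_size_in_bytes;  // 128 texels

static_assert(row_pitch == 512 && buffer_row_length == 128);
// Rows are written into the upload heap padded out to the 256-byte-aligned pitch, and
// copy_info.bufferRowLength tells Vulkan how many texels wide each padded row is.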