vk: Implement copy-to-buffer and copy-from-buffer for depth_stencil formats

- Allows D24S8 and D32S8 transport via typeless channels
- Allows uploading and downloading D24S8 data easily
- TODO: Implement optional byteswapping to fix flushed readbacks with the same method
parent 366e4c2422
commit 0f7af391d7
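For context before reading the diff: the new gather kernels merge a block of raw depth words and a block of raw stencil bytes into packed D24S8 texels. The following is a rough host-side sketch of the same math, illustrative only and not code from this commit; the function and parameter names are made up.

#include <cstddef>
#include <cstdint>
#include <vector>

// Mirrors what cs_gather_d24x8 does on the GPU: for each texel, take the low
// 24 bits of the copied depth word and the matching stencil byte, and pack
// them as (depth << 8) | stencil into the output block.
std::vector<uint32_t> gather_d24s8(const std::vector<uint32_t>& depth_words,
                                   const std::vector<uint8_t>& stencil_bytes)
{
    std::vector<uint32_t> packed(depth_words.size());
    for (size_t i = 0; i < depth_words.size(); ++i)
    {
        const uint32_t depth   = depth_words[i] & 0x00FFFFFF; // 24-bit depth
        const uint32_t stencil = stencil_bytes[i];            // 8-bit stencil
        packed[i] = (depth << 8) | stencil;
    }
    return packed;
}

The D32S8 variant is identical except that the depth word is first converted from a 32-bit float to a 24-bit integer (f32_to_d24 in the shader source below); the scatter kernels invert the operation for uploads.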
@@ -1528,14 +1528,14 @@ namespace rsx
 {
 case CELL_GCM_TEXTURE_X16:
 {
-// NOP, a simple way to quickly read DEPTH16 data without shadow comparison
+// A simple way to quickly read DEPTH16 data without shadow comparison
 break;
 }
 case CELL_GCM_TEXTURE_A8R8G8B8:
 case CELL_GCM_TEXTURE_D8R8G8B8:
-case CELL_GCM_TEXTURE_A4R4G4B4: //TODO
-case CELL_GCM_TEXTURE_R5G6B5: //TODO
 {
+// Reading depth data as XRGB8 is supported with in-shader conversion
+// TODO: Optionally add support for 16-bit formats (not necessary since type casts are easy with that)
 u32 remap = tex.remap();
 result.redirected_textures |= (1 << i);
 result.texture_scale[i][2] = (f32&)remap;
@@ -10,6 +10,7 @@ namespace vk
 std::string m_src;
 vk::glsl::shader m_shader;
 std::unique_ptr<vk::glsl::program> m_program;
+std::unique_ptr<vk::buffer> m_param_buffer;

 vk::descriptor_pool m_descriptor_pool;
 VkDescriptorSet m_descriptor_set = nullptr;
@@ -19,20 +20,22 @@ namespace vk

 bool initialized = false;
 bool unroll_loops = true;
+bool uniform_inputs = false;
 u32 optimal_group_size = 1;
 u32 optimal_kernel_size = 1;

 void init_descriptors()
 {
-VkDescriptorPoolSize descriptor_pool_sizes[1] =
+VkDescriptorPoolSize descriptor_pool_sizes[2] =
 {
 { VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_MAX_COMPUTE_TASKS },
+{ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_MAX_COMPUTE_TASKS }
 };

 //Reserve descriptor pools
 m_descriptor_pool.create(*get_current_renderer(), descriptor_pool_sizes, 1);

-std::vector<VkDescriptorSetLayoutBinding> bindings(1);
+std::vector<VkDescriptorSetLayoutBinding> bindings(2);

 bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
 bindings[0].descriptorCount = 1;
@@ -40,10 +43,16 @@ namespace vk
 bindings[0].binding = 0;
 bindings[0].pImmutableSamplers = nullptr;

+bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+bindings[1].descriptorCount = 1;
+bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
+bindings[1].binding = 1;
+bindings[1].pImmutableSamplers = nullptr;
+
 VkDescriptorSetLayoutCreateInfo infos = {};
 infos.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
 infos.pBindings = bindings.data();
-infos.bindingCount = (u32)bindings.size();
+infos.bindingCount = uniform_inputs? 2u : 1u;

 CHECK_RESULT(vkCreateDescriptorSetLayout(*get_current_renderer(), &infos, nullptr, &m_descriptor_layout));

@@ -88,6 +97,7 @@ namespace vk
 {
 m_shader.destroy();
 m_program.reset();
+m_param_buffer.reset();

 vkDestroyDescriptorSetLayout(*get_current_renderer(), m_descriptor_layout, nullptr);
 vkDestroyPipelineLayout(*get_current_renderer(), m_pipeline_layout, nullptr);
@@ -162,11 +172,32 @@ namespace vk

 struct cs_shuffle_base : compute_task
 {
-vk::buffer* m_data;
+const vk::buffer* m_data;
 u32 m_data_offset = 0;
 u32 m_data_length = 0;
 u32 kernel_size = 1;

+std::string variables, work_kernel, loop_advance, suffix;
+
+cs_shuffle_base()
+{
+work_kernel =
+{
+" value = data[index];\n"
+" data[index] = %f(value);\n"
+};
+
+loop_advance =
+{
+" index++;\n"
+};
+
+suffix =
+{
+"}\n"
+};
+}
+
 void build(const char* function_name, u32 _kernel_size = 0)
 {
 // Initialize to allow detecting optimal settings
@@ -178,7 +209,8 @@ namespace vk
 {
 "#version 430\n"
 "layout(local_size_x=%ws, local_size_y=1, local_size_z=1) in;\n"
-"layout(std430, set=0, binding=0) buffer ssbo{ uint data[]; };\n\n"
+"layout(std430, set=0, binding=0) buffer ssbo{ uint data[]; };\n"
+"%ub"
 "\n"
 "#define KERNEL_SIZE %ks\n"
 "\n"
@@ -188,38 +220,27 @@ namespace vk
 "#define bswap_u16_u32(bits) (bits & 0xFFFF) << 16 | (bits & 0xFFFF0000) >> 16\n"
 "\n"
 "// Depth format conversions\n"
-"#define d24x8_to_f32(bits) floatBitsToUint(float(bits >> 8) / 16777214.f)\n"
+"#define d24_to_f32(bits) floatBitsToUint(float(bits) / 16777215.f)\n"
+"#define f32_to_d24(bits) uint(uintBitsToFloat(bits) * 16777215.f)\n"
+"#define d24x8_to_f32(bits) d24_to_f32(bits >> 8)\n"
 "#define d24x8_to_d24x8_swapped(bits) (bits & 0xFF00) | (bits & 0xFF0000) >> 16 | (bits & 0xFF) << 16\n"
-"#define f32_to_d24x8_swapped(bits) d24x8_to_d24x8_swapped(uint(uintBitsToFloat(bits) * 16777214.f))\n"
+"#define f32_to_d24x8_swapped(bits) d24x8_to_d24x8_swapped(f32_to_d24(bits))\n"
 "\n"
 "void main()\n"
 "{\n"
 " uint index = gl_GlobalInvocationID.x * KERNEL_SIZE;\n"
 " uint value;\n"
+" %vars"
 "\n"
 };

-std::string work_kernel =
-{
-" value = data[index];\n"
-" data[index] = %f(value);\n"
-};
-
-std::string loop_advance =
-{
-" index++;\n"
-};
-
-const std::string suffix =
-{
-"}\n"
-};
-
 const std::pair<std::string, std::string> syntax_replace[] =
 {
 { "%ws", std::to_string(optimal_group_size) },
 { "%ks", std::to_string(kernel_size) },
-{ "%f", function_name }
+{ "%vars", variables },
+{ "%f", function_name },
+{ "%ub", uniform_inputs? "layout(std140, set=0, binding=1) uniform ubo{ uvec4 params[16]; };\n" : "" },
 };

 m_src = fmt::replace_all(m_src, syntax_replace);
@@ -262,9 +283,29 @@ namespace vk
 void bind_resources() override
 {
 m_program->bind_buffer({ m_data->value, m_data_offset, m_data_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set);
+
+if (uniform_inputs)
+{
+verify(HERE), m_param_buffer, m_param_buffer->value != VK_NULL_HANDLE;
+m_program->bind_buffer({ m_param_buffer->value, 0, 256 }, 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, m_descriptor_set);
+}
 }

-void run(VkCommandBuffer cmd, vk::buffer* data, u32 data_length, u32 data_offset = 0)
+void set_parameters(VkCommandBuffer cmd, const u32* params, u8 count)
+{
+verify(HERE), uniform_inputs;
+
+if (!m_param_buffer)
+{
+auto pdev = vk::get_current_renderer();
+m_param_buffer = std::make_unique<vk::buffer>(*pdev, 256, pdev->get_memory_mapping().host_visible_coherent,
+VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0);
+}
+
+vkCmdUpdateBuffer(cmd, m_param_buffer->value, 0, count * sizeof(u32), params);
+}
+
+void run(VkCommandBuffer cmd, const vk::buffer* data, u32 data_length, u32 data_offset = 0)
 {
 m_data = data;
 m_data_offset = data_offset;
@@ -274,7 +315,7 @@ namespace vk
 const auto num_bytes_to_process = align(data_length, num_bytes_per_invocation);
 const auto num_invocations = num_bytes_to_process / num_bytes_per_invocation;

-if (num_bytes_to_process > data->size())
+if ((num_bytes_to_process + data_offset) > data->size())
 {
 // Technically robust buffer access should keep the driver from crashing in OOB situations
 LOG_ERROR(RSX, "Inadequate buffer length submitted for a compute operation."
@@ -339,6 +380,134 @@ namespace vk
 }
 };

+// NOTE: D24S8 layout has the stencil in the MSB! Its actually S8|D24|S8|D24 starting at offset 0
+struct cs_interleave_task : cs_shuffle_base
+{
+u32 m_ssbo_length = 0;
+
+cs_interleave_task()
+{
+uniform_inputs = true;
+
+variables =
+{
+" uint block_length = params[0].x >> 2;\n"
+" uint z_offset = params[0].y >> 2;\n"
+" uint s_offset = params[0].z >> 2;\n"
+" uint depth;\n"
+" uint stencil;\n"
+" uint stencil_shift;\n"
+" uint stencil_offset;\n"
+};
+}
+
+void bind_resources() override
+{
+m_program->bind_buffer({ m_data->value, m_data_offset, m_ssbo_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set);
+
+if (uniform_inputs)
+{
+verify(HERE), m_param_buffer;
+m_program->bind_buffer({ m_param_buffer->value, 0, 256 }, 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, m_descriptor_set);
+}
+}
+
+void run(VkCommandBuffer cmd, const vk::buffer* data, u32 data_offset, u32 data_length, u32 zeta_offset, u32 stencil_offset)
+{
+u32 parameters[3] = { data_length, zeta_offset - data_offset, stencil_offset - data_offset };
+set_parameters(cmd, parameters, 3);
+
+m_ssbo_length = stencil_offset + (data_length / 4) - data_offset;
+cs_shuffle_base::run(cmd, data, data_length, data_offset);
+}
+};
+
+struct cs_gather_d24x8 : cs_interleave_task
+{
+cs_gather_d24x8()
+{
+work_kernel =
+{
+" if (index >= block_length)\n"
+" return;\n"
+"\n"
+" depth = data[index + z_offset] & 0x00FFFFFF;\n"
+" stencil_offset = (index / 4);\n"
+" stencil_shift = (index % 4) * 8;\n"
+" stencil = data[stencil_offset + s_offset];\n"
+" stencil = (stencil >> stencil_shift) & 0xFF;\n"
+" value = (depth << 8) | stencil;\n"
+" data[index] = value;\n"
+};
+
+cs_shuffle_base::build("");
+}
+};
+
+struct cs_gather_d32x8 : cs_interleave_task
+{
+cs_gather_d32x8()
+{
+work_kernel =
+{
+" if (index >= block_length)\n"
+" return;\n"
+"\n"
+" depth = f32_to_d24(data[index + z_offset]);\n"
+" stencil_offset = (index / 4);\n"
+" stencil_shift = (index % 4) * 8;\n"
+" stencil = data[stencil_offset + s_offset];\n"
+" stencil = (stencil >> stencil_shift) & 0xFF;\n"
+" value = (depth << 8) | stencil;\n"
+" data[index] = value;\n"
+};
+
+cs_shuffle_base::build("");
+}
+};
+
+struct cs_scatter_d24x8 : cs_interleave_task
+{
+cs_scatter_d24x8()
+{
+work_kernel =
+{
+" if (index >= block_length)\n"
+" return;\n"
+"\n"
+" value = data[index];\n"
+" data[index + z_offset] = (value >> 8);\n"
+" stencil_offset = (index / 4);\n"
+" stencil_shift = (index % 4) * 8;\n"
+" stencil = (value & 0xFF) << stencil_shift;\n"
+" data[stencil_offset + s_offset] |= stencil;\n"
+};
+
+cs_shuffle_base::build("");
+}
+};
+
+struct cs_scatter_d32x8 : cs_interleave_task
+{
+cs_scatter_d32x8()
+{
+work_kernel =
+{
+" if (index >= block_length)\n"
+" return;\n"
+"\n"
+" value = data[index];\n"
+" data[index + z_offset] = d24_to_f32(value >> 8);\n"
+" stencil_offset = (index / 4);\n"
+" stencil_shift = (index % 4) * 8;\n"
+" stencil = (value & 0xFF) << stencil_shift;\n"
+" data[stencil_offset + s_offset] |= stencil;\n"
+};
+
+cs_shuffle_base::build("");
+}
+};
+
 // TODO: Replace with a proper manager
 extern std::unordered_map<u32, std::unique_ptr<vk::compute_task>> g_compute_tasks;

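The interleave kernels above read three values from the uniform buffer that set_parameters() fills. A hypothetical host-side view of that parameter block, with names assumed for illustration only:

#include <cstdint>

// Values packed by cs_interleave_task::run() into params[0].xyz.
// The shader divides each byte offset by 4 to index the uint SSBO.
struct interleave_params
{
    uint32_t block_length;   // size in bytes of the packed D24S8/D32S8 block
    uint32_t zeta_offset;    // byte offset of the raw depth block, relative to the packed block
    uint32_t stencil_offset; // byte offset of the raw stencil block, relative to the packed block
};

Inside the generated GLSL these become block_length = params[0].x >> 2, z_offset = params[0].y >> 2 and s_offset = params[0].z >> 2, i.e. uint indices into the bound SSBO.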
@@ -146,6 +146,9 @@ namespace vk
 void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout, const VkImageSubresourceRange& range);
 void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout);

+void copy_image_to_buffer(VkCommandBuffer cmd, const vk::image* src, const vk::buffer* dst, const VkBufferImageCopy& region);
+void copy_buffer_to_image(VkCommandBuffer cmd, const vk::buffer* src, const vk::image* dst, const VkBufferImageCopy& region);
+
 void copy_image_typeless(const command_buffer &cmd, const image *src, const image *dst, const areai& src_rect, const areai& dst_rect,
 u32 mipmaps, VkImageAspectFlags src_aspect, VkImageAspectFlags dst_aspect,
 VkImageAspectFlags src_transfer_mask = 0xFF, VkImageAspectFlags dst_transfer_mask = 0xFF);
@@ -106,6 +106,127 @@ namespace vk
 fmt::throw_exception("Unknown vkFormat 0x%x" HERE, (u32)format);
 }

+void copy_image_to_buffer(VkCommandBuffer cmd, const vk::image* src, const vk::buffer* dst, const VkBufferImageCopy& region)
+{
+switch (src->format())
+{
+default:
+{
+vkCmdCopyImageToBuffer(cmd, src->value, src->current_layout, dst->value, 1, &region);
+break;
+}
+case VK_FORMAT_D24_UNORM_S8_UINT:
+case VK_FORMAT_D32_SFLOAT_S8_UINT:
+{
+verify(HERE), region.imageSubresource.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
+
+const u32 out_w = region.bufferRowLength? region.bufferRowLength : region.imageExtent.width;
+const u32 out_h = region.bufferImageHeight? region.bufferImageHeight : region.imageExtent.height;
+const u32 packed_length = out_w * out_h * 4;
+const u32 in_depth_size = packed_length;
+const u32 in_stencil_size = out_w * out_h;
+
+const u32 allocation_end = region.bufferOffset + packed_length + in_depth_size + in_stencil_size;
+verify(HERE), dst->size() >= allocation_end;
+
+const VkDeviceSize z_offset = align<VkDeviceSize>(region.bufferOffset + packed_length, 256);
+const VkDeviceSize s_offset = align<VkDeviceSize>(z_offset + in_depth_size, 256);
+
+// 1. Copy the depth and stencil blocks to separate banks
+VkBufferImageCopy sub_regions[2];
+sub_regions[0] = sub_regions[1] = region;
+sub_regions[0].bufferOffset = z_offset;
+sub_regions[0].imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
+sub_regions[1].bufferOffset = s_offset;
+sub_regions[1].imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT;
+vkCmdCopyImageToBuffer(cmd, src->value, src->current_layout, dst->value, 2, sub_regions);
+
+// 2. Interleave the separated data blocks with a compute job
+vk::cs_interleave_task *job;
+if (src->format() == VK_FORMAT_D24_UNORM_S8_UINT)
+{
+job = vk::get_compute_task<vk::cs_gather_d24x8>();
+}
+else
+{
+job = vk::get_compute_task<vk::cs_gather_d32x8>();
+}
+
+vk::insert_buffer_memory_barrier(cmd, dst->value, z_offset, in_depth_size + in_stencil_size,
+VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
+
+job->run(cmd, dst, (u32)region.bufferOffset, packed_length, z_offset, s_offset);
+
+vk::insert_buffer_memory_barrier(cmd, dst->value, region.bufferOffset, packed_length,
+VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
+break;
+}
+}
+}
+
+void copy_buffer_to_image(VkCommandBuffer cmd, const vk::buffer* src, const vk::image* dst, const VkBufferImageCopy& region)
+{
+switch (dst->format())
+{
+default:
+{
+vkCmdCopyBufferToImage(cmd, src->value, dst->value, dst->current_layout, 1, &region);
+break;
+}
+case VK_FORMAT_D24_UNORM_S8_UINT:
+case VK_FORMAT_D32_SFLOAT_S8_UINT:
+{
+const u32 out_w = region.bufferRowLength? region.bufferRowLength : region.imageExtent.width;
+const u32 out_h = region.bufferImageHeight? region.bufferImageHeight : region.imageExtent.height;
+const u32 packed_length = out_w * out_h * 4;
+const u32 in_depth_size = packed_length;
+const u32 in_stencil_size = out_w * out_h;
+
+const u32 allocation_end = region.bufferOffset + packed_length + in_depth_size + in_stencil_size;
+verify(HERE), src->size() >= allocation_end;
+
+const VkDeviceSize z_offset = align<VkDeviceSize>(region.bufferOffset + packed_length, 256);
+const VkDeviceSize s_offset = align<VkDeviceSize>(z_offset + in_depth_size, 256);
+
+// Zero out the stencil block
+vkCmdFillBuffer(cmd, src->value, s_offset, in_stencil_size, 0);
+
+vk::insert_buffer_memory_barrier(cmd, src->value, s_offset, in_stencil_size,
+VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
+
+// 1. Scatter the interleaved data into separate depth and stencil blocks
+vk::cs_interleave_task *job;
+if (dst->format() == VK_FORMAT_D24_UNORM_S8_UINT)
+{
+job = vk::get_compute_task<vk::cs_scatter_d24x8>();
+}
+else
+{
+job = vk::get_compute_task<vk::cs_scatter_d32x8>();
+}
+
+job->run(cmd, src, (u32)region.bufferOffset, packed_length, z_offset, s_offset);
+
+vk::insert_buffer_memory_barrier(cmd, src->value, z_offset, in_depth_size + in_stencil_size,
+VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
+
+// 2. Copy the separated blocks into the target
+VkBufferImageCopy sub_regions[2];
+sub_regions[0] = sub_regions[1] = region;
+sub_regions[0].bufferOffset = z_offset;
+sub_regions[0].imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
+sub_regions[1].bufferOffset = s_offset;
+sub_regions[1].imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT;
+vkCmdCopyBufferToImage(cmd, src->value, dst->value, dst->current_layout, 2, sub_regions);
+break;
+}
+}
+}
+
 void copy_image_typeless(const vk::command_buffer& cmd, const vk::image* src, const vk::image* dst, const areai& src_rect, const areai& dst_rect,
 u32 mipmaps, VkImageAspectFlags src_aspect, VkImageAspectFlags dst_aspect, VkImageAspectFlags src_transfer_mask, VkImageAspectFlags dst_transfer_mask)
 {
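Both helpers above carve the same buffer into three regions: the packed D24S8/D32S8 block at region.bufferOffset, followed by 256-byte aligned blocks for the raw depth and stencil aspects. The following is a small standalone sketch of that offset math (illustrative names; it assumes 4 bytes per packed texel, as in the diff):

#include <cstdint>

struct ds_scratch_layout
{
    uint64_t packed_offset; // interleaved D24S8/D32S8 words
    uint64_t z_offset;      // raw depth block (one 32-bit word per texel)
    uint64_t s_offset;      // raw stencil block (one byte per texel)
    uint64_t end;           // first byte past the stencil block
};

static uint64_t align256(uint64_t value) { return (value + 255) & ~uint64_t{ 255 }; }

ds_scratch_layout compute_ds_scratch_layout(uint64_t buffer_offset, uint32_t out_w, uint32_t out_h)
{
    const uint64_t packed_length   = uint64_t{ out_w } * out_h * 4;
    const uint64_t in_depth_size   = packed_length;
    const uint64_t in_stencil_size = uint64_t{ out_w } * out_h;

    ds_scratch_layout layout{};
    layout.packed_offset = buffer_offset;
    layout.z_offset      = align256(buffer_offset + packed_length);
    layout.s_offset      = align256(layout.z_offset + in_depth_size);
    layout.end           = layout.s_offset + in_stencil_size;
    return layout;
}

On readback, vkCmdCopyImageToBuffer writes the two aspects into the z/s regions and a gather kernel interleaves them into the packed region; on upload, a scatter kernel splits the packed region into the z/s regions before vkCmdCopyBufferToImage consumes them.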
@@ -138,7 +259,7 @@ namespace vk

 for (u32 mip_level = 0; mip_level < mipmaps; ++mip_level)
 {
-vkCmdCopyImageToBuffer(cmd, src->value, preferred_src_format, scratch_buf->value, 1, &src_copy);
+vk::copy_image_to_buffer(cmd, src, scratch_buf, src_copy);

 const auto src_convert = get_format_convert_flags(src->info.format);
 const auto dst_convert = get_format_convert_flags(dst->info.format);
@@ -187,7 +308,7 @@ namespace vk
 }
 }

-vkCmdCopyBufferToImage(cmd, scratch_buf->value, dst->value, preferred_dst_format, 1, &dst_copy);
+vk::copy_buffer_to_image(cmd, scratch_buf, dst, dst_copy);

 src_copy.imageSubresource.mipLevel++;
 dst_copy.imageSubresource.mipLevel++;
@@ -438,9 +559,6 @@ namespace vk
 u32 block_in_pixel = get_format_block_size_in_texel(format);
 u8 block_size_in_bytes = get_format_block_size_in_bytes(format);

-//TODO: Depth and stencil transfer together
-flags &= ~(VK_IMAGE_ASPECT_STENCIL_BIT);
-
 for (const rsx_subresource_layout &layout : subresource_layout)
 {
 u32 row_pitch = align(layout.width_in_block * block_size_in_bytes, 256);
@@ -449,29 +567,26 @@ namespace vk
 //Map with extra padding bytes in case of realignment
 size_t offset_in_buffer = upload_heap.alloc<512>(image_linear_size + 8);
 void *mapped_buffer = upload_heap.map(offset_in_buffer, image_linear_size + 8);
-void *dst = mapped_buffer;
 VkBuffer buffer_handle = upload_heap.heap->value;

-if (dst_image->info.format == VK_FORMAT_D24_UNORM_S8_UINT)
-{
-//Misalign intentionally to skip the first stencil byte in D24S8 data
-//Ensures the real depth data is dword aligned
-
-//Skip leading dword when writing to texture
-offset_in_buffer += 4;
-dst = (char*)(mapped_buffer) + 4 - 1;
-}
-
-gsl::span<gsl::byte> mapped{ (gsl::byte*)dst, ::narrow<int>(image_linear_size) };
+gsl::span<gsl::byte> mapped{ (gsl::byte*)mapped_buffer, ::narrow<int>(image_linear_size) };
 upload_texture_subresource(mapped, layout, format, is_swizzled, false, 256);
 upload_heap.unmap();

-if (dst_image->info.format == VK_FORMAT_D32_SFLOAT_S8_UINT)
-{
-// Run GPU compute task to convert the D24x8 to FP32
-// NOTE: On commandbuffer submission, the HOST_WRITE to ALL_COMMANDS barrier is implicitly inserted according to spec
-// No need to add another explicit barrier unless a driver bug is found
+VkBufferImageCopy copy_info = {};
+copy_info.bufferOffset = offset_in_buffer;
+copy_info.imageExtent.height = layout.height_in_block * block_in_pixel;
+copy_info.imageExtent.width = layout.width_in_block * block_in_pixel;
+copy_info.imageExtent.depth = layout.depth;
+copy_info.imageSubresource.aspectMask = flags;
+copy_info.imageSubresource.layerCount = 1;
+copy_info.imageSubresource.baseArrayLayer = mipmap_level / mipmap_count;
+copy_info.imageSubresource.mipLevel = mipmap_level % mipmap_count;
+copy_info.bufferRowLength = block_in_pixel * row_pitch / block_size_in_bytes;
+
+if (dst_image->info.format == VK_FORMAT_D24_UNORM_S8_UINT ||
+dst_image->info.format == VK_FORMAT_D32_SFLOAT_S8_UINT)
+{
 // Executing GPU tasks on host_visible RAM is awful, copy to device-local buffer instead
 auto scratch_buf = vk::get_scratch_buffer();

@@ -485,27 +600,14 @@ namespace vk
 insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, image_linear_size, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
 VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);

-vk::get_compute_task<vk::cs_shuffle_d24x8_f32>()->run(cmd, upload_heap.heap.get(), image_linear_size, (u32)offset_in_buffer);
-insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, image_linear_size, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
-VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
-buffer_handle = scratch_buf->value;
-offset_in_buffer = 0;
+copy_info.bufferOffset = 0;
+vk::copy_buffer_to_image(cmd, scratch_buf, dst_image, copy_info);
+}
+else
+{
+vkCmdCopyBufferToImage(cmd, buffer_handle, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy_info);
 }

-VkBufferImageCopy copy_info = {};
-copy_info.bufferOffset = offset_in_buffer;
-copy_info.imageExtent.height = layout.height_in_block * block_in_pixel;
-copy_info.imageExtent.width = layout.width_in_block * block_in_pixel;
-copy_info.imageExtent.depth = layout.depth;
-copy_info.imageSubresource.aspectMask = flags;
-copy_info.imageSubresource.layerCount = 1;
-copy_info.imageSubresource.baseArrayLayer = mipmap_level / mipmap_count;
-copy_info.imageSubresource.mipLevel = mipmap_level % mipmap_count;
-copy_info.bufferRowLength = block_in_pixel * row_pitch / block_size_in_bytes;
-
-vkCmdCopyBufferToImage(cmd, buffer_handle, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy_info);
 mipmap_level++;
 }
 }