vk: Add more compute routines to handle texture format conversions
- Implement LE D24x8 to LE D32 upload routine
- Implement endianness swapping and depth format conversion routines (readback)
Commit 5fb4009a07 · Parent f45dcfe18a
@@ -97,7 +97,7 @@ namespace vk
 		virtual void bind_resources()
 		{}
 
-		void load_program(const vk::command_buffer& cmd)
+		void load_program(VkCommandBuffer cmd)
 		{
 			if (!m_program)
 			{
@@ -141,7 +141,7 @@ namespace vk
 			vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, m_pipeline_layout, 0, 1, &m_descriptor_set, 0, nullptr);
 		}
 
-		virtual void run(const vk::command_buffer& cmd, u32 num_invocations)
+		virtual void run(VkCommandBuffer cmd, u32 num_invocations)
 		{
 			load_program(cmd);
 			vkCmdDispatch(cmd, num_invocations, 1, 1);
@@ -151,6 +151,8 @@ namespace vk
 	struct cs_shuffle_base : compute_task
 	{
 		vk::buffer* m_data;
+		u32 m_data_offset = 0;
+		u32 m_data_length = 0;
 		u32 kernel_size = 1;
 
 		void build(const char* function_name, u32 _kernel_size)
@@ -164,10 +166,17 @@ namespace vk
 				"layout(std430, set=0, binding=0) buffer ssbo{ uint data[]; };\n\n"
 				"\n"
 				"#define KERNEL_SIZE %ks\n"
 				"\n"
+				"// Generic swap routines\n"
 				"#define bswap_u16(bits) (bits & 0xFF) << 8 | (bits & 0xFF00) >> 8 | (bits & 0xFF0000) << 8 | (bits & 0xFF000000) >> 8\n"
 				"#define bswap_u32(bits) (bits & 0xFF) << 24 | (bits & 0xFF00) << 8 | (bits & 0xFF0000) >> 8 | (bits & 0xFF000000) >> 24\n"
 				"#define bswap_u16_u32(bits) (bits & 0xFFFF) << 16 | (bits & 0xFFFF0000) >> 16\n"
+				"\n"
+				"// Depth format conversions\n"
+				"#define d24x8_to_f32(bits) floatBitsToUint(float(bits >> 8) / 16777214.f)\n"
+				"#define d24x8_to_d24x8_swapped(bits) (bits & 0xFF00FF00) | (bits & 0xFF0000) >> 16 | (bits & 0xFF) << 16\n"
+				"#define f32_to_d24x8_swapped(bits) d24x8_to_d24x8_swapped(uint(uintBitsToFloat(bits) * 16777214.f))\n"
+				"\n"
 				"void main()\n"
 				"{\n"
 				"	uint index = gl_GlobalInvocationID.x * KERNEL_SIZE;\n"
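The new depth macros above drive the compute paths added later in this commit. As a reference for the bit manipulation involved, two of them can be mirrored on the CPU roughly as follows (an illustrative C++ sketch only, not code from this commit; the helper names are invented, and the 16777214 divisor is 2^24 - 2 exactly as in the GLSL above):

#include <cstdint>
#include <cstring>

// Mirrors d24x8_to_f32: drop the low 8 bits (stencil/padding), normalize the
// 24-bit depth value, then reinterpret the float's bit pattern as a uint,
// like GLSL floatBitsToUint.
static inline uint32_t cpu_d24x8_to_f32(uint32_t bits)
{
	const float depth = float(bits >> 8) / 16777214.f;
	uint32_t out;
	std::memcpy(&out, &depth, sizeof(out));
	return out;
}

// Mirrors bswap_u16: swaps the bytes within each of the two u16 values packed
// in one 32-bit word, without exchanging the two halves themselves.
static inline uint32_t cpu_bswap_u16(uint32_t bits)
{
	return (bits & 0xFF) << 8 | (bits & 0xFF00) >> 8 |
	       (bits & 0xFF0000) << 8 | (bits & 0xFF000000) >> 8;
}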
@@ -192,23 +201,23 @@ namespace vk
 
 		void bind_resources() override
 		{
-			m_program->bind_buffer({ m_data->value, 0, VK_WHOLE_SIZE }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set);
+			m_program->bind_buffer({ m_data->value, m_data_offset, m_data_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set);
 		}
 
-		void run(const vk::command_buffer& cmd, vk::buffer* data, u32 mem_size)
+		void run(VkCommandBuffer cmd, vk::buffer* data, u32 data_length, u32 data_offset = 0)
 		{
 			m_data = data;
+			m_data_offset = data_offset;
+			m_data_length = data_length;
 
 			const auto num_bytes_per_invocation = optimal_group_size * kernel_size * 4;
-			const auto num_invocations = align(mem_size, 256) / num_bytes_per_invocation;
+			const auto num_invocations = align(data_length, 256) / num_bytes_per_invocation;
 			compute_task::run(cmd, num_invocations);
 		}
 	};
 
 	struct cs_shuffle_16 : cs_shuffle_base
 	{
-		vk::buffer* m_data;
-
 		// byteswap ushort
 		cs_shuffle_16()
 		{
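For a sense of the dispatch size computed in run() above: assuming an optimal_group_size of 64 threads per workgroup (the real value is defined elsewhere in this header) and the kernel_size of 32 used by the shuffle kernels below, one workgroup covers 8192 bytes, so a 1 MiB buffer needs 128 workgroups. A contextual sketch of that arithmetic, using the header's own u32/align helpers:

// Hypothetical numbers to illustrate the dispatch-size math; only kernel_size = 32
// is taken from this commit, the rest are example values.
const u32 optimal_group_size = 64;          // threads per workgroup (assumed)
const u32 kernel_size        = 32;          // 32-bit words processed per thread
const u32 data_length        = 1024 * 1024; // example: 1 MiB buffer

const u32 num_bytes_per_invocation = optimal_group_size * kernel_size * 4;       // 8192 bytes per workgroup
const u32 num_invocations = align(data_length, 256) / num_bytes_per_invocation;  // 1048576 / 8192 = 128 workgroups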
@@ -234,6 +243,33 @@ namespace vk
 		}
 	};
 
+	struct cs_shuffle_d24x8_f32 : cs_shuffle_base
+	{
+		// convert d24x8 to f32
+		cs_shuffle_d24x8_f32()
+		{
+			cs_shuffle_base::build("d24x8_to_f32", 32);
+		}
+	};
+
+	struct cs_shuffle_se_f32_d24x8 : cs_shuffle_base
+	{
+		// convert f32 to d24x8 and swap endianness
+		cs_shuffle_se_f32_d24x8()
+		{
+			cs_shuffle_base::build("f32_to_d24x8_swapped", 32);
+		}
+	};
+
+	struct cs_shuffle_se_d24x8 : cs_shuffle_base
+	{
+		// swap endianness of d24x8
+		cs_shuffle_se_d24x8()
+		{
+			cs_shuffle_base::build("d24x8_to_d24x8_swapped", 32);
+		}
+	};
+
 	// TODO: Replace with a proper manager
 	extern std::unordered_map<u32, std::unique_ptr<vk::compute_task>> g_compute_tasks;
 
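Call sites added later in this commit fetch these kernels through vk::get_compute_task<T>(), whose definition is not part of this diff. Purely for orientation, one plausible shape of such a lookup over g_compute_tasks is sketched below; the key scheme and initialization are assumptions, not code from this commit:

// Illustrative only -- requires <typeinfo> and <memory>. Assumes some unique
// integer key per task type; the helper that actually exists in the tree may differ.
template <typename T>
T* get_compute_task()
{
	const u32 index = static_cast<u32>(typeid(T).hash_code()); // hypothetical key scheme
	auto& task = g_compute_tasks[index];

	if (!task)
	{
		// Lazily construct the kernel the first time it is requested
		task = std::make_unique<T>();
	}

	return static_cast<T*>(task.get());
}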
@@ -347,13 +347,13 @@ namespace vk
 		return g_drv_disable_fence_reset;
 	}
 
-	void insert_buffer_memory_barrier(VkCommandBuffer cmd, VkBuffer buffer, VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage, VkAccessFlags src_mask, VkAccessFlags dst_mask)
+	void insert_buffer_memory_barrier(VkCommandBuffer cmd, VkBuffer buffer, VkDeviceSize offset, VkDeviceSize length, VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage, VkAccessFlags src_mask, VkAccessFlags dst_mask)
 	{
 		VkBufferMemoryBarrier barrier = {};
 		barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
 		barrier.buffer = buffer;
-		barrier.offset = 0;
-		barrier.size = VK_WHOLE_SIZE;
+		barrier.offset = offset;
+		barrier.size = length;
 		barrier.srcAccessMask = src_mask;
 		barrier.dstAccessMask = dst_mask;
 		barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
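With the new offset/length parameters, callers can scope the barrier to the bytes a compute kernel actually touched instead of the whole buffer. The call sites updated later in this commit follow the pattern sketched here (cmd, scratch_buf, offset and length are placeholders for whatever the caller has in scope):

// Make a compute-shader write to bytes [offset, offset + length) visible to a
// following transfer read.
insert_buffer_memory_barrier(cmd, scratch_buf->value, offset, length,
	VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
	VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);

// Passing 0 and VK_WHOLE_SIZE reproduces the previous whole-buffer behaviour.
insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, VK_WHOLE_SIZE,
	VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
	VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);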
@@ -149,7 +149,8 @@ namespace vk
 	void insert_texture_barrier(VkCommandBuffer cmd, VkImage image, VkImageLayout layout, VkImageSubresourceRange range);
 	void insert_texture_barrier(VkCommandBuffer cmd, vk::image *image);
 
-	void insert_buffer_memory_barrier(VkCommandBuffer cmd, VkBuffer buffer, VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage, VkAccessFlags src_mask, VkAccessFlags dst_mask);
+	void insert_buffer_memory_barrier(VkCommandBuffer cmd, VkBuffer buffer, VkDeviceSize offset, VkDeviceSize length,
+		VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage, VkAccessFlags src_mask, VkAccessFlags dst_mask);
 
 	//Manage 'uininterruptible' state where secondary operations (e.g violation handlers) will have to wait
 	void enter_uninterruptible();
@@ -152,7 +152,10 @@ namespace vk
 		}
 		else
 		{
-			insert_buffer_memory_barrier(cmd, scratch_buf->value, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+			const auto elem_size = vk::get_format_texel_width(src->info.format);
+			const auto length = elem_size * src_copy.imageExtent.width * src_copy.imageExtent.height;
+
+			insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, length, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
 				VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
 
 			vk::cs_shuffle_base *shuffle_kernel = nullptr;
@@ -177,12 +180,9 @@ namespace vk
 				}
 			}
 
-			const auto elem_size = vk::get_format_texel_width(src->info.format);
-			const auto length = elem_size * src_copy.imageExtent.width * src_copy.imageExtent.height;
-
 			shuffle_kernel->run(cmd, scratch_buf, length);
 
-			insert_buffer_memory_barrier(cmd, scratch_buf->value, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+			insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, length, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
 				VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
 		}
 	}
@@ -338,7 +338,7 @@ namespace vk
 		info.imageSubresource = { aspect & transfer_flags, 0, 0, 1 };
 
 		vkCmdCopyImageToBuffer(cmd, src, preferred_src_format, scratch_buf->value, 1, &info);
-		insert_buffer_memory_barrier(cmd, scratch_buf->value, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
+		insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, VK_WHOLE_SIZE, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
 
 		info.imageSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 };
 		vkCmdCopyBufferToImage(cmd, scratch_buf->value, typeless, VK_IMAGE_LAYOUT_GENERAL, 1, &info);
@@ -352,7 +352,7 @@ namespace vk
 		info.imageOffset = { 0, (s32)src_h, 0 };
 
 		vkCmdCopyImageToBuffer(cmd, typeless, VK_IMAGE_LAYOUT_GENERAL, scratch_buf->value, 1, &info);
-		insert_buffer_memory_barrier(cmd, scratch_buf->value, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
+		insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, VK_WHOLE_SIZE, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
 
 		info.imageOffset = { dst_rect.x1, dst_rect.y1, 0 };
 		info.imageSubresource = { aspect & transfer_flags, 0, 0, 1 };
@@ -432,7 +432,6 @@ namespace vk
 	u32 mipmap_level = 0;
 	u32 block_in_pixel = get_format_block_size_in_texel(format);
 	u8 block_size_in_bytes = get_format_block_size_in_bytes(format);
-	std::vector<u8> staging_buffer;
 
 	//TODO: Depth and stencil transfer together
 	flags &= ~(VK_IMAGE_ASPECT_STENCIL_BIT);
@@ -447,49 +446,32 @@ namespace vk
 	void *mapped_buffer = upload_heap.map(offset_in_buffer, image_linear_size + 8);
 	void *dst = mapped_buffer;
 
-	bool use_staging = false;
-	if (dst_image->info.format == VK_FORMAT_D24_UNORM_S8_UINT ||
-		dst_image->info.format == VK_FORMAT_D32_SFLOAT_S8_UINT)
+	if (dst_image->info.format == VK_FORMAT_D24_UNORM_S8_UINT)
 	{
 		//Misalign intentionally to skip the first stencil byte in D24S8 data
 		//Ensures the real depth data is dword aligned
 
-		if (dst_image->info.format == VK_FORMAT_D32_SFLOAT_S8_UINT)
-		{
-			//Emulate D24x8 passthrough to D32 format
-			//Reads from GPU managed memory are slow at best and at worst unreliable
-			use_staging = true;
-			staging_buffer.resize(image_linear_size + 8);
-			dst = staging_buffer.data() + 4 - 1;
-		}
-		else
-		{
-			//Skip leading dword when writing to texture
-			offset_in_buffer += 4;
-			dst = (char*)(mapped_buffer) + 4 - 1;
-		}
+		//Skip leading dword when writing to texture
+		offset_in_buffer += 4;
+		dst = (char*)(mapped_buffer) + 4 - 1;
 	}
 
 	gsl::span<gsl::byte> mapped{ (gsl::byte*)dst, ::narrow<int>(image_linear_size) };
 	upload_texture_subresource(mapped, layout, format, is_swizzled, false, 256);
-
-	if (use_staging)
-	{
-		if (dst_image->info.format == VK_FORMAT_D32_SFLOAT_S8_UINT)
-		{
-			//Map depth component from D24x8 to a f32 depth value
-			//NOTE: One byte (contains first S8 value) is skipped
-			rsx::convert_le_d24x8_to_le_f32(mapped_buffer, (char*)dst + 1, image_linear_size >> 2, 1);
-		}
-		else //unused
-		{
-			//Copy emulated data back to the target buffer
-			memcpy(mapped_buffer, dst, image_linear_size);
-		}
-	}
-
 	upload_heap.unmap();
 
+	if (dst_image->info.format == VK_FORMAT_D32_SFLOAT_S8_UINT)
+	{
+		// Run GPU compute task to convert the D24x8 to FP32
+		// NOTE: On commandbuffer submission, the HOST_WRITE to ALL_COMMANDS barrier is implicitly inserted according to spec
+		// No need to add another explicit barrier unless a driver bug is found
+
+		vk::get_compute_task<vk::cs_shuffle_d24x8_f32>()->run(cmd, upload_heap.heap.get(), image_linear_size, offset_in_buffer);
+
+		insert_buffer_memory_barrier(cmd, upload_heap.heap->value, offset_in_buffer, image_linear_size, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+			VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
+	}
+
 	VkBufferImageCopy copy_info = {};
 	copy_info.bufferOffset = offset_in_buffer;
 	copy_info.imageExtent.height = layout.height_in_block * block_in_pixel;
@@ -2,6 +2,7 @@
 #include "stdafx.h"
 #include "VKRenderTargets.h"
 #include "VKGSRender.h"
+#include "VKCompute.h"
 #include "Emu/System.h"
 #include "../Common/TextureUtils.h"
 #include "../rsx_utils.h"
@@ -220,6 +221,29 @@ namespace vk
 			change_image_layout(cmd, vram_texture, old_layout, subresource_range);
 			real_pitch = vk::get_format_texel_width(vram_texture->info.format) * transfer_width;
 
+			if (vram_texture->info.format == VK_FORMAT_D24_UNORM_S8_UINT)
+			{
+				vk::get_compute_task<vk::cs_shuffle_se_d24x8>()->run(cmd, dma_buffer.get(), cpu_address_range);
+			}
+			else if (vram_texture->info.format == VK_FORMAT_D32_SFLOAT_S8_UINT)
+			{
+				vk::get_compute_task<vk::cs_shuffle_se_f32_d24x8>()->run(cmd, dma_buffer.get(), cpu_address_range);
+			}
+			else if (pack_unpack_swap_bytes)
+			{
+				const auto texel_layout = vk::get_format_element_size(vram_texture->info.format);
+				const auto elem_size = texel_layout.first;
+
+				if (elem_size == 2)
+				{
+					vk::get_compute_task<vk::cs_shuffle_16>()->run(cmd, dma_buffer.get(), cpu_address_range);
+				}
+				else if (elem_size == 4)
+				{
+					vk::get_compute_task<vk::cs_shuffle_32>()->run(cmd, dma_buffer.get(), cpu_address_range);
+				}
+			}
+
 			if (manage_cb_lifetime)
 			{
 				cmd.end();
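The kernels dispatched above rewrite the readback buffer in place into the byte order the guest expects. As a point of reference, the d24x8_to_d24x8_swapped macro used by cs_shuffle_se_d24x8 corresponds to the following CPU-side operation (an illustrative C++ mirror, not code from this commit):

// CPU mirror of the GLSL d24x8_to_d24x8_swapped macro defined earlier in this
// diff: bytes 1 and 3 of the 32-bit word stay in place, bytes 0 and 2 swap.
static inline u32 cpu_d24x8_swap_endianness(u32 bits)
{
	return (bits & 0xFF00FF00) | ((bits & 0x00FF0000) >> 16) | ((bits & 0x000000FF) << 16);
}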
@@ -238,56 +262,6 @@ namespace vk
 			sync_timestamp = get_system_time();
 		}
 
-		template<typename T, bool swapped>
-		void do_memory_transfer_packed(void *pixels_dst, const void *pixels_src, u32 max_length)
-		{
-			if (sizeof(T) == 1 || !swapped)
-			{
-				memcpy(pixels_dst, pixels_src, max_length);
-			}
-			else
-			{
-				const u32 block_size = max_length / sizeof(T);
-				auto typed_dst = (be_t<T> *)pixels_dst;
-				auto typed_src = (T *)pixels_src;
-
-				for (u32 px = 0; px < block_size; ++px)
-					typed_dst[px] = typed_src[px];
-			}
-		}
-
-		template<typename T, bool swapped>
-		void do_memory_transfer_padded(void *pixels_dst, const void *pixels_src, u32 src_pitch, u32 dst_pitch, u32 num_rows)
-		{
-			auto src = (char*)pixels_src;
-			auto dst = (char*)pixels_dst;
-
-			if (sizeof(T) == 1 || !swapped)
-			{
-				for (u32 y = 0; y < num_rows; ++y)
-				{
-					memcpy(dst, src, src_pitch);
-					src += src_pitch;
-					dst += dst_pitch;
-				}
-			}
-			else
-			{
-				const u32 block_size = src_pitch / sizeof(T);
-				for (u32 y = 0; y < num_rows; ++y)
-				{
-					auto typed_dst = (be_t<T> *)dst;
-					auto typed_src = (T *)src;
-
-					for (u32 px = 0; px < block_size; ++px)
-						typed_dst[px] = typed_src[px];
-
-					src += src_pitch;
-					dst += dst_pitch;
-				}
-			}
-		}
-
 		bool flush(vk::command_buffer& cmd, VkQueue submit_queue)
 		{
 			if (flushed) return true;
@@ -314,93 +288,26 @@ namespace vk
 			void* pixels_src = dma_buffer->map(valid_range.first, valid_range.second);
 			void* pixels_dst = get_raw_ptr(valid_range.first, true);
 
-			const auto texel_layout = vk::get_format_element_size(vram_texture->info.format);
-			const auto elem_size = texel_layout.first;
-
-			auto memory_transfer_packed = [=]()
-			{
-				switch (elem_size)
-				{
-				default:
-					LOG_ERROR(RSX, "Invalid element width %d", elem_size);
-				case 1:
-					do_memory_transfer_packed<u8, false>(pixels_dst, pixels_src, valid_range.second);
-					break;
-				case 2:
-					if (pack_unpack_swap_bytes)
-						do_memory_transfer_packed<u16, true>(pixels_dst, pixels_src, valid_range.second);
-					else
-						do_memory_transfer_packed<u16, false>(pixels_dst, pixels_src, valid_range.second);
-					break;
-				case 4:
-					if (pack_unpack_swap_bytes)
-						do_memory_transfer_packed<u32, true>(pixels_dst, pixels_src, valid_range.second);
-					else
-						do_memory_transfer_packed<u32, false>(pixels_dst, pixels_src, valid_range.second);
-					break;
-				}
-			};
-
-			auto memory_transfer_padded = [=]()
-			{
-				const u32 num_rows = valid_range.second / rsx_pitch;
-				switch (elem_size)
-				{
-				default:
-					LOG_ERROR(RSX, "Invalid element width %d", elem_size);
-				case 1:
-					do_memory_transfer_padded<u8, false>(pixels_dst, pixels_src, real_pitch, rsx_pitch, num_rows);
-					break;
-				case 2:
-					if (pack_unpack_swap_bytes)
-						do_memory_transfer_padded<u16, true>(pixels_dst, pixels_src, real_pitch, rsx_pitch, num_rows);
-					else
-						do_memory_transfer_padded<u16, false>(pixels_dst, pixels_src, real_pitch, rsx_pitch, num_rows);
-					break;
-				case 4:
-					if (pack_unpack_swap_bytes)
-						do_memory_transfer_padded<u32, true>(pixels_dst, pixels_src, real_pitch, rsx_pitch, num_rows);
-					else
-						do_memory_transfer_padded<u32, false>(pixels_dst, pixels_src, real_pitch, rsx_pitch, num_rows);
-					break;
-				}
-			};
-
-			// NOTE: We have to do our own byte swapping since the driver doesnt do it for us
-			// TODO: Replace the cpu-side transformations with trivial compute pipelines
 			if (real_pitch >= rsx_pitch || valid_range.second <= rsx_pitch)
 			{
-				switch (vram_texture->info.format)
-				{
-				case VK_FORMAT_D32_SFLOAT_S8_UINT:
-				{
-					rsx::convert_le_f32_to_be_d24(pixels_dst, pixels_src, valid_range.second >> 2, 1);
-					break;
-				}
-				case VK_FORMAT_D24_UNORM_S8_UINT:
-				{
-					rsx::convert_le_d24x8_to_be_d24x8(pixels_dst, pixels_src, valid_range.second >> 2, 1);
-					break;
-				}
-				default:
-				{
-					memory_transfer_packed();
-					break;
-				}
-				}
+				memcpy(pixels_dst, pixels_src, valid_range.second);
 			}
 			else
 			{
-				memory_transfer_padded();
-
-				switch (vram_texture->info.format)
+				if (valid_range.second % rsx_pitch)
 				{
-				case VK_FORMAT_D32_SFLOAT_S8_UINT:
-					rsx::convert_le_f32_to_be_d24(pixels_dst, pixels_dst, valid_range.second >> 2, 1);
-					break;
-				case VK_FORMAT_D24_UNORM_S8_UINT:
-					rsx::convert_le_d24x8_to_be_d24x8(pixels_dst, pixels_dst, valid_range.second >> 2, 1);
-					break;
+					fmt::throw_exception("Unreachable" HERE);
 				}
+
+				const u32 num_rows = valid_range.second / rsx_pitch;
+				auto _src = (u8*)pixels_src;
+				auto _dst = (u8*)pixels_dst;
+
+				for (u32 y = 0; y < num_rows; ++y)
+				{
+					memcpy(_dst, _src, real_pitch);
+					_src += real_pitch;
+					_dst += real_pitch;
+				}
 			}
 