Mirror of https://github.com/RPCS3/rpcs3.git
Vulkan API fixes
- Fix for texture barriers
- vulkan: Rework texture cache handling of depth surfaces
- Support for scaled depth blit using overlay pass
- Support proper readback of D24S8 in both D32F_S8 and D24U_S8 variants
- Optimize the depth conversion routines with SSE
- vulkan: Replace slow single element copy with std::memcpy
- Check heap status before attempting blit operations
- Bump guard size on upload buffer as well
Parent: 3bbecd998a
Commit: c191a98ec3
@@ -109,7 +109,7 @@ public:
 	bool is_critical()
 	{
 		const size_t guard_length = std::max(m_min_guard_size, m_largest_allocated_pool);
-		return (m_current_allocated_size + guard_length) > m_size;
+		return (m_current_allocated_size + guard_length) >= m_size;
 	}
 
 	void reset_allocation_stats()
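The comparison change above is subtle: with `>`, an allocation that lands exactly on the guard boundary is not reported as critical, so the reserved tail can be consumed. A minimal sketch of the guard-length heuristic, with hypothetical sizes (a stand-in, not RPCS3's real heap class):

    #include <algorithm>
    #include <cstddef>
    #include <cstdio>

    // Hypothetical ring-heap stand-in illustrating the guard-length check.
    struct ring_heap
    {
        size_t m_size;                   // total heap size in bytes
        size_t m_min_guard_size;         // minimum reserved tail (e.g. the bumped 32 MiB)
        size_t m_largest_allocated_pool; // largest single allocation seen so far
        size_t m_current_allocated_size; // bytes currently in use

        bool is_critical() const
        {
            // Reserve enough space for the largest request we may still receive;
            // '>=' treats an exactly-consumed guard region as critical too.
            const size_t guard_length = std::max(m_min_guard_size, m_largest_allocated_pool);
            return (m_current_allocated_size + guard_length) >= m_size;
        }
    };

    int main()
    {
        ring_heap h{ /*size*/ 64, /*guard*/ 16, /*largest*/ 8, /*used*/ 48 };
        // 48 + 16 == 64: with '>' this reports false and the guard can be
        // overwritten; with '>=' the caller flushes first.
        std::printf("critical: %d\n", h.is_critical());
    }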
@@ -35,10 +35,8 @@ namespace gl
 		switch (type)
 		{
 		case GL_DEBUG_TYPE_ERROR:
-		{
 			LOG_ERROR(RSX, "%s", message);
 			return;
-		}
 		default:
 			LOG_WARNING(RSX, "%s", message);
 			return;
@@ -603,7 +603,7 @@ VKGSRender::VKGSRender() : GSRender()
 	m_uniform_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 0));
 	m_index_buffer_ring_info.init(VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, "index buffer");
 	m_index_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_INDEX_BUFFER_BIT, 0));
-	m_texture_upload_buffer_ring_info.init(VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, "texture upload buffer", 0x400000);
+	m_texture_upload_buffer_ring_info.init(VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, "texture upload buffer", 32 * 0x100000);
 	m_texture_upload_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, 0));
 
 	for (auto &ctx : frame_context_storage)
@@ -627,6 +627,9 @@ VKGSRender::VKGSRender() : GSRender()
 	m_depth_converter.reset(new vk::depth_convert_pass());
 	m_depth_converter->create(*m_device);
 
+	m_depth_scaler.reset(new vk::depth_scaling_pass());
+	m_depth_scaler->create(*m_device);
+
 	m_prog_buffer.reset(new VKProgramBuffer(m_render_passes.data()));
 
 	if (g_cfg.video.disable_vertex_cache)
@@ -750,6 +753,10 @@ VKGSRender::~VKGSRender()
 	m_depth_converter->destroy();
 	m_depth_converter.reset();
 
+	//Depth surface blitter
+	m_depth_scaler->destroy();
+	m_depth_scaler.reset();
+
 	//Pipeline descriptors
 	vkDestroyPipelineLayout(*m_device, pipeline_layout, nullptr);
 	vkDestroyDescriptorSetLayout(*m_device, descriptor_layouts, nullptr);
@@ -884,29 +891,8 @@ void VKGSRender::notify_tile_unbound(u32 tile)
 		}
 	}
 
-void VKGSRender::begin()
+void VKGSRender::check_heap_status()
 {
-	rsx::thread::begin();
-
-	if (skip_frame || renderer_unavailable ||
-		(conditional_render_enabled && conditional_render_test_failed))
-		return;
-
-	init_buffers(rsx::framebuffer_creation_context::context_draw);
-
-	if (!framebuffer_status_valid)
-		return;
-
-	//Ease resource pressure if the number of draw calls becomes too high or we are running low on memory resources
-	if (m_current_frame->used_descriptors >= DESCRIPTOR_MAX_DRAW_CALLS)
-	{
-		//No need to stall if we have more than one frame queue anyway
-		flush_command_queue();
-
-		CHECK_RESULT(vkResetDescriptorPool(*m_device, m_current_frame->descriptor_pool, 0));
-		m_current_frame->used_descriptors = 0;
-	}
-
 	if (m_attrib_ring_info.is_critical() ||
 		m_texture_upload_buffer_ring_info.is_critical() ||
 		m_uniform_buffer_ring_info.is_critical() ||
@@ -953,6 +939,32 @@ void VKGSRender::begin()
 		std::chrono::time_point<steady_clock> submit_end = steady_clock::now();
 		m_flip_time += std::chrono::duration_cast<std::chrono::microseconds>(submit_end - submit_start).count();
 	}
 }
+
+void VKGSRender::begin()
+{
+	rsx::thread::begin();
+
+	if (skip_frame || renderer_unavailable ||
+		(conditional_render_enabled && conditional_render_test_failed))
+		return;
+
+	init_buffers(rsx::framebuffer_creation_context::context_draw);
+
+	if (!framebuffer_status_valid)
+		return;
+
+	//Ease resource pressure if the number of draw calls becomes too high or we are running low on memory resources
+	if (m_current_frame->used_descriptors >= DESCRIPTOR_MAX_DRAW_CALLS)
+	{
+		//No need to stall if we have more than one frame queue anyway
+		flush_command_queue();
+
+		CHECK_RESULT(vkResetDescriptorPool(*m_device, m_current_frame->descriptor_pool, 0));
+		m_current_frame->used_descriptors = 0;
+	}
+
+	check_heap_status();
+
 	VkDescriptorSetAllocateInfo alloc_info = {};
 	alloc_info.descriptorPool = m_current_frame->descriptor_pool;
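The two hunks above split the heap-pressure check out of begin() so other entry points (notably the blit path further down) can reuse it before allocating. A condensed, hypothetical sketch of the flush-on-critical pattern (names follow the diff; the bodies are stand-ins, not the real renderer):

    #include <cstdio>
    #include <vector>

    // Stand-in for the ring heaps above; only what the check needs.
    struct ring_heap
    {
        bool critical = false;
        bool is_critical() const { return critical; }
        void reset() { critical = false; }
    };

    struct renderer
    {
        std::vector<ring_heap*> heaps;

        void flush_command_queue() { std::puts("flush: wait for GPU, recycle heap space"); }

        // Extracted so both begin() and the blit path call it before allocating.
        void check_heap_status()
        {
            for (auto* h : heaps)
            {
                if (h->is_critical())
                {
                    // The real renderer submits pending work and waits here,
                    // then resets allocation statistics on every ring buffer.
                    flush_command_queue();
                    for (auto* r : heaps) r->reset();
                    break;
                }
            }
        }

        void begin()            { check_heap_status(); /* allocate descriptors, draw */ }
        void blit_from_memory() { check_heap_status(); /* stage pixels, copy */ }
    };

    int main()
    {
        ring_heap a, b; b.critical = true;
        renderer r; r.heaps = { &a, &b };
        r.blit_from_memory(); // triggers the flush before staging data
    }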
@@ -1994,6 +2006,7 @@ void VKGSRender::process_swap_request(frame_context_t *ctx, bool free_resources)
 	}
 
 	m_depth_converter->free_resources();
+	m_depth_scaler->free_resources();
 	m_ui_renderer->free_resources();
 
 	ctx->buffer_views_to_clean.clear();
@@ -2736,7 +2749,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
 
 			const u32 range = pitch * m_depth_surface_info.height * aa_factor;
 			m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), m_depth_surface_info.address, range,
-				m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch, gcm_format, true);
+				m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch, gcm_format, false);
 		}
 	}
 
@@ -3165,11 +3178,39 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst
 	if (renderer_unavailable)
 		return false;
 
+	//Verify enough memory exists before attempting to handle data transfer
+	check_heap_status();
+
 	//Stop all parallel operations until this is finished
 	std::lock_guard<std::mutex> lock(m_secondary_cb_guard);
 
 	auto result = m_texture_cache.blit(src, dst, interpolate, m_rtts, *m_current_command_buffer);
 	m_current_command_buffer->begin();
 
+	if (auto deferred_op_dst = std::get<1>(result))
+	{
+		//Requires manual scaling; depth/stencil surface
+		auto deferred_op_src = std::get<2>(result);
+		auto src_view = std::get<3>(result);
+
+		auto rp = vk::get_render_pass_location(VK_FORMAT_UNDEFINED, deferred_op_dst->info.format, 0);
+		auto render_pass = m_render_passes[rp];
+
+		auto old_src_layout = deferred_op_src->current_layout;
+		auto old_dst_layout = deferred_op_dst->current_layout;
+
+		vk::change_image_layout(*m_current_command_buffer, deferred_op_src, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
+		vk::change_image_layout(*m_current_command_buffer, deferred_op_dst, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
+
+		m_depth_scaler->run(*m_current_command_buffer, deferred_op_dst->width(), deferred_op_dst->height(), deferred_op_dst,
+			src_view, render_pass, m_framebuffers_to_clean);
+
+		vk::change_image_layout(*m_current_command_buffer, deferred_op_src, old_src_layout);
+		vk::change_image_layout(*m_current_command_buffer, deferred_op_dst, old_dst_layout);
+	}
+
 	m_samplers_dirty.store(true);
-	return result;
+	return std::get<0>(result);
 }
 
 void VKGSRender::clear_zcull_stats(u32 type)
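The deferred path above wraps the overlay draw in a transition/restore pair: both images are moved into the layouts the pass needs, then returned to whatever they were before. A compact sketch of that idiom as an RAII guard, with the RPCS3 types reduced to stand-ins (the layout helper here only tracks state; the real one records a pipeline barrier):

    #include <vulkan/vulkan.h>

    // Reduced stand-ins for RPCS3's vk::image and its layout helper.
    struct image_t { VkImage value; VkImageLayout current_layout; };

    void change_image_layout(VkCommandBuffer, image_t* img, VkImageLayout new_layout)
    {
        // Real code records a VkImageMemoryBarrier here; this sketch only tracks state.
        img->current_layout = new_layout;
    }

    // RAII guard: transition an image for a pass, restore the previous layout after.
    class scoped_layout
    {
        VkCommandBuffer m_cmd;
        image_t* m_image;
        VkImageLayout m_old;
    public:
        scoped_layout(VkCommandBuffer cmd, image_t* img, VkImageLayout temp)
            : m_cmd(cmd), m_image(img), m_old(img->current_layout)
        {
            change_image_layout(cmd, img, temp);
        }
        ~scoped_layout() { change_image_layout(m_cmd, m_image, m_old); }
    };

    void run_depth_scale(VkCommandBuffer cmd, image_t* src, image_t* dst)
    {
        scoped_layout src_guard(cmd, src, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
        scoped_layout dst_guard(cmd, dst, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
        // ...bind the overlay pipeline and draw a fullscreen quad that writes
        // gl_FragDepth = texture(src).x into dst (see depth_scaling_pass below)...
    }   // guards restore the original layouts here, as the diff does manually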
@@ -252,6 +252,7 @@ private:
 
 	std::unique_ptr<vk::text_writer> m_text_writer;
 	std::unique_ptr<vk::depth_convert_pass> m_depth_converter;
+	std::unique_ptr<vk::depth_scaling_pass> m_depth_scaler;
 	std::unique_ptr<vk::ui_overlay_renderer> m_ui_renderer;
 
 	std::mutex m_sampler_mutex;
@@ -376,6 +377,8 @@ private:
 
 	void update_draw_state();
 
+	void check_heap_status();
+
 	/// returns primitive topology, index_count, allocated_verts, vertex_base_index, (offset in index buffer, index type)
 	std::tuple<VkPrimitiveTopology, u32, u32, u32, std::optional<std::tuple<VkDeviceSize, VkIndexType> > > upload_vertex_data();
 public:
@@ -388,6 +388,26 @@ namespace vk
 		image->current_layout = new_layout;
 	}
 
+	void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout)
+	{
+		if (image->current_layout == new_layout) return;
+
+		VkImageAspectFlags flags = VK_IMAGE_ASPECT_COLOR_BIT;
+		switch (image->info.format)
+		{
+		case VK_FORMAT_D16_UNORM:
+			flags = VK_IMAGE_ASPECT_DEPTH_BIT;
+			break;
+		case VK_FORMAT_D24_UNORM_S8_UINT:
+		case VK_FORMAT_D32_SFLOAT_S8_UINT:
+			flags = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
+			break;
+		}
+
+		change_image_layout(cmd, image->value, image->current_layout, new_layout, { flags, 0, 1, 0, 1 });
+		image->current_layout = new_layout;
+	}
+
 	void insert_texture_barrier(VkCommandBuffer cmd, VkImage image, VkImageLayout layout, VkImageSubresourceRange range)
 	{
 		VkImageMemoryBarrier barrier = {};
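The new overload derives the aspect mask from the image format, so callers no longer pass mismatched masks for depth surfaces. A standalone helper expressing the same mapping (it covers only the formats the diff handles; other depth formats would need their own cases):

    #include <vulkan/vulkan.h>

    // Map an image format to the aspect flags a barrier must name.
    // Mirrors the switch in the new change_image_layout overload.
    VkImageAspectFlags aspect_flags_for_format(VkFormat format)
    {
        switch (format)
        {
        case VK_FORMAT_D16_UNORM:           // depth only: no stencil aspect exists
            return VK_IMAGE_ASPECT_DEPTH_BIT;
        case VK_FORMAT_D24_UNORM_S8_UINT:   // packed depth + stencil
        case VK_FORMAT_D32_SFLOAT_S8_UINT:  // float depth + separate stencil
            return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
        default:                            // everything else is treated as color here
            return VK_IMAGE_ASPECT_COLOR_BIT;
        }
    }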
@@ -419,7 +439,9 @@ namespace vk
 	{
 		if (image->info.usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)
 		{
-			insert_texture_barrier(cmd, image->value, image->current_layout, { VK_IMAGE_ASPECT_DEPTH_BIT, 0, 1, 0, 1 });
+			VkImageAspectFlags aspect = VK_IMAGE_ASPECT_DEPTH_BIT;
+			if (image->info.format != VK_FORMAT_D16_UNORM) aspect |= VK_IMAGE_ASPECT_STENCIL_BIT;
+			insert_texture_barrier(cmd, image->value, image->current_layout, { aspect, 0, 1, 0, 1 });
 		}
 		else
 		{
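For context, this is roughly the shape of the barrier such a helper has to record: a same-layout image memory barrier whose subresource range names every aspect the image has (the texture-barrier fix above). The access and stage masks here are plausible choices for an attachment-write-to-shader-read hazard, not necessarily RPCS3's exact ones:

    #include <vulkan/vulkan.h>

    // Make prior attachment writes visible to subsequent shader reads without
    // changing the image layout (oldLayout == newLayout is legal in Vulkan).
    void texture_read_barrier(VkCommandBuffer cmd, VkImage image, VkImageLayout layout,
                              VkImageSubresourceRange range)
    {
        VkImageMemoryBarrier barrier = {};
        barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
        barrier.oldLayout = layout;
        barrier.newLayout = layout;           // no transition, just a memory dependency
        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.image = image;
        barrier.subresourceRange = range;     // must name every aspect the image has
        barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                                VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
        barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;

        vkCmdPipelineBarrier(cmd,
            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
            VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
            0, 0, nullptr, 0, nullptr, 1, &barrier);
    }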
@@ -82,6 +82,7 @@ namespace vk
 
 	void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, VkImageSubresourceRange range);
 	void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout, VkImageSubresourceRange range);
+	void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout);
 	void copy_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout, u32 width, u32 height, u32 mipmaps, VkImageAspectFlagBits aspect);
 	void copy_scaled_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout, u32 src_x_offset, u32 src_y_offset, u32 src_width, u32 src_height, u32 dst_x_offset, u32 dst_y_offset, u32 dst_width, u32 dst_height, u32 mipmaps, VkImageAspectFlagBits aspect, bool compatible_formats);
 
@@ -716,4 +716,42 @@ namespace vk
 			ui.update();
 		}
 	};
+
+	struct depth_scaling_pass : public overlay_pass
+	{
+		depth_scaling_pass()
+		{
+			vs_src =
+			{
+				"#version 450\n"
+				"#extension GL_ARB_separate_shader_objects : enable\n"
+				"layout(location=0) out vec2 tc0;\n"
+				"\n"
+				"void main()\n"
+				"{\n"
+				"	vec2 positions[] = {vec2(-1., -1.), vec2(1., -1.), vec2(-1., 1.), vec2(1., 1.)};\n"
+				"	vec2 coords[] = {vec2(0., 0.), vec2(1., 0.), vec2(0., 1.), vec2(1., 1.)};\n"
+				"	gl_Position = vec4(positions[gl_VertexIndex % 4], 0., 1.);\n"
+				"	tc0 = coords[gl_VertexIndex % 4];\n"
+				"}\n"
+			};
+
+			fs_src =
+			{
+				"#version 420\n"
+				"#extension GL_ARB_separate_shader_objects : enable\n"
+				"layout(set=0, binding=0) uniform sampler2D fs0;\n"
+				"layout(location=0) in vec2 tc0;\n"
+				"\n"
+				"void main()\n"
+				"{\n"
+				"	gl_FragDepth = texture(fs0, tc0).x;\n"
+				"}\n"
+			};
+
+			renderpass_config.write_color = false;
+			m_vertex_shader.id = 100006;
+			m_fragment_shader.id = 100007;
+		}
+	};
 }
@@ -183,15 +183,16 @@ namespace vk
 				break;
 			}
 
+			//TODO: Read back stencil values (is this really necessary?)
 			VkBufferImageCopy copyRegion = {};
 			copyRegion.bufferOffset = 0;
 			copyRegion.bufferRowLength = internal_width;
 			copyRegion.bufferImageHeight = internal_height;
-			copyRegion.imageSubresource = {aspect_flag, 0, 0, 1};
+			copyRegion.imageSubresource = {aspect_flag & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1};
 			copyRegion.imageOffset = {};
 			copyRegion.imageExtent = {internal_width, internal_height, 1};
 
-			VkImageSubresourceRange subresource_range = { aspect_flag & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 1, 0, 1 };
+			VkImageSubresourceRange subresource_range = { aspect_flag, 0, 1, 0, 1 };
 
 			VkImageLayout layout = vram_texture->current_layout;
 			change_image_layout(cmd, vram_texture, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresource_range);
@@ -235,11 +236,7 @@ namespace vk
 			}
 			else
 			{
-				auto typed_dst = (T *)pixels_dst;
-				auto typed_src = (T *)pixels_src;
-
-				for (u32 px = 0; px < block_size; ++px)
-					typed_dst[px] = typed_src[px];
+				memcpy(pixels_dst, pixels_src, block_size * sizeof(T));
 			}
 		}
 	}
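The element-wise loop above compiled to a slow scalar copy; memcpy lets the compiler and runtime use wide vector moves. A self-contained sketch of the two paths a do_memory_transfer<T, swap>-style helper needs (the template shape and portable byte swap are assumptions for illustration, not RPCS3's code verbatim):

    #include <cstdint>
    #include <cstring>

    // Copy 'count' elements of T, optionally byte-swapping each element.
    // The non-swapping path must be a plain memcpy: it becomes wide vector
    // moves instead of a per-element scalar loop.
    template <typename T, bool swap_bytes>
    void do_memory_transfer(void* dst, const void* src, size_t count)
    {
        if constexpr (!swap_bytes)
        {
            std::memcpy(dst, src, count * sizeof(T));
        }
        else
        {
            auto typed_dst = static_cast<T*>(dst);
            auto typed_src = static_cast<const T*>(src);
            for (size_t i = 0; i < count; ++i)
            {
                T v;
                std::memcpy(&v, typed_src + i, sizeof(T));
                // Portable byte swap; real code may use compiler intrinsics.
                unsigned char* b = reinterpret_cast<unsigned char*>(&v);
                for (size_t j = 0; j < sizeof(T) / 2; ++j)
                {
                    unsigned char t = b[j];
                    b[j] = b[sizeof(T) - 1 - j];
                    b[sizeof(T) - 1 - j] = t;
                }
                std::memcpy(typed_dst + i, &v, sizeof(T));
            }
        }
    }

    // Usage: copy 256 u32 texels with byte swapping enabled.
    // do_memory_transfer<uint32_t, true>(dst, src, 256);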
@@ -273,38 +270,55 @@ namespace vk
 				//We have to do our own byte swapping since the driver doesnt do it for us
 				if (real_pitch == rsx_pitch)
 				{
-					switch (bpp)
+					bool is_depth_format = true;
+					switch (vram_texture->info.format)
 					{
+					case VK_FORMAT_D32_SFLOAT_S8_UINT:
+						rsx::convert_le_f32_to_be_d24(pixels_dst, pixels_src, cpu_address_range >> 2, 1);
+						break;
+					case VK_FORMAT_D24_UNORM_S8_UINT:
+						rsx::convert_le_d24x8_to_be_d24x8(pixels_dst, pixels_src, cpu_address_range >> 2, 1);
+						break;
 					default:
-						LOG_ERROR(RSX, "Invalid bpp %d", bpp);
-					case 1:
-						do_memory_transfer<u8, false>(pixels_dst, pixels_src);
-						break;
-					case 2:
-						if (pack_unpack_swap_bytes)
-							do_memory_transfer<u16, true>(pixels_dst, pixels_src);
-						else
-							do_memory_transfer<u16, false>(pixels_dst, pixels_src);
-						break;
-					case 4:
-						if (pack_unpack_swap_bytes)
-							do_memory_transfer<u32, true>(pixels_dst, pixels_src);
-						else
-							do_memory_transfer<u32, false>(pixels_dst, pixels_src);
-						break;
-					case 8:
-						if (pack_unpack_swap_bytes)
-							do_memory_transfer<u64, true>(pixels_dst, pixels_src);
-						else
-							do_memory_transfer<u64, false>(pixels_dst, pixels_src);
-						break;
-					case 16:
-						if (pack_unpack_swap_bytes)
-							do_memory_transfer<u128, true>(pixels_dst, pixels_src);
-						else
-							do_memory_transfer<u128, false>(pixels_dst, pixels_src);
+						is_depth_format = false;
 						break;
 					}
+
+					if (!is_depth_format)
+					{
+						switch (bpp)
+						{
+						default:
+							LOG_ERROR(RSX, "Invalid bpp %d", bpp);
+						case 1:
+							do_memory_transfer<u8, false>(pixels_dst, pixels_src);
+							break;
+						case 2:
+							if (pack_unpack_swap_bytes)
+								do_memory_transfer<u16, true>(pixels_dst, pixels_src);
+							else
+								do_memory_transfer<u16, false>(pixels_dst, pixels_src);
+							break;
+						case 4:
+							if (pack_unpack_swap_bytes)
+								do_memory_transfer<u32, true>(pixels_dst, pixels_src);
+							else
+								do_memory_transfer<u32, false>(pixels_dst, pixels_src);
+							break;
+						case 8:
+							if (pack_unpack_swap_bytes)
+								do_memory_transfer<u64, true>(pixels_dst, pixels_src);
+							else
+								do_memory_transfer<u64, false>(pixels_dst, pixels_src);
+							break;
+						case 16:
+							if (pack_unpack_swap_bytes)
+								do_memory_transfer<u128, true>(pixels_dst, pixels_src);
+							else
+								do_memory_transfer<u128, false>(pixels_dst, pixels_src);
+							break;
+						}
+					}
 				}
 				else
 				{
@@ -324,6 +338,16 @@ namespace vk
 					}
 
 					rsx::scale_image_nearest(pixels_dst, pixels_src, width, height, rsx_pitch, real_pitch, bpp, samples_u, samples_v, pack_unpack_swap_bytes);
+
+					switch (vram_texture->info.format)
+					{
+					case VK_FORMAT_D32_SFLOAT_S8_UINT:
+						rsx::convert_le_f32_to_be_d24(pixels_dst, pixels_dst, cpu_address_range >> 2, 1);
+						break;
+					case VK_FORMAT_D24_UNORM_S8_UINT:
+						rsx::convert_le_d24x8_to_be_d24x8(pixels_dst, pixels_dst, cpu_address_range >> 2, 1);
+						break;
+					}
 				}
 
 				dma_buffer->unmap();
@@ -690,6 +714,7 @@ namespace vk
 			VkImageAspectFlags aspect_flags;
 			VkImageType image_type;
 			VkImageViewType image_view_type;
+			VkImageUsageFlags usage_flags = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
 			u8 layer = 0;
 
 			switch (type)
@@ -724,10 +749,12 @@ namespace vk
 			{
 			case CELL_GCM_TEXTURE_DEPTH24_D8:
 				aspect_flags = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
+				usage_flags |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
 				vk_format = m_formats_support.d24_unorm_s8? VK_FORMAT_D24_UNORM_S8_UINT : VK_FORMAT_D32_SFLOAT_S8_UINT;
 				break;
 			case CELL_GCM_TEXTURE_DEPTH16:
 				aspect_flags = VK_IMAGE_ASPECT_DEPTH_BIT;
+				usage_flags |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
 				vk_format = VK_FORMAT_D16_UNORM;
 				break;
 			default:
@@ -740,8 +767,7 @@ namespace vk
 				image_type,
 				vk_format,
 				width, height, depth, mipmaps, layer, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
-				VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
-				is_cubemap ? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0);
+				VK_IMAGE_TILING_OPTIMAL, usage_flags, is_cubemap ? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0);
 
 			mapping = apply_component_mapping_flags(gcm_format, flags, remap_vector);
 
@@ -768,7 +794,7 @@ namespace vk
 			{
 				//TODO: Confirm byte swap patterns
 				region.protect(utils::protection::no);
-				region.set_unpack_swap_bytes(true);
+				region.set_unpack_swap_bytes((aspect_flags & VK_IMAGE_ASPECT_COLOR_BIT) == VK_IMAGE_ASPECT_COLOR_BIT);
 				no_access_range = region.get_min_max(no_access_range);
 			}
 
@@ -954,12 +980,16 @@ namespace vk
 			return upload_texture(cmd, tex, m_rtts, cmd, m_memory_types, const_cast<const VkQueue>(m_submit_queue));
 		}
 
-		bool blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, rsx::vk_render_targets& m_rtts, vk::command_buffer& cmd)
+		std::tuple<bool, vk::image*, vk::image*, vk::image_view*> blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, rsx::vk_render_targets& m_rtts, vk::command_buffer& cmd)
 		{
 			struct blit_helper
 			{
 				vk::command_buffer* commands;
 				blit_helper(vk::command_buffer *c) : commands(c) {}
+
+				vk::image* deferred_op_src = nullptr;
+				vk::image* deferred_op_dst = nullptr;
 
 				void scale_image(vk::image* src, vk::image* dst, areai src_area, areai dst_area, bool /*interpolate*/, bool is_depth)
 				{
 					VkImageAspectFlagBits aspect = VK_IMAGE_ASPECT_COLOR_BIT;
@@ -984,15 +1014,44 @@ namespace vk
 						return;
 					}
 
-					copy_scaled_image(*commands, src->value, dst->value, src->current_layout, dst->current_layout, src_area.x1, src_area.y1, src_area.x2 - src_area.x1, src_area.y2 - src_area.y1,
-						dst_area.x1, dst_area.y1, dst_area.x2 - dst_area.x1, dst_area.y2 - dst_area.y1, 1, aspect, src->info.format == dst->info.format);
+					const auto src_width = src_area.x2 - src_area.x1;
+					const auto src_height = src_area.y2 - src_area.y1;
+					const auto dst_width = dst_area.x2 - dst_area.x1;
+					const auto dst_height = dst_area.y2 - dst_area.y1;
+
+					if (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))
+					{
+						if (src_width != dst_width || src_height != dst_height || src->info.format != dst->info.format)
+						{
+							//Scaled depth scaling
+							deferred_op_src = src;
+							deferred_op_dst = dst;
+						}
+					}
+
+					if (!deferred_op_src)
+					{
+						copy_scaled_image(*commands, src->value, dst->value, src->current_layout, dst->current_layout, src_area.x1, src_area.y1, src_width, src_height,
+							dst_area.x1, dst_area.y1, dst_width, dst_height, 1, aspect, src->info.format == dst->info.format);
+					}
+
 					change_image_layout(*commands, dst, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, {(VkImageAspectFlags)aspect, 0, dst->info.mipLevels, 0, dst->info.arrayLayers});
 				}
 			}
 			helper(&cmd);
 
-			return upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper, cmd, m_memory_types, const_cast<const VkQueue>(m_submit_queue));
+			bool reply = upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper, cmd, m_memory_types, const_cast<const VkQueue>(m_submit_queue));
+
+			if (helper.deferred_op_src == nullptr)
+				return std::make_tuple(reply, nullptr, nullptr, nullptr);
+
+			VkImageSubresourceRange view_range = { VK_IMAGE_ASPECT_DEPTH_BIT, 0, 1, 0, 1 };
+			auto tmp_view = std::make_unique<vk::image_view>(*vk::get_current_renderer(), helper.deferred_op_src->value, VK_IMAGE_VIEW_TYPE_2D,
+				helper.deferred_op_src->info.format, helper.deferred_op_src->native_component_map, view_range);
+
+			auto src_view = tmp_view.get();
+			m_discardable_storage.push_back(tmp_view);
+			return std::make_tuple(reply, helper.deferred_op_dst, helper.deferred_op_src, src_view);
 		}
 
 		const u32 get_unreleased_textures_count() const override
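The blit entry point now returns a tuple instead of a bool: position 0 is the cache result, and positions 1-3 are non-null only when a deferred depth scale is required. A small stand-alone model of consuming that shape (the real caller in scaled_image_from_memory above uses std::get; structured bindings are used here for clarity):

    #include <cstdio>
    #include <tuple>

    struct image { int id; };
    struct image_view { int id; };

    // Stand-in with the same tuple shape as the new texture-cache blit:
    // <succeeded, deferred dst, deferred src, src view>; nulls mean "done inline".
    std::tuple<bool, image*, image*, image_view*> blit(bool needs_depth_scale)
    {
        static image s{1}, d{2};
        static image_view v{3};
        if (needs_depth_scale)
            return std::make_tuple(true, &d, &s, &v); // caller runs the overlay pass
        return std::make_tuple(true, nullptr, nullptr, nullptr);
    }

    int main()
    {
        auto [ok, deferred_dst, deferred_src, src_view] = blit(true);
        if (deferred_dst)
            std::printf("deferred depth scale: src=%d dst=%d view=%d\n",
                        deferred_src->id, deferred_dst->id, src_view->id);
        return ok ? 0 : 1;
    }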
@@ -4,6 +4,7 @@
 #include "Emu/RSX/GCM.h"
 #include "Common/BufferUtils.h"
 #include "overlays.h"
+#include "Utilities/sysinfo.h"
 
 extern "C"
 {
@@ -363,4 +364,113 @@ namespace rsx
 			}
 		}
 	}
+
+	void convert_le_f32_to_be_d24(void *dst, void *src, u32 row_length_in_texels, u32 num_rows)
+	{
+		const u32 num_pixels = row_length_in_texels * num_rows;
+		verify(HERE), (num_pixels & 3) == 0;
+
+		const auto num_iterations = (num_pixels >> 2);
+
+		__m128i* dst_ptr = (__m128i*)dst;
+		__m128i* src_ptr = (__m128i*)src;
+
+		const __m128 scale_vector = _mm_set1_ps(16777214.f);
+
+#if defined (_MSC_VER) || defined (__SSSE3__)
+		if (LIKELY(utils::has_ssse3()))
+		{
+			const __m128i swap_mask = _mm_set_epi8
+			(
+				0xF, 0xC, 0xD, 0xE,
+				0xB, 0x8, 0x9, 0xA,
+				0x7, 0x4, 0x5, 0x6,
+				0x3, 0x0, 0x1, 0x2
+			);
+
+			for (u32 n = 0; n < num_iterations; ++n)
+			{
+				const __m128i src_vector = _mm_loadu_si128(src_ptr);
+				const __m128i result = _mm_cvtps_epi32(_mm_mul_ps((__m128&)src_vector, scale_vector));
+				const __m128i shuffled_vector = _mm_shuffle_epi8(result, swap_mask);
+				_mm_stream_si128(dst_ptr, shuffled_vector);
+				++dst_ptr;
+				++src_ptr;
+			}
+
+			return;
+		}
+#endif
+
+		const __m128i mask1 = _mm_set1_epi32(0xFF00FF00);
+		const __m128i mask2 = _mm_set1_epi32(0x00FF0000);
+		const __m128i mask3 = _mm_set1_epi32(0x000000FF);
+
+		for (u32 n = 0; n < num_iterations; ++n)
+		{
+			const __m128i src_vector = _mm_loadu_si128(src_ptr);
+			const __m128i result = _mm_cvtps_epi32(_mm_mul_ps((__m128&)src_vector, scale_vector));
+
+			const __m128i v1 = _mm_and_si128(result, mask1);
+			const __m128i v2 = _mm_and_si128(_mm_slli_epi32(result, 16), mask2);
+			const __m128i v3 = _mm_and_si128(_mm_srli_epi32(result, 16), mask3);
+			const __m128i shuffled_vector = _mm_or_si128(_mm_or_si128(v1, v2), v3);
+
+			_mm_stream_si128(dst_ptr, shuffled_vector);
+			++dst_ptr;
+			++src_ptr;
+		}
+	}
+
+	void convert_le_d24x8_to_be_d24x8(void *dst, void *src, u32 row_length_in_texels, u32 num_rows)
+	{
+		const u32 num_pixels = row_length_in_texels * num_rows;
+		verify(HERE), (num_pixels & 3) == 0;
+
+		const auto num_iterations = (num_pixels >> 2);
+
+		__m128i* dst_ptr = (__m128i*)dst;
+		__m128i* src_ptr = (__m128i*)src;
+
+#if defined (_MSC_VER) || defined (__SSSE3__)
+		if (LIKELY(utils::has_ssse3()))
+		{
+			const __m128i swap_mask = _mm_set_epi8
+			(
+				0xF, 0xC, 0xD, 0xE,
+				0xB, 0x8, 0x9, 0xA,
+				0x7, 0x4, 0x5, 0x6,
+				0x3, 0x0, 0x1, 0x2
+			);
+
+			for (u32 n = 0; n < num_iterations; ++n)
+			{
+				const __m128i src_vector = _mm_loadu_si128(src_ptr);
+				const __m128i shuffled_vector = _mm_shuffle_epi8(src_vector, swap_mask);
+				_mm_stream_si128(dst_ptr, shuffled_vector);
+				++dst_ptr;
+				++src_ptr;
+			}
+
+			return;
+		}
+#endif
+
+		const __m128i mask1 = _mm_set1_epi32(0xFF00FF00);
+		const __m128i mask2 = _mm_set1_epi32(0x00FF0000);
+		const __m128i mask3 = _mm_set1_epi32(0x000000FF);
+
+		for (u32 n = 0; n < num_iterations; ++n)
+		{
+			const __m128i src_vector = _mm_loadu_si128(src_ptr);
+			const __m128i v1 = _mm_and_si128(src_vector, mask1);
+			const __m128i v2 = _mm_and_si128(_mm_slli_epi32(src_vector, 16), mask2);
+			const __m128i v3 = _mm_and_si128(_mm_srli_epi32(src_vector, 16), mask3);
+			const __m128i shuffled_vector = _mm_or_si128(_mm_or_si128(v1, v2), v3);
+
+			_mm_stream_si128(dst_ptr, shuffled_vector);
+			++dst_ptr;
+			++src_ptr;
+		}
+	}
 }
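Both routines perform the same byte permutation per 32-bit texel: swap bytes 0 and 2 while leaving bytes 1 and 3 in place, and the float variant first rescales a normalized depth value into 24-bit integer range. A scalar reference of that logic, useful for sanity-checking the SIMD paths (the 16777214.f scale constant is taken from the diff; rounding via lround approximates the default _mm_cvtps_epi32 behavior):

    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    // Swap bytes 0 and 2 of a 32-bit value, keeping bytes 1 and 3.
    // This is exactly what the SSE mask/shift fallback computes per lane.
    static uint32_t swap_d24x8(uint32_t v)
    {
        return (v & 0xFF00FF00u) | ((v << 16) & 0x00FF0000u) | ((v >> 16) & 0x000000FFu);
    }

    // Scalar model of one convert_le_f32_to_be_d24 lane: rescale a normalized
    // float depth to 24-bit integer range, then apply the same byte swap.
    static uint32_t f32_to_be_d24(float depth)
    {
        const uint32_t d24 = static_cast<uint32_t>(std::lround(depth * 16777214.f));
        return swap_d24x8(d24);
    }

    int main()
    {
        std::printf("%08X\n", swap_d24x8(0x00123456u)); // -> 00563412
        std::printf("%08X\n", f32_to_be_d24(1.0f));     // full-range depth, byte-swapped
    }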
@@ -169,6 +169,9 @@ namespace rsx
 	void clip_image(u8 *dst, const u8 *src, int clip_x, int clip_y, int clip_w, int clip_h, int bpp, int src_pitch, int dst_pitch);
 	void clip_image(std::unique_ptr<u8[]>& dst, const u8 *src, int clip_x, int clip_y, int clip_w, int clip_h, int bpp, int src_pitch, int dst_pitch);
 
+	void convert_le_f32_to_be_d24(void *dst, void *src, u32 row_length_in_texels, u32 num_rows);
+	void convert_le_d24x8_to_be_d24x8(void *dst, void *src, u32 row_length_in_texels, u32 num_rows);
+
 	void fill_scale_offset_matrix(void *dest_, bool transpose,
 		float offset_x, float offset_y, float offset_z,
 		float scale_x, float scale_y, float scale_z);