From fc18e17ba64ce49e47285b94d85cc1999060dd0b Mon Sep 17 00:00:00 2001 From: kd-11 Date: Mon, 4 Jun 2018 19:57:16 +0300 Subject: [PATCH] vk: Implement depth scaling using hardware blit/copy engines - Removes the old depth scaling using an overlay. It was never going to work properly due to per-pixel stencil writes being unavailable - TODO: Preserve stencil buffer during ARGB8->D32S8 shader conversion pass --- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 9 +- rpcs3/Emu/RSX/GL/GLTextureCache.h | 27 +++- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 69 +---------- rpcs3/Emu/RSX/VK/VKGSRender.h | 1 - rpcs3/Emu/RSX/VK/VKHelpers.cpp | 47 ++++++- rpcs3/Emu/RSX/VK/VKHelpers.h | 29 ++++- rpcs3/Emu/RSX/VK/VKOverlays.h | 142 ++++++--------------- rpcs3/Emu/RSX/VK/VKRenderTargets.h | 18 +-- rpcs3/Emu/RSX/VK/VKTexture.cpp | 190 ++++++++++++++++++++++------- rpcs3/Emu/RSX/VK/VKTextureCache.h | 77 ++++-------- rpcs3/Emu/RSX/rsx_cache.h | 2 +- 11 files changed, 324 insertions(+), 287 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 5118f83a63..248253af93 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -1632,8 +1632,13 @@ void GLGSRender::synchronize_buffers() bool GLGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate) { - m_samplers_dirty.store(true); - return m_gl_texture_cache.blit(src, dst, interpolate, m_rtts); + if (m_gl_texture_cache.blit(src, dst, interpolate, m_rtts)) + { + m_samplers_dirty.store(true); + return true; + } + + return false; } void GLGSRender::notify_tile_unbound(u32 tile) diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index bd2a1fb981..9c7d1afc5f 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -84,8 +84,29 @@ namespace gl s32 old_fbo = 0; glGetIntegerv(GL_FRAMEBUFFER_BINDING, &old_fbo); - filter interp = linear_interpolation ? filter::linear : filter::nearest; - GLenum attachment = is_depth_copy ? GL_DEPTH_ATTACHMENT : GL_COLOR_ATTACHMENT0; + filter interp = (linear_interpolation && !is_depth_copy) ? filter::linear : filter::nearest; + GLenum attachment; + gl::buffers target; + + if (is_depth_copy) + { + if (src->get_internal_format() == gl::texture::internal_format::depth16 || + dst->get_internal_format() == gl::texture::internal_format::depth16) + { + attachment = GL_DEPTH_ATTACHMENT; + target = gl::buffers::depth; + } + else + { + attachment = GL_DEPTH_STENCIL_ATTACHMENT; + target = gl::buffers::depth_stencil; + } + } + else + { + attachment = GL_COLOR_ATTACHMENT0; + target = gl::buffers::color; + } blit_src.bind(); glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, src_id, 0); @@ -99,7 +120,7 @@ namespace gl if (scissor_test_enabled) glDisable(GL_SCISSOR_TEST); - blit_src.blit(blit_dst, src_rect, dst_rect, is_depth_copy ? 
buffers::depth : buffers::color, interp); + blit_src.blit(blit_dst, src_rect, dst_rect, target, interp); if (xfer_info.dst_is_typeless) { diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 3ed4cc81ff..4b7f7f2a64 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -634,9 +634,6 @@ VKGSRender::VKGSRender() : GSRender() m_depth_converter.reset(new vk::depth_convert_pass()); m_depth_converter->create(*m_device); - m_depth_scaler.reset(new vk::depth_scaling_pass()); - m_depth_scaler->create(*m_device); - m_attachment_clear_pass.reset(new vk::attachment_clear_pass()); m_attachment_clear_pass->create(*m_device); @@ -764,10 +761,6 @@ VKGSRender::~VKGSRender() m_depth_converter->destroy(); m_depth_converter.reset(); - //Depth surface blitter - m_depth_scaler->destroy(); - m_depth_scaler.reset(); - //Attachment clear helper m_attachment_clear_pass->destroy(); m_attachment_clear_pass.reset(); @@ -1164,24 +1157,11 @@ void VKGSRender::end() const auto dst_w = std::get<2>(region); const auto dst_h = std::get<3>(region); - if (!is_depth || (src_w == dst_w && src_h == dst_h)) - { - const VkImageAspectFlags aspect = surface->attachment_aspect_flag; + const VkImageAspectFlags aspect = surface->attachment_aspect_flag; - vk::copy_scaled_image(*m_current_command_buffer, surface->old_contents->value, surface->value, - surface->old_contents->current_layout, surface->current_layout, 0, 0, src_w, src_h, - 0, 0, dst_w, dst_h, 1, aspect, true); - } - else - { - auto rp = vk::get_render_pass_location(VK_FORMAT_UNDEFINED, surface->info.format, 0); - auto render_pass = m_render_passes[rp]; - - vk::change_image_layout(*m_current_command_buffer, surface->old_contents, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - - m_depth_scaler->run(*m_current_command_buffer, { 0, 0, (f32)src_w, (f32)src_h }, { 0, 0, (f32)dst_w, (f32)dst_h }, surface, - surface->old_contents, static_cast(surface->old_contents)->get_view(0xAAE4, rsx::default_remap_vector), render_pass, m_framebuffers_to_clean); - } + vk::copy_scaled_image(*m_current_command_buffer, surface->old_contents->value, surface->value, + surface->old_contents->current_layout, surface->current_layout, 0, 0, src_w, src_h, + 0, 0, dst_w, dst_h, 1, aspect, true, VK_FILTER_LINEAR, surface->info.format, surface->old_contents->info.format); } }; @@ -2048,7 +2028,6 @@ void VKGSRender::process_swap_request(frame_context_t *ctx, bool free_resources) m_attachment_clear_pass->free_resources(); m_depth_converter->free_resources(); - m_depth_scaler->free_resources(); m_ui_renderer->free_resources(); ctx->buffer_views_to_clean.clear(); @@ -3311,46 +3290,8 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst //Stop all parallel operations until this is finished std::lock_guard lock(m_secondary_cb_guard); - auto result = m_texture_cache.blit(src, dst, interpolate, m_rtts, *m_current_command_buffer); - m_current_command_buffer->begin(); - - if (result.succeeded) + if (m_texture_cache.blit(src, dst, interpolate, m_rtts, *m_current_command_buffer)) { - bool require_flush = false; - if (result.deferred) - { - //Requires manual scaling; depth/stencil surface - auto rp = vk::get_render_pass_location(VK_FORMAT_UNDEFINED, result.dst_image->info.format, 0); - auto render_pass = m_render_passes[rp]; - - auto old_src_layout = result.src_image->current_layout; - auto old_dst_layout = result.dst_image->current_layout; - - vk::change_image_layout(*m_current_command_buffer, result.src_image, 
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - vk::change_image_layout(*m_current_command_buffer, result.dst_image, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); - - // TODO: Insets - const areaf src_area = { 0, 0, (f32)result.src_image->width(), (f32)result.src_image->height() }; - const areaf dst_area = { 0, 0, (f32)result.dst_image->width(), (f32)result.dst_image->height() }; - m_depth_scaler->run(*m_current_command_buffer, src_area, dst_area, result.dst_image, result.src_image, - result.src_view, render_pass, m_framebuffers_to_clean); - - vk::change_image_layout(*m_current_command_buffer, result.src_image, old_src_layout); - vk::change_image_layout(*m_current_command_buffer, result.dst_image, old_dst_layout); - - require_flush = true; - } - - if (result.dst_image) - { - if (m_texture_cache.flush_if_cache_miss_likely(result.dst_image->info.format, result.real_dst_address, result.real_dst_size, - *m_current_command_buffer, m_swapchain->get_graphics_queue())) - require_flush = true; - } - - if (require_flush) - flush_command_queue(); - m_samplers_dirty.store(true); return true; } diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 6564281126..da28fa6e0a 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -261,7 +261,6 @@ private: std::unique_ptr m_text_writer; std::unique_ptr m_depth_converter; - std::unique_ptr m_depth_scaler; std::unique_ptr m_ui_renderer; std::unique_ptr m_attachment_clear_pass; diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp index 89959852b8..128b78fe19 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -6,10 +6,12 @@ namespace vk { context* g_current_vulkan_ctx = nullptr; render_device g_current_renderer; + driver_vendor g_driver_vendor = driver_vendor::unknown; std::shared_ptr g_mem_allocator = nullptr; std::unique_ptr g_null_texture; std::unique_ptr g_null_image_view; + std::unordered_map> g_typeless_textures; VkSampler g_null_sampler = nullptr; @@ -148,7 +150,7 @@ namespace vk if (g_null_image_view) return g_null_image_view->value; - g_null_texture.reset(new image(g_current_renderer, get_memory_mapping(g_current_renderer.gpu()).device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + g_null_texture.reset(new image(g_current_renderer, g_current_renderer.get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_IMAGE_TYPE_2D, VK_FORMAT_B8G8R8A8_UNORM, 4, 4, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, 0)); @@ -167,6 +169,25 @@ namespace vk return g_null_image_view->value; } + vk::image* get_typeless_helper(VkFormat format) + { + auto create_texture = [&]() + { + return new vk::image(g_current_renderer, g_current_renderer.get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + VK_IMAGE_TYPE_2D, format, 4096, 4096, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT, 0); + }; + + auto &ptr = g_typeless_textures[format]; + if (!ptr) + { + auto _img = create_texture(); + ptr.reset(_img); + } + + return ptr.get(); + } + void acquire_global_submit_lock() { g_submit_mutex.lock(); @@ -180,7 +201,9 @@ namespace vk void destroy_global_resources() { g_null_texture.reset(); - g_null_image_view .reset(); + g_null_image_view.reset(); + + g_typeless_textures.clear(); if (g_null_sampler) vkDestroySampler(g_current_renderer, 
g_null_sampler, nullptr); @@ -222,6 +245,7 @@ namespace vk g_drv_disable_fence_reset = false; g_num_processed_frames = 0; g_num_total_frames = 0; + g_driver_vendor = driver_vendor::unknown; const auto gpu_name = g_current_renderer.gpu().name(); @@ -240,14 +264,33 @@ namespace vk //Disable fence reset for proprietary driver and delete+initialize a new fence instead if (gpu_name.find("Radeon") != std::string::npos) { + g_driver_vendor = driver_vendor::AMD; g_drv_disable_fence_reset = true; } //Nvidia cards are easily susceptible to NaN poisoning if (gpu_name.find("NVIDIA") != std::string::npos || gpu_name.find("GeForce") != std::string::npos) { + g_driver_vendor = driver_vendor::NVIDIA; g_drv_sanitize_fp_values = true; } + + if (g_driver_vendor == driver_vendor::unknown) + { + if (gpu_name.find("RADV") != std::string::npos) + { + g_driver_vendor = driver_vendor::RADV; + } + else + { + LOG_WARNING(RSX, "Unknown driver vendor for device '%s'", gpu_name); + } + } + } + + driver_vendor get_driver_vendor() + { + return g_driver_vendor; } bool emulate_primitive_restart(rsx::primitive_type type) diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index 6bb7395ef9..3d7156efae 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -66,6 +66,7 @@ namespace vk struct image; struct vk_data_heap; class mem_allocator_base; + enum driver_vendor; vk::context *get_current_thread_ctx(); void set_current_thread_ctx(const vk::context &ctx); @@ -80,6 +81,7 @@ namespace vk bool emulate_primitive_restart(rsx::primitive_type type); bool sanitize_fp_values(); bool fence_reset_disabled(); + driver_vendor get_driver_vendor(); VkComponentMapping default_component_map(); VkComponentMapping apply_swizzle_remap(const std::array& base_remap, const std::pair, std::array>& remap_vector); @@ -89,6 +91,7 @@ namespace vk VkSampler null_sampler(); VkImageView null_image_view(vk::command_buffer&); + image* get_typeless_helper(VkFormat format); //Sync helpers around vkQueueSubmit void acquire_global_submit_lock(); @@ -109,8 +112,14 @@ namespace vk void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, VkImageSubresourceRange range); void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout, VkImageSubresourceRange range); void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout); - void copy_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout, u32 width, u32 height, u32 mipmaps, VkImageAspectFlagBits aspect); - void copy_scaled_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout, u32 src_x_offset, u32 src_y_offset, u32 src_width, u32 src_height, u32 dst_x_offset, u32 dst_y_offset, u32 dst_width, u32 dst_height, u32 mipmaps, VkImageAspectFlags aspect, bool compatible_formats); + + void copy_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout, + const areai& src_rect, const areai& dst_rect, u32 mipmaps, VkImageAspectFlags src_aspect, VkImageAspectFlags dst_aspect, + VkImageAspectFlags src_transfer_mask = 0xFF, VkImageAspectFlags dst_transfer_mask = 0xFF); + + void copy_scaled_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout, + u32 src_x_offset, u32 src_y_offset, u32 src_width, u32 src_height, u32 dst_x_offset, u32 dst_y_offset, u32 dst_width, u32 dst_height, 
u32 mipmaps, + VkImageAspectFlags aspect, bool compatible_formats, VkFilter filter = VK_FILTER_LINEAR, VkFormat src_format = VK_FORMAT_UNDEFINED, VkFormat dst_format = VK_FORMAT_UNDEFINED); std::pair get_compatible_surface_format(rsx::surface_color_format color_format); size_t get_render_pass_location(VkFormat color_surface_format, VkFormat depth_stencil_format, u8 color_surface_count); @@ -134,6 +143,14 @@ namespace vk void die_with_error(const char* faulting_addr, VkResult error_code); + enum driver_vendor + { + unknown, + AMD, + NVIDIA, + RADV + }; + struct memory_type_mapping { uint32_t host_visible_coherent; @@ -328,7 +345,7 @@ namespace vk public: using mem_handle_t = void *; - mem_allocator_base(VkDevice dev, VkPhysicalDevice pdev) : m_device(dev) {}; + mem_allocator_base(VkDevice dev, VkPhysicalDevice /*pdev*/) : m_device(dev) {}; ~mem_allocator_base() {}; virtual void destroy() = 0; @@ -386,7 +403,7 @@ namespace vk vmaFreeMemory(m_allocator, static_cast(mem_handle)); } - void *map(mem_handle_t mem_handle, u64 offset, u64 size) override + void *map(mem_handle_t mem_handle, u64 offset, u64 /*size*/) override { void *data = nullptr; @@ -432,7 +449,7 @@ namespace vk void destroy() override {}; - mem_handle_t alloc(u64 block_sz, u64 alignment, uint32_t memory_type_index) override + mem_handle_t alloc(u64 block_sz, u64 /*alignment*/, uint32_t memory_type_index) override { VkDeviceMemory memory; VkMemoryAllocateInfo info = {}; @@ -466,7 +483,7 @@ namespace vk return (VkDeviceMemory)mem_handle; } - u64 get_vk_device_memory_offset(mem_handle_t mem_handle) + u64 get_vk_device_memory_offset(mem_handle_t /*mem_handle*/) { return 0; } diff --git a/rpcs3/Emu/RSX/VK/VKOverlays.h b/rpcs3/Emu/RSX/VK/VKOverlays.h index d576b40ebd..a2dff18d78 100644 --- a/rpcs3/Emu/RSX/VK/VKOverlays.h +++ b/rpcs3/Emu/RSX/VK/VKOverlays.h @@ -76,22 +76,27 @@ namespace vk //Reserve descriptor pools m_descriptor_pool.create(*m_device, descriptor_pool_sizes, 2); - VkDescriptorSetLayoutBinding bindings[2] = {}; + VkDescriptorSetLayoutBinding bindings[3] = {}; - bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; bindings[0].descriptorCount = 1; - bindings[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; bindings[0].binding = 0; - bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; bindings[1].descriptorCount = 1; - bindings[1].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + bindings[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; bindings[1].binding = 1; + bindings[2].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + bindings[2].descriptorCount = 1; + bindings[2].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[2].binding = 2; + VkDescriptorSetLayoutCreateInfo infos = {}; infos.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; infos.pBindings = bindings; - infos.bindingCount = 2; + infos.bindingCount = 3; CHECK_RESULT(vkCreateDescriptorSetLayout(*m_device, &infos, nullptr, &m_descriptor_layout)); @@ -116,8 +121,9 @@ namespace vk virtual std::vector get_fragment_inputs() { std::vector fs_inputs; - fs_inputs.push_back({ ::glsl::program_domain::glsl_fragment_program, vk::glsl::program_input_type::input_type_texture,{},{}, 0, "fs0" }); - fs_inputs.push_back({ ::glsl::program_domain::glsl_fragment_program, vk::glsl::program_input_type::input_type_uniform_buffer,{},{}, 1, "static_data" 
}); + fs_inputs.push_back({ ::glsl::program_domain::glsl_fragment_program, vk::glsl::program_input_type::input_type_uniform_buffer,{},{}, 0, "static_data" }); + fs_inputs.push_back({ ::glsl::program_domain::glsl_fragment_program, vk::glsl::program_input_type::input_type_texture,{},{}, 1, "fs0" }); + fs_inputs.push_back({ ::glsl::program_domain::glsl_fragment_program, vk::glsl::program_input_type::input_type_texture,{},{}, 2, "fs1" }); return fs_inputs; } @@ -209,7 +215,7 @@ namespace vk return result; } - void load_program(vk::command_buffer cmd, VkRenderPass pass, VkImageView src) + void load_program(vk::command_buffer cmd, VkRenderPass pass, const std::vector& src) { vk::glsl::program *program = nullptr; auto found = m_program_cache.find(pass); @@ -233,14 +239,18 @@ namespace vk { m_sampler = std::make_unique(*m_device, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, - VK_FALSE, 0.f, 1.f, 0.f, 0.f, VK_FILTER_LINEAR, VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_NEAREST, VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK); + VK_FALSE, 0.f, 1.f, 0.f, 0.f, VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST, VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK); } update_uniforms(program); - VkDescriptorImageInfo info = { m_sampler->value, src, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }; - program->bind_uniform(info, "fs0", m_descriptor_set); - program->bind_uniform({ m_ubo.heap->value, m_ubo_offset, std::max(m_ubo_length, 4u) }, 1, m_descriptor_set); + program->bind_uniform({ m_ubo.heap->value, m_ubo_offset, std::max(m_ubo_length, 4u) }, 0, m_descriptor_set); + + for (int n = 0; n < src.size(); ++n) + { + VkDescriptorImageInfo info = { m_sampler->value, src[n], VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }; + program->bind_uniform(info, "fs" + std::to_string(n), m_descriptor_set); + } vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, program->pipeline); vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline_layout, 0, 1, &m_descriptor_set, 0, nullptr); @@ -343,7 +353,7 @@ namespace vk vkCmdSetScissor(cmd, 0, 1, &vs); } - void run(vk::command_buffer &cmd, u16 w, u16 h, vk::framebuffer* fbo, VkImageView src, VkRenderPass render_pass) + void run(vk::command_buffer &cmd, u16 w, u16 h, vk::framebuffer* fbo, const std::vector& src, VkRenderPass render_pass) { load_program(cmd, render_pass, src); set_up_viewport(cmd, w, h); @@ -362,12 +372,18 @@ namespace vk vkCmdEndRenderPass(cmd); } - void run(vk::command_buffer &cmd, u16 w, u16 h, vk::image* target, VkImageView src, VkRenderPass render_pass, std::list>& framebuffer_resources) + void run(vk::command_buffer &cmd, u16 w, u16 h, vk::image* target, const std::vector& src, VkRenderPass render_pass, std::list>& framebuffer_resources) { vk::framebuffer *fbo = get_framebuffer(target, render_pass, framebuffer_resources); run(cmd, w, h, fbo, src, render_pass); } + void run(vk::command_buffer &cmd, u16 w, u16 h, vk::image* target, VkImageView src, VkRenderPass render_pass, std::list>& framebuffer_resources) + { + std::vector views = { src }; + run(cmd, w, h, target, views, render_pass, framebuffer_resources); + } + void run(vk::command_buffer &cmd, u16 w, u16 h, vk::image* target, vk::image_view* src, VkRenderPass render_pass, std::list>& framebuffer_resources) { run(cmd, w, h, target, src->value, render_pass, framebuffer_resources); @@ -397,7 +413,8 @@ namespace vk { "#version 420\n" "#extension GL_ARB_separate_shader_objects : enable\n" - "layout(set=0, binding=0) uniform sampler2D 
fs0;\n" + "#extension GL_ARB_shader_stencil_export : enable\n\n" + "layout(set=0, binding=1) uniform sampler2D fs0;\n" "layout(location=0) in vec2 tc0;\n" "\n" "void main()\n" @@ -409,6 +426,7 @@ namespace vk renderpass_config.set_depth_mask(true); renderpass_config.enable_depth_test(VK_COMPARE_OP_ALWAYS); + renderpass_config.enable_stencil_test(VK_STENCIL_OP_REPLACE, VK_STENCIL_OP_REPLACE, VK_STENCIL_OP_REPLACE, VK_COMPARE_OP_ALWAYS, 0xFF, 0xFF); m_vertex_shader.id = 100002; m_fragment_shader.id = 100003; @@ -438,7 +456,7 @@ namespace vk "#version 450\n" "#extension GL_ARB_separate_shader_objects : enable\n" "layout(location=0) in vec4 in_pos;\n" - "layout(std140, set=0, binding=1) uniform static_data{ vec4 regs[8]; };\n" + "layout(std140, set=0, binding=0) uniform static_data{ vec4 regs[8]; };\n" "layout(location=0) out vec2 tc0;\n" "layout(location=1) out vec4 color;\n" "layout(location=2) out vec4 parameters;\n" @@ -459,7 +477,7 @@ namespace vk { "#version 420\n" "#extension GL_ARB_separate_shader_objects : enable\n" - "layout(set=0, binding=0) uniform sampler2D fs0;\n" + "layout(set=0, binding=1) uniform sampler2D fs0;\n" "layout(location=0) in vec2 tc0;\n" "layout(location=1) in vec4 color;\n" "layout(location=2) in vec4 parameters;\n" @@ -704,95 +722,13 @@ namespace vk break; } - overlay_pass::run(cmd, w, h, target, src, render_pass); + overlay_pass::run(cmd, w, h, target, { src }, render_pass); } ui.update(); } }; - struct depth_scaling_pass : public overlay_pass - { - areaf src_area; - areaf dst_area; - u16 src_width; - u16 src_height; - - depth_scaling_pass() - { - vs_src = - { - "#version 450\n" - "#extension GL_ARB_separate_shader_objects : enable\n" - "layout(std140, set=0, binding=1) uniform static_data{ vec4 regs[8]; };\n" - "layout(location=0) out vec2 tc0;\n" - "\n" - "void main()\n" - "{\n" - " vec2 positions[] = {vec2(-1., -1.), vec2(1., -1.), vec2(-1., 1.), vec2(1., 1.)};\n" - " vec2 coords[] = {vec2(0., 0.), vec2(1., 0.), vec2(0., 1.), vec2(1., 1.)};\n" - " gl_Position = vec4(positions[gl_VertexIndex % 4], 0., 1.);\n" - " tc0 = coords[gl_VertexIndex % 4] * regs[0].xy + regs[0].zw;\n" - "}\n" - }; - - fs_src = - { - "#version 420\n" - "#extension GL_ARB_separate_shader_objects : enable\n" - "layout(set=0, binding=0) uniform sampler2D fs0;\n" - "layout(location=0) in vec2 tc0;\n" - "\n" - "void main()\n" - "{\n" - " gl_FragDepth = texture(fs0, tc0).x;\n" - "}\n" - }; - - renderpass_config.set_depth_mask(true); - renderpass_config.enable_depth_test(VK_COMPARE_OP_ALWAYS); - - m_vertex_shader.id = 100006; - m_fragment_shader.id = 100007; - } - - void update_uniforms(vk::glsl::program* /*program*/) override - { - m_ubo_offset = (u32)m_ubo.alloc<256>(128); - auto dst = (f32*)m_ubo.map(m_ubo_offset, 128); - dst[0] = f32(src_area.x2 - src_area.x1) / src_width; - dst[1] = f32(src_area.y2 - src_area.y1) / src_height; - dst[2] = src_area.x1 / f32(src_area.x2 - src_area.x1); - dst[3] = src_area.y1 / f32(src_area.y2 - src_area.y1); - m_ubo.unmap(); - } - - void set_up_viewport(vk::command_buffer &cmd, u16 max_w, u16 max_h) override - { - VkRect2D region = { { s32(dst_area.x1), s32(dst_area.y1) },{ u32(dst_area.x2 - dst_area.x1), u32(dst_area.y2 - dst_area.y1) } }; - vkCmdSetScissor(cmd, 0, 1, ®ion); - - VkViewport vp{}; - vp.x = dst_area.x1; - vp.y = dst_area.y1; - vp.width = f32(region.extent.width); - vp.height = f32(region.extent.height); - vp.minDepth = 0.f; - vp.maxDepth = 1.f; - vkCmdSetViewport(cmd, 0, 1, &vp); - } - - void run(vk::command_buffer &cmd, const areaf& src_rect, 
const areaf& dst_rect, vk::image* target, vk::image* src, vk::image_view* src_view, VkRenderPass render_pass, std::list>& framebuffer_resources) - { - src_area = src_rect; - dst_area = dst_rect; - src_width = src->width(); - src_height = src->height(); - - overlay_pass::run(cmd, target->width(), target->height(), target, src_view, render_pass, framebuffer_resources); - } - }; - struct attachment_clear_pass : public overlay_pass { color4f clear_color = { 0.f, 0.f, 0.f, 0.f }; @@ -805,7 +741,7 @@ namespace vk { "#version 450\n" "#extension GL_ARB_separate_shader_objects : enable\n" - "layout(std140, set=0, binding=1) uniform static_data{ vec4 regs[8]; };\n" + "layout(std140, set=0, binding=0) uniform static_data{ vec4 regs[8]; };\n" "layout(location=0) out vec2 tc0;\n" "layout(location=1) out vec4 color;\n" "layout(location=2) out vec4 mask;\n" @@ -825,7 +761,7 @@ namespace vk { "#version 420\n" "#extension GL_ARB_separate_shader_objects : enable\n" - "layout(set=0, binding=0) uniform sampler2D fs0;\n" + "layout(set=0, binding=1) uniform sampler2D fs0;\n" "layout(location=0) in vec2 tc0;\n" "layout(location=1) in vec4 color;\n" "layout(location=2) in vec4 mask;\n" diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h index 232a9cc842..9547902903 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderTargets.h +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h @@ -19,7 +19,7 @@ namespace vk u16 surface_height = 0; VkImageAspectFlags attachment_aspect_flag = VK_IMAGE_ASPECT_COLOR_BIT; - std::unordered_map> views; + std::unordered_multimap> views; u64 frame_tag = 0; //frame id when invalidated, 0 if not invalid @@ -40,12 +40,16 @@ namespace vk mipmaps, layers, samples, initial_layout, tiling, usage, image_flags) {} - vk::image_view* get_view(u32 remap_encoding, const std::pair, std::array>& remap) + vk::image_view* get_view(u32 remap_encoding, const std::pair, std::array>& remap, + VkImageAspectFlags mask = VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT) { - auto found = views.find(remap_encoding); - if (found != views.end()) + auto found = views.equal_range(remap_encoding); + for (auto It = found.first; It != found.second; ++It) { - return found->second.get(); + if (It->second->info.subresourceRange.aspectMask & mask) + { + return It->second.get(); + } } VkComponentMapping real_mapping = vk::apply_swizzle_remap @@ -55,10 +59,10 @@ namespace vk ); auto view = std::make_unique(*vk::get_current_renderer(), value, VK_IMAGE_VIEW_TYPE_2D, info.format, - real_mapping, vk::get_image_subresource_range(0, 0, 1, 1, attachment_aspect_flag & ~(VK_IMAGE_ASPECT_STENCIL_BIT))); + real_mapping, vk::get_image_subresource_range(0, 0, 1, 1, attachment_aspect_flag & mask)); auto result = view.get(); - views[remap_encoding] = std::move(view); + views.emplace(remap_encoding, std::move(view)); return result; } diff --git a/rpcs3/Emu/RSX/VK/VKTexture.cpp b/rpcs3/Emu/RSX/VK/VKTexture.cpp index bc8895a9f6..de89466d91 100644 --- a/rpcs3/Emu/RSX/VK/VKTexture.cpp +++ b/rpcs3/Emu/RSX/VK/VKTexture.cpp @@ -55,30 +55,39 @@ namespace vk } } - void copy_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout, u32 width, u32 height, u32 mipmaps, VkImageAspectFlagBits aspect) + void copy_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout, + const areai& src_rect, const areai& dst_rect, u32 mipmaps, VkImageAspectFlags src_aspect, VkImageAspectFlags dst_aspect, + VkImageAspectFlags src_transfer_mask, VkImageAspectFlags 
dst_transfer_mask) { + // NOTE: src_aspect should match dst_aspect according to spec but drivers seem to work just fine with the mismatch + // TODO: Implement separate pixel transfer for drivers that refuse this workaround + VkImageSubresourceLayers a_src = {}, a_dst = {}; - a_src.aspectMask = aspect; + a_src.aspectMask = src_aspect & src_transfer_mask; a_src.baseArrayLayer = 0; a_src.layerCount = 1; a_src.mipLevel = 0; a_dst = a_src; + a_dst.aspectMask = dst_aspect & dst_transfer_mask; VkImageCopy rgn = {}; rgn.extent.depth = 1; - rgn.extent.width = width; - rgn.extent.height = height; - rgn.dstOffset = { 0, 0, 0 }; - rgn.srcOffset = { 0, 0, 0 }; + rgn.extent.width = u32(src_rect.x2 - src_rect.x1); + rgn.extent.height = u32(src_rect.y2 - src_rect.y1); + rgn.dstOffset = { dst_rect.x1, dst_rect.y1, 0 }; + rgn.srcOffset = { src_rect.x1, src_rect.y1, 0 }; rgn.srcSubresource = a_src; rgn.dstSubresource = a_dst; - if (srcLayout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) - change_image_layout(cmd, src, srcLayout, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, vk::get_image_subresource_range(0, 0, 1, 1, aspect)); + auto preferred_src_format = (src == dst) ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + auto preferred_dst_format = (src == dst) ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - if (dstLayout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) - change_image_layout(cmd, dst, dstLayout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, vk::get_image_subresource_range(0, 0, 1, 1, aspect)); + if (srcLayout != preferred_src_format) + change_image_layout(cmd, src, srcLayout, preferred_src_format, vk::get_image_subresource_range(0, 0, 1, 1, src_aspect)); + + if (dstLayout != preferred_dst_format) + change_image_layout(cmd, dst, dstLayout, preferred_dst_format, vk::get_image_subresource_range(0, 0, 1, 1, dst_aspect)); for (u32 mip_level = 0; mip_level < mipmaps; ++mip_level) { @@ -88,11 +97,11 @@ namespace vk rgn.dstSubresource.mipLevel++; } - if (srcLayout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) - change_image_layout(cmd, src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, srcLayout, vk::get_image_subresource_range(0, 0, 1, 1, aspect)); + if (srcLayout != preferred_src_format) + change_image_layout(cmd, src, preferred_src_format, srcLayout, vk::get_image_subresource_range(0, 0, 1, 1, src_aspect)); - if (dstLayout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) - change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, dstLayout, vk::get_image_subresource_range(0, 0, 1, 1, aspect)); + if (dstLayout != preferred_dst_format) + change_image_layout(cmd, dst, preferred_dst_format, dstLayout, vk::get_image_subresource_range(0, 0, 1, 1, dst_aspect)); } void copy_scaled_image(VkCommandBuffer cmd, @@ -100,7 +109,8 @@ namespace vk VkImageLayout srcLayout, VkImageLayout dstLayout, u32 src_x_offset, u32 src_y_offset, u32 src_width, u32 src_height, u32 dst_x_offset, u32 dst_y_offset, u32 dst_width, u32 dst_height, - u32 mipmaps, VkImageAspectFlags aspect, bool compatible_formats) + u32 mipmaps, VkImageAspectFlags aspect, bool compatible_formats, + VkFilter filter, VkFormat src_format, VkFormat dst_format) { VkImageSubresourceLayers a_src = {}, a_dst = {}; a_src.aspectMask = aspect; @@ -110,22 +120,129 @@ namespace vk a_dst = a_src; + auto preferred_src_format = (src == dst) ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + auto preferred_dst_format = (src == dst) ? 
VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
+
 		//TODO: Use an array of offsets/dimensions for mipmapped blits (mipmap count > 1) since subimages will have different dimensions
-		if (srcLayout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL)
-			change_image_layout(cmd, src, srcLayout, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
+		if (srcLayout != preferred_src_format)
+			change_image_layout(cmd, src, srcLayout, preferred_src_format, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
-		if (dstLayout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL)
-			change_image_layout(cmd, dst, dstLayout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
+		if (dstLayout != preferred_dst_format)
+			change_image_layout(cmd, dst, dstLayout, preferred_dst_format, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
-		if (src_width != dst_width || src_height != dst_height || mipmaps > 1 || !compatible_formats)
+		if (compatible_formats && src_width == dst_width && src_height == dst_height)
 		{
-			if ((aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) != 0)
-			{
-				//Most depth/stencil formats cannot be scaled using hw blit
-				LOG_ERROR(RSX, "Cannot perform scaled blit for depth/stencil images");
-				return;
-			}
+			VkImageCopy copy_rgn;
+			copy_rgn.srcOffset = { (int32_t)src_x_offset, (int32_t)src_y_offset, 0 };
+			copy_rgn.dstOffset = { (int32_t)dst_x_offset, (int32_t)dst_y_offset, 0 };
+			copy_rgn.dstSubresource = { (VkImageAspectFlags)aspect, 0, 0, 1 };
+			copy_rgn.srcSubresource = { (VkImageAspectFlags)aspect, 0, 0, 1 };
+			copy_rgn.extent = { src_width, src_height, 1 };
+			vkCmdCopyImage(cmd, src, preferred_src_format, dst, preferred_dst_format, 1, &copy_rgn);
+		}
+		else if ((aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) != 0)
+		{
+			//Most depth/stencil formats cannot be scaled using hw blit
+			if (src_format == VK_FORMAT_UNDEFINED || dst_width > 4096 || (src_height + dst_height) > 4096)
+			{
+				LOG_ERROR(RSX, "Could not blit depth/stencil image. 
src_fmt=0x%x, src=%dx%d, dst=%dx%d", + (u32)src_format, src_width, src_height, dst_width, dst_height); + } + else + { + auto stretch_image_typeless = [&cmd, preferred_src_format, preferred_dst_format](VkImage src, VkImage dst, VkImage typeless, + const areai& src_rect, const areai& dst_rect, VkImageAspectFlags aspect, VkImageAspectFlags transfer_flags = 0xFF) + { + const u32 src_w = u32(src_rect.x2 - src_rect.x1); + const u32 src_h = u32(src_rect.y2 - src_rect.y1); + const u32 dst_w = u32(dst_rect.x2 - dst_rect.x1); + const u32 dst_h = u32(dst_rect.y2 - dst_rect.y1); + + // Drivers are not very accepting of aspect COLOR -> aspect DEPTH or aspect STENCIL separately + // However, this works okay for D24S8 (nvidia-only format) + // To work around the problem we use the non-existent DEPTH/STENCIL/DEPTH_STENCIL aspect of the color texture instead + VkImageAspectFlags typeless_aspect = VK_IMAGE_ASPECT_COLOR_BIT; + if (transfer_flags == VK_IMAGE_ASPECT_DEPTH_BIT || transfer_flags == VK_IMAGE_ASPECT_STENCIL_BIT) + { + // NOTE: This path is only taken for VK_FORMAT_D32_SFLOAT_S8_UINT as there is no 36-bit format available + // On Nvidia, the default format is VK_FORMAT_D24_UNORM_S8_UINT which does not require this workaround + switch (vk::get_driver_vendor()) + { + case driver_vendor::AMD: + // Quirks: This workaround allows proper transfer of stencil data + case driver_vendor::NVIDIA: + // Quirks: This workaround allows only transfer of depth data, stencil is ignored + typeless_aspect = aspect; + break; + default: + break; + } + } + + //1. Copy unscaled to typeless surface + copy_image(cmd, src, typeless, preferred_src_format, VK_IMAGE_LAYOUT_GENERAL, + src_rect, { 0, 0, (s32)src_w, (s32)src_h }, 1, aspect, typeless_aspect, transfer_flags, 0xFF); + + //2. Blit typeless surface to self + copy_scaled_image(cmd, typeless, typeless, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL, + 0, 0, src_w, src_h, 0, src_h, dst_w, dst_h, 1, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_ASPECT_COLOR_BIT, VK_FILTER_NEAREST); + + //3. 
Copy back the aspect bits + copy_image(cmd, typeless, dst, VK_IMAGE_LAYOUT_GENERAL, preferred_dst_format, + {0, (s32)src_h, (s32)dst_w, s32(src_h + dst_h) }, dst_rect, 1, typeless_aspect, aspect, 0xFF, transfer_flags); + }; + + areai src_rect = { (s32)src_x_offset, (s32)src_y_offset, s32(src_x_offset + src_width), s32(src_y_offset + src_height) }; + areai dst_rect = { (s32)dst_x_offset, (s32)dst_y_offset, s32(dst_x_offset + dst_width), s32(dst_y_offset + dst_height) }; + + switch (src_format) + { + case VK_FORMAT_D16_UNORM: + { + auto typeless = vk::get_typeless_helper(VK_FORMAT_R16_UNORM); + change_image_layout(cmd, typeless, VK_IMAGE_LAYOUT_GENERAL); + stretch_image_typeless(src, dst, typeless->value, src_rect, dst_rect, VK_IMAGE_ASPECT_DEPTH_BIT); + break; + } + case VK_FORMAT_D24_UNORM_S8_UINT: + { + auto typeless = vk::get_typeless_helper(VK_FORMAT_B8G8R8A8_UNORM); + change_image_layout(cmd, typeless, VK_IMAGE_LAYOUT_GENERAL); + stretch_image_typeless(src, dst, typeless->value, src_rect, dst_rect, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT); + break; + } + case VK_FORMAT_D32_SFLOAT_S8_UINT: + { + // NOTE: Typeless transfer (Depth/Stencil->Equivalent Color->Depth/Stencil) of single aspects does not work on AMD when done from a non-depth texture + // Since the typeless transfer itself violates spec, the only way to make it work is to use a D32S8 intermediate + // Copy from src->intermediate then intermediate->dst for each aspect separately + + auto typeless_depth = vk::get_typeless_helper(VK_FORMAT_R32_SFLOAT); + auto typeless_stencil = vk::get_typeless_helper(VK_FORMAT_R8_UINT); + change_image_layout(cmd, typeless_depth, VK_IMAGE_LAYOUT_GENERAL); + change_image_layout(cmd, typeless_stencil, VK_IMAGE_LAYOUT_GENERAL); + + auto intermediate = vk::get_typeless_helper(VK_FORMAT_D32_SFLOAT_S8_UINT); + change_image_layout(cmd, intermediate, preferred_dst_format); + + const areai intermediate_rect = { 0, 0, (s32)dst_width, (s32)dst_height }; + const VkImageAspectFlags depth_stencil = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + + // Blit DEPTH aspect + stretch_image_typeless(src, intermediate->value, typeless_depth->value, src_rect, intermediate_rect, depth_stencil, VK_IMAGE_ASPECT_DEPTH_BIT); + copy_image(cmd, intermediate->value, dst, preferred_dst_format, preferred_dst_format, intermediate_rect, dst_rect, 1, depth_stencil, depth_stencil, VK_IMAGE_ASPECT_DEPTH_BIT, VK_IMAGE_ASPECT_DEPTH_BIT); + + // Blit STENCIL aspect + stretch_image_typeless(src, intermediate->value, typeless_stencil->value, src_rect, intermediate_rect, depth_stencil, VK_IMAGE_ASPECT_STENCIL_BIT); + copy_image(cmd, intermediate->value, dst, preferred_dst_format, preferred_dst_format, intermediate_rect, dst_rect, 1, depth_stencil, depth_stencil, VK_IMAGE_ASPECT_STENCIL_BIT, VK_IMAGE_ASPECT_STENCIL_BIT); + break; + } + } + } + } + else + { VkImageBlit rgn = {}; rgn.srcOffsets[0] = { (int32_t)src_x_offset, (int32_t)src_y_offset, 0 }; rgn.srcOffsets[1] = { (int32_t)(src_width + src_x_offset), (int32_t)(src_height + src_y_offset), 1 }; @@ -136,29 +253,18 @@ namespace vk for (u32 mip_level = 0; mip_level < mipmaps; ++mip_level) { - vkCmdBlitImage(cmd, src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &rgn, VK_FILTER_LINEAR); + vkCmdBlitImage(cmd, src, preferred_src_format, dst, preferred_dst_format, 1, &rgn, filter); rgn.srcSubresource.mipLevel++; rgn.dstSubresource.mipLevel++; } } - else - { - VkImageCopy copy_rgn; - copy_rgn.srcOffset = { (int32_t)src_x_offset, 
(int32_t)src_y_offset, 0 }; - copy_rgn.dstOffset = { (int32_t)dst_x_offset, (int32_t)dst_y_offset, 0 }; - copy_rgn.dstSubresource = { (VkImageAspectFlags)aspect, 0, 0, 1 }; - copy_rgn.srcSubresource = { (VkImageAspectFlags)aspect, 0, 0, 1 }; - copy_rgn.extent = { src_width, src_height, 1 }; - vkCmdCopyImage(cmd, src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ©_rgn); - } + if (srcLayout != preferred_src_format) + change_image_layout(cmd, src, preferred_src_format, srcLayout, vk::get_image_subresource_range(0, 0, 1, 1, aspect)); - if (srcLayout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) - change_image_layout(cmd, src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, srcLayout, vk::get_image_subresource_range(0, 0, 1, 1, aspect)); - - if (dstLayout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) - change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, dstLayout, vk::get_image_subresource_range(0, 0, 1, 1, aspect)); + if (dstLayout != preferred_dst_format) + change_image_layout(cmd, dst, preferred_dst_format, dstLayout, vk::get_image_subresource_range(0, 0, 1, 1, aspect)); } void copy_mipmaped_image_using_buffer(VkCommandBuffer cmd, vk::image* dst_image, diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index fc9a7fff2d..6034828e97 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -1039,18 +1039,6 @@ namespace vk } } - public: - - struct vk_blit_op_result : public blit_op_result - { - bool deferred = false; - vk::image *src_image = nullptr; - vk::image *dst_image = nullptr; - vk::image_view *src_view = nullptr; - - using blit_op_result::blit_op_result; - }; - public: void initialize(vk::render_device& device, VkQueue submit_queue, vk::vk_data_heap& upload_heap) @@ -1133,14 +1121,14 @@ namespace vk return upload_texture(cmd, tex, m_rtts, cmd, const_cast(m_submit_queue)); } - vk::image *upload_image_simple(vk::command_buffer& /*cmd*/, u32 address, u32 width, u32 height) + vk::image *upload_image_simple(vk::command_buffer& cmd, u32 address, u32 width, u32 height) { //Uploads a linear memory range as a BGRA8 texture auto image = std::make_unique(*m_device, m_memory_types.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, VK_IMAGE_TYPE_2D, VK_FORMAT_B8G8R8A8_UNORM, - width, height, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, + width, height, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_PREINITIALIZED, VK_IMAGE_TILING_LINEAR, VK_IMAGE_USAGE_TRANSFER_SRC_BIT, 0); VkImageSubresource subresource{}; @@ -1170,6 +1158,8 @@ namespace vk image->memory->unmap(); + vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + auto result = image.get(); const u32 resource_memory = width * height * 4; //Rough approximate m_discardable_storage.push_back(image); @@ -1179,22 +1169,16 @@ namespace vk return result; } - vk_blit_op_result blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, rsx::vk_render_targets& m_rtts, vk::command_buffer& cmd) + bool blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, rsx::vk_render_targets& m_rtts, vk::command_buffer& cmd) { struct blit_helper { vk::command_buffer* commands; + VkFormat format; blit_helper(vk::command_buffer *c) : commands(c) {} - bool deferred = false; - vk::image* deferred_op_src = nullptr; - vk::image* deferred_op_dst = nullptr; - - void scale_image(vk::image* src, vk::image* dst, areai src_area, areai dst_area, bool /*interpolate*/, bool is_depth, const 
rsx::typeless_xfer& /*typeless*/) + void scale_image(vk::image* src, vk::image* dst, areai src_area, areai dst_area, bool interpolate, bool /*is_depth*/, const rsx::typeless_xfer& /*typeless*/) { - VkImageAspectFlagBits aspect = VK_IMAGE_ASPECT_COLOR_BIT; - if (is_depth) aspect = (VkImageAspectFlagBits)(src->info.format == VK_FORMAT_D16_UNORM ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT); - //Checks if (src_area.x2 <= src_area.x1 || src_area.y2 <= src_area.y1 || dst_area.x2 <= dst_area.x1 || dst_area.y2 <= dst_area.y1) { @@ -1214,54 +1198,35 @@ namespace vk return; } + const auto aspect = vk::get_aspect_flags(src->info.format); const auto src_width = src_area.x2 - src_area.x1; const auto src_height = src_area.y2 - src_area.y1; const auto dst_width = dst_area.x2 - dst_area.x1; const auto dst_height = dst_area.y2 - dst_area.y1; - deferred_op_src = src; - deferred_op_dst = dst; - - if (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) - { - if (src_width != dst_width || src_height != dst_height || src->info.format != dst->info.format) - { - //Scaled depth scaling - deferred = true; - } - } - - if (!deferred) - { - copy_scaled_image(*commands, src->value, dst->value, src->current_layout, dst->current_layout, src_area.x1, src_area.y1, src_width, src_height, - dst_area.x1, dst_area.y1, dst_width, dst_height, 1, aspect, src->info.format == dst->info.format); - } + copy_scaled_image(*commands, src->value, dst->value, src->current_layout, dst->current_layout, src_area.x1, src_area.y1, src_width, src_height, + dst_area.x1, dst_area.y1, dst_width, dst_height, 1, aspect, src->info.format == dst->info.format, + interpolate? VK_FILTER_LINEAR : VK_FILTER_NEAREST, src->info.format, dst->info.format); change_image_layout(*commands, dst, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, {(VkImageAspectFlags)aspect, 0, dst->info.mipLevels, 0, dst->info.arrayLayers}); + format = dst->info.format; } } helper(&cmd); auto reply = upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper, cmd, const_cast(m_submit_queue)); - vk_blit_op_result result = reply.succeeded; - result.real_dst_address = reply.real_dst_address; - result.real_dst_size = reply.real_dst_size; - result.is_depth = reply.is_depth; - result.deferred = helper.deferred; - result.dst_image = helper.deferred_op_dst; - result.src_image = helper.deferred_op_src; + if (reply.succeeded) + { + if (reply.real_dst_size) + { + flush_if_cache_miss_likely(helper.format, reply.real_dst_address, reply.real_dst_size, cmd, m_submit_queue); + } - if (!helper.deferred) - return result; + return true; + } - VkImageSubresourceRange view_range = { VK_IMAGE_ASPECT_DEPTH_BIT, 0, 1, 0, 1 }; - auto tmp_view = std::make_unique(*vk::get_current_renderer(), helper.deferred_op_src->value, VK_IMAGE_VIEW_TYPE_2D, - helper.deferred_op_src->info.format, helper.deferred_op_src->native_component_map, view_range); - - result.src_view = tmp_view.get(); - m_discardable_storage.push_back(tmp_view); - return result; + return false; } const u32 get_unreleased_textures_count() const override diff --git a/rpcs3/Emu/RSX/rsx_cache.h b/rpcs3/Emu/RSX/rsx_cache.h index 3dfcfe770a..86a21f34e0 100644 --- a/rpcs3/Emu/RSX/rsx_cache.h +++ b/rpcs3/Emu/RSX/rsx_cache.h @@ -502,7 +502,7 @@ namespace rsx entries.push_back(tmp); } - if ((entry_count = entries.size()) <= 2) + if ((entry_count = (u32)entries.size()) <= 2) return; root.rewind();
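
Note on the typeless transfer used by copy_scaled_image above: depth/stencil aspects cannot be scaled directly with vkCmdBlitImage on the targeted drivers, so the patch copies the raw bits into a color image of matching texel size (D16_UNORM <-> R16_UNORM, D24S8 <-> B8G8R8A8, D32F/S8 <-> R32_SFLOAT/R8_UINT), performs the scaled blit color-to-color inside that scratch image, then copies the result back into the destination depth image. The sketch below shows that three-step sequence in isolation. It is a minimal illustration rather than the patch's code: the function name, the pre-created D16_UNORM source/destination and R16_UNORM scratch images, the VK_IMAGE_LAYOUT_GENERAL layouts and the omission of pipeline barriers between the three transfers are all simplifying assumptions, and the cross-aspect vkCmdCopyImage relies on the same out-of-spec driver behaviour the patch's own comments call out.

#include <vulkan/vulkan.h>

// Scale a D16_UNORM depth image via an R16_UNORM "typeless" scratch image.
// Assumes all three images were created with TRANSFER_SRC|TRANSFER_DST usage,
// are already in VK_IMAGE_LAYOUT_GENERAL, and that the scratch image is at
// least max(src_w, dst_w) x (src_h + dst_h) texels.
void scale_depth_via_typeless(VkCommandBuffer cmd,
	VkImage depth_src, VkImage depth_dst, VkImage typeless,
	uint32_t src_w, uint32_t src_h, uint32_t dst_w, uint32_t dst_h)
{
	// 1. Raw copy of the depth bits into the color scratch image.
	//    DEPTH -> COLOR aspect copies are technically out of spec but accepted
	//    by the drivers this path targets (see the comments in copy_image above).
	VkImageCopy to_typeless = {};
	to_typeless.srcSubresource = { VK_IMAGE_ASPECT_DEPTH_BIT, 0, 0, 1 };
	to_typeless.dstSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 };
	to_typeless.extent = { src_w, src_h, 1 };
	vkCmdCopyImage(cmd, depth_src, VK_IMAGE_LAYOUT_GENERAL, typeless, VK_IMAGE_LAYOUT_GENERAL, 1, &to_typeless);

	// 2. Scale within the scratch image using the blit engine. The scaled region
	//    is placed below the unscaled one so the self-blit regions never overlap.
	VkImageBlit stretch = {};
	stretch.srcSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 };
	stretch.dstSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 };
	stretch.srcOffsets[0] = { 0, 0, 0 };
	stretch.srcOffsets[1] = { (int32_t)src_w, (int32_t)src_h, 1 };
	stretch.dstOffsets[0] = { 0, (int32_t)src_h, 0 };
	stretch.dstOffsets[1] = { (int32_t)dst_w, (int32_t)(src_h + dst_h), 1 };
	vkCmdBlitImage(cmd, typeless, VK_IMAGE_LAYOUT_GENERAL, typeless, VK_IMAGE_LAYOUT_GENERAL, 1, &stretch, VK_FILTER_NEAREST);

	// 3. Copy the scaled bits back into the destination depth image.
	VkImageCopy from_typeless = {};
	from_typeless.srcSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 };
	from_typeless.dstSubresource = { VK_IMAGE_ASPECT_DEPTH_BIT, 0, 0, 1 };
	from_typeless.srcOffset = { 0, (int32_t)src_h, 0 };
	from_typeless.extent = { dst_w, dst_h, 1 };
	vkCmdCopyImage(cmd, typeless, VK_IMAGE_LAYOUT_GENERAL, depth_dst, VK_IMAGE_LAYOUT_GENERAL, 1, &from_typeless);
}

Stacking the scaled copy below the unscaled one mirrors what stretch_image_typeless does in the patch (its destination offsets start at y = src_h), which is also why the guard above rejects cases where dst_width exceeds 4096 or src_height + dst_height exceeds the 4096-texel scratch image height.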