From d25ba03e82c2f386568303015b67f2a451cf197e Mon Sep 17 00:00:00 2001 From: kd-11 Date: Tue, 10 Mar 2020 14:05:50 +0300 Subject: [PATCH] vk: Lazy evaluate renderpass scope - Spamming the driver with renderpass open/close cycles is bad for performance. --- rpcs3/Emu/RSX/VK/VKCompute.h | 7 ++++ rpcs3/Emu/RSX/VK/VKGSRender.cpp | 45 +++++++++++------------- rpcs3/Emu/RSX/VK/VKGSRender.h | 5 +-- rpcs3/Emu/RSX/VK/VKHelpers.cpp | 25 ++++++++++++++ rpcs3/Emu/RSX/VK/VKOverlays.h | 13 ++----- rpcs3/Emu/RSX/VK/VKRenderPass.cpp | 57 +++++++++++++++++++++++++++++++ rpcs3/Emu/RSX/VK/VKRenderPass.h | 7 ++++ rpcs3/Emu/RSX/VK/VKTextOut.h | 15 ++------ rpcs3/Emu/RSX/VK/VKTexture.cpp | 31 +++++++++++++++++ rpcs3/Emu/RSX/VK/VKTextureCache.h | 6 ++++ 10 files changed, 160 insertions(+), 51 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKCompute.h b/rpcs3/Emu/RSX/VK/VKCompute.h index 63ceea4d47..2ea4b37765 100644 --- a/rpcs3/Emu/RSX/VK/VKCompute.h +++ b/rpcs3/Emu/RSX/VK/VKCompute.h @@ -1,5 +1,6 @@ #pragma once #include "VKHelpers.h" +#include "VKRenderPass.h" #include "Utilities/StrUtil.h" #define VK_MAX_COMPUTE_TASKS 4096 // Max number of jobs per frame @@ -201,6 +202,12 @@ namespace vk void run(VkCommandBuffer cmd, u32 invocations_x, u32 invocations_y, u32 invocations_z) { + // CmdDispatch is outside renderpass scope only + if (vk::is_renderpass_open(cmd)) + { + vk::end_renderpass(cmd); + } + load_program(cmd); vkCmdDispatch(cmd, invocations_x, invocations_y, invocations_z); } diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 7b5e4dcad6..d277b61bba 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1062,31 +1062,26 @@ void VKGSRender::update_draw_state() void VKGSRender::begin_render_pass() { - if (m_render_pass_open) - return; - - const auto renderpass = (m_cached_renderpass)? 
m_cached_renderpass : vk::get_renderpass(*m_device, m_current_renderpass_key); - - VkRenderPassBeginInfo rp_begin = {}; - rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; - rp_begin.renderPass = renderpass; - rp_begin.framebuffer = m_draw_fbo->value; - rp_begin.renderArea.offset.x = 0; - rp_begin.renderArea.offset.y = 0; - rp_begin.renderArea.extent.width = m_draw_fbo->width(); - rp_begin.renderArea.extent.height = m_draw_fbo->height(); - - vkCmdBeginRenderPass(*m_current_command_buffer, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); - m_render_pass_open = true; + vk::begin_renderpass( + *m_current_command_buffer, + get_render_pass(), + m_draw_fbo->value, + { positionu{0u, 0u}, sizeu{m_draw_fbo->width(), m_draw_fbo->height()} }); } void VKGSRender::close_render_pass() { - if (!m_render_pass_open) - return; + vk::end_renderpass(*m_current_command_buffer); +} - vkCmdEndRenderPass(*m_current_command_buffer); - m_render_pass_open = false; +VkRenderPass VKGSRender::get_render_pass() +{ + if (!m_cached_renderpass) + { + m_cached_renderpass = vk::get_renderpass(*m_device, m_current_renderpass_key); + } + + return m_cached_renderpass; } void VKGSRender::emit_geometry(u32 sub_index) @@ -1199,7 +1194,7 @@ void VKGSRender::emit_geometry(u32 sub_index) m_program->bind_uniform(m_vertex_layout_storage->value, binding_table.vertex_buffers_first_bind_slot + 2, m_current_frame->descriptor_set); } - if (!m_render_pass_open) + if (!m_current_subdraw_id++) { vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline); update_draw_state(); @@ -1857,6 +1852,8 @@ void VKGSRender::end() check_heap_status(VK_HEAP_CHECK_VERTEX_STORAGE | VK_HEAP_CHECK_VERTEX_LAYOUT_STORAGE); u32 sub_index = 0; + m_current_subdraw_id = 0; + rsx::method_registers.current_draw_clause.begin(); do { @@ -1870,9 +1867,6 @@ void VKGSRender::end() m_current_command_buffer->flags &= ~(vk::command_buffer::cb_has_conditional_render); } - // Close any open passes unconditionally 
- close_render_pass(); - m_rtts.on_write(m_framebuffer_layout.color_write_enabled.data(), m_framebuffer_layout.zeta_write_enabled); rsx::thread::end(); @@ -2157,7 +2151,6 @@ void VKGSRender::clear_surface(u32 mask) { begin_render_pass(); vkCmdClearAttachments(*m_current_command_buffer, ::size32(clear_descriptors), clear_descriptors.data(), 1, &region); - close_render_pass(); } } @@ -2728,7 +2721,7 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore #endif // End any active renderpasses; the caller should handle reopening - if (m_render_pass_open) + if (vk::is_renderpass_open(*m_current_command_buffer)) { close_render_pass(); } diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 698fbd0ca9..1b9a3eec4f 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -456,8 +456,8 @@ private: utils::address_range m_offloader_fault_range; rsx::invalidation_cause m_offloader_fault_cause; - bool m_render_pass_open = false; - u64 m_current_renderpass_key = 0; + u32 m_current_subdraw_id = 0; + u64 m_current_renderpass_key = 0; VkRenderPass m_cached_renderpass = VK_NULL_HANDLE; std::vector m_fbo_images; @@ -494,6 +494,7 @@ private: void begin_render_pass(); void close_render_pass(); + VkRenderPass get_render_pass(); void update_draw_state(); diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp index a6bc046b38..53a7fa68e8 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -8,6 +8,7 @@ #include "VKResourceManager.h" #include "VKDMA.h" #include "VKCommandStream.h" +#include "VKRenderPass.h" #include "Utilities/mutex.h" #include "Utilities/lockless.h" @@ -567,6 +568,11 @@ namespace vk void insert_buffer_memory_barrier(VkCommandBuffer cmd, VkBuffer buffer, VkDeviceSize offset, VkDeviceSize length, VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage, VkAccessFlags src_mask, VkAccessFlags dst_mask) { + if (vk::is_renderpass_open(cmd)) + { + 
vk::end_renderpass(cmd); + } + VkBufferMemoryBarrier barrier = {}; barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; barrier.buffer = buffer; @@ -587,6 +593,11 @@ namespace vk VkAccessFlags src_mask, VkAccessFlags dst_mask, const VkImageSubresourceRange& range) { + if (vk::is_renderpass_open(cmd)) + { + vk::end_renderpass(cmd); + } + VkImageMemoryBarrier barrier = {}; barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; barrier.newLayout = new_layout; @@ -603,11 +614,21 @@ namespace vk void insert_execution_barrier(VkCommandBuffer cmd, VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage) { + if (vk::is_renderpass_open(cmd)) + { + vk::end_renderpass(cmd); + } + vkCmdPipelineBarrier(cmd, src_stage, dst_stage, 0, 0, nullptr, 0, nullptr, 0, nullptr); } void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, const VkImageSubresourceRange& range) { + if (vk::is_renderpass_open(cmd)) + { + vk::end_renderpass(cmd); + } + //Prepare an image to match the new layout.. 
VkImageMemoryBarrier barrier = {}; barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; @@ -766,6 +787,10 @@ namespace vk // Transition to GENERAL if this resource is both input and output // TODO: This implicitly makes the target incompatible with the renderpass declaration; investigate a proper workaround // TODO: This likely throws out hw optimizations on the rest of the renderpass, manage carefully + if (vk::is_renderpass_open(cmd)) + { + vk::end_renderpass(cmd); + } VkAccessFlags src_access; VkPipelineStageFlags src_stage; diff --git a/rpcs3/Emu/RSX/VK/VKOverlays.h b/rpcs3/Emu/RSX/VK/VKOverlays.h index 5fa0dbc0c0..92b9a1b8af 100644 --- a/rpcs3/Emu/RSX/VK/VKOverlays.h +++ b/rpcs3/Emu/RSX/VK/VKOverlays.h @@ -5,6 +5,7 @@ #include "VKRenderTargets.h" #include "VKFramebuffer.h" #include "VKResourceManager.h" +#include "VKRenderPass.h" #include "../Overlays/overlays.h" @@ -374,18 +375,8 @@ namespace vk load_program(cmd, render_pass, src); set_up_viewport(cmd, viewport.x1, viewport.y1, viewport.width(), viewport.height()); - VkRenderPassBeginInfo rp_begin = {}; - rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; - rp_begin.renderPass = render_pass; - rp_begin.framebuffer = fbo->value; - rp_begin.renderArea.offset.x = static_cast<s32>(viewport.x1); - rp_begin.renderArea.offset.y = static_cast<s32>(viewport.y1); - rp_begin.renderArea.extent.width = viewport.width(); - rp_begin.renderArea.extent.height = viewport.height(); - - vkCmdBeginRenderPass(cmd, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); + vk::begin_renderpass(cmd, render_pass, fbo->value, viewport); emit_geometry(cmd); - vkCmdEndRenderPass(cmd); } void run(vk::command_buffer &cmd, const areau& viewport, vk::image* target, const std::vector& src, VkRenderPass render_pass) diff --git a/rpcs3/Emu/RSX/VK/VKRenderPass.cpp b/rpcs3/Emu/RSX/VK/VKRenderPass.cpp index 6d75deaff5..4bb2a477bf 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderPass.cpp +++ b/rpcs3/Emu/RSX/VK/VKRenderPass.cpp @@ -5,6 +5,16 @@ namespace vk { + struct 
active_renderpass_info_t + { + VkRenderPass pass = VK_NULL_HANDLE; + VkFramebuffer fbo = VK_NULL_HANDLE; + }; + + atomic_t<u64> g_cached_renderpass_key = 0; + VkRenderPass g_cached_renderpass = VK_NULL_HANDLE; + std::unordered_map<VkCommandBuffer, active_renderpass_info_t> g_current_renderpass; + shared_mutex g_renderpass_cache_mutex; std::unordered_map<u64, VkRenderPass> g_renderpass_cache; @@ -248,4 +258,51 @@ namespace vk g_renderpass_cache.clear(); } + + void begin_renderpass(VkCommandBuffer cmd, VkRenderPass pass, VkFramebuffer target, const coordu& framebuffer_region) + { + auto& renderpass_info = g_current_renderpass[cmd]; + if (renderpass_info.pass == pass && renderpass_info.fbo == target) + { + return; + } + else if (renderpass_info.pass != VK_NULL_HANDLE) + { + end_renderpass(cmd); + } + + VkRenderPassBeginInfo rp_begin = {}; + rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + rp_begin.renderPass = pass; + rp_begin.framebuffer = target; + rp_begin.renderArea.offset.x = static_cast<s32>(framebuffer_region.x); + rp_begin.renderArea.offset.y = static_cast<s32>(framebuffer_region.y); + rp_begin.renderArea.extent.width = framebuffer_region.width; + rp_begin.renderArea.extent.height = framebuffer_region.height; + + vkCmdBeginRenderPass(cmd, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); + renderpass_info = { pass, target }; + } + + void begin_renderpass(VkDevice dev, VkCommandBuffer cmd, u64 renderpass_key, VkFramebuffer target, const coordu& framebuffer_region) + { + if (renderpass_key != g_cached_renderpass_key) + { + g_cached_renderpass = get_renderpass(dev, renderpass_key); + g_cached_renderpass_key = renderpass_key; + } + + begin_renderpass(cmd, g_cached_renderpass, target, framebuffer_region); + } + + void end_renderpass(VkCommandBuffer cmd) + { + vkCmdEndRenderPass(cmd); + g_current_renderpass[cmd] = {}; + } + + bool is_renderpass_open(VkCommandBuffer cmd) + { + return g_current_renderpass[cmd].pass != VK_NULL_HANDLE; + } } diff --git a/rpcs3/Emu/RSX/VK/VKRenderPass.h b/rpcs3/Emu/RSX/VK/VKRenderPass.h index 
47b2375c4c..02a96e5f87 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderPass.h +++ b/rpcs3/Emu/RSX/VK/VKRenderPass.h @@ -10,4 +10,11 @@ namespace vk VkRenderPass get_renderpass(VkDevice dev, u64 renderpass_key); void clear_renderpass_cache(VkDevice dev); + + // Renderpass scope management helpers. + // NOTE: These are not thread safe by design. + void begin_renderpass(VkDevice dev, VkCommandBuffer cmd, u64 renderpass_key, VkFramebuffer target, const coordu& framebuffer_region); + void begin_renderpass(VkCommandBuffer cmd, VkRenderPass pass, VkFramebuffer target, const coordu& framebuffer_region); + void end_renderpass(VkCommandBuffer cmd); + bool is_renderpass_open(VkCommandBuffer cmd); } diff --git a/rpcs3/Emu/RSX/VK/VKTextOut.h b/rpcs3/Emu/RSX/VK/VKTextOut.h index 2e67f06aca..d0ab782bf6 100644 --- a/rpcs3/Emu/RSX/VK/VKTextOut.h +++ b/rpcs3/Emu/RSX/VK/VKTextOut.h @@ -2,6 +2,7 @@ #include "VKHelpers.h" #include "VKVertexProgram.h" #include "VKFragmentProgram.h" +#include "VKRenderPass.h" #include "../Common/TextGlyphs.h" namespace vk @@ -346,23 +347,13 @@ namespace vk //TODO: Add drop shadow if deemed necessary for visibility load_program(cmd, scale_x, scale_y, shader_offsets.data(), counts.size(), color); - VkRenderPassBeginInfo rp_begin = {}; - rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; - rp_begin.renderPass = m_render_pass; - rp_begin.framebuffer = target.value; - rp_begin.renderArea.offset.x = 0; - rp_begin.renderArea.offset.y = 0; - rp_begin.renderArea.extent.width = target.width(); - rp_begin.renderArea.extent.height = target.height(); - - vkCmdBeginRenderPass(cmd, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); + const coordu viewport = { positionu{0u, 0u}, sizeu{target.width(), target.height() } }; + vk::begin_renderpass(cmd, m_render_pass, target.value, viewport); for (uint i = 0; i < counts.size(); ++i) { vkCmdDraw(cmd, counts[i], 1, offsets[i], i); } - - vkCmdEndRenderPass(cmd); } void reset_descriptors() diff --git a/rpcs3/Emu/RSX/VK/VKTexture.cpp 
b/rpcs3/Emu/RSX/VK/VKTexture.cpp index 13c1e6f5de..b8b8935d3e 100644 --- a/rpcs3/Emu/RSX/VK/VKTexture.cpp +++ b/rpcs3/Emu/RSX/VK/VKTexture.cpp @@ -6,6 +6,7 @@ #include "../rsx_utils.h" #include "VKFormats.h" #include "VKCompute.h" +#include "VKRenderPass.h" namespace vk { @@ -62,6 +63,11 @@ namespace vk verify("Invalid image layout!" HERE), src->current_layout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL || src->current_layout == VK_IMAGE_LAYOUT_GENERAL; + if (vk::is_renderpass_open(cmd)) + { + vk::end_renderpass(cmd); + } + switch (src->format()) { default: @@ -142,6 +148,11 @@ namespace vk verify("Invalid image layout!" HERE), dst->current_layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL || dst->current_layout == VK_IMAGE_LAYOUT_GENERAL; + if (vk::is_renderpass_open(cmd)) + { + vk::end_renderpass(cmd); + } + switch (dst->format()) { default: @@ -211,6 +222,11 @@ namespace vk return; } + if (vk::is_renderpass_open(cmd)) + { + vk::end_renderpass(cmd); + } + if (src != dst) [[likely]] { src->push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); @@ -331,6 +347,11 @@ namespace vk auto preferred_src_format = (src == dst) ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; auto preferred_dst_format = (src == dst) ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + if (vk::is_renderpass_open(cmd)) + { + vk::end_renderpass(cmd); + } + if (srcLayout != preferred_src_format) change_image_layout(cmd, src, srcLayout, preferred_src_format, vk::get_image_subresource_range(0, 0, 1, 1, src_aspect)); @@ -370,6 +391,11 @@ namespace vk auto preferred_src_format = (src == dst) ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; auto preferred_dst_format = (src == dst) ? 
VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + if (vk::is_renderpass_open(cmd)) + { + vk::end_renderpass(cmd); + } + //TODO: Use an array of offsets/dimensions for mipmapped blits (mipmap count > 1) since subimages will have different dimensions if (srcLayout != preferred_src_format) change_image_layout(cmd, src, srcLayout, preferred_src_format, vk::get_image_subresource_range(0, 0, 1, 1, aspect)); @@ -713,6 +739,11 @@ namespace vk std::vector buffer_copies; copy_regions.reserve(subresource_layout.size()); + if (vk::is_renderpass_open(cmd)) + { + vk::end_renderpass(cmd); + } + for (const rsx_subresource_layout &layout : subresource_layout) { if (!heap_align) [[likely]] diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index 781edb8d44..fb16be36d9 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -5,6 +5,7 @@ #include "VKCompute.h" #include "VKResourceManager.h" #include "VKDMA.h" +#include "VKRenderPass.h" #include "../Common/TextureUtils.h" #include "Utilities/mutex.h" #include "../Common/texture_cache.h" @@ -176,6 +177,11 @@ namespace vk vk::get_resource_manager()->dispose(dma_fence); } + if (vk::is_renderpass_open(cmd)) + { + vk::end_renderpass(cmd); + } + src->push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); const auto internal_bpp = vk::get_format_texel_width(src->format());