vk: Lazily evaluate renderpass scope

- Spamming the driver with renderpass open/close cycles is bad for performance.
This commit is contained in:
kd-11 2020-03-10 14:05:50 +03:00 committed by Ivan
parent 7025985c0d
commit d25ba03e82
10 changed files with 160 additions and 51 deletions

View File

@ -1,5 +1,6 @@
#pragma once #pragma once
#include "VKHelpers.h" #include "VKHelpers.h"
#include "VKRenderPass.h"
#include "Utilities/StrUtil.h" #include "Utilities/StrUtil.h"
#define VK_MAX_COMPUTE_TASKS 4096 // Max number of jobs per frame #define VK_MAX_COMPUTE_TASKS 4096 // Max number of jobs per frame
@ -201,6 +202,12 @@ namespace vk
void run(VkCommandBuffer cmd, u32 invocations_x, u32 invocations_y, u32 invocations_z) void run(VkCommandBuffer cmd, u32 invocations_x, u32 invocations_y, u32 invocations_z)
{ {
// CmdDispatch is outside renderpass scope only
if (vk::is_renderpass_open(cmd))
{
vk::end_renderpass(cmd);
}
load_program(cmd); load_program(cmd);
vkCmdDispatch(cmd, invocations_x, invocations_y, invocations_z); vkCmdDispatch(cmd, invocations_x, invocations_y, invocations_z);
} }

View File

@ -1062,31 +1062,26 @@ void VKGSRender::update_draw_state()
void VKGSRender::begin_render_pass() void VKGSRender::begin_render_pass()
{ {
if (m_render_pass_open) vk::begin_renderpass(
return; *m_current_command_buffer,
get_render_pass(),
const auto renderpass = (m_cached_renderpass)? m_cached_renderpass : vk::get_renderpass(*m_device, m_current_renderpass_key); m_draw_fbo->value,
{ positionu{0u, 0u}, sizeu{m_draw_fbo->width(), m_draw_fbo->height()} });
VkRenderPassBeginInfo rp_begin = {};
rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
rp_begin.renderPass = renderpass;
rp_begin.framebuffer = m_draw_fbo->value;
rp_begin.renderArea.offset.x = 0;
rp_begin.renderArea.offset.y = 0;
rp_begin.renderArea.extent.width = m_draw_fbo->width();
rp_begin.renderArea.extent.height = m_draw_fbo->height();
vkCmdBeginRenderPass(*m_current_command_buffer, &rp_begin, VK_SUBPASS_CONTENTS_INLINE);
m_render_pass_open = true;
} }
void VKGSRender::close_render_pass() void VKGSRender::close_render_pass()
{ {
if (!m_render_pass_open) vk::end_renderpass(*m_current_command_buffer);
return; }
vkCmdEndRenderPass(*m_current_command_buffer); VkRenderPass VKGSRender::get_render_pass()
m_render_pass_open = false; {
if (!m_cached_renderpass)
{
m_cached_renderpass = vk::get_renderpass(*m_device, m_current_renderpass_key);
}
return m_cached_renderpass;
} }
void VKGSRender::emit_geometry(u32 sub_index) void VKGSRender::emit_geometry(u32 sub_index)
@ -1199,7 +1194,7 @@ void VKGSRender::emit_geometry(u32 sub_index)
m_program->bind_uniform(m_vertex_layout_storage->value, binding_table.vertex_buffers_first_bind_slot + 2, m_current_frame->descriptor_set); m_program->bind_uniform(m_vertex_layout_storage->value, binding_table.vertex_buffers_first_bind_slot + 2, m_current_frame->descriptor_set);
} }
if (!m_render_pass_open) if (!m_current_subdraw_id++)
{ {
vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline); vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline);
update_draw_state(); update_draw_state();
@ -1857,6 +1852,8 @@ void VKGSRender::end()
check_heap_status(VK_HEAP_CHECK_VERTEX_STORAGE | VK_HEAP_CHECK_VERTEX_LAYOUT_STORAGE); check_heap_status(VK_HEAP_CHECK_VERTEX_STORAGE | VK_HEAP_CHECK_VERTEX_LAYOUT_STORAGE);
u32 sub_index = 0; u32 sub_index = 0;
m_current_subdraw_id = 0;
rsx::method_registers.current_draw_clause.begin(); rsx::method_registers.current_draw_clause.begin();
do do
{ {
@ -1870,9 +1867,6 @@ void VKGSRender::end()
m_current_command_buffer->flags &= ~(vk::command_buffer::cb_has_conditional_render); m_current_command_buffer->flags &= ~(vk::command_buffer::cb_has_conditional_render);
} }
// Close any open passes unconditionally
close_render_pass();
m_rtts.on_write(m_framebuffer_layout.color_write_enabled.data(), m_framebuffer_layout.zeta_write_enabled); m_rtts.on_write(m_framebuffer_layout.color_write_enabled.data(), m_framebuffer_layout.zeta_write_enabled);
rsx::thread::end(); rsx::thread::end();
@ -2157,7 +2151,6 @@ void VKGSRender::clear_surface(u32 mask)
{ {
begin_render_pass(); begin_render_pass();
vkCmdClearAttachments(*m_current_command_buffer, ::size32(clear_descriptors), clear_descriptors.data(), 1, &region); vkCmdClearAttachments(*m_current_command_buffer, ::size32(clear_descriptors), clear_descriptors.data(), 1, &region);
close_render_pass();
} }
} }
@ -2728,7 +2721,7 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore
#endif #endif
// End any active renderpasses; the caller should handle reopening // End any active renderpasses; the caller should handle reopening
if (m_render_pass_open) if (vk::is_renderpass_open(*m_current_command_buffer))
{ {
close_render_pass(); close_render_pass();
} }

View File

@ -456,7 +456,7 @@ private:
utils::address_range m_offloader_fault_range; utils::address_range m_offloader_fault_range;
rsx::invalidation_cause m_offloader_fault_cause; rsx::invalidation_cause m_offloader_fault_cause;
bool m_render_pass_open = false; u32 m_current_subdraw_id = 0;
u64 m_current_renderpass_key = 0; u64 m_current_renderpass_key = 0;
VkRenderPass m_cached_renderpass = VK_NULL_HANDLE; VkRenderPass m_cached_renderpass = VK_NULL_HANDLE;
std::vector<vk::image*> m_fbo_images; std::vector<vk::image*> m_fbo_images;
@ -494,6 +494,7 @@ private:
void begin_render_pass(); void begin_render_pass();
void close_render_pass(); void close_render_pass();
VkRenderPass get_render_pass();
void update_draw_state(); void update_draw_state();

View File

@ -8,6 +8,7 @@
#include "VKResourceManager.h" #include "VKResourceManager.h"
#include "VKDMA.h" #include "VKDMA.h"
#include "VKCommandStream.h" #include "VKCommandStream.h"
#include "VKRenderPass.h"
#include "Utilities/mutex.h" #include "Utilities/mutex.h"
#include "Utilities/lockless.h" #include "Utilities/lockless.h"
@ -567,6 +568,11 @@ namespace vk
void insert_buffer_memory_barrier(VkCommandBuffer cmd, VkBuffer buffer, VkDeviceSize offset, VkDeviceSize length, VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage, VkAccessFlags src_mask, VkAccessFlags dst_mask) void insert_buffer_memory_barrier(VkCommandBuffer cmd, VkBuffer buffer, VkDeviceSize offset, VkDeviceSize length, VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage, VkAccessFlags src_mask, VkAccessFlags dst_mask)
{ {
if (vk::is_renderpass_open(cmd))
{
vk::end_renderpass(cmd);
}
VkBufferMemoryBarrier barrier = {}; VkBufferMemoryBarrier barrier = {};
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
barrier.buffer = buffer; barrier.buffer = buffer;
@ -587,6 +593,11 @@ namespace vk
VkAccessFlags src_mask, VkAccessFlags dst_mask, VkAccessFlags src_mask, VkAccessFlags dst_mask,
const VkImageSubresourceRange& range) const VkImageSubresourceRange& range)
{ {
if (vk::is_renderpass_open(cmd))
{
vk::end_renderpass(cmd);
}
VkImageMemoryBarrier barrier = {}; VkImageMemoryBarrier barrier = {};
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
barrier.newLayout = new_layout; barrier.newLayout = new_layout;
@ -603,11 +614,21 @@ namespace vk
void insert_execution_barrier(VkCommandBuffer cmd, VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage) void insert_execution_barrier(VkCommandBuffer cmd, VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage)
{ {
if (vk::is_renderpass_open(cmd))
{
vk::end_renderpass(cmd);
}
vkCmdPipelineBarrier(cmd, src_stage, dst_stage, 0, 0, nullptr, 0, nullptr, 0, nullptr); vkCmdPipelineBarrier(cmd, src_stage, dst_stage, 0, 0, nullptr, 0, nullptr, 0, nullptr);
} }
void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, const VkImageSubresourceRange& range) void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, const VkImageSubresourceRange& range)
{ {
if (vk::is_renderpass_open(cmd))
{
vk::end_renderpass(cmd);
}
//Prepare an image to match the new layout.. //Prepare an image to match the new layout..
VkImageMemoryBarrier barrier = {}; VkImageMemoryBarrier barrier = {};
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
@ -766,6 +787,10 @@ namespace vk
// Transition to GENERAL if this resource is both input and output // Transition to GENERAL if this resource is both input and output
// TODO: This implicitly makes the target incompatible with the renderpass declaration; investigate a proper workaround // TODO: This implicitly makes the target incompatible with the renderpass declaration; investigate a proper workaround
// TODO: This likely throws out hw optimizations on the rest of the renderpass, manage carefully // TODO: This likely throws out hw optimizations on the rest of the renderpass, manage carefully
if (vk::is_renderpass_open(cmd))
{
vk::end_renderpass(cmd);
}
VkAccessFlags src_access; VkAccessFlags src_access;
VkPipelineStageFlags src_stage; VkPipelineStageFlags src_stage;

View File

@ -5,6 +5,7 @@
#include "VKRenderTargets.h" #include "VKRenderTargets.h"
#include "VKFramebuffer.h" #include "VKFramebuffer.h"
#include "VKResourceManager.h" #include "VKResourceManager.h"
#include "VKRenderPass.h"
#include "../Overlays/overlays.h" #include "../Overlays/overlays.h"
@ -374,18 +375,8 @@ namespace vk
load_program(cmd, render_pass, src); load_program(cmd, render_pass, src);
set_up_viewport(cmd, viewport.x1, viewport.y1, viewport.width(), viewport.height()); set_up_viewport(cmd, viewport.x1, viewport.y1, viewport.width(), viewport.height());
VkRenderPassBeginInfo rp_begin = {}; vk::begin_renderpass(cmd, render_pass, fbo->value, viewport);
rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
rp_begin.renderPass = render_pass;
rp_begin.framebuffer = fbo->value;
rp_begin.renderArea.offset.x = static_cast<s32>(viewport.x1);
rp_begin.renderArea.offset.y = static_cast<s32>(viewport.y1);
rp_begin.renderArea.extent.width = viewport.width();
rp_begin.renderArea.extent.height = viewport.height();
vkCmdBeginRenderPass(cmd, &rp_begin, VK_SUBPASS_CONTENTS_INLINE);
emit_geometry(cmd); emit_geometry(cmd);
vkCmdEndRenderPass(cmd);
} }
void run(vk::command_buffer &cmd, const areau& viewport, vk::image* target, const std::vector<vk::image_view*>& src, VkRenderPass render_pass) void run(vk::command_buffer &cmd, const areau& viewport, vk::image* target, const std::vector<vk::image_view*>& src, VkRenderPass render_pass)

View File

@ -5,6 +5,16 @@
namespace vk namespace vk
{ {
// Snapshot of the renderpass scope currently tracked for a command buffer.
// Both members default to VK_NULL_HANDLE; is_renderpass_open() treats a
// null `pass` as "no renderpass open".
struct active_renderpass_info_t
{
	// Renderpass handle most recently begun on the command buffer
	VkRenderPass pass{ VK_NULL_HANDLE };

	// Framebuffer that renderpass was begun against
	VkFramebuffer fbo{ VK_NULL_HANDLE };
};
// Key of the renderpass most recently resolved by the keyed begin_renderpass() overload
atomic_t<u64> g_cached_renderpass_key = 0;
// Handle obtained from get_renderpass() for g_cached_renderpass_key
VkRenderPass g_cached_renderpass = VK_NULL_HANDLE;
// Per-command-buffer record of the renderpass/framebuffer pair that is currently open.
// Written by begin_renderpass()/end_renderpass() and read by is_renderpass_open().
std::unordered_map<VkCommandBuffer, active_renderpass_info_t> g_current_renderpass;
shared_mutex g_renderpass_cache_mutex; shared_mutex g_renderpass_cache_mutex;
std::unordered_map<u64, VkRenderPass> g_renderpass_cache; std::unordered_map<u64, VkRenderPass> g_renderpass_cache;
@ -248,4 +258,51 @@ namespace vk
g_renderpass_cache.clear(); g_renderpass_cache.clear();
} }
// Opens a renderpass scope on `cmd` unless an identical one is already open.
//
// cmd                - command buffer to record into
// pass               - renderpass to begin
// target             - framebuffer to render into
// framebuffer_region - render area (x, y, width, height)
//
// If the tracked scope for `cmd` already uses the same pass and framebuffer,
// nothing is recorded. If a different scope is open, it is ended first.
void begin_renderpass(VkCommandBuffer cmd, VkRenderPass pass, VkFramebuffer target, const coordu& framebuffer_region)
{
	auto& active = g_current_renderpass[cmd];

	const bool same_scope = (active.pass == pass) && (active.fbo == target);
	if (same_scope)
	{
		// Already inside the requested scope; avoid a redundant end/begin cycle
		return;
	}

	if (active.pass != VK_NULL_HANDLE)
	{
		// A different scope is open on this command buffer; close it before switching
		end_renderpass(cmd);
	}

	VkRenderPassBeginInfo begin_info = {};
	begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
	begin_info.renderPass = pass;
	begin_info.framebuffer = target;

	auto& area = begin_info.renderArea;
	area.offset.x = static_cast<int32_t>(framebuffer_region.x);
	area.offset.y = static_cast<int32_t>(framebuffer_region.y);
	area.extent.width = framebuffer_region.width;
	area.extent.height = framebuffer_region.height;

	vkCmdBeginRenderPass(cmd, &begin_info, VK_SUBPASS_CONTENTS_INLINE);

	// Remember what is open so later calls can skip or close it
	active.pass = pass;
	active.fbo = target;
}
// Keyed overload: resolves `renderpass_key` to a VkRenderPass and forwards to
// the handle-based begin_renderpass().
//
// dev                - device used to look up the renderpass
// cmd                - command buffer to record into
// renderpass_key     - key understood by get_renderpass()
// target             - framebuffer to render into
// framebuffer_region - render area (x, y, width, height)
//
// The most recent lookup is cached in g_cached_renderpass(_key) so repeated
// calls with the same key skip get_renderpass().
void begin_renderpass(VkDevice dev, VkCommandBuffer cmd, u64 renderpass_key, VkFramebuffer target, const coordu& framebuffer_region)
{
	// The cached key starts at 0 while the cached handle starts as VK_NULL_HANDLE.
	// Comparing keys alone would treat a first request for key 0 as a hit and
	// begin a pass with a null handle, so an empty cache always counts as a miss.
	const bool cache_miss = (renderpass_key != g_cached_renderpass_key) || (g_cached_renderpass == VK_NULL_HANDLE);

	if (cache_miss)
	{
		g_cached_renderpass = get_renderpass(dev, renderpass_key);
		g_cached_renderpass_key = renderpass_key;
	}

	begin_renderpass(cmd, g_cached_renderpass, target, framebuffer_region);
}
// Ends the renderpass on `cmd` and clears the tracked scope for it.
// No check is made that a pass is actually open; callers guard with
// is_renderpass_open() where that is uncertain.
void end_renderpass(VkCommandBuffer cmd)
{
	vkCmdEndRenderPass(cmd);

	// Reset the record so is_renderpass_open() reports false for this buffer
	active_renderpass_info_t& slot = g_current_renderpass[cmd];
	slot.pass = VK_NULL_HANDLE;
	slot.fbo = VK_NULL_HANDLE;
}
// Reports whether a renderpass scope is currently tracked as open on `cmd`.
// Returns true only when the recorded pass handle for `cmd` is non-null.
bool is_renderpass_open(VkCommandBuffer cmd)
{
	// Use find() rather than operator[]: this is a pure query, and operator[]
	// would insert a default entry for every command buffer that is only probed.
	const auto found = g_current_renderpass.find(cmd);
	return found != g_current_renderpass.end() && found->second.pass != VK_NULL_HANDLE;
}
} }

View File

@ -10,4 +10,11 @@ namespace vk
VkRenderPass get_renderpass(VkDevice dev, u64 renderpass_key); VkRenderPass get_renderpass(VkDevice dev, u64 renderpass_key);
void clear_renderpass_cache(VkDevice dev); void clear_renderpass_cache(VkDevice dev);
// Renderpass scope management helpers.
// These track, per command buffer, which renderpass/framebuffer pair is open so that
// redundant begin/end cycles can be skipped.
// NOTE: These are not thread safe by design.

// Looks up the renderpass for renderpass_key (the most recent lookup is cached) and
// forwards to the handle-based overload below.
void begin_renderpass(VkDevice dev, VkCommandBuffer cmd, u64 renderpass_key, VkFramebuffer target, const coordu& framebuffer_region);
// Begins 'pass' on 'target' unless that exact pair is already open on 'cmd'.
// A different open pass on 'cmd' is ended first.
void begin_renderpass(VkCommandBuffer cmd, VkRenderPass pass, VkFramebuffer target, const coordu& framebuffer_region);
// Ends the renderpass on 'cmd' and clears its tracked state. Does not check that one is open.
void end_renderpass(VkCommandBuffer cmd);
// Returns true when a renderpass is tracked as open on 'cmd'.
bool is_renderpass_open(VkCommandBuffer cmd);
} }

View File

@ -2,6 +2,7 @@
#include "VKHelpers.h" #include "VKHelpers.h"
#include "VKVertexProgram.h" #include "VKVertexProgram.h"
#include "VKFragmentProgram.h" #include "VKFragmentProgram.h"
#include "VKRenderPass.h"
#include "../Common/TextGlyphs.h" #include "../Common/TextGlyphs.h"
namespace vk namespace vk
@ -346,23 +347,13 @@ namespace vk
//TODO: Add drop shadow if deemed necessary for visibility //TODO: Add drop shadow if deemed necessary for visibility
load_program(cmd, scale_x, scale_y, shader_offsets.data(), counts.size(), color); load_program(cmd, scale_x, scale_y, shader_offsets.data(), counts.size(), color);
VkRenderPassBeginInfo rp_begin = {}; const coordu viewport = { positionu{0u, 0u}, sizeu{target.width(), target.height() } };
rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; vk::begin_renderpass(cmd, m_render_pass, target.value, viewport);
rp_begin.renderPass = m_render_pass;
rp_begin.framebuffer = target.value;
rp_begin.renderArea.offset.x = 0;
rp_begin.renderArea.offset.y = 0;
rp_begin.renderArea.extent.width = target.width();
rp_begin.renderArea.extent.height = target.height();
vkCmdBeginRenderPass(cmd, &rp_begin, VK_SUBPASS_CONTENTS_INLINE);
for (uint i = 0; i < counts.size(); ++i) for (uint i = 0; i < counts.size(); ++i)
{ {
vkCmdDraw(cmd, counts[i], 1, offsets[i], i); vkCmdDraw(cmd, counts[i], 1, offsets[i], i);
} }
vkCmdEndRenderPass(cmd);
} }
void reset_descriptors() void reset_descriptors()

View File

@ -6,6 +6,7 @@
#include "../rsx_utils.h" #include "../rsx_utils.h"
#include "VKFormats.h" #include "VKFormats.h"
#include "VKCompute.h" #include "VKCompute.h"
#include "VKRenderPass.h"
namespace vk namespace vk
{ {
@ -62,6 +63,11 @@ namespace vk
verify("Invalid image layout!" HERE), verify("Invalid image layout!" HERE),
src->current_layout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL || src->current_layout == VK_IMAGE_LAYOUT_GENERAL; src->current_layout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL || src->current_layout == VK_IMAGE_LAYOUT_GENERAL;
if (vk::is_renderpass_open(cmd))
{
vk::end_renderpass(cmd);
}
switch (src->format()) switch (src->format())
{ {
default: default:
@ -142,6 +148,11 @@ namespace vk
verify("Invalid image layout!" HERE), verify("Invalid image layout!" HERE),
dst->current_layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL || dst->current_layout == VK_IMAGE_LAYOUT_GENERAL; dst->current_layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL || dst->current_layout == VK_IMAGE_LAYOUT_GENERAL;
if (vk::is_renderpass_open(cmd))
{
vk::end_renderpass(cmd);
}
switch (dst->format()) switch (dst->format())
{ {
default: default:
@ -211,6 +222,11 @@ namespace vk
return; return;
} }
if (vk::is_renderpass_open(cmd))
{
vk::end_renderpass(cmd);
}
if (src != dst) [[likely]] if (src != dst) [[likely]]
{ {
src->push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); src->push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
@ -331,6 +347,11 @@ namespace vk
auto preferred_src_format = (src == dst) ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; auto preferred_src_format = (src == dst) ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
auto preferred_dst_format = (src == dst) ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; auto preferred_dst_format = (src == dst) ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
if (vk::is_renderpass_open(cmd))
{
vk::end_renderpass(cmd);
}
if (srcLayout != preferred_src_format) if (srcLayout != preferred_src_format)
change_image_layout(cmd, src, srcLayout, preferred_src_format, vk::get_image_subresource_range(0, 0, 1, 1, src_aspect)); change_image_layout(cmd, src, srcLayout, preferred_src_format, vk::get_image_subresource_range(0, 0, 1, 1, src_aspect));
@ -370,6 +391,11 @@ namespace vk
auto preferred_src_format = (src == dst) ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; auto preferred_src_format = (src == dst) ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
auto preferred_dst_format = (src == dst) ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; auto preferred_dst_format = (src == dst) ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
if (vk::is_renderpass_open(cmd))
{
vk::end_renderpass(cmd);
}
//TODO: Use an array of offsets/dimensions for mipmapped blits (mipmap count > 1) since subimages will have different dimensions //TODO: Use an array of offsets/dimensions for mipmapped blits (mipmap count > 1) since subimages will have different dimensions
if (srcLayout != preferred_src_format) if (srcLayout != preferred_src_format)
change_image_layout(cmd, src, srcLayout, preferred_src_format, vk::get_image_subresource_range(0, 0, 1, 1, aspect)); change_image_layout(cmd, src, srcLayout, preferred_src_format, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
@ -713,6 +739,11 @@ namespace vk
std::vector<VkBufferCopy> buffer_copies; std::vector<VkBufferCopy> buffer_copies;
copy_regions.reserve(subresource_layout.size()); copy_regions.reserve(subresource_layout.size());
if (vk::is_renderpass_open(cmd))
{
vk::end_renderpass(cmd);
}
for (const rsx_subresource_layout &layout : subresource_layout) for (const rsx_subresource_layout &layout : subresource_layout)
{ {
if (!heap_align) [[likely]] if (!heap_align) [[likely]]

View File

@ -5,6 +5,7 @@
#include "VKCompute.h" #include "VKCompute.h"
#include "VKResourceManager.h" #include "VKResourceManager.h"
#include "VKDMA.h" #include "VKDMA.h"
#include "VKRenderPass.h"
#include "../Common/TextureUtils.h" #include "../Common/TextureUtils.h"
#include "Utilities/mutex.h" #include "Utilities/mutex.h"
#include "../Common/texture_cache.h" #include "../Common/texture_cache.h"
@ -176,6 +177,11 @@ namespace vk
vk::get_resource_manager()->dispose(dma_fence); vk::get_resource_manager()->dispose(dma_fence);
} }
if (vk::is_renderpass_open(cmd))
{
vk::end_renderpass(cmd);
}
src->push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); src->push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
const auto internal_bpp = vk::get_format_texel_width(src->format()); const auto internal_bpp = vk::get_format_texel_width(src->format());