From 8249d51aa8ab58eddd914b3d3aedc62de34f159f Mon Sep 17 00:00:00 2001 From: kd-11 Date: Wed, 19 Jun 2019 00:26:25 +0300 Subject: [PATCH] vk: Optimize occlusion pool management - Do not consume a slot every draw call, instead batch as many draws as possible - Since renderpasses are dispatched per-draw-clause, keeping occlusion queries outside the renderpasses works fine - If renderpasses are reorganized, occlusion tasks will have to be reorganized again --- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 95 ++++++++++++++++++--------------- rpcs3/Emu/RSX/VK/VKGSRender.h | 2 +- rpcs3/Emu/RSX/VK/VKHelpers.h | 15 +++--- 3 files changed, 62 insertions(+), 50 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 9c2ca2a336..0cedb91276 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -473,7 +473,9 @@ VKGSRender::VKGSRender() : GSRender() //Occlusion m_occlusion_query_pool.create((*m_device), OCCLUSION_MAX_POOL_SIZE); - for (int n = 0; n < 128; ++n) + m_occlusion_map.resize(occlusion_query_count); + + for (int n = 0; n < occlusion_query_count; ++n) m_occlusion_query_data[n].driver_handle = n; //Generate frame contexts @@ -1667,10 +1669,9 @@ void VKGSRender::end() m_textures_upload_time += m_profiler.duration(); - u32 occlusion_id = 0; - if (m_occlusion_query_active) + if (m_current_command_buffer->flags & vk::command_buffer::cb_load_occluson_task) { - occlusion_id = m_occlusion_query_pool.find_free_slot(); + u32 occlusion_id = m_occlusion_query_pool.find_free_slot(); if (occlusion_id == UINT32_MAX) { m_tsc += 100; @@ -1683,21 +1684,21 @@ void VKGSRender::end() if (m_current_task) m_current_task->result = 1; } } + + // Begin query + m_occlusion_query_pool.begin_query(*m_current_command_buffer, occlusion_id); + + auto &data = m_occlusion_map[m_active_query_info->driver_handle]; + data.indices.push_back(occlusion_id); + data.command_buffer_to_wait = m_current_command_buffer; + + m_current_command_buffer->flags &= ~vk::command_buffer::cb_load_occluson_task; + m_current_command_buffer->flags |= (vk::command_buffer::cb_has_occlusion_task | vk::command_buffer::cb_has_open_query); } bool primitive_emulated = false; vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, primitive_emulated); - if (m_occlusion_query_active && (occlusion_id != UINT32_MAX)) - { - //Begin query - m_occlusion_query_pool.begin_query(*m_current_command_buffer, occlusion_id); - m_occlusion_map[m_active_query_info->driver_handle].indices.push_back(occlusion_id); - m_occlusion_map[m_active_query_info->driver_handle].command_buffer_to_wait = m_current_command_buffer; - - m_current_command_buffer->flags |= vk::command_buffer::cb_has_occlusion_task; - } - // Apply write memory barriers if (true)//g_cfg.video.strict_rendering_mode) { @@ -1768,12 +1769,6 @@ void VKGSRender::end() close_render_pass(); vk::leave_uninterruptible(); - if (m_occlusion_query_active && (occlusion_id != UINT32_MAX)) - { - //End query - m_occlusion_query_pool.end_query(*m_current_command_buffer, occlusion_id); - } - m_rtts.on_write(); rsx::thread::end(); @@ -2126,7 +2121,6 @@ void VKGSRender::clear_surface(u32 mask) void VKGSRender::flush_command_queue(bool hard_sync) { - rsx::g_dma_manager.sync(); close_and_submit_command_buffer(m_current_command_buffer->submit_fence); if (hard_sync) @@ -2165,6 +2159,11 @@ void VKGSRender::flush_command_queue(bool hard_sync) check_present_status(); } + if (m_occlusion_query_active) + { + m_current_command_buffer->flags |= vk::command_buffer::cb_load_occluson_task; + } + open_command_buffer(); } @@ -2781,6 +2780,9 @@ void VKGSRender::write_buffers() void VKGSRender::close_and_submit_command_buffer(VkFence fence, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore, VkPipelineStageFlags pipeline_stage_flags) { + // Wait before sync block below + rsx::g_dma_manager.sync(); + if (m_attrib_ring_info.dirty() || m_fragment_env_ring_info.dirty() || m_vertex_env_ring_info.dirty() || @@ -2810,6 +2812,13 @@ void VKGSRender::close_and_submit_command_buffer(VkFence fence, VkSemaphore wait VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); } + // End open queries. Flags will be automatically reset by the submit routine + if (m_current_command_buffer->flags & vk::command_buffer::cb_has_open_query) + { + auto open_query = m_occlusion_map[m_active_query_info->driver_handle].indices.back(); + m_occlusion_query_pool.end_query(*m_current_command_buffer, open_query); + } + m_current_command_buffer->end(); m_current_command_buffer->tag(); @@ -3473,18 +3482,33 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst void VKGSRender::begin_occlusion_query(rsx::reports::occlusion_query_info* query) { + verify(HERE), !m_occlusion_query_active; + query->result = 0; //query->sync_timestamp = get_system_time(); m_active_query_info = query; m_occlusion_query_active = true; + m_current_command_buffer->flags |= vk::command_buffer::cb_load_occluson_task; } void VKGSRender::end_occlusion_query(rsx::reports::occlusion_query_info* query) { - m_occlusion_query_active = false; - m_active_query_info = nullptr; + verify(HERE), query == m_active_query_info; // NOTE: flushing the queue is very expensive, do not flush just because query stopped + if (m_current_command_buffer->flags & vk::command_buffer::cb_has_open_query) + { + // End query + auto open_query = m_occlusion_map[m_active_query_info->driver_handle].indices.back(); + m_occlusion_query_pool.end_query(*m_current_command_buffer, open_query); + m_current_command_buffer->flags &= ~vk::command_buffer::cb_has_open_query; + } + + // Clear occlusion load flag + m_current_command_buffer->flags &= ~vk::command_buffer::cb_load_occluson_task; + + m_occlusion_query_active = false; + m_active_query_info = nullptr; } bool VKGSRender::check_occlusion_query_status(rsx::reports::occlusion_query_info* query) @@ -3492,11 +3516,7 @@ bool VKGSRender::check_occlusion_query_status(rsx::reports::occlusion_query_info if (!query->num_draws) return true; - auto found = m_occlusion_map.find(query->driver_handle); - if (found == m_occlusion_map.end()) - return true; - - auto &data = found->second; + auto &data = m_occlusion_map[query->driver_handle]; if (data.indices.empty()) return true; @@ -3522,11 +3542,7 @@ bool VKGSRender::check_occlusion_query_status(rsx::reports::occlusion_query_info void VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info* query) { - auto found = m_occlusion_map.find(query->driver_handle); - if (found == m_occlusion_map.end()) - return; - - auto &data = found->second; + auto &data = m_occlusion_map[query->driver_handle]; if (data.indices.empty()) return; @@ -3561,27 +3577,22 @@ void VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info* } m_occlusion_query_pool.reset_queries(*m_current_command_buffer, data.indices); - m_occlusion_map.erase(query->driver_handle); + data.indices.clear(); } void VKGSRender::discard_occlusion_query(rsx::reports::occlusion_query_info* query) { if (m_active_query_info == query) { - m_occlusion_query_active = false; - m_active_query_info = nullptr; + end_occlusion_query(query); } - auto found = m_occlusion_map.find(query->driver_handle); - if (found == m_occlusion_map.end()) - return; - - auto &data = found->second; + auto &data = m_occlusion_map[query->driver_handle]; if (data.indices.empty()) return; m_occlusion_query_pool.reset_queries(*m_current_command_buffer, data.indices); - m_occlusion_map.erase(query->driver_handle); + data.indices.clear(); } bool VKGSRender::on_decompiler_task() diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index c6e7a5e3e2..c0955c022e 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -400,7 +400,7 @@ private: vk::occlusion_query_pool m_occlusion_query_pool; bool m_occlusion_query_active = false; rsx::reports::occlusion_query_info *m_active_query_info = nullptr; - std::unordered_map m_occlusion_map; + std::vector m_occlusion_map; shared_mutex m_secondary_cb_guard; vk::command_pool m_secondary_command_buffer_pool; diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index 2b056b63bd..0e76e4b004 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -891,7 +891,9 @@ private: { cb_has_occlusion_task = 1, cb_has_blit_transfer = 2, - cb_has_dma_transfer = 4 + cb_has_dma_transfer = 4, + cb_has_open_query = 8, + cb_load_occluson_task = 16 }; u32 flags = 0; @@ -2725,7 +2727,7 @@ public: VkQueryPool query_pool = VK_NULL_HANDLE; vk::render_device* owner = nullptr; - std::deque available_slots; + std::stack available_slots; std::vector query_active_status; public: @@ -2740,11 +2742,10 @@ public: owner = &dev; query_active_status.resize(num_entries, false); - available_slots.resize(num_entries); for (u32 n = 0; n < num_entries; ++n) { - available_slots[n] = n; + available_slots.push(n); } } @@ -2807,7 +2808,7 @@ public: vkCmdResetQueryPool(cmd, query_pool, index, 1); query_active_status[index] = false; - available_slots.push_back(index); + available_slots.push(index); } } @@ -2834,8 +2835,8 @@ public: return ~0u; } - u32 result = available_slots.front(); - available_slots.pop_front(); + u32 result = available_slots.top(); + available_slots.pop(); verify(HERE), !query_active_status[result]; return result;