diff --git a/rpcs3/Emu/CMakeLists.txt b/rpcs3/Emu/CMakeLists.txt index 66d589487e..a6efa3fae8 100644 --- a/rpcs3/Emu/CMakeLists.txt +++ b/rpcs3/Emu/CMakeLists.txt @@ -443,6 +443,7 @@ if(TARGET 3rdparty_vulkan) RSX/VK/VKMemAlloc.cpp RSX/VK/VKPresent.cpp RSX/VK/VKProgramPipeline.cpp + RSX/VK/VKQueryPool.cpp RSX/VK/VKRenderPass.cpp RSX/VK/VKResolveHelper.cpp RSX/VK/VKResourceManager.cpp diff --git a/rpcs3/Emu/RSX/VK/VKDraw.cpp b/rpcs3/Emu/RSX/VK/VKDraw.cpp index 9760858c25..1a11016f81 100644 --- a/rpcs3/Emu/RSX/VK/VKDraw.cpp +++ b/rpcs3/Emu/RSX/VK/VKDraw.cpp @@ -943,14 +943,14 @@ void VKGSRender::end() if (m_current_command_buffer->flags & vk::command_buffer::cb_load_occluson_task) { - u32 occlusion_id = m_occlusion_query_pool.find_free_slot(*m_current_command_buffer); + u32 occlusion_id = m_occlusion_query_manager->allocate_query(*m_current_command_buffer); if (occlusion_id == UINT32_MAX) { // Force flush rsx_log.error("[Performance Warning] Out of free occlusion slots. Forcing hard sync."); ZCULL_control::sync(this); - occlusion_id = m_occlusion_query_pool.find_free_slot(*m_current_command_buffer); + occlusion_id = m_occlusion_query_manager->allocate_query(*m_current_command_buffer); if (occlusion_id == UINT32_MAX) { //rsx_log.error("Occlusion pool overflow"); @@ -959,7 +959,7 @@ void VKGSRender::end() } // Begin query - m_occlusion_query_pool.begin_query(*m_current_command_buffer, occlusion_id); + m_occlusion_query_manager->begin_query(*m_current_command_buffer, occlusion_id); auto &data = m_occlusion_map[m_active_query_info->driver_handle]; data.indices.push_back(occlusion_id); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index d3dcb30dbe..ba4e0f075e 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -408,7 +408,7 @@ VKGSRender::VKGSRender() : GSRender() std::tie(pipeline_layout, descriptor_layouts) = get_shared_pipeline_layout(*m_device); //Occlusion - m_occlusion_query_pool.create((*m_device), 
OCCLUSION_MAX_POOL_SIZE); + m_occlusion_query_manager = std::make_unique(*m_device, VK_QUERY_TYPE_OCCLUSION, OCCLUSION_MAX_POOL_SIZE); m_occlusion_map.resize(occlusion_query_count); for (u32 n = 0; n < occlusion_query_count; ++n) @@ -519,8 +519,6 @@ VKGSRender::VKGSRender() : GSRender() vk::get_overlay_pass()->init(*m_current_command_buffer, m_texture_upload_buffer_ring_info); - m_occlusion_query_pool.initialize(*m_current_command_buffer); - if (shadermode == shader_mode::async_with_interpreter || shadermode == shader_mode::interpreter_only) { m_shader_interpreter.init(*m_device); @@ -625,7 +623,7 @@ VKGSRender::~VKGSRender() vkDestroyDescriptorSetLayout(*m_device, descriptor_layouts, nullptr); //Queries - m_occlusion_query_pool.destroy(); + m_occlusion_query_manager.reset(); m_cond_render_buffer.reset(); //Command buffer @@ -1965,7 +1963,7 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore if (m_current_command_buffer->flags & vk::command_buffer::cb_has_open_query) { auto open_query = m_occlusion_map[m_active_query_info->driver_handle].indices.back(); - m_occlusion_query_pool.end_query(*m_current_command_buffer, open_query); + m_occlusion_query_manager->end_query(*m_current_command_buffer, open_query); m_current_command_buffer->flags &= ~vk::command_buffer::cb_has_open_query; } @@ -2267,7 +2265,7 @@ void VKGSRender::end_occlusion_query(rsx::reports::occlusion_query_info* query) { // End query auto open_query = m_occlusion_map[m_active_query_info->driver_handle].indices.back(); - m_occlusion_query_pool.end_query(*m_current_command_buffer, open_query); + m_occlusion_query_manager->end_query(*m_current_command_buffer, open_query); m_current_command_buffer->flags &= ~vk::command_buffer::cb_has_open_query; } @@ -2291,7 +2289,7 @@ bool VKGSRender::check_occlusion_query_status(rsx::reports::occlusion_query_info return false; u32 oldest = data.indices.front(); - return m_occlusion_query_pool.check_query_status(oldest); + return 
m_occlusion_query_manager->check_query_status(oldest); } void VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info* query) @@ -2322,7 +2320,7 @@ void VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info* for (const auto occlusion_id : data.indices) { // We only need one hit - if (auto value = m_occlusion_query_pool.get_query_result(occlusion_id)) + if (auto value = m_occlusion_query_manager->get_query_result(occlusion_id)) { query->result = 1; break; @@ -2330,7 +2328,7 @@ void VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info* } } - m_occlusion_query_pool.reset_queries(*m_current_command_buffer, data.indices); + m_occlusion_query_manager->free_queries(*m_current_command_buffer, data.indices); data.indices.clear(); } @@ -2345,7 +2343,7 @@ void VKGSRender::discard_occlusion_query(rsx::reports::occlusion_query_info* que if (data.indices.empty()) return; - m_occlusion_query_pool.reset_queries(*m_current_command_buffer, data.indices); + m_occlusion_query_manager->free_queries(*m_current_command_buffer, data.indices); data.indices.clear(); } @@ -2356,7 +2354,7 @@ void VKGSRender::emergency_query_cleanup(vk::command_buffer* commands) if (m_current_command_buffer->flags & vk::command_buffer::cb_has_open_query) { auto open_query = m_occlusion_map[m_active_query_info->driver_handle].indices.back(); - m_occlusion_query_pool.end_query(*m_current_command_buffer, open_query); + m_occlusion_query_manager->end_query(*m_current_command_buffer, open_query); m_current_command_buffer->flags &= ~vk::command_buffer::cb_has_open_query; } } @@ -2427,7 +2425,7 @@ void VKGSRender::begin_conditional_rendering(const std::vectorvalue, 0); + m_occlusion_query_manager->get_query_result_indirect(*m_current_command_buffer, index, m_cond_render_buffer->value, 0); vk::insert_buffer_memory_barrier(*m_current_command_buffer, m_cond_render_buffer->value, 0, 4, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage, @@ -2457,7 +2455,7 @@ void 
VKGSRender::begin_conditional_rendering(const std::vectordriver_handle]; for (const auto& index : query_info.indices) { - m_occlusion_query_pool.get_query_result_indirect(*m_current_command_buffer, index, scratch->value, dst_offset); + m_occlusion_query_manager->get_query_result_indirect(*m_current_command_buffer, index, scratch->value, dst_offset); dst_offset += 4; } } diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 90ad06b3e1..9f07565623 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -9,6 +9,7 @@ #include "VKProgramBuffer.h" #include "VKFramebuffer.h" #include "VKShaderInterpreter.h" +#include "VKQueryPool.h" #include "../GCM.h" #include @@ -397,7 +398,7 @@ private: //Vulkan internals vk::command_pool m_command_buffer_pool; - vk::occlusion_query_pool m_occlusion_query_pool; + std::unique_ptr m_occlusion_query_manager; bool m_occlusion_query_active = false; rsx::reports::occlusion_query_info *m_active_query_info = nullptr; std::vector m_occlusion_map; diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index 67df171a25..b7bb43cd36 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -3224,193 +3224,33 @@ public: } }; - class occlusion_query_pool + class query_pool : public rsx::ref_counted { - struct query_slot_info - { - bool any_passed; - bool active; - bool ready; - }; - - VkQueryPool query_pool = VK_NULL_HANDLE; - vk::render_device* owner = nullptr; - - std::deque available_slots; - std::vector query_slot_status; - - inline bool poke_query(query_slot_info& query, u32 index, VkQueryResultFlags flags) - { - // Query is ready if: - // 1. Any sample has been determined to have passed the Z test - // 2. 
The backend has fully processed the query and found no hits - - u32 result[2] = { 0, 0 }; - switch (const auto error = vkGetQueryPoolResults(*owner, query_pool, index, 1, 8, result, 8, flags | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)) - { - case VK_SUCCESS: - { - if (result[0]) - { - query.any_passed = true; - query.ready = true; - return true; - } - else if (result[1]) - { - query.any_passed = false; - query.ready = true; - return true; - } - - return false; - } - case VK_NOT_READY: - { - if (result[0]) - { - query.any_passed = true; - query.ready = true; - return true; - } - - return false; - } - default: - die_with_error(HERE, error); - return false; - } - } + VkQueryPool m_query_pool; + VkDevice m_device; public: - - void create(vk::render_device &dev, u32 num_entries) + query_pool(VkDevice dev, VkQueryType type, u32 size) + : m_query_pool(VK_NULL_HANDLE), m_device(dev) { - VkQueryPoolCreateInfo info = {}; + VkQueryPoolCreateInfo info{}; info.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; - info.queryType = VK_QUERY_TYPE_OCCLUSION; - info.queryCount = num_entries; + info.queryType = type; + info.queryCount = size; + vkCreateQueryPool(dev, &info, nullptr, &m_query_pool); - CHECK_RESULT(vkCreateQueryPool(dev, &info, nullptr, &query_pool)); - owner = &dev; - - // From spec: "After query pool creation, each query must be reset before it is used." 
- query_slot_status.resize(num_entries, {}); + // Take 'size' references on this object + ref_count.release(static_cast(size)); } - void destroy() + ~query_pool() { - if (query_pool) - { - vkDestroyQueryPool(*owner, query_pool, nullptr); - - owner = nullptr; - query_pool = VK_NULL_HANDLE; - } + vkDestroyQueryPool(m_device, m_query_pool, nullptr); } - void initialize(vk::command_buffer &cmd) + operator VkQueryPool() { - const u32 count = ::size32(query_slot_status); - vkCmdResetQueryPool(cmd, query_pool, 0, count); - - query_slot_info value{}; - std::fill(query_slot_status.begin(), query_slot_status.end(), value); - - for (u32 n = 0; n < count; ++n) - { - available_slots.push_back(n); - } - } - - void begin_query(vk::command_buffer &cmd, u32 index) - { - verify(HERE), query_slot_status[index].active == false; - - vkCmdBeginQuery(cmd, query_pool, index, 0);//VK_QUERY_CONTROL_PRECISE_BIT); - query_slot_status[index].active = true; - } - - void end_query(vk::command_buffer &cmd, u32 index) - { - vkCmdEndQuery(cmd, query_pool, index); - } - - bool check_query_status(u32 index) - { - return poke_query(query_slot_status[index], index, VK_QUERY_RESULT_PARTIAL_BIT); - } - - u32 get_query_result(u32 index) - { - // Check for cached result - auto& query_info = query_slot_status[index]; - - while (!query_info.ready) - { - poke_query(query_info, index, VK_QUERY_RESULT_PARTIAL_BIT); - } - - return query_info.any_passed ? 
1 : 0; - } - - void get_query_result_indirect(vk::command_buffer &cmd, u32 index, VkBuffer dst, VkDeviceSize dst_offset) - { - vkCmdCopyQueryPoolResults(cmd, query_pool, index, 1, dst, dst_offset, 4, VK_QUERY_RESULT_WAIT_BIT); - } - - void reset_query(vk::command_buffer &/*cmd*/, u32 index) - { - if (query_slot_status[index].active) - { - // Actual reset is handled later on demand - available_slots.push_back(index); - } - } - - template class _List> - void reset_queries(vk::command_buffer &cmd, _List &list) - { - for (const auto index : list) - reset_query(cmd, index); - } - - void reset_all(vk::command_buffer &cmd) - { - for (u32 n = 0; n < query_slot_status.size(); n++) - { - if (query_slot_status[n].active) - reset_query(cmd, n); - } - } - - u32 find_free_slot(vk::command_buffer& cmd) - { - if (available_slots.empty()) - { - return ~0u; - } - - const u32 result = available_slots.front(); - if (query_slot_status[result].active) - { - // Trigger reset if round robin allocation has gone back to the first item - if (vk::is_renderpass_open(cmd)) - { - vk::end_renderpass(cmd); - } - - // At this point, the first available slot is not reset which means they're all active - for (auto It = available_slots.cbegin(); It != available_slots.cend(); ++It) - { - const auto index = *It; - vkCmdResetQueryPool(cmd, query_pool, index, 1); - query_slot_status[index] = {}; - } - } - - available_slots.pop_front(); - return result; + return m_query_pool; } }; diff --git a/rpcs3/Emu/RSX/VK/VKQueryPool.cpp b/rpcs3/Emu/RSX/VK/VKQueryPool.cpp new file mode 100644 index 0000000000..913fd58ad8 --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKQueryPool.cpp @@ -0,0 +1,209 @@ +#include "stdafx.h" +#include "VKQueryPool.h" +#include "VKResourceManager.h" + +namespace vk +{ + inline bool query_pool_manager::poke_query(query_slot_info& query, u32 index, VkQueryResultFlags flags) + { + // Query is ready if: + // 1. Any sample has been determined to have passed the Z test + // 2. 
The backend has fully processed the query and found no hits + + u32 result[2] = { 0, 0 }; + switch (const auto error = vkGetQueryPoolResults(*owner, *query.pool, index, 1, 8, result, 8, flags | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)) + { + case VK_SUCCESS: + { + if (result[0]) + { + query.any_passed = true; + query.ready = true; + return true; + } + else if (result[1]) + { + query.any_passed = false; + query.ready = true; + return true; + } + + return false; + } + case VK_NOT_READY: + { + if (result[0]) + { + query.any_passed = true; + query.ready = true; + return true; + } + + return false; + } + default: + die_with_error(HERE, error); + return false; + } + } + + query_pool_manager::query_pool_manager(vk::render_device& dev, VkQueryType type, u32 num_entries) + { + verify(HERE), num_entries > 0; + + owner = &dev; + query_type = type; + query_slot_status.resize(num_entries, {}); + + for (unsigned i = 0; i < num_entries; ++i) + { + m_available_slots.push_back(i); + } + } + + query_pool_manager::~query_pool_manager() + { + if (m_current_query_pool) + { + m_current_query_pool.reset(); + owner = nullptr; + } + } + + void query_pool_manager::allocate_new_pool(vk::command_buffer& cmd) + { + verify(HERE), !m_current_query_pool; + + const u32 count = ::size32(query_slot_status); + m_current_query_pool = std::make_unique(*owner, query_type, count); + + // From spec: "After query pool creation, each query must be reset before it is used." + vkCmdResetQueryPool(cmd, *m_current_query_pool.get(), 0, count); + + m_pool_lifetime_counter = count; + } + + void query_pool_manager::reallocate_pool(vk::command_buffer& cmd) + { + if (m_current_query_pool) + { + if (!m_current_query_pool->has_refs()) + { + vk::get_resource_manager()->dispose(m_current_query_pool); + } + else + { + m_consumed_pools.emplace_back(std::move(m_current_query_pool)); + + // Sanity check + if (m_consumed_pools.size() > 3) + { + rsx_log.error("[Robustness warning] Query pool discard pile size is now %llu. 
Are we leaking??", m_consumed_pools.size()); + } + } + } + + allocate_new_pool(cmd); + } + + void query_pool_manager::run_pool_cleanup() + { + for (auto It = m_consumed_pools.begin(); It != m_consumed_pools.end();) + { + if (!(*It)->has_refs()) + { + vk::get_resource_manager()->dispose(*It); + It = m_consumed_pools.erase(It); + } + else + { + It++; + } + } + } + + void query_pool_manager::begin_query(vk::command_buffer& cmd, u32 index) + { + verify(HERE), query_slot_status[index].active == false; + + auto& query_info = query_slot_status[index]; + query_info.pool = m_current_query_pool.get(); + query_info.active = true; + + vkCmdBeginQuery(cmd, *query_info.pool, index, 0);//VK_QUERY_CONTROL_PRECISE_BIT); + } + + void query_pool_manager::end_query(vk::command_buffer& cmd, u32 index) + { + vkCmdEndQuery(cmd, *query_slot_status[index].pool, index); + } + + bool query_pool_manager::check_query_status(u32 index) + { + return poke_query(query_slot_status[index], index, VK_QUERY_RESULT_PARTIAL_BIT); + } + + u32 query_pool_manager::get_query_result(u32 index) + { + // Check for cached result + auto& query_info = query_slot_status[index]; + + while (!query_info.ready) + { + poke_query(query_info, index, VK_QUERY_RESULT_PARTIAL_BIT); + } + + return query_info.any_passed ? 
1 : 0; + } + + void query_pool_manager::get_query_result_indirect(vk::command_buffer& cmd, u32 index, VkBuffer dst, VkDeviceSize dst_offset) + { + vkCmdCopyQueryPoolResults(cmd, *query_slot_status[index].pool, index, 1, dst, dst_offset, 4, VK_QUERY_RESULT_WAIT_BIT); + } + + void query_pool_manager::free_query(vk::command_buffer&/*cmd*/, u32 index) + { + // Release reference and discard + auto& query = query_slot_status[index]; + + verify(HERE), query.active; + query.pool->release(); + + if (!query.pool->has_refs()) + { + // No more refs held, remove if in discard pile + run_pool_cleanup(); + } + + query = {}; + m_available_slots.push_back(index); + } + + u32 query_pool_manager::allocate_query(vk::command_buffer& cmd) + { + if (!m_pool_lifetime_counter) + { + // Pool is exhausted, create a new one + // This is basically a driver-level pool reset without synchronization + // TODO: Alternatively, use VK_EXT_host_query_reset to reset an old pool with no references and swap that in + if (vk::is_renderpass_open(cmd)) + { + vk::end_renderpass(cmd); + } + + reallocate_pool(cmd); + } + + if (!m_available_slots.empty()) + { + m_pool_lifetime_counter--; + + const auto result = m_available_slots.front(); + m_available_slots.pop_front(); + return result; + } + + return ~0u; + } +} + diff --git a/rpcs3/Emu/RSX/VK/VKQueryPool.h b/rpcs3/Emu/RSX/VK/VKQueryPool.h new file mode 100644 index 0000000000..a7fae15b8b --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKQueryPool.h @@ -0,0 +1,53 @@ +#pragma once +#include "VKHelpers.h" + +namespace vk +{ + class query_pool_manager + { + struct query_slot_info + { + query_pool* pool; + bool any_passed; + bool active; + bool ready; + }; + + std::vector> m_consumed_pools; + std::unique_ptr m_current_query_pool; + std::deque m_available_slots; + u32 m_pool_lifetime_counter = 0; + VkQueryType query_type = VK_QUERY_TYPE_OCCLUSION; + + vk::render_device* owner = nullptr; + std::vector query_slot_status; + + bool poke_query(query_slot_info& query, u32 index, 
VkQueryResultFlags flags); + void allocate_new_pool(vk::command_buffer& cmd); + void reallocate_pool(vk::command_buffer& cmd); + void run_pool_cleanup(); + + public: + query_pool_manager(vk::render_device& dev, VkQueryType type, u32 num_entries); + ~query_pool_manager(); + + void begin_query(vk::command_buffer& cmd, u32 index); + void end_query(vk::command_buffer& cmd, u32 index); + + bool check_query_status(u32 index); + u32 get_query_result(u32 index); + void get_query_result_indirect(vk::command_buffer& cmd, u32 index, VkBuffer dst, VkDeviceSize dst_offset); + + u32 allocate_query(vk::command_buffer& cmd); + void free_query(vk::command_buffer&/*cmd*/, u32 index); + + template class _List> + void free_queries(vk::command_buffer& cmd, _List& list) + { + for (const auto index : list) + { + free_query(cmd, index); + } + } + }; +}; diff --git a/rpcs3/Emu/RSX/VK/VKResourceManager.h b/rpcs3/Emu/RSX/VK/VKResourceManager.h index e7ebbee85e..9cb8091f79 100644 --- a/rpcs3/Emu/RSX/VK/VKResourceManager.h +++ b/rpcs3/Emu/RSX/VK/VKResourceManager.h @@ -15,6 +15,7 @@ namespace vk std::vector> m_disposed_image_views; std::vector> m_disposed_images; std::vector> m_disposed_events; + std::vector> m_disposed_query_pools; eid_scope_t(u64 _eid): eid(_eid), m_device(vk::get_current_renderer()) @@ -31,6 +32,7 @@ namespace vk m_disposed_events.clear(); m_disposed_image_views.clear(); m_disposed_images.clear(); + m_disposed_query_pools.clear(); } }; @@ -148,6 +150,11 @@ namespace vk event = VK_NULL_HANDLE; } + void dispose(std::unique_ptr& pool) + { + get_current_eid_scope().m_disposed_query_pools.emplace_back(std::move(pool)); + } + void eid_completed(u64 eid) { while (!m_eid_map.empty()) diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h index 199c0315ef..728b39bf1a 100644 --- a/rpcs3/Emu/RSX/rsx_utils.h +++ b/rpcs3/Emu/RSX/rsx_utils.h @@ -42,6 +42,7 @@ namespace rsx //Base for resources with reference counting class ref_counted { + protected: atomic_t ref_count{ 0 }; 
// References held atomic_t idle_time{ 0 }; // Number of times the resource has been tagged idle diff --git a/rpcs3/VKGSRender.vcxproj b/rpcs3/VKGSRender.vcxproj index df6f306585..4fda1c4b34 100644 --- a/rpcs3/VKGSRender.vcxproj +++ b/rpcs3/VKGSRender.vcxproj @@ -34,6 +34,7 @@ + @@ -56,6 +57,7 @@ + diff --git a/rpcs3/VKGSRender.vcxproj.filters b/rpcs3/VKGSRender.vcxproj.filters index 91db33afcc..1e758287bd 100644 --- a/rpcs3/VKGSRender.vcxproj.filters +++ b/rpcs3/VKGSRender.vcxproj.filters @@ -20,6 +20,7 @@ + @@ -42,5 +43,6 @@ + - + \ No newline at end of file