vk: Reimplement GPU query management

This commit is contained in:
kd-11 2020-10-04 20:50:45 +03:00 committed by kd-11
parent 2a258dc21e
commit d5f7e7b179
11 changed files with 307 additions and 193 deletions

View File

@ -443,6 +443,7 @@ if(TARGET 3rdparty_vulkan)
RSX/VK/VKMemAlloc.cpp
RSX/VK/VKPresent.cpp
RSX/VK/VKProgramPipeline.cpp
RSX/VK/VKQueryPool.cpp
RSX/VK/VKRenderPass.cpp
RSX/VK/VKResolveHelper.cpp
RSX/VK/VKResourceManager.cpp

View File

@ -943,14 +943,14 @@ void VKGSRender::end()
if (m_current_command_buffer->flags & vk::command_buffer::cb_load_occluson_task)
{
u32 occlusion_id = m_occlusion_query_pool.find_free_slot(*m_current_command_buffer);
u32 occlusion_id = m_occlusion_query_manager->allocate_query(*m_current_command_buffer);
if (occlusion_id == UINT32_MAX)
{
// Force flush
rsx_log.error("[Performance Warning] Out of free occlusion slots. Forcing hard sync.");
ZCULL_control::sync(this);
occlusion_id = m_occlusion_query_pool.find_free_slot(*m_current_command_buffer);
occlusion_id = m_occlusion_query_manager->allocate_query(*m_current_command_buffer);
if (occlusion_id == UINT32_MAX)
{
//rsx_log.error("Occlusion pool overflow");
@ -959,7 +959,7 @@ void VKGSRender::end()
}
// Begin query
m_occlusion_query_pool.begin_query(*m_current_command_buffer, occlusion_id);
m_occlusion_query_manager->begin_query(*m_current_command_buffer, occlusion_id);
auto &data = m_occlusion_map[m_active_query_info->driver_handle];
data.indices.push_back(occlusion_id);

View File

@ -408,7 +408,7 @@ VKGSRender::VKGSRender() : GSRender()
std::tie(pipeline_layout, descriptor_layouts) = get_shared_pipeline_layout(*m_device);
//Occlusion
m_occlusion_query_pool.create((*m_device), OCCLUSION_MAX_POOL_SIZE);
m_occlusion_query_manager = std::make_unique<vk::query_pool_manager>(*m_device, VK_QUERY_TYPE_OCCLUSION, OCCLUSION_MAX_POOL_SIZE);
m_occlusion_map.resize(occlusion_query_count);
for (u32 n = 0; n < occlusion_query_count; ++n)
@ -519,8 +519,6 @@ VKGSRender::VKGSRender() : GSRender()
vk::get_overlay_pass<vk::ui_overlay_renderer>()->init(*m_current_command_buffer, m_texture_upload_buffer_ring_info);
m_occlusion_query_pool.initialize(*m_current_command_buffer);
if (shadermode == shader_mode::async_with_interpreter || shadermode == shader_mode::interpreter_only)
{
m_shader_interpreter.init(*m_device);
@ -625,7 +623,7 @@ VKGSRender::~VKGSRender()
vkDestroyDescriptorSetLayout(*m_device, descriptor_layouts, nullptr);
//Queries
m_occlusion_query_pool.destroy();
m_occlusion_query_manager.reset();
m_cond_render_buffer.reset();
//Command buffer
@ -1965,7 +1963,7 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore
if (m_current_command_buffer->flags & vk::command_buffer::cb_has_open_query)
{
auto open_query = m_occlusion_map[m_active_query_info->driver_handle].indices.back();
m_occlusion_query_pool.end_query(*m_current_command_buffer, open_query);
m_occlusion_query_manager->end_query(*m_current_command_buffer, open_query);
m_current_command_buffer->flags &= ~vk::command_buffer::cb_has_open_query;
}
@ -2267,7 +2265,7 @@ void VKGSRender::end_occlusion_query(rsx::reports::occlusion_query_info* query)
{
// End query
auto open_query = m_occlusion_map[m_active_query_info->driver_handle].indices.back();
m_occlusion_query_pool.end_query(*m_current_command_buffer, open_query);
m_occlusion_query_manager->end_query(*m_current_command_buffer, open_query);
m_current_command_buffer->flags &= ~vk::command_buffer::cb_has_open_query;
}
@ -2291,7 +2289,7 @@ bool VKGSRender::check_occlusion_query_status(rsx::reports::occlusion_query_info
return false;
u32 oldest = data.indices.front();
return m_occlusion_query_pool.check_query_status(oldest);
return m_occlusion_query_manager->check_query_status(oldest);
}
void VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info* query)
@ -2322,7 +2320,7 @@ void VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info*
for (const auto occlusion_id : data.indices)
{
// We only need one hit
if (auto value = m_occlusion_query_pool.get_query_result(occlusion_id))
if (auto value = m_occlusion_query_manager->get_query_result(occlusion_id))
{
query->result = 1;
break;
@ -2330,7 +2328,7 @@ void VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info*
}
}
m_occlusion_query_pool.reset_queries(*m_current_command_buffer, data.indices);
m_occlusion_query_manager->free_queries(*m_current_command_buffer, data.indices);
data.indices.clear();
}
@ -2345,7 +2343,7 @@ void VKGSRender::discard_occlusion_query(rsx::reports::occlusion_query_info* que
if (data.indices.empty())
return;
m_occlusion_query_pool.reset_queries(*m_current_command_buffer, data.indices);
m_occlusion_query_manager->free_queries(*m_current_command_buffer, data.indices);
data.indices.clear();
}
@ -2356,7 +2354,7 @@ void VKGSRender::emergency_query_cleanup(vk::command_buffer* commands)
if (m_current_command_buffer->flags & vk::command_buffer::cb_has_open_query)
{
auto open_query = m_occlusion_map[m_active_query_info->driver_handle].indices.back();
m_occlusion_query_pool.end_query(*m_current_command_buffer, open_query);
m_occlusion_query_manager->end_query(*m_current_command_buffer, open_query);
m_current_command_buffer->flags &= ~vk::command_buffer::cb_has_open_query;
}
}
@ -2427,7 +2425,7 @@ void VKGSRender::begin_conditional_rendering(const std::vector<rsx::reports::occ
if (query_info.indices.size() == 1)
{
const auto& index = query_info.indices.front();
m_occlusion_query_pool.get_query_result_indirect(*m_current_command_buffer, index, m_cond_render_buffer->value, 0);
m_occlusion_query_manager->get_query_result_indirect(*m_current_command_buffer, index, m_cond_render_buffer->value, 0);
vk::insert_buffer_memory_barrier(*m_current_command_buffer, m_cond_render_buffer->value, 0, 4,
VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage,
@ -2457,7 +2455,7 @@ void VKGSRender::begin_conditional_rendering(const std::vector<rsx::reports::occ
auto& query_info = m_occlusion_map[sources[i]->driver_handle];
for (const auto& index : query_info.indices)
{
m_occlusion_query_pool.get_query_result_indirect(*m_current_command_buffer, index, scratch->value, dst_offset);
m_occlusion_query_manager->get_query_result_indirect(*m_current_command_buffer, index, scratch->value, dst_offset);
dst_offset += 4;
}
}

View File

@ -9,6 +9,7 @@
#include "VKProgramBuffer.h"
#include "VKFramebuffer.h"
#include "VKShaderInterpreter.h"
#include "VKQueryPool.h"
#include "../GCM.h"
#include <thread>
@ -397,7 +398,7 @@ private:
//Vulkan internals
vk::command_pool m_command_buffer_pool;
vk::occlusion_query_pool m_occlusion_query_pool;
std::unique_ptr<vk::query_pool_manager> m_occlusion_query_manager;
bool m_occlusion_query_active = false;
rsx::reports::occlusion_query_info *m_active_query_info = nullptr;
std::vector<vk::occlusion_data> m_occlusion_map;

View File

@ -3224,193 +3224,33 @@ public:
}
};
class occlusion_query_pool
class query_pool : public rsx::ref_counted
{
struct query_slot_info
{
bool any_passed;
bool active;
bool ready;
};
VkQueryPool query_pool = VK_NULL_HANDLE;
vk::render_device* owner = nullptr;
std::deque<u32> available_slots;
std::vector<query_slot_info> query_slot_status;
inline bool poke_query(query_slot_info& query, u32 index, VkQueryResultFlags flags)
{
// Query is ready if:
// 1. Any sample has been determined to have passed the Z test
// 2. The backend has fully processed the query and found no hits
u32 result[2] = { 0, 0 };
switch (const auto error = vkGetQueryPoolResults(*owner, query_pool, index, 1, 8, result, 8, flags | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT))
{
case VK_SUCCESS:
{
if (result[0])
{
query.any_passed = true;
query.ready = true;
return true;
}
else if (result[1])
{
query.any_passed = false;
query.ready = true;
return true;
}
return false;
}
case VK_NOT_READY:
{
if (result[0])
{
query.any_passed = true;
query.ready = true;
return true;
}
return false;
}
default:
die_with_error(HERE, error);
return false;
}
}
VkQueryPool m_query_pool;
VkDevice m_device;
public:
void create(vk::render_device &dev, u32 num_entries)
query_pool(VkDevice dev, VkQueryType type, u32 size)
: m_query_pool(VK_NULL_HANDLE), m_device(dev)
{
VkQueryPoolCreateInfo info = {};
VkQueryPoolCreateInfo info{};
info.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
info.queryType = VK_QUERY_TYPE_OCCLUSION;
info.queryCount = num_entries;
info.queryType = type;
info.queryCount = size;
vkCreateQueryPool(dev, &info, nullptr, &m_query_pool);
CHECK_RESULT(vkCreateQueryPool(dev, &info, nullptr, &query_pool));
owner = &dev;
// From spec: "After query pool creation, each query must be reset before it is used."
query_slot_status.resize(num_entries, {});
// Take 'size' references on this object
ref_count.release(static_cast<s32>(size));
}
void destroy()
~query_pool()
{
if (query_pool)
{
vkDestroyQueryPool(*owner, query_pool, nullptr);
owner = nullptr;
query_pool = VK_NULL_HANDLE;
}
vkDestroyQueryPool(m_device, m_query_pool, nullptr);
}
void initialize(vk::command_buffer &cmd)
operator VkQueryPool()
{
const u32 count = ::size32(query_slot_status);
vkCmdResetQueryPool(cmd, query_pool, 0, count);
query_slot_info value{};
std::fill(query_slot_status.begin(), query_slot_status.end(), value);
for (u32 n = 0; n < count; ++n)
{
available_slots.push_back(n);
}
}
void begin_query(vk::command_buffer &cmd, u32 index)
{
verify(HERE), query_slot_status[index].active == false;
vkCmdBeginQuery(cmd, query_pool, index, 0);//VK_QUERY_CONTROL_PRECISE_BIT);
query_slot_status[index].active = true;
}
void end_query(vk::command_buffer &cmd, u32 index)
{
vkCmdEndQuery(cmd, query_pool, index);
}
bool check_query_status(u32 index)
{
return poke_query(query_slot_status[index], index, VK_QUERY_RESULT_PARTIAL_BIT);
}
u32 get_query_result(u32 index)
{
// Check for cached result
auto& query_info = query_slot_status[index];
while (!query_info.ready)
{
poke_query(query_info, index, VK_QUERY_RESULT_PARTIAL_BIT);
}
return query_info.any_passed ? 1 : 0;
}
void get_query_result_indirect(vk::command_buffer &cmd, u32 index, VkBuffer dst, VkDeviceSize dst_offset)
{
vkCmdCopyQueryPoolResults(cmd, query_pool, index, 1, dst, dst_offset, 4, VK_QUERY_RESULT_WAIT_BIT);
}
void reset_query(vk::command_buffer &/*cmd*/, u32 index)
{
if (query_slot_status[index].active)
{
// Actual reset is handled later on demand
available_slots.push_back(index);
}
}
template<template<class> class _List>
void reset_queries(vk::command_buffer &cmd, _List<u32> &list)
{
for (const auto index : list)
reset_query(cmd, index);
}
void reset_all(vk::command_buffer &cmd)
{
for (u32 n = 0; n < query_slot_status.size(); n++)
{
if (query_slot_status[n].active)
reset_query(cmd, n);
}
}
u32 find_free_slot(vk::command_buffer& cmd)
{
if (available_slots.empty())
{
return ~0u;
}
const u32 result = available_slots.front();
if (query_slot_status[result].active)
{
// Trigger reset if round robin allocation has gone back to the first item
if (vk::is_renderpass_open(cmd))
{
vk::end_renderpass(cmd);
}
// At this point, the first available slot is not reset which means they're all active
for (auto It = available_slots.cbegin(); It != available_slots.cend(); ++It)
{
const auto index = *It;
vkCmdResetQueryPool(cmd, query_pool, index, 1);
query_slot_status[index] = {};
}
}
available_slots.pop_front();
return result;
return m_query_pool;
}
};

View File

@ -0,0 +1,209 @@
#include "stdafx.h"
#include "VKQueryPool.h"
#include "VKResourceManager.h"
namespace vk
{
inline bool query_pool_manager::poke_query(query_slot_info& query, u32 index, VkQueryResultFlags flags)
{
	// A query is considered resolved once either:
	// 1. Any sample has been determined to have passed the Z test, or
	// 2. The backend has fully processed the query and found no hits.
	// result[0] receives the counter value, result[1] the availability word
	// (requested via VK_QUERY_RESULT_WITH_AVAILABILITY_BIT).
	u32 result[2] = { 0, 0 };
	const auto error = vkGetQueryPoolResults(*owner, *query.pool, index, 1, 8, result, 8, flags | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT);

	if (error != VK_SUCCESS && error != VK_NOT_READY)
	{
		// Anything other than "done" or "still pending" is a hard failure
		die_with_error(HERE, error);
		return false;
	}

	if (result[0])
	{
		// Counter is non-zero; at least one sample passed. Valid even for
		// VK_NOT_READY since a partial result already proves a hit.
		query.any_passed = true;
		query.ready = true;
		return true;
	}

	if (error == VK_SUCCESS && result[1])
	{
		// Availability confirmed with a zero counter; definitively no hits
		query.any_passed = false;
		query.ready = true;
		return true;
	}

	// Still pending; caller should poke again later
	return false;
}
query_pool_manager::query_pool_manager(vk::render_device& dev, VkQueryType type, u32 num_entries)
{
	// An empty manager is meaningless; pool rotation math assumes at least one slot
	verify(HERE), num_entries > 0;

	owner = &dev;
	query_type = type;
	query_slot_status.resize(num_entries, {});

	// Every logical slot starts out on the free-list
	for (u32 slot = 0; slot < num_entries; ++slot)
	{
		m_available_slots.push_back(slot);
	}
}
query_pool_manager::~query_pool_manager()
{
	// Nothing to do manually: the unique_ptr members (m_current_query_pool and
	// any pools still parked in m_consumed_pools) destroy their VkQueryPool
	// handles via ~query_pool. The previous explicit reset() and clearing of
	// 'owner' duplicated what member destruction already guarantees.
}
void query_pool_manager::allocate_new_pool(vk::command_buffer& cmd)
{
	// The previous pool must have been retired (moved out or disposed) first
	verify(HERE), !m_current_query_pool;

	const u32 num_slots = ::size32(query_slot_status);
	m_current_query_pool = std::make_unique<query_pool>(*owner, query_type, num_slots);

	// From spec: "After query pool creation, each query must be reset before it is used."
	vkCmdResetQueryPool(cmd, *m_current_query_pool, 0, num_slots);

	// Each pool hands out exactly one allocation per slot before it is replaced
	m_pool_lifetime_counter = num_slots;
}
void query_pool_manager::reallocate_pool(vk::command_buffer& cmd)
{
	// Retire the current pool (if any), then stand up a fresh one
	if (m_current_query_pool)
	{
		if (m_current_query_pool->has_refs())
		{
			// Queries on this pool are still in flight; park it until every ref drops
			m_consumed_pools.emplace_back(std::move(m_current_query_pool));

			// Sanity check
			if (m_consumed_pools.size() > 3)
			{
				rsx_log.error("[Robustness warning] Query pool discard pile size is now %llu. Are we leaking??", m_consumed_pools.size());
			}
		}
		else
		{
			// No outstanding queries; hand the pool to deferred destruction right away
			vk::get_resource_manager()->dispose(m_current_query_pool);
		}
	}

	allocate_new_pool(cmd);
}
void query_pool_manager::run_pool_cleanup()
{
	// Sweep the discard pile and dispose of any pool whose queries have all been freed
	auto it = m_consumed_pools.begin();
	while (it != m_consumed_pools.end())
	{
		if ((*it)->has_refs())
		{
			// Still referenced by live query slots; keep it parked
			++it;
		}
		else
		{
			vk::get_resource_manager()->dispose(*it);
			it = m_consumed_pools.erase(it);
		}
	}
}
void query_pool_manager::begin_query(vk::command_buffer& cmd, u32 index)
{
	auto& slot = query_slot_status[index];
	// Beginning an already-active slot is a usage error
	verify(HERE), slot.active == false;

	// Bind the slot to whichever pool is current; the slot may outlive a pool rotation,
	// so the owning pool is remembered per-slot
	slot.pool = m_current_query_pool.get();
	slot.active = true;

	vkCmdBeginQuery(cmd, *slot.pool, index, 0); //VK_QUERY_CONTROL_PRECISE_BIT
}
void query_pool_manager::end_query(vk::command_buffer& cmd, u32 index)
{
	// Close the query on the pool it was begun on (not necessarily the current pool)
	auto& slot = query_slot_status[index];
	vkCmdEndQuery(cmd, *slot.pool, index);
}
bool query_pool_manager::check_query_status(u32 index)
{
	// Non-blocking probe; caches availability/pass state on the slot as a side effect
	auto& slot = query_slot_status[index];
	return poke_query(slot, index, VK_QUERY_RESULT_PARTIAL_BIT);
}
u32 query_pool_manager::get_query_result(u32 index)
{
	// Returns 1 if any sample passed, 0 otherwise.
	// If a previous poke already resolved the query, the cached result is used;
	// otherwise spin until the driver reports the query as resolved.
	auto& slot = query_slot_status[index];
	while (!slot.ready)
	{
		poke_query(slot, index, VK_QUERY_RESULT_PARTIAL_BIT);
	}

	return slot.any_passed ? 1 : 0;
}
void query_pool_manager::get_query_result_indirect(vk::command_buffer& cmd, u32 index, VkBuffer dst, VkDeviceSize dst_offset)
{
	// GPU-side copy of the 32-bit counter into 'dst'; the WAIT flag makes the
	// copy command wait until the result is available instead of the host
	auto& slot = query_slot_status[index];
	vkCmdCopyQueryPoolResults(cmd, *slot.pool, index, 1, dst, dst_offset, 4, VK_QUERY_RESULT_WAIT_BIT);
}
void query_pool_manager::free_query(vk::command_buffer&/*cmd*/, u32 index)
{
	auto& slot = query_slot_status[index];
	// Releasing a slot that was never begun is a usage error
	verify(HERE), slot.active;

	// Drop this slot's reference on its owning pool
	slot.pool->release();
	if (!slot.pool->has_refs())
	{
		// That was the last outstanding query; reap the pool if it sits in the discard pile
		run_pool_cleanup();
	}

	// Recycle the slot
	slot = {};
	m_available_slots.push_back(index);
}
// Reserves a free query slot and returns its index, or ~0u (UINT32_MAX) when no
// slot is free. May rotate in a fresh VkQueryPool (ending any open renderpass)
// once the current pool's allocation budget is used up.
u32 query_pool_manager::allocate_query(vk::command_buffer& cmd)
{
	if (!m_pool_lifetime_counter)
	{
		// Pool is exhausted, create a new one
		// This is basically a driver-level pool reset without synchronization
		// TODO: Alternatively, use VK_EXT_host_pool_reset to reset an old pool with no references and swap that in
		if (vk::is_renderpass_open(cmd))
		{
			// vkCmdResetQueryPool is illegal inside a renderpass
			vk::end_renderpass(cmd);
		}

		reallocate_pool(cmd);
	}

	if (!m_available_slots.empty())
	{
		// Consume one unit of the pool's allocation budget per handed-out slot
		m_pool_lifetime_counter--;

		const auto result = m_available_slots.front();
		m_available_slots.pop_front();
		return result;
	}

	// No free logical slots; caller must sync/flush to release some
	return ~0u;
}
}

View File

@ -0,0 +1,53 @@
#pragma once
#include "VKHelpers.h"
namespace vk
{
// Manages a rotating set of VkQueryPools behind a fixed array of logical query
// slots. Slots are allocated/freed by index; exhausted pools are retired and
// destroyed once all queries referencing them have been released.
class query_pool_manager
{
	// Per-slot bookkeeping. 'pool' points at the pool the query was begun on,
	// which may differ from the manager's current pool after a rotation.
	// Members carry default initializers so a default-constructed slot is
	// always in a well-defined "free" state (previously left uninitialized).
	struct query_slot_info
	{
		query_pool* pool = nullptr; // Owning pool for this slot's query (null when free)
		bool any_passed = false;    // True once any sample passed the Z test
		bool active = false;        // Slot is currently allocated to a query
		bool ready = false;         // Result has been fully resolved and cached
	};

	std::vector<std::unique_ptr<query_pool>> m_consumed_pools; // Retired pools with queries still in flight
	std::unique_ptr<query_pool> m_current_query_pool;          // Pool new queries are allocated from
	std::deque<u32> m_available_slots;                         // Free-list of logical slot indices
	u32 m_pool_lifetime_counter = 0;                           // Allocations left before the pool is rotated

	VkQueryType query_type = VK_QUERY_TYPE_OCCLUSION;
	vk::render_device* owner = nullptr;

	std::vector<query_slot_info> query_slot_status;

	// Probes the driver for a slot's result; returns true once resolved
	bool poke_query(query_slot_info& query, u32 index, VkQueryResultFlags flags);
	// Creates and resets a fresh pool (current pool must already be retired)
	void allocate_new_pool(vk::command_buffer& cmd);
	// Retires the current pool (dispose or park) and allocates a new one
	void reallocate_pool(vk::command_buffer& cmd);
	// Disposes parked pools whose reference counts have dropped to zero
	void run_pool_cleanup();

public:
	query_pool_manager(vk::render_device& dev, VkQueryType type, u32 num_entries);
	~query_pool_manager();

	void begin_query(vk::command_buffer& cmd, u32 index);
	void end_query(vk::command_buffer& cmd, u32 index);

	// Non-blocking; true once the slot's result is known
	bool check_query_status(u32 index);
	// Blocking; returns 1 if any sample passed, 0 otherwise
	u32 get_query_result(u32 index);
	// GPU-side copy of the 32-bit result into 'dst' at 'dst_offset'
	void get_query_result_indirect(vk::command_buffer& cmd, u32 index, VkBuffer dst, VkDeviceSize dst_offset);

	// Returns a free slot index or ~0u when none are available
	u32 allocate_query(vk::command_buffer& cmd);
	void free_query(vk::command_buffer&/*cmd*/, u32 index);

	// Releases every slot index contained in 'list'
	template<template<class> class _List>
	void free_queries(vk::command_buffer& cmd, _List<u32>& list)
	{
		for (const auto index : list)
		{
			free_query(cmd, index);
		}
	}
};
};

View File

@ -15,6 +15,7 @@ namespace vk
std::vector<std::unique_ptr<vk::image_view>> m_disposed_image_views;
std::vector<std::unique_ptr<vk::image>> m_disposed_images;
std::vector<std::unique_ptr<vk::event>> m_disposed_events;
std::vector<std::unique_ptr<vk::query_pool>> m_disposed_query_pools;
eid_scope_t(u64 _eid):
eid(_eid), m_device(vk::get_current_renderer())
@ -31,6 +32,7 @@ namespace vk
m_disposed_events.clear();
m_disposed_image_views.clear();
m_disposed_images.clear();
m_disposed_query_pools.clear();
}
};
@ -148,6 +150,11 @@ namespace vk
event = VK_NULL_HANDLE;
}
void dispose(std::unique_ptr<vk::query_pool>& pool)
{
get_current_eid_scope().m_disposed_query_pools.emplace_back(std::move(pool));
}
void eid_completed(u64 eid)
{
while (!m_eid_map.empty())

View File

@ -42,6 +42,7 @@ namespace rsx
//Base for resources with reference counting
class ref_counted
{
protected:
atomic_t<s32> ref_count{ 0 }; // References held
atomic_t<u8> idle_time{ 0 }; // Number of times the resource has been tagged idle

View File

@ -34,6 +34,7 @@
<ClInclude Include="Emu\RSX\VK\VKHelpers.h" />
<ClInclude Include="Emu\RSX\VK\VKOverlays.h" />
<ClInclude Include="Emu\RSX\VK\VKProgramBuffer.h" />
<ClInclude Include="Emu\RSX\VK\VKQueryPool.h" />
<ClInclude Include="Emu\RSX\VK\VKRenderPass.h" />
<ClInclude Include="Emu\RSX\VK\VKRenderTargets.h" />
<ClInclude Include="Emu\RSX\VK\VKResolveHelper.h" />
@ -56,6 +57,7 @@
<ClCompile Include="Emu\RSX\VK\VKHelpers.cpp" />
<ClCompile Include="Emu\RSX\VK\VKPresent.cpp" />
<ClCompile Include="Emu\RSX\VK\VKProgramPipeline.cpp" />
<ClCompile Include="Emu\RSX\VK\VKQueryPool.cpp" />
<ClCompile Include="Emu\RSX\VK\VKRenderPass.cpp" />
<ClCompile Include="Emu\RSX\VK\VKResolveHelper.cpp" />
<ClCompile Include="Emu\RSX\VK\VKResourceManager.cpp" />

View File

@ -20,6 +20,7 @@
<ClCompile Include="Emu\RSX\VK\VKVertexProgram.cpp" />
<ClCompile Include="Emu\RSX\VK\VKMemAlloc.cpp" />
<ClCompile Include="Emu\RSX\VK\VKCommandStream.cpp" />
<ClCompile Include="Emu\RSX\VK\VKQueryPool.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="Emu\RSX\VK\VKCommonDecompiler.h" />
@ -42,5 +43,6 @@
<ClInclude Include="Emu\RSX\VK\VKVertexProgram.h" />
<ClInclude Include="Emu\RSX\VK\VulkanAPI.h" />
<ClInclude Include="Emu\RSX\VK\VKCommandStream.h" />
<ClInclude Include="Emu\RSX\VK\VKQueryPool.h" />
</ItemGroup>
</Project>
</Project>