vk: Optimize occlusion pool management

- Do not consume a slot on every draw call; instead, batch as many draws as possible
- Since renderpasses are dispatched per draw clause, keeping occlusion queries outside the renderpasses works fine
- If renderpasses are reorganized, occlusion tasks will have to be reorganized again
This commit is contained in:
kd-11 2019-06-19 00:26:25 +03:00 committed by kd-11
parent 1ee675e1f4
commit 8249d51aa8
3 changed files with 62 additions and 50 deletions

View File

@ -473,7 +473,9 @@ VKGSRender::VKGSRender() : GSRender()
//Occlusion
m_occlusion_query_pool.create((*m_device), OCCLUSION_MAX_POOL_SIZE);
for (int n = 0; n < 128; ++n)
m_occlusion_map.resize(occlusion_query_count);
for (int n = 0; n < occlusion_query_count; ++n)
m_occlusion_query_data[n].driver_handle = n;
//Generate frame contexts
@ -1667,10 +1669,9 @@ void VKGSRender::end()
m_textures_upload_time += m_profiler.duration();
u32 occlusion_id = 0;
if (m_occlusion_query_active)
if (m_current_command_buffer->flags & vk::command_buffer::cb_load_occluson_task)
{
occlusion_id = m_occlusion_query_pool.find_free_slot();
u32 occlusion_id = m_occlusion_query_pool.find_free_slot();
if (occlusion_id == UINT32_MAX)
{
m_tsc += 100;
@ -1683,21 +1684,21 @@ void VKGSRender::end()
if (m_current_task) m_current_task->result = 1;
}
}
// Begin query
m_occlusion_query_pool.begin_query(*m_current_command_buffer, occlusion_id);
auto &data = m_occlusion_map[m_active_query_info->driver_handle];
data.indices.push_back(occlusion_id);
data.command_buffer_to_wait = m_current_command_buffer;
m_current_command_buffer->flags &= ~vk::command_buffer::cb_load_occluson_task;
m_current_command_buffer->flags |= (vk::command_buffer::cb_has_occlusion_task | vk::command_buffer::cb_has_open_query);
}
bool primitive_emulated = false;
vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, primitive_emulated);
if (m_occlusion_query_active && (occlusion_id != UINT32_MAX))
{
//Begin query
m_occlusion_query_pool.begin_query(*m_current_command_buffer, occlusion_id);
m_occlusion_map[m_active_query_info->driver_handle].indices.push_back(occlusion_id);
m_occlusion_map[m_active_query_info->driver_handle].command_buffer_to_wait = m_current_command_buffer;
m_current_command_buffer->flags |= vk::command_buffer::cb_has_occlusion_task;
}
// Apply write memory barriers
if (true)//g_cfg.video.strict_rendering_mode)
{
@ -1768,12 +1769,6 @@ void VKGSRender::end()
close_render_pass();
vk::leave_uninterruptible();
if (m_occlusion_query_active && (occlusion_id != UINT32_MAX))
{
//End query
m_occlusion_query_pool.end_query(*m_current_command_buffer, occlusion_id);
}
m_rtts.on_write();
rsx::thread::end();
@ -2126,7 +2121,6 @@ void VKGSRender::clear_surface(u32 mask)
void VKGSRender::flush_command_queue(bool hard_sync)
{
rsx::g_dma_manager.sync();
close_and_submit_command_buffer(m_current_command_buffer->submit_fence);
if (hard_sync)
@ -2165,6 +2159,11 @@ void VKGSRender::flush_command_queue(bool hard_sync)
check_present_status();
}
if (m_occlusion_query_active)
{
m_current_command_buffer->flags |= vk::command_buffer::cb_load_occluson_task;
}
open_command_buffer();
}
@ -2781,6 +2780,9 @@ void VKGSRender::write_buffers()
void VKGSRender::close_and_submit_command_buffer(VkFence fence, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore, VkPipelineStageFlags pipeline_stage_flags)
{
// Wait before sync block below
rsx::g_dma_manager.sync();
if (m_attrib_ring_info.dirty() ||
m_fragment_env_ring_info.dirty() ||
m_vertex_env_ring_info.dirty() ||
@ -2810,6 +2812,13 @@ void VKGSRender::close_and_submit_command_buffer(VkFence fence, VkSemaphore wait
VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
}
// End open queries. Flags will be automatically reset by the submit routine
if (m_current_command_buffer->flags & vk::command_buffer::cb_has_open_query)
{
auto open_query = m_occlusion_map[m_active_query_info->driver_handle].indices.back();
m_occlusion_query_pool.end_query(*m_current_command_buffer, open_query);
}
m_current_command_buffer->end();
m_current_command_buffer->tag();
@ -3473,18 +3482,33 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst
// Marks `query` as the active occlusion query.
// No pool slot is taken here: the function only zeroes the result, records the query as active
// and raises cb_load_occluson_task on the current command buffer. The end() excerpt in this
// file tests that flag before calling find_free_slot() and begin_query().
void VKGSRender::begin_occlusion_query(rsx::reports::occlusion_query_info* query)
{
// Only one query may be active at a time; verify is presumably a fatal check - confirm.
verify(HERE), !m_occlusion_query_active;
query->result = 0;
//query->sync_timestamp = get_system_time();
m_active_query_info = query;
m_occlusion_query_active = true;
// Request that a query slot be loaded for this command buffer.
m_current_command_buffer->flags |= vk::command_buffer::cb_load_occluson_task;
}
// Stops the active occlusion query without flushing the command queue.
// If the current command buffer still has an open query (cb_has_open_query), the most recently
// recorded slot for the active query is ended and the flag is cleared. The pending-load flag
// is then dropped and the active-query state is reset.
void VKGSRender::end_occlusion_query(rsx::reports::occlusion_query_info* query)
{
// NOTE(review): this view looks like a diff with the +/- markers stripped. The two assignments
// directly below are repeated at the end of the function and are presumably the removed
// pre-change lines; if they were live, the verify and the m_active_query_info dereference
// further down would see nullptr. Confirm against the real file.
m_occlusion_query_active = false;
m_active_query_info = nullptr;
// The caller must be ending the query that is currently active.
verify(HERE), query == m_active_query_info;
// NOTE: flushing the queue is very expensive, do not flush just because query stopped
if (m_current_command_buffer->flags & vk::command_buffer::cb_has_open_query)
{
// End query
// The open query is the last slot index pushed for the active query's driver handle.
auto open_query = m_occlusion_map[m_active_query_info->driver_handle].indices.back();
m_occlusion_query_pool.end_query(*m_current_command_buffer, open_query);
m_current_command_buffer->flags &= ~vk::command_buffer::cb_has_open_query;
}
// Clear occlusion load flag
m_current_command_buffer->flags &= ~vk::command_buffer::cb_load_occluson_task;
// Reset the active-query state only after the open query has been closed.
m_occlusion_query_active = false;
m_active_query_info = nullptr;
}
bool VKGSRender::check_occlusion_query_status(rsx::reports::occlusion_query_info* query)
@ -3492,11 +3516,7 @@ bool VKGSRender::check_occlusion_query_status(rsx::reports::occlusion_query_info
if (!query->num_draws)
return true;
auto found = m_occlusion_map.find(query->driver_handle);
if (found == m_occlusion_map.end())
return true;
auto &data = found->second;
auto &data = m_occlusion_map[query->driver_handle];
if (data.indices.empty())
return true;
@ -3522,11 +3542,7 @@ bool VKGSRender::check_occlusion_query_status(rsx::reports::occlusion_query_info
void VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info* query)
{
auto found = m_occlusion_map.find(query->driver_handle);
if (found == m_occlusion_map.end())
return;
auto &data = found->second;
auto &data = m_occlusion_map[query->driver_handle];
if (data.indices.empty())
return;
@ -3561,27 +3577,22 @@ void VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info*
}
m_occlusion_query_pool.reset_queries(*m_current_command_buffer, data.indices);
m_occlusion_map.erase(query->driver_handle);
data.indices.clear();
}
// Discards `query`: ends it first if it is the active query, then resets any pool slots
// recorded for its driver handle and forgets them.
// NOTE(review): this view looks like a diff with the +/- markers stripped, so replaced and
// replacement lines appear side by side (for example `data` is declared twice). The notes
// below mark each pair; confirm against the real file.
void VKGSRender::discard_occlusion_query(rsx::reports::occlusion_query_info* query)
{
if (m_active_query_info == query)
{
// NOTE(review): the two assignments below are presumably the pre-change lines. If they ran
// before end_occlusion_query(query), its `query == m_active_query_info` check would compare
// against nullptr.
m_occlusion_query_active = false;
m_active_query_info = nullptr;
end_occlusion_query(query);
}
// NOTE(review): the find()/end()/found->second lookup matches the std::unordered_map
// declaration of m_occlusion_map, while the direct index matches the std::vector declaration;
// both declarations appear in this file, so presumably only the indexed form is live.
auto found = m_occlusion_map.find(query->driver_handle);
if (found == m_occlusion_map.end())
return;
auto &data = found->second;
auto &data = m_occlusion_map[query->driver_handle];
// Nothing was recorded for this query, so there is nothing to reset.
if (data.indices.empty())
return;
m_occlusion_query_pool.reset_queries(*m_current_command_buffer, data.indices);
// NOTE(review): erase() pairs with the map form and clear() with the vector form; clear()
// empties the entry's index list without removing the entry itself.
m_occlusion_map.erase(query->driver_handle);
data.indices.clear();
}
bool VKGSRender::on_decompiler_task()

View File

@ -400,7 +400,7 @@ private:
vk::occlusion_query_pool m_occlusion_query_pool;
bool m_occlusion_query_active = false;
rsx::reports::occlusion_query_info *m_active_query_info = nullptr;
std::unordered_map<u32, occlusion_data> m_occlusion_map;
std::vector<occlusion_data> m_occlusion_map;
shared_mutex m_secondary_cb_guard;
vk::command_pool m_secondary_command_buffer_pool;

View File

@ -891,7 +891,9 @@ private:
{
cb_has_occlusion_task = 1,
cb_has_blit_transfer = 2,
cb_has_dma_transfer = 4
cb_has_dma_transfer = 4,
cb_has_open_query = 8,
cb_load_occluson_task = 16
};
u32 flags = 0;
@ -2725,7 +2727,7 @@ public:
VkQueryPool query_pool = VK_NULL_HANDLE;
vk::render_device* owner = nullptr;
std::deque<u32> available_slots;
std::stack<u32> available_slots;
std::vector<bool> query_active_status;
public:
@ -2740,11 +2742,10 @@ public:
owner = &dev;
query_active_status.resize(num_entries, false);
available_slots.resize(num_entries);
for (u32 n = 0; n < num_entries; ++n)
{
available_slots[n] = n;
available_slots.push(n);
}
}
@ -2807,7 +2808,7 @@ public:
vkCmdResetQueryPool(cmd, query_pool, index, 1);
query_active_status[index] = false;
available_slots.push_back(index);
available_slots.push(index);
}
}
@ -2834,8 +2835,8 @@ public:
return ~0u;
}
u32 result = available_slots.front();
available_slots.pop_front();
u32 result = available_slots.top();
available_slots.pop();
verify(HERE), !query_active_status[result];
return result;