mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-17 08:11:51 +00:00
vk: Optimize occlusion pool management
- Do not consume a slot on every draw call; instead, batch as many draws as possible.
- Since renderpasses are dispatched per draw clause, keeping occlusion queries outside the renderpasses works fine.
- If renderpasses are reorganized, occlusion tasks will have to be reorganized again.
This commit is contained in:
parent
1ee675e1f4
commit
8249d51aa8
@ -473,7 +473,9 @@ VKGSRender::VKGSRender() : GSRender()
|
||||
|
||||
//Occlusion
|
||||
m_occlusion_query_pool.create((*m_device), OCCLUSION_MAX_POOL_SIZE);
|
||||
for (int n = 0; n < 128; ++n)
|
||||
m_occlusion_map.resize(occlusion_query_count);
|
||||
|
||||
for (int n = 0; n < occlusion_query_count; ++n)
|
||||
m_occlusion_query_data[n].driver_handle = n;
|
||||
|
||||
//Generate frame contexts
|
||||
@ -1667,10 +1669,9 @@ void VKGSRender::end()
|
||||
|
||||
m_textures_upload_time += m_profiler.duration();
|
||||
|
||||
u32 occlusion_id = 0;
|
||||
if (m_occlusion_query_active)
|
||||
if (m_current_command_buffer->flags & vk::command_buffer::cb_load_occluson_task)
|
||||
{
|
||||
occlusion_id = m_occlusion_query_pool.find_free_slot();
|
||||
u32 occlusion_id = m_occlusion_query_pool.find_free_slot();
|
||||
if (occlusion_id == UINT32_MAX)
|
||||
{
|
||||
m_tsc += 100;
|
||||
@ -1683,21 +1684,21 @@ void VKGSRender::end()
|
||||
if (m_current_task) m_current_task->result = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Begin query
|
||||
m_occlusion_query_pool.begin_query(*m_current_command_buffer, occlusion_id);
|
||||
|
||||
auto &data = m_occlusion_map[m_active_query_info->driver_handle];
|
||||
data.indices.push_back(occlusion_id);
|
||||
data.command_buffer_to_wait = m_current_command_buffer;
|
||||
|
||||
m_current_command_buffer->flags &= ~vk::command_buffer::cb_load_occluson_task;
|
||||
m_current_command_buffer->flags |= (vk::command_buffer::cb_has_occlusion_task | vk::command_buffer::cb_has_open_query);
|
||||
}
|
||||
|
||||
bool primitive_emulated = false;
|
||||
vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, primitive_emulated);
|
||||
|
||||
if (m_occlusion_query_active && (occlusion_id != UINT32_MAX))
|
||||
{
|
||||
//Begin query
|
||||
m_occlusion_query_pool.begin_query(*m_current_command_buffer, occlusion_id);
|
||||
m_occlusion_map[m_active_query_info->driver_handle].indices.push_back(occlusion_id);
|
||||
m_occlusion_map[m_active_query_info->driver_handle].command_buffer_to_wait = m_current_command_buffer;
|
||||
|
||||
m_current_command_buffer->flags |= vk::command_buffer::cb_has_occlusion_task;
|
||||
}
|
||||
|
||||
// Apply write memory barriers
|
||||
if (true)//g_cfg.video.strict_rendering_mode)
|
||||
{
|
||||
@ -1768,12 +1769,6 @@ void VKGSRender::end()
|
||||
close_render_pass();
|
||||
vk::leave_uninterruptible();
|
||||
|
||||
if (m_occlusion_query_active && (occlusion_id != UINT32_MAX))
|
||||
{
|
||||
//End query
|
||||
m_occlusion_query_pool.end_query(*m_current_command_buffer, occlusion_id);
|
||||
}
|
||||
|
||||
m_rtts.on_write();
|
||||
|
||||
rsx::thread::end();
|
||||
@ -2126,7 +2121,6 @@ void VKGSRender::clear_surface(u32 mask)
|
||||
|
||||
void VKGSRender::flush_command_queue(bool hard_sync)
|
||||
{
|
||||
rsx::g_dma_manager.sync();
|
||||
close_and_submit_command_buffer(m_current_command_buffer->submit_fence);
|
||||
|
||||
if (hard_sync)
|
||||
@ -2165,6 +2159,11 @@ void VKGSRender::flush_command_queue(bool hard_sync)
|
||||
check_present_status();
|
||||
}
|
||||
|
||||
if (m_occlusion_query_active)
|
||||
{
|
||||
m_current_command_buffer->flags |= vk::command_buffer::cb_load_occluson_task;
|
||||
}
|
||||
|
||||
open_command_buffer();
|
||||
}
|
||||
|
||||
@ -2781,6 +2780,9 @@ void VKGSRender::write_buffers()
|
||||
|
||||
void VKGSRender::close_and_submit_command_buffer(VkFence fence, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore, VkPipelineStageFlags pipeline_stage_flags)
|
||||
{
|
||||
// Wait before sync block below
|
||||
rsx::g_dma_manager.sync();
|
||||
|
||||
if (m_attrib_ring_info.dirty() ||
|
||||
m_fragment_env_ring_info.dirty() ||
|
||||
m_vertex_env_ring_info.dirty() ||
|
||||
@ -2810,6 +2812,13 @@ void VKGSRender::close_and_submit_command_buffer(VkFence fence, VkSemaphore wait
|
||||
VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
|
||||
}
|
||||
|
||||
// End open queries. Flags will be automatically reset by the submit routine
|
||||
if (m_current_command_buffer->flags & vk::command_buffer::cb_has_open_query)
|
||||
{
|
||||
auto open_query = m_occlusion_map[m_active_query_info->driver_handle].indices.back();
|
||||
m_occlusion_query_pool.end_query(*m_current_command_buffer, open_query);
|
||||
}
|
||||
|
||||
m_current_command_buffer->end();
|
||||
m_current_command_buffer->tag();
|
||||
|
||||
@ -3473,18 +3482,33 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst
|
||||
|
||||
void VKGSRender::begin_occlusion_query(rsx::reports::occlusion_query_info* query)
|
||||
{
|
||||
verify(HERE), !m_occlusion_query_active;
|
||||
|
||||
query->result = 0;
|
||||
//query->sync_timestamp = get_system_time();
|
||||
m_active_query_info = query;
|
||||
m_occlusion_query_active = true;
|
||||
m_current_command_buffer->flags |= vk::command_buffer::cb_load_occluson_task;
|
||||
}
|
||||
|
||||
void VKGSRender::end_occlusion_query(rsx::reports::occlusion_query_info* query)
|
||||
{
|
||||
m_occlusion_query_active = false;
|
||||
m_active_query_info = nullptr;
|
||||
verify(HERE), query == m_active_query_info;
|
||||
|
||||
// NOTE: flushing the queue is very expensive, do not flush just because query stopped
|
||||
if (m_current_command_buffer->flags & vk::command_buffer::cb_has_open_query)
|
||||
{
|
||||
// End query
|
||||
auto open_query = m_occlusion_map[m_active_query_info->driver_handle].indices.back();
|
||||
m_occlusion_query_pool.end_query(*m_current_command_buffer, open_query);
|
||||
m_current_command_buffer->flags &= ~vk::command_buffer::cb_has_open_query;
|
||||
}
|
||||
|
||||
// Clear occlusion load flag
|
||||
m_current_command_buffer->flags &= ~vk::command_buffer::cb_load_occluson_task;
|
||||
|
||||
m_occlusion_query_active = false;
|
||||
m_active_query_info = nullptr;
|
||||
}
|
||||
|
||||
bool VKGSRender::check_occlusion_query_status(rsx::reports::occlusion_query_info* query)
|
||||
@ -3492,11 +3516,7 @@ bool VKGSRender::check_occlusion_query_status(rsx::reports::occlusion_query_info
|
||||
if (!query->num_draws)
|
||||
return true;
|
||||
|
||||
auto found = m_occlusion_map.find(query->driver_handle);
|
||||
if (found == m_occlusion_map.end())
|
||||
return true;
|
||||
|
||||
auto &data = found->second;
|
||||
auto &data = m_occlusion_map[query->driver_handle];
|
||||
if (data.indices.empty())
|
||||
return true;
|
||||
|
||||
@ -3522,11 +3542,7 @@ bool VKGSRender::check_occlusion_query_status(rsx::reports::occlusion_query_info
|
||||
|
||||
void VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info* query)
|
||||
{
|
||||
auto found = m_occlusion_map.find(query->driver_handle);
|
||||
if (found == m_occlusion_map.end())
|
||||
return;
|
||||
|
||||
auto &data = found->second;
|
||||
auto &data = m_occlusion_map[query->driver_handle];
|
||||
if (data.indices.empty())
|
||||
return;
|
||||
|
||||
@ -3561,27 +3577,22 @@ void VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info*
|
||||
}
|
||||
|
||||
m_occlusion_query_pool.reset_queries(*m_current_command_buffer, data.indices);
|
||||
m_occlusion_map.erase(query->driver_handle);
|
||||
data.indices.clear();
|
||||
}
|
||||
|
||||
void VKGSRender::discard_occlusion_query(rsx::reports::occlusion_query_info* query)
|
||||
{
|
||||
if (m_active_query_info == query)
|
||||
{
|
||||
m_occlusion_query_active = false;
|
||||
m_active_query_info = nullptr;
|
||||
end_occlusion_query(query);
|
||||
}
|
||||
|
||||
auto found = m_occlusion_map.find(query->driver_handle);
|
||||
if (found == m_occlusion_map.end())
|
||||
return;
|
||||
|
||||
auto &data = found->second;
|
||||
auto &data = m_occlusion_map[query->driver_handle];
|
||||
if (data.indices.empty())
|
||||
return;
|
||||
|
||||
m_occlusion_query_pool.reset_queries(*m_current_command_buffer, data.indices);
|
||||
m_occlusion_map.erase(query->driver_handle);
|
||||
data.indices.clear();
|
||||
}
|
||||
|
||||
bool VKGSRender::on_decompiler_task()
|
||||
|
@ -400,7 +400,7 @@ private:
|
||||
vk::occlusion_query_pool m_occlusion_query_pool;
|
||||
bool m_occlusion_query_active = false;
|
||||
rsx::reports::occlusion_query_info *m_active_query_info = nullptr;
|
||||
std::unordered_map<u32, occlusion_data> m_occlusion_map;
|
||||
std::vector<occlusion_data> m_occlusion_map;
|
||||
|
||||
shared_mutex m_secondary_cb_guard;
|
||||
vk::command_pool m_secondary_command_buffer_pool;
|
||||
|
@ -891,7 +891,9 @@ private:
|
||||
{
|
||||
cb_has_occlusion_task = 1,
|
||||
cb_has_blit_transfer = 2,
|
||||
cb_has_dma_transfer = 4
|
||||
cb_has_dma_transfer = 4,
|
||||
cb_has_open_query = 8,
|
||||
cb_load_occluson_task = 16
|
||||
};
|
||||
u32 flags = 0;
|
||||
|
||||
@ -2725,7 +2727,7 @@ public:
|
||||
VkQueryPool query_pool = VK_NULL_HANDLE;
|
||||
vk::render_device* owner = nullptr;
|
||||
|
||||
std::deque<u32> available_slots;
|
||||
std::stack<u32> available_slots;
|
||||
std::vector<bool> query_active_status;
|
||||
public:
|
||||
|
||||
@ -2740,11 +2742,10 @@ public:
|
||||
owner = &dev;
|
||||
|
||||
query_active_status.resize(num_entries, false);
|
||||
available_slots.resize(num_entries);
|
||||
|
||||
for (u32 n = 0; n < num_entries; ++n)
|
||||
{
|
||||
available_slots[n] = n;
|
||||
available_slots.push(n);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2807,7 +2808,7 @@ public:
|
||||
vkCmdResetQueryPool(cmd, query_pool, index, 1);
|
||||
|
||||
query_active_status[index] = false;
|
||||
available_slots.push_back(index);
|
||||
available_slots.push(index);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2834,8 +2835,8 @@ public:
|
||||
return ~0u;
|
||||
}
|
||||
|
||||
u32 result = available_slots.front();
|
||||
available_slots.pop_front();
|
||||
u32 result = available_slots.top();
|
||||
available_slots.pop();
|
||||
|
||||
verify(HERE), !query_active_status[result];
|
||||
return result;
|
||||
|
Loading…
Reference in New Issue
Block a user