mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-03-29 22:20:48 +00:00
rsx: Zcull synchronization tuning
- Also fixes a bug where sync_hint would erroneously update the sync tag even for old lookups (e.g., a conditional render using an older query)
This commit is contained in:
parent
fdb638436f
commit
ed2bdb8e0c
@ -2220,9 +2220,9 @@ namespace rsx
|
|||||||
//verify (HERE), async_tasks_pending.load() == 0;
|
//verify (HERE), async_tasks_pending.load() == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void thread::sync_hint(FIFO_hint /*hint*/, void* /*args*/)
|
void thread::sync_hint(FIFO_hint /*hint*/, void* args)
|
||||||
{
|
{
|
||||||
zcull_ctrl->on_sync_hint();
|
zcull_ctrl->on_sync_hint(args);
|
||||||
}
|
}
|
||||||
|
|
||||||
void thread::flush_fifo()
|
void thread::flush_fifo()
|
||||||
@ -2639,7 +2639,7 @@ namespace rsx
|
|||||||
end_occlusion_query(m_current_task);
|
end_occlusion_query(m_current_task);
|
||||||
m_current_task->active = false;
|
m_current_task->active = false;
|
||||||
m_current_task->pending = true;
|
m_current_task->pending = true;
|
||||||
m_current_task->sync_tag = ++m_timer;
|
m_current_task->sync_tag = m_timer++;
|
||||||
m_current_task->timestamp = m_tsc;
|
m_current_task->timestamp = m_tsc;
|
||||||
|
|
||||||
m_pending_writes.push_back({});
|
m_pending_writes.push_back({});
|
||||||
@ -2670,7 +2670,7 @@ namespace rsx
|
|||||||
m_current_task->active = false;
|
m_current_task->active = false;
|
||||||
m_current_task->pending = true;
|
m_current_task->pending = true;
|
||||||
m_current_task->timestamp = m_tsc;
|
m_current_task->timestamp = m_tsc;
|
||||||
m_current_task->sync_tag = ++m_timer;
|
m_current_task->sync_tag = m_timer++;
|
||||||
m_pending_writes.back().query = m_current_task;
|
m_pending_writes.back().query = m_current_task;
|
||||||
|
|
||||||
allocate_new_query(ptimer);
|
allocate_new_query(ptimer);
|
||||||
@ -2800,12 +2800,16 @@ namespace rsx
|
|||||||
void ZCULL_control::on_draw()
|
void ZCULL_control::on_draw()
|
||||||
{
|
{
|
||||||
if (m_current_task)
|
if (m_current_task)
|
||||||
|
{
|
||||||
m_current_task->num_draws++;
|
m_current_task->num_draws++;
|
||||||
|
m_current_task->sync_tag = m_timer++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ZCULL_control::on_sync_hint()
|
void ZCULL_control::on_sync_hint(void* args)
|
||||||
{
|
{
|
||||||
m_sync_tag = ++m_timer;
|
auto query = static_cast<occlusion_query_info*>(args);
|
||||||
|
m_sync_tag = std::max(m_sync_tag, query->sync_tag);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ZCULL_control::write(vm::addr_t sink, u64 timestamp, u32 type, u32 value)
|
void ZCULL_control::write(vm::addr_t sink, u64 timestamp, u32 type, u32 value)
|
||||||
@ -2974,7 +2978,7 @@ namespace rsx
|
|||||||
if (It->query->num_draws && It->query->sync_tag > m_sync_tag)
|
if (It->query->num_draws && It->query->sync_tag > m_sync_tag)
|
||||||
{
|
{
|
||||||
ptimer->sync_hint(FIFO_hint::hint_zcull_sync, It->query);
|
ptimer->sync_hint(FIFO_hint::hint_zcull_sync, It->query);
|
||||||
verify(HERE), It->query->sync_tag < m_sync_tag;
|
verify(HERE), It->query->sync_tag <= m_sync_tag;
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
@ -2991,20 +2995,19 @@ namespace rsx
|
|||||||
// Schedule ahead
|
// Schedule ahead
|
||||||
m_next_tsc = m_tsc + min_zcull_tick_us;
|
m_next_tsc = m_tsc + min_zcull_tick_us;
|
||||||
|
|
||||||
#if 0
|
|
||||||
// Schedule a queue flush if needed
|
// Schedule a queue flush if needed
|
||||||
if (front.query && front.query->num_draws && front.query->sync_tag > m_sync_tag)
|
if (!g_cfg.video.relaxed_zcull_sync &&
|
||||||
|
front.query && front.query->num_draws && front.query->sync_tag > m_sync_tag)
|
||||||
{
|
{
|
||||||
const auto elapsed = m_tsc - front.query->timestamp;
|
const auto elapsed = m_tsc - front.query->timestamp;
|
||||||
if (elapsed > max_zcull_delay_us)
|
if (elapsed > max_zcull_delay_us)
|
||||||
{
|
{
|
||||||
ptimer->sync_hint(FIFO_hint::hint_zcull_sync, reinterpret_cast<uintptr_t>(front.query));
|
ptimer->sync_hint(FIFO_hint::hint_zcull_sync, front.query);
|
||||||
verify(HERE), front.query->sync_tag < m_sync_tag;
|
verify(HERE), front.query->sync_tag <= m_sync_tag;
|
||||||
}
|
}
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3176,7 +3179,7 @@ namespace rsx
|
|||||||
if (UNLIKELY(query->sync_tag > m_sync_tag))
|
if (UNLIKELY(query->sync_tag > m_sync_tag))
|
||||||
{
|
{
|
||||||
ptimer->sync_hint(FIFO_hint::hint_zcull_sync, query);
|
ptimer->sync_hint(FIFO_hint::hint_zcull_sync, query);
|
||||||
verify(HERE), m_sync_tag > query->sync_tag;
|
verify(HERE), m_sync_tag >= query->sync_tag;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -375,9 +375,8 @@ namespace rsx
|
|||||||
struct ZCULL_control
|
struct ZCULL_control
|
||||||
{
|
{
|
||||||
// Delay before a report update operation is forced to retire
|
// Delay before a report update operation is forced to retire
|
||||||
const u32 max_zcull_delay_us = 4000;
|
const u32 max_zcull_delay_us = 300;
|
||||||
const u32 min_zcull_delay_us = 50;
|
const u32 min_zcull_tick_us = 100;
|
||||||
const u32 min_zcull_tick_us = 500;
|
|
||||||
|
|
||||||
// Number of occlusion query slots available. Real hardware actually has far fewer units before choking
|
// Number of occlusion query slots available. Real hardware actually has far fewer units before choking
|
||||||
const u32 occlusion_query_count = 1024;
|
const u32 occlusion_query_count = 1024;
|
||||||
@ -437,7 +436,7 @@ namespace rsx
|
|||||||
void on_draw();
|
void on_draw();
|
||||||
|
|
||||||
// Sync hint notification
|
// Sync hint notification
|
||||||
void on_sync_hint();
|
void on_sync_hint(void* args);
|
||||||
|
|
||||||
// Check for pending writes
|
// Check for pending writes
|
||||||
bool has_pending() const { return !m_pending_writes.empty(); }
|
bool has_pending() const { return !m_pending_writes.empty(); }
|
||||||
|
@ -1784,7 +1784,7 @@ void VKGSRender::end()
|
|||||||
|
|
||||||
auto &data = m_occlusion_map[m_active_query_info->driver_handle];
|
auto &data = m_occlusion_map[m_active_query_info->driver_handle];
|
||||||
data.indices.push_back(occlusion_id);
|
data.indices.push_back(occlusion_id);
|
||||||
data.command_buffer_to_wait = m_current_command_buffer;
|
data.set_sync_command_buffer(m_current_command_buffer);
|
||||||
|
|
||||||
m_current_command_buffer->flags &= ~vk::command_buffer::cb_load_occluson_task;
|
m_current_command_buffer->flags &= ~vk::command_buffer::cb_load_occluson_task;
|
||||||
m_current_command_buffer->flags |= (vk::command_buffer::cb_has_occlusion_task | vk::command_buffer::cb_has_open_query);
|
m_current_command_buffer->flags |= (vk::command_buffer::cb_has_occlusion_task | vk::command_buffer::cb_has_open_query);
|
||||||
@ -2232,7 +2232,9 @@ void VKGSRender::sync_hint(rsx::FIFO_hint hint, void* args)
|
|||||||
auto occlusion_info = static_cast<rsx::reports::occlusion_query_info*>(args);
|
auto occlusion_info = static_cast<rsx::reports::occlusion_query_info*>(args);
|
||||||
auto& data = m_occlusion_map[occlusion_info->driver_handle];
|
auto& data = m_occlusion_map[occlusion_info->driver_handle];
|
||||||
|
|
||||||
if (data.command_buffer_to_wait != m_current_command_buffer || data.indices.empty())
|
// NOTE: Currently, a special condition exists where the indices can be empty even with active draw count.
|
||||||
|
// This is caused by async compiler and should be removed when ubershaders are added in
|
||||||
|
if (!data.is_current(m_current_command_buffer) || data.indices.empty())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Occlusion test result evaluation is coming up, avoid a hard sync
|
// Occlusion test result evaluation is coming up, avoid a hard sync
|
||||||
@ -3707,7 +3709,7 @@ bool VKGSRender::check_occlusion_query_status(rsx::reports::occlusion_query_info
|
|||||||
if (data.indices.empty())
|
if (data.indices.empty())
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
if (data.command_buffer_to_wait == m_current_command_buffer)
|
if (data.is_current(m_current_command_buffer))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
u32 oldest = data.indices.front();
|
u32 oldest = data.indices.front();
|
||||||
@ -3722,7 +3724,7 @@ void VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info*
|
|||||||
|
|
||||||
if (query->num_draws)
|
if (query->num_draws)
|
||||||
{
|
{
|
||||||
if (data.command_buffer_to_wait == m_current_command_buffer)
|
if (data.is_current(m_current_command_buffer))
|
||||||
{
|
{
|
||||||
std::lock_guard lock(m_flush_queue_mutex);
|
std::lock_guard lock(m_flush_queue_mutex);
|
||||||
flush_command_queue();
|
flush_command_queue();
|
||||||
@ -3736,8 +3738,7 @@ void VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info*
|
|||||||
busy_wait();
|
busy_wait();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Allocation stack is FIFO and very long so no need to actually wait for fence signal
|
data.sync();
|
||||||
data.command_buffer_to_wait->flush();
|
|
||||||
|
|
||||||
// Gather data
|
// Gather data
|
||||||
for (const auto occlusion_id : data.indices)
|
for (const auto occlusion_id : data.indices)
|
||||||
|
@ -72,6 +72,7 @@ struct command_buffer_chunk: public vk::command_buffer
|
|||||||
|
|
||||||
std::atomic_bool pending = { false };
|
std::atomic_bool pending = { false };
|
||||||
u64 eid_tag = 0;
|
u64 eid_tag = 0;
|
||||||
|
u64 reset_id = 0;
|
||||||
shared_mutex guard_mutex;
|
shared_mutex guard_mutex;
|
||||||
|
|
||||||
command_buffer_chunk() = default;
|
command_buffer_chunk() = default;
|
||||||
@ -101,6 +102,7 @@ struct command_buffer_chunk: public vk::command_buffer
|
|||||||
if (pending)
|
if (pending)
|
||||||
wait(FRAME_PRESENT_TIMEOUT);
|
wait(FRAME_PRESENT_TIMEOUT);
|
||||||
|
|
||||||
|
++reset_id;
|
||||||
CHECK_RESULT(vkResetCommandBuffer(commands, 0));
|
CHECK_RESULT(vkResetCommandBuffer(commands, 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -164,6 +166,27 @@ struct occlusion_data
|
|||||||
{
|
{
|
||||||
rsx::simple_array<u32> indices;
|
rsx::simple_array<u32> indices;
|
||||||
command_buffer_chunk* command_buffer_to_wait = nullptr;
|
command_buffer_chunk* command_buffer_to_wait = nullptr;
|
||||||
|
u64 command_buffer_sync_id = 0;
|
||||||
|
|
||||||
|
bool is_current(command_buffer_chunk* cmd) const
|
||||||
|
{
|
||||||
|
return (command_buffer_to_wait == cmd && command_buffer_sync_id == cmd->reset_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_sync_command_buffer(command_buffer_chunk* cmd)
|
||||||
|
{
|
||||||
|
command_buffer_to_wait = cmd;
|
||||||
|
command_buffer_sync_id = cmd->reset_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
void sync()
|
||||||
|
{
|
||||||
|
if (command_buffer_to_wait->reset_id == command_buffer_sync_id)
|
||||||
|
{
|
||||||
|
// Allocation stack is FIFO and very long so no need to actually wait for fence signal
|
||||||
|
command_buffer_to_wait->flush();
|
||||||
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
enum frame_context_state : u32
|
enum frame_context_state : u32
|
||||||
|
Loading…
x
Reference in New Issue
Block a user