From 5be7f08965d5125463b3270e4ce20d84ef0058c4 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sat, 30 Nov 2019 15:44:47 +0300 Subject: [PATCH] rsx: Restructure ZCULL report retirement - Prefer lazy retire model. Sync commands are sent out and the reports will be retired when they are available without forcing. - To make this work with conditional rendering, hardware support is required where the backend will automatically determine visibility by itself during rendering. --- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 6 +- rpcs3/Emu/RSX/RSXThread.cpp | 378 +++++++++++++++++++++------ rpcs3/Emu/RSX/RSXThread.h | 103 ++++++-- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 48 ++-- rpcs3/Emu/RSX/VK/VKGSRender.h | 4 +- rpcs3/Emu/RSX/VK/VKVertexProgram.cpp | 1 - rpcs3/Emu/RSX/rsx_methods.cpp | 10 +- 7 files changed, 403 insertions(+), 147 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 5e19ae3ef6..51ea91c676 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -176,8 +176,7 @@ void GLGSRender::begin() { rsx::thread::begin(); - if (skip_current_frame || - (conditional_render_enabled && conditional_render_test_failed)) + if (skip_current_frame || cond_render_ctrl.disable_rendering()) return; init_buffers(rsx::framebuffer_creation_context::context_draw); @@ -187,8 +186,7 @@ void GLGSRender::end() { m_profiler.start(); - if (skip_current_frame || !framebuffer_status_valid || - (conditional_render_enabled && conditional_render_test_failed)) + if (skip_current_frame || !framebuffer_status_valid || cond_render_ctrl.disable_rendering()) { execute_nop_draw(); rsx::thread::end(); diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index b8ebba05c1..de4dc6656f 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -294,13 +294,33 @@ namespace rsx void thread::begin() { - if (conditional_render_enabled && conditional_render_test_address) + if (cond_render_ctrl.hw_cond_active) { - // 
Evaluate conditional rendering test - zcull_ctrl->read_barrier(this, conditional_render_test_address, 4, reports::sync_no_notify); - vm::ptr result = vm::cast(conditional_render_test_address); - conditional_render_test_failed = (result->value == 0); - conditional_render_test_address = 0; + if (!cond_render_ctrl.eval_pending()) + { + // End conditional rendering if still active + end_conditional_rendering(); + } + + // If hw cond render is enabled and evaluation is still pending, do nothing + } + else if (cond_render_ctrl.eval_pending()) + { + // Evaluate conditional rendering test or enable hw cond render until results are available + if (backend_config.supports_hw_conditional_render) + { + // In this mode, it is possible to skip the cond render while the backend is still processing data. + // The backend guarantees that any draw calls emitted during this time will NOT generate any ROP writes + verify(HERE), !cond_render_ctrl.hw_cond_active; + + // Pending evaluation, use hardware test + begin_conditional_rendering(); + } + else + { + zcull_ctrl->read_barrier(this, cond_render_ctrl.eval_address, 4, reports::sync_no_notify); + cond_render_ctrl.eval_result(this); + } } if (m_graphics_state & rsx::pipeline_state::fragment_program_dirty) @@ -2134,6 +2154,45 @@ namespace rsx return zcull_ctrl->copy_reports_to(memory_range_start, memory_range, destination); } + void thread::enable_conditional_rendering(vm::addr_t ref) + { + cond_render_ctrl.enable_conditional_render(this, ref); + + auto result = zcull_ctrl->find_query(ref); + if (result.found) + { + if (result.query) + { + cond_render_ctrl.set_sync_tag(result.query->sync_tag); + sync_hint(FIFO_hint::hint_conditional_render_eval, result.query); + } + else + { + bool failed = (result.raw_zpass_result == 0); + cond_render_ctrl.set_eval_result(this, failed); + } + } + else + { + cond_render_ctrl.eval_result(this); + } + } + + void thread::disable_conditional_rendering() + { + 
cond_render_ctrl.disable_conditional_render(this); + } + + void thread::begin_conditional_rendering() + { + cond_render_ctrl.hw_cond_active = true; + } + + void thread::end_conditional_rendering() + { + cond_render_ctrl.hw_cond_active = false; + } + void thread::sync() { zcull_ctrl->sync(this); @@ -2149,6 +2208,11 @@ namespace rsx //verify (HERE), async_tasks_pending.load() == 0; } + void thread::sync_hint(FIFO_hint /*hint*/, void* /*args*/) + { + zcull_ctrl->on_sync_hint(); + } + void thread::flush_fifo() { // Make sure GET value is exposed before sync points @@ -2369,7 +2433,7 @@ namespace rsx } // Reset zcull ctrl - zcull_ctrl->set_active(this, false); + zcull_ctrl->set_active(this, false, true); zcull_ctrl->clear(this); if (zcull_ctrl->has_pending()) @@ -2525,18 +2589,29 @@ namespace rsx namespace reports { - void ZCULL_control::set_enabled(class ::rsx::thread* ptimer, bool state) + ZCULL_control::ZCULL_control() + { + for (auto& query : m_occlusion_query_data) + { + m_free_occlusion_pool.push(&query); + } + } + + ZCULL_control::~ZCULL_control() + {} + + void ZCULL_control::set_enabled(class ::rsx::thread* ptimer, bool state, bool flush_queue) { if (state != enabled) { enabled = state; if (active && !enabled) - set_active(ptimer, false); + set_active(ptimer, false, flush_queue); } } - void ZCULL_control::set_active(class ::rsx::thread* ptimer, bool state) + void ZCULL_control::set_active(class ::rsx::thread* ptimer, bool state, bool flush_queue) { if (state != active) { @@ -2556,6 +2631,8 @@ namespace rsx end_occlusion_query(m_current_task); m_current_task->active = false; m_current_task->pending = true; + m_current_task->sync_tag = ++m_timer; + m_current_task->timestamp = m_tsc; m_pending_writes.push_back({}); m_pending_writes.back().query = m_current_task; @@ -2564,10 +2641,12 @@ namespace rsx else { discard_occlusion_query(m_current_task); + free_query(m_current_task); m_current_task->active = false; } m_current_task = nullptr; + update(ptimer, 0u, 
flush_queue); } } } @@ -2582,6 +2661,8 @@ namespace rsx m_current_task->active = false; m_current_task->pending = true; + m_current_task->timestamp = m_tsc; + m_current_task->sync_tag = ++m_timer; m_pending_writes.back().query = m_current_task; allocate_new_query(ptimer); @@ -2589,8 +2670,16 @@ namespace rsx } else { - //Spam; send null query down the pipeline to copy the last result - //Might be used to capture a timestamp (verify) + // Spam; send null query down the pipeline to copy the last result + // Might be used to capture a timestamp (verify) + + if (m_pending_writes.empty()) + { + // No need to queue this if there is no pending request in the pipeline anyway + write(sink, ptimer->timestamp(), type, m_statistics_map[m_statistics_tag_id]); + return; + } + m_pending_writes.push_back({}); } @@ -2600,13 +2689,15 @@ namespace rsx if (!It->sink) { It->counter_tag = m_statistics_tag_id; - It->due_tsc = get_system_time() + m_cycles_delay; It->sink = sink; It->type = type; if (forwarder != &(*It)) { - //Not the last one in the chain, forward the writing operation to the last writer + // Not the last one in the chain, forward the writing operation to the last writer + // Usually comes from truncated queries caused by disabling the testing + verify(HERE), It->query; + It->forwarder = forwarder; It->query->owned = true; } @@ -2625,53 +2716,46 @@ namespace rsx int retries = 0; while (true) { - for (u32 n = 0; n < occlusion_query_count; ++n) + if (!m_free_occlusion_pool.empty()) { - if (m_occlusion_query_data[n].pending || m_occlusion_query_data[n].active) - continue; + m_current_task = m_free_occlusion_pool.top(); + m_free_occlusion_pool.pop(); - m_current_task = &m_occlusion_query_data[n]; m_current_task->num_draws = 0; m_current_task->result = 0; - m_current_task->sync_timestamp = 0; m_current_task->active = true; m_current_task->owned = false; - m_current_task->hint = false; + m_current_task->sync_tag = 0; + m_current_task->timestamp = 0; return; } if (retries > 0) { 
- LOG_ERROR(RSX, "ZCULL report queue is overflowing!!"); - m_statistics_map[m_statistics_tag_id] = 1; - - verify(HERE), m_pending_writes.front().sink == 0; - m_pending_writes.clear(); - - for (auto &query : m_occlusion_query_data) - { - discard_occlusion_query(&query); - query.pending = false; - } - - m_current_task = &m_occlusion_query_data[0]; - m_current_task->num_draws = 0; - m_current_task->result = 0; - m_current_task->sync_timestamp = 0; - m_current_task->active = true; - m_current_task->owned = false; - m_current_task->hint = false; - return; + fmt::throw_exception("Allocation failed!"); } - //All slots are occupied, try to pop the earliest entry - m_tsc += max_zcull_delay_us; - update(ptimer); + // All slots are occupied, try to pop the earliest entry + + if (!m_pending_writes.front().query) + { + // If this happens, the assert above will fire. There should never be a queue header with no work to be done + LOG_ERROR(RSX, "Close to our death."); + } + + m_next_tsc = 0; + update(ptimer, m_pending_writes.front().sink); retries++; } } + void ZCULL_control::free_query(occlusion_query_info* query) + { + query->pending = false; + m_free_occlusion_pool.push(query); + } + void ZCULL_control::clear(class ::rsx::thread* ptimer) { if (!m_pending_writes.empty()) @@ -2683,7 +2767,7 @@ namespace rsx if (!It->sink) { discard_occlusion_query(It->query); - It->query->pending = false; + free_query(It->query); valid_size--; ptimer->async_tasks_pending--; continue; @@ -2703,8 +2787,11 @@ namespace rsx { if (m_current_task) m_current_task->num_draws++; + } - m_cycles_delay = max_zcull_delay_us; + void ZCULL_control::on_sync_hint() + { + m_sync_tag = ++m_timer; } void ZCULL_control::write(vm::addr_t sink, u64 timestamp, u32 type, u32 value) @@ -2745,6 +2832,21 @@ namespace rsx { if (!m_pending_writes.empty()) { + // Quick reverse scan to push commands ahead of time + for (auto It = m_pending_writes.rbegin(); It != m_pending_writes.rend(); ++It) + { + if (It->query && 
It->query->num_draws) + { + if (It->query->sync_tag > m_sync_tag) + { + // LOG_TRACE(RSX, "[Performance warning] Query hint emit during sync command."); + ptimer->sync_hint(FIFO_hint::hint_zcull_sync, It->query); + } + + break; + } + } + u32 processed = 0; const bool has_unclaimed = (m_pending_writes.back().sink == 0); @@ -2778,13 +2880,19 @@ namespace rsx discard_occlusion_query(query); } - query->pending = false; + free_query(query); } if (!writer.forwarder) { // No other queries in the chain, write result write(&writer, ptimer->timestamp(), result); + + if (query && ptimer->cond_render_ctrl.sync_tag == query->sync_tag) + { + const bool eval_failed = (result == 0); + ptimer->cond_render_ctrl.set_eval_result(ptimer, eval_failed); + } } processed++; @@ -2824,19 +2932,9 @@ namespace rsx //Decrement jobs counter ptimer->async_tasks_pending -= processed; } - - if (ptimer->conditional_render_enabled && ptimer->conditional_render_test_address) - { - ptimer->conditional_render_test_failed = vm::read32(ptimer->conditional_render_test_address) == 0; - ptimer->conditional_render_test_address = 0; - } - - //Critical, since its likely a WAIT_FOR_IDLE type has been processed, all results are considered available - m_cycles_delay = min_zcull_delay_us; - m_tsc = std::max(m_tsc, get_system_time()); } - void ZCULL_control::update(::rsx::thread* ptimer, u32 sync_address) + void ZCULL_control::update(::rsx::thread* ptimer, u32 sync_address, bool hint) { if (m_pending_writes.empty()) { @@ -2850,26 +2948,51 @@ namespace rsx return; } - // Update timestamp and proceed with processing only if there is work to be done - m_tsc = std::max(m_tsc, get_system_time()); - if (!sync_address) { - if (m_tsc < front.due_tsc) + if (hint || ptimer->async_tasks_pending >= max_safe_queue_depth) { - if (front.query && !front.query->hint && (front.due_tsc - m_tsc) <= m_backend_warn_threshold) + verify(HERE), !active || !hint; + + // Prepare the whole queue for reading. 
This happens when zcull activity is disabled or queue is too long + for (auto It = m_pending_writes.rbegin(); It != m_pending_writes.rend(); ++It) { - if (front.type == CELL_GCM_ZPASS_PIXEL_CNT || front.type == CELL_GCM_ZCULL_STATS3) + if (It->query) + { + if (It->query->num_draws && It->query->sync_tag > m_sync_tag) + { + ptimer->sync_hint(FIFO_hint::hint_zcull_sync, It->query); + verify(HERE), It->query->sync_tag < m_sync_tag; + } + + break; + } + } + } + + if (m_tsc = get_system_time(); m_tsc < m_next_tsc) + { + return; + } + else + { + // Schedule ahead + m_next_tsc = m_tsc + min_zcull_tick_us; + +#if 0 + // Schedule a queue flush if needed + if (front.query && front.query->num_draws && front.query->sync_tag > m_sync_tag) + { + const auto elapsed = m_tsc - front.query->timestamp; + if (elapsed > max_zcull_delay_us) { - // Imminent read ptimer->sync_hint(FIFO_hint::hint_zcull_sync, reinterpret_cast(front.query)); + verify(HERE), front.query->sync_tag < m_sync_tag; } - front.query->hint = true; + return; } - - // Avoid spamming backend with report status updates - return; +#endif } } @@ -2904,7 +3027,7 @@ namespace rsx verify(HERE), query->pending; const bool implemented = (writer.type == CELL_GCM_ZPASS_PIXEL_CNT || writer.type == CELL_GCM_ZCULL_STATS3); - if (force_read || writer.due_tsc < m_tsc) + if (force_read) { if (implemented && !result && query->num_draws) { @@ -2938,13 +3061,6 @@ namespace rsx } else { - if (!query->hint && (writer.due_tsc - m_tsc) <= m_backend_warn_threshold) - { - // Imminent read - ptimer->sync_hint(FIFO_hint::hint_zcull_sync, reinterpret_cast(query)); - query->hint = true; - } - //Too early; abort break; } @@ -2956,7 +3072,7 @@ namespace rsx } } - query->pending = false; + free_query(query); } stat_tag_to_remove = writer.counter_tag; @@ -2966,6 +3082,12 @@ namespace rsx { // No other queries in the chain, write result write(&writer, ptimer->timestamp(), result); + + if (query && ptimer->cond_render_ctrl.sync_tag == query->sync_tag) + 
{ + const bool eval_failed = (result == 0); + ptimer->cond_render_ctrl.set_eval_result(ptimer, eval_failed); + } } processed++; @@ -3039,7 +3161,11 @@ namespace rsx { if (!(flags & sync_no_notify)) { - ptimer->sync_hint(FIFO_hint::hint_zcull_sync, reinterpret_cast(query)); + if (UNLIKELY(query->sync_tag > m_sync_tag)) + { + ptimer->sync_hint(FIFO_hint::hint_zcull_sync, query); + verify(HERE), m_sync_tag > query->sync_tag; + } } update(ptimer, sync_address); @@ -3049,15 +3175,36 @@ namespace rsx return result_zcull_intr; } - occlusion_query_info* ZCULL_control::find_query(vm::addr_t sink_address) + query_search_result ZCULL_control::find_query(vm::addr_t sink_address) { - for (auto &writer : m_pending_writes) + u32 stat_id = 0; + for (auto It = m_pending_writes.crbegin(); It != m_pending_writes.crend(); ++It) { - if (writer.sink == sink_address) - return writer.query; + if (UNLIKELY(stat_id)) + { + if (It->counter_tag != stat_id) + { + // Zcull stats were cleared between this query and the required one + return { true, 0, nullptr }; + } + + if (It->query) + { + return { true, 0, It->query }; + } + } + else if (It->sink == sink_address) + { + if (It->query) + { + return { true, 0, It->query }; + } + + stat_id = It->counter_tag; + } } - return nullptr; + return {}; } u32 ZCULL_control::copy_reports_to(u32 start, u32 range, u32 dest) @@ -3078,5 +3225,70 @@ namespace rsx return bytes_to_write; } + + + // Conditional rendering helpers + bool conditional_render_eval::disable_rendering() const + { + return (enabled && eval_failed); + } + + bool conditional_render_eval::eval_pending() const + { + return (enabled && eval_address); + } + + void conditional_render_eval::enable_conditional_render(::rsx::thread* pthr, u32 address) + { + if (hw_cond_active) + { + verify(HERE), enabled; + pthr->end_conditional_rendering(); + } + + enabled = true; + eval_failed = false; + eval_address = address; + sync_tag = 0; + } + + void 
conditional_render_eval::disable_conditional_render(::rsx::thread* pthr) + { + if (hw_cond_active) + { + verify(HERE), enabled; + pthr->end_conditional_rendering(); + } + + enabled = false; + eval_failed = false; + eval_address = 0; + sync_tag = 0; + } + + void conditional_render_eval::set_sync_tag(u64 value) + { + sync_tag = value; + } + + void conditional_render_eval::set_eval_result(::rsx::thread* pthr, bool failed) + { + if (hw_cond_active) + { + verify(HERE), enabled; + pthr->end_conditional_rendering(); + } + + eval_failed = failed; + eval_address = 0; + sync_tag = 0; + } + + void conditional_render_eval::eval_result(::rsx::thread* pthr) + { + vm::ptr result = vm::cast(eval_address); + const bool failed = (result->value == 0); + set_eval_result(pthr, failed); + } } } diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index cd05907843..c55dc7ce09 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -2,6 +2,8 @@ #include #include +#include + #include "GCM.h" #include "rsx_cache.h" #include "RSXFIFO.h" @@ -338,12 +340,11 @@ namespace rsx u32 driver_handle; u32 result; u32 num_draws; - bool hint; + u64 sync_tag; + u64 timestamp; bool pending; bool active; bool owned; - - u64 sync_timestamp; }; struct queued_report_write @@ -355,8 +356,13 @@ namespace rsx vm::addr_t sink; // Memory location of the report std::vector sink_alias; // Aliased memory addresses + }; - u64 due_tsc; + struct query_search_result + { + bool found; + u32 raw_zpass_result; + occlusion_query_info* query; }; enum sync_control @@ -369,31 +375,39 @@ namespace rsx struct ZCULL_control { // Delay before a report update operation is forced to retire - const u32 max_zcull_delay_us = 500; + const u32 max_zcull_delay_us = 4000; const u32 min_zcull_delay_us = 50; + const u32 min_zcull_tick_us = 500; // Number of occlusion query slots available. 
Real hardware actually has far fewer units before choking - const u32 occlusion_query_count = 128; + const u32 occlusion_query_count = 1024; + const u32 max_safe_queue_depth = 892; bool active = false; bool enabled = false; - std::array m_occlusion_query_data = {}; + std::array m_occlusion_query_data = {}; + std::stack m_free_occlusion_pool; occlusion_query_info* m_current_task = nullptr; u32 m_statistics_tag_id = 0; + + // Scheduling clock. Granularity is the min_zcull_tick_us value. u64 m_tsc = 0; - u32 m_cycles_delay = max_zcull_delay_us; - u32 m_backend_warn_threshold = max_zcull_delay_us / 2; + u64 m_next_tsc = 0; + + // Incremental tag used for tracking sync events. Hardware clock resolution is too low for the job. + u64 m_sync_tag = 0; + u64 m_timer = 0; std::vector m_pending_writes; std::unordered_map m_statistics_map; - ZCULL_control() = default; - ~ZCULL_control() = default; + ZCULL_control(); + ~ZCULL_control(); - void set_enabled(class ::rsx::thread* ptimer, bool state); - void set_active(class ::rsx::thread* ptimer, bool state); + void set_enabled(class ::rsx::thread* ptimer, bool state, bool flush_queue = false); + void set_active(class ::rsx::thread* ptimer, bool state, bool flush_queue = false); void write(vm::addr_t sink, u64 timestamp, u32 type, u32 value); void write(queued_report_write* writer, u64 timestamp, u32 value); @@ -404,6 +418,9 @@ namespace rsx // Sets up a new query slot and sets it to the current task void allocate_new_query(class ::rsx::thread* ptimer); + // Free a query slot in use + void free_query(occlusion_query_info* query); + // Clears current stat block and increments stat_tag_id void clear(class ::rsx::thread* ptimer); @@ -414,16 +431,19 @@ namespace rsx flags32_t read_barrier(class ::rsx::thread* ptimer, u32 memory_address, u32 memory_range, flags32_t flags); // Call once every 'tick' to update, optional address provided to partially sync until address is processed - void update(class ::rsx::thread* ptimer, u32 sync_address = 0); 
+ void update(class ::rsx::thread* ptimer, u32 sync_address = 0, bool hint = false); // Draw call notification void on_draw(); + // Sync hint notification + void on_sync_hint(); + // Check for pending writes bool has_pending() const { return !m_pending_writes.empty(); } // Search for query synchronized at address - occlusion_query_info* find_query(vm::addr_t sink_address); + query_search_result find_query(vm::addr_t sink_address); // Copies queries in range rebased from source range to destination range u32 copy_reports_to(u32 start, u32 range, u32 dest); @@ -435,6 +455,38 @@ namespace rsx virtual void get_occlusion_query_result(occlusion_query_info* query) { query->result = UINT32_MAX; } virtual void discard_occlusion_query(occlusion_query_info* /*query*/) {} }; + + // Helper class for conditional rendering + struct conditional_render_eval + { + bool enabled = false; + bool eval_failed = false; + bool hw_cond_active = false; + bool reserved = false; + u32 eval_address = 0; + u64 sync_tag = 0; + + // Returns true if rendering is disabled as per conditional render test + bool disable_rendering() const; + + // Returns true if a conditional render is active but not yet evaluated + bool eval_pending() const; + + // Enable conditional rendering + void enable_conditional_render(thread* pthr, u32 address); + + // Disable conditional rendering + void disable_conditional_render(thread* pthr); + + // Sets up the zcull sync tag + void set_sync_tag(u64 value); + + // Sets evaluation result. 
Result is true if conditional evaluation failed + void set_eval_result(thread* pthr, bool failed); + + // Evaluates the condition by accessing memory directly + void eval_result(thread* pthr); + }; } struct frame_statistics_t @@ -489,10 +541,11 @@ namespace rsx struct backend_configuration { - bool supports_multidraw; // Draw call batching - bool supports_hw_a2c; // Alpha to coverage - bool supports_hw_renormalization; // Should be true on NV hardware which matches PS3 texture renormalization behaviour - bool supports_hw_a2one; // Alpha to one + bool supports_multidraw; // Draw call batching + bool supports_hw_a2c; // Alpha to coverage + bool supports_hw_renormalization; // Should be true on NV hardware which matches PS3 texture renormalization behaviour + bool supports_hw_a2one; // Alpha to one + bool supports_hw_conditional_render; // Conditional render }; struct sampled_image_descriptor_base; @@ -655,13 +708,12 @@ namespace rsx atomic_t async_tasks_pending{ 0 }; - u32 conditional_render_test_address = 0; - bool conditional_render_test_failed = false; - bool conditional_render_enabled = false; bool zcull_stats_enabled = false; bool zcull_rendering_enabled = false; bool zcull_pixel_cnt_enabled = false; + reports::conditional_render_eval cond_render_ctrl; + void operator()(); virtual u64 get_cycles() = 0; virtual ~thread(); @@ -708,10 +760,15 @@ namespace rsx void get_zcull_stats(u32 type, vm::addr_t sink); u32 copy_zcull_stats(u32 memory_range_start, u32 memory_range, u32 destination); + void enable_conditional_rendering(vm::addr_t ref); + void disable_conditional_rendering(); + virtual void begin_conditional_rendering(); + virtual void end_conditional_rendering(); + // sync void sync(); flags32_t read_barrier(u32 memory_address, u32 memory_range, bool unconditional); - virtual void sync_hint(FIFO_hint /*hint*/, u64 /*arg*/) {} + virtual void sync_hint(FIFO_hint hint, void* args); gsl::span get_raw_index_array(const draw_clause& draw_indexed_clause) const; diff 
--git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index bcad7d031e..23a45e2bf2 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -543,6 +543,9 @@ VKGSRender::VKGSRender() : GSRender() // NOTE: On NVIDIA cards going back decades (including the PS3) there is a slight normalization inaccuracy in compressed formats. // Confirmed in BLES01916 (The Evil Within) which uses RGB565 for some virtual texturing data. backend_config.supports_hw_renormalization = (vk::get_driver_vendor() == vk::driver_vendor::NVIDIA); + + // Stub + backend_config.supports_hw_conditional_render = true; } VKGSRender::~VKGSRender() @@ -935,8 +938,7 @@ void VKGSRender::begin() { rsx::thread::begin(); - if (skip_current_frame || swapchain_unavailable || - (conditional_render_enabled && conditional_render_test_failed)) + if (skip_current_frame || swapchain_unavailable || cond_render_ctrl.disable_rendering()) return; init_buffers(rsx::framebuffer_creation_context::context_draw); @@ -1202,8 +1204,7 @@ void VKGSRender::emit_geometry(u32 sub_index) void VKGSRender::end() { - if (skip_current_frame || !framebuffer_status_valid || swapchain_unavailable || - (conditional_render_enabled && conditional_render_test_failed)) + if (skip_current_frame || !framebuffer_status_valid || swapchain_unavailable || cond_render_ctrl.disable_rendering()) { execute_nop_draw(); rsx::thread::end(); @@ -1737,8 +1738,9 @@ void VKGSRender::end() u32 occlusion_id = m_occlusion_query_pool.find_free_slot(); if (occlusion_id == UINT32_MAX) { - m_tsc += 100; - update(this); + // Force flush + LOG_ERROR(RSX, "[Performance Warning] Out of free occlusion slots. 
Forcing hard sync."); + ZCULL_control::sync(this); occlusion_id = m_occlusion_query_pool.find_free_slot(); if (occlusion_id == UINT32_MAX) @@ -2181,8 +2183,11 @@ void VKGSRender::flush_command_queue(bool hard_sync) open_command_buffer(); } -void VKGSRender::sync_hint(rsx::FIFO_hint hint, u64 arg) +void VKGSRender::sync_hint(rsx::FIFO_hint hint, void* args) { + verify(HERE), args; + rsx::thread::sync_hint(hint, args); + // Occlusion test result evaluation is coming up, avoid a hard sync switch (hint) { @@ -2197,15 +2202,14 @@ void VKGSRender::sync_hint(rsx::FIFO_hint hint, u64 arg) return; // Check if the required report is synced to this CB - if (auto occlusion_info = zcull_ctrl->find_query(vm::cast(arg))) + auto occlusion_info = static_cast(args); + auto& data = m_occlusion_map[occlusion_info->driver_handle]; + + if (data.command_buffer_to_wait == m_current_command_buffer && !data.indices.empty()) { - auto& data = m_occlusion_map[occlusion_info->driver_handle]; - if (data.command_buffer_to_wait == m_current_command_buffer && !data.indices.empty()) - { - // Confirmed hard sync coming up, post a sync request - m_flush_requests.post(false); - m_flush_requests.remove_one(); - } + // Confirmed hard sync coming up, post a sync request + m_flush_requests.post(false); + m_flush_requests.remove_one(); } break; @@ -2215,7 +2219,7 @@ void VKGSRender::sync_hint(rsx::FIFO_hint hint, u64 arg) if (!(m_current_command_buffer->flags & vk::command_buffer::cb_has_occlusion_task)) return; - auto occlusion_info = reinterpret_cast(arg); + auto occlusion_info = static_cast(args); auto& data = m_occlusion_map[occlusion_info->driver_handle]; if (data.command_buffer_to_wait == m_current_command_buffer && !data.indices.empty()) @@ -3666,17 +3670,9 @@ void VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info* { m_flush_requests.clear_pending_flag(); } - } - // Fast wait. 
Avoids heavyweight routines - while (!data.command_buffer_to_wait->poke()) - { - _mm_pause(); - - if (Emu.IsStopped()) - { - return; - } + LOG_ERROR(RSX, "[Performance warning] Unexpected ZCULL read caused a hard sync"); + busy_wait(); } // Gather data diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index ee010ec91a..1e0e36f24b 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -44,7 +44,7 @@ namespace vk #define VK_FRAGMENT_CONSTANTS_BUFFER_SIZE_M 16 #define VK_INDEX_RING_BUFFER_SIZE_M 16 -#define VK_MAX_ASYNC_CB_COUNT 64 +#define VK_MAX_ASYNC_CB_COUNT 256 #define VK_MAX_ASYNC_FRAMES 2 using rsx::flags32_t; @@ -465,7 +465,7 @@ public: void set_scissor(bool clip_viewport); void bind_viewport(); - void sync_hint(rsx::FIFO_hint hint, u64 arg) override; + void sync_hint(rsx::FIFO_hint hint, void* args) override; void begin_occlusion_query(rsx::reports::occlusion_query_info* query) override; void end_occlusion_query(rsx::reports::occlusion_query_info* query) override; diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp index 332ba362f4..8fdfa4f50c 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp @@ -6,7 +6,6 @@ #include "VKHelpers.h" #include "../Common/GLSLCommon.h" -#pragma optimize("", off) std::string VKVertexDecompilerThread::getFloatTypeName(size_t elementCount) { diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index b0893c08a2..757f7c4996 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -580,14 +580,11 @@ namespace rsx switch (mode) { case 1: - rsx->conditional_render_enabled = false; - rsx->conditional_render_test_failed = false; + rsx->disable_conditional_rendering(); return; case 2: - rsx->conditional_render_enabled = true; break; default: - rsx->conditional_render_enabled = false; LOG_ERROR(RSX, "Unknown render mode %d", mode); return; } @@ -597,15 +594,12 @@ 
namespace rsx if (!address_ptr) { - rsx->conditional_render_test_failed = false; LOG_ERROR(RSX, "Bad argument passed to NV4097_SET_RENDER_ENABLE, arg=0x%X", arg); return; } // Defer conditional render evaluation - rsx->sync_hint(FIFO_hint::hint_conditional_render_eval, address_ptr); - rsx->conditional_render_test_address = address_ptr; - rsx->conditional_render_test_failed = false; + rsx->enable_conditional_rendering(address_ptr); } void set_zcull_render_enable(thread* rsx, u32, u32 arg)