diff --git a/rpcs3/Emu/RSX/Capture/rsx_capture.cpp b/rpcs3/Emu/RSX/Capture/rsx_capture.cpp
index dbe1235716..46f4b4f7a9 100644
--- a/rpcs3/Emu/RSX/Capture/rsx_capture.cpp
+++ b/rpcs3/Emu/RSX/Capture/rsx_capture.cpp
@@ -307,7 +307,7 @@ namespace rsx
 			u8* pixels_src = (u8*)vm::base(src_address);
 
 			const u32 src_size = in_pitch * (in_h - 1) + (in_w * in_bpp);
-			rsx->read_barrier(src_address, src_size);
+			rsx->read_barrier(src_address, src_size, true);
 
 			frame_capture_data::memory_block_data block_data;
 			block_data.data.resize(src_size);
@@ -328,7 +328,7 @@ namespace rsx
 			u32 src_dma = method_registers.nv0039_input_location();
 
 			u32 src_addr = get_address(src_offset, src_dma);
-			rsx->read_barrier(src_addr, in_pitch * (line_count - 1) + line_length);
+			rsx->read_barrier(src_addr, in_pitch * (line_count - 1) + line_length, true);
 
 			const u8* src = vm::_ptr(src_addr);
 
diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp
index fd4d993533..974c2d424e 100644
--- a/rpcs3/Emu/RSX/RSXThread.cpp
+++ b/rpcs3/Emu/RSX/RSXThread.cpp
@@ -296,7 +296,7 @@ namespace rsx
 		if (conditional_render_enabled && conditional_render_test_address)
 		{
 			// Evaluate conditional rendering test
-			zcull_ctrl->read_barrier(this, conditional_render_test_address, 4);
+			zcull_ctrl->read_barrier(this, conditional_render_test_address, 4, reports::sync_no_notify);
 			vm::ptr result = vm::cast(conditional_render_test_address);
 			conditional_render_test_failed = (result->value == 0);
 			conditional_render_test_address = 0;
@@ -2348,6 +2348,11 @@ namespace rsx
 		vm::_ref>(sink).store({ timestamp(), value, 0});
 	}
 
+	u32 thread::copy_zcull_stats(u32 memory_range_start, u32 memory_range, u32 destination)
+	{
+		return zcull_ctrl->copy_reports_to(memory_range_start, memory_range, destination);
+	}
+
 	void thread::sync()
 	{
 		zcull_ctrl->sync(this);
@@ -2384,9 +2389,10 @@ namespace rsx
 		return fifo_ctrl->last_cmd();
 	}
 
-	void thread::read_barrier(u32 memory_address, u32 memory_range)
+	flags32_t thread::read_barrier(u32 memory_address, u32 memory_range, bool unconditional)
 	{
-		zcull_ctrl->read_barrier(this, memory_address, memory_range);
+		flags32_t zcull_flags = (unconditional)? reports::sync_none : reports::sync_defer_copy;
+		return zcull_ctrl->read_barrier(this, memory_address, memory_range, zcull_flags);
 	}
 
 	void thread::notify_zcull_info_changed()
@@ -2938,6 +2944,16 @@ namespace rsx
 			vm::_ref>(sink).store({ timestamp, value, 0});
 		}
 
+		void ZCULL_control::write(queued_report_write* writer, u64 timestamp, u32 value)
+		{
+			write(writer->sink, timestamp, writer->type, value);
+
+			for (auto &addr : writer->sink_alias)
+			{
+				write(addr, timestamp, writer->type, value);
+			}
+		}
+
 		void ZCULL_control::sync(::rsx::thread* ptimer)
 		{
 			if (!m_pending_writes.empty())
@@ -2979,8 +2995,10 @@ namespace rsx
 					}
 
 					if (!writer.forwarder)
-						//No other queries in the chain, write result
-						write(writer.sink, ptimer->timestamp(), writer.type, result);
+					{
+						// No other queries in the chain, write result
+						write(&writer, ptimer->timestamp(), result);
+					}
 
 					processed++;
 				}
@@ -2997,7 +3015,7 @@ namespace rsx
 
 					if (remaining == 1)
 					{
-						m_pending_writes.front() = m_pending_writes.back();
+						m_pending_writes[0] = std::move(m_pending_writes.back());
 						m_pending_writes.resize(1);
 					}
 					else
@@ -3156,10 +3174,12 @@ namespace rsx
 
 					stat_tag_to_remove = writer.counter_tag;
 
-					//only zpass supported right now
+					// only zpass supported right now
 					if (!writer.forwarder)
-						//No other queries in the chain, write result
-						write(writer.sink, ptimer->timestamp(), writer.type, result);
+					{
+						// No other queries in the chain, write result
+						write(&writer, ptimer->timestamp(), result);
+					}
 
 					processed++;
 				}
@@ -3172,7 +3192,7 @@ namespace rsx
 				auto remaining = m_pending_writes.size() - processed;
 				if (remaining == 1)
 				{
-					m_pending_writes.front() = m_pending_writes.back();
+					m_pending_writes[0] = std::move(m_pending_writes.back());
 					m_pending_writes.resize(1);
 				}
 				else if (remaining)
@@ -3189,10 +3209,10 @@ namespace rsx
 			}
 		}
 
-		void ZCULL_control::read_barrier(::rsx::thread* ptimer, u32 memory_address, u32 memory_range)
+		flags32_t ZCULL_control::read_barrier(::rsx::thread* ptimer, u32 memory_address, u32 memory_range, flags32_t flags)
 		{
 			if (m_pending_writes.empty())
-				return;
+				return result_none;
 
 			const auto memory_end = memory_address + memory_range;
 			u32 sync_address = 0;
@@ -3208,11 +3228,21 @@ namespace rsx
 				}
 			}
 
-			if (sync_address)
+			if (!sync_address)
+				return result_none;
+
+			if (!(flags & sync_defer_copy))
 			{
-				ptimer->sync_hint(FIFO_hint::hint_zcull_sync, reinterpret_cast(query));
+				if (!(flags & sync_no_notify))
+				{
+					ptimer->sync_hint(FIFO_hint::hint_zcull_sync, reinterpret_cast(query));
+				}
+
 				update(ptimer, sync_address);
+				return result_none;
 			}
+
+			return result_zcull_intr;
 		}
 
 		occlusion_query_info* ZCULL_control::find_query(vm::addr_t sink_address)
@@ -3225,5 +3255,26 @@ namespace rsx
 
 			return nullptr;
 		}
+
+		// Registers a 'dest'-rebased alias address on every pending, non-forwarded report write whose sink overlaps [start, start + range)
+		// NOTE: bytes_to_write is never incremented below, so this currently always returns 0 and callers comparing it to a non-zero length never take the "all writes deferred" path
+		u32 ZCULL_control::copy_reports_to(u32 start, u32 range, u32 dest)
+		{
+			u32 bytes_to_write = 0;
+			const auto memory_range = utils::address_range::start_length(start, range);
+			for (auto &writer : m_pending_writes)
+			{
+				if (!writer.sink)
+					break;
+
+				if (!writer.forwarder && memory_range.overlaps(writer.sink))
+				{
+					u32 address = (writer.sink - start) + dest;
+					writer.sink_alias.push_back(vm::cast(address));
+				}
+			}
+
+			return bytes_to_write;
+		}
 	}
 }
diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h
index a69753f820..f481f564bf 100644
--- a/rpcs3/Emu/RSX/RSXThread.h
+++ b/rpcs3/Emu/RSX/RSXThread.h
@@ -101,6 +101,13 @@ namespace rsx
 		hint_zcull_sync = 2
 	};
 
+	enum result_flags: u8
+	{
+		result_none = 0,
+		result_error = 1,
+		result_zcull_intr = 2
+	};
+
 	u32 get_vertex_type_size_on_host(vertex_base_type type, u32 size);
 
 	u32 get_address(u32 offset, u32 location);
@@ -345,11 +352,20 @@ namespace rsx
 			u32 counter_tag;
 			occlusion_query_info* query;
 			queued_report_write* forwarder;
-			vm::addr_t sink;
+
+			vm::addr_t sink; // Memory location of the report
+			std::vector sink_alias; // Aliased memory addresses
 
 			u64 due_tsc;
 		};
 
+		enum sync_control
+		{
+			sync_none = 0,
+			sync_defer_copy = 1, // If set, return a zcull intr code instead of forcefully reading zcull data
+			sync_no_notify = 2 // If set, backend hint notifications will not be made
+		};
+
 		struct ZCULL_control
 		{
 			// Delay before a report update operation is forced to retire
@@ -380,6 +396,7 @@ namespace rsx
 			void set_active(class ::rsx::thread* ptimer, bool state);
 
 			void write(vm::addr_t sink, u64 timestamp, u32 type, u32 value);
+			void write(queued_report_write* writer, u64 timestamp, u32 value);
 
 			// Read current zcull statistics into the address provided
 			void read_report(class ::rsx::thread* ptimer, vm::addr_t sink, u32 type);
@@ -394,7 +411,7 @@ namespace rsx
 			void sync(class ::rsx::thread* ptimer);
 
 			// Conditionally sync any pending writes if range overlaps
-			void read_barrier(class ::rsx::thread* ptimer, u32 memory_address, u32 memory_range);
+			flags32_t read_barrier(class ::rsx::thread* ptimer, u32 memory_address, u32 memory_range, flags32_t flags);
 
 			// Call once every 'tick' to update, optional address provided to partially sync until address is processed
 			void update(class ::rsx::thread* ptimer, u32 sync_address = 0);
@@ -408,6 +425,9 @@ namespace rsx
 			// Search for query synchronized at address
 			occlusion_query_info* find_query(vm::addr_t sink_address);
 
+			// Copies queries in range rebased from source range to destination range
+			u32 copy_reports_to(u32 start, u32 range, u32 dest);
+
 			// Backend methods (optional, will return everything as always visible by default)
 			virtual void begin_occlusion_query(occlusion_query_info* /*query*/) {}
 			virtual void end_occlusion_query(occlusion_query_info* /*query*/) {}
@@ -687,10 +707,11 @@ namespace rsx
 		void clear_zcull_stats(u32 type);
 		void check_zcull_status(bool framebuffer_swap);
 		void get_zcull_stats(u32 type, vm::addr_t sink);
+		u32 copy_zcull_stats(u32 memory_range_start, u32 memory_range, u32 destination);
 
 		// sync
 		void sync();
-		void read_barrier(u32 memory_address, u32 memory_range);
+		flags32_t read_barrier(u32 memory_address, u32 memory_range, bool unconditional);
 		virtual void sync_hint(FIFO_hint /*hint*/, u64 /*arg*/) {}
 
 		gsl::span get_raw_index_array(const draw_clause& draw_indexed_clause) const;
diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp
index 3f9f326b29..65375580f7 100644
--- a/rpcs3/Emu/RSX/rsx_methods.cpp
+++ b/rpcs3/Emu/RSX/rsx_methods.cpp
@@ -920,6 +920,7 @@ namespace rsx
 			rsx::blit_engine::transfer_destination_format dst_color_format;
 			u32 out_pitch = 0;
 			u32 out_alignment = 64;
+			bool is_block_transfer = false;
 
 			switch (method_registers.blit_engine_context_surface())
 			{
@@ -930,6 +931,7 @@ namespace rsx
 				dst_color_format = method_registers.blit_engine_nv3062_color_format();
 				out_pitch = method_registers.blit_engine_output_pitch_nv3062();
 				out_alignment = method_registers.blit_engine_output_alignment_nv3062();
+				is_block_transfer = fcmp(scale_x, 1.f) && fcmp(scale_y, 1.f);
 				break;
 			}
 			case blit_engine::context_surface::swizzle2d:
@@ -954,8 +956,7 @@ namespace rsx
 
 			if (UNLIKELY(in_x == 1 || in_y == 1))
 			{
-				const bool is_graphics_op = scale_x < 0.f || scale_y < 0.f || in_bpp != out_bpp || !rsx::fcmp(scale_x, 1.f) || !rsx::fcmp(scale_y, 1.f);
-				if (!is_graphics_op)
+				if (is_block_transfer && in_bpp == out_bpp)
 				{
 					// No scaling factor, so size in src == size in dst
 					// Check for texel wrapping where (offset + size) > size by 1 pixel
@@ -968,6 +969,8 @@ namespace rsx
 					// Graphics operation, ignore subpixel correction offsets
 					if (in_x == 1) in_x = 0;
 					if (in_y == 1) in_y = 0;
+
+					is_block_transfer = false;
 				}
 			}
 
@@ -977,12 +980,31 @@ namespace rsx
 			const u32 src_address = get_address(src_offset, src_dma);
 			const u32 dst_address = get_address(dst_offset, dst_dma);
 
+			const u32 src_line_length = (in_w * in_bpp);
+			if (is_block_transfer && (clip_h == 1 || (in_pitch == out_pitch && src_line_length == in_pitch)))
+			{
+				const u32 nb_lines = std::min(clip_h, in_h);
+				const u32 data_length = nb_lines * src_line_length;
+
+				if (const auto result = rsx->read_barrier(src_address, data_length, false);
+					result == rsx::result_zcull_intr)
+				{
+					if (rsx->copy_zcull_stats(src_address, data_length, dst_address) == data_length)
+					{
+						// All writes deferred
+						return;
+					}
+				}
+			}
+			else
+			{
+				const u32 data_length = in_pitch * (in_h - 1) + src_line_length;
+				rsx->read_barrier(src_address, data_length, true);
+			}
+
 			u8* pixels_src = vm::_ptr(src_address + in_offset);
 			u8* pixels_dst = vm::_ptr(dst_address + out_offset);
 
-			const auto read_address = get_address(src_offset, src_dma);
-			rsx->read_barrier(read_address, in_pitch * (in_h - 1) + (in_w * in_bpp));
-
 			if (dst_color_format != rsx::blit_engine::transfer_destination_format::r5g6b5 &&
 				dst_color_format != rsx::blit_engine::transfer_destination_format::a8r8g8b8)
 			{
@@ -1310,15 +1332,28 @@ namespace rsx
 			u32 dst_offset = method_registers.nv0039_output_offset();
 			u32 dst_dma = method_registers.nv0039_output_location();
 
+			const bool is_block_transfer = (in_pitch == out_pitch && out_pitch == line_length);
 			const auto read_address = get_address(src_offset, src_dma);
-			rsx->read_barrier(read_address, in_pitch * (line_count - 1) + line_length);
+			const auto write_address = get_address(dst_offset, dst_dma);
+			const auto data_length = in_pitch * (line_count - 1) + line_length;
 
-			u8 *dst = vm::_ptr(get_address(dst_offset, dst_dma));
+			if (const auto result = rsx->read_barrier(read_address, data_length, !is_block_transfer);
+				result == rsx::result_zcull_intr)
+			{
+				// This transfer overlaps with zcull data pool
+				if (rsx->copy_zcull_stats(read_address, data_length, write_address) == data_length)
+				{
+					// All writes deferred
+					return;
+				}
+			}
+
+			u8 *dst = vm::_ptr(write_address);
 			const u8 *src = vm::_ptr(read_address);
 
 			const bool is_overlapping = dst_dma == src_dma && [&]() -> bool
 			{
-				const u32 src_max = src_offset + (in_pitch * (line_count - 1) + line_length);
+				const u32 src_max = src_offset + data_length;
 				const u32 dst_max = dst_offset + (out_pitch * (line_count - 1) + line_length);
 				return (src_offset >= dst_offset && src_offset < dst_max) ||
 					(dst_offset >= src_offset && dst_offset < src_max);
@@ -1326,7 +1361,7 @@ namespace rsx
 
 			if (is_overlapping)
 			{
-				if (in_pitch == out_pitch && out_pitch == line_length)
+				if (is_block_transfer)
 				{
 					std::memmove(dst, src, line_length * line_count);
 				}
@@ -1354,7 +1389,7 @@ namespace rsx
 			}
 			else
 			{
-				if (in_pitch == out_pitch && out_pitch == line_length)
+				if (is_block_transfer)
 				{
 					std::memcpy(dst, src, line_length * line_count);
 				}