From a6dfc3be2f20d383f960446b0ae3f07651a0ca4d Mon Sep 17 00:00:00 2001 From: Eladash Date: Sun, 9 Oct 2022 11:06:40 +0300 Subject: [PATCH] SPU: Enable the MFC list optimization for Atomic RSX FIFO --- rpcs3/Emu/Cell/SPUThread.cpp | 22 ++++++----------- rpcs3/Emu/RSX/RSXThread.h | 47 ++++++++++++++++++++++++++++++------ 2 files changed, 47 insertions(+), 22 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index e1e48139b5..c2615d7cd4 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -2721,10 +2721,8 @@ bool spu_thread::do_list_transfer(spu_mfc_cmd& args) { optimization_compatible = 0; } - else if (optimization_compatible == MFC_PUT_CMD && (g_cfg.video.strict_rendering_mode || g_cfg.core.rsx_fifo_accuracy)) - { - optimization_compatible &= ~MFC_PUT_CMD; - } + + rsx::reservation_lock rsx_lock(0, 128, optimization_compatible == MFC_PUT_CMD && (g_cfg.video.strict_rendering_mode || (g_cfg.core.rsx_fifo_accuracy && !g_cfg.core.spu_accurate_dma))); constexpr u32 ts_mask = 0x7fff; @@ -3038,16 +3036,6 @@ bool spu_thread::do_list_transfer(spu_mfc_cmd& args) const u32 size = items[index].ts & ts_mask; const u32 addr = items[index].ea; - auto check_carry_16 = [](u16 addr, u16 size) - { -#ifdef _MSC_VER - u16 out; - return _addcarry_u16(0, addr, size - 1, &out); -#else - return ((addr + size - 1) >> 16) != 0; -#endif - }; - // Try to inline the transfer if (addr < RAW_SPU_BASE_ADDR && size && optimization_compatible == MFC_GET_CMD) { @@ -3120,8 +3108,10 @@ bool spu_thread::do_list_transfer(spu_mfc_cmd& args) arg_lsa += utils::align(size, 16); } // Avoid inlining huge transfers because it intentionally drops range lock unlock - else if (addr < RAW_SPU_BASE_ADDR && size - 1 <= 0x400 - 1 && optimization_compatible == MFC_PUT_CMD && !check_carry_16(static_cast<u16>(addr), static_cast<u16>(size))) + else if (addr < RAW_SPU_BASE_ADDR && size - 1 <= 0x400 - 1 && optimization_compatible == MFC_PUT_CMD && (addr % 0x10000 + (size - 
1)) < 0x10000) { + rsx_lock.update_if_enabled(addr, size, range_lock); + if (!g_use_rtm) { vm::range_lock(range_lock, addr & -128, utils::align(addr + size, 128) - (addr & -128)); } @@ -3198,6 +3188,8 @@ bool spu_thread::do_list_transfer(spu_mfc_cmd& args) else if (size) { range_lock->release(0); + rsx_lock.unlock(); + spu_log.trace("LIST: item=0x%016x, lsa=0x%05x", std::bit_cast<be_t<u64>>(items[index]), arg_lsa | (addr & 0xf)); transfer.eal = addr; diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index fb5351f283..861447fedb 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -42,9 +42,11 @@ namespace rsx struct rsx_iomap_table { + static constexpr u32 c_lock_stride = 8096; + std::array<atomic_t<u32>, 4096> ea; std::array<atomic_t<u32>, 4096> io; - std::array rs; + std::array rs; rsx_iomap_table() noexcept; @@ -892,15 +894,18 @@ namespace rsx template class reservation_lock { - u32 addr = 0, length = 0; - bool locked = false; + u32 addr = 0; + u32 length = 0; inline void lock_range(u32 addr, u32 length) { + if (!get_current_renderer()->iomap_table.lock(addr, length, get_current_cpu_thread())) + { + length = 0; + } + this->addr = addr; this->length = length; - - this->locked = get_current_renderer()->iomap_table.lock(addr, length, get_current_cpu_thread()); } public: @@ -948,13 +953,41 @@ namespace rsx } } - ~reservation_lock() + // Very special utility for batched transfers (SPU related) + template <typename T = void> + void update_if_enabled(u32 addr, u32 _length, const std::add_pointer_t<T>& lock_release = std::add_pointer_t<T>{}) { - if (locked) + // This check is not perfect but it covers the important cases fast (this check is only an optimization - forcing true disables it) + if (length && (this->addr / rsx_iomap_table::c_lock_stride != addr / rsx_iomap_table::c_lock_stride || (addr % rsx_iomap_table::c_lock_stride + _length) > rsx_iomap_table::c_lock_stride)) + { + if constexpr (!std::is_void_v<T>) + { + // See SPUThread.cpp + lock_release->release(0); + } + + unlock(); + 
lock_range(addr, _length); + } + } + + void unlock(bool destructor = false) + { + if (length) { get_current_renderer()->iomap_table.unlock(addr, length); + + if (!destructor) + { + length = 0; + } } } + + ~reservation_lock() + { + unlock(true); + } }; class eng_lock