diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp
index 9b99cb2f80..26601ae1f4 100644
--- a/rpcs3/Emu/Cell/PPUThread.cpp
+++ b/rpcs3/Emu/Cell/PPUThread.cpp
@@ -1807,6 +1807,9 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value)
 	// Align address: we do not need the lower 7 bits anymore
 	addr &= -128;
 
+	// Wait for range locks to clear
+	vm::clear_range_locks(addr, 128);
+
 	// Cache line data
 	auto& cline_data = vm::_ref<spu_rdata_t>(addr);
 
diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp
index 7effe13e9c..9ae33cbe0e 100644
--- a/rpcs3/Emu/Cell/SPUThread.cpp
+++ b/rpcs3/Emu/Cell/SPUThread.cpp
@@ -1985,7 +1985,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
 			}
 
 			// Obtain range lock as normal store
-			vm::range_lock(range_lock, eal, size0);
+			vm::range_lock(res, range_lock, eal, size0);
 
 			switch (size0)
 			{
@@ -2057,32 +2057,35 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
 		perf_meter<"DMA_PUT"_u64> perf2;
 
+		// TODO: split range-locked stores in cache lines for consistency
+		auto& res = vm::reservation_acquire(eal, args.size);
+
 		switch (u32 size = args.size)
 		{
 		case 1:
 		{
-			vm::range_lock(range_lock, eal, 1);
+			vm::range_lock(res, range_lock, eal, 1);
 			*reinterpret_cast<u8*>(dst) = *reinterpret_cast<const u8*>(src);
 			range_lock->release(0);
 			break;
 		}
 		case 2:
 		{
-			vm::range_lock(range_lock, eal, 2);
+			vm::range_lock(res, range_lock, eal, 2);
 			*reinterpret_cast<u16*>(dst) = *reinterpret_cast<const u16*>(src);
 			range_lock->release(0);
 			break;
 		}
 		case 4:
 		{
-			vm::range_lock(range_lock, eal, 4);
+			vm::range_lock(res, range_lock, eal, 4);
 			*reinterpret_cast<u32*>(dst) = *reinterpret_cast<const u32*>(src);
 			range_lock->release(0);
 			break;
 		}
 		case 8:
 		{
-			vm::range_lock(range_lock, eal, 8);
+			vm::range_lock(res, range_lock, eal, 8);
 			*reinterpret_cast<u64*>(dst) = *reinterpret_cast<const u64*>(src);
 			range_lock->release(0);
 			break;
 		}
@@ -2091,7 +2094,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
 		{
 			if (((eal & 127) + size) <= 128)
 			{
-				vm::range_lock(range_lock, eal, size);
+				vm::range_lock(res, range_lock, eal, size);
 
 				while (size)
 				{
@@ -2117,7 +2120,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
 				size -= size0;
 
 				// Split locking + transfer in two parts (before 64K border, and after it)
-				vm::range_lock(range_lock, range_addr, size0);
+				vm::range_lock(res, range_lock, range_addr, size0);
 
 				// Avoid unaligned stores in mov_rdata_avx
 				if (reinterpret_cast<u64>(dst) & 0x10)
@@ -2151,7 +2154,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
 				range_addr = nexta;
 			}
 
-			vm::range_lock(range_lock, range_addr, range_end - range_addr);
+			vm::range_lock(res, range_lock, range_addr, range_end - range_addr);
 
 			// Avoid unaligned stores in mov_rdata_avx
 			if (reinterpret_cast<u64>(dst) & 0x10)
@@ -2511,6 +2514,9 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
 			return false;
 		}
 
+		// Wait for range locks to clear
+		vm::clear_range_locks(addr, 128);
+
 		vm::_ref<atomic_t<u8>>(addr) += 0;
 
 		auto& super_data = *vm::get_super_ptr<spu_rdata_t>(addr);
diff --git a/rpcs3/Emu/Memory/vm.cpp b/rpcs3/Emu/Memory/vm.cpp
index 13bb3ee37d..1294542c99 100644
--- a/rpcs3/Emu/Memory/vm.cpp
+++ b/rpcs3/Emu/Memory/vm.cpp
@@ -226,11 +226,8 @@ namespace vm
 		return result;
 	}
 
-	static void _lock_shareable_cache(u8 value, u32 addr, u32 size)
+	void clear_range_locks(u32 addr, u32 size)
 	{
-		// Block new range locks
-		g_addr_lock = addr | u64{size} << 32;
-
 		ASSUME(size);
 
 		const auto range = utils::address_range::start_length(addr, size);
@@ -259,6 +256,14 @@ namespace vm
 		}
 	}
 
+	static void _lock_shareable_cache(u8 value, u32 addr, u32 size)
+	{
+		// Block new range locks
+		g_addr_lock = addr | u64{size} << 32;
+
+		clear_range_locks(addr, size);
+	}
+
 	void passive_lock(cpu_thread& cpu)
 	{
 		bool ok = true;
diff --git a/rpcs3/Emu/Memory/vm_locking.h b/rpcs3/Emu/Memory/vm_locking.h
index ae87c017ef..9815a69882 100644
--- a/rpcs3/Emu/Memory/vm_locking.h
+++ b/rpcs3/Emu/Memory/vm_locking.h
@@ -24,7 +24,7 @@ namespace vm
 	void range_lock_internal(atomic_t<u64>* range_lock, u32 begin, u32 size);
 
 	// Lock memory range
-	FORCE_INLINE void range_lock(atomic_t<u64>* range_lock, u32 begin, u32 size)
+	FORCE_INLINE void range_lock(atomic_t<u64>& res, atomic_t<u64>* range_lock, u32 begin, u32 size)
 	{
 		const u64 lock_val = g_addr_lock.load();
 		const u64 lock_addr = static_cast<u32>(lock_val); // -> u64
@@ -37,14 +37,14 @@ namespace vm
 			addr = addr & 0xffff;
 		}
 
-		if (addr + size <= lock_addr || addr >= lock_addr + lock_size) [[likely]]
+		if ((addr + size <= lock_addr || addr >= lock_addr + lock_size) && !(res.load() & 127)) [[likely]]
 		{
 			// Optimistic locking
 			range_lock->release(begin | (u64{size} << 32));
 
 			const u64 new_lock_val = g_addr_lock.load();
 
-			if (!new_lock_val || new_lock_val == lock_val) [[likely]]
+			if ((!new_lock_val || new_lock_val == lock_val) && !(res.load() & 127)) [[likely]]
 			{
 				return;
 			}
@@ -56,6 +56,9 @@ namespace vm
 		range_lock_internal(range_lock, begin, size);
 	}
 
+	// Wait for all range locks to release in specified range
+	void clear_range_locks(u32 addr, u32 size);
+
 	// Release it
 	void free_range_lock(atomic_t<u64>*) noexcept;
 
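
For reference, the optimistic fast path that the vm_locking.h hunk implements can be summarized outside the emulator roughly as follows. This is a minimal sketch using std::atomic stand-ins instead of rpcs3's atomic_t, and it omits the 64K shareable-segment address remapping; the names g_addr_lock_stub and try_optimistic_range_lock are illustrative only, not rpcs3 API.

// Sketch only: simplified model of the optimistic range-lock check above.
#include <atomic>
#include <cstdint>

using u32 = std::uint32_t;
using u64 = std::uint64_t;

// Stand-in for the published "blocked" range: addr | (size << 32).
std::atomic<u64> g_addr_lock_stub{0};

bool try_optimistic_range_lock(std::atomic<u64>& res, std::atomic<u64>& range_lock, u32 begin, u32 size)
{
	const u64 lock_val  = g_addr_lock_stub.load();
	const u64 lock_addr = static_cast<u32>(lock_val);       // blocked base address
	const u32 lock_size = static_cast<u32>(lock_val >> 32); // blocked range size

	// Fast path: the store must not overlap the blocked range, and the cache-line
	// reservation must not be busy (low 7 bits of the reservation word clear).
	if ((begin + size <= lock_addr || begin >= lock_addr + lock_size) && !(res.load() & 127))
	{
		// Optimistically publish our own range...
		range_lock.store(begin | (u64{size} << 32));

		// ...then re-check that no conflicting block or reservation appeared meanwhile.
		const u64 new_lock_val = g_addr_lock_stub.load();

		if ((!new_lock_val || new_lock_val == lock_val) && !(res.load() & 127))
		{
			return true; // caller performs the store, then releases range_lock (stores 0)
		}

		range_lock.store(0); // back off
	}

	return false; // caller falls back to the slow path
}

In the actual patch, a failed fast path falls through to vm::range_lock_internal, while the reservation-store side (ppu_store_reservation and spu_thread::do_putllc) calls the new vm::clear_range_locks to wait for any published ranges overlapping the 128-byte line before writing it.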