From 80530e8aef742399008d6e8ec3bed8ab398403fa Mon Sep 17 00:00:00 2001
From: Nekotekina <nekotekina@gmail.com>
Date: Sat, 31 Oct 2020 13:33:27 +0300
Subject: [PATCH] vm: rename g_addr_lock to g_range_lock

Reduce size to 29 bits and use 3 bits to communicate some information.
This information can be used to do very cheap lock-free access tests.
---
 rpcs3/Emu/Memory/vm.cpp       | 58 ++++++++++++++++++++++-------------
 rpcs3/Emu/Memory/vm_locking.h | 25 ++++++++++++---
 2 files changed, 58 insertions(+), 25 deletions(-)

diff --git a/rpcs3/Emu/Memory/vm.cpp b/rpcs3/Emu/Memory/vm.cpp
index 4f350885e0..e28053e676 100644
--- a/rpcs3/Emu/Memory/vm.cpp
+++ b/rpcs3/Emu/Memory/vm.cpp
@@ -63,8 +63,8 @@ namespace vm
 	// Memory mutex acknowledgement
 	thread_local atomic_t<cpu_thread*>* g_tls_locked = nullptr;
 
-	// Currently locked cache line
-	atomic_t<u64> g_addr_lock = 0;
+	// "Unique locked" range lock, as opposed to "shared" range locks from set
+	atomic_t<u64> g_range_lock = 0;
 
 	// Memory mutex: passive locks
 	std::array<atomic_t<cpu_thread*>, g_cfg.core.ppu_threads.max> g_locks{};
@@ -252,10 +252,16 @@ namespace vm
 		}
 	}
 
-	static void _lock_shareable_cache(u8 value, u32 addr, u32 size)
+	static void _lock_shareable_cache(u64 flags, u32 addr, u32 size)
 	{
+		// Can't do 512 MiB or more at once
+		if (size >= 1024 * 1024 * 512)
+		{
+			fmt::throw_exception("Failed to lock range (flags=0x%x, addr=0x%x, size=0x%x)" HERE, flags >> 32, addr, size);
+		}
+
 		// Block new range locks
-		g_addr_lock = addr | u64{size} << 32;
+		g_range_lock = addr | u64{size} << 35 | flags;
 
 		clear_range_locks(addr, size);
 	}
@@ -423,7 +429,7 @@ namespace vm
 			addr = addr & 0xffff;
 		}
 
-		g_addr_lock = addr | (u64{128} << 32);
+		g_range_lock = addr | (u64{128} << 35) | range_updated;
 
 		const auto range = utils::address_range::start_length(addr, 128);
@@ -473,7 +479,7 @@
 
 	writer_lock::~writer_lock()
 	{
-		g_addr_lock.release(0);
+		g_range_lock.release(0);
 		g_mutex.unlock();
 	}
@@ -595,7 +601,7 @@
 		if (shm && shm->flags() != 0 && shm->info++)
 		{
 			// Memory mirror found, map its range as shareable
-			_lock_shareable_cache(1, addr, size);
+			_lock_shareable_cache(range_allocated, addr, size);
 
 			for (u32 i = addr / 65536; i < addr / 65536 + size / 65536; i++)
 			{
@@ -614,8 +620,8 @@
 		{
 			auto& [size2, ptr] = pp->second;
 
-			// Relock cache
-			_lock_shareable_cache(1, pp->first, size2);
+			// Relock cache (TODO: check page flags for this range)
+			_lock_shareable_cache(range_updated, pp->first, size2);
 
 			for (u32 i = pp->first / 65536; i < pp->first / 65536 + size2 / 65536; i++)
 			{
@@ -627,7 +633,7 @@
 		}
 
 		// Unlock
-		g_addr_lock.release(0);
+		g_range_lock.release(0);
 	}
 
 	// Notify rsx that range has become valid
@@ -697,6 +703,7 @@ namespace vm
 
 		u8 start_value = 0xff;
 		u8 shareable = 0;
+		u8 old_val = 0;
 
 		for (u32 start = addr / 4096, end = start + size / 4096, i = start; i < end + 1; i++)
 		{
@@ -707,23 +714,30 @@ namespace vm
 				new_val = g_pages[i].flags;
 				new_val |= flags_set;
 				new_val &= ~flags_clear;
-
-				shareable = g_shareable[i / 16];
 			}
 
-			if (new_val != start_value || g_shareable[i / 16] != shareable)
+			if (new_val != start_value || g_shareable[i / 16] != shareable || g_pages[i].flags != old_val)
 			{
 				if (u32 page_size = (i - start) * 4096)
 				{
+					u64 safe_bits = 0;
+
+					if (old_val & new_val & page_readable)
+						safe_bits |= range_readable;
+					if (old_val & new_val & page_writable && safe_bits & range_readable)
+						safe_bits |= range_writable;
+					if (old_val & new_val & page_executable && safe_bits & range_readable)
+						safe_bits |= range_executable;
+
+					// Protect range locks from observing changes in memory protection
 					if (shareable)
 					{
-						// Unoptimized
-						_lock_shareable_cache(2, 0, 0x10000);
+						// TODO
+						_lock_shareable_cache(range_deallocated, 0, 0x10000);
 					}
 					else
 					{
-						_lock_shareable_cache(2, start * 4096, page_size);
+						_lock_shareable_cache(safe_bits, start * 4096, page_size);
 					}
 
 					for (u32 j = start; j < i; j++)
@@ -738,12 +752,14 @@
 					}
 				}
 
+				old_val = g_pages[i].flags;
+				shareable = g_shareable[i / 16];
 				start_value = new_val;
 				start = i;
 			}
 		}
 
-		g_addr_lock.release(0);
+		g_range_lock.release(0);
 
 		return true;
 	}
@@ -781,8 +797,8 @@
 
 		if (shm && shm->flags() != 0 && (--shm->info || g_shareable[addr >> 16]))
 		{
-			// Remove mirror from shareable cache
-			_lock_shareable_cache(3, 0, 0x10000);
+			// Remove mirror from shareable cache (TODO)
+			_lock_shareable_cache(range_updated, 0, 0x10000);
 
 			for (u32 i = addr / 65536; i < addr / 65536 + size / 65536; i++)
 			{
@@ -791,7 +807,7 @@
 		}
 
 		// Protect range locks from actual memory protection changes
-		_lock_shareable_cache(3, addr, size);
+		_lock_shareable_cache(range_deallocated, addr, size);
 
 		for (u32 i = addr / 4096; i < addr / 4096 + size / 4096; i++)
 		{
@@ -834,7 +850,7 @@
 		}
 
 		// Unlock
-		g_addr_lock.release(0);
+		g_range_lock.release(0);
 
 		return size;
 	}
diff --git a/rpcs3/Emu/Memory/vm_locking.h b/rpcs3/Emu/Memory/vm_locking.h
index 9815a69882..0d6bf71610 100644
--- a/rpcs3/Emu/Memory/vm_locking.h
+++ b/rpcs3/Emu/Memory/vm_locking.h
@@ -11,7 +11,24 @@ namespace vm
 
 	extern thread_local atomic_t<cpu_thread*>* g_tls_locked;
 
-	extern atomic_t<u64> g_addr_lock;
+	enum range_lock_flags : u64
+	{
+		/* flags (3 bits) */
+
+		range_readable = 1ull << 32,
+		range_writable = 2ull << 32,
+		range_executable = 4ull << 32,
+		range_all_mask = 7ull << 32,
+
+		/* flag combinations with special meaning */
+
+		range_normal = 3ull << 32, // R+W
+		range_updated = 2ull << 32, // R+W as well but do not
+		range_allocated = 4ull << 32, // No safe access
+		range_deallocated = 0, // No safe access
+	};
+
+	extern atomic_t<u64> g_range_lock;
 
 	extern atomic_t<u8> g_shareable[];
 
@@ -26,9 +43,9 @@ namespace vm
 	// Lock memory range
 	FORCE_INLINE void range_lock(atomic_t<u64>& res, atomic_t<u64, 64>* range_lock, u32 begin, u32 size)
 	{
-		const u64 lock_val = g_addr_lock.load();
+		const u64 lock_val = g_range_lock.load();
 		const u64 lock_addr = static_cast<u32>(lock_val); // -> u64
-		const u32 lock_size = static_cast<u32>(lock_val >> 32);
+		const u32 lock_size = static_cast<u32>(lock_val >> 35);
 
 		u64 addr = begin;
 
@@ -42,7 +59,7 @@ namespace vm
 		// Optimistic locking
 		range_lock->release(begin | (u64{size} << 32));
 
-		const u64 new_lock_val = g_addr_lock.load();
+		const u64 new_lock_val = g_range_lock.load();
 
 		if ((!new_lock_val || new_lock_val == lock_val) && !(res.load() & 127)) [[likely]]
 		{
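
For reference, the packed layout of g_range_lock introduced by this patch is: address in bits 0-31, the three range_lock_flags bits in bits 32-34, and the size (kept below 512 MiB) in bits 35-63. The sketch below only illustrates that encoding under those assumptions; it is not part of the patch, and the helper names are hypothetical.

#include <cstdint>

// Hypothetical helpers mirroring the packing done in _lock_shareable_cache()
// and the unpacking done in range_lock() above.
// 'flags' is one of the range_lock_flags values, already shifted into bits 32-34.
constexpr std::uint64_t pack_range_lock(std::uint32_t addr, std::uint32_t size, std::uint64_t flags)
{
	// size must stay below 512 MiB (2^29) so it fits above bit 35;
	// the new check in _lock_shareable_cache() throws on larger ranges
	return addr | (std::uint64_t{size} << 35) | flags;
}

constexpr std::uint32_t range_lock_addr(std::uint64_t v)
{
	return static_cast<std::uint32_t>(v); // bits 0-31
}

constexpr std::uint32_t range_lock_size(std::uint64_t v)
{
	return static_cast<std::uint32_t>(v >> 35); // bits 35-63
}

constexpr std::uint64_t range_lock_flags_of(std::uint64_t v)
{
	return v & (7ull << 32); // bits 32-34, i.e. range_all_mask
}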