From 80530e8aef742399008d6e8ec3bed8ab398403fa Mon Sep 17 00:00:00 2001
From: Nekotekina <nekotekina@gmail.com>
Date: Sat, 31 Oct 2020 13:33:27 +0300
Subject: [PATCH] vm: rename g_addr_lock to g_range_lock

Reduce the size field to 29 bits and use the freed 3 bits to communicate range permission flags.
This information can be used to do very cheap lock-free access tests.
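
For reference, a minimal sketch of the new g_range_lock encoding (shift
amounts and names are taken from the hunks below; encode_range_lock is only
an illustrative helper, and reading the flag bits as "accesses that remain
safe during the lock" follows the safe_bits computation below):

    // Layout of g_range_lock (see vm_locking.h):
    //   bits  0..31  start address of the locked range
    //   bits 32..34  range_lock_flags (accesses that remain safe)
    //   bits 35..63  size of the locked range (hence the 512 MiB limit)
    u64 encode_range_lock(u32 addr, u32 size, u64 flags) // illustrative only
    {
        return addr | u64{size} << 35 | flags;
    }

    // Reader side, as in range_lock():
    const u64 lock_val  = g_range_lock.load();
    const u64 lock_addr = static_cast<u32>(lock_val);
    const u32 lock_size = static_cast<u32>(lock_val >> 35);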
---
 rpcs3/Emu/Memory/vm.cpp       | 58 ++++++++++++++++++++++-------------
 rpcs3/Emu/Memory/vm_locking.h | 25 ++++++++++++---
 2 files changed, 58 insertions(+), 25 deletions(-)

diff --git a/rpcs3/Emu/Memory/vm.cpp b/rpcs3/Emu/Memory/vm.cpp
index 4f350885e0..e28053e676 100644
--- a/rpcs3/Emu/Memory/vm.cpp
+++ b/rpcs3/Emu/Memory/vm.cpp
@@ -63,8 +63,8 @@ namespace vm
 	// Memory mutex acknowledgement
 	thread_local atomic_t<cpu_thread*>* g_tls_locked = nullptr;
 
-	// Currently locked cache line
-	atomic_t<u64> g_addr_lock = 0;
+	// "Unique locked" range lock, as opposed to "shared" range locks from set
+	atomic_t<u64> g_range_lock = 0;
 
 	// Memory mutex: passive locks
 	std::array<atomic_t<cpu_thread*>, g_cfg.core.ppu_threads.max> g_locks{};
@@ -252,10 +252,16 @@ namespace vm
 		}
 	}
 
-	static void _lock_shareable_cache(u8 value, u32 addr, u32 size)
+	static void _lock_shareable_cache(u64 flags, u32 addr, u32 size)
 	{
+		// Can't do 512 MiB or more at once
+		if (size >= 1024 * 1024 * 512)
+		{
+			fmt::throw_exception("Failed to lock range (flags=0x%x, addr=0x%x, size=0x%x)" HERE, flags >> 32, addr, size);
+		}
+
 		// Block new range locks
-		g_addr_lock = addr | u64{size} << 32;
+		g_range_lock = addr | u64{size} << 35 | flags;
 
 		clear_range_locks(addr, size);
 	}
@@ -423,7 +429,7 @@ namespace vm
 				addr = addr & 0xffff;
 			}
 
-			g_addr_lock = addr | (u64{128} << 32);
+			g_range_lock = addr | (u64{128} << 35) | range_updated;
 
 			const auto range = utils::address_range::start_length(addr, 128);
 
@@ -473,7 +479,7 @@ namespace vm
 
 	writer_lock::~writer_lock()
 	{
-		g_addr_lock.release(0);
+		g_range_lock.release(0);
 		g_mutex.unlock();
 	}
 
@@ -595,7 +601,7 @@ namespace vm
 		if (shm && shm->flags() != 0 && shm->info++)
 		{
 			// Memory mirror found, map its range as shareable
-			_lock_shareable_cache(1, addr, size);
+			_lock_shareable_cache(range_allocated, addr, size);
 
 			for (u32 i = addr / 65536; i < addr / 65536 + size / 65536; i++)
 			{
@@ -614,8 +620,8 @@ namespace vm
 						{
 							auto& [size2, ptr] = pp->second;
 
-							// Relock cache
-							_lock_shareable_cache(1, pp->first, size2);
+							// Relock cache (TODO: check page flags for this range)
+							_lock_shareable_cache(range_updated, pp->first, size2);
 
 							for (u32 i = pp->first / 65536; i < pp->first / 65536 + size2 / 65536; i++)
 							{
@@ -627,7 +633,7 @@ namespace vm
 			}
 
 			// Unlock
-			g_addr_lock.release(0);
+			g_range_lock.release(0);
 		}
 
 		// Notify rsx that range has become valid
@@ -697,6 +703,7 @@ namespace vm
 
 		u8 start_value = 0xff;
 		u8 shareable = 0;
+		u8 old_val = 0;
 
 		for (u32 start = addr / 4096, end = start + size / 4096, i = start; i < end + 1; i++)
 		{
@@ -707,23 +714,30 @@ namespace vm
 				new_val = g_pages[i].flags;
 				new_val |= flags_set;
 				new_val &= ~flags_clear;
-
-				shareable = g_shareable[i / 16];
 			}
 
-			if (new_val != start_value || g_shareable[i / 16] != shareable)
+			if (new_val != start_value || g_shareable[i / 16] != shareable || g_pages[i].flags != old_val)
 			{
 				if (u32 page_size = (i - start) * 4096)
 				{
+					u64 safe_bits = 0;
+
+					if (old_val & new_val & page_readable)
+						safe_bits |= range_readable;
+					if (old_val & new_val & page_writable && safe_bits & range_readable)
+						safe_bits |= range_writable;
+					if (old_val & new_val & page_executable && safe_bits & range_readable)
+						safe_bits |= range_executable;
+
 					// Protect range locks from observing changes in memory protection
 					if (shareable)
 					{
-						// Unoptimized
-						_lock_shareable_cache(2, 0, 0x10000);
+						// TODO
+						_lock_shareable_cache(range_deallocated, 0, 0x10000);
 					}
 					else
 					{
-						_lock_shareable_cache(2, start * 4096, page_size);
+						_lock_shareable_cache(safe_bits, start * 4096, page_size);
 					}
 
 					for (u32 j = start; j < i; j++)
@@ -738,12 +752,14 @@ namespace vm
 					}
 				}
 
+				old_val = g_pages[i].flags;
+				shareable = g_shareable[i / 16];
 				start_value = new_val;
 				start = i;
 			}
 		}
 
-		g_addr_lock.release(0);
+		g_range_lock.release(0);
 
 		return true;
 	}
@@ -781,8 +797,8 @@ namespace vm
 
 		if (shm && shm->flags() != 0 && (--shm->info || g_shareable[addr >> 16]))
 		{
-			// Remove mirror from shareable cache
-			_lock_shareable_cache(3, 0, 0x10000);
+			// Remove mirror from shareable cache (TODO)
+			_lock_shareable_cache(range_updated, 0, 0x10000);
 
 			for (u32 i = addr / 65536; i < addr / 65536 + size / 65536; i++)
 			{
@@ -791,7 +807,7 @@ namespace vm
 		}
 
 		// Protect range locks from actual memory protection changes
-		_lock_shareable_cache(3, addr, size);
+		_lock_shareable_cache(range_deallocated, addr, size);
 
 		for (u32 i = addr / 4096; i < addr / 4096 + size / 4096; i++)
 		{
@@ -834,7 +850,7 @@ namespace vm
 		}
 
 		// Unlock
-		g_addr_lock.release(0);
+		g_range_lock.release(0);
 
 		return size;
 	}
diff --git a/rpcs3/Emu/Memory/vm_locking.h b/rpcs3/Emu/Memory/vm_locking.h
index 9815a69882..0d6bf71610 100644
--- a/rpcs3/Emu/Memory/vm_locking.h
+++ b/rpcs3/Emu/Memory/vm_locking.h
@@ -11,7 +11,24 @@ namespace vm
 
 	extern thread_local atomic_t<cpu_thread*>* g_tls_locked;
 
-	extern atomic_t<u64> g_addr_lock;
+	enum range_lock_flags : u64
+	{
+		/* flags (3 bits) */
+
+		range_readable = 1ull << 32,
+		range_writable = 2ull << 32,
+		range_executable = 4ull << 32,
+		range_all_mask = 7ull << 32,
+
+		/* flag combinations with special meaning */
+
+		range_normal = 3ull << 32, // R+W
+		range_updated = 2ull << 32, // R+W as well but do not
+		range_allocated = 4ull << 32, // No safe access
+		range_deallocated = 0, // No safe access
+	};
+
+	extern atomic_t<u64> g_range_lock;
 
 	extern atomic_t<u8> g_shareable[];
 
@@ -26,9 +43,9 @@ namespace vm
 	// Lock memory range
 	FORCE_INLINE void range_lock(atomic_t<u64>& res, atomic_t<u64, 64>* range_lock, u32 begin, u32 size)
 	{
-		const u64 lock_val = g_addr_lock.load();
+		const u64 lock_val = g_range_lock.load();
 		const u64 lock_addr = static_cast<u32>(lock_val); // -> u64
-		const u32 lock_size = static_cast<u32>(lock_val >> 32);
+		const u32 lock_size = static_cast<u32>(lock_val >> 35);
 
 		u64 addr = begin;
 
@@ -42,7 +59,7 @@ namespace vm
 			// Optimistic locking
 			range_lock->release(begin | (u64{size} << 32));
 
-			const u64 new_lock_val = g_addr_lock.load();
+			const u64 new_lock_val = g_range_lock.load();
 
 			if ((!new_lock_val || new_lock_val == lock_val) && !(res.load() & 127)) [[likely]]
 			{