Revert "Remove shared_cond and simplify reservation waiting"

This reverts commit 0a96497e13.
2024-11-16 23:17:29 +00:00 · 2019-09-13 23:54:43 +03:00 · 2019-09-13 23:54:43 +03:00 · b70c08a2e8
commit b70c08a2e8
parent c9170bbb88
7 changed files with 349 additions and 3 deletions
--- a/Utilities/cond.cpp
+++ b/Utilities/cond.cpp
@ -56,3 +56,80 @@ void cond_variable::imp_wake(u32 _count) noexcept
 		m_value.notify_one();
 	}
 }
+
+bool shared_cond::imp_wait(u32 slot, u64 _timeout) noexcept
+{
+	if (slot >= 32)
+	{
+		// Invalid argument, assume notified
+		return true;
+	}
+
+	const u64 wait_bit = c_wait << slot;
+	const u64 lock_bit = c_lock << slot;
+
+	// Change state from c_lock to c_wait
+	const u64 old_ = m_cvx32.fetch_op([=](u64& cvx32)
+	{
+		if (cvx32 & wait_bit)
+		{
+			// c_lock -> c_wait
+			cvx32 &= ~(lock_bit & ~wait_bit);
+		}
+		else
+		{
+			// c_sig -> c_lock
+			cvx32 |= lock_bit;
+		}
+	});
+
+	if ((old_ & wait_bit) == 0)
+	{
+		// Already signaled, return without waiting
+		return true;
+	}
+
+	return balanced_wait_until(m_cvx32, _timeout, [&](u64& cvx32, auto... ret) -> int
+	{
+		if ((cvx32 & wait_bit) == 0)
+		{
+			// c_sig -> c_lock
+			cvx32 |= lock_bit;
+			return +1;
+		}
+
+		if constexpr (sizeof...(ret))
+		{
+			// Retire
+			cvx32 |= lock_bit;
+			return -1;
+		}
+
+		return 0;
+	});
+}
+
+void shared_cond::imp_notify() noexcept
+{
+	auto [old, ok] = m_cvx32.fetch_op([](u64& cvx32)
+	{
+		if (const u64 sig_mask = cvx32 & 0xffffffff)
+		{
+			cvx32 &= 0xffffffffull << 32;
+			cvx32 |= sig_mask << 32;
+			return true;
+		}
+
+		return false;
+	});
+
+	// Determine if some waiters need a syscall notification
+	const u64 wait_mask = old & (~old >> 32);
+
+	if (UNLIKELY(!ok || !wait_mask))
+	{
+		return;
+	}
+
+	balanced_awaken<true>(m_cvx32, utils::popcnt32(wait_mask));
+}
--- a/Utilities/cond.h
+++ b/Utilities/cond.h
@ -94,3 +94,98 @@ public:

 	static constexpr u64 max_timeout = UINT64_MAX / 1000;
 };
+
+// Condition variable fused with a pseudo-mutex supporting only reader locks (up to 32 readers).
+class shared_cond
+{
+	// For information, shouldn't modify
+	enum : u64
+	{
+		// Wait bit is aligned for compatibility with 32-bit futex.
+		c_wait = 1,
+		c_sig  = 1ull << 32,
+		c_lock = 1ull << 32 | 1,
+	};
+
+	// Split in 32-bit parts for convenient bit combining
+	atomic_t<u64> m_cvx32{0};
+
+	class shared_lock
+	{
+		shared_cond* m_this;
+		u32 m_slot;
+
+		friend class shared_cond;
+
+	public:
+		shared_lock(shared_cond* _this) noexcept
+			: m_this(_this)
+		{
+			// Lock and remember obtained slot index
+			m_slot = m_this->m_cvx32.atomic_op([](u64& cvx32)
+			{
+				// Combine used bits and invert to find least significant bit unused
+				const u32 slot = static_cast<u32>(utils::cnttz64(~((cvx32 & 0xffffffff) | (cvx32 >> 32)), true));
+
+				// Set lock bits (does nothing if all slots are used)
+				const u64 bit = (1ull << slot) & 0xffffffff;
+				cvx32 |= bit | (bit << 32);
+				return slot;
+			});
+		}
+
+		shared_lock(const shared_lock&) = delete;
+
+		shared_lock(shared_lock&& rhs)
+			: m_this(rhs.m_this)
+			, m_slot(rhs.m_slot)
+		{
+			rhs.m_slot = 32;
+		}
+
+		shared_lock& operator=(const shared_lock&) = delete;
+
+		~shared_lock()
+		{
+			// Clear the slot (does nothing if all slots are used)
+			const u64 bit = (1ull << m_slot) & 0xffffffff;
+			m_this->m_cvx32 &= ~(bit | (bit << 32));
+		}
+
+		explicit operator bool() const noexcept
+		{
+			// Check success
+			return m_slot < 32;
+		}
+
+		bool wait(u64 usec_timeout = -1) const noexcept
+		{
+			return m_this->wait(*this, usec_timeout);
+		}
+	};
+
+	bool imp_wait(u32 slot, u64 _timeout) noexcept;
+	void imp_notify() noexcept;
+
+public:
+	constexpr shared_cond() = default;
+
+	shared_lock try_shared_lock() noexcept
+	{
+		return shared_lock(this);
+	}
+
+	bool wait(shared_lock const& lock, u64 usec_timeout = -1) noexcept
+	{
+		AUDIT(lock.m_this == this);
+		return imp_wait(lock.m_slot, usec_timeout);
+	}
+
+	void notify_all() noexcept
+	{
+		if (LIKELY(!m_cvx32))
+			return;
+
+		imp_notify();
+	}
+};
--- a/Utilities/sync.h
+++ b/Utilities/sync.h
@ -149,3 +149,160 @@ inline int futex(volatile void* uaddr, int futex_op, uint val, const timespec* t
 	return g_futex(uaddr, futex_op, val, timeout, mask);
 #endif
 }
+
+template <typename T, typename Pred>
+bool balanced_wait_until(atomic_t<T>& var, u64 usec_timeout, Pred&& pred)
+{
+	static_assert(sizeof(T) == 4 || sizeof(T) == 8);
+
+	const bool is_inf = usec_timeout > u64{UINT32_MAX / 1000} * 1000000;
+
+	// Optional second argument indicates that the predicate should try to retire
+	auto test_pred = [&](T& _new, auto... args)
+	{
+		T old = var.load();
+
+		while (true)
+		{
+			_new = old;
+
+			// Zero indicates failure without modifying the value
+			// Negative indicates failure but modifies the value
+			auto ret = std::invoke(std::forward<Pred>(pred), _new, args...);
+
+			if (LIKELY(!ret || var.compare_exchange(old, _new)))
+			{
+				return ret > 0;
+			}
+		}
+	};
+
+	T value;
+
+#ifdef _WIN32
+	if (OptWaitOnAddress)
+	{
+		while (!test_pred(value))
+		{
+			if (OptWaitOnAddress(&var, &value, sizeof(T), is_inf ? INFINITE : usec_timeout / 1000))
+			{
+				if (!test_pred(value, nullptr))
+				{
+					return false;
+				}
+
+				break;
+			}
+
+			if (GetLastError() == ERROR_TIMEOUT)
+			{
+				// Retire
+				return test_pred(value, nullptr);
+			}
+		}
+
+		return true;
+	}
+
+	LARGE_INTEGER timeout;
+	timeout.QuadPart = usec_timeout * -10;
+
+	if (!usec_timeout || NtWaitForKeyedEvent(nullptr, &var, false, is_inf ? nullptr : &timeout))
+	{
+		// Timed out: retire
+		if (!test_pred(value, nullptr))
+		{
+			return false;
+		}
+
+		// Signaled in the last moment: restore balance
+		NtWaitForKeyedEvent(nullptr, &var, false, nullptr);
+		return true;
+	}
+
+	if (!test_pred(value, nullptr))
+	{
+		// Stolen notification: restore balance
+		NtReleaseKeyedEvent(nullptr, &var, false, nullptr);
+		return false;
+	}
+
+	return true;
+#else
+	struct timespec timeout;
+	timeout.tv_sec  = usec_timeout / 1000000;
+	timeout.tv_nsec = (usec_timeout % 1000000) * 1000;
+
+	char* ptr = reinterpret_cast<char*>(&var);
+
+	if constexpr (sizeof(T) == 8)
+	{
+		ptr += 4 * IS_BE_MACHINE;
+	}
+
+	while (!test_pred(value))
+	{
+		if (futex(ptr, FUTEX_WAIT_PRIVATE, static_cast<u32>(value), is_inf ? nullptr : &timeout) == 0)
+		{
+			if (!test_pred(value, nullptr))
+			{
+				return false;
+			}
+
+			break;
+		}
+
+		switch (errno)
+		{
+		case EAGAIN: break;
+		case ETIMEDOUT: return test_pred(value, nullptr);
+		default: verify("Unknown futex error" HERE), 0;
+		}
+	}
+
+	return true;
+#endif
+}
+
+template <bool All = false, typename T>
+void balanced_awaken(atomic_t<T>& var, u32 weight)
+{
+	static_assert(sizeof(T) == 4 || sizeof(T) == 8);
+
+#ifdef _WIN32
+	if (OptWaitOnAddress)
+	{
+		if (All || weight > 3)
+		{
+			OptWakeByAddressAll(&var);
+			return;
+		}
+
+		for (u32 i = 0; i < weight; i++)
+		{
+			OptWakeByAddressSingle(&var);
+		}
+
+		return;
+	}
+
+	for (u32 i = 0; i < weight; i++)
+	{
+		NtReleaseKeyedEvent(nullptr, &var, false, nullptr);
+	}
+#else
+	char* ptr = reinterpret_cast<char*>(&var);
+
+	if constexpr (sizeof(T) == 8)
+	{
+		ptr += 4 * IS_BE_MACHINE;
+	}
+
+	if (All || weight)
+	{
+		futex(ptr, FUTEX_WAKE_PRIVATE, All ? INT_MAX : std::min<u32>(INT_MAX, weight));
+	}
+
+	return;
+#endif
+}
--- a/rpcs3/Emu/Cell/SPUThread.cpp
+++ b/rpcs3/Emu/Cell/SPUThread.cpp
@ -2385,6 +2385,13 @@ s64 spu_thread::get_ch_value(u32 ch)
 				fmt::throw_exception("Not supported: event mask 0x%x" HERE, mask1);
 			}

+			const auto pseudo_lock = vm::reservation_notifier(raddr, 128).try_shared_lock();
+
+			if (!pseudo_lock)
+			{
+				fmt::throw_exception("Unexpected: reservation notifier lock failed");
+			}
+
 			while (res = get_events(), !res)
 			{
 				state += cpu_flag::wait;
@ -2394,7 +2401,7 @@ s64 spu_thread::get_ch_value(u32 ch)
 					return -1;
 				}

-				vm::reservation_notifier(raddr, 128).wait<UINT64_MAX & -128>(rtime, atomic_wait_timeout{30000});
+				pseudo_lock.wait(100);
 			}

 			check_state();
--- a/rpcs3/Emu/Memory/vm.cpp
+++ b/rpcs3/Emu/Memory/vm.cpp
@ -17,6 +17,8 @@
 #include <thread>
 #include <deque>

+static_assert(sizeof(shared_cond) == 8, "Unexpected size of shared_cond");
+
 namespace vm
 {
 	static u8* memory_reserve_4GiB(std::uintptr_t _addr = 0)
@ -48,6 +50,9 @@ namespace vm
 	// Reservation stats (compressed x16)
 	u8* const g_reservations = memory_reserve_4GiB((std::uintptr_t)g_stat_addr);

+	// Reservation sync variables
+	u8* const g_reservations2 = g_reservations + 0x10000000;
+
 	// Memory locations
 	std::vector<std::shared_ptr<block_t>> g_locations;

@ -629,9 +634,11 @@ namespace vm
 			if (addr == 0x10000)
 			{
 				utils::memory_commit(g_reservations, 0x1000);
+				utils::memory_commit(g_reservations2, 0x1000);
 			}

 			utils::memory_commit(g_reservations + addr / 16, size / 16);
+			utils::memory_commit(g_reservations2 + addr / 16, size / 16);
 		}
 		else
 		{
@ -639,10 +646,12 @@ namespace vm
 			for (u32 i = 0; i < 6; i++)
 			{
 				utils::memory_commit(g_reservations + addr / 16 + i * 0x10000, 0x4000);
+				utils::memory_commit(g_reservations2 + addr / 16 + i * 0x10000, 0x4000);
 			}

 			// End of the address space
 			utils::memory_commit(g_reservations + 0xfff0000, 0x10000);
+			utils::memory_commit(g_reservations2 + 0xfff0000, 0x10000);
 		}

 		if (flags & 0x100)
--- a/rpcs3/Emu/Memory/vm.h
+++ b/rpcs3/Emu/Memory/vm.h
@ -14,6 +14,7 @@ namespace vm
 	extern u8* const g_exec_addr;
 	extern u8* const g_stat_addr;
 	extern u8* const g_reservations;
+	extern u8* const g_reservations2;

 	struct writer_lock;

--- a/rpcs3/Emu/Memory/vm_reservation.h
+++ b/rpcs3/Emu/Memory/vm_reservation.h
@ -21,9 +21,9 @@ namespace vm
 	}

 	// Get reservation sync variable
-	inline atomic_t<u64>& reservation_notifier(u32 addr, u32 size)
+	inline shared_cond& reservation_notifier(u32 addr, u32 size)
 	{
-		return reinterpret_cast<atomic_t<u64>*>(g_reservations)[addr / 128];
+		return *reinterpret_cast<shared_cond*>(g_reservations2 + addr / 128 * 8);
 	}

 	void reservation_lock_internal(atomic_t<u64>&);