diff --git a/Utilities/cond.cpp b/Utilities/cond.cpp
index d5fdc82e1c..f62aab8686 100644
--- a/Utilities/cond.cpp
+++ b/Utilities/cond.cpp
@@ -56,3 +56,80 @@ void cond_variable::imp_wake(u32 _count) noexcept
 		m_value.notify_one();
 	}
 }
+
+bool shared_cond::imp_wait(u32 slot, u64 _timeout) noexcept
+{
+	if (slot >= 32)
+	{
+		// Invalid argument, assume notified
+		return true;
+	}
+
+	const u64 wait_bit = c_wait << slot;
+	const u64 lock_bit = c_lock << slot;
+
+	// Change state from c_lock to c_wait
+	const u64 old_ = m_cvx32.fetch_op([=](u64& cvx32)
+	{
+		if (cvx32 & wait_bit)
+		{
+			// c_lock -> c_wait
+			cvx32 &= ~(lock_bit & ~wait_bit);
+		}
+		else
+		{
+			// c_sig -> c_lock
+			cvx32 |= lock_bit;
+		}
+	});
+
+	if ((old_ & wait_bit) == 0)
+	{
+		// Already signaled, return without waiting
+		return true;
+	}
+
+	return balanced_wait_until(m_cvx32, _timeout, [&](u64& cvx32, auto... ret) -> int
+	{
+		if ((cvx32 & wait_bit) == 0)
+		{
+			// c_sig -> c_lock
+			cvx32 |= lock_bit;
+			return +1;
+		}
+
+		if constexpr (sizeof...(ret))
+		{
+			// Retire
+			cvx32 |= lock_bit;
+			return -1;
+		}
+
+		return 0;
+	});
+}
+
+void shared_cond::imp_notify() noexcept
+{
+	auto [old, ok] = m_cvx32.fetch_op([](u64& cvx32)
+	{
+		if (const u64 sig_mask = cvx32 & 0xffffffff)
+		{
+			cvx32 &= 0xffffffffull << 32;
+			cvx32 |= sig_mask << 32;
+			return true;
+		}
+
+		return false;
+	});
+
+	// Determine if some waiters need a syscall notification
+	const u64 wait_mask = old & (~old >> 32);
+
+	if (UNLIKELY(!ok || !wait_mask))
+	{
+		return;
+	}
+
+	balanced_awaken(m_cvx32, utils::popcnt32(wait_mask));
+}
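
Each reader slot occupies one bit in each 32-bit half of `m_cvx32`: both bits set is `c_lock`, only the low bit is `c_wait`, only the high bit is `c_sig`. `imp_notify` converts every used low bit into a "signaled" high bit in one atomic step, then computes which slots were actually asleep. The sketch below (standalone and illustrative, not part of the patch; the `main` driver and sample values are assumptions) traces that transfer and shows why `old & (~old >> 32)` selects exactly the slots parked in the kernel:

```cpp
// Models shared_cond's notify_all bit transfer, assuming the bit layout
// declared in the enum in cond.h (illustrative sample values).
#include <cstdint>
#include <cstdio>

using u64 = std::uint64_t;

enum : u64
{
	c_wait = 1,          // low half only: slot is sleeping
	c_sig  = 1ull << 32, // high half only: slot was signaled
	c_lock = 1ull << 32 | 1,
};

int main()
{
	// Slot 0 holds the pseudo-lock, slot 1 has dropped to c_wait
	u64 cvx32 = (c_lock << 0) | (c_wait << 1);
	const u64 old = cvx32;

	// imp_notify: every used low bit becomes a "signaled" high bit
	const u64 sig_mask = cvx32 & 0xffffffff;
	cvx32 &= 0xffffffffull << 32;
	cvx32 |= sig_mask << 32;

	// Low bit set, high bit clear: the slot was in c_wait and sleeps in
	// the kernel, so only these slots need balanced_awaken
	const u64 wait_mask = old & (~old >> 32);

	std::printf("wait_mask = 0x%llx\n", (unsigned long long)wait_mask); // 0x2
}
```
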
diff --git a/Utilities/cond.h b/Utilities/cond.h
index 82e3959145..94ac173d13 100644
--- a/Utilities/cond.h
+++ b/Utilities/cond.h
@@ -94,3 +94,98 @@ public:
 
 	static constexpr u64 max_timeout = UINT64_MAX / 1000;
 };
+
+// Condition variable fused with a pseudo-mutex supporting only reader locks (up to 32 readers).
+class shared_cond
+{
+	// For information, shouldn't modify
+	enum : u64
+	{
+		// Wait bit is aligned for compatibility with 32-bit futex.
+		c_wait = 1,
+		c_sig = 1ull << 32,
+		c_lock = 1ull << 32 | 1,
+	};
+
+	// Split in 32-bit parts for convenient bit combining
+	atomic_t<u64> m_cvx32{0};
+
+	class shared_lock
+	{
+		shared_cond* m_this;
+		u32 m_slot;
+
+		friend class shared_cond;
+
+	public:
+		shared_lock(shared_cond* _this) noexcept
+			: m_this(_this)
+		{
+			// Lock and remember obtained slot index
+			m_slot = m_this->m_cvx32.atomic_op([](u64& cvx32)
+			{
+				// Combine used bits and invert to find least significant bit unused
+				const u32 slot = static_cast<u32>(utils::cnttz64(~((cvx32 & 0xffffffff) | (cvx32 >> 32)), true));
+
+				// Set lock bits (does nothing if all slots are used)
+				const u64 bit = (1ull << slot) & 0xffffffff;
+				cvx32 |= bit | (bit << 32);
+				return slot;
+			});
+		}
+
+		shared_lock(const shared_lock&) = delete;
+
+		shared_lock(shared_lock&& rhs)
+			: m_this(rhs.m_this)
+			, m_slot(rhs.m_slot)
+		{
+			rhs.m_slot = 32;
+		}
+
+		shared_lock& operator=(const shared_lock&) = delete;
+
+		~shared_lock()
+		{
+			// Clear the slot (does nothing if the slot index is invalid)
+			const u64 bit = (1ull << m_slot) & 0xffffffff;
+			m_this->m_cvx32 &= ~(bit | (bit << 32));
+		}
+
+		explicit operator bool() const noexcept
+		{
+			// Check success
+			return m_slot < 32;
+		}
+
+		bool wait(u64 usec_timeout = -1) const noexcept
+		{
+			return m_this->wait(*this, usec_timeout);
+		}
+	};
+
+	bool imp_wait(u32 slot, u64 _timeout) noexcept;
+	void imp_notify() noexcept;
+
+public:
+	constexpr shared_cond() = default;
+
+	shared_lock try_shared_lock() noexcept
+	{
+		return shared_lock(this);
+	}
+
+	bool wait(shared_lock const& lock, u64 usec_timeout = -1) noexcept
+	{
+		AUDIT(lock.m_this == this);
+		return imp_wait(lock.m_slot, usec_timeout);
+	}
+
+	void notify_all() noexcept
+	{
+		if (LIKELY(!m_cvx32))
+			return;
+
+		imp_notify();
+	}
+};
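
`try_shared_lock` finds a free slot by OR-ing the two halves into one 32-bit "used" mask and counting trailing zeros of its inverse; since bits 32..63 of that inverse are always set, the scan yields 32 when all slots are taken, and `(1ull << 32) & 0xffffffff == 0` turns both the lock and the unlock into no-ops for such a failed lock (which `imp_wait` then treats as "assume notified"). A standalone model of the search, assuming `utils::cnttz64(x, true)` behaves like `std::countr_zero` for nonzero `x` (the `true` flag presumably only promises a nonzero operand):

```cpp
// Models the slot search in shared_cond::shared_lock's constructor.
#include <bit>
#include <cstdint>
#include <cstdio>

using u64 = std::uint64_t;

// Returns the allocated slot index, or 32 if all 32 slots are taken.
static unsigned alloc_slot(u64& cvx32)
{
	// A slot is "used" if either its lock/wait bit (low half) or its
	// sig bit (high half) is set; combine both halves into one mask.
	const u64 used = (cvx32 & 0xffffffff) | (cvx32 >> 32);

	// ~used always has bit 32 set, so the scan never goes past 32.
	const unsigned slot = std::countr_zero(~used);

	// (1ull << 32) & 0xffffffff == 0, so a failed allocation is a no-op.
	const u64 bit = (1ull << slot) & 0xffffffff;
	cvx32 |= bit | (bit << 32);
	return slot;
}

int main()
{
	u64 cvx32 = 0;
	std::printf("%u\n", alloc_slot(cvx32)); // 0
	std::printf("%u\n", alloc_slot(cvx32)); // 1

	cvx32 = ~0ull;                          // all 32 slots busy
	std::printf("%u\n", alloc_slot(cvx32)); // 32 -> lock failed
}
```
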
diff --git a/Utilities/sync.h b/Utilities/sync.h
index caed15337a..d4fedeabea 100644
--- a/Utilities/sync.h
+++ b/Utilities/sync.h
@@ -149,3 +149,160 @@ inline int futex(volatile void* uaddr, int futex_op, uint val, const timespec* t
 	return g_futex(uaddr, futex_op, val, timeout, mask);
 #endif
 }
+
+template <typename T, typename Pred>
+bool balanced_wait_until(atomic_t<T>& var, u64 usec_timeout, Pred&& pred)
+{
+	static_assert(sizeof(T) == 4 || sizeof(T) == 8);
+
+	const bool is_inf = usec_timeout > u64{UINT32_MAX / 1000} * 1000000;
+
+	// Optional second argument indicates that the predicate should try to retire
+	auto test_pred = [&](T& _new, auto... args)
+	{
+		T old = var.load();
+
+		while (true)
+		{
+			_new = old;
+
+			// Zero indicates failure without modifying the value
+			// Negative indicates failure but modifies the value
+			auto ret = std::invoke(std::forward<Pred>(pred), _new, args...);
+
+			if (LIKELY(!ret || var.compare_exchange(old, _new)))
+			{
+				return ret > 0;
+			}
+		}
+	};
+
+	T value;
+
+#ifdef _WIN32
+	if (OptWaitOnAddress)
+	{
+		while (!test_pred(value))
+		{
+			if (OptWaitOnAddress(&var, &value, sizeof(T), is_inf ? INFINITE : usec_timeout / 1000))
+			{
+				if (!test_pred(value, nullptr))
+				{
+					return false;
+				}
+
+				break;
+			}
+
+			if (GetLastError() == ERROR_TIMEOUT)
+			{
+				// Retire
+				return test_pred(value, nullptr);
+			}
+		}
+
+		return true;
+	}
+
+	LARGE_INTEGER timeout;
+	timeout.QuadPart = usec_timeout * -10;
+
+	if (!usec_timeout || NtWaitForKeyedEvent(nullptr, &var, false, is_inf ? nullptr : &timeout))
+	{
+		// Timed out: retire
+		if (!test_pred(value, nullptr))
+		{
+			return false;
+		}
+
+		// Signaled in the last moment: restore balance
+		NtWaitForKeyedEvent(nullptr, &var, false, nullptr);
+		return true;
+	}
+
+	if (!test_pred(value, nullptr))
+	{
+		// Stolen notification: restore balance
+		NtReleaseKeyedEvent(nullptr, &var, false, nullptr);
+		return false;
+	}
+
+	return true;
+#else
+	struct timespec timeout;
+	timeout.tv_sec = usec_timeout / 1000000;
+	timeout.tv_nsec = (usec_timeout % 1000000) * 1000;
+
+	char* ptr = reinterpret_cast<char*>(&var);
+
+	if constexpr (sizeof(T) == 8)
+	{
+		ptr += 4 * IS_BE_MACHINE;
+	}
+
+	while (!test_pred(value))
+	{
+		if (futex(ptr, FUTEX_WAIT_PRIVATE, static_cast<u32>(value), is_inf ? nullptr : &timeout) == 0)
+		{
+			if (!test_pred(value, nullptr))
+			{
+				return false;
+			}
+
+			break;
+		}
+
+		switch (errno)
+		{
+		case EAGAIN: break;
+		case ETIMEDOUT: return test_pred(value, nullptr);
+		default: verify("Unknown futex error" HERE), 0;
+		}
+	}
+
+	return true;
+#endif
+}
+
+template <bool All = false, typename T>
+void balanced_awaken(atomic_t<T>& var, u32 weight)
+{
+	static_assert(sizeof(T) == 4 || sizeof(T) == 8);
+
+#ifdef _WIN32
+	if (OptWaitOnAddress)
+	{
+		if (All || weight > 3)
+		{
+			OptWakeByAddressAll(&var);
+			return;
+		}
+
+		for (u32 i = 0; i < weight; i++)
+		{
+			OptWakeByAddressSingle(&var);
+		}
+
+		return;
+	}
+
+	for (u32 i = 0; i < weight; i++)
+	{
+		NtReleaseKeyedEvent(nullptr, &var, false, nullptr);
+	}
+#else
+	char* ptr = reinterpret_cast<char*>(&var);
+
+	if constexpr (sizeof(T) == 8)
+	{
+		ptr += 4 * IS_BE_MACHINE;
+	}
+
+	if (All || weight)
+	{
+		futex(ptr, FUTEX_WAKE_PRIVATE, All ? INT_MAX : std::min<u32>(INT_MAX, weight));
+	}
+
+	return;
+#endif
+}
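
"Balanced" here means every kernel-level wake must be consumed by exactly one waiter: NT keyed events block the releaser until a waiter picks the event up, which is why both the "signaled in the last moment" re-wait and the "stolen notification" re-release exist. The predicate drives the loop through a three-way result: +1 claims success, 0 keeps waiting without touching the value, and -1 "retires", committing a failure state; the extra trailing argument (here `nullptr`) tells the predicate that retiring is now allowed. A toy model of that contract (standalone; the names and `main` driver are illustrative, not from the patch):

```cpp
// Models the test_pred CAS loop inside balanced_wait_until: the callback
// may adjust the value and returns +1 (acquired), 0 (keep waiting, value
// untouched) or -1 (retire: fail but commit the change).
#include <atomic>
#include <cstdint>
#include <cstdio>
#include <functional>

using u64 = std::uint64_t;

template <typename Pred>
bool test_pred(std::atomic<u64>& var, Pred&& pred, bool retire)
{
	u64 old = var.load();

	while (true)
	{
		u64 _new = old;
		const int ret = retire ? std::invoke(pred, _new, nullptr)
		                       : std::invoke(pred, _new);

		// On failure without modification (0) there is nothing to publish;
		// otherwise try to commit _new and start over on CAS contention.
		if (!ret || var.compare_exchange_strong(old, _new))
		{
			return ret > 0;
		}
	}
}

int main()
{
	std::atomic<u64> v{1}; // bit 0 = "signaled"

	auto pred = [](u64& x, auto... ret) -> int
	{
		if (x & 1) { x &= ~1ull; return +1; }           // consume the signal
		if constexpr (sizeof...(ret)) { return -1; }    // retire allowed: give up
		return 0;                                       // keep waiting
	};

	std::printf("%d\n", (int)test_pred(v, pred, false)); // 1: consumed
	std::printf("%d\n", (int)test_pred(v, pred, true));  // 0: retired
}
```
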
diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp
index 83a5adaf03..78edcd2e5c 100644
--- a/rpcs3/Emu/Cell/SPUThread.cpp
+++ b/rpcs3/Emu/Cell/SPUThread.cpp
@@ -2385,6 +2385,13 @@ s64 spu_thread::get_ch_value(u32 ch)
 			fmt::throw_exception("Not supported: event mask 0x%x" HERE, mask1);
 		}
 
+		const auto pseudo_lock = vm::reservation_notifier(raddr, 128).try_shared_lock();
+
+		if (!pseudo_lock)
+		{
+			fmt::throw_exception("Unexpected: reservation notifier lock failed");
+		}
+
 		while (res = get_events(), !res)
 		{
 			state += cpu_flag::wait;
@@ -2394,7 +2401,7 @@ s64 spu_thread::get_ch_value(u32 ch)
 				return -1;
 			}
 
-			vm::reservation_notifier(raddr, 128).wait(rtime, atomic_wait_timeout{30000});
+			pseudo_lock.wait(100);
 		}
 
 		check_state();
diff --git a/rpcs3/Emu/Memory/vm.cpp b/rpcs3/Emu/Memory/vm.cpp
index 56d6544fc9..314a2d580c 100644
--- a/rpcs3/Emu/Memory/vm.cpp
+++ b/rpcs3/Emu/Memory/vm.cpp
@@ -17,6 +17,8 @@
 #include 
 #include 
 
+static_assert(sizeof(shared_cond) == 8, "Unexpected size of shared_cond");
+
 namespace vm
 {
 	static u8* memory_reserve_4GiB(std::uintptr_t _addr = 0)
@@ -48,6 +50,9 @@ namespace vm
 	// Reservation stats (compressed x16)
 	u8* const g_reservations = memory_reserve_4GiB((std::uintptr_t)g_stat_addr);
 
+	// Reservation sync variables
+	u8* const g_reservations2 = g_reservations + 0x10000000;
+
 	// Memory locations
 	std::vector<std::shared_ptr<block_t>> g_locations;
 
@@ -629,9 +634,11 @@ namespace vm
 			if (addr == 0x10000)
 			{
 				utils::memory_commit(g_reservations, 0x1000);
+				utils::memory_commit(g_reservations2, 0x1000);
 			}
 
 			utils::memory_commit(g_reservations + addr / 16, size / 16);
+			utils::memory_commit(g_reservations2 + addr / 16, size / 16);
 		}
 		else
 		{
@@ -639,10 +646,12 @@ namespace vm
 			for (u32 i = 0; i < 6; i++)
 			{
 				utils::memory_commit(g_reservations + addr / 16 + i * 0x10000, 0x4000);
+				utils::memory_commit(g_reservations2 + addr / 16 + i * 0x10000, 0x4000);
 			}
 
 			// End of the address space
 			utils::memory_commit(g_reservations + 0xfff0000, 0x10000);
+			utils::memory_commit(g_reservations2 + 0xfff0000, 0x10000);
 		}
 
 		if (flags & 0x100)
diff --git a/rpcs3/Emu/Memory/vm.h b/rpcs3/Emu/Memory/vm.h
index c200964f63..381fd6139c 100644
--- a/rpcs3/Emu/Memory/vm.h
+++ b/rpcs3/Emu/Memory/vm.h
@@ -14,6 +14,7 @@ namespace vm
 	extern u8* const g_exec_addr;
 	extern u8* const g_stat_addr;
 	extern u8* const g_reservations;
+	extern u8* const g_reservations2;
 
 	struct writer_lock;
 
diff --git a/rpcs3/Emu/Memory/vm_reservation.h b/rpcs3/Emu/Memory/vm_reservation.h
index b90f3d1c48..e3ee7ea6b1 100644
--- a/rpcs3/Emu/Memory/vm_reservation.h
+++ b/rpcs3/Emu/Memory/vm_reservation.h
@@ -21,9 +21,9 @@ namespace vm
 	}
 
 	// Get reservation sync variable
-	inline atomic_t<u64>& reservation_notifier(u32 addr, u32 size)
+	inline shared_cond& reservation_notifier(u32 addr, u32 size)
 	{
-		return reinterpret_cast<atomic_t<u64>*>(g_reservations)[addr / 128];
+		return *reinterpret_cast<shared_cond*>(g_reservations2 + addr / 128 * 8);
 	}
 
 	void reservation_lock_internal(atomic_t<u64>&);
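
A closing note on the layout arithmetic: one 8-byte `shared_cond` per 128-byte reservation granule is exactly 1 byte per 16 guest bytes, the same density as the reservation stats, which is why the `g_reservations2` commit calls can mirror the `addr / 16` offsets used for `g_reservations`, and why a full 4 GiB guest space fits in the 0x10000000 bytes carved out right behind it. Standalone checks of both identities (illustrative, not part of the patch):

```cpp
// Verifies the index math used by the new reservation_notifier.
#include <cstdint>

constexpr std::uint64_t notifier_offset(std::uint64_t addr)
{
	return addr / 128 * 8; // one 8-byte shared_cond per 128-byte line
}

// Same density as the stats area (for 128-byte aligned addresses),
// matching the mirrored memory_commit offsets above.
static_assert(notifier_offset(0x10000) == 0x10000 / 16, "density mismatch");

// A full 4 GiB guest space needs 256 MiB of notifiers: exactly the
// 0x10000000 offset at which g_reservations2 is carved out.
static_assert(notifier_offset(1ull << 32) == 0x10000000, "region too small");
```
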