diff --git a/Utilities/cond.cpp b/Utilities/cond.cpp index fc1981f6f1..a6d976c7c0 100644 --- a/Utilities/cond.cpp +++ b/Utilities/cond.cpp @@ -12,7 +12,7 @@ void cond_variable::imp_wait(u32 _old, u64 _timeout) noexcept verify(HERE), _old; // Wait with timeout - m_value.wait(_old, atomic_wait_timeout{_timeout > max_timeout ? UINT64_MAX : _timeout * 1000}); + m_value.wait(_old, c_signal_mask, atomic_wait_timeout{_timeout > max_timeout ? UINT64_MAX : _timeout * 1000}); // Cleanup m_value.atomic_op([](u32& value) diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index a66439d1ee..95753f9f2e 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -3060,7 +3060,7 @@ s64 spu_thread::get_ch_value(u32 ch) return -1; } - vm::reservation_notifier(raddr, 128).wait(rtime, atomic_wait_timeout{100'000}); + vm::reservation_notifier(raddr, 128).wait(rtime, -128, atomic_wait_timeout{100'000}); /* mask -128 has its low 7 bits clear, so those bits of the value are not compared */ } check_state(); diff --git a/rpcs3/util/atomic.cpp b/rpcs3/util/atomic.cpp index 67eefbf27f..b0367b1d17 100644 --- a/rpcs3/util/atomic.cpp +++ b/rpcs3/util/atomic.cpp @@ -46,7 +46,11 @@ static thread_local void(*s_tls_notify_cb)(const void* data, u64 progress) = []( // Compare data in memory with old value, and return true if they are equal template -static inline bool ptr_cmp(const void* data, std::size_t size, u64 old_value, u64 mask) +static inline bool +#ifdef _WIN32 +__vectorcall +#endif +ptr_cmp(const void* data, std::size_t size, __m128i old128, __m128i mask128) { if constexpr (CheckCb) { @@ -64,12 +68,27 @@ static inline bool ptr_cmp(const void* data, std::size_t size, u64 old_value, u6 } } + const u64 old_value = _mm_cvtsi128_si64(old128); /* low 64 bits of the expected value; used by the 1/2/4/8-byte cases */ + const u64 mask = _mm_cvtsi128_si64(mask128); /* low 64 bits of the comparison mask; used by the 1/2/4/8-byte cases */ + switch (size) { case 1: return (reinterpret_cast*>(data)->load() & mask) == (old_value & mask); case 2: return (reinterpret_cast*>(data)->load() & mask) == (old_value & mask); case 4: return (reinterpret_cast*>(data)->load() & mask) == (old_value & 
mask); case 8: return (reinterpret_cast*>(data)->load() & mask) == (old_value & mask); + case 16: + { + const auto v0 = _mm_load_si128(reinterpret_cast(data)); + const auto v1 = _mm_xor_si128(v0, old128); /* bits where memory differs from the expected value */ + const auto v2 = _mm_and_si128(v1, mask128); /* keep only the differences in bits selected by the mask */ + const auto v3 = _mm_packs_epi16(v2, v2); /* signed-saturating 16->8 bit pack: a non-zero lane stays non-zero, so the low 64 bits of v3 cover all 128 bits of v2 */ + + if (_mm_cvtsi128_si64(v3) == 0) /* no masked difference: memory still matches the old value */ + { + return true; + } + } } return false; @@ -411,7 +430,11 @@ static void slot_free(std::uintptr_t iptr, sync_var* loc, u64 lv = 0) } } -SAFE_BUFFERS void atomic_storage_futex::wait(const void* data, std::size_t size, u64 old_value, u64 timeout, u64 mask) +SAFE_BUFFERS void +#ifdef _WIN32 +__vectorcall +#endif +atomic_storage_futex::wait(const void* data, std::size_t size, __m128i old_value, u64 timeout, __m128i mask) { const std::uintptr_t iptr = reinterpret_cast(data); diff --git a/rpcs3/util/atomic.hpp b/rpcs3/util/atomic.hpp index 205e002bff..a134716f15 100644 --- a/rpcs3/util/atomic.hpp +++ b/rpcs3/util/atomic.hpp @@ -21,7 +21,11 @@ private: template friend class atomic_t; - static void wait(const void* data, std::size_t size, u64 old_value, u64 timeout, u64 mask); + static void +#ifdef _WIN32 + __vectorcall +#endif + wait(const void* data, std::size_t size, __m128i old128, u64 timeout, __m128i mask128); static void notify_one(const void* data); static void notify_all(const void* data); @@ -1141,10 +1145,36 @@ public: return atomic_storage::btr(m_data, bit); } - template + // Timeout is discouraged void wait(type old_value, atomic_wait_timeout timeout = atomic_wait_timeout::inf) const noexcept { - atomic_storage_futex::wait(&m_data, sizeof(T), std::bit_cast>(old_value), static_cast(timeout), Mask); + if constexpr (sizeof(T) <= 8) + { + const __m128i old = _mm_cvtsi64_si128(std::bit_cast>(old_value)); /* value in the low 64 bits, upper 64 bits zero */ + atomic_storage_futex::wait(&m_data, sizeof(T), old, static_cast(timeout), _mm_set1_epi64x(-1)); /* all-ones mask: every bit is compared */ + } + else if constexpr (sizeof(T) == 16) + { + const __m128i old = std::bit_cast<__m128i>(old_value); + atomic_storage_futex::wait(&m_data, 
sizeof(T), old, static_cast(timeout), _mm_set1_epi64x(-1)); + } + } + + // Overload with mask (only selected bits are checked), timeout is discouraged + void wait(type old_value, type mask_value, atomic_wait_timeout timeout = atomic_wait_timeout::inf) const noexcept + { + if constexpr (sizeof(T) <= 8) + { + const __m128i old = _mm_cvtsi64_si128(std::bit_cast>(old_value)); + const __m128i mask = _mm_cvtsi64_si128(std::bit_cast>(mask_value)); + atomic_storage_futex::wait(&m_data, sizeof(T), old, static_cast(timeout), mask); + } + else if constexpr (sizeof(T) == 16) + { + const __m128i old = std::bit_cast<__m128i>(old_value); + const __m128i mask = std::bit_cast<__m128i>(mask_value); + atomic_storage_futex::wait(&m_data, sizeof(T), old, static_cast(timeout), mask); + } } void notify_one() noexcept