atomics.cpp: add support for waiting on 128-bit atomics

Complementarily.
Also refactored to make the waiting mask a regular (non-template) argument.
This commit is contained in:
Nekotekina 2020-10-26 23:32:40 +03:00
parent 13de773486
commit c50233cc92
4 changed files with 60 additions and 7 deletions

View File

@ -12,7 +12,7 @@ void cond_variable::imp_wait(u32 _old, u64 _timeout) noexcept
verify(HERE), _old;
// Wait with timeout
m_value.wait<c_signal_mask>(_old, atomic_wait_timeout{_timeout > max_timeout ? UINT64_MAX : _timeout * 1000});
m_value.wait(_old, c_signal_mask, atomic_wait_timeout{_timeout > max_timeout ? UINT64_MAX : _timeout * 1000});
// Cleanup
m_value.atomic_op([](u32& value)

View File

@ -3060,7 +3060,7 @@ s64 spu_thread::get_ch_value(u32 ch)
return -1;
}
vm::reservation_notifier(raddr, 128).wait<UINT64_MAX & -128>(rtime, atomic_wait_timeout{100'000});
vm::reservation_notifier(raddr, 128).wait(rtime, -128, atomic_wait_timeout{100'000});
}
check_state();

View File

@ -46,7 +46,11 @@ static thread_local void(*s_tls_notify_cb)(const void* data, u64 progress) = [](
// Compare data in memory with old value, and return true if they are equal
template <bool CheckCb = true, bool CheckData = true>
static inline bool ptr_cmp(const void* data, std::size_t size, u64 old_value, u64 mask)
static inline bool
#ifdef _WIN32
__vectorcall
#endif
ptr_cmp(const void* data, std::size_t size, __m128i old128, __m128i mask128)
{
if constexpr (CheckCb)
{
@ -64,12 +68,27 @@ static inline bool ptr_cmp(const void* data, std::size_t size, u64 old_value, u6
}
}
const u64 old_value = _mm_cvtsi128_si64(old128);
const u64 mask = _mm_cvtsi128_si64(mask128);
switch (size)
{
case 1: return (reinterpret_cast<const atomic_t<u8>*>(data)->load() & mask) == (old_value & mask);
case 2: return (reinterpret_cast<const atomic_t<u16>*>(data)->load() & mask) == (old_value & mask);
case 4: return (reinterpret_cast<const atomic_t<u32>*>(data)->load() & mask) == (old_value & mask);
case 8: return (reinterpret_cast<const atomic_t<u64>*>(data)->load() & mask) == (old_value & mask);
case 16:
{
const auto v0 = _mm_load_si128(reinterpret_cast<const __m128i*>(data));
const auto v1 = _mm_xor_si128(v0, old128);
const auto v2 = _mm_and_si128(v1, mask128);
const auto v3 = _mm_packs_epi16(v2, v2);
if (_mm_cvtsi128_si64(v3) == 0)
{
return true;
}
}
}
return false;
@ -411,7 +430,11 @@ static void slot_free(std::uintptr_t iptr, sync_var* loc, u64 lv = 0)
}
}
SAFE_BUFFERS void atomic_storage_futex::wait(const void* data, std::size_t size, u64 old_value, u64 timeout, u64 mask)
SAFE_BUFFERS void
#ifdef _WIN32
__vectorcall
#endif
atomic_storage_futex::wait(const void* data, std::size_t size, __m128i old_value, u64 timeout, __m128i mask)
{
const std::uintptr_t iptr = reinterpret_cast<std::uintptr_t>(data);

View File

@ -21,7 +21,11 @@ private:
template <typename T, std::size_t Align>
friend class atomic_t;
static void wait(const void* data, std::size_t size, u64 old_value, u64 timeout, u64 mask);
static void
#ifdef _WIN32
__vectorcall
#endif
wait(const void* data, std::size_t size, __m128i old128, u64 timeout, __m128i mask128);
static void notify_one(const void* data);
static void notify_all(const void* data);
@ -1141,10 +1145,36 @@ public:
return atomic_storage<type>::btr(m_data, bit);
}
// NOTE(review): this listing is a rendered diff. The `template <u64 Mask>` header below and the
// first call in the body (the one passing `Mask`) look like the removed pre-change lines shown
// next to their replacement - confirm against the real header before relying on them.
template <u64 Mask = 0xffffffffffffffff>
// Forwards old_value to atomic_storage_futex::wait together with an all-ones mask.
// Timeout is discouraged
void wait(type old_value, atomic_wait_timeout timeout = atomic_wait_timeout::inf) const noexcept
{
atomic_storage_futex::wait(&m_data, sizeof(T), std::bit_cast<get_uint_t<sizeof(T)>>(old_value), static_cast<u64>(timeout), Mask);
if constexpr (sizeof(T) <= 8)
{
// Up to 8 bytes: the value's bits are placed in the low 64 bits of the vector (upper half is zero)
const __m128i old = _mm_cvtsi64_si128(std::bit_cast<get_uint_t<sizeof(T)>>(old_value));
// Mask argument has every bit set
atomic_storage_futex::wait(&m_data, sizeof(T), old, static_cast<u64>(timeout), _mm_set1_epi64x(-1));
}
else if constexpr (sizeof(T) == 16)
{
// 16 bytes: the object representation is reinterpreted as a full 128-bit vector
const __m128i old = std::bit_cast<__m128i>(old_value);
// Mask argument has every bit set
atomic_storage_futex::wait(&m_data, sizeof(T), old, static_cast<u64>(timeout), _mm_set1_epi64x(-1));
}
}
// Overload with mask (only selected bits are checked), timeout is discouraged.
//
// old_value  - value to compare the stored data against
// mask_value - bits set here select which bits take part in that comparison
// timeout    - forwarded to atomic_storage_futex::wait as u64 (defaults to atomic_wait_timeout::inf)
//
// Both old_value and mask_value are widened to __m128i and handed to atomic_storage_futex::wait.
// Qualified `const noexcept` to match the mask-less overload: the function only takes the
// address of m_data, so it must also be callable on a const atomic.
void wait(type old_value, type mask_value, atomic_wait_timeout timeout = atomic_wait_timeout::inf) const noexcept
{
	if constexpr (sizeof(T) <= 8)
	{
		// Up to 8 bytes: value and mask occupy the low 64 bits of the vector, the upper half is zero
		const __m128i old = _mm_cvtsi64_si128(std::bit_cast<get_uint_t<sizeof(T)>>(old_value));
		const __m128i mask = _mm_cvtsi64_si128(std::bit_cast<get_uint_t<sizeof(T)>>(mask_value));
		atomic_storage_futex::wait(&m_data, sizeof(T), old, static_cast<u64>(timeout), mask);
	}
	else if constexpr (sizeof(T) == 16)
	{
		// 16 bytes: reinterpret the object representations as full 128-bit vectors
		const __m128i old = std::bit_cast<__m128i>(old_value);
		const __m128i mask = std::bit_cast<__m128i>(mask_value);
		atomic_storage_futex::wait(&m_data, sizeof(T), old, static_cast<u64>(timeout), mask);
	}
}
void notify_one() noexcept