#pragma once #include "types.h" #include // Helper class, provides access to compiler-specific atomic intrinsics template struct atomic_storage { static_assert(sizeof(T) <= 16 && sizeof(T) == alignof(T), "atomic_storage<> error: invalid type"); /* First part: Non-MSVC intrinsics */ #ifndef _MSC_VER #if defined(__ATOMIC_HLE_ACQUIRE) && defined(__ATOMIC_HLE_RELEASE) static constexpr int s_hle_ack = __ATOMIC_SEQ_CST | __ATOMIC_HLE_ACQUIRE; static constexpr int s_hle_rel = __ATOMIC_SEQ_CST | __ATOMIC_HLE_RELEASE; #else static constexpr int s_hle_ack = __ATOMIC_SEQ_CST; static constexpr int s_hle_rel = __ATOMIC_SEQ_CST; #endif static inline bool compare_exchange(T& dest, T& comp, T exch) { return __atomic_compare_exchange(&dest, &comp, &exch, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); } static inline bool compare_exchange_hle_acq(T& dest, T& comp, T exch) { static_assert(sizeof(T) == 4 || sizeof(T) == 8); return __atomic_compare_exchange(&dest, &comp, &exch, false, s_hle_ack, s_hle_ack); } static inline T load(const T& dest) { T result; __atomic_load(&dest, &result, __ATOMIC_SEQ_CST); return result; } static inline void store(T& dest, T value) { __atomic_store(&dest, &value, __ATOMIC_SEQ_CST); } static inline void release(T& dest, T value) { __atomic_store(&dest, &value, __ATOMIC_RELEASE); } static inline T exchange(T& dest, T value) { T result; __atomic_exchange(&dest, &value, &result, __ATOMIC_SEQ_CST); return result; } static inline T fetch_add(T& dest, T value) { return __atomic_fetch_add(&dest, value, __ATOMIC_SEQ_CST); } static inline T fetch_add_hle_rel(T& dest, T value) { static_assert(sizeof(T) == 4 || sizeof(T) == 8); return __atomic_fetch_add(&dest, value, s_hle_rel); } static inline T add_fetch(T& dest, T value) { return __atomic_add_fetch(&dest, value, __ATOMIC_SEQ_CST); } static inline T fetch_sub(T& dest, T value) { return __atomic_fetch_sub(&dest, value, __ATOMIC_SEQ_CST); } static inline T sub_fetch(T& dest, T value) { return __atomic_sub_fetch(&dest, value, __ATOMIC_SEQ_CST); } static inline T fetch_and(T& dest, T value) { return __atomic_fetch_and(&dest, value, __ATOMIC_SEQ_CST); } static inline T and_fetch(T& dest, T value) { return __atomic_and_fetch(&dest, value, __ATOMIC_SEQ_CST); } static inline T fetch_xor(T& dest, T value) { return __atomic_fetch_xor(&dest, value, __ATOMIC_SEQ_CST); } static inline T xor_fetch(T& dest, T value) { return __atomic_xor_fetch(&dest, value, __ATOMIC_SEQ_CST); } static inline T fetch_or(T& dest, T value) { return __atomic_fetch_or(&dest, value, __ATOMIC_SEQ_CST); } static inline T or_fetch(T& dest, T value) { return __atomic_or_fetch(&dest, value, __ATOMIC_SEQ_CST); } #endif /* Second part: MSVC-specific */ #ifdef _MSC_VER static inline T add_fetch(T& dest, T value) { return atomic_storage::fetch_add(dest, value) + value; } static inline T fetch_sub(T& dest, T value) { return atomic_storage::fetch_add(dest, 0 - value); } static inline T sub_fetch(T& dest, T value) { return atomic_storage::fetch_add(dest, 0 - value) - value; } static inline T and_fetch(T& dest, T value) { return atomic_storage::fetch_and(dest, value) & value; } static inline T or_fetch(T& dest, T value) { return atomic_storage::fetch_or(dest, value) | value; } static inline T xor_fetch(T& dest, T value) { return atomic_storage::fetch_xor(dest, value) ^ value; } #endif /* Third part: fallbacks, may be hidden by subsequent atomic_storage<> specializations */ static inline T fetch_inc(T& dest) { return atomic_storage::fetch_add(dest, 1); } static inline T inc_fetch(T& dest) { return atomic_storage::add_fetch(dest, 1); } static inline T fetch_dec(T& dest) { return atomic_storage::fetch_sub(dest, 1); } static inline T dec_fetch(T& dest) { return atomic_storage::sub_fetch(dest, 1); } static inline bool test_and_set(T& dest, T mask) { return (atomic_storage::fetch_or(dest, mask) & mask) != 0; } static inline bool test_and_reset(T& dest, T mask) { return (atomic_storage::fetch_and(dest, ~mask) & mask) != 0; } static inline bool test_and_complement(T& dest, T mask) { return (atomic_storage::fetch_xor(dest, mask) & mask) != 0; } static inline bool bts(T& dest, uint bit) { return atomic_storage::test_and_set(dest, static_cast(1) << bit); } static inline bool btr(T& dest, uint bit) { return atomic_storage::test_and_reset(dest, static_cast(1) << bit); } static inline bool btc(T& dest, uint bit) { return atomic_storage::test_and_complement(dest, static_cast(1) << bit); } }; /* The rest: ugly MSVC intrinsics + inline asm implementations */ template struct atomic_storage : atomic_storage { #ifdef _MSC_VER static inline bool compare_exchange(T& dest, T& comp, T exch) { char v = *(char*)∁ char r = _InterlockedCompareExchange8((volatile char*)&dest, (char&)exch, v); comp = (T&)r; return r == v; } static inline T load(const T& dest) { char value = *(const volatile char*)&dest; _ReadWriteBarrier(); return (T&)value; } static inline void store(T& dest, T value) { _InterlockedExchange8((volatile char*)&dest, (char&)value); } static inline void release(T& dest, T value) { _ReadWriteBarrier(); *(volatile char*)&dest = (char&)value; } static inline T exchange(T& dest, T value) { char r = _InterlockedExchange8((volatile char*)&dest, (char&)value); return (T&)r; } static inline T fetch_add(T& dest, T value) { char r = _InterlockedExchangeAdd8((volatile char*)&dest, (char&)value); return (T&)r; } static inline T fetch_and(T& dest, T value) { char r = _InterlockedAnd8((volatile char*)&dest, (char&)value); return (T&)r; } static inline T fetch_or(T& dest, T value) { char r = _InterlockedOr8((volatile char*)&dest, (char&)value); return (T&)r; } static inline T fetch_xor(T& dest, T value) { char r = _InterlockedXor8((volatile char*)&dest, (char&)value); return (T&)r; } #endif }; template struct atomic_storage : atomic_storage { #ifdef _MSC_VER static inline bool compare_exchange(T& dest, T& comp, T exch) { short v = *(short*)∁ short r = _InterlockedCompareExchange16((volatile short*)&dest, (short&)exch, v); comp = (T&)r; return r == v; } static inline T load(const T& dest) { short value = *(const volatile short*)&dest; _ReadWriteBarrier(); return (T&)value; } static inline void store(T& dest, T value) { _InterlockedExchange16((volatile short*)&dest, (short&)value); } static inline void release(T& dest, T value) { _ReadWriteBarrier(); *(volatile short*)&dest = (short&)value; } static inline T exchange(T& dest, T value) { short r = _InterlockedExchange16((volatile short*)&dest, (short&)value); return (T&)r; } static inline T fetch_add(T& dest, T value) { short r = _InterlockedExchangeAdd16((volatile short*)&dest, (short&)value); return (T&)r; } static inline T fetch_and(T& dest, T value) { short r = _InterlockedAnd16((volatile short*)&dest, (short&)value); return (T&)r; } static inline T fetch_or(T& dest, T value) { short r = _InterlockedOr16((volatile short*)&dest, (short&)value); return (T&)r; } static inline T fetch_xor(T& dest, T value) { short r = _InterlockedXor16((volatile short*)&dest, (short&)value); return (T&)r; } static inline T inc_fetch(T& dest) { short r = _InterlockedIncrement16((volatile short*)&dest); return (T&)r; } static inline T dec_fetch(T& dest) { short r = _InterlockedDecrement16((volatile short*)&dest); return (T&)r; } #else static inline bool bts(T& dest, uint bit) { bool result; ushort _bit = (ushort)bit; __asm__("lock btsw %2, %0\n" "setc %1" : "+m" (dest), "=r" (result) : "Ir" (_bit) : "cc"); return result; } static inline bool btr(T& dest, uint bit) { bool result; ushort _bit = (ushort)bit; __asm__("lock btrw %2, %0\n" "setc %1" : "+m" (dest), "=r" (result) : "Ir" (_bit) : "cc"); return result; } static inline bool btc(T& dest, uint bit) { bool result; ushort _bit = (ushort)bit; __asm__("lock btcw %2, %0\n" "setc %1" : "+m" (dest), "=r" (result) : "Ir" (_bit) : "cc"); return result; } #endif }; template struct atomic_storage : atomic_storage { #ifdef _MSC_VER static inline bool compare_exchange(T& dest, T& comp, T exch) { long v = *(long*)∁ long r = _InterlockedCompareExchange((volatile long*)&dest, (long&)exch, v); comp = (T&)r; return r == v; } static inline bool compare_exchange_hle_acq(T& dest, T& comp, T exch) { long v = *(long*)∁ long r = _InterlockedCompareExchange_HLEAcquire((volatile long*)&dest, (long&)exch, v); comp = (T&)r; return r == v; } static inline T load(const T& dest) { long value = *(const volatile long*)&dest; _ReadWriteBarrier(); return (T&)value; } static inline void store(T& dest, T value) { _InterlockedExchange((volatile long*)&dest, (long&)value); } static inline void release(T& dest, T value) { _ReadWriteBarrier(); *(volatile long*)&dest = (long&)value; } static inline T exchange(T& dest, T value) { long r = _InterlockedExchange((volatile long*)&dest, (long&)value); return (T&)r; } static inline T fetch_add(T& dest, T value) { long r = _InterlockedExchangeAdd((volatile long*)&dest, (long&)value); return (T&)r; } static inline T fetch_add_hle_rel(T& dest, T value) { long r = _InterlockedExchangeAdd_HLERelease((volatile long*)&dest, (long&)value); return (T&)r; } static inline T fetch_and(T& dest, T value) { long r = _InterlockedAnd((volatile long*)&dest, (long&)value); return (T&)r; } static inline T fetch_or(T& dest, T value) { long r = _InterlockedOr((volatile long*)&dest, (long&)value); return (T&)r; } static inline T fetch_xor(T& dest, T value) { long r = _InterlockedXor((volatile long*)&dest, (long&)value); return (T&)r; } static inline T inc_fetch(T& dest) { long r = _InterlockedIncrement((volatile long*)&dest); return (T&)r; } static inline T dec_fetch(T& dest) { long r = _InterlockedDecrement((volatile long*)&dest); return (T&)r; } static inline bool bts(T& dest, uint bit) { return _interlockedbittestandset((volatile long*)&dest, bit) != 0; } static inline bool btr(T& dest, uint bit) { return _interlockedbittestandreset((volatile long*)&dest, bit) != 0; } #else static inline bool bts(T& dest, uint bit) { bool result; __asm__("lock btsl %2, %0\n" "setc %1" : "+m" (dest), "=r" (result) : "Ir" (bit) : "cc"); return result; } static inline bool btr(T& dest, uint bit) { bool result; __asm__("lock btrl %2, %0\n" "setc %1" : "+m" (dest), "=r" (result) : "Ir" (bit) : "cc"); return result; } static inline bool btc(T& dest, uint bit) { bool result; __asm__("lock btcl %2, %0\n" "setc %1" : "+m" (dest), "=r" (result) : "Ir" (bit) : "cc"); return result; } #endif }; template struct atomic_storage : atomic_storage { #ifdef _MSC_VER static inline bool compare_exchange(T& dest, T& comp, T exch) { llong v = *(llong*)∁ llong r = _InterlockedCompareExchange64((volatile llong*)&dest, (llong&)exch, v); comp = (T&)r; return r == v; } static inline bool compare_exchange_hle_acq(T& dest, T& comp, T exch) { llong v = *(llong*)∁ llong r = _InterlockedCompareExchange64_HLEAcquire((volatile llong*)&dest, (llong&)exch, v); comp = (T&)r; return r == v; } static inline T load(const T& dest) { llong value = *(const volatile llong*)&dest; _ReadWriteBarrier(); return (T&)value; } static inline void store(T& dest, T value) { _InterlockedExchange64((volatile llong*)&dest, (llong&)value); } static inline void release(T& dest, T value) { _ReadWriteBarrier(); *(volatile llong*)&dest = (llong&)value; } static inline T exchange(T& dest, T value) { llong r = _InterlockedExchange64((volatile llong*)&dest, (llong&)value); return (T&)r; } static inline T fetch_add(T& dest, T value) { llong r = _InterlockedExchangeAdd64((volatile llong*)&dest, (llong&)value); return (T&)r; } static inline T fetch_add_hle_rel(T& dest, T value) { llong r = _InterlockedExchangeAdd64_HLERelease((volatile llong*)&dest, (llong&)value); return (T&)r; } static inline T fetch_and(T& dest, T value) { llong r = _InterlockedAnd64((volatile llong*)&dest, (llong&)value); return (T&)r; } static inline T fetch_or(T& dest, T value) { llong r = _InterlockedOr64((volatile llong*)&dest, (llong&)value); return (T&)r; } static inline T fetch_xor(T& dest, T value) { llong r = _InterlockedXor64((volatile llong*)&dest, (llong&)value); return (T&)r; } static inline T inc_fetch(T& dest) { llong r = _InterlockedIncrement64((volatile llong*)&dest); return (T&)r; } static inline T dec_fetch(T& dest) { llong r = _InterlockedDecrement64((volatile llong*)&dest); return (T&)r; } static inline bool bts(T& dest, uint bit) { return _interlockedbittestandset64((volatile llong*)&dest, bit) != 0; } static inline bool btr(T& dest, uint bit) { return _interlockedbittestandreset64((volatile llong*)&dest, bit) != 0; } #else static inline bool bts(T& dest, uint bit) { bool result; ullong _bit = bit; __asm__("lock btsq %2, %0\n" "setc %1" : "+m" (dest), "=r" (result) : "Ir" (_bit) : "cc"); return result; } static inline bool btr(T& dest, uint bit) { bool result; ullong _bit = bit; __asm__("lock btrq %2, %0\n" "setc %1" : "+m" (dest), "=r" (result) : "Ir" (_bit) : "cc"); return result; } static inline bool btc(T& dest, uint bit) { bool result; ullong _bit = bit; __asm__("lock btcq %2, %0\n" "setc %1" : "+m" (dest), "=r" (result) : "Ir" (_bit) : "cc"); return result; } #endif }; template struct atomic_storage : atomic_storage { #ifdef _MSC_VER static inline bool compare_exchange(T& dest, T& comp, T exch) { llong* _exch = (llong*)&exch; return _InterlockedCompareExchange128((volatile llong*)&dest, _exch[1], _exch[0], (llong*)&comp) != 0; } static inline T load(const T& dest) { llong result[2]{0, 0}; _InterlockedCompareExchange128((volatile llong*)&dest, 0, 0, result); return *(T*)+result; } static inline void store(T& dest, T value) { llong lo = *(llong*)&value; llong hi = *((llong*)&value + 1); llong cmp[2]{ *(volatile llong*)&dest, *((volatile llong*)&dest + 1) }; while (!_InterlockedCompareExchange128((volatile llong*)&dest, hi, lo, cmp)); } static inline void release(T& dest, T value) { llong lo = *(llong*)&value; llong hi = *((llong*)&value + 1); llong cmp[2]{ *(volatile llong*)&dest, *((volatile llong*)&dest + 1) }; while (!_InterlockedCompareExchange128((volatile llong*)&dest, hi, lo, cmp)); } static inline T exchange(T& dest, T value) { llong lo = *(llong*)&value; llong hi = *((llong*)&value + 1); llong cmp[2]{ *(volatile llong*)&dest, *((volatile llong*)&dest + 1) }; while (!_InterlockedCompareExchange128((volatile llong*)&dest, hi, lo, cmp)); return *(T*)+cmp; } #endif // TODO }; // Atomic type with lock-free and standard layout guarantees (and appropriate limitations) template class atomic_t { protected: using type = typename std::remove_cv::type; static_assert(alignof(type) == sizeof(type), "atomic_t<> error: unexpected alignment, use alignas() if necessary"); type m_data; public: atomic_t() noexcept = default; atomic_t(const atomic_t&) = delete; atomic_t& operator =(const atomic_t&) = delete; // Define simple type using simple_type = simple_t; constexpr atomic_t(const type& value) noexcept : m_data(value) { } // Unsafe direct access type& raw() { return m_data; } // Atomically compare data with cmp, replace with exch if equal, return previous data value anyway type compare_and_swap(const type& cmp, const type& exch) { type old = cmp; atomic_storage::compare_exchange(m_data, old, exch); return old; } // Atomically compare data with cmp, replace with exch if equal, return true if data was replaced bool compare_and_swap_test(const type& cmp, const type& exch) { type old = cmp; return atomic_storage::compare_exchange(m_data, old, exch); } // As in std::atomic bool compare_exchange(type& cmp_and_old, const type& exch) { return atomic_storage::compare_exchange(m_data, cmp_and_old, exch); } // Atomic operation; returns old value, or pair of old value and return value (cancel op if evaluates to false) template > std::conditional_t, type, std::pair> fetch_op(F&& func) { type _new, old = atomic_storage::load(m_data); while (true) { _new = old; if constexpr (std::is_void_v) { std::invoke(std::forward(func), _new); if (LIKELY(atomic_storage::compare_exchange(m_data, old, _new))) { return old; } } else { RT ret = std::invoke(std::forward(func), _new); if (LIKELY(!ret || atomic_storage::compare_exchange(m_data, old, _new))) { return {old, std::move(ret)}; } } } } // fetch_op overload with function (invokable) provided as a template parameter template > std::conditional_t, type, std::pair> fetch_op() { type _new, old = atomic_storage::load(m_data); while (true) { _new = old; if constexpr (std::is_void_v) { std::invoke(F, _new); if (LIKELY(atomic_storage::compare_exchange(m_data, old, _new))) { return old; } } else { RT ret = std::invoke(F, _new); if (LIKELY(!ret || atomic_storage::compare_exchange(m_data, old, _new))) { return {old, std::move(ret)}; } } } } // Atomic operation; returns function result value, function is the lambda template > RT atomic_op(F&& func) { type _new, old = atomic_storage::load(m_data); while (true) { _new = old; if constexpr (std::is_void_v) { std::invoke(std::forward(func), _new); if (LIKELY(atomic_storage::compare_exchange(m_data, old, _new))) { return; } } else { RT result = std::invoke(std::forward(func), _new); if (LIKELY(atomic_storage::compare_exchange(m_data, old, _new))) { return result; } } } } // atomic_op overload with function (invokable) provided as a template parameter template > RT atomic_op() { type _new, old = atomic_storage::load(m_data); while (true) { _new = old; if constexpr (std::is_void_v) { std::invoke(F, _new); if (LIKELY(atomic_storage::compare_exchange(m_data, old, _new))) { return; } } else { RT result = std::invoke(F, _new); if (LIKELY(atomic_storage::compare_exchange(m_data, old, _new))) { return result; } } } } // Atomically read data type load() const { return atomic_storage::load(m_data); } // Atomically read data operator simple_type() const { return atomic_storage::load(m_data); } // Atomically write data void store(const type& rhs) { atomic_storage::store(m_data, rhs); } type operator =(const type& rhs) { atomic_storage::store(m_data, rhs); return rhs; } // Atomically write data with release memory order (faster on x86) void release(const type& rhs) { atomic_storage::release(m_data, rhs); } // Atomically replace data with value, return previous data value type exchange(const type& rhs) { return atomic_storage::exchange(m_data, rhs); } type fetch_add(const type& rhs) { if constexpr(std::is_integral::value) { return atomic_storage::fetch_add(m_data, rhs); } return fetch_op([&](T& v) { v += rhs; }); } type add_fetch(const type& rhs) { if constexpr(std::is_integral::value) { return atomic_storage::add_fetch(m_data, rhs); } return atomic_op([&](T& v) { v += rhs; return v; }); } auto operator +=(const type& rhs) { if constexpr(std::is_integral::value) { return atomic_storage::add_fetch(m_data, rhs); } return atomic_op([&](T& v) { return v += rhs; }); } type fetch_sub(const type& rhs) { if constexpr(std::is_integral::value) { return atomic_storage::fetch_sub(m_data, rhs); } return fetch_op([&](T& v) { v -= rhs; }); } type sub_fetch(const type& rhs) { if constexpr(std::is_integral::value) { return atomic_storage::sub_fetch(m_data, rhs); } return atomic_op([&](T& v) { v -= rhs; return v; }); } auto operator -=(const type& rhs) { if constexpr(std::is_integral::value) { return atomic_storage::sub_fetch(m_data, rhs); } return atomic_op([&](T& v) { return v -= rhs; }); } type fetch_and(const type& rhs) { if constexpr(std::is_integral::value) { return atomic_storage::fetch_and(m_data, rhs); } return fetch_op([&](T& v) { v &= rhs; }); } type and_fetch(const type& rhs) { if constexpr(std::is_integral::value) { return atomic_storage::and_fetch(m_data, rhs); } return atomic_op([&](T& v) { v &= rhs; return v; }); } auto operator &=(const type& rhs) { if constexpr(std::is_integral::value) { return atomic_storage::and_fetch(m_data, rhs); } return atomic_op([&](T& v) { return v &= rhs; }); } type fetch_or(const type& rhs) { if constexpr(std::is_integral::value) { return atomic_storage::fetch_or(m_data, rhs); } return fetch_op([&](T& v) { v |= rhs; }); } type or_fetch(const type& rhs) { if constexpr(std::is_integral::value) { return atomic_storage::or_fetch(m_data, rhs); } return atomic_op([&](T& v) { v |= rhs; return v; }); } auto operator |=(const type& rhs) { if constexpr(std::is_integral::value) { return atomic_storage::or_fetch(m_data, rhs); } return atomic_op([&](T& v) { return v |= rhs; }); } type fetch_xor(const type& rhs) { if constexpr(std::is_integral::value) { return atomic_storage::fetch_xor(m_data, rhs); } return fetch_op([&](T& v) { v ^= rhs; }); } type xor_fetch(const type& rhs) { if constexpr(std::is_integral::value) { return atomic_storage::xor_fetch(m_data, rhs); } return atomic_op([&](T& v) { v ^= rhs; return v; }); } auto operator ^=(const type& rhs) { if constexpr(std::is_integral::value) { return atomic_storage::xor_fetch(m_data, rhs); } return atomic_op([&](T& v) { return v ^= rhs; }); } auto operator ++() { if constexpr(std::is_integral::value) { return atomic_storage::inc_fetch(m_data); } return atomic_op([](T& v) { return ++v; }); } auto operator --() { if constexpr(std::is_integral::value) { return atomic_storage::dec_fetch(m_data); } return atomic_op([](T& v) { return --v; }); } auto operator ++(int) { if constexpr(std::is_integral::value) { return atomic_storage::fetch_inc(m_data); } return atomic_op([](T& v) { return v++; }); } auto operator --(int) { if constexpr(std::is_integral::value) { return atomic_storage::fetch_dec(m_data); } return atomic_op([](T& v) { return v--; }); } // Conditionally decrement bool try_dec(simple_type greater_than = std::numeric_limits::min()) { type _new, old = atomic_storage::load(m_data); while (true) { _new = old; if (!(_new > greater_than)) { return false; } _new -= 1; if (LIKELY(atomic_storage::compare_exchange(m_data, old, _new))) { return true; } } } // Conditionally increment bool try_inc(simple_type less_than = std::numeric_limits::max()) { type _new, old = atomic_storage::load(m_data); while (true) { _new = old; if (!(_new < less_than)) { return false; } _new += 1; if (LIKELY(atomic_storage::compare_exchange(m_data, old, _new))) { return true; } } } };