#pragma once #ifdef _MSC_VER #include <intrin.h> #else #include <x86intrin.h> #endif #include <immintrin.h> #include <emmintrin.h> #include <cstdint> #include <type_traits> #include <utility> #include <chrono> #include <array> // Assume little-endian #define IS_LE_MACHINE 1 #define IS_BE_MACHINE 0 #ifdef _MSC_VER #define ASSUME(cond) __assume(cond) #define LIKELY #define UNLIKELY #define SAFE_BUFFERS __declspec(safebuffers) #define NEVER_INLINE __declspec(noinline) #define FORCE_INLINE __forceinline #else #define ASSUME(cond) do { if (!(cond)) __builtin_unreachable(); } while (0) #define LIKELY(cond) __builtin_expect(!!(cond), 1) #define UNLIKELY(cond) __builtin_expect(!!(cond), 0) #define SAFE_BUFFERS #define NEVER_INLINE __attribute__((noinline)) #define FORCE_INLINE __attribute__((always_inline)) inline #endif #define CHECK_SIZE(type, size) static_assert(sizeof(type) == size, "Invalid " #type " type size") #define CHECK_ALIGN(type, align) static_assert(alignof(type) == align, "Invalid " #type " type alignment") #define CHECK_MAX_SIZE(type, size) static_assert(sizeof(type) <= size, #type " type size is too big") #define CHECK_SIZE_ALIGN(type, size, align) CHECK_SIZE(type, size); CHECK_ALIGN(type, align) // Return 32 bit sizeof() to avoid widening/narrowing conversions with size_t #define SIZE_32(...) static_cast<u32>(sizeof(__VA_ARGS__)) // Return 32 bit alignof() to avoid widening/narrowing conversions with size_t #define ALIGN_32(...) static_cast<u32>(alignof(__VA_ARGS__)) #define CONCATENATE_DETAIL(x, y) x ## y #define CONCATENATE(x, y) CONCATENATE_DETAIL(x, y) #define STRINGIZE_DETAIL(x) #x "" #define STRINGIZE(x) STRINGIZE_DETAIL(x) #define HERE "\n(in file " __FILE__ ":" STRINGIZE(__LINE__) ")" // Ensure that the expression evaluates to true. Obsolete. //#define EXPECTS(...) do { if (!(__VA_ARGS__)) fmt::raw_error("Precondition failed: " #__VA_ARGS__ HERE); } while (0) //#define ENSURES(...) do { if (!(__VA_ARGS__)) fmt::raw_error("Postcondition failed: " #__VA_ARGS__ HERE); } while (0) #define DECLARE(...) decltype(__VA_ARGS__) __VA_ARGS__ #define STR_CASE(...) case __VA_ARGS__: return #__VA_ARGS__ using schar = signed char; using uchar = unsigned char; using ushort = unsigned short; using uint = unsigned int; using ulong = unsigned long; using ullong = unsigned long long; using llong = long long; using uptr = std::uintptr_t; using u8 = std::uint8_t; using u16 = std::uint16_t; using u32 = std::uint32_t; using u64 = std::uint64_t; using s8 = std::int8_t; using s16 = std::int16_t; using s32 = std::int32_t; using s64 = std::int64_t; using steady_clock = std::conditional< std::chrono::high_resolution_clock::is_steady, std::chrono::high_resolution_clock, std::chrono::steady_clock>::type; namespace gsl { enum class byte : u8; } // Formatting helper, type-specific preprocessing for improving safety and functionality template <typename T, typename = void> struct fmt_unveil; template <typename Arg> using fmt_unveil_t = typename fmt_unveil<Arg>::type; struct fmt_type_info; namespace fmt { template <typename... Args> const fmt_type_info* get_type_info(); } template <typename T, std::size_t Align = alignof(T), std::size_t Size = sizeof(T)> struct se_storage; template <typename T, bool Se = true, std::size_t Align = alignof(T)> class se_t; template <typename T, std::size_t Size = sizeof(T)> struct atomic_storage; template <typename T1, typename T2, typename = void> struct atomic_add; template <typename T1, typename T2, typename = void> struct atomic_sub; template <typename T1, typename T2, typename = void> struct atomic_and; template <typename T1, typename T2, typename = void> struct atomic_or; template <typename T1, typename T2, typename = void> struct atomic_xor; template <typename T, typename = void> struct atomic_pre_inc; template <typename T, typename = void> struct atomic_post_inc; template <typename T, typename = void> struct atomic_pre_dec; template <typename T, typename = void> struct atomic_post_dec; template <typename T1, typename T2, typename = void> struct atomic_test_and_set; template <typename T1, typename T2, typename = void> struct atomic_test_and_reset; template <typename T1, typename T2, typename = void> struct atomic_test_and_complement; template <typename T> class atomic_t; #ifdef _MSC_VER using std::void_t; #else namespace void_details { template <typename...> struct make_void { using type = void; }; } template <typename... T> using void_t = typename void_details::make_void<T...>::type; #endif // Extract T::simple_type if available, remove cv qualifiers template <typename T, typename = void> struct simple_type_helper { using type = typename std::remove_cv<T>::type; }; template <typename T> struct simple_type_helper<T, void_t<typename T::simple_type>> { using type = typename T::simple_type; }; template <typename T> using simple_t = typename simple_type_helper<T>::type; // Bool type equivalent class b8 { u8 m_value; public: b8() = default; constexpr b8(bool value) : m_value(value) { } constexpr operator bool() const { return m_value != 0; } }; // Bool wrapper for restricting bool result conversions struct explicit_bool_t { const bool value; constexpr explicit_bool_t(bool value) : value(value) { } explicit constexpr operator bool() const { return value; } }; #ifndef _MSC_VER using u128 = __uint128_t; using s128 = __int128_t; #else // Unsigned 128-bit integer implementation (TODO) struct alignas(16) u128 { u64 lo, hi; u128() = default; constexpr u128(u64 l) : lo(l) , hi(0) { } friend u128 operator+(const u128& l, const u128& r) { u128 value; _addcarry_u64(_addcarry_u64(0, r.lo, l.lo, &value.lo), r.hi, l.hi, &value.hi); return value; } friend u128 operator+(const u128& l, u64 r) { u128 value; _addcarry_u64(_addcarry_u64(0, r, l.lo, &value.lo), l.hi, 0, &value.hi); return value; } friend u128 operator+(u64 l, const u128& r) { u128 value; _addcarry_u64(_addcarry_u64(0, r.lo, l, &value.lo), 0, r.hi, &value.hi); return value; } friend u128 operator-(const u128& l, const u128& r) { u128 value; _subborrow_u64(_subborrow_u64(0, r.lo, l.lo, &value.lo), r.hi, l.hi, &value.hi); return value; } friend u128 operator-(const u128& l, u64 r) { u128 value; _subborrow_u64(_subborrow_u64(0, r, l.lo, &value.lo), 0, l.hi, &value.hi); return value; } friend u128 operator-(u64 l, const u128& r) { u128 value; _subborrow_u64(_subborrow_u64(0, r.lo, l, &value.lo), r.hi, 0, &value.hi); return value; } u128 operator+() const { return *this; } u128 operator-() const { u128 value; _subborrow_u64(_subborrow_u64(0, lo, 0, &value.lo), hi, 0, &value.hi); return value; } u128& operator++() { _addcarry_u64(_addcarry_u64(0, 1, lo, &lo), 0, hi, &hi); return *this; } u128 operator++(int) { u128 value = *this; _addcarry_u64(_addcarry_u64(0, 1, lo, &lo), 0, hi, &hi); return value; } u128& operator--() { _subborrow_u64(_subborrow_u64(0, 1, lo, &lo), 0, hi, &hi); return *this; } u128 operator--(int) { u128 value = *this; _subborrow_u64(_subborrow_u64(0, 1, lo, &lo), 0, hi, &hi); return value; } u128 operator~() const { u128 value; value.lo = ~lo; value.hi = ~hi; return value; } friend u128 operator&(const u128& l, const u128& r) { u128 value; value.lo = l.lo & r.lo; value.hi = l.hi & r.hi; return value; } friend u128 operator|(const u128& l, const u128& r) { u128 value; value.lo = l.lo | r.lo; value.hi = l.hi | r.hi; return value; } friend u128 operator^(const u128& l, const u128& r) { u128 value; value.lo = l.lo ^ r.lo; value.hi = l.hi ^ r.hi; return value; } u128& operator+=(const u128& r) { _addcarry_u64(_addcarry_u64(0, r.lo, lo, &lo), r.hi, hi, &hi); return *this; } u128& operator+=(uint64_t r) { _addcarry_u64(_addcarry_u64(0, r, lo, &lo), 0, hi, &hi); return *this; } u128& operator&=(const u128& r) { lo &= r.lo; hi &= r.hi; return *this; } u128& operator|=(const u128& r) { lo |= r.lo; hi |= r.hi; return *this; } u128& operator^=(const u128& r) { lo ^= r.lo; hi ^= r.hi; return *this; } }; // Signed 128-bit integer implementation (TODO) struct alignas(16) s128 { u64 lo; s64 hi; s128() = default; constexpr s128(s64 l) : hi(l >> 63) , lo(l) { } constexpr s128(u64 l) : hi(0) , lo(l) { } }; #endif CHECK_SIZE_ALIGN(u128, 16, 16); CHECK_SIZE_ALIGN(s128, 16, 16); union alignas(2) f16 { u16 _u16; u8 _u8[2]; explicit f16(u16 raw) { _u16 = raw; } explicit operator float() const { // See http://stackoverflow.com/a/26779139 // The conversion doesn't handle NaN/Inf u32 raw = ((_u16 & 0x8000) << 16) | // Sign (just moved) (((_u16 & 0x7c00) + 0x1C000) << 13) | // Exponent ( exp - 15 + 127) ((_u16 & 0x03FF) << 13); // Mantissa return (float&)raw; } }; CHECK_SIZE_ALIGN(f16, 2, 2); using f32 = float; using f64 = double; struct ignore { template <typename T> ignore(T) { } }; template <typename T, typename = std::enable_if_t<std::is_integral<T>::value>> constexpr T align(const T& value, ullong align) { return static_cast<T>((value + (align - 1)) & ~(align - 1)); } template <typename T, typename T2> inline u32 offset32(T T2::*const mptr) { #ifdef _MSC_VER static_assert(sizeof(mptr) == sizeof(u32), "Invalid pointer-to-member size"); return reinterpret_cast<const u32&>(mptr); #elif __GNUG__ static_assert(sizeof(mptr) == sizeof(std::size_t), "Invalid pointer-to-member size"); return static_cast<u32>(reinterpret_cast<const std::size_t&>(mptr)); #else static_assert(sizeof(mptr) == 0, "Invalid pointer-to-member size"); #endif } template <typename T> struct offset32_array { static_assert(std::is_array<T>::value, "Invalid pointer-to-member type (array expected)"); template <typename Arg> static inline u32 index32(const Arg& arg) { return SIZE_32(std::remove_extent_t<T>) * static_cast<u32>(arg); } }; template <typename T, std::size_t N> struct offset32_array<std::array<T, N>> { template <typename Arg> static inline u32 index32(const Arg& arg) { return SIZE_32(T) * static_cast<u32>(arg); } }; template <typename Arg> struct offset32_detail; template <typename T, typename T2, typename Arg, typename... Args> inline u32 offset32(T T2::*const mptr, const Arg& arg, const Args&... args) { return offset32_detail<Arg>::offset32(mptr, arg, args...); } template <typename Arg> struct offset32_detail { template <typename T, typename T2, typename... Args> static inline u32 offset32(T T2::*const mptr, const Arg& arg, const Args&... args) { return ::offset32(mptr, args...) + offset32_array<T>::index32(arg); } }; template <typename T3, typename T4> struct offset32_detail<T3 T4::*> { template <typename T, typename T2, typename... Args> static inline u32 offset32(T T2::*const mptr, T3 T4::*const mptr2, const Args&... args) { return ::offset32(mptr) + ::offset32(mptr2, args...); } }; inline u32 cntlz32(u32 arg, bool nonzero = false) { #ifdef _MSC_VER ulong res; return _BitScanReverse(&res, arg) || nonzero ? res ^ 31 : 32; #else return arg || nonzero ? __builtin_clzll(arg) - 32 : 32; #endif } inline u64 cntlz64(u64 arg, bool nonzero = false) { #ifdef _MSC_VER ulong res; return _BitScanReverse64(&res, arg) || nonzero ? res ^ 63 : 64; #else return arg || nonzero ? __builtin_clzll(arg) : 64; #endif } inline u32 cnttz32(u32 arg, bool nonzero = false) { #ifdef _MSC_VER ulong res; return _BitScanForward(&res, arg) || nonzero ? res : 32; #else return arg || nonzero ? __builtin_ctzll(arg) : 32; #endif } inline u64 cnttz64(u64 arg, bool nonzero = false) { #ifdef _MSC_VER ulong res; return _BitScanForward64(&res, arg) || nonzero ? res : 64; #else return arg || nonzero ? __builtin_ctzll(arg) : 64; #endif } // Helper function, used by ""_u16, ""_u32, ""_u64 constexpr u8 to_u8(char c) { return static_cast<u8>(c); } // Convert 2-byte string to u16 value like reinterpret_cast does constexpr u16 operator""_u16(const char* s, std::size_t length) { return length != 2 ? throw s : #if IS_LE_MACHINE == 1 to_u8(s[1]) << 8 | to_u8(s[0]); #endif } // Convert 4-byte string to u32 value like reinterpret_cast does constexpr u32 operator""_u32(const char* s, std::size_t length) { return length != 4 ? throw s : #if IS_LE_MACHINE == 1 to_u8(s[3]) << 24 | to_u8(s[2]) << 16 | to_u8(s[1]) << 8 | to_u8(s[0]); #endif } // Convert 8-byte string to u64 value like reinterpret_cast does constexpr u64 operator""_u64(const char* s, std::size_t length) { return length != 8 ? throw s : #if IS_LE_MACHINE == 1 static_cast<u64>(to_u8(s[7]) << 24 | to_u8(s[6]) << 16 | to_u8(s[5]) << 8 | to_u8(s[4])) << 32 | to_u8(s[3]) << 24 | to_u8(s[2]) << 16 | to_u8(s[1]) << 8 | to_u8(s[0]); #endif } namespace fmt { [[noreturn]] void raw_error(const char* msg); [[noreturn]] void raw_verify_error(const char* msg, const fmt_type_info* sup, u64 arg); [[noreturn]] void raw_narrow_error(const char* msg, const fmt_type_info* sup, u64 arg); } struct verify_func { template <typename T> bool operator()(T&& value) const { if (std::forward<T>(value)) { return true; } return false; } }; template <uint N> struct verify_impl { const char* cause; template <typename T> auto operator,(T&& value) const { // Verification (can be safely disabled) if (!verify_func()(std::forward<T>(value))) { fmt::raw_verify_error(cause, nullptr, N); } return verify_impl<N + 1>{cause}; } }; // Verification helper, checks several conditions delimited with comma operator inline auto verify(const char* cause) { return verify_impl<0>{cause}; } // Verification helper (returns value or lvalue reference, may require to use verify_move instead) template <typename F = verify_func, typename T> inline T verify(const char* cause, T&& value, F&& pred = F()) { if (!pred(std::forward<T>(value))) { using unref = std::remove_const_t<std::remove_reference_t<T>>; fmt::raw_verify_error(cause, fmt::get_type_info<fmt_unveil_t<unref>>(), fmt_unveil<unref>::get(value)); } return std::forward<T>(value); } // Verification helper (must be used in return expression or in place of std::move) template <typename F = verify_func, typename T> inline std::remove_reference_t<T>&& verify_move(const char* cause, T&& value, F&& pred = F()) { if (!pred(std::forward<T>(value))) { using unref = std::remove_const_t<std::remove_reference_t<T>>; fmt::raw_verify_error(cause, fmt::get_type_info<fmt_unveil_t<unref>>(), fmt_unveil<unref>::get(value)); } return std::move(value); } // narrow() function details template <typename From, typename To = void, typename = void> struct narrow_impl { // Temporarily (diagnostic) static_assert(std::is_void<To>::value, "narrow_impl<> specialization not found"); // Returns true if value cannot be represented in type To static constexpr bool test(const From& value) { // Unspecialized cases (including cast to void) always considered narrowing return true; } }; // Unsigned to unsigned narrowing template <typename From, typename To> struct narrow_impl<From, To, std::enable_if_t<std::is_unsigned<From>::value && std::is_unsigned<To>::value>> { static constexpr bool test(const From& value) { return sizeof(To) < sizeof(From) && static_cast<To>(value) != value; } }; // Signed to signed narrowing template <typename From, typename To> struct narrow_impl<From, To, std::enable_if_t<std::is_signed<From>::value && std::is_signed<To>::value>> { static constexpr bool test(const From& value) { return sizeof(To) < sizeof(From) && static_cast<To>(value) != value; } }; // Unsigned to signed narrowing template <typename From, typename To> struct narrow_impl<From, To, std::enable_if_t<std::is_unsigned<From>::value && std::is_signed<To>::value>> { static constexpr bool test(const From& value) { return sizeof(To) <= sizeof(From) && value > (static_cast<std::make_unsigned_t<To>>(-1) >> 1); } }; // Signed to unsigned narrowing (I) template <typename From, typename To> struct narrow_impl<From, To, std::enable_if_t<std::is_signed<From>::value && std::is_unsigned<To>::value && sizeof(To) >= sizeof(From)>> { static constexpr bool test(const From& value) { return value < static_cast<From>(0); } }; // Signed to unsigned narrowing (II) template <typename From, typename To> struct narrow_impl<From, To, std::enable_if_t<std::is_signed<From>::value && std::is_unsigned<To>::value && sizeof(To) < sizeof(From)>> { static constexpr bool test(const From& value) { return static_cast<std::make_unsigned_t<From>>(value) > static_cast<To>(-1); } }; // Simple type enabled (TODO: allow for To as well) template <typename From, typename To> struct narrow_impl<From, To, void_t<typename From::simple_type>> : narrow_impl<simple_t<From>, To> { }; template <typename To = void, typename From, typename = decltype(static_cast<To>(std::declval<From>()))> inline To narrow(const From& value, const char* msg = nullptr) { // Narrow check if (narrow_impl<From, To>::test(value)) { // Pack value as formatting argument fmt::raw_narrow_error(msg, fmt::get_type_info<fmt_unveil_t<From>>(), fmt_unveil<From>::get(value)); } return static_cast<To>(value); } // Returns u32 size() for container template <typename CT, typename = decltype(static_cast<u32>(std::declval<CT>().size()))> inline u32 size32(const CT& container, const char* msg = nullptr) { return narrow<u32>(container.size(), msg); } // Returns u32 size for an array template <typename T, std::size_t Size> constexpr u32 size32(const T (&)[Size], const char* msg = nullptr) { return static_cast<u32>(Size); } template <typename T1, typename = std::enable_if_t<std::is_integral<T1>::value>> constexpr bool test(const T1& value) { return value != 0; } template <typename T1, typename T2, typename = std::enable_if_t<std::is_integral<T1>::value && std::is_integral<T2>::value>> constexpr bool test(const T1& lhs, const T2& rhs) { return (lhs & rhs) != 0; } template <typename T, typename T2, typename = std::enable_if_t<std::is_integral<T>::value && std::is_integral<T2>::value>> inline bool test_and_set(T& lhs, const T2& rhs) { const bool result = (lhs & rhs) != 0; lhs |= rhs; return result; } template <typename T, typename T2, typename = std::enable_if_t<std::is_integral<T>::value && std::is_integral<T2>::value>> inline bool test_and_reset(T& lhs, const T2& rhs) { const bool result = (lhs & rhs) != 0; lhs &= ~rhs; return result; } template <typename T, typename T2, typename = std::enable_if_t<std::is_integral<T>::value && std::is_integral<T2>::value>> inline bool test_and_complement(T& lhs, const T2& rhs) { const bool result = (lhs & rhs) != 0; lhs ^= rhs; return result; } // Simplified hash algorithm for pointers. May be used in std::unordered_(map|set). template <typename T, std::size_t Align = alignof(T)> struct pointer_hash { std::size_t operator()(T* ptr) const { return reinterpret_cast<std::uintptr_t>(ptr) / Align; } }; template <typename T, std::size_t Shift = 0> struct value_hash { std::size_t operator()(T value) const { return static_cast<std::size_t>(value) >> Shift; } }; // Contains value of any POD type with fixed size and alignment. TT<> is the type converter applied. // For example, `simple_t` may be used to remove endianness. template <template <typename> class TT, std::size_t S, std::size_t A = S> struct alignas(A) any_pod { std::aligned_storage_t<S, A> data; any_pod() = default; template <typename T, typename T2 = TT<T>, typename = std::enable_if_t<std::is_pod<T2>::value && sizeof(T2) == S && alignof(T2) <= A>> any_pod(const T& value) { reinterpret_cast<T2&>(data) = value; } template <typename T, typename T2 = TT<T>, typename = std::enable_if_t<std::is_pod<T2>::value && sizeof(T2) == S && alignof(T2) <= A>> T2& as() { return reinterpret_cast<T2&>(data); } template <typename T, typename T2 = TT<T>, typename = std::enable_if_t<std::is_pod<T2>::value && sizeof(T2) == S && alignof(T2) <= A>> const T2& as() const { return reinterpret_cast<const T2&>(data); } }; using any16 = any_pod<simple_t, sizeof(u16)>; using any32 = any_pod<simple_t, sizeof(u32)>; using any64 = any_pod<simple_t, sizeof(u64)>; struct cmd64 : any64 { struct pair_t { any32 arg1; any32 arg2; }; cmd64() = default; template <typename T> cmd64(const T& value) : any64(value) { } template <typename T1, typename T2> cmd64(const T1& arg1, const T2& arg2) : any64(pair_t{arg1, arg2}) { } explicit operator bool() const { return as<u64>() != 0; } // TODO: compatibility with std::pair/std::tuple? template <typename T> decltype(auto) arg1() { return as<pair_t>().arg1.as<T>(); } template <typename T> decltype(auto) arg1() const { return as<const pair_t>().arg1.as<const T>(); } template <typename T> decltype(auto) arg2() { return as<pair_t>().arg2.as<T>(); } template <typename T> decltype(auto) arg2() const { return as<const pair_t>().arg2.as<const T>(); } }; static_assert(sizeof(cmd64) == 8 && std::is_pod<cmd64>::value, "Incorrect cmd64 type"); // Allows to define integer convertible to multiple types template <typename T, T Value, typename T1 = void, typename... Ts> struct multicast : multicast<T, Value, Ts...> { constexpr multicast() : multicast<T, Value, Ts...>() { } // Implicit conversion to desired type constexpr operator T1() const { return static_cast<T1>(Value); } }; // Recursion terminator template <typename T, T Value> struct multicast<T, Value, void> { constexpr multicast() = default; // Explicit conversion to base type explicit constexpr operator T() const { return Value; } }; // Error code type (return type), implements error reporting. Could be a template. struct error_code { // Use fixed s32 type for now s32 value; error_code() = default; // Implementation must be provided specially static s32 error_report(const fmt_type_info* sup, u64 arg, const fmt_type_info* sup2, u64 arg2); // Helper type enum class not_an_error : s32 { __not_an_error // SFINAE marker }; // __not_an_error tester template<typename ET, typename = void> struct is_error : std::integral_constant<bool, std::is_enum<ET>::value || std::is_integral<ET>::value> { }; template<typename ET> struct is_error<ET, void_t<decltype(ET::__not_an_error)>> : std::false_type { }; // Not an error constructor template<typename ET, typename = decltype(ET::__not_an_error)> error_code(const ET& value, std::nullptr_t = nullptr) : value(static_cast<s32>(value)) { } // Error constructor template<typename ET, typename = std::enable_if_t<is_error<ET>::value>> error_code(const ET& value) : value(error_report(fmt::get_type_info<fmt_unveil_t<ET>>(), fmt_unveil<ET>::get(value), nullptr, 0)) { } // Error constructor (2 args) template<typename ET, typename T2, typename = std::enable_if_t<is_error<ET>::value>> error_code(const ET& value, const T2& value2) : value(error_report(fmt::get_type_info<fmt_unveil_t<ET>>(), fmt_unveil<ET>::get(value), fmt::get_type_info<fmt_unveil_t<T2>>(), fmt_unveil<T2>::get(value2))) { } operator s32() const { return value; } }; // Helper function for error_code template <typename T> constexpr FORCE_INLINE error_code::not_an_error not_an_error(const T& value) { return static_cast<error_code::not_an_error>(static_cast<s32>(value)); } // Synchronization helper (cache-friendly busy waiting) inline void busy_wait(std::size_t cycles = 3000) { const u64 s = __rdtsc(); do _mm_pause(); while (__rdtsc() - s < cycles); } // Rotate helpers #if defined(__GNUG__) inline u8 rol8(u8 x, u8 n) { u8 result = x; __asm__("rolb %[n], %[result]" : [result] "+g" (result) : [n] "c" (n)); return result; } inline u8 ror8(u8 x, u8 n) { u8 result = x; __asm__("rorb %[n], %[result]" : [result] "+g" (result) : [n] "c" (n)); return result; } inline u16 rol16(u16 x, u16 n) { u16 result = x; __asm__("rolw %b[n], %[result]" : [result] "+g" (result) : [n] "c" (n)); return result; } inline u16 ror16(u16 x, u16 n) { u16 result = x; __asm__("rorw %b[n], %[result]" : [result] "+g" (result) : [n] "c" (n)); return result; } inline u32 rol32(u32 x, u32 n) { u32 result = x; __asm__("roll %b[n], %[result]" : [result] "+g" (result) : [n] "c" (n)); return result; } inline u32 ror32(u32 x, u32 n) { u32 result = x; __asm__("rorl %b[n], %[result]" : [result] "+g" (result) : [n] "c" (n)); return result; } inline u64 rol64(u64 x, u64 n) { u64 result = x; __asm__("rolq %b[n], %[result]" : [result] "+g" (result) : [n] "c" (n)); return result; } inline u64 ror64(u64 x, u64 n) { u64 result = x; __asm__("rorq %b[n], %[result]" : [result] "+g" (result) : [n] "c" (n)); return result; } inline u64 umulh64(u64 a, u64 b) { u64 result; __asm__("mulq %[b]" : "=d" (result) : [a] "a" (a), [b] "rm" (b)); return result; } inline s64 mulh64(s64 a, s64 b) { s64 result; __asm__("imulq %[b]" : "=d" (result) : [a] "a" (a), [b] "rm" (b)); return result; } #elif defined(_MSC_VER) inline u8 rol8(u8 x, u8 n) { return _rotl8(x, n); } inline u8 ror8(u8 x, u8 n) { return _rotr8(x, n); } inline u16 rol16(u16 x, u16 n) { return _rotl16(x, (u8)n); } inline u16 ror16(u16 x, u16 n) { return _rotr16(x, (u8)n); } inline u32 rol32(u32 x, u32 n) { return _rotl(x, (int)n); } inline u32 ror32(u32 x, u32 n) { return _rotr(x, (int)n); } inline u64 rol64(u64 x, u64 n) { return _rotl64(x, (int)n); } inline u64 ror64(u64 x, u64 n) { return _rotr64(x, (int)n); } inline u64 umulh64(u64 x, u64 y) { return __umulh(x, y); } inline s64 mulh64(s64 x, s64 y) { return __mulh(x, y); } #endif