#ifndef BETYPE_H_GUARD
#define BETYPE_H_GUARD

#include "types.h"
#include "util/endian.hpp"

#include <cstring> // std::memcpy
#include <cmath>   // std::fmaf
#include <bit>     // std::endian

#if __has_include(<x86intrin.h>)
#include <x86intrin.h> // SSE intrinsics (GCC/Clang)
#else
#include <intrin.h>    // SSE intrinsics (MSVC)
#endif

// 128-bit vector type and also se_storage<> storage type
union alignas(16) v128
{
	uchar _bytes[16];
	char _chars[16];

	template <typename T, std::size_t N, std::size_t M>
	struct masked_array_t // array type accessed as (index ^ M)
	{
		char m_data[16];

	public:
		T& operator[](std::size_t index)
		{
			return reinterpret_cast<T*>(m_data)[index ^ M];
		}

		const T& operator[](std::size_t index) const
		{
			return reinterpret_cast<const T*>(m_data)[index ^ M];
		}
	};

	// "Normal" arrays index elements directly on a little-endian host (M = 0), while
	// "reversed" arrays index from the opposite end (M = N - 1); the masks swap on a
	// big-endian host.
	template <typename T, std::size_t N = 16 / sizeof(T)>
	using normal_array_t = masked_array_t<T, N, std::endian::little == std::endian::native ? 0 : N - 1>;
	template <typename T, std::size_t N = 16 / sizeof(T)>
	using reversed_array_t = masked_array_t<T, N, std::endian::little == std::endian::native ? N - 1 : 0>;

	normal_array_t<u64> _u64;
	normal_array_t<s64> _s64;
	reversed_array_t<u64> u64r;
	reversed_array_t<s64> s64r;

	normal_array_t<u32> _u32;
	normal_array_t<s32> _s32;
	reversed_array_t<u32> u32r;
	reversed_array_t<s32> s32r;

	normal_array_t<u16> _u16;
	normal_array_t<s16> _s16;
	reversed_array_t<u16> u16r;
	reversed_array_t<s16> s16r;

	normal_array_t<u8> _u8;
	normal_array_t<s8> _s8;
	reversed_array_t<u8> u8r;
	reversed_array_t<s8> s8r;

	normal_array_t<f32> _f;
	normal_array_t<f64> _d;
	reversed_array_t<f32> fr;
	reversed_array_t<f64> dr;

	__m128 vf;
	__m128i vi;
	__m128d vd;

	struct bit_array_128
	{
		char m_data[16];

	public:
		class bit_element
		{
			u64& data;
			const u64 mask;

		public:
			bit_element(u64& data, const u64 mask)
				: data(data)
				, mask(mask)
			{
			}

			operator bool() const
			{
				return (data & mask) != 0;
			}

			bit_element& operator=(const bool right)
			{
				if (right)
				{
					data |= mask;
				}
				else
				{
					data &= ~mask;
				}
				return *this;
			}

			bit_element& operator=(const bit_element& right)
			{
				if (right)
				{
					data |= mask;
				}
				else
				{
					data &= ~mask;
				}
				return *this;
			}
		};

		// Index 0 returns the MSB and index 127 returns the LSB
		bit_element operator[](u32 index)
		{
			const auto data_ptr = reinterpret_cast<u64*>(m_data);

			if constexpr (std::endian::little == std::endian::native)
			{
				return bit_element(data_ptr[1 - (index >> 6)], 0x8000000000000000ull >> (index & 0x3F));
			}
			else
			{
				return bit_element(data_ptr[index >> 6], 0x8000000000000000ull >> (index & 0x3F));
			}
		}

		// Index 0 returns the MSB and index 127 returns the LSB
		bool operator[](u32 index) const
		{
			const auto data_ptr = reinterpret_cast<const u64*>(m_data);

			if constexpr (std::endian::little == std::endian::native)
			{
				return (data_ptr[1 - (index >> 6)] & (0x8000000000000000ull >> (index & 0x3F))) != 0;
			}
			else
			{
				return (data_ptr[index >> 6] & (0x8000000000000000ull >> (index & 0x3F))) != 0;
			}
		}
	} _bit;

	static v128 from64(u64 _0, u64 _1 = 0)
	{
		v128 ret;
		ret._u64[0] = _0;
		ret._u64[1] = _1;
		return ret;
	}

	static v128 from64r(u64 _1, u64 _0 = 0)
	{
		return from64(_0, _1);
	}

	static v128 from32(u32 _0, u32 _1 = 0, u32 _2 = 0, u32 _3 = 0)
	{
		v128 ret;
		ret._u32[0] = _0;
		ret._u32[1] = _1;
		ret._u32[2] = _2;
		ret._u32[3] = _3;
		return ret;
	}

	static v128 from32r(u32 _3, u32 _2 = 0, u32 _1 = 0, u32 _0 = 0)
	{
		return from32(_0, _1, _2, _3);
	}

	static v128 from32p(u32 value)
	{
		v128 ret;
		ret.vi = _mm_set1_epi32(static_cast<s32>(value));
		return ret;
	}

	static v128 from16p(u16 value)
	{
		v128 ret;
		ret.vi = _mm_set1_epi16(static_cast<s16>(value));
		return ret;
	}

	static v128 from8p(u8 value)
	{
		v128 ret;
		ret.vi = _mm_set1_epi8(static_cast<s8>(value));
		return ret;
	}

	static v128 fromBit(u32 bit)
	{
		v128 ret = {};
		ret._bit[bit] = true;
		return ret;
	}

	static v128 fromV(__m128i value)
	{
		v128 ret;
		ret.vi = value;
		return ret;
	}

	static v128 fromF(__m128 value)
	{
		v128 ret;
		ret.vf = value;
		return ret;
	}

	static v128 fromD(__m128d value)
	{
		v128 ret;
		ret.vd = value;
		return ret;
	}

	// Unaligned load with optional index offset
	static v128 loadu(const void* ptr, std::size_t index = 0)
	{
		v128 ret;
		std::memcpy(&ret, static_cast<const u8*>(ptr) + index * sizeof(v128), sizeof(v128));
		return ret;
	}

	// Unaligned store with optional index offset
	static void storeu(v128 value, void* ptr, std::size_t index = 0)
	{
		std::memcpy(static_cast<u8*>(ptr) + index * sizeof(v128), &value, sizeof(v128));
	}

	static inline v128 add8(const v128& left, const v128& right)
	{
		return fromV(_mm_add_epi8(left.vi, right.vi));
	}

	static inline v128 add16(const v128& left, const v128& right)
	{
		return fromV(_mm_add_epi16(left.vi, right.vi));
	}

	static inline v128 add32(const v128& left, const v128& right)
	{
		return fromV(_mm_add_epi32(left.vi, right.vi));
	}

	static inline v128 addfs(const v128& left, const v128& right)
	{
		return fromF(_mm_add_ps(left.vf, right.vf));
	}

	static inline v128 addfd(const v128& left, const v128& right)
	{
		return fromD(_mm_add_pd(left.vd, right.vd));
	}

	static inline v128 sub8(const v128& left, const v128& right)
	{
		return fromV(_mm_sub_epi8(left.vi, right.vi));
	}

	static inline v128 sub16(const v128& left, const v128& right)
	{
		return fromV(_mm_sub_epi16(left.vi, right.vi));
	}

	static inline v128 sub32(const v128& left, const v128& right)
	{
		return fromV(_mm_sub_epi32(left.vi, right.vi));
	}

	static inline v128 subfs(const v128& left, const v128& right)
	{
		return fromF(_mm_sub_ps(left.vf, right.vf));
	}

	static inline v128 subfd(const v128& left, const v128& right)
	{
		return fromD(_mm_sub_pd(left.vd, right.vd));
	}

	static inline v128 maxu8(const v128& left, const v128& right)
	{
		return fromV(_mm_max_epu8(left.vi, right.vi));
	}

	static inline v128 minu8(const v128& left, const v128& right)
	{
		return fromV(_mm_min_epu8(left.vi, right.vi));
	}

	static inline v128 eq8(const v128& left, const v128& right)
	{
		return fromV(_mm_cmpeq_epi8(left.vi, right.vi));
	}

	static inline v128 eq16(const v128& left, const v128& right)
	{
		return fromV(_mm_cmpeq_epi16(left.vi, right.vi));
	}

	static inline v128 eq32(const v128& left, const v128& right)
	{
		return fromV(_mm_cmpeq_epi32(left.vi, right.vi));
	}

	static inline v128 eq32f(const v128& left, const v128& right)
	{
		return fromF(_mm_cmpeq_ps(left.vf, right.vf));
	}

	static inline v128 eq64f(const v128& left, const v128& right)
	{
		return fromD(_mm_cmpeq_pd(left.vd, right.vd));
	}

	static inline bool use_fma = false;

	static inline v128 fma32f(v128 a, const v128& b, const v128& c)
	{
#ifndef __FMA__
		if (use_fma) [[likely]]
		{
#ifdef _MSC_VER
			a.vf = _mm_fmadd_ps(a.vf, b.vf, c.vf);
			return a;
#else
			__asm__("vfmadd213ps %[c], %[b], %[a]"
				: [a] "+x" (a.vf)
				: [b] "x" (b.vf)
				, [c] "x" (c.vf));
			return a;
#endif
		}

		for (int i = 0; i < 4; i++)
		{
			a._f[i] = std::fmaf(a._f[i], b._f[i], c._f[i]);
		}

		return a;
#else
		a.vf = _mm_fmadd_ps(a.vf, b.vf, c.vf);
		return a;
#endif
	}

	bool operator==(const v128& right) const
	{
		return _mm_movemask_epi8(v128::eq32(*this, right).vi) == 0xffff;
	}

	bool operator!=(const v128& right) const
	{
		return !operator==(right);
	}

	// result = (~left) & (right)
	static inline v128 andnot(const v128& left, const v128& right)
	{
		return fromV(_mm_andnot_si128(left.vi, right.vi));
	}

	void clear()
	{
		*this = {};
	}
};

template <typename T, std::size_t N, std::size_t M>
struct offset32_array<v128::masked_array_t<T, N, M>>
{
	template <typename Arg>
	static inline u32 index32(const Arg& arg)
	{
		return u32{sizeof(T)} * (static_cast<u32>(arg) ^ static_cast<u32>(M));
	}
};

inline v128 operator|(const v128& left, const v128& right)
{
	return v128::fromV(_mm_or_si128(left.vi, right.vi));
}

inline v128 operator&(const v128& left, const v128& right)
{
	return v128::fromV(_mm_and_si128(left.vi, right.vi));
}

inline v128 operator^(const v128& left, const v128& right)
{
	return v128::fromV(_mm_xor_si128(left.vi, right.vi));
}

inline v128 operator~(const v128& other)
{
	return other ^ v128::from32p(UINT32_MAX); // XOR with ones
}
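// Usage sketch (illustrative only, not part of the original header). It exercises only
// the v128 helpers declared above; the concrete lane values are assumptions chosen for
// the example.
//
//   u32 buf[4] = {1, 2, 3, 4};
//   v128 a = v128::loadu(buf);                       // unaligned 16-byte load
//   v128 b = v128::from32(10, 20, 30, 40);           // b._u32[0] == 10 ... b._u32[3] == 40
//   v128 sum = v128::add32(a, b);                    // per-lane 32-bit addition
//   v128 mask = v128::eq32(sum, v128::from32p(11));  // all-ones in lanes where sum == 11
//   bool msb = sum._bit[0];                          // _bit[0] reads the most significant bit
//   v128::storeu(sum, buf);                          // unaligned 16-byte store back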
using stx::se_t;
using stx::se_storage;

// se_t<> with native endianness
template <typename T, std::size_t Align = alignof(T)>
using nse_t = se_t<T, false, Align>;

template <typename T, std::size_t Align = alignof(T)>
using be_t = se_t<T, std::endian::little == std::endian::native, Align>;
template <typename T, std::size_t Align = alignof(T)>
using le_t = se_t<T, std::endian::big == std::endian::native, Align>;

// Type converter: converts native endianness arithmetic/enum types to appropriate se_t<> type
template <typename T, bool Se, typename = void>
struct to_se
{
	template <typename T2, typename = void>
	struct to_se_
	{
		using type = T2;
	};

	template <typename T2>
	struct to_se_<T2, std::enable_if_t<std::is_arithmetic<T2>::value || std::is_enum<T2>::value>>
	{
		using type = std::conditional_t<(sizeof(T2) > 1), se_t<T2, Se>, T2>;
	};

	// Convert arithmetic and enum types
	using type = typename to_se_<T>::type;
};

template <bool Se>
struct to_se<v128, Se>
{
	using type = se_t<v128, Se>;
};

template <bool Se>
struct to_se<u128, Se>
{
	using type = se_t<u128, Se>;
};

template <bool Se>
struct to_se<s128, Se>
{
	using type = se_t<s128, Se>;
};

template <typename T, bool Se>
struct to_se<const T, Se, std::enable_if_t<!std::is_array<T>::value>>
{
	// Move const qualifier
	using type = const typename to_se<T, Se>::type;
};

template <typename T, bool Se>
struct to_se<volatile T, Se, std::enable_if_t<!std::is_array<T>::value && !std::is_const<T>::value>>
{
	// Move volatile qualifier
	using type = volatile typename to_se<T, Se>::type;
};

template <typename T, bool Se>
struct to_se<T[], Se>
{
	// Move array qualifier
	using type = typename to_se<T, Se>::type[];
};

template <typename T, std::size_t N, bool Se>
struct to_se<T[N], Se>
{
	// Move array qualifier
	using type = typename to_se<T, Se>::type[N];
};

// BE/LE aliases for to_se<>
template <typename T>
using to_be_t = typename to_se<T, std::endian::little == std::endian::native>::type;
template <typename T>
using to_le_t = typename to_se<T, std::endian::big == std::endian::native>::type;
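// Sketch of what the converters above yield (illustrative only; the exact type
// equivalences also depend on the default alignment argument of se_t<> and are shown
// here as assumptions, not as guarantees of this header):
//
//   static_assert(std::is_same_v<to_be_t<u32>, be_t<u32>>);             // multi-byte arithmetic types get wrapped
//   static_assert(std::is_same_v<to_be_t<u8>, u8>);                     // single-byte types are left untouched
//   static_assert(std::is_same_v<to_be_t<const u32>, const be_t<u32>>); // cv and array qualifiers are carried over
//
//   be_t<u32> x = 0x11223344;  // stored byte-swapped in memory on a little-endian host
//   u32 y = x;                 // reading converts back to native byte order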
// BE/LE aliases for atomic_t
template <typename T, std::size_t Align = alignof(T)>
using atomic_be_t = atomic_t<be_t<T>, Align>;
template <typename T, std::size_t Align = alignof(T)>
using atomic_le_t = atomic_t<le_t<T>, Align>;

template <typename T, bool Se, std::size_t Align>
struct fmt_unveil<se_t<T, Se, Align>, void>
{
	using type = typename fmt_unveil<T>::type;

	static inline auto get(const se_t<T, Se, Align>& arg)
	{
		return fmt_unveil<T>::get(arg);
	}
};

static_assert(be_t<u16>(1) + be_t<u32>(2) + be_t<u64>(3) == 6);
static_assert(le_t<u16>(1) + le_t<u32>(2) + le_t<u64>(3) == 6);

#endif // BETYPE_H_GUARD
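// Example of how the aliases are typically combined (illustrative only; GuestHeader is a
// hypothetical structure, not something defined by this header):
//
//   struct GuestHeader
//   {
//       be_t<u32> magic;        // always stored big-endian, regardless of host
//       be_t<u16> version;
//       u8 flags;               // single byte, endianness does not apply
//       atomic_be_t<u32> refs;  // big-endian field accessed atomically
//   };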