Replace uint128_wrapper with uint128_fallback

Victor Zverovich 2022-04-02 10:03:29 -07:00
parent b4dc7a1d34
commit 02eb215f2f
3 changed files with 50 additions and 52 deletions

include/fmt/core.h

@@ -390,7 +390,7 @@ template <typename T> struct std_string_view {};
 #elif defined(__SIZEOF_INT128__) && !FMT_NVCC && \
     !(FMT_CLANG_VERSION && FMT_MSC_VER)
 #  define FMT_USE_INT128 1
-using int128_opt = __int128_t;  // An optional 128-bit integer.
+using int128_opt = __int128_t;  // An optional native 128-bit integer.
 using uint128_opt = __uint128_t;
 template <typename T> inline auto convert_for_visit(T value) -> T {
   return value;

include/fmt/format-inl.h

@@ -733,39 +733,31 @@ FMT_INLINE FMT_CONSTEXPR20 digits::result grisu_gen_digits(
   }
 }
 
-// A 128-bit integer type used internally.
-struct uint128_wrapper {
-  uint128_wrapper() = default;
-
-  uint64_t high_;
-  uint64_t low_;
-
-  constexpr uint128_wrapper(uint64_t high, uint64_t low) noexcept
-      : high_{high}, low_{low} {}
-
-  constexpr uint64_t high() const noexcept { return high_; }
-  constexpr uint64_t low() const noexcept { return low_; }
-
-  uint128_wrapper& operator+=(uint64_t n) noexcept {
-#if FMT_HAS_BUILTIN(__builtin_addcll)
-    unsigned long long carry;
-    low_ = __builtin_addcll(low_, n, 0, &carry);
-    high_ += carry;
-#elif FMT_HAS_BUILTIN(__builtin_ia32_addcarryx_u64)
-    unsigned long long result;
-    auto carry = __builtin_ia32_addcarryx_u64(0, low_, n, &result);
-    low_ = result;
-    high_ += carry;
-#elif defined(_MSC_VER) && defined(_M_X64)
-    auto carry = _addcarry_u64(0, low_, n, &low_);
-    _addcarry_u64(carry, high_, 0, &high_);
-#else
-    low_ += n;
-    high_ += (low_ < n ? 1 : 0);
-#endif
+inline FMT_CONSTEXPR20 uint128_fallback& uint128_fallback::operator+=(
+    uint64_t n) noexcept {
+  if (is_constant_evaluated()) {
+    lo_ += n;
+    hi_ += (lo_ < n ? 1 : 0);
     return *this;
   }
-};
+#if FMT_HAS_BUILTIN(__builtin_addcll)
+  unsigned long long carry;
+  lo_ = __builtin_addcll(lo_, n, 0, &carry);
+  hi_ += carry;
+#elif FMT_HAS_BUILTIN(__builtin_ia32_addcarryx_u64)
+  unsigned long long result;
+  auto carry = __builtin_ia32_addcarryx_u64(0, lo_, n, &result);
+  lo_ = result;
+  hi_ += carry;
+#elif defined(_MSC_VER) && defined(_M_X64)
+  auto carry = _addcarry_u64(0, lo_, n, &lo_);
+  _addcarry_u64(carry, hi_, 0, &hi_);
+#else
+  lo_ += n;
+  hi_ += (lo_ < n ? 1 : 0);
+#endif
+  return *this;
+}
 
 // Compilers should be able to optimize this into the ror instruction.
 FMT_CONSTEXPR inline uint32_t rotr(uint32_t n, uint32_t r) noexcept {
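A note on the portable #else branch above: unsigned addition wraps modulo 2^64, and the wrapped sum is smaller than the addend exactly when the addition overflowed, so hi_ is bumped by one on carry. A minimal standalone sketch of the idiom (u128 and u128_add64 are hypothetical names, not fmt API):

#include <cassert>
#include <cstdint>

// Hypothetical standalone model of the #else branch above.
struct u128 { uint64_t hi, lo; };

inline u128 u128_add64(u128 x, uint64_t n) {
  x.lo += n;                   // wraps modulo 2^64 on overflow
  x.hi += (x.lo < n ? 1 : 0);  // wrapped sum < n exactly when a carry occurred
  return x;
}

int main() {
  u128 x = {0, 0xffffffffffffffffULL};
  x = u128_add64(x, 1);  // overflow of the low word carries into the high word
  assert(x.hi == 1 && x.lo == 0);
  return 0;
}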
@@ -777,16 +769,14 @@ FMT_CONSTEXPR inline uint64_t rotr(uint64_t n, uint32_t r) noexcept {
   return (n >> r) | (n << (64 - r));
 }
 
-// Implementation of Dragonbox algorithm: https://github.com/jk-jeon/dragonbox.
-namespace dragonbox {
 // Computes 128-bit result of multiplication of two 64-bit unsigned integers.
-inline uint128_wrapper umul128(uint64_t x, uint64_t y) noexcept {
+inline uint128_fallback umul128(uint64_t x, uint64_t y) noexcept {
 #if FMT_USE_INT128
   auto p = static_cast<uint128_opt>(x) * static_cast<uint128_opt>(y);
   return {static_cast<uint64_t>(p >> 64), static_cast<uint64_t>(p)};
 #elif defined(_MSC_VER) && defined(_M_X64)
-  uint128_wrapper result;
-  result.low_ = _umul128(x, y, &result.high_);
+  auto result = uint128_fallback();
+  result.lo_ = _umul128(x, y, &result.hi_);
   return result;
 #else
   const uint64_t mask = static_cast<uint64_t>(max_value<uint32_t>());
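The #else branch is truncated in this hunk; it assembles the 128-bit product from 32-bit halves. A self-contained sketch of that schoolbook decomposition (umul128_portable is a hypothetical name, and fmt's exact partial-sum order may differ):

#include <cassert>
#include <cstdint>

// Schoolbook 64x64 -> 128 multiply from 32-bit halves; a sketch of the
// technique behind the #else branch, not fmt's exact code.
inline void umul128_portable(uint64_t x, uint64_t y, uint64_t& hi,
                             uint64_t& lo) {
  const uint64_t mask = 0xffffffffULL;
  uint64_t a = x >> 32, b = x & mask;  // x == a * 2^32 + b
  uint64_t c = y >> 32, d = y & mask;  // y == c * 2^32 + d
  uint64_t ac = a * c, ad = a * d, bc = b * c, bd = b * d;
  // Column sum at bit 32; at most ~2^34, so it cannot overflow 64 bits.
  uint64_t mid = (bd >> 32) + (ad & mask) + (bc & mask);
  lo = (mid << 32) | (bd & mask);
  hi = ac + (ad >> 32) + (bc >> 32) + (mid >> 32);
}

int main() {
  uint64_t hi, lo;
  umul128_portable(0xffffffffffffffffULL, 0xffffffffffffffffULL, hi, lo);
  // (2^64 - 1)^2 == 2^128 - 2^65 + 1
  assert(hi == 0xfffffffffffffffeULL && lo == 1);
  return 0;
}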
@@ -808,6 +798,8 @@ inline uint128_wrapper umul128(uint64_t x, uint64_t y) noexcept {
 #endif
 }
 
+// Implementation of Dragonbox algorithm: https://github.com/jk-jeon/dragonbox.
+namespace dragonbox {
 // Computes upper 64 bits of multiplication of two 64-bit unsigned integers.
 inline uint64_t umul128_upper64(uint64_t x, uint64_t y) noexcept {
 #if FMT_USE_INT128
@@ -822,9 +814,9 @@ inline uint64_t umul128_upper64(uint64_t x, uint64_t y) noexcept {
 
 // Computes upper 128 bits of multiplication of a 64-bit unsigned integer and a
 // 128-bit unsigned integer.
-inline uint128_wrapper umul192_upper128(uint64_t x,
-                                        uint128_wrapper y) noexcept {
-  uint128_wrapper r = umul128(x, y.high());
+inline uint128_fallback umul192_upper128(uint64_t x,
+                                         uint128_fallback y) noexcept {
+  uint128_fallback r = umul128(x, y.high());
   r += umul128_upper64(x, y.low());
   return r;
 }
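Why two multiplies suffice in umul192_upper128: with y = y_hi * 2^64 + y_lo, the 192-bit product is x * y_hi * 2^64 + x * y_lo, so its upper 128 bits are x * y_hi plus the upper 64 bits of x * y_lo; the += above absorbs the carry. A scaled-down check of the identity (32x64 in place of 64x128 so the full product fits in __int128; assumes a compiler with __SIZEOF_INT128__):

#include <cassert>
#include <cstdint>

int main() {
  using u128 = unsigned __int128;
  // Scaled-down model: "64-bit" words become 32-bit so the full product
  // is directly computable for comparison.
  uint32_t x = 0x9abcdef0u;
  uint64_t y = 0xfedcba9876543210ULL;
  uint32_t y_hi = static_cast<uint32_t>(y >> 32);
  uint32_t y_lo = static_cast<uint32_t>(y);
  uint64_t direct = static_cast<uint64_t>((static_cast<u128>(x) * y) >> 32);
  uint64_t via_identity = static_cast<uint64_t>(x) * y_hi +
                          ((static_cast<uint64_t>(x) * y_lo) >> 32);
  assert(direct == via_identity);  // upper bits from two narrow multiplies
  return 0;
}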
@@ -837,10 +829,10 @@ inline uint64_t umul96_upper64(uint32_t x, uint64_t y) noexcept {
 
 // Computes lower 128 bits of multiplication of a 64-bit unsigned integer and a
 // 128-bit unsigned integer.
-inline uint128_wrapper umul192_lower128(uint64_t x,
-                                        uint128_wrapper y) noexcept {
+inline uint128_fallback umul192_lower128(uint64_t x,
+                                         uint128_fallback y) noexcept {
   uint64_t high = x * y.high();
-  uint128_wrapper high_low = umul128(x, y.low());
+  uint128_fallback high_low = umul128(x, y.low());
   return {high + high_low.high(), high_low.low()};
 }
 
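umul192_lower128 mirrors this from the low end: the bottom word is low64(x * y_lo) and the next word is x * y_hi + high64(x * y_lo), which is why the plain 64-bit multiply x * y.high() may wrap harmlessly. The same scaled-down model as above:

#include <cassert>
#include <cstdint>

int main() {
  using u128 = unsigned __int128;
  // Same 32/64-bit model as before (assumes __SIZEOF_INT128__).
  uint32_t x = 0x9abcdef0u;
  uint64_t y = 0xfedcba9876543210ULL;
  uint32_t y_hi = static_cast<uint32_t>(y >> 32);
  uint32_t y_lo = static_cast<uint32_t>(y);
  uint64_t direct = static_cast<uint64_t>(static_cast<u128>(x) * y);
  // Wraps modulo 2^32, just like x * y.high() wraps modulo 2^64.
  uint32_t high = static_cast<uint32_t>(static_cast<uint64_t>(x) * y_hi);
  uint64_t high_low = static_cast<uint64_t>(x) * y_lo;
  uint64_t via_identity =
      (static_cast<uint64_t>(high + static_cast<uint32_t>(high_low >> 32))
       << 32) |
      static_cast<uint32_t>(high_low);
  assert(direct == via_identity);
  return 0;
}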
@@ -1017,13 +1009,13 @@ template <> struct cache_accessor<float> {
 
 template <> struct cache_accessor<double> {
   using carrier_uint = float_info<double>::carrier_uint;
-  using cache_entry_type = uint128_wrapper;
+  using cache_entry_type = uint128_fallback;
 
-  static uint128_wrapper get_cached_power(int k) noexcept {
+  static uint128_fallback get_cached_power(int k) noexcept {
     FMT_ASSERT(k >= float_info<double>::min_k && k <= float_info<double>::max_k,
                "k is out of range");
 
-    static constexpr const uint128_wrapper pow10_significands[] = {
+    static constexpr const uint128_fallback pow10_significands[] = {
 #if FMT_USE_FULL_CACHE_DRAGONBOX
         {0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7b},
         {0x9faacf3df73609b1, 0x77b191618c54e9ad},
@@ -1695,7 +1687,7 @@ template <> struct cache_accessor<double> {
     int offset = k - kb;
 
     // Get base cache.
-    uint128_wrapper base_cache = pow10_significands[cache_index];
+    uint128_fallback base_cache = pow10_significands[cache_index];
     if (offset == 0) return base_cache;
 
     // Compute the required amount of bit-shift.
@@ -1704,8 +1696,8 @@ template <> struct cache_accessor<double> {
 
     // Try to recover the real cache.
     uint64_t pow5 = powers_of_5_64[offset];
-    uint128_wrapper recovered_cache = umul128(base_cache.high(), pow5);
-    uint128_wrapper middle_low = umul128(base_cache.low(), pow5);
+    uint128_fallback recovered_cache = umul128(base_cache.high(), pow5);
+    uint128_fallback middle_low = umul128(base_cache.low(), pow5);
 
     recovered_cache += middle_low.high();
@@ -1713,8 +1705,8 @@ template <> struct cache_accessor<double> {
     uint64_t middle_to_low = recovered_cache.low() << (64 - alpha);
 
     recovered_cache =
-        uint128_wrapper{(recovered_cache.low() >> alpha) | high_to_middle,
-                        ((middle_low.low() >> alpha) | middle_to_low)};
+        uint128_fallback{(recovered_cache.low() >> alpha) | high_to_middle,
+                         ((middle_low.low() >> alpha) | middle_to_low)};
     FMT_ASSERT(recovered_cache.low() + 1 != 0, "");
     return {recovered_cache.high(), recovered_cache.low() + 1};
 #endif
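Background for the three hunks above: in the compressed-cache configuration only a subset of the power-of-10 significands is stored, and since 10^(kb + offset) = 10^kb * 5^offset * 2^offset, a missing entry is recovered by multiplying a stored one by the 64-bit constant 5^offset and realigning with a small shift (the power of two only moves the binary exponent), then rounding up via low() + 1. A tiny self-contained check of the underlying identity:

#include <cassert>
#include <cstdint>

int main() {
  // 10^(kb + offset) == 10^kb * 5^offset * 2^offset, here with kb = 6 and
  // offset = 3: recovering 10^9 from a stored 10^6 takes only a multiply
  // by 5^3 and a left shift by 3.
  uint64_t base = 1000000;  // stands in for a stored cache entry (10^6)
  uint64_t pow5 = 125;      // 5^3, cf. powers_of_5_64[offset]
  assert((base * pow5) << 3 == 1000000000ULL);  // 10^9
  return 0;
}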

include/fmt/format.h

@@ -321,11 +321,16 @@ inline auto is_big_endian() -> bool {
 class uint128_fallback {
  private:
   uint64_t lo_, hi_;
-  constexpr uint128_fallback(uint64_t hi, uint64_t lo) : lo_(lo), hi_(hi) {}
 
+  friend uint128_fallback umul128(uint64_t x, uint64_t y) noexcept;
+
  public:
+  constexpr uint128_fallback(uint64_t hi, uint64_t lo) : lo_(lo), hi_(hi) {}
   constexpr uint128_fallback(uint64_t value = 0) : lo_(value), hi_(0) {}
 
+  constexpr uint64_t high() const noexcept { return hi_; }
+  constexpr uint64_t low() const noexcept { return lo_; }
+
   template <typename T, FMT_ENABLE_IF(std::is_integral<T>::value)>
   constexpr explicit operator T() const {
     return static_cast<T>(lo_);
@@ -384,6 +389,7 @@ class uint128_fallback {
     lo_ = new_lo;
     hi_ = new_hi;
   }
+  FMT_CONSTEXPR20 uint128_fallback& operator+=(uint64_t n) noexcept;
 };
 
 using uint128_t = conditional_t<FMT_USE_INT128, uint128_opt, uint128_fallback>;
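Taken together, the class now exposes everything the format-inl.h changes rely on: a public (hi, lo) constructor, high()/low() accessors, a umul128 friend, and the out-of-line operator+=. A reduced standalone copy to show the intended usage (the real class lives in fmt's detail namespace and has more members; operator+= is inlined here for brevity):

#include <cassert>
#include <cstdint>

// Reduced standalone model of the interface shown above.
class uint128_fallback {
 private:
  uint64_t lo_, hi_;

 public:
  constexpr uint128_fallback(uint64_t hi, uint64_t lo) : lo_(lo), hi_(hi) {}
  constexpr uint128_fallback(uint64_t value = 0) : lo_(value), hi_(0) {}
  constexpr uint64_t high() const noexcept { return hi_; }
  constexpr uint64_t low() const noexcept { return lo_; }
  uint128_fallback& operator+=(uint64_t n) noexcept {
    lo_ += n;
    hi_ += (lo_ < n ? 1 : 0);  // propagate the carry, as in the #else branch
    return *this;
  }
};

int main() {
  uint128_fallback v(1, 0xffffffffffffffffULL);
  v += 1;  // carries into the high word
  assert(v.high() == 2 && v.low() == 0);
  return 0;
}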