mirror of
https://github.com/fmtlib/fmt.git
synced 2025-01-26 12:35:32 +00:00
Implement a new formatting algorithm for small given precision (#3269)
Implement the formatting algorithm for small given precision discussed in https://github.com/fmtlib/fmt/issues/3262 and https://github.com/fmtlib/fmt/pull/2750
This commit is contained in:
parent
bfc0924eac
commit
0f42c17d85
@ -174,58 +174,10 @@ FMT_CONSTEXPR inline uint64_t rotr(uint64_t n, uint32_t r) noexcept {
|
|||||||
return (n >> r) | (n << (64 - r));
|
return (n >> r) | (n << (64 - r));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Computes 128-bit result of multiplication of two 64-bit unsigned integers.
|
|
||||||
inline uint128_fallback umul128(uint64_t x, uint64_t y) noexcept {
|
|
||||||
#if FMT_USE_INT128
|
|
||||||
auto p = static_cast<uint128_opt>(x) * static_cast<uint128_opt>(y);
|
|
||||||
return {static_cast<uint64_t>(p >> 64), static_cast<uint64_t>(p)};
|
|
||||||
#elif defined(_MSC_VER) && defined(_M_X64)
|
|
||||||
auto result = uint128_fallback();
|
|
||||||
result.lo_ = _umul128(x, y, &result.hi_);
|
|
||||||
return result;
|
|
||||||
#else
|
|
||||||
const uint64_t mask = static_cast<uint64_t>(max_value<uint32_t>());
|
|
||||||
|
|
||||||
uint64_t a = x >> 32;
|
|
||||||
uint64_t b = x & mask;
|
|
||||||
uint64_t c = y >> 32;
|
|
||||||
uint64_t d = y & mask;
|
|
||||||
|
|
||||||
uint64_t ac = a * c;
|
|
||||||
uint64_t bc = b * c;
|
|
||||||
uint64_t ad = a * d;
|
|
||||||
uint64_t bd = b * d;
|
|
||||||
|
|
||||||
uint64_t intermediate = (bd >> 32) + (ad & mask) + (bc & mask);
|
|
||||||
|
|
||||||
return {ac + (intermediate >> 32) + (ad >> 32) + (bc >> 32),
|
|
||||||
(intermediate << 32) + (bd & mask)};
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
// Implementation of Dragonbox algorithm: https://github.com/jk-jeon/dragonbox.
|
// Implementation of Dragonbox algorithm: https://github.com/jk-jeon/dragonbox.
|
||||||
namespace dragonbox {
|
namespace dragonbox {
|
||||||
// Computes upper 64 bits of multiplication of two 64-bit unsigned integers.
|
|
||||||
inline uint64_t umul128_upper64(uint64_t x, uint64_t y) noexcept {
|
|
||||||
#if FMT_USE_INT128
|
|
||||||
auto p = static_cast<uint128_opt>(x) * static_cast<uint128_opt>(y);
|
|
||||||
return static_cast<uint64_t>(p >> 64);
|
|
||||||
#elif defined(_MSC_VER) && defined(_M_X64)
|
|
||||||
return __umulh(x, y);
|
|
||||||
#else
|
|
||||||
return umul128(x, y).high();
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
// Computes upper 128 bits of multiplication of a 64-bit unsigned integer and a
|
|
||||||
// 128-bit unsigned integer.
|
|
||||||
inline uint128_fallback umul192_upper128(uint64_t x,
|
|
||||||
uint128_fallback y) noexcept {
|
|
||||||
uint128_fallback r = umul128(x, y.high());
|
|
||||||
r += umul128_upper64(x, y.low());
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Computes upper 64 bits of multiplication of a 32-bit unsigned integer and a
|
// Computes upper 64 bits of multiplication of a 32-bit unsigned integer and a
|
||||||
// 64-bit unsigned integer.
|
// 64-bit unsigned integer.
|
||||||
inline uint64_t umul96_upper64(uint32_t x, uint64_t y) noexcept {
|
inline uint64_t umul96_upper64(uint32_t x, uint64_t y) noexcept {
|
||||||
@ -247,19 +199,7 @@ inline uint64_t umul96_lower64(uint32_t x, uint64_t y) noexcept {
|
|||||||
return x * y;
|
return x * y;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Computes floor(log10(pow(2, e))) for e in [-2620, 2620] using the method from
|
|
||||||
// https://fmt.dev/papers/Dragonbox.pdf#page=28, section 6.1.
|
|
||||||
inline int floor_log10_pow2(int e) noexcept {
|
|
||||||
FMT_ASSERT(e <= 2620 && e >= -2620, "too large exponent");
|
|
||||||
static_assert((-1 >> 1) == -1, "right shift is not arithmetic");
|
|
||||||
return (e * 315653) >> 20;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Various fast log computations.
|
// Various fast log computations.
|
||||||
inline int floor_log2_pow10(int e) noexcept {
|
|
||||||
FMT_ASSERT(e <= 1233 && e >= -1233, "too large exponent");
|
|
||||||
return (e * 1741647) >> 19;
|
|
||||||
}
|
|
||||||
inline int floor_log10_pow2_minus_log10_4_over_3(int e) noexcept {
|
inline int floor_log10_pow2_minus_log10_4_over_3(int e) noexcept {
|
||||||
FMT_ASSERT(e <= 2936 && e >= -2985, "too large exponent");
|
FMT_ASSERT(e <= 2936 && e >= -2985, "too large exponent");
|
||||||
return (e * 631305 - 261663) >> 21;
|
return (e * 631305 - 261663) >> 21;
|
||||||
@ -1040,8 +980,23 @@ template <> struct cache_accessor<double> {
|
|||||||
{0xfcf62c1dee382c42, 0x46729e03dd9ed7b6},
|
{0xfcf62c1dee382c42, 0x46729e03dd9ed7b6},
|
||||||
{0x9e19db92b4e31ba9, 0x6c07a2c26a8346d2},
|
{0x9e19db92b4e31ba9, 0x6c07a2c26a8346d2},
|
||||||
{0xc5a05277621be293, 0xc7098b7305241886},
|
{0xc5a05277621be293, 0xc7098b7305241886},
|
||||||
{ 0xf70867153aa2db38,
|
{0xf70867153aa2db38, 0xb8cbee4fc66d1ea8},
|
||||||
0xb8cbee4fc66d1ea8 }
|
{0x9a65406d44a5c903, 0x737f74f1dc043329},
|
||||||
|
{0xc0fe908895cf3b44, 0x505f522e53053ff3},
|
||||||
|
{0xf13e34aabb430a15, 0x647726b9e7c68ff0},
|
||||||
|
{0x96c6e0eab509e64d, 0x5eca783430dc19f6},
|
||||||
|
{0xbc789925624c5fe0, 0xb67d16413d132073},
|
||||||
|
{0xeb96bf6ebadf77d8, 0xe41c5bd18c57e890},
|
||||||
|
{0x933e37a534cbaae7, 0x8e91b962f7b6f15a},
|
||||||
|
{0xb80dc58e81fe95a1, 0x723627bbb5a4adb1},
|
||||||
|
{0xe61136f2227e3b09, 0xcec3b1aaa30dd91d},
|
||||||
|
{0x8fcac257558ee4e6, 0x213a4f0aa5e8a7b2},
|
||||||
|
{0xb3bd72ed2af29e1f, 0xa988e2cd4f62d19e},
|
||||||
|
{0xe0accfa875af45a7, 0x93eb1b80a33b8606},
|
||||||
|
{0x8c6c01c9498d8b88, 0xbc72f130660533c4},
|
||||||
|
{0xaf87023b9bf0ee6a, 0xeb8fad7c7f8680b5},
|
||||||
|
{ 0xdb68c2ca82ed2a05,
|
||||||
|
0xa67398db9f6820e2 }
|
||||||
#else
|
#else
|
||||||
{0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7b},
|
{0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7b},
|
||||||
{0xce5d73ff402d98e3, 0xfb0a3d212dc81290},
|
{0xce5d73ff402d98e3, 0xfb0a3d212dc81290},
|
||||||
@ -1065,8 +1020,9 @@ template <> struct cache_accessor<double> {
|
|||||||
{0x8da471a9de737e24, 0x5ceaecfed289e5d3},
|
{0x8da471a9de737e24, 0x5ceaecfed289e5d3},
|
||||||
{0xe4d5e82392a40515, 0x0fabaf3feaa5334b},
|
{0xe4d5e82392a40515, 0x0fabaf3feaa5334b},
|
||||||
{0xb8da1662e7b00a17, 0x3d6a751f3b936244},
|
{0xb8da1662e7b00a17, 0x3d6a751f3b936244},
|
||||||
{ 0x95527a5202df0ccb,
|
{0x95527a5202df0ccb, 0x0f37801e0c43ebc9},
|
||||||
0x0f37801e0c43ebc9 }
|
{ 0xf13e34aabb430a15,
|
||||||
|
0x647726b9e7c68ff0 }
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -1169,6 +1125,10 @@ template <> struct cache_accessor<double> {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
FMT_FUNC uint128_fallback get_cached_power(int k) noexcept {
|
||||||
|
return cache_accessor<double>::get_cached_power(k);
|
||||||
|
}
|
||||||
|
|
||||||
// Various integer checks
|
// Various integer checks
|
||||||
template <class T>
|
template <class T>
|
||||||
bool is_left_endpoint_integer_shorter_interval(int exponent) noexcept {
|
bool is_left_endpoint_integer_shorter_interval(int exponent) noexcept {
|
||||||
|
@ -495,6 +495,16 @@ FMT_CONSTEXPR20 inline auto countl_zero(uint32_t n) -> int {
|
|||||||
return lz;
|
return lz;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
FMT_CONSTEXPR20 inline auto countl_zero(uint64_t n) -> int {
|
||||||
|
#ifdef FMT_BUILTIN_CLZLL
|
||||||
|
if (!is_constant_evaluated()) return FMT_BUILTIN_CLZLL(n);
|
||||||
|
#endif
|
||||||
|
int lz = 0;
|
||||||
|
constexpr uint64_t msb_mask = 1ull << (num_bits<uint64_t>() - 1);
|
||||||
|
for (; (n & msb_mask) == 0; n <<= 1) lz++;
|
||||||
|
return lz;
|
||||||
|
}
|
||||||
|
|
||||||
FMT_INLINE void assume(bool condition) {
|
FMT_INLINE void assume(bool condition) {
|
||||||
(void)condition;
|
(void)condition;
|
||||||
#if FMT_HAS_BUILTIN(__builtin_assume) && !FMT_ICC_VERSION
|
#if FMT_HAS_BUILTIN(__builtin_assume) && !FMT_ICC_VERSION
|
||||||
@ -1204,7 +1214,7 @@ FMT_CONSTEXPR auto count_digits(UInt n) -> int {
|
|||||||
FMT_INLINE auto do_count_digits(uint32_t n) -> int {
|
FMT_INLINE auto do_count_digits(uint32_t n) -> int {
|
||||||
// An optimization by Kendall Willets from https://bit.ly/3uOIQrB.
|
// An optimization by Kendall Willets from https://bit.ly/3uOIQrB.
|
||||||
// This increments the upper 32 bits (log10(T) - 1) when >= T is added.
|
// This increments the upper 32 bits (log10(T) - 1) when >= T is added.
|
||||||
# define FMT_INC(T) (((sizeof(# T) - 1ull) << 32) - T)
|
# define FMT_INC(T) (((sizeof(#T) - 1ull) << 32) - T)
|
||||||
static constexpr uint64_t table[] = {
|
static constexpr uint64_t table[] = {
|
||||||
FMT_INC(0), FMT_INC(0), FMT_INC(0), // 8
|
FMT_INC(0), FMT_INC(0), FMT_INC(0), // 8
|
||||||
FMT_INC(10), FMT_INC(10), FMT_INC(10), // 64
|
FMT_INC(10), FMT_INC(10), FMT_INC(10), // 64
|
||||||
@ -1365,7 +1375,71 @@ class utf8_to_utf16 {
|
|||||||
auto str() const -> std::wstring { return {&buffer_[0], size()}; }
|
auto str() const -> std::wstring { return {&buffer_[0], size()}; }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Computes 128-bit result of multiplication of two 64-bit unsigned integers.
|
||||||
|
inline uint128_fallback umul128(uint64_t x, uint64_t y) noexcept {
|
||||||
|
#if FMT_USE_INT128
|
||||||
|
auto p = static_cast<uint128_opt>(x) * static_cast<uint128_opt>(y);
|
||||||
|
return {static_cast<uint64_t>(p >> 64), static_cast<uint64_t>(p)};
|
||||||
|
#elif defined(_MSC_VER) && defined(_M_X64)
|
||||||
|
auto result = uint128_fallback();
|
||||||
|
result.lo_ = _umul128(x, y, &result.hi_);
|
||||||
|
return result;
|
||||||
|
#else
|
||||||
|
const uint64_t mask = static_cast<uint64_t>(max_value<uint32_t>());
|
||||||
|
|
||||||
|
uint64_t a = x >> 32;
|
||||||
|
uint64_t b = x & mask;
|
||||||
|
uint64_t c = y >> 32;
|
||||||
|
uint64_t d = y & mask;
|
||||||
|
|
||||||
|
uint64_t ac = a * c;
|
||||||
|
uint64_t bc = b * c;
|
||||||
|
uint64_t ad = a * d;
|
||||||
|
uint64_t bd = b * d;
|
||||||
|
|
||||||
|
uint64_t intermediate = (bd >> 32) + (ad & mask) + (bc & mask);
|
||||||
|
|
||||||
|
return {ac + (intermediate >> 32) + (ad >> 32) + (bc >> 32),
|
||||||
|
(intermediate << 32) + (bd & mask)};
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
namespace dragonbox {
|
namespace dragonbox {
|
||||||
|
// Computes floor(log10(pow(2, e))) for e in [-2620, 2620] using the method from
|
||||||
|
// https://fmt.dev/papers/Dragonbox.pdf#page=28, section 6.1.
|
||||||
|
inline int floor_log10_pow2(int e) noexcept {
|
||||||
|
FMT_ASSERT(e <= 2620 && e >= -2620, "too large exponent");
|
||||||
|
static_assert((-1 >> 1) == -1, "right shift is not arithmetic");
|
||||||
|
return (e * 315653) >> 20;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline int floor_log2_pow10(int e) noexcept {
|
||||||
|
FMT_ASSERT(e <= 1233 && e >= -1233, "too large exponent");
|
||||||
|
return (e * 1741647) >> 19;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Computes upper 64 bits of multiplication of two 64-bit unsigned integers.
|
||||||
|
inline uint64_t umul128_upper64(uint64_t x, uint64_t y) noexcept {
|
||||||
|
#if FMT_USE_INT128
|
||||||
|
auto p = static_cast<uint128_opt>(x) * static_cast<uint128_opt>(y);
|
||||||
|
return static_cast<uint64_t>(p >> 64);
|
||||||
|
#elif defined(_MSC_VER) && defined(_M_X64)
|
||||||
|
return __umulh(x, y);
|
||||||
|
#else
|
||||||
|
return umul128(x, y).high();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// Computes upper 128 bits of multiplication of a 64-bit unsigned integer and a
|
||||||
|
// 128-bit unsigned integer.
|
||||||
|
inline uint128_fallback umul192_upper128(uint64_t x,
|
||||||
|
uint128_fallback y) noexcept {
|
||||||
|
uint128_fallback r = umul128(x, y.high());
|
||||||
|
r += umul128_upper64(x, y.low());
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
FMT_API uint128_fallback get_cached_power(int k) noexcept;
|
||||||
|
|
||||||
// Type-specific information that Dragonbox uses.
|
// Type-specific information that Dragonbox uses.
|
||||||
template <typename T, typename Enable = void> struct float_info;
|
template <typename T, typename Enable = void> struct float_info;
|
||||||
@ -1389,7 +1463,7 @@ template <> struct float_info<double> {
|
|||||||
static const int big_divisor = 1000;
|
static const int big_divisor = 1000;
|
||||||
static const int small_divisor = 100;
|
static const int small_divisor = 100;
|
||||||
static const int min_k = -292;
|
static const int min_k = -292;
|
||||||
static const int max_k = 326;
|
static const int max_k = 341;
|
||||||
static const int shorter_interval_tie_lower_threshold = -77;
|
static const int shorter_interval_tie_lower_threshold = -77;
|
||||||
static const int shorter_interval_tie_upper_threshold = -77;
|
static const int shorter_interval_tie_upper_threshold = -77;
|
||||||
};
|
};
|
||||||
@ -1614,12 +1688,28 @@ template <typename T = void> struct basic_data {
|
|||||||
static constexpr uint64_t power_of_10_64[20] = {
|
static constexpr uint64_t power_of_10_64[20] = {
|
||||||
1, FMT_POWERS_OF_10(1ULL), FMT_POWERS_OF_10(1000000000ULL),
|
1, FMT_POWERS_OF_10(1ULL), FMT_POWERS_OF_10(1000000000ULL),
|
||||||
10000000000000000000ULL};
|
10000000000000000000ULL};
|
||||||
|
|
||||||
|
// For checking rounding thresholds.
|
||||||
|
// The kth entry is chosen to be the smallest integer such that the
|
||||||
|
// upper 32-bits of 10^(k+1) times it is strictly bigger than 5 * 10^k.
|
||||||
|
static constexpr uint32_t fractional_part_rounding_thresholds[8] = {
|
||||||
|
2576980378, // ceil(2^31 + 2^32/10^1)
|
||||||
|
2190433321, // ceil(2^31 + 2^32/10^2)
|
||||||
|
2151778616, // ceil(2^31 + 2^32/10^3)
|
||||||
|
2147913145, // ceil(2^31 + 2^32/10^4)
|
||||||
|
2147526598, // ceil(2^31 + 2^32/10^5)
|
||||||
|
2147487943, // ceil(2^31 + 2^32/10^6)
|
||||||
|
2147484078, // ceil(2^31 + 2^32/10^7)
|
||||||
|
2147483691 // ceil(2^31 + 2^32/10^8)
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
#if FMT_CPLUSPLUS < 201703L
|
#if FMT_CPLUSPLUS < 201703L
|
||||||
template <typename T> constexpr uint64_t basic_data<T>::pow10_significands[];
|
template <typename T> constexpr uint64_t basic_data<T>::pow10_significands[];
|
||||||
template <typename T> constexpr int16_t basic_data<T>::pow10_exponents[];
|
template <typename T> constexpr int16_t basic_data<T>::pow10_exponents[];
|
||||||
template <typename T> constexpr uint64_t basic_data<T>::power_of_10_64[];
|
template <typename T> constexpr uint64_t basic_data<T>::power_of_10_64[];
|
||||||
|
template <typename T>
|
||||||
|
constexpr uint32_t basic_data<T>::fractional_part_rounding_thresholds[];
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// This is a struct rather than an alias to avoid shadowing warnings in gcc.
|
// This is a struct rather than an alias to avoid shadowing warnings in gcc.
|
||||||
@ -3330,7 +3420,7 @@ FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs,
|
|||||||
auto dec = dragonbox::to_decimal(static_cast<double>(value));
|
auto dec = dragonbox::to_decimal(static_cast<double>(value));
|
||||||
write<char>(buffer_appender<char>(buf), dec.significand);
|
write<char>(buffer_appender<char>(buf), dec.significand);
|
||||||
return dec.exponent;
|
return dec.exponent;
|
||||||
} else {
|
} else if (is_constant_evaluated()) {
|
||||||
// Use Grisu + Dragon4 for the given precision:
|
// Use Grisu + Dragon4 for the given precision:
|
||||||
// https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf.
|
// https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf.
|
||||||
const int min_exp = -60; // alpha in Grisu.
|
const int min_exp = -60; // alpha in Grisu.
|
||||||
@ -3349,6 +3439,248 @@ FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs,
|
|||||||
exp += handler.size - cached_exp10 - 1;
|
exp += handler.size - cached_exp10 - 1;
|
||||||
precision = handler.precision;
|
precision = handler.precision;
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
// Extract significand bits and exponent bits.
|
||||||
|
using info = dragonbox::float_info<double>;
|
||||||
|
auto br = bit_cast<uint64_t>(static_cast<double>(value));
|
||||||
|
|
||||||
|
const uint64_t significand_mask =
|
||||||
|
(static_cast<uint64_t>(1) << num_significand_bits<double>()) - 1;
|
||||||
|
uint64_t significand = (br & significand_mask);
|
||||||
|
int exponent = static_cast<int>((br & exponent_mask<double>()) >>
|
||||||
|
num_significand_bits<double>());
|
||||||
|
|
||||||
|
if (exponent != 0) { // Check if normal.
|
||||||
|
exponent -= exponent_bias<double>() + num_significand_bits<double>();
|
||||||
|
significand |=
|
||||||
|
(static_cast<uint64_t>(1) << num_significand_bits<double>());
|
||||||
|
significand <<= 1;
|
||||||
|
} else {
|
||||||
|
// Normalize subnormal inputs.
|
||||||
|
FMT_ASSERT(significand != 0, "zeros should not appear hear");
|
||||||
|
int shift = countl_zero(significand);
|
||||||
|
FMT_ASSERT(shift >= num_bits<uint64_t>() - num_significand_bits<double>(),
|
||||||
|
"");
|
||||||
|
shift -= (num_bits<uint64_t>() - num_significand_bits<double>() - 2);
|
||||||
|
exponent = (std::numeric_limits<double>::min_exponent -
|
||||||
|
num_significand_bits<double>()) -
|
||||||
|
shift;
|
||||||
|
significand <<= shift;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute the first several nonzero decimal significand digits.
|
||||||
|
// We call the number we get the first segment.
|
||||||
|
const int k = info::kappa - dragonbox::floor_log10_pow2(exponent);
|
||||||
|
exp = -k;
|
||||||
|
const int beta = exponent + dragonbox::floor_log2_pow10(k);
|
||||||
|
uint64_t first_segment;
|
||||||
|
bool has_more_segments;
|
||||||
|
int digits_in_the_first_segment;
|
||||||
|
{
|
||||||
|
const auto r = dragonbox::umul192_upper128(
|
||||||
|
significand << beta, dragonbox::get_cached_power(k));
|
||||||
|
first_segment = r.high();
|
||||||
|
has_more_segments = r.low() != 0;
|
||||||
|
|
||||||
|
// The first segment can have 18 ~ 19 digits.
|
||||||
|
if (first_segment >= 1000000000000000000ULL) {
|
||||||
|
digits_in_the_first_segment = 19;
|
||||||
|
} else {
|
||||||
|
// When it is of 18-digits, we align it to 19-digits by adding a bogus
|
||||||
|
// zero at the end.
|
||||||
|
digits_in_the_first_segment = 18;
|
||||||
|
first_segment *= 10;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute the actual number of decimal digits to print
|
||||||
|
if (fixed) {
|
||||||
|
adjust_precision(precision, exp + digits_in_the_first_segment);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use Dragon4 only when there might be not enough digits in the first
|
||||||
|
// segment.
|
||||||
|
if (digits_in_the_first_segment > precision) {
|
||||||
|
use_dragon = false;
|
||||||
|
|
||||||
|
if (precision <= 0) {
|
||||||
|
exp += digits_in_the_first_segment;
|
||||||
|
|
||||||
|
if (precision < 0) {
|
||||||
|
// Nothing to do, since all we have are just leading zeros.
|
||||||
|
buf.try_resize(0);
|
||||||
|
} else {
|
||||||
|
// We may need to round-up.
|
||||||
|
buf.try_resize(1);
|
||||||
|
if ((first_segment | static_cast<uint64_t>(has_more_segments)) >
|
||||||
|
5000000000000000000ULL) {
|
||||||
|
buf[0] = '1';
|
||||||
|
} else {
|
||||||
|
buf[0] = '0';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} // precision <= 0
|
||||||
|
else {
|
||||||
|
exp += digits_in_the_first_segment - precision;
|
||||||
|
|
||||||
|
// When precision > 0, we divide the first segment into three
|
||||||
|
// subsegments, each with 9, 9, and 0 ~ 1 digits so that each fits
|
||||||
|
// in 32-bits which usually allows faster calculation than in
|
||||||
|
// 64-bits. Since some compiler (e.g. MSVC) doesn't know how to optimize
|
||||||
|
// division-by-constant for large 64-bit divisors, we do it here
|
||||||
|
// manually. The magic number 7922816251426433760 below is equal to
|
||||||
|
// ceil(2^(64+32) / 10^10).
|
||||||
|
const uint32_t first_subsegment = static_cast<uint32_t>(
|
||||||
|
dragonbox::umul128_upper64(first_segment, 7922816251426433760ULL) >>
|
||||||
|
32);
|
||||||
|
const uint64_t second_third_subsegments =
|
||||||
|
first_segment - first_subsegment * 10000000000ULL;
|
||||||
|
|
||||||
|
uint64_t prod;
|
||||||
|
uint32_t digits;
|
||||||
|
bool should_round_up;
|
||||||
|
int number_of_digits_to_print = precision > 9 ? 9 : precision;
|
||||||
|
|
||||||
|
// Print a 9-digits subsegment, either the first or the second.
|
||||||
|
auto print_subsegment = [&](uint32_t subsegment, char* buffer) {
|
||||||
|
int number_of_digits_printed = 0;
|
||||||
|
|
||||||
|
// If we want to print an odd number of digits from the subsegment,
|
||||||
|
if ((number_of_digits_to_print & 1) != 0) {
|
||||||
|
// Convert to 64-bit fixed-point fractional form with 1-digit
|
||||||
|
// integer part. The magic number 720575941 is a good enough
|
||||||
|
// approximation of 2^(32 + 24) / 10^8; see
|
||||||
|
// https://jk-jeon.github.io/posts/2022/12/fixed-precision-formatting/#fixed-length-case
|
||||||
|
// for details.
|
||||||
|
prod = ((subsegment * static_cast<uint64_t>(720575941)) >> 24) + 1;
|
||||||
|
digits = static_cast<uint32_t>(prod >> 32);
|
||||||
|
*buffer = static_cast<char>('0' + digits);
|
||||||
|
number_of_digits_printed++;
|
||||||
|
}
|
||||||
|
// If we want to print an even number of digits from the
|
||||||
|
// first_subsegment,
|
||||||
|
else {
|
||||||
|
// Convert to 64-bit fixed-point fractional form with 2-digits
|
||||||
|
// integer part. The magic number 450359963 is a good enough
|
||||||
|
// approximation of 2^(32 + 20) / 10^7; see
|
||||||
|
// https://jk-jeon.github.io/posts/2022/12/fixed-precision-formatting/#fixed-length-case
|
||||||
|
// for details.
|
||||||
|
prod = ((subsegment * static_cast<uint64_t>(450359963)) >> 20) + 1;
|
||||||
|
digits = static_cast<uint32_t>(prod >> 32);
|
||||||
|
copy2(buffer, digits2(digits));
|
||||||
|
number_of_digits_printed += 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Print all digit pairs.
|
||||||
|
while (number_of_digits_printed < number_of_digits_to_print) {
|
||||||
|
prod = static_cast<uint32_t>(prod) * static_cast<uint64_t>(100);
|
||||||
|
digits = static_cast<uint32_t>(prod >> 32);
|
||||||
|
copy2(buffer + number_of_digits_printed, digits2(digits));
|
||||||
|
number_of_digits_printed += 2;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Print first subsegment.
|
||||||
|
print_subsegment(first_subsegment, buf.data());
|
||||||
|
|
||||||
|
// Perform rounding if the first subsegment is the last subsegment to
|
||||||
|
// print.
|
||||||
|
if (precision <= 9) {
|
||||||
|
// Rounding inside the subsegment.
|
||||||
|
// We round-up if:
|
||||||
|
// - either the fractional part is strictly larger than 1/2, or
|
||||||
|
// - the fractional part is exactly 1/2 and the last digit is odd.
|
||||||
|
// We rely on the following observations:
|
||||||
|
// - If fractional_part >= threshold, then the fractional part is
|
||||||
|
// strictly larger than 1/2.
|
||||||
|
// - If the MSB of fractional_part is set, then the fractional part
|
||||||
|
// must be at least 1/2.
|
||||||
|
// - When the MSB of fractional_part is set, either
|
||||||
|
// second_third_subsegments being nonzero or has_more_segments
|
||||||
|
// being true means there are further digits not printed, so the
|
||||||
|
// fractional part is strictly larger than 1/2.
|
||||||
|
if (precision < 9) {
|
||||||
|
uint32_t fractional_part = static_cast<uint32_t>(prod);
|
||||||
|
should_round_up = fractional_part >=
|
||||||
|
data::fractional_part_rounding_thresholds
|
||||||
|
[8 - number_of_digits_to_print] ||
|
||||||
|
((fractional_part >> 31) &
|
||||||
|
((digits & 1) | (second_third_subsegments != 0) |
|
||||||
|
has_more_segments)) != 0;
|
||||||
|
}
|
||||||
|
// Rounding at the subsegment boundary.
|
||||||
|
// In this case, the fractional part is at least 1/2 if and only if
|
||||||
|
// second_third_subsegments >= 5000000000ULL, and is strictly larger
|
||||||
|
// than 1/2 if we further have either second_third_subsegments >
|
||||||
|
// 5000000000ULL or has_more_segments == true.
|
||||||
|
else {
|
||||||
|
should_round_up = second_third_subsegments > 5000000000ULL ||
|
||||||
|
(second_third_subsegments == 5000000000ULL &&
|
||||||
|
((digits & 1) != 0 || has_more_segments));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Otherwise, print the second subsegment.
|
||||||
|
else {
|
||||||
|
// Compilers are not aware of how to leverage the maximum value of
|
||||||
|
// second_third_subsegments to find out a better magic number which
|
||||||
|
// allows us to eliminate an additional shift. 1844674407370955162 =
|
||||||
|
// ceil(2^64/10) < ceil(2^64*(10^9/(10^10 - 1))).
|
||||||
|
const uint32_t second_subsegment =
|
||||||
|
static_cast<uint32_t>(dragonbox::umul128_upper64(
|
||||||
|
second_third_subsegments, 1844674407370955162ULL));
|
||||||
|
const uint32_t third_subsegment =
|
||||||
|
static_cast<uint32_t>(second_third_subsegments) -
|
||||||
|
second_subsegment * 10;
|
||||||
|
|
||||||
|
number_of_digits_to_print = precision - 9;
|
||||||
|
print_subsegment(second_subsegment, buf.data() + 9);
|
||||||
|
|
||||||
|
// Rounding inside the subsegment.
|
||||||
|
if (precision < 18) {
|
||||||
|
// The condition third_subsegment != 0 implies that the segment was
|
||||||
|
// of 19 digits, so in this case the third segment should be
|
||||||
|
// consisting of a genuine digit from the input.
|
||||||
|
uint32_t fractional_part = static_cast<uint32_t>(prod);
|
||||||
|
should_round_up = fractional_part >=
|
||||||
|
data::fractional_part_rounding_thresholds
|
||||||
|
[8 - number_of_digits_to_print] ||
|
||||||
|
((fractional_part >> 31) &
|
||||||
|
((digits & 1) | (third_subsegment != 0) |
|
||||||
|
has_more_segments)) != 0;
|
||||||
|
}
|
||||||
|
// Rounding at the subsegment boundary.
|
||||||
|
else {
|
||||||
|
// In this case, the segment must be of 19 digits, thus
|
||||||
|
// the third subsegment should be consisting of a genuine digit from
|
||||||
|
// the input.
|
||||||
|
should_round_up = third_subsegment > 5 ||
|
||||||
|
(third_subsegment == 5 &&
|
||||||
|
((digits & 1) != 0 || has_more_segments));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Round-up if necessary.
|
||||||
|
if (should_round_up) {
|
||||||
|
++buf[precision - 1];
|
||||||
|
for (int i = precision - 1; i > 0 && buf[i] > '9'; --i) {
|
||||||
|
buf[i] = '0';
|
||||||
|
++buf[i - 1];
|
||||||
|
}
|
||||||
|
if (buf[0] > '9') {
|
||||||
|
buf[0] = '1';
|
||||||
|
if (fixed)
|
||||||
|
buf[precision++] = '0';
|
||||||
|
else
|
||||||
|
++exp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
buf.try_resize(to_unsigned(precision));
|
||||||
|
}
|
||||||
|
} // if (digits_in_the_first_segment > precision)
|
||||||
|
else {
|
||||||
|
// Adjust the exponent for its use in Dragon4.
|
||||||
|
exp += digits_in_the_first_segment - 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (use_dragon) {
|
if (use_dragon) {
|
||||||
auto f = basic_fp<uint128_t>();
|
auto f = basic_fp<uint128_t>();
|
||||||
@ -3964,7 +4296,9 @@ template <> struct formatter<bytes> {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// group_digits_view is not derived from view because it copies the argument.
|
// group_digits_view is not derived from view because it copies the argument.
|
||||||
template <typename T> struct group_digits_view { T value; };
|
template <typename T> struct group_digits_view {
|
||||||
|
T value;
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
\rst
|
\rst
|
||||||
|
@ -246,7 +246,7 @@ TEST(fp_test, dragonbox_max_k) {
|
|||||||
fmt::detail::const_check(double_info::max_k),
|
fmt::detail::const_check(double_info::max_k),
|
||||||
double_info::kappa -
|
double_info::kappa -
|
||||||
floor_log10_pow2(std::numeric_limits<double>::min_exponent -
|
floor_log10_pow2(std::numeric_limits<double>::min_exponent -
|
||||||
fmt::detail::num_significand_bits<double>() - 1));
|
2 * fmt::detail::num_significand_bits<double>() - 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(fp_test, get_round_direction) {
|
TEST(fp_test, get_round_direction) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user