Implement Grisu2 digit generation

This commit is contained in:
Victor Zverovich 2018-08-25 16:08:32 -07:00
parent 569ac91e0b
commit f0d0a1ebd7
3 changed files with 95 additions and 33 deletions

View File

@ -73,7 +73,7 @@ if (CMAKE_CXX_COMPILER_ID MATCHES "GNU")
-Wcast-qual -Wformat=2 -Wmissing-include-dirs
-Wcast-align -Wnon-virtual-dtor
-Wctor-dtor-privacy -Wdisabled-optimization
-Winvalid-pch -Wmissing-declarations -Woverloaded-virtual
-Winvalid-pch -Woverloaded-virtual
-Wno-ctor-dtor-privacy -Wno-dangling-else -Wno-float-equal
-Wno-format-nonliteral -Wno-sign-conversion -Wno-shadow)
if (NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.6)
@ -101,8 +101,8 @@ if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-Wno-unused-member-function
-Wno-format-nonliteral -Wno-missing-noreturn -Wno-undefined-func-template
-Wno-shadow -Wno-sign-conversion -Wno-used-but-marked-unused
-Wno-covered-switch-default -Wno-missing-variable-declarations
-Wno-double-promotion)
-Wno-covered-switch-default -Wno-missing-prototypes
-Wno-missing-variable-declarations -Wno-double-promotion)
set(WERROR_FLAG -Werror)

View File

@ -275,11 +275,16 @@ const char basic_data<T>::DIGITS[] =
template <typename T>
const uint32_t basic_data<T>::POWERS_OF_10_32[] = {
1, FMT_POWERS_OF_10(1)
};
template <typename T>
const uint32_t basic_data<T>::ZERO_OR_POWERS_OF_10_32[] = {
0, FMT_POWERS_OF_10(1)
};
template <typename T>
const uint64_t basic_data<T>::POWERS_OF_10_64[] = {
const uint64_t basic_data<T>::ZERO_OR_POWERS_OF_10_64[] = {
0,
FMT_POWERS_OF_10(1),
FMT_POWERS_OF_10(1000000000ull),
@ -361,6 +366,78 @@ FMT_FUNC fp get_cached_power(int min_exponent, int &pow10_exponent) {
pow10_exponent = first_dec_exp + index * dec_exp_step;
return fp(data::POW10_SIGNIFICANDS[index], data::POW10_EXPONENTS[index]);
}
// Generates decimal digits using the Grisu2 digit-gen algorithm.
//
// scaled_upper is split into an integral part (hi, "p1" in the Grisu paper)
// and a fractional part (lo, "p2"). Digits are produced first from hi and then
// from lo, and are written to buffer with leading zeros skipped. Generation
// stops as soon as the part of scaled_upper that has not been emitted yet is
// within delta.
//
// On return, size holds the number of characters written to buffer and dec_exp
// has been adjusted by kappa, the count of decimal positions of hi that were
// not consumed (negative once digits have been taken from the fraction).
//
// NOTE(review): assumes scaled_upper.e is negative (it is used as a negated
// shift count) and that the integral part fits in 32 bits - confirm against
// the caller's choice of cached power.
FMT_FUNC void grisu2_gen_digits(
    const fp &scaled_value, const fp &scaled_upper, uint64_t delta,
    char *buffer, size_t &size, int &dec_exp) {
  // TODO: use scaled_value
  (void)scaled_value;
  // The value 1 expressed with the same binary exponent as scaled_upper.
  internal::fp one(1ull << -scaled_upper.e, scaled_upper.e);
  uint32_t hi = static_cast<uint32_t>(scaled_upper.f >> -one.e);  // p1 in Grisu
  uint64_t lo = scaled_upper.f & (one.f - 1);  // p2 in Grisu
  size = 0;
  // kappa is kept signed on purpose: it is decremented below zero while
  // fractional digits are generated and is then added to the signed dec_exp.
  // An unsigned counter would wrap around and rely on an implementation-defined
  // unsigned-to-int conversion.
  int kappa = static_cast<int>(count_digits(hi));  // TODO: more descriptive name
  while (kappa > 0) {
    uint32_t digit = 0;
    // This optimization by miloyip reduces the number of integer divisions by
    // one per iteration.
    switch (kappa) {
    case 10: digit = hi / 1000000000; hi %= 1000000000; break;
    case 9:  digit = hi /  100000000; hi %=  100000000; break;
    case 8:  digit = hi /   10000000; hi %=   10000000; break;
    case 7:  digit = hi /    1000000; hi %=    1000000; break;
    case 6:  digit = hi /     100000; hi %=     100000; break;
    case 5:  digit = hi /      10000; hi %=      10000; break;
    case 4:  digit = hi /       1000; hi %=       1000; break;
    case 3:  digit = hi /        100; hi %=        100; break;
    case 2:  digit = hi /         10; hi %=         10; break;
    case 1:  digit = hi;              hi =           0; break;
    default:
      FMT_ASSERT(false, "invalid number of digits");
    }
    // Skip leading zeros: a zero is only stored once a digit has been emitted.
    if (digit != 0 || size != 0)
      buffer[size++] = static_cast<char>('0' + digit);
    --kappa;
    // What is left of scaled_upper after the digits emitted so far.
    uint64_t remainder = (static_cast<uint64_t>(hi) << -one.e) + lo;
    if (remainder <= delta) {
      dec_exp += kappa;
      return;
    }
  }
  for (;;) {
    // Shift the next fractional digit into the integral position; delta is
    // scaled by the same factor so the comparison below stays consistent.
    lo *= 10;
    delta *= 10;
    char digit = static_cast<char>(lo >> -one.e);
    if (digit != 0 || size != 0)
      buffer[size++] = static_cast<char>('0' + digit);
    lo &= one.f - 1;
    --kappa;
    if (lo < delta) {
      dec_exp += kappa;
      return;
    }
  }
}
// Formats value with the Grisu2 algorithm. The generated decimal digits are
// written to buffer and their count is stored in size.
FMT_FUNC void grisu2_format(double value, char *buffer, size_t &size) {
  fp binary(value);
  fp lower_bound, upper_bound;
  binary.compute_boundaries(lower_bound, upper_bound);

  // Look up a cached power of 10 close to 1 / upper_bound.
  const int min_exp = -60;
  int decimal_exp = 0;  // K in Grisu paper.
  const fp cached_pow = get_cached_power(
      min_exp - (upper_bound.e + fp::significand_size), decimal_exp);

  binary.normalize();

  // Scale the value and both of its boundaries by the cached power.
  fp scaled_value = binary * cached_pow;
  fp scaled_upper = upper_bound * cached_pow;
  fp scaled_lower = lower_bound * cached_pow;

  // Shrink the interval by one ulp on each side before measuring its width.
  scaled_upper.f -= 1;  // -1 ulp
  scaled_lower.f += 1;  // +1 ulp
  const uint64_t delta = scaled_upper.f - scaled_lower.f;

  grisu2_gen_digits(scaled_value, scaled_upper, delta, buffer, size,
                    decimal_exp);
}
} // namespace internal
#if FMT_USE_WINDOWS_H

View File

@ -365,6 +365,10 @@ FMT_API fp operator*(fp x, fp y);
// (binary) exponent satisfies min_exponent <= c_k.e <= min_exponent + 3.
FMT_API fp get_cached_power(int min_exponent, int &pow10_exponent);
// Formats value using Grisu2 algorithm:
// https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf
FMT_API void grisu2_format(double value, char *buffer, size_t &size);
template <typename Allocator>
typename Allocator::value_type *allocate(Allocator& alloc, std::size_t n) {
#if __cplusplus >= 201103L || FMT_MSC_VER >= 1700
@ -952,7 +956,8 @@ struct int_traits {
template <typename T = void>
struct FMT_API basic_data {
static const uint32_t POWERS_OF_10_32[];
static const uint64_t POWERS_OF_10_64[];
static const uint32_t ZERO_OR_POWERS_OF_10_32[];
static const uint64_t ZERO_OR_POWERS_OF_10_64[];
static const uint64_t POW10_SIGNIFICANDS[];
static const int16_t POW10_EXPONENTS[];
static const char DIGITS[];
@ -973,7 +978,7 @@ inline unsigned count_digits(uint64_t n) {
// Based on http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
// and the benchmark https://github.com/localvoid/cxx-benchmark-count-digits.
int t = (64 - FMT_BUILTIN_CLZLL(n | 1)) * 1233 >> 12;
return to_unsigned(t) - (n < data::POWERS_OF_10_64[t]) + 1;
return to_unsigned(t) - (n < data::ZERO_OR_POWERS_OF_10_64[t]) + 1;
}
#else
// Fallback version of count_digits used when __builtin_clz is not available.
@ -1043,7 +1048,8 @@ class decimal_formatter {
// https://github.com/jeaiii/itoa
unsigned n = N - 1;
unsigned a = n / 5 * n * 53 / 16;
uint64_t t = ((1ULL << (32 + a)) / data::POWERS_OF_10_32[n] + 1 - n / 9);
uint64_t t = ((1ULL << (32 + a)) /
data::ZERO_OR_POWERS_OF_10_32[n] + 1 - n / 9);
t = ((t * u) >> a) + n / 5 * 4;
write_pair(0, t >> 32);
for (unsigned i = 2; i < N; i += 2) {
@ -1075,7 +1081,7 @@ class decimal_formatter_null : public decimal_formatter {
// Optional version of count_digits for better performance on 32-bit platforms.
inline unsigned count_digits(uint32_t n) {
int t = (32 - FMT_BUILTIN_CLZ(n | 1)) * 1233 >> 12;
return to_unsigned(t) - (n < data::POWERS_OF_10_32[t]) + 1;
return to_unsigned(t) - (n < data::ZERO_OR_POWERS_OF_10_32[t]) + 1;
}
#endif
@ -2943,31 +2949,10 @@ void basic_writer<Range>::write_double(T value, const format_specs &spec) {
basic_memory_buffer<char_type> buffer;
if (internal::const_check(FMT_USE_GRISU && sizeof(T) <= sizeof(double) &&
std::numeric_limits<double>::is_iec559)) {
internal::fp fp_value(static_cast<double>(value));
fp_value.normalize();
// Find a cached power of 10 close to 1 / fp_value.
int dec_exp = 0;
const int min_exp = -60;
auto dec_pow = internal::get_cached_power(
min_exp - (fp_value.e + internal::fp::significand_size), dec_exp);
internal::fp product = fp_value * dec_pow;
// Generate output using Grisu digit-gen-mix algorithm.
internal::fp one(1ull << -product.e, product.e);
uint64_t hi = product.f >> -one.e;
uint64_t f = product.f & (one.f - 1);
typedef back_insert_range<internal::basic_buffer<char_type>> range;
basic_writer<range> w{range(buffer)};
w.write(hi);
size_t digits = buffer.size();
w.write('.');
const unsigned max_digits = 18;
while (digits++ < max_digits) {
f *= 10;
w.write(static_cast<char>('0' + (f >> -one.e)));
f &= one.f - 1;
}
w.write('e');
w.write(-dec_exp);
char buf[100]; // TODO: max size
size_t size = 0;
internal::grisu2_format(static_cast<double>(value), buf, size);
buffer.append(buf, buf + size); // TODO: avoid extra copy
} else {
format_specs normalized_spec(spec);
normalized_spec.type_ = handler.type;