From ffdc3fdbd90385429dd2ea6a774848e39d4f957a Mon Sep 17 00:00:00 2001 From: Victor Zverovich Date: Wed, 24 Jul 2024 17:47:11 -0700 Subject: [PATCH] Align digits table --- include/fmt/chrono.h | 2 +- include/fmt/format.h | 71 +++++++++++++++++++++++++------------------- 2 files changed, 41 insertions(+), 32 deletions(-) diff --git a/include/fmt/chrono.h b/include/fmt/chrono.h index 40f7e8fd..c01e4b14 100644 --- a/include/fmt/chrono.h +++ b/include/fmt/chrono.h @@ -1481,7 +1481,7 @@ class tm_writer { char buf[10]; size_t offset = 0; if (year >= 0 && year < 10000) { - copy2(buf, digits2(static_cast(year / 100))); + write2digits(buf, static_cast(year / 100)); } else { offset = 4; write_year_extended(year); diff --git a/include/fmt/format.h b/include/fmt/format.h index 71ae4ac7..4d41e357 100644 --- a/include/fmt/format.h +++ b/include/fmt/format.h @@ -1113,13 +1113,17 @@ using uint64_or_128_t = conditional_t() <= 64, uint64_t, uint128_t>; (factor) * 100000000, (factor) * 1000000000 // Converts value in the range [0, 100) to a string. -constexpr auto digits2(size_t value) -> const char* { - // GCC generates slightly better code when value is pointer-size. - return &"0001020304050607080910111213141516171819" - "2021222324252627282930313233343536373839" - "4041424344454647484950515253545556575859" - "6061626364656667686970717273747576777879" - "8081828384858687888990919293949596979899"[value * 2]; +// GCC generates slightly better code when value is pointer-size. +inline auto digits2(size_t value) -> const char* { + // Align data since unaligned access may be slower when crossing a + // hardware-specific boundary. + alignas(2) static const char data[] = + "0001020304050607080910111213141516171819" + "2021222324252627282930313233343536373839" + "4041424344454647484950515253545556575859" + "6061626364656667686970717273747576777879" + "8081828384858687888990919293949596979899"; + return &data[value * 2]; } // Sign is a template parameter to workaround a bug in gcc 4.8. @@ -1272,15 +1276,15 @@ inline auto equal2(const char* lhs, const char* rhs) -> bool { return memcmp(lhs, rhs, 2) == 0; } -// Copies two characters from src to dst. +// Writes a two-digit value to out. template -FMT_CONSTEXPR20 FMT_INLINE void copy2(Char* dst, const char* src) { - if (!is_constant_evaluated() && sizeof(Char) == sizeof(char)) { - memcpy(dst, src, 2); +FMT_CONSTEXPR20 FMT_INLINE void write2digits(Char* out, size_t value) { + if (!is_constant_evaluated() && std::is_same::value) { + memcpy(out, digits2(value), 2); return; } - *dst++ = static_cast(*src++); - *dst = static_cast(*src); + *out++ = static_cast('0' + value / 10); + *out = static_cast('0' + value % 10); } // Formats a decimal unsigned integer value writing to out pointing to a buffer @@ -1295,12 +1299,12 @@ FMT_CONSTEXPR20 auto do_format_decimal(Char* out, UInt value, int size) // of for every digit. The idea comes from the talk by Alexandrescu // "Three Optimization Tips for C++". See speed-test for a comparison. n -= 2; - copy2(out + n, digits2(static_cast(value % 100))); + write2digits(out + n, static_cast(value % 100)); value /= 100; } if (value >= 10) { n -= 2; - copy2(out + n, digits2(static_cast(value))); + write2digits(out + n, static_cast(value)); } else { out[--n] = static_cast('0' + value); } @@ -1584,25 +1588,30 @@ template constexpr auto exponent_bias() -> int { } // Writes the exponent exp in the form "[+-]d{2,3}" to buffer. -template -FMT_CONSTEXPR auto write_exponent(int exp, It it) -> It { +template +FMT_CONSTEXPR auto write_exponent(int exp, OutputIt out) -> OutputIt { FMT_ASSERT(-10000 < exp && exp < 10000, "exponent out of range"); if (exp < 0) { - *it++ = static_cast('-'); + *out++ = static_cast('-'); exp = -exp; } else { - *it++ = static_cast('+'); + *out++ = static_cast('+'); } - if (exp >= 100) { - const char* top = digits2(to_unsigned(exp / 100)); - if (exp >= 1000) *it++ = static_cast(top[0]); - *it++ = static_cast(top[1]); - exp %= 100; + unsigned uexp = to_unsigned(exp); + if (is_constant_evaluated()) { + if (uexp < 10) *out++ = '0'; + return format_decimal(out, uexp, count_digits(uexp)); } - const char* d = digits2(to_unsigned(exp)); - *it++ = static_cast(d[0]); - *it++ = static_cast(d[1]); - return it; + if (uexp >= 100u) { + const char* top = digits2(uexp / 100); + if (uexp >= 1000u) *out++ = static_cast(top[0]); + *out++ = static_cast(top[1]); + uexp %= 100; + } + const char* d = digits2(uexp); + *out++ = static_cast(d[0]); + *out++ = static_cast(d[1]); + return out; } // A floating-point number f * pow(2, e) where F is an unsigned type. @@ -2457,7 +2466,7 @@ inline auto write_significand(Char* out, UInt significand, int significand_size, int floating_size = significand_size - integral_size; for (int i = floating_size / 2; i > 0; --i) { out -= 2; - copy2(out, digits2(static_cast(significand % 100))); + write2digits(out, static_cast(significand % 100)); significand /= 100; } if (floating_size % 2 != 0) { @@ -3361,7 +3370,7 @@ FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs, // for details. prod = ((subsegment * static_cast(450359963)) >> 20) + 1; digits = static_cast(prod >> 32); - copy2(buffer, digits2(digits)); + write2digits(buffer, digits); number_of_digits_printed += 2; } @@ -3369,7 +3378,7 @@ FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs, while (number_of_digits_printed < number_of_digits_to_print) { prod = static_cast(prod) * static_cast(100); digits = static_cast(prod >> 32); - copy2(buffer + number_of_digits_printed, digits2(digits)); + write2digits(buffer + number_of_digits_printed, digits); number_of_digits_printed += 2; } };