diff --git a/include/fmt/format.h b/include/fmt/format.h
index 5402c280..2621cf84 100644
--- a/include/fmt/format.h
+++ b/include/fmt/format.h
@@ -1049,11 +1049,19 @@ inline auto equal2(const char* lhs, const char* rhs) -> bool {
 }
 
 // Copies two characters from src to dst.
-template <typename Char> void copy2(Char* dst, const char* src) {
-  *dst++ = static_cast<Char>(*src++);
-  *dst = static_cast<Char>(*src);
+template <typename Char>
+FMT_CONSTEXPR20 FMT_INLINE void copy2(Char* dst, const char* src) {
+  if (!is_constant_evaluated() && std::is_same<Char, char>::value) {
+    memcpy(dst, src, 2);
+  } else {
+    // We read both bytes before writing so that the compiler can do it in
+    // one pair of read/write instructions (even if Char aliases char)
+    char dc0 = *src++;
+    char dc1 = *src;
+    *dst++ = static_cast<Char>(dc0);
+    *dst = static_cast<Char>(dc1);
+  }
 }
-FMT_INLINE void copy2(char* dst, const char* src) { memcpy(dst, src, 2); }
 
 template <typename Iterator> struct format_decimal_result {
   Iterator begin;
@@ -1069,14 +1077,6 @@ FMT_CONSTEXPR20 auto format_decimal(Char* out, UInt value, int size)
   FMT_ASSERT(size >= count_digits(value), "invalid digit count");
   out += size;
   Char* end = out;
-  if (is_constant_evaluated()) {
-    while (value >= 10) {
-      *--out = static_cast<Char>('0' + value % 10);
-      value /= 10;
-    }
-    *--out = static_cast<Char>('0' + value);
-    return {out, end};
-  }
   while (value >= 100) {
     // Integer division is slow so do it for a group of two digits instead
     // of for every digit. The idea comes from the talk by Alexandrescu