From 0ecb3d1829b0546edb645a548e0ccfaf362925d7 Mon Sep 17 00:00:00 2001
From: Victor Zverovich
Date: Wed, 21 Oct 2020 09:11:10 -0700
Subject: [PATCH] Optimize alignment parsing

---
 include/fmt/format-inl.h | 13 +++----------
 include/fmt/format.h     | 22 +++++++++++++---------
 test/format-test.cc      |  2 +-
 3 files changed, 17 insertions(+), 20 deletions(-)

diff --git a/include/fmt/format-inl.h b/include/fmt/format-inl.h
index e54fa62e..b7cb3209 100644
--- a/include/fmt/format-inl.h
+++ b/include/fmt/format-inl.h
@@ -2603,24 +2603,17 @@ int snprintf_float(T value, int precision, float_specs specs,
  * error, but it will always advance at least one byte.
  */
 inline const char* utf8_decode(const char* buf, uint32_t* c, int* e) {
-  static const char lengths[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-                                 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
-                                 0, 0, 2, 2, 2, 2, 3, 3, 4, 0};
   static const int masks[] = {0x00, 0x7f, 0x1f, 0x0f, 0x07};
   static const uint32_t mins[] = {4194304, 0, 128, 2048, 65536};
   static const int shiftc[] = {0, 18, 12, 6, 0};
   static const int shifte[] = {0, 6, 4, 2, 0};
 
-  auto s = reinterpret_cast<const unsigned char*>(buf);
-  int len = lengths[s[0] >> 3];
-
-  // Compute the pointer to the next character early so that the next
-  // iteration can start working on the next character. Neither Clang
-  // nor GCC figure out this reordering on their own.
-  const char* next = buf + len + !len;
+  int len = code_point_length(buf);
+  const char* next = buf + len;
 
   // Assume a four-byte character and load four bytes. Unused bits are
   // shifted out.
+  auto s = reinterpret_cast<const unsigned char*>(buf);
   *c = uint32_t(s[0] & masks[len]) << 18;
   *c |= uint32_t(s[1] & 0x3f) << 12;
   *c |= uint32_t(s[2] & 0x3f) << 6;
diff --git a/include/fmt/format.h b/include/fmt/format.h
index a51f4eec..17ee90a6 100644
--- a/include/fmt/format.h
+++ b/include/fmt/format.h
@@ -1112,7 +1112,7 @@ Char* format_uint(Char* buffer, detail::fallback_uintptr n, int num_digits,
 
 template <unsigned BASE_BITS, typename Char, typename It, typename UInt>
 inline It format_uint(It out, UInt value, int num_digits, bool upper = false) {
-  if (auto ptr = to_pointer<Char>(out, num_digits)) {
+  if (auto ptr = to_pointer<Char>(out, to_unsigned(num_digits))) {
     format_uint<BASE_BITS>(ptr, value, num_digits, upper);
     return out;
   }
@@ -2727,12 +2727,16 @@ template <typename SpecHandler, typename Char> struct precision_adapter {
 };
 
 template <typename Char>
-FMT_CONSTEXPR const Char* next_code_point(const Char* begin, const Char* end) {
-  if (const_check(sizeof(Char) != 1) || (*begin & 0x80) == 0) return begin + 1;
-  do {
-    ++begin;
-  } while (begin != end && (*begin & 0xc0) == 0x80);
-  return begin;
+FMT_CONSTEXPR int code_point_length(const Char* begin) {
+  if (const_check(sizeof(Char) != 1)) return 1;
+  constexpr char lengths[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                              0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 3, 3, 4, 0};
+  int len = lengths[static_cast<unsigned char>(*begin) >> 3];
+
+  // Compute the pointer to the next character early so that the next
+  // iteration can start working on the next character. Neither Clang
+  // nor GCC figure out this reordering on their own.
+  return len + !len;
 }
 
 // Converts a character to the underlying integral type.
@@ -2752,8 +2756,8 @@ FMT_CONSTEXPR const Char* parse_align(const Char* begin, const Char* end,
                                       Handler&& handler) {
   FMT_ASSERT(begin != end, "");
   auto align = align::none;
-  auto p = next_code_point(begin, end);
-  if (p == end) p = begin;
+  auto p = begin + code_point_length(begin);
+  if (p >= end) p = begin;
   for (;;) {
     switch (to_integral(*p)) {
     case '<':
diff --git a/test/format-test.cc b/test/format-test.cc
index 192fcf7e..40a0137b 100644
--- a/test/format-test.cc
+++ b/test/format-test.cc
@@ -646,7 +646,7 @@ TEST(FormatterTest, Fill) {
   EXPECT_EQ(std::string("\0\0\0*", 4), format(string_view("{:\0>4}", 6), '*'));
   EXPECT_EQ("жж42", format("{0:ж>4}", 42));
   EXPECT_THROW_MSG(format("{:\x80\x80\x80\x80\x80>}", 0), format_error,
-                   "invalid fill");
+                   "missing '}' in format string");
 }
 
 TEST(FormatterTest, PlusSign) {