From 0889856d6160775462e9e0722d121aab0e75e873 Mon Sep 17 00:00:00 2001
From: Attila Tajti
Date: Sat, 2 Nov 2019 10:02:28 +0100
Subject: [PATCH] Fix UTF-8 truncation

---
Notes (ignored by git-am; not part of the commit message):

  What the patch does
  * Adds internal::code_point_index(s, n) in two overloads:
    - generic Char: returns min(n, s.size()), i.e. one code unit is
      treated as one code point;
    - char8_t: walks the bytes, counts every byte that is not a UTF-8
      continuation byte ((b & 0xc0) != 0x80) and returns the byte offset
      of the lead byte that would start code point number n (0-based),
      or s.size() when the string holds n or fewer code points.
  * basic_writer's string write path now truncates to
    code_point_index(s, precision) instead of to `precision` code units,
    so a precision never cuts through a multi-byte sequence.
  * New test: "{:.4}" applied to u8"caf\u00e9s" must give 4 code points
    in 5 bytes.

  Reconstruction caveats
  * This copy of the patch had lost its line breaks and every
    angle-bracketed span. Template parameter lists and cast target types
    were restored from the surrounding code (Char / char8_t /
    const char8_t*).
  * The author's e-mail address on the From: line is not recoverable
    from this copy and has been left out.
  * TODO confirm against upstream: the trailing context line
    "template <typename InputIt, typename OutChar>" of the first hunk
    and the function-name text after the first "@@ ... @@" were filled
    in from recollection of the fmt sources.

 include/fmt/format.h | 21 ++++++++++++++++++++-
 test/format-test.cc  | 10 ++++++++++
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/include/fmt/format.h b/include/fmt/format.h
index 6ed7fc43..4c9e33fd 100644
--- a/include/fmt/format.h
+++ b/include/fmt/format.h
@@ -436,6 +436,24 @@ inline size_t count_code_points(basic_string_view<char8_t> s) {
   return num_code_points;
 }
 
+template <typename Char>
+inline size_t code_point_index(basic_string_view<Char> s, size_t n) {
+  size_t size = s.size();
+  return n < size ? n : size;
+}
+
+// Calculates the index of the nth code point in a UTF-8 string.
+inline size_t code_point_index(basic_string_view<char8_t> s, size_t n) {
+  const char8_t* data = s.data();
+  size_t num_code_points = 0;
+  for (size_t i = 0, size = s.size(); i != size; ++i) {
+    if ((data[i] & 0xc0) != 0x80 && ++num_code_points > n) {
+      return i;
+    }
+  }
+  return s.size();
+}
+
 inline char8_t to_char8_t(char c) { return static_cast<char8_t>(c); }
 
 template <typename InputIt, typename OutChar>
@@ -1729,7 +1747,8 @@ template <typename Range> class basic_writer {
     const Char* data = s.data();
     std::size_t size = s.size();
     if (specs.precision >= 0 && internal::to_unsigned(specs.precision) < size)
-      size = internal::to_unsigned(specs.precision);
+      size = internal::code_point_index(s,
+          internal::to_unsigned(specs.precision));
     write(data, size, specs);
   }
 
diff --git a/test/format-test.cc b/test/format-test.cc
index f5a35569..2173936b 100644
--- a/test/format-test.cc
+++ b/test/format-test.cc
@@ -2621,3 +2621,13 @@ TEST(FormatTest, FormatCustomChar) {
   EXPECT_EQ(result.size(), 1);
   EXPECT_EQ(result[0], mychar('x'));
 }
+
+TEST(FormatTest, FormatUTF8Precision) {
+  using str_type = std::basic_string<char8_t>;
+  str_type format(reinterpret_cast<const char8_t*>(u8"{:.4}"));
+  str_type str(reinterpret_cast<const char8_t*>(u8"caf\u00e9s"));  // cafés
+  auto result = fmt::format(format, str);
+  EXPECT_EQ(fmt::internal::count_code_points(result), 4);
+  EXPECT_EQ(result.size(), 5);
+  EXPECT_EQ(result, str.substr(0, 5));
+}