Fix UTF-8 truncation

This commit is contained in:
Attila Tajti 2019-11-02 10:02:28 +01:00 committed by Victor Zverovich
parent d6eede9e08
commit 0889856d61
2 changed files with 30 additions and 1 deletions

View File

@ -436,6 +436,24 @@ inline size_t count_code_points(basic_string_view<char8_t> s) {
return num_code_points;
}
// Generic overload: treats each code unit as one code point, so the index of
// the nth code point is n itself, clamped to the length of the string.
template <typename Char>
inline size_t code_point_index(basic_string_view<Char> s, size_t n) {
  const size_t length = s.size();
  if (n >= length) return length;
  return n;
}
// Returns the offset of the code unit that begins the nth (zero-based) code
// point of a UTF-8 string, or s.size() when the string holds n or fewer code
// points.
inline size_t code_point_index(basic_string_view<char8_t> s, size_t n) {
  const char8_t* bytes = s.data();
  const size_t length = s.size();
  size_t seen = 0;  // Code point starts encountered so far.
  size_t pos = 0;
  while (pos != length) {
    // Bytes of the form 10xxxxxx continue a code point; any other byte
    // starts a new one.
    const bool starts_code_point = (bytes[pos] & 0xc0) != 0x80;
    if (starts_code_point) {
      ++seen;
      if (seen > n) return pos;
    }
    ++pos;
  }
  return length;
}
// Converts a plain char to a char8_t code unit via static_cast.
inline char8_t to_char8_t(char c) {
  const char8_t unit = static_cast<char8_t>(c);
  return unit;
}
template <typename InputIt, typename OutChar>
@ -1729,7 +1747,8 @@ template <typename Range> class basic_writer {
const Char* data = s.data();
std::size_t size = s.size();
if (specs.precision >= 0 && internal::to_unsigned(specs.precision) < size)
size = internal::to_unsigned(specs.precision);
size = internal::code_point_index(s,
internal::to_unsigned(specs.precision));
write(data, size, specs);
}

View File

@ -2621,3 +2621,13 @@ TEST(FormatTest, FormatCustomChar) {
EXPECT_EQ(result.size(), 1);
EXPECT_EQ(result[0], mychar('x'));
}
// Formats "cafés" with a precision of 4 and checks that the result keeps four
// whole code points ("café", five bytes) instead of being cut after four
// bytes.
TEST(FormatTest, FormatUTF8Precision) {
  using u8_string = std::basic_string<char8_t>;
  const char8_t* spec_chars = reinterpret_cast<const char8_t*>(u8"{:.4}");
  const char8_t* text_chars =
      reinterpret_cast<const char8_t*>(u8"caf\u00e9s");  // cafés
  u8_string spec(spec_chars);
  u8_string text(text_chars);
  auto formatted = fmt::format(spec, text);
  // Four code points survive the truncation...
  EXPECT_EQ(fmt::internal::count_code_points(formatted), 4);
  // ...which occupy five code units because U+00E9 encodes as two bytes.
  EXPECT_EQ(formatted.size(), 5);
  EXPECT_EQ(formatted, text.substr(0, 5));
}