From 1afe201ae882d23105c893ea6a384dd3d7c00092 Mon Sep 17 00:00:00 2001 From: Victor Zverovich Date: Wed, 18 Dec 2019 09:40:51 -0800 Subject: [PATCH] Handle block boundaries in utf8_to_utf16 --- include/fmt/format-inl.h | 16 +++++++++++++--- test/format-test.cc | 2 ++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/include/fmt/format-inl.h b/include/fmt/format-inl.h index f1fa067f..29888931 100644 --- a/include/fmt/format-inl.h +++ b/include/fmt/format-inl.h @@ -158,8 +158,7 @@ FMT_FUNC void report_error(format_func func, int error_code, FMT_FUNC void fwrite_fully(const void* ptr, size_t size, size_t count, FILE* stream) { size_t written = std::fwrite(ptr, size, count, stream); - if (written < count) - FMT_THROW(system_error(errno, "cannot write to file")); + if (written < count) FMT_THROW(system_error(errno, "cannot write to file")); } } // namespace internal @@ -1272,7 +1271,7 @@ template <> struct formatter { }; FMT_FUNC internal::utf8_to_utf16::utf8_to_utf16(string_view s) { - for (auto p = s.data(), end = p + s.size(); p != end;) { + auto transcode = [this](const char* p) { auto cp = uint32_t(); auto error = 0; p = utf8_decode(p, &cp, &error); @@ -1284,6 +1283,17 @@ FMT_FUNC internal::utf8_to_utf16::utf8_to_utf16(string_view s) { buffer_.push_back(static_cast(0xD800 + (cp >> 10))); buffer_.push_back(static_cast(0xDC00 + (cp & 0x3FF))); } + return p; + }; + auto p = s.data(); + const size_t block_size = 4; // utf8_decode always reads blocks of 4 chars. + if (s.size() >= block_size) { + for (auto end = p + s.size() - block_size + 1; p < end;) p = transcode(p); + } + if (auto num_chars_left = s.data() + s.size() - p) { + char buf[4] = {}; + memcpy(buf, p, num_chars_left); + transcode(buf); } buffer_.push_back(0); } diff --git a/test/format-test.cc b/test/format-test.cc index c6828b3f..a2d254f6 100644 --- a/test/format-test.cc +++ b/test/format-test.cc @@ -408,6 +408,8 @@ TEST(UtilTest, UTF8ToUTF16) { EXPECT_EQ(L"\xD801\xDC37", fmt::internal::utf8_to_utf16("𐐷").str()); EXPECT_THROW_MSG(fmt::internal::utf8_to_utf16("\xc3\x28"), std::runtime_error, "invalid utf8"); + EXPECT_THROW_MSG(fmt::internal::utf8_to_utf16(fmt::string_view("л", 1)), + std::runtime_error, "invalid utf8"); } TEST(UtilTest, UTF8ToUTF16EmptyString) {