Handle block boundaries in utf8_to_utf16

This commit is contained in:
Victor Zverovich 2019-12-18 09:40:51 -08:00
parent cd2b99032f
commit 1afe201ae8
2 changed files with 15 additions and 3 deletions

View File

@ -158,8 +158,7 @@ FMT_FUNC void report_error(format_func func, int error_code,
// Writes `count` elements of `size` bytes each from `ptr` to `stream` using
// std::fwrite. If fewer than `count` elements were written, reports the
// failure via FMT_THROW with a system_error built from the current errno.
FMT_FUNC void fwrite_fully(const void* ptr, size_t size, size_t count,
                           FILE* stream) {
  size_t written = std::fwrite(ptr, size, count, stream);
  // A single check is enough; the short-write test was previously duplicated.
  if (written < count) FMT_THROW(system_error(errno, "cannot write to file"));
}
} // namespace internal
@ -1272,7 +1271,7 @@ template <> struct formatter<internal::bigint> {
};
FMT_FUNC internal::utf8_to_utf16::utf8_to_utf16(string_view s) {
for (auto p = s.data(), end = p + s.size(); p != end;) {
auto transcode = [this](const char* p) {
auto cp = uint32_t();
auto error = 0;
p = utf8_decode(p, &cp, &error);
@ -1284,6 +1283,17 @@ FMT_FUNC internal::utf8_to_utf16::utf8_to_utf16(string_view s) {
buffer_.push_back(static_cast<wchar_t>(0xD800 + (cp >> 10)));
buffer_.push_back(static_cast<wchar_t>(0xDC00 + (cp & 0x3FF)));
}
return p;
};
auto p = s.data();
const size_t block_size = 4;  // utf8_decode always reads blocks of 4 chars.
// Decode straight from the input while at least one full block remains, so
// that a block read never extends past the end of s.
if (s.size() >= block_size) {
  for (auto end = p + s.size() - block_size + 1; p < end;) p = transcode(p);
}
// Fewer than block_size bytes are left. Copy them into a zero-padded scratch
// buffer and keep decoding until every remaining byte has been consumed;
// a single transcode call would convert only the first code point of the
// tail and silently drop the rest (e.g. "abc" would become "a").
if (auto num_chars_left = s.data() + s.size() - p) {
  // At most block_size - 1 bytes remain, and the last decode may start at the
  // final one of them while still reading a whole block, hence the size.
  char buf[2 * block_size - 1] = {};
  memcpy(buf, p, static_cast<size_t>(num_chars_left));
  p = buf;
  do {
    p = transcode(p);
  } while (p - buf < num_chars_left);
}
buffer_.push_back(0);
}

View File

@ -408,6 +408,8 @@ TEST(UtilTest, UTF8ToUTF16) {
EXPECT_EQ(L"\xD801\xDC37", fmt::internal::utf8_to_utf16("𐐷").str());
EXPECT_THROW_MSG(fmt::internal::utf8_to_utf16("\xc3\x28"), std::runtime_error,
"invalid utf8");
EXPECT_THROW_MSG(fmt::internal::utf8_to_utf16(fmt::string_view("л", 1)),
std::runtime_error, "invalid utf8");
}
TEST(UtilTest, UTF8ToUTF16EmptyString) {