From 2207ea0b36c82ebf7ebba8568b035a86c2b6438a Mon Sep 17 00:00:00 2001 From: Victor Zverovich Date: Sun, 22 Aug 2021 16:33:39 -0700 Subject: [PATCH] More escaping --- include/fmt/ranges.h | 17 ++++++++++++++--- test/ranges-test.cc | 7 +++++-- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/include/fmt/ranges.h b/include/fmt/ranges.h index 8649bb81..a35c0664 100644 --- a/include/fmt/ranges.h +++ b/include/fmt/ranges.h @@ -431,6 +431,7 @@ auto find_escape(const Char* begin, const Char* end) -> find_escape_result { for (; begin != end; ++begin) { auto cp = static_cast::type>(*begin); + if (sizeof(Char) == 1 && cp >= 0x80) continue; if (needs_escape(cp)) return {begin, begin + 1, cp}; } return {begin, nullptr, 0}; @@ -480,9 +481,19 @@ auto write_range_entry(OutputIt out, basic_string_view str) -> OutputIt { *out++ = '\\'; break; default: - if (is_utf8() && escape.cp > 0xffff) { - out = format_to(out, "\\U{:08x}", escape.cp); - continue; + if (is_utf8()) { + if (escape.cp < 0x100) { + out = format_to(out, "\\x{:02x}", escape.cp); + continue; + } + if (escape.cp < 0x10000) { + out = format_to(out, "\\u{:04x}", escape.cp); + continue; + } + if (escape.cp < 0x110000) { + out = format_to(out, "\\U{:08x}", escape.cp); + continue; + } } for (Char escape_char : basic_string_view( escape.begin, to_unsigned(escape.end - escape.begin))) { diff --git a/test/ranges-test.cc b/test/ranges-test.cc index fd1608f2..603d1976 100644 --- a/test/ranges-test.cc +++ b/test/ranges-test.cc @@ -275,10 +275,13 @@ TEST(ranges_test, escape_string) { EXPECT_EQ(fmt::format("{}", vec{"\n\r\t\"\\"}), "[\"\\n\\r\\t\\\"\\\\\"]"); EXPECT_EQ(fmt::format("{}", vec{"\x07"}), "[\"\\x07\"]"); EXPECT_EQ(fmt::format("{}", vec{"\x7f"}), "[\"\\x7f\"]"); + EXPECT_EQ(fmt::format("{}", vec{"n\xcc\x83"}), "[\"n\xcc\x83\"]"); - // Unassigned Unicode code points. if (fmt::detail::is_utf8()) { + EXPECT_EQ(fmt::format("{}", vec{"\xcd\xb8"}), "[\"\\u0378\"]"); + // Unassigned Unicode code points. EXPECT_EQ(fmt::format("{}", vec{"\xf0\xaa\x9b\x9e"}), "[\"\\U0002a6de\"]"); - EXPECT_EQ(fmt::format("{}", vec{"\xf4\x8f\xbf\xbf"}), "[\"\\U0010ffff\"]"); + EXPECT_EQ(fmt::format("{}", vec{"\xf4\x8f\xbf\xc0"}), + "[\"\\xf4\\x8f\\xbf\\xc0\"]"); } }