From ac0ab8eff3bbd4be476193c0722b257eaf82346c Mon Sep 17 00:00:00 2001 From: Victor Zverovich Date: Thu, 20 Jul 2023 17:18:03 -0700 Subject: [PATCH] Improve path formatter --- include/fmt/format.h | 24 ++++++++++++++++-------- include/fmt/std.h | 30 +++++++++++++++++++++++++----- test/std-test.cc | 11 ++++++----- 3 files changed, 47 insertions(+), 18 deletions(-) diff --git a/include/fmt/format.h b/include/fmt/format.h index 16898c44..690dfb00 100644 --- a/include/fmt/format.h +++ b/include/fmt/format.h @@ -569,8 +569,8 @@ template ::value)> __attribute__((no_sanitize("undefined"))) #endif inline auto -reserve(std::back_insert_iterator it, size_t n) - -> typename Container::value_type* { +reserve(std::back_insert_iterator it, size_t n) -> + typename Container::value_type* { Container& c = get_container(it); size_t size = c.size(); c.resize(size + n); @@ -1251,7 +1251,7 @@ FMT_CONSTEXPR auto count_digits(UInt n) -> int { FMT_INLINE auto do_count_digits(uint32_t n) -> int { // An optimization by Kendall Willets from https://bit.ly/3uOIQrB. // This increments the upper 32 bits (log10(T) - 1) when >= T is added. -# define FMT_INC(T) (((sizeof(# T) - 1ull) << 32) - T) +# define FMT_INC(T) (((sizeof(#T) - 1ull) << 32) - T) static constexpr uint64_t table[] = { FMT_INC(0), FMT_INC(0), FMT_INC(0), // 8 FMT_INC(10), FMT_INC(10), FMT_INC(10), // 64 @@ -1412,6 +1412,8 @@ class utf8_to_utf16 { auto str() const -> std::wstring { return {&buffer_[0], size()}; } }; +enum class to_utf8_error_policy { abort, replace }; + // A converter from UTF-16/UTF-32 (host endian) to UTF-8. template class to_utf8 { private: @@ -1440,16 +1442,20 @@ template class to_utf8 { buffer_.push_back(0); return true; } - static bool convert(Buffer& buf, basic_string_view s) { + static bool convert( + Buffer& buf, basic_string_view s, + to_utf8_error_policy policy = to_utf8_error_policy::abort) { for (auto p = s.begin(); p != s.end(); ++p) { uint32_t c = static_cast(*p); if (sizeof(WChar) == 2 && c >= 0xd800 && c <= 0xdfff) { - // surrogate pair + // Handle a surrogate pair. ++p; if (p == s.end() || (c & 0xfc00) != 0xd800 || (*p & 0xfc00) != 0xdc00) { - return false; + if (policy == to_utf8_error_policy::abort) return false; + buf.append(string_view("�")); + } else { + c = (c << 10) + static_cast(*p) - 0x35fdc00; } - c = (c << 10) + static_cast(*p) - 0x35fdc00; } if (c < 0x80) { buf.push_back(static_cast(c)); @@ -4147,7 +4153,9 @@ template <> struct formatter { }; // group_digits_view is not derived from view because it copies the argument. -template struct group_digits_view { T value; }; +template struct group_digits_view { + T value; +}; /** \rst diff --git a/include/fmt/std.h b/include/fmt/std.h index 9aafee38..7ac81aea 100644 --- a/include/fmt/std.h +++ b/include/fmt/std.h @@ -61,22 +61,32 @@ FMT_BEGIN_NAMESPACE namespace detail { +template auto get_path_string(const std::filesystem::path& p) { + return p.string(); +} + template void write_escaped_path(basic_memory_buffer& quoted, const std::filesystem::path& p) { write_escaped_string(std::back_inserter(quoted), p.string()); } + # ifdef _WIN32 +template <> +auto get_path_string(const std::filesystem::path& p) { + return to_utf8(p.native()); +} + template <> inline void write_escaped_path(memory_buffer& quoted, const std::filesystem::path& p) { auto buf = basic_memory_buffer(); write_escaped_string(std::back_inserter(buf), p.native()); - // Convert UTF-16 to UTF-8. - if (!to_utf8::convert(quoted, {buf.data(), buf.size()})) - FMT_THROW(std::runtime_error("invalid utf16")); + bool valid = to_utf8::convert(quoted, {buf.data(), buf.size()}); + FMT_ASSERT(valid, "invalid utf16"); } -# endif +# endif // _WIN32 + template <> inline void write_escaped_path( basic_memory_buffer& quoted, @@ -92,8 +102,11 @@ template struct formatter { private: format_specs specs_; detail::arg_ref width_ref_; + bool debug_ = false; public: + FMT_CONSTEXPR void set_debug_format(bool set = true) { debug_ = set; } + template FMT_CONSTEXPR auto parse(ParseContext& ctx) { auto it = ctx.begin(), end = ctx.end(); if (it == end) return it; @@ -102,7 +115,10 @@ template struct formatter { if (it == end) return it; it = detail::parse_dynamic_spec(it, end, specs_.width, width_ref_, ctx); - if (it != end && *it == '?') ++it; + if (it != end && *it == '?') { + debug_ = true; + ++it; + } return it; } @@ -111,6 +127,10 @@ template struct formatter { auto specs = specs_; detail::handle_dynamic_spec(specs.width, width_ref_, ctx); + if (!debug_) { + auto s = detail::get_path_string(p); + return detail::write(ctx.out(), basic_string_view(s), specs); + } auto quoted = basic_memory_buffer(); detail::write_escaped_path(quoted, p); return detail::write(ctx.out(), diff --git a/test/std-test.cc b/test/std-test.cc index 78f86d5a..35867f05 100644 --- a/test/std-test.cc +++ b/test/std-test.cc @@ -19,9 +19,9 @@ using testing::StartsWith; #ifdef __cpp_lib_filesystem TEST(std_test, path) { - EXPECT_EQ(fmt::format("{:8}", std::filesystem::path("foo")), "\"foo\" "); + EXPECT_EQ(fmt::format("{:8}", std::filesystem::path("foo")), "foo "); EXPECT_EQ(fmt::format("{}", std::filesystem::path("foo\"bar.txt")), - "\"foo\\\"bar.txt\""); + "foo\"bar.txt"); EXPECT_EQ(fmt::format("{:?}", std::filesystem::path("foo\"bar.txt")), "\"foo\\\"bar.txt\""); @@ -29,8 +29,9 @@ TEST(std_test, path) { EXPECT_EQ(fmt::format("{}", std::filesystem::path( L"\x0428\x0447\x0443\x0447\x044B\x043D\x0448" L"\x0447\x044B\x043D\x0430")), - "\"Шчучыншчына\""); - EXPECT_EQ(fmt::format("{}", std::filesystem::path(L"\xd800")), "\"\\ud800\""); + "Шчучыншчына"); + EXPECT_EQ(fmt::format("{:?}", std::filesystem::path(L"\xd800")), + "\"\\ud800\""); # endif } @@ -39,7 +40,7 @@ TEST(ranges_std_test, format_vector_path) { auto p = std::filesystem::path("foo/bar.txt"); auto c = std::vector{"abc", "def"}; EXPECT_EQ(fmt::format("path={}, range={}", p, c), - "path=\"foo/bar.txt\", range=[\"abc\", \"def\"]"); + "path=foo/bar.txt, range=[\"abc\", \"def\"]"); } // Test that path is not escaped twice in the debug mode.