From 33f7d732a17990c147d878f6fb93686bf17553a2 Mon Sep 17 00:00:00 2001 From: Mark Gillard Date: Sat, 10 Oct 2020 00:30:12 +0300 Subject: [PATCH] fixed issue handling malformed utf-8 --- examples/error_printer.cpp | 1 + include/toml++/toml_parser.hpp | 33 +++++++++++++++++++++------------ tests/user_feedback.cpp | 6 +++++- toml.hpp | 33 +++++++++++++++++++++------------ 4 files changed, 48 insertions(+), 25 deletions(-) diff --git a/examples/error_printer.cpp b/examples/error_printer.cpp index 6f4f009..8366f18 100644 --- a/examples/error_printer.cpp +++ b/examples/error_printer.cpp @@ -63,6 +63,7 @@ namespace "[foo]\nbar.? = 'kek'"sv, R"('''val''' = 1)"sv, R"(a."""val""" = 1)"sv, + "1= 0x6cA#+\xf1"sv, "########## values"sv, "val = _"sv, diff --git a/include/toml++/toml_parser.hpp b/include/toml++/toml_parser.hpp index ecbdd3a..aa868e9 100644 --- a/include/toml++/toml_parser.hpp +++ b/include/toml++/toml_parser.hpp @@ -125,6 +125,15 @@ TOML_ANON_NAMESPACE_START return cp.as_view(); } + [[nodiscard]] + TOML_INTERNAL_LINKAGE + std::string_view to_sv(const ::toml::impl::utf8_codepoint* cp) noexcept + { + if (cp) + return to_sv(*cp); + return ""sv; + } + template TOML_ATTR(nonnull) TOML_INTERNAL_LINKAGE @@ -1579,7 +1588,7 @@ TOML_IMPL_NAMESPACE_START // "YYYY" uint32_t digits[4]; if (!consume_digit_sequence(digits, 4_sz)) - set_error_and_return_default("expected 4-digit year, saw '"sv, to_sv(*cp), "'"sv); + set_error_and_return_default("expected 4-digit year, saw '"sv, to_sv(cp), "'"sv); const auto year = digits[3] + digits[2] * 10u + digits[1] * 100u @@ -1594,7 +1603,7 @@ TOML_IMPL_NAMESPACE_START // "MM" if (!consume_digit_sequence(digits, 2_sz)) - set_error_and_return_default("expected 2-digit month, saw '"sv, to_sv(*cp), "'"sv); + set_error_and_return_default("expected 2-digit month, saw '"sv, to_sv(cp), "'"sv); const auto month = digits[1] + digits[0] * 10u; if (month == 0u || month > 12u) set_error_and_return_default( @@ -1614,7 +1623,7 @@ TOML_IMPL_NAMESPACE_START // "DD" if (!consume_digit_sequence(digits, 2_sz)) - set_error_and_return_default("expected 2-digit day, saw '"sv, to_sv(*cp), "'"sv); + set_error_and_return_default("expected 2-digit day, saw '"sv, to_sv(cp), "'"sv); const auto day = digits[1] + digits[0] * 10u; if (day == 0u || day > max_days_in_month) set_error_and_return_default( @@ -1646,7 +1655,7 @@ TOML_IMPL_NAMESPACE_START // "HH" if (!consume_digit_sequence(digits, 2_sz)) - set_error_and_return_default("expected 2-digit hour, saw '"sv, to_sv(*cp), "'"sv); + set_error_and_return_default("expected 2-digit hour, saw '"sv, to_sv(cp), "'"sv); const auto hour = digits[1] + digits[0] * 10u; if (hour > 23u) set_error_and_return_default( @@ -1661,7 +1670,7 @@ TOML_IMPL_NAMESPACE_START // "MM" if (!consume_digit_sequence(digits, 2_sz)) - set_error_and_return_default("expected 2-digit minute, saw '"sv, to_sv(*cp), "'"sv); + set_error_and_return_default("expected 2-digit minute, saw '"sv, to_sv(cp), "'"sv); const auto minute = digits[1] + digits[0] * 10u; if (minute > 59u) set_error_and_return_default( @@ -1688,7 +1697,7 @@ TOML_IMPL_NAMESPACE_START // "SS" if (!consume_digit_sequence(digits, 2_sz)) - set_error_and_return_default("expected 2-digit second, saw '"sv, to_sv(*cp), "'"sv); + set_error_and_return_default("expected 2-digit second, saw '"sv, to_sv(cp), "'"sv); const auto second = digits[1] + digits[0] * 10u; if (second > 59u) set_error_and_return_default( @@ -1778,7 +1787,7 @@ TOML_IMPL_NAMESPACE_START // "HH" int digits[2]; if (!consume_digit_sequence(digits, 2_sz)) - set_error_and_return_default("expected 2-digit hour, saw '"sv, to_sv(*cp), "'"sv); + set_error_and_return_default("expected 2-digit hour, saw '"sv, to_sv(cp), "'"sv); const auto hour = digits[1] + digits[0] * 10; if (hour > 23) set_error_and_return_default( @@ -1793,7 +1802,7 @@ TOML_IMPL_NAMESPACE_START // "MM" if (!consume_digit_sequence(digits, 2_sz)) - set_error_and_return_default("expected 2-digit minute, saw '"sv, to_sv(*cp), "'"sv); + set_error_and_return_default("expected 2-digit minute, saw '"sv, to_sv(cp), "'"sv); const auto minute = digits[1] + digits[0] * 10; if (minute > 59) set_error_and_return_default( @@ -2517,7 +2526,7 @@ TOML_IMPL_NAMESPACE_START // handle the rest of the line after the header consume_leading_whitespace(); if (!is_eof() && !consume_comment() && !consume_line_break()) - set_error_and_return_default("expected a comment or whitespace, saw '"sv, to_sv(*cp), "'"sv); + set_error_and_return_default("expected a comment or whitespace, saw '"sv, to_sv(cp), "'"sv); } TOML_ASSERT(!key.segments.empty()); @@ -2711,7 +2720,7 @@ TOML_IMPL_NAMESPACE_START || consume_comment()) continue; - return_if_error_or_eof(); + return_if_error(); // [tables] // [[table array]] @@ -2732,12 +2741,12 @@ TOML_IMPL_NAMESPACE_START consume_leading_whitespace(); return_if_error(); if (!is_eof() && !consume_comment() && !consume_line_break()) - set_error("expected a comment or whitespace, saw '"sv, to_sv(*cp), "'"sv); + set_error("expected a comment or whitespace, saw '"sv, to_sv(cp), "'"sv); } else // ?? - set_error("expected keys, tables, whitespace or comments, saw '"sv, to_sv(*cp), "'"sv); + set_error("expected keys, tables, whitespace or comments, saw '"sv, to_sv(cp), "'"sv); } while (!is_eof()); diff --git a/tests/user_feedback.cpp b/tests/user_feedback.cpp index 05f3009..59a15bc 100644 --- a/tests/user_feedback.cpp +++ b/tests/user_feedback.cpp @@ -97,8 +97,12 @@ TEST_CASE("user feedback") // - a malformed UTF-8 sequence // // it should fail to parse, but correctly issue an error (not crash!) - parsing_should_fail(FILE_LINE_ARGS, "#\xf1\x63"); + + // a malformed UTF-8 sequence during a KVP + // + // it should fail to parse, but correctly issue an error (not crash!) + parsing_should_fail(FILE_LINE_ARGS, "1= 0x6cA#+\xf1"); } } diff --git a/toml.hpp b/toml.hpp index 980ede4..79b3d52 100644 --- a/toml.hpp +++ b/toml.hpp @@ -8842,6 +8842,15 @@ TOML_ANON_NAMESPACE_START return cp.as_view(); } + [[nodiscard]] + TOML_INTERNAL_LINKAGE + std::string_view to_sv(const ::toml::impl::utf8_codepoint* cp) noexcept + { + if (cp) + return to_sv(*cp); + return ""sv; + } + template TOML_ATTR(nonnull) TOML_INTERNAL_LINKAGE @@ -10296,7 +10305,7 @@ TOML_IMPL_NAMESPACE_START // "YYYY" uint32_t digits[4]; if (!consume_digit_sequence(digits, 4_sz)) - set_error_and_return_default("expected 4-digit year, saw '"sv, to_sv(*cp), "'"sv); + set_error_and_return_default("expected 4-digit year, saw '"sv, to_sv(cp), "'"sv); const auto year = digits[3] + digits[2] * 10u + digits[1] * 100u @@ -10311,7 +10320,7 @@ TOML_IMPL_NAMESPACE_START // "MM" if (!consume_digit_sequence(digits, 2_sz)) - set_error_and_return_default("expected 2-digit month, saw '"sv, to_sv(*cp), "'"sv); + set_error_and_return_default("expected 2-digit month, saw '"sv, to_sv(cp), "'"sv); const auto month = digits[1] + digits[0] * 10u; if (month == 0u || month > 12u) set_error_and_return_default( @@ -10331,7 +10340,7 @@ TOML_IMPL_NAMESPACE_START // "DD" if (!consume_digit_sequence(digits, 2_sz)) - set_error_and_return_default("expected 2-digit day, saw '"sv, to_sv(*cp), "'"sv); + set_error_and_return_default("expected 2-digit day, saw '"sv, to_sv(cp), "'"sv); const auto day = digits[1] + digits[0] * 10u; if (day == 0u || day > max_days_in_month) set_error_and_return_default( @@ -10363,7 +10372,7 @@ TOML_IMPL_NAMESPACE_START // "HH" if (!consume_digit_sequence(digits, 2_sz)) - set_error_and_return_default("expected 2-digit hour, saw '"sv, to_sv(*cp), "'"sv); + set_error_and_return_default("expected 2-digit hour, saw '"sv, to_sv(cp), "'"sv); const auto hour = digits[1] + digits[0] * 10u; if (hour > 23u) set_error_and_return_default( @@ -10378,7 +10387,7 @@ TOML_IMPL_NAMESPACE_START // "MM" if (!consume_digit_sequence(digits, 2_sz)) - set_error_and_return_default("expected 2-digit minute, saw '"sv, to_sv(*cp), "'"sv); + set_error_and_return_default("expected 2-digit minute, saw '"sv, to_sv(cp), "'"sv); const auto minute = digits[1] + digits[0] * 10u; if (minute > 59u) set_error_and_return_default( @@ -10405,7 +10414,7 @@ TOML_IMPL_NAMESPACE_START // "SS" if (!consume_digit_sequence(digits, 2_sz)) - set_error_and_return_default("expected 2-digit second, saw '"sv, to_sv(*cp), "'"sv); + set_error_and_return_default("expected 2-digit second, saw '"sv, to_sv(cp), "'"sv); const auto second = digits[1] + digits[0] * 10u; if (second > 59u) set_error_and_return_default( @@ -10494,7 +10503,7 @@ TOML_IMPL_NAMESPACE_START // "HH" int digits[2]; if (!consume_digit_sequence(digits, 2_sz)) - set_error_and_return_default("expected 2-digit hour, saw '"sv, to_sv(*cp), "'"sv); + set_error_and_return_default("expected 2-digit hour, saw '"sv, to_sv(cp), "'"sv); const auto hour = digits[1] + digits[0] * 10; if (hour > 23) set_error_and_return_default( @@ -10509,7 +10518,7 @@ TOML_IMPL_NAMESPACE_START // "MM" if (!consume_digit_sequence(digits, 2_sz)) - set_error_and_return_default("expected 2-digit minute, saw '"sv, to_sv(*cp), "'"sv); + set_error_and_return_default("expected 2-digit minute, saw '"sv, to_sv(cp), "'"sv); const auto minute = digits[1] + digits[0] * 10; if (minute > 59) set_error_and_return_default( @@ -11232,7 +11241,7 @@ TOML_IMPL_NAMESPACE_START // handle the rest of the line after the header consume_leading_whitespace(); if (!is_eof() && !consume_comment() && !consume_line_break()) - set_error_and_return_default("expected a comment or whitespace, saw '"sv, to_sv(*cp), "'"sv); + set_error_and_return_default("expected a comment or whitespace, saw '"sv, to_sv(cp), "'"sv); } TOML_ASSERT(!key.segments.empty()); @@ -11425,7 +11434,7 @@ TOML_IMPL_NAMESPACE_START || consume_comment()) continue; - return_if_error_or_eof(); + return_if_error(); // [tables] // [[table array]] @@ -11446,11 +11455,11 @@ TOML_IMPL_NAMESPACE_START consume_leading_whitespace(); return_if_error(); if (!is_eof() && !consume_comment() && !consume_line_break()) - set_error("expected a comment or whitespace, saw '"sv, to_sv(*cp), "'"sv); + set_error("expected a comment or whitespace, saw '"sv, to_sv(cp), "'"sv); } else // ?? - set_error("expected keys, tables, whitespace or comments, saw '"sv, to_sv(*cp), "'"sv); + set_error("expected keys, tables, whitespace or comments, saw '"sv, to_sv(cp), "'"sv); } while (!is_eof());