Fixed error reporting on malformed UTF-8: parse-error messages no longer dereference a null codepoint pointer

This commit is contained in:
Mark Gillard 2020-10-10 00:30:12 +03:00
parent 6255dd765c
commit 33f7d732a1
4 changed files with 48 additions and 25 deletions

View File

@ -63,6 +63,7 @@ namespace
"[foo]\nbar.? = 'kek'"sv, "[foo]\nbar.? = 'kek'"sv,
R"('''val''' = 1)"sv, R"('''val''' = 1)"sv,
R"(a."""val""" = 1)"sv, R"(a."""val""" = 1)"sv,
"1= 0x6cA#+\xf1"sv,
"########## values"sv, "########## values"sv,
"val = _"sv, "val = _"sv,

View File

@ -125,6 +125,15 @@ TOML_ANON_NAMESPACE_START
return cp.as_view(); return cp.as_view();
} }
// Pointer overload of to_sv(): tolerates a null codepoint pointer.
// Yields an empty view when `cp` is null; otherwise forwards to the
// by-reference overload. This lets error-message call sites pass their
// current-codepoint pointer directly instead of dereferencing it first.
[[nodiscard]]
TOML_INTERNAL_LINKAGE
std::string_view to_sv(const ::toml::impl::utf8_codepoint* cp) noexcept
{
    // Nothing to describe: hand back an empty view rather than touching *cp.
    if (!cp)
        return ""sv;

    return to_sv(*cp);
}
template <typename T> template <typename T>
TOML_ATTR(nonnull) TOML_ATTR(nonnull)
TOML_INTERNAL_LINKAGE TOML_INTERNAL_LINKAGE
@ -1579,7 +1588,7 @@ TOML_IMPL_NAMESPACE_START
// "YYYY" // "YYYY"
uint32_t digits[4]; uint32_t digits[4];
if (!consume_digit_sequence(digits, 4_sz)) if (!consume_digit_sequence(digits, 4_sz))
set_error_and_return_default("expected 4-digit year, saw '"sv, to_sv(*cp), "'"sv); set_error_and_return_default("expected 4-digit year, saw '"sv, to_sv(cp), "'"sv);
const auto year = digits[3] const auto year = digits[3]
+ digits[2] * 10u + digits[2] * 10u
+ digits[1] * 100u + digits[1] * 100u
@ -1594,7 +1603,7 @@ TOML_IMPL_NAMESPACE_START
// "MM" // "MM"
if (!consume_digit_sequence(digits, 2_sz)) if (!consume_digit_sequence(digits, 2_sz))
set_error_and_return_default("expected 2-digit month, saw '"sv, to_sv(*cp), "'"sv); set_error_and_return_default("expected 2-digit month, saw '"sv, to_sv(cp), "'"sv);
const auto month = digits[1] + digits[0] * 10u; const auto month = digits[1] + digits[0] * 10u;
if (month == 0u || month > 12u) if (month == 0u || month > 12u)
set_error_and_return_default( set_error_and_return_default(
@ -1614,7 +1623,7 @@ TOML_IMPL_NAMESPACE_START
// "DD" // "DD"
if (!consume_digit_sequence(digits, 2_sz)) if (!consume_digit_sequence(digits, 2_sz))
set_error_and_return_default("expected 2-digit day, saw '"sv, to_sv(*cp), "'"sv); set_error_and_return_default("expected 2-digit day, saw '"sv, to_sv(cp), "'"sv);
const auto day = digits[1] + digits[0] * 10u; const auto day = digits[1] + digits[0] * 10u;
if (day == 0u || day > max_days_in_month) if (day == 0u || day > max_days_in_month)
set_error_and_return_default( set_error_and_return_default(
@ -1646,7 +1655,7 @@ TOML_IMPL_NAMESPACE_START
// "HH" // "HH"
if (!consume_digit_sequence(digits, 2_sz)) if (!consume_digit_sequence(digits, 2_sz))
set_error_and_return_default("expected 2-digit hour, saw '"sv, to_sv(*cp), "'"sv); set_error_and_return_default("expected 2-digit hour, saw '"sv, to_sv(cp), "'"sv);
const auto hour = digits[1] + digits[0] * 10u; const auto hour = digits[1] + digits[0] * 10u;
if (hour > 23u) if (hour > 23u)
set_error_and_return_default( set_error_and_return_default(
@ -1661,7 +1670,7 @@ TOML_IMPL_NAMESPACE_START
// "MM" // "MM"
if (!consume_digit_sequence(digits, 2_sz)) if (!consume_digit_sequence(digits, 2_sz))
set_error_and_return_default("expected 2-digit minute, saw '"sv, to_sv(*cp), "'"sv); set_error_and_return_default("expected 2-digit minute, saw '"sv, to_sv(cp), "'"sv);
const auto minute = digits[1] + digits[0] * 10u; const auto minute = digits[1] + digits[0] * 10u;
if (minute > 59u) if (minute > 59u)
set_error_and_return_default( set_error_and_return_default(
@ -1688,7 +1697,7 @@ TOML_IMPL_NAMESPACE_START
// "SS" // "SS"
if (!consume_digit_sequence(digits, 2_sz)) if (!consume_digit_sequence(digits, 2_sz))
set_error_and_return_default("expected 2-digit second, saw '"sv, to_sv(*cp), "'"sv); set_error_and_return_default("expected 2-digit second, saw '"sv, to_sv(cp), "'"sv);
const auto second = digits[1] + digits[0] * 10u; const auto second = digits[1] + digits[0] * 10u;
if (second > 59u) if (second > 59u)
set_error_and_return_default( set_error_and_return_default(
@ -1778,7 +1787,7 @@ TOML_IMPL_NAMESPACE_START
// "HH" // "HH"
int digits[2]; int digits[2];
if (!consume_digit_sequence(digits, 2_sz)) if (!consume_digit_sequence(digits, 2_sz))
set_error_and_return_default("expected 2-digit hour, saw '"sv, to_sv(*cp), "'"sv); set_error_and_return_default("expected 2-digit hour, saw '"sv, to_sv(cp), "'"sv);
const auto hour = digits[1] + digits[0] * 10; const auto hour = digits[1] + digits[0] * 10;
if (hour > 23) if (hour > 23)
set_error_and_return_default( set_error_and_return_default(
@ -1793,7 +1802,7 @@ TOML_IMPL_NAMESPACE_START
// "MM" // "MM"
if (!consume_digit_sequence(digits, 2_sz)) if (!consume_digit_sequence(digits, 2_sz))
set_error_and_return_default("expected 2-digit minute, saw '"sv, to_sv(*cp), "'"sv); set_error_and_return_default("expected 2-digit minute, saw '"sv, to_sv(cp), "'"sv);
const auto minute = digits[1] + digits[0] * 10; const auto minute = digits[1] + digits[0] * 10;
if (minute > 59) if (minute > 59)
set_error_and_return_default( set_error_and_return_default(
@ -2517,7 +2526,7 @@ TOML_IMPL_NAMESPACE_START
// handle the rest of the line after the header // handle the rest of the line after the header
consume_leading_whitespace(); consume_leading_whitespace();
if (!is_eof() && !consume_comment() && !consume_line_break()) if (!is_eof() && !consume_comment() && !consume_line_break())
set_error_and_return_default("expected a comment or whitespace, saw '"sv, to_sv(*cp), "'"sv); set_error_and_return_default("expected a comment or whitespace, saw '"sv, to_sv(cp), "'"sv);
} }
TOML_ASSERT(!key.segments.empty()); TOML_ASSERT(!key.segments.empty());
@ -2711,7 +2720,7 @@ TOML_IMPL_NAMESPACE_START
|| consume_comment()) || consume_comment())
continue; continue;
return_if_error_or_eof(); return_if_error();
// [tables] // [tables]
// [[table array]] // [[table array]]
@ -2732,12 +2741,12 @@ TOML_IMPL_NAMESPACE_START
consume_leading_whitespace(); consume_leading_whitespace();
return_if_error(); return_if_error();
if (!is_eof() && !consume_comment() && !consume_line_break()) if (!is_eof() && !consume_comment() && !consume_line_break())
set_error("expected a comment or whitespace, saw '"sv, to_sv(*cp), "'"sv); set_error("expected a comment or whitespace, saw '"sv, to_sv(cp), "'"sv);
} }
else // ?? else // ??
set_error("expected keys, tables, whitespace or comments, saw '"sv, to_sv(*cp), "'"sv); set_error("expected keys, tables, whitespace or comments, saw '"sv, to_sv(cp), "'"sv);
} }
while (!is_eof()); while (!is_eof());

View File

@ -97,8 +97,12 @@ TEST_CASE("user feedback")
// - a malformed UTF-8 sequence // - a malformed UTF-8 sequence
// //
// it should fail to parse, but correctly issue an error (not crash!) // it should fail to parse, but correctly issue an error (not crash!)
parsing_should_fail(FILE_LINE_ARGS, "#\xf1\x63"); parsing_should_fail(FILE_LINE_ARGS, "#\xf1\x63");
// a malformed UTF-8 sequence during a KVP
//
// it should fail to parse, but correctly issue an error (not crash!)
parsing_should_fail(FILE_LINE_ARGS, "1= 0x6cA#+\xf1");
} }
} }

View File

@ -8842,6 +8842,15 @@ TOML_ANON_NAMESPACE_START
return cp.as_view(); return cp.as_view();
} }
// Null-safe to_sv() taking the codepoint by pointer.
// A null `cp` produces an empty string view; a non-null `cp` is passed on
// to the by-reference to_sv() overload and its result is returned as-is.
[[nodiscard]]
TOML_INTERNAL_LINKAGE
std::string_view to_sv(const ::toml::impl::utf8_codepoint* cp) noexcept
{
    // Guard first so the pointer is never dereferenced when it is null.
    if (!cp)
        return ""sv;

    return to_sv(*cp);
}
template <typename T> template <typename T>
TOML_ATTR(nonnull) TOML_ATTR(nonnull)
TOML_INTERNAL_LINKAGE TOML_INTERNAL_LINKAGE
@ -10296,7 +10305,7 @@ TOML_IMPL_NAMESPACE_START
// "YYYY" // "YYYY"
uint32_t digits[4]; uint32_t digits[4];
if (!consume_digit_sequence(digits, 4_sz)) if (!consume_digit_sequence(digits, 4_sz))
set_error_and_return_default("expected 4-digit year, saw '"sv, to_sv(*cp), "'"sv); set_error_and_return_default("expected 4-digit year, saw '"sv, to_sv(cp), "'"sv);
const auto year = digits[3] const auto year = digits[3]
+ digits[2] * 10u + digits[2] * 10u
+ digits[1] * 100u + digits[1] * 100u
@ -10311,7 +10320,7 @@ TOML_IMPL_NAMESPACE_START
// "MM" // "MM"
if (!consume_digit_sequence(digits, 2_sz)) if (!consume_digit_sequence(digits, 2_sz))
set_error_and_return_default("expected 2-digit month, saw '"sv, to_sv(*cp), "'"sv); set_error_and_return_default("expected 2-digit month, saw '"sv, to_sv(cp), "'"sv);
const auto month = digits[1] + digits[0] * 10u; const auto month = digits[1] + digits[0] * 10u;
if (month == 0u || month > 12u) if (month == 0u || month > 12u)
set_error_and_return_default( set_error_and_return_default(
@ -10331,7 +10340,7 @@ TOML_IMPL_NAMESPACE_START
// "DD" // "DD"
if (!consume_digit_sequence(digits, 2_sz)) if (!consume_digit_sequence(digits, 2_sz))
set_error_and_return_default("expected 2-digit day, saw '"sv, to_sv(*cp), "'"sv); set_error_and_return_default("expected 2-digit day, saw '"sv, to_sv(cp), "'"sv);
const auto day = digits[1] + digits[0] * 10u; const auto day = digits[1] + digits[0] * 10u;
if (day == 0u || day > max_days_in_month) if (day == 0u || day > max_days_in_month)
set_error_and_return_default( set_error_and_return_default(
@ -10363,7 +10372,7 @@ TOML_IMPL_NAMESPACE_START
// "HH" // "HH"
if (!consume_digit_sequence(digits, 2_sz)) if (!consume_digit_sequence(digits, 2_sz))
set_error_and_return_default("expected 2-digit hour, saw '"sv, to_sv(*cp), "'"sv); set_error_and_return_default("expected 2-digit hour, saw '"sv, to_sv(cp), "'"sv);
const auto hour = digits[1] + digits[0] * 10u; const auto hour = digits[1] + digits[0] * 10u;
if (hour > 23u) if (hour > 23u)
set_error_and_return_default( set_error_and_return_default(
@ -10378,7 +10387,7 @@ TOML_IMPL_NAMESPACE_START
// "MM" // "MM"
if (!consume_digit_sequence(digits, 2_sz)) if (!consume_digit_sequence(digits, 2_sz))
set_error_and_return_default("expected 2-digit minute, saw '"sv, to_sv(*cp), "'"sv); set_error_and_return_default("expected 2-digit minute, saw '"sv, to_sv(cp), "'"sv);
const auto minute = digits[1] + digits[0] * 10u; const auto minute = digits[1] + digits[0] * 10u;
if (minute > 59u) if (minute > 59u)
set_error_and_return_default( set_error_and_return_default(
@ -10405,7 +10414,7 @@ TOML_IMPL_NAMESPACE_START
// "SS" // "SS"
if (!consume_digit_sequence(digits, 2_sz)) if (!consume_digit_sequence(digits, 2_sz))
set_error_and_return_default("expected 2-digit second, saw '"sv, to_sv(*cp), "'"sv); set_error_and_return_default("expected 2-digit second, saw '"sv, to_sv(cp), "'"sv);
const auto second = digits[1] + digits[0] * 10u; const auto second = digits[1] + digits[0] * 10u;
if (second > 59u) if (second > 59u)
set_error_and_return_default( set_error_and_return_default(
@ -10494,7 +10503,7 @@ TOML_IMPL_NAMESPACE_START
// "HH" // "HH"
int digits[2]; int digits[2];
if (!consume_digit_sequence(digits, 2_sz)) if (!consume_digit_sequence(digits, 2_sz))
set_error_and_return_default("expected 2-digit hour, saw '"sv, to_sv(*cp), "'"sv); set_error_and_return_default("expected 2-digit hour, saw '"sv, to_sv(cp), "'"sv);
const auto hour = digits[1] + digits[0] * 10; const auto hour = digits[1] + digits[0] * 10;
if (hour > 23) if (hour > 23)
set_error_and_return_default( set_error_and_return_default(
@ -10509,7 +10518,7 @@ TOML_IMPL_NAMESPACE_START
// "MM" // "MM"
if (!consume_digit_sequence(digits, 2_sz)) if (!consume_digit_sequence(digits, 2_sz))
set_error_and_return_default("expected 2-digit minute, saw '"sv, to_sv(*cp), "'"sv); set_error_and_return_default("expected 2-digit minute, saw '"sv, to_sv(cp), "'"sv);
const auto minute = digits[1] + digits[0] * 10; const auto minute = digits[1] + digits[0] * 10;
if (minute > 59) if (minute > 59)
set_error_and_return_default( set_error_and_return_default(
@ -11232,7 +11241,7 @@ TOML_IMPL_NAMESPACE_START
// handle the rest of the line after the header // handle the rest of the line after the header
consume_leading_whitespace(); consume_leading_whitespace();
if (!is_eof() && !consume_comment() && !consume_line_break()) if (!is_eof() && !consume_comment() && !consume_line_break())
set_error_and_return_default("expected a comment or whitespace, saw '"sv, to_sv(*cp), "'"sv); set_error_and_return_default("expected a comment or whitespace, saw '"sv, to_sv(cp), "'"sv);
} }
TOML_ASSERT(!key.segments.empty()); TOML_ASSERT(!key.segments.empty());
@ -11425,7 +11434,7 @@ TOML_IMPL_NAMESPACE_START
|| consume_comment()) || consume_comment())
continue; continue;
return_if_error_or_eof(); return_if_error();
// [tables] // [tables]
// [[table array]] // [[table array]]
@ -11446,11 +11455,11 @@ TOML_IMPL_NAMESPACE_START
consume_leading_whitespace(); consume_leading_whitespace();
return_if_error(); return_if_error();
if (!is_eof() && !consume_comment() && !consume_line_break()) if (!is_eof() && !consume_comment() && !consume_line_break())
set_error("expected a comment or whitespace, saw '"sv, to_sv(*cp), "'"sv); set_error("expected a comment or whitespace, saw '"sv, to_sv(cp), "'"sv);
} }
else // ?? else // ??
set_error("expected keys, tables, whitespace or comments, saw '"sv, to_sv(*cp), "'"sv); set_error("expected keys, tables, whitespace or comments, saw '"sv, to_sv(cp), "'"sv);
} }
while (!is_eof()); while (!is_eof());