Fixed an issue when handling malformed UTF-8

This commit is contained in:
Mark Gillard 2020-10-10 00:30:12 +03:00
parent 6255dd765c
commit 33f7d732a1
4 changed files with 48 additions and 25 deletions

View File

@ -63,6 +63,7 @@ namespace
"[foo]\nbar.? = 'kek'"sv,
R"('''val''' = 1)"sv,
R"(a."""val""" = 1)"sv,
"1= 0x6cA#+\xf1"sv,
"########## values"sv,
"val = _"sv,

View File

@ -125,6 +125,15 @@ TOML_ANON_NAMESPACE_START
return cp.as_view();
}
// Pointer overload of to_sv(): tolerates a null codepoint pointer.
// Returns an empty view when `cp` is null; otherwise forwards to the
// by-reference overload. This lets error-reporting call sites pass a
// possibly-null pointer directly instead of dereferencing it first.
[[nodiscard]]
TOML_INTERNAL_LINKAGE
std::string_view to_sv(const ::toml::impl::utf8_codepoint* cp) noexcept
{
	// Guard first: nothing to describe when there is no codepoint.
	if (!cp)
		return ""sv;

	return to_sv(*cp);
}
template <typename T>
TOML_ATTR(nonnull)
TOML_INTERNAL_LINKAGE
@ -1579,7 +1588,7 @@ TOML_IMPL_NAMESPACE_START
// "YYYY"
uint32_t digits[4];
if (!consume_digit_sequence(digits, 4_sz))
set_error_and_return_default("expected 4-digit year, saw '"sv, to_sv(*cp), "'"sv);
set_error_and_return_default("expected 4-digit year, saw '"sv, to_sv(cp), "'"sv);
const auto year = digits[3]
+ digits[2] * 10u
+ digits[1] * 100u
@ -1594,7 +1603,7 @@ TOML_IMPL_NAMESPACE_START
// "MM"
if (!consume_digit_sequence(digits, 2_sz))
set_error_and_return_default("expected 2-digit month, saw '"sv, to_sv(*cp), "'"sv);
set_error_and_return_default("expected 2-digit month, saw '"sv, to_sv(cp), "'"sv);
const auto month = digits[1] + digits[0] * 10u;
if (month == 0u || month > 12u)
set_error_and_return_default(
@ -1614,7 +1623,7 @@ TOML_IMPL_NAMESPACE_START
// "DD"
if (!consume_digit_sequence(digits, 2_sz))
set_error_and_return_default("expected 2-digit day, saw '"sv, to_sv(*cp), "'"sv);
set_error_and_return_default("expected 2-digit day, saw '"sv, to_sv(cp), "'"sv);
const auto day = digits[1] + digits[0] * 10u;
if (day == 0u || day > max_days_in_month)
set_error_and_return_default(
@ -1646,7 +1655,7 @@ TOML_IMPL_NAMESPACE_START
// "HH"
if (!consume_digit_sequence(digits, 2_sz))
set_error_and_return_default("expected 2-digit hour, saw '"sv, to_sv(*cp), "'"sv);
set_error_and_return_default("expected 2-digit hour, saw '"sv, to_sv(cp), "'"sv);
const auto hour = digits[1] + digits[0] * 10u;
if (hour > 23u)
set_error_and_return_default(
@ -1661,7 +1670,7 @@ TOML_IMPL_NAMESPACE_START
// "MM"
if (!consume_digit_sequence(digits, 2_sz))
set_error_and_return_default("expected 2-digit minute, saw '"sv, to_sv(*cp), "'"sv);
set_error_and_return_default("expected 2-digit minute, saw '"sv, to_sv(cp), "'"sv);
const auto minute = digits[1] + digits[0] * 10u;
if (minute > 59u)
set_error_and_return_default(
@ -1688,7 +1697,7 @@ TOML_IMPL_NAMESPACE_START
// "SS"
if (!consume_digit_sequence(digits, 2_sz))
set_error_and_return_default("expected 2-digit second, saw '"sv, to_sv(*cp), "'"sv);
set_error_and_return_default("expected 2-digit second, saw '"sv, to_sv(cp), "'"sv);
const auto second = digits[1] + digits[0] * 10u;
if (second > 59u)
set_error_and_return_default(
@ -1778,7 +1787,7 @@ TOML_IMPL_NAMESPACE_START
// "HH"
int digits[2];
if (!consume_digit_sequence(digits, 2_sz))
set_error_and_return_default("expected 2-digit hour, saw '"sv, to_sv(*cp), "'"sv);
set_error_and_return_default("expected 2-digit hour, saw '"sv, to_sv(cp), "'"sv);
const auto hour = digits[1] + digits[0] * 10;
if (hour > 23)
set_error_and_return_default(
@ -1793,7 +1802,7 @@ TOML_IMPL_NAMESPACE_START
// "MM"
if (!consume_digit_sequence(digits, 2_sz))
set_error_and_return_default("expected 2-digit minute, saw '"sv, to_sv(*cp), "'"sv);
set_error_and_return_default("expected 2-digit minute, saw '"sv, to_sv(cp), "'"sv);
const auto minute = digits[1] + digits[0] * 10;
if (minute > 59)
set_error_and_return_default(
@ -2517,7 +2526,7 @@ TOML_IMPL_NAMESPACE_START
// handle the rest of the line after the header
consume_leading_whitespace();
if (!is_eof() && !consume_comment() && !consume_line_break())
set_error_and_return_default("expected a comment or whitespace, saw '"sv, to_sv(*cp), "'"sv);
set_error_and_return_default("expected a comment or whitespace, saw '"sv, to_sv(cp), "'"sv);
}
TOML_ASSERT(!key.segments.empty());
@ -2711,7 +2720,7 @@ TOML_IMPL_NAMESPACE_START
|| consume_comment())
continue;
return_if_error_or_eof();
return_if_error();
// [tables]
// [[table array]]
@ -2732,12 +2741,12 @@ TOML_IMPL_NAMESPACE_START
consume_leading_whitespace();
return_if_error();
if (!is_eof() && !consume_comment() && !consume_line_break())
set_error("expected a comment or whitespace, saw '"sv, to_sv(*cp), "'"sv);
set_error("expected a comment or whitespace, saw '"sv, to_sv(cp), "'"sv);
}
else // ??
set_error("expected keys, tables, whitespace or comments, saw '"sv, to_sv(*cp), "'"sv);
set_error("expected keys, tables, whitespace or comments, saw '"sv, to_sv(cp), "'"sv);
}
while (!is_eof());

View File

@ -97,8 +97,12 @@ TEST_CASE("user feedback")
// - a malformed UTF-8 sequence
//
// it should fail to parse, but correctly issue an error (not crash!)
parsing_should_fail(FILE_LINE_ARGS, "#\xf1\x63");
// a malformed UTF-8 sequence during a key-value pair (KVP)
//
// it should fail to parse, but correctly issue an error (not crash!)
parsing_should_fail(FILE_LINE_ARGS, "1= 0x6cA#+\xf1");
}
}

View File

@ -8842,6 +8842,15 @@ TOML_ANON_NAMESPACE_START
return cp.as_view();
}
// Pointer overload of to_sv(): tolerates a null codepoint pointer.
// Returns an empty view when `cp` is null; otherwise forwards to the
// by-reference overload. This lets error-reporting call sites pass a
// possibly-null pointer directly instead of dereferencing it first.
[[nodiscard]]
TOML_INTERNAL_LINKAGE
std::string_view to_sv(const ::toml::impl::utf8_codepoint* cp) noexcept
{
	// Guard first: nothing to describe when there is no codepoint.
	if (!cp)
		return ""sv;

	return to_sv(*cp);
}
template <typename T>
TOML_ATTR(nonnull)
TOML_INTERNAL_LINKAGE
@ -10296,7 +10305,7 @@ TOML_IMPL_NAMESPACE_START
// "YYYY"
uint32_t digits[4];
if (!consume_digit_sequence(digits, 4_sz))
set_error_and_return_default("expected 4-digit year, saw '"sv, to_sv(*cp), "'"sv);
set_error_and_return_default("expected 4-digit year, saw '"sv, to_sv(cp), "'"sv);
const auto year = digits[3]
+ digits[2] * 10u
+ digits[1] * 100u
@ -10311,7 +10320,7 @@ TOML_IMPL_NAMESPACE_START
// "MM"
if (!consume_digit_sequence(digits, 2_sz))
set_error_and_return_default("expected 2-digit month, saw '"sv, to_sv(*cp), "'"sv);
set_error_and_return_default("expected 2-digit month, saw '"sv, to_sv(cp), "'"sv);
const auto month = digits[1] + digits[0] * 10u;
if (month == 0u || month > 12u)
set_error_and_return_default(
@ -10331,7 +10340,7 @@ TOML_IMPL_NAMESPACE_START
// "DD"
if (!consume_digit_sequence(digits, 2_sz))
set_error_and_return_default("expected 2-digit day, saw '"sv, to_sv(*cp), "'"sv);
set_error_and_return_default("expected 2-digit day, saw '"sv, to_sv(cp), "'"sv);
const auto day = digits[1] + digits[0] * 10u;
if (day == 0u || day > max_days_in_month)
set_error_and_return_default(
@ -10363,7 +10372,7 @@ TOML_IMPL_NAMESPACE_START
// "HH"
if (!consume_digit_sequence(digits, 2_sz))
set_error_and_return_default("expected 2-digit hour, saw '"sv, to_sv(*cp), "'"sv);
set_error_and_return_default("expected 2-digit hour, saw '"sv, to_sv(cp), "'"sv);
const auto hour = digits[1] + digits[0] * 10u;
if (hour > 23u)
set_error_and_return_default(
@ -10378,7 +10387,7 @@ TOML_IMPL_NAMESPACE_START
// "MM"
if (!consume_digit_sequence(digits, 2_sz))
set_error_and_return_default("expected 2-digit minute, saw '"sv, to_sv(*cp), "'"sv);
set_error_and_return_default("expected 2-digit minute, saw '"sv, to_sv(cp), "'"sv);
const auto minute = digits[1] + digits[0] * 10u;
if (minute > 59u)
set_error_and_return_default(
@ -10405,7 +10414,7 @@ TOML_IMPL_NAMESPACE_START
// "SS"
if (!consume_digit_sequence(digits, 2_sz))
set_error_and_return_default("expected 2-digit second, saw '"sv, to_sv(*cp), "'"sv);
set_error_and_return_default("expected 2-digit second, saw '"sv, to_sv(cp), "'"sv);
const auto second = digits[1] + digits[0] * 10u;
if (second > 59u)
set_error_and_return_default(
@ -10494,7 +10503,7 @@ TOML_IMPL_NAMESPACE_START
// "HH"
int digits[2];
if (!consume_digit_sequence(digits, 2_sz))
set_error_and_return_default("expected 2-digit hour, saw '"sv, to_sv(*cp), "'"sv);
set_error_and_return_default("expected 2-digit hour, saw '"sv, to_sv(cp), "'"sv);
const auto hour = digits[1] + digits[0] * 10;
if (hour > 23)
set_error_and_return_default(
@ -10509,7 +10518,7 @@ TOML_IMPL_NAMESPACE_START
// "MM"
if (!consume_digit_sequence(digits, 2_sz))
set_error_and_return_default("expected 2-digit minute, saw '"sv, to_sv(*cp), "'"sv);
set_error_and_return_default("expected 2-digit minute, saw '"sv, to_sv(cp), "'"sv);
const auto minute = digits[1] + digits[0] * 10;
if (minute > 59)
set_error_and_return_default(
@ -11232,7 +11241,7 @@ TOML_IMPL_NAMESPACE_START
// handle the rest of the line after the header
consume_leading_whitespace();
if (!is_eof() && !consume_comment() && !consume_line_break())
set_error_and_return_default("expected a comment or whitespace, saw '"sv, to_sv(*cp), "'"sv);
set_error_and_return_default("expected a comment or whitespace, saw '"sv, to_sv(cp), "'"sv);
}
TOML_ASSERT(!key.segments.empty());
@ -11425,7 +11434,7 @@ TOML_IMPL_NAMESPACE_START
|| consume_comment())
continue;
return_if_error_or_eof();
return_if_error();
// [tables]
// [[table array]]
@ -11446,11 +11455,11 @@ TOML_IMPL_NAMESPACE_START
consume_leading_whitespace();
return_if_error();
if (!is_eof() && !consume_comment() && !consume_line_break())
set_error("expected a comment or whitespace, saw '"sv, to_sv(*cp), "'"sv);
set_error("expected a comment or whitespace, saw '"sv, to_sv(cp), "'"sv);
}
else // ??
set_error("expected keys, tables, whitespace or comments, saw '"sv, to_sv(*cp), "'"sv);
set_error("expected keys, tables, whitespace or comments, saw '"sv, to_sv(cp), "'"sv);
}
while (!is_eof());