fixed integer overflow issues introduced in 3f4a540

closes #134
This commit is contained in:
Mark Gillard 2022-01-08 15:17:12 +02:00
parent 3f4a540ca6
commit f6ce95907e
5 changed files with 233 additions and 140 deletions

View File

@ -213,6 +213,7 @@ UTF-8 decoding is performed using a state machine based on Bjoern Hoehrmann's '[
- **[@bobfang1992](https://github.com/bobfang1992)** - Reported a bug and created a [wrapper in python](https://github.com/bobfang1992/pytomlpp)
- **[@GiulioRomualdi](https://github.com/GiulioRomualdi)** - Added cmake+meson support
- **[@levicki](https://github.com/levicki)** - Helped design some new features
- **[@moorereason](https://github.com/moorereason)** - Reported a whole bunch of bugs
- **[@mosra](https://github.com/mosra)** - Created the awesome [m.css] used to generate the API docs
- **[@ned14](https://github.com/ned14)** - Reported a bunch of bugs and helped design some new features
- **[@okureta](https://github.com/okureta)** - Reported a bug

View File

@ -93,9 +93,11 @@ namespace
R"(val = -+1)"sv,
R"(val = 1_0_)"sv,
R"(val = 1_0_ )"sv,
R"(val = 999999999999999999999999999999999999 )"sv,
R"(val = 9223372036854775808 )"sv,
R"(val = 01 )"sv,
R"(val = 0b10000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000 )"sv,
R"(val = 0o1000000000000000000000 )"sv,
R"(val = 9223372036854775808 )"sv,
R"(val = 0x8000000000000000 )"sv,
"########## floats"sv,
R"(val = 100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.0)"sv,

View File

@ -619,40 +619,44 @@ TOML_ANON_NAMESPACE_START
template <>
struct parse_integer_traits<2>
{
static constexpr auto scope_qualifier = "binary integer"sv;
static constexpr auto is_digit = impl::is_binary_digit;
static constexpr auto is_signed = false;
static constexpr auto min_buffer_length = 63;
static constexpr auto prefix_codepoint = U'b';
static constexpr auto prefix = "b"sv;
static constexpr auto scope_qualifier = "binary integer"sv;
static constexpr auto is_digit = impl::is_binary_digit;
static constexpr auto is_signed = false;
static constexpr auto max_digits = 63;
static constexpr auto prefix_codepoint = U'b';
static constexpr auto prefix = "b"sv;
static constexpr auto full_prefix = "0b"sv;
};
template <>
struct parse_integer_traits<8>
{
static constexpr auto scope_qualifier = "octal integer"sv;
static constexpr auto is_digit = impl::is_octal_digit;
static constexpr auto is_signed = false;
static constexpr auto min_buffer_length = 21; // strlen("777777777777777777777")
static constexpr auto prefix_codepoint = U'o';
static constexpr auto prefix = "o"sv;
static constexpr auto scope_qualifier = "octal integer"sv;
static constexpr auto is_digit = impl::is_octal_digit;
static constexpr auto is_signed = false;
static constexpr auto max_digits = 21; // strlen("777777777777777777777")
static constexpr auto prefix_codepoint = U'o';
static constexpr auto prefix = "o"sv;
static constexpr auto full_prefix = "0o"sv;
};
template <>
struct parse_integer_traits<10>
{
static constexpr auto scope_qualifier = "decimal integer"sv;
static constexpr auto is_digit = impl::is_decimal_digit;
static constexpr auto is_signed = true;
static constexpr auto min_buffer_length = 19; // strlen("9223372036854775807")
static constexpr auto scope_qualifier = "decimal integer"sv;
static constexpr auto is_digit = impl::is_decimal_digit;
static constexpr auto is_signed = true;
static constexpr auto max_digits = 19; // strlen("9223372036854775807")
static constexpr auto full_prefix = ""sv;
};
template <>
struct parse_integer_traits<16>
{
static constexpr auto scope_qualifier = "hexadecimal integer"sv;
static constexpr auto is_digit = impl::is_hexadecimal_digit;
static constexpr auto is_signed = false;
static constexpr auto min_buffer_length = 16; // strlen("7FFFFFFFFFFFFFFF")
static constexpr auto prefix_codepoint = U'x';
static constexpr auto prefix = "x"sv;
static constexpr auto scope_qualifier = "hexadecimal integer"sv;
static constexpr auto is_digit = impl::is_hexadecimal_digit;
static constexpr auto is_signed = false;
static constexpr auto max_digits = 16; // strlen("7FFFFFFFFFFFFFFF")
static constexpr auto prefix_codepoint = U'x';
static constexpr auto prefix = "x"sv;
static constexpr auto full_prefix = "0x"sv;
};
TOML_PURE_GETTER
@ -1758,12 +1762,6 @@ TOML_IMPL_NAMESPACE_START
char first_integer_part = '\0';
while (!is_eof() && !is_value_terminator(*cp))
{
if TOML_UNLIKELY(length == sizeof(chars))
set_error_and_return_default("exceeds maximum length of "sv,
static_cast<uint64_t>(sizeof(chars)),
" characters"sv,
(seen_exponent ? ""sv : " (consider using exponent notation)"sv));
if (*cp == U'_')
{
if (!prev || !is_decimal_digit(*prev))
@ -1773,8 +1771,13 @@ TOML_IMPL_NAMESPACE_START
advance_and_return_if_error_or_eof({});
continue;
}
else if (prev && *prev == U'_' && !is_decimal_digit(*cp))
else if TOML_UNLIKELY(prev && *prev == U'_' && !is_decimal_digit(*cp))
set_error_and_return_default("underscores must be followed by digits"sv);
else if TOML_UNLIKELY(length == sizeof(chars))
set_error_and_return_default("exceeds length limit of "sv,
static_cast<uint64_t>(sizeof(chars)),
" digits"sv,
(seen_exponent ? ""sv : " (consider using exponent notation)"sv));
else if (*cp == U'.')
{
// .1
@ -2109,10 +2112,8 @@ TOML_IMPL_NAMESPACE_START
set_error_and_return_default("expected digit, saw '"sv, to_sv(*cp), "'"sv);
}
// consume value chars
static constexpr size_t underscore_padding = 32;
static constexpr size_t buffer_length = traits::min_buffer_length + underscore_padding;
char chars[buffer_length];
// consume digits
char digits[utf8_buffered_reader::max_history_length];
size_t length = {};
const utf8_codepoint* prev = {};
while (!is_eof() && !is_value_terminator(*cp))
@ -2126,16 +2127,16 @@ TOML_IMPL_NAMESPACE_START
advance_and_return_if_error_or_eof({});
continue;
}
else if (prev && *prev == U'_' && !traits::is_digit(*cp))
else if TOML_UNLIKELY(prev && *prev == U'_' && !traits::is_digit(*cp))
set_error_and_return_default("underscores must be followed by digits"sv);
else if (!traits::is_digit(*cp))
else if TOML_UNLIKELY(!traits::is_digit(*cp))
set_error_and_return_default("expected digit, saw '"sv, to_sv(*cp), "'"sv);
else if (length == sizeof(chars))
set_error_and_return_default("exceeds maximum length of "sv,
static_cast<uint64_t>(sizeof(chars)),
" characters"sv);
else if TOML_UNLIKELY(length == sizeof(digits))
set_error_and_return_default("exceeds length limit of "sv,
static_cast<uint64_t>(sizeof(digits)),
" digits"sv);
else
chars[length++] = static_cast<char>(cp->bytes[0]);
digits[length++] = static_cast<char>(cp->bytes[0]);
prev = cp;
advance_and_return_if_error({});
@ -2148,52 +2149,75 @@ TOML_IMPL_NAMESPACE_START
set_error_and_return_default("underscores must be followed by digits"sv);
}
// check for leading zeroes
if constexpr (base == 10)
{
if (chars[0] == '0')
set_error_and_return_default("leading zeroes are prohibited"sv);
}
// single digits can be converted trivially
if (length == 1u)
{
int64_t result;
if constexpr (base == 16)
return static_cast<int64_t>(hex_to_dec(chars[0]));
else if constexpr (base <= 10)
return static_cast<int64_t>(chars[0] - '0');
result = static_cast<int64_t>(hex_to_dec(digits[0]));
else
result = static_cast<int64_t>(digits[0] - '0');
if constexpr (traits::is_signed)
result *= sign;
return result;
}
// otherwise do the thing
uint64_t result = {};
// bin, oct and hex allow leading zeroes so trim them first
const char* end = digits + length;
const char* msd = digits;
if constexpr (base != 10)
{
const char* msd = chars;
const char* end = msd + length;
while (msd < end && *msd == '0')
msd++;
if (msd == end)
return 0ll;
uint64_t power = 1;
while (--end >= msd)
{
if constexpr (base == 16)
result += power * hex_to_dec(*end);
else
result += power * static_cast<uint64_t>(*end - '0');
power *= base;
}
}
// decimal integers do not allow leading zeroes
else
{
if TOML_UNLIKELY(digits[0] == '0')
set_error_and_return_default("leading zeroes are prohibited"sv);
}
// range check
if (result > static_cast<uint64_t>((std::numeric_limits<int64_t>::max)()) + (sign < 0 ? 1ull : 0ull))
if TOML_UNLIKELY(static_cast<size_t>(end - msd) > traits::max_digits)
set_error_and_return_default("'"sv,
std::string_view{ chars, length },
traits::full_prefix,
std::string_view{ digits, length },
"' is not representable in 64 bits"sv);
if constexpr (traits::is_signed)
return static_cast<int64_t>(result) * sign;
else
return static_cast<int64_t>(result);
// do the thing
{
uint64_t result = {};
{
uint64_t power = 1;
while (--end >= msd)
{
if constexpr (base == 16)
result += power * hex_to_dec(*end);
else
result += power * static_cast<uint64_t>(*end - '0');
power *= base;
}
}
// range check
if TOML_UNLIKELY(result > static_cast<uint64_t>((std::numeric_limits<int64_t>::max)()) + (sign < 0 ? 1ull : 0ull))
set_error_and_return_default("'"sv,
traits::full_prefix,
std::string_view{ digits, length },
"' is not representable in 64 bits"sv);
if constexpr (traits::is_signed)
return static_cast<int64_t>(result) * sign;
else
return static_cast<int64_t>(result);
}
}
TOML_NODISCARD

View File

@ -260,4 +260,46 @@ b = []
{
parsing_should_fail(FILE_LINE_ARGS, "#\r"sv);
}
SECTION("github/issues/134") // https://github.com/marzer/tomlplusplus/issues/134
{
// binary
parsing_should_fail(
FILE_LINE_ARGS,
"val = 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111"sv); // uint64_t
// max
parsing_should_fail(
FILE_LINE_ARGS,
"val = 0b10000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000"sv); // int64_t
// max
// + 1
parse_expected_value(FILE_LINE_ARGS,
"0b01111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111"sv,
INT64_MAX); // int64_t max
// octal
parsing_should_fail(FILE_LINE_ARGS, " val = 0o1777777777777777777777"sv); // uint64_t max
parsing_should_fail(FILE_LINE_ARGS, " val = 0o1000000000000000000000"sv); // int64_t max + 1
parse_expected_value(FILE_LINE_ARGS, " 0o0777777777777777777777"sv, INT64_MAX);
// decimal
parsing_should_fail(FILE_LINE_ARGS, " val = 100000000000000000000"sv);
parsing_should_fail(FILE_LINE_ARGS, " val = 18446744073709551615"sv); // uint64_t max
parsing_should_fail(FILE_LINE_ARGS, " val = 10000000000000000000"sv);
parsing_should_fail(FILE_LINE_ARGS, " val = 9999999999999999999"sv);
parsing_should_fail(FILE_LINE_ARGS, " val = 9223372036854775808"sv); // int64_t max + 1
parse_expected_value(FILE_LINE_ARGS, " 9223372036854775807"sv, INT64_MAX);
parse_expected_value(FILE_LINE_ARGS, " 1000000000000000000"sv, 1000000000000000000LL);
parse_expected_value(FILE_LINE_ARGS, " -1000000000000000000"sv, -1000000000000000000LL);
parse_expected_value(FILE_LINE_ARGS, " -9223372036854775808"sv, INT64_MIN);
parsing_should_fail(FILE_LINE_ARGS, " val = -9223372036854775809"sv); // int64_t min - 1
parsing_should_fail(FILE_LINE_ARGS, " val = -10000000000000000000"sv);
parsing_should_fail(FILE_LINE_ARGS, " val = -18446744073709551615"sv); // -(uint64_t max)
parsing_should_fail(FILE_LINE_ARGS, " val = -100000000000000000000"sv);
// hexadecimal
parsing_should_fail(FILE_LINE_ARGS, " val = 0xFFFFFFFFFFFFFFFF"sv); // uint64_t max
parsing_should_fail(FILE_LINE_ARGS, " val = 0x8000000000000000"sv); // int64_t max + 1
parse_expected_value(FILE_LINE_ARGS, " 0x7FFFFFFFFFFFFFFF"sv, INT64_MAX);
}
}

162
toml.hpp
View File

@ -11617,40 +11617,44 @@ TOML_ANON_NAMESPACE_START
template <>
struct parse_integer_traits<2>
{
static constexpr auto scope_qualifier = "binary integer"sv;
static constexpr auto is_digit = impl::is_binary_digit;
static constexpr auto is_signed = false;
static constexpr auto min_buffer_length = 63;
static constexpr auto prefix_codepoint = U'b';
static constexpr auto prefix = "b"sv;
static constexpr auto scope_qualifier = "binary integer"sv;
static constexpr auto is_digit = impl::is_binary_digit;
static constexpr auto is_signed = false;
static constexpr auto max_digits = 63;
static constexpr auto prefix_codepoint = U'b';
static constexpr auto prefix = "b"sv;
static constexpr auto full_prefix = "0b"sv;
};
template <>
struct parse_integer_traits<8>
{
static constexpr auto scope_qualifier = "octal integer"sv;
static constexpr auto is_digit = impl::is_octal_digit;
static constexpr auto is_signed = false;
static constexpr auto min_buffer_length = 21; // strlen("777777777777777777777")
static constexpr auto prefix_codepoint = U'o';
static constexpr auto prefix = "o"sv;
static constexpr auto scope_qualifier = "octal integer"sv;
static constexpr auto is_digit = impl::is_octal_digit;
static constexpr auto is_signed = false;
static constexpr auto max_digits = 21; // strlen("777777777777777777777")
static constexpr auto prefix_codepoint = U'o';
static constexpr auto prefix = "o"sv;
static constexpr auto full_prefix = "0o"sv;
};
template <>
struct parse_integer_traits<10>
{
static constexpr auto scope_qualifier = "decimal integer"sv;
static constexpr auto is_digit = impl::is_decimal_digit;
static constexpr auto is_signed = true;
static constexpr auto min_buffer_length = 19; // strlen("9223372036854775807")
static constexpr auto scope_qualifier = "decimal integer"sv;
static constexpr auto is_digit = impl::is_decimal_digit;
static constexpr auto is_signed = true;
static constexpr auto max_digits = 19; // strlen("9223372036854775807")
static constexpr auto full_prefix = ""sv;
};
template <>
struct parse_integer_traits<16>
{
static constexpr auto scope_qualifier = "hexadecimal integer"sv;
static constexpr auto is_digit = impl::is_hexadecimal_digit;
static constexpr auto is_signed = false;
static constexpr auto min_buffer_length = 16; // strlen("7FFFFFFFFFFFFFFF")
static constexpr auto prefix_codepoint = U'x';
static constexpr auto prefix = "x"sv;
static constexpr auto scope_qualifier = "hexadecimal integer"sv;
static constexpr auto is_digit = impl::is_hexadecimal_digit;
static constexpr auto is_signed = false;
static constexpr auto max_digits = 16; // strlen("7FFFFFFFFFFFFFFF")
static constexpr auto prefix_codepoint = U'x';
static constexpr auto prefix = "x"sv;
static constexpr auto full_prefix = "0x"sv;
};
TOML_PURE_GETTER
@ -12756,12 +12760,6 @@ TOML_IMPL_NAMESPACE_START
char first_integer_part = '\0';
while (!is_eof() && !is_value_terminator(*cp))
{
if TOML_UNLIKELY(length == sizeof(chars))
set_error_and_return_default("exceeds maximum length of "sv,
static_cast<uint64_t>(sizeof(chars)),
" characters"sv,
(seen_exponent ? ""sv : " (consider using exponent notation)"sv));
if (*cp == U'_')
{
if (!prev || !is_decimal_digit(*prev))
@ -12771,8 +12769,13 @@ TOML_IMPL_NAMESPACE_START
advance_and_return_if_error_or_eof({});
continue;
}
else if (prev && *prev == U'_' && !is_decimal_digit(*cp))
else if TOML_UNLIKELY(prev && *prev == U'_' && !is_decimal_digit(*cp))
set_error_and_return_default("underscores must be followed by digits"sv);
else if TOML_UNLIKELY(length == sizeof(chars))
set_error_and_return_default("exceeds length limit of "sv,
static_cast<uint64_t>(sizeof(chars)),
" digits"sv,
(seen_exponent ? ""sv : " (consider using exponent notation)"sv));
else if (*cp == U'.')
{
// .1
@ -13107,10 +13110,8 @@ TOML_IMPL_NAMESPACE_START
set_error_and_return_default("expected digit, saw '"sv, to_sv(*cp), "'"sv);
}
// consume value chars
static constexpr size_t underscore_padding = 32;
static constexpr size_t buffer_length = traits::min_buffer_length + underscore_padding;
char chars[buffer_length];
// consume digits
char digits[utf8_buffered_reader::max_history_length];
size_t length = {};
const utf8_codepoint* prev = {};
while (!is_eof() && !is_value_terminator(*cp))
@ -13124,16 +13125,16 @@ TOML_IMPL_NAMESPACE_START
advance_and_return_if_error_or_eof({});
continue;
}
else if (prev && *prev == U'_' && !traits::is_digit(*cp))
else if TOML_UNLIKELY(prev && *prev == U'_' && !traits::is_digit(*cp))
set_error_and_return_default("underscores must be followed by digits"sv);
else if (!traits::is_digit(*cp))
else if TOML_UNLIKELY(!traits::is_digit(*cp))
set_error_and_return_default("expected digit, saw '"sv, to_sv(*cp), "'"sv);
else if (length == sizeof(chars))
set_error_and_return_default("exceeds maximum length of "sv,
static_cast<uint64_t>(sizeof(chars)),
" characters"sv);
else if TOML_UNLIKELY(length == sizeof(digits))
set_error_and_return_default("exceeds length limit of "sv,
static_cast<uint64_t>(sizeof(digits)),
" digits"sv);
else
chars[length++] = static_cast<char>(cp->bytes[0]);
digits[length++] = static_cast<char>(cp->bytes[0]);
prev = cp;
advance_and_return_if_error({});
@ -13146,52 +13147,75 @@ TOML_IMPL_NAMESPACE_START
set_error_and_return_default("underscores must be followed by digits"sv);
}
// check for leading zeroes
if constexpr (base == 10)
{
if (chars[0] == '0')
set_error_and_return_default("leading zeroes are prohibited"sv);
}
// single digits can be converted trivially
if (length == 1u)
{
int64_t result;
if constexpr (base == 16)
return static_cast<int64_t>(hex_to_dec(chars[0]));
else if constexpr (base <= 10)
return static_cast<int64_t>(chars[0] - '0');
result = static_cast<int64_t>(hex_to_dec(digits[0]));
else
result = static_cast<int64_t>(digits[0] - '0');
if constexpr (traits::is_signed)
result *= sign;
return result;
}
// otherwise do the thing
uint64_t result = {};
// bin, oct and hex allow leading zeroes so trim them first
const char* end = digits + length;
const char* msd = digits;
if constexpr (base != 10)
{
const char* msd = chars;
const char* end = msd + length;
while (msd < end && *msd == '0')
msd++;
if (msd == end)
return 0ll;
uint64_t power = 1;
while (--end >= msd)
{
if constexpr (base == 16)
result += power * hex_to_dec(*end);
else
result += power * static_cast<uint64_t>(*end - '0');
power *= base;
}
}
// decimal integers do not allow leading zeroes
else
{
if TOML_UNLIKELY(digits[0] == '0')
set_error_and_return_default("leading zeroes are prohibited"sv);
}
// range check
if (result > static_cast<uint64_t>((std::numeric_limits<int64_t>::max)()) + (sign < 0 ? 1ull : 0ull))
if TOML_UNLIKELY(static_cast<size_t>(end - msd) > traits::max_digits)
set_error_and_return_default("'"sv,
std::string_view{ chars, length },
traits::full_prefix,
std::string_view{ digits, length },
"' is not representable in 64 bits"sv);
if constexpr (traits::is_signed)
return static_cast<int64_t>(result) * sign;
else
return static_cast<int64_t>(result);
// do the thing
{
uint64_t result = {};
{
uint64_t power = 1;
while (--end >= msd)
{
if constexpr (base == 16)
result += power * hex_to_dec(*end);
else
result += power * static_cast<uint64_t>(*end - '0');
power *= base;
}
}
// range check
if TOML_UNLIKELY(result > static_cast<uint64_t>((std::numeric_limits<int64_t>::max)()) + (sign < 0 ? 1ull : 0ull))
set_error_and_return_default("'"sv,
traits::full_prefix,
std::string_view{ digits, length },
"' is not representable in 64 bits"sv);
if constexpr (traits::is_signed)
return static_cast<int64_t>(result) * sign;
else
return static_cast<int64_t>(result);
}
}
TOML_NODISCARD