From f6ce95907e36e1c7c8e2e33fd0d3f22659d835fd Mon Sep 17 00:00:00 2001 From: Mark Gillard Date: Sat, 8 Jan 2022 15:17:12 +0200 Subject: [PATCH] fixed integer overflow issues introduced in 3f4a540 closes #134 --- README.md | 1 + examples/error_printer.cpp | 6 +- include/toml++/impl/parser.inl | 162 +++++++++++++++++++-------------- tests/user_feedback.cpp | 42 +++++++++ toml.hpp | 162 +++++++++++++++++++-------------- 5 files changed, 233 insertions(+), 140 deletions(-) diff --git a/README.md b/README.md index 935d7f0..3792116 100644 --- a/README.md +++ b/README.md @@ -213,6 +213,7 @@ UTF-8 decoding is performed using a state machine based on Bjoern Hoehrmann's '[ - **[@bobfang1992](https://github.com/bobfang1992)** - Reported a bug and created a [wrapper in python](https://github.com/bobfang1992/pytomlpp) - **[@GiulioRomualdi](https://github.com/GiulioRomualdi)** - Added cmake+meson support - **[@levicki](https://github.com/levicki)** - Helped design some new features +- **[@moorereason](https://github.com/moorereason)** - Reported a whole bunch of bugs - **[@mosra](https://github.com/mosra)** - Created the awesome [m.css] used to generate the API docs - **[@ned14](https://github.com/ned14)** - Reported a bunch of bugs and helped design some new features - **[@okureta](https://github.com/okureta)** - Reported a bug diff --git a/examples/error_printer.cpp b/examples/error_printer.cpp index ada44de..98b3910 100644 --- a/examples/error_printer.cpp +++ b/examples/error_printer.cpp @@ -93,9 +93,11 @@ namespace R"(val = -+1)"sv, R"(val = 1_0_)"sv, R"(val = 1_0_ )"sv, - R"(val = 999999999999999999999999999999999999 )"sv, - R"(val = 9223372036854775808 )"sv, R"(val = 01 )"sv, + R"(val = 0b10000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000 )"sv, + R"(val = 0o1000000000000000000000 )"sv, + R"(val = 9223372036854775808 )"sv, + R"(val = 0x8000000000000000 )"sv, "########## floats"sv, R"(val = 100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.0)"sv, diff --git a/include/toml++/impl/parser.inl b/include/toml++/impl/parser.inl index 861e456..0188dbb 100644 --- a/include/toml++/impl/parser.inl +++ b/include/toml++/impl/parser.inl @@ -619,40 +619,44 @@ TOML_ANON_NAMESPACE_START template <> struct parse_integer_traits<2> { - static constexpr auto scope_qualifier = "binary integer"sv; - static constexpr auto is_digit = impl::is_binary_digit; - static constexpr auto is_signed = false; - static constexpr auto min_buffer_length = 63; - static constexpr auto prefix_codepoint = U'b'; - static constexpr auto prefix = "b"sv; + static constexpr auto scope_qualifier = "binary integer"sv; + static constexpr auto is_digit = impl::is_binary_digit; + static constexpr auto is_signed = false; + static constexpr auto max_digits = 63; + static constexpr auto prefix_codepoint = U'b'; + static constexpr auto prefix = "b"sv; + static constexpr auto full_prefix = "0b"sv; }; template <> struct parse_integer_traits<8> { - static constexpr auto scope_qualifier = "octal integer"sv; - static constexpr auto is_digit = impl::is_octal_digit; - static constexpr auto is_signed = false; - static constexpr auto min_buffer_length = 21; // strlen("777777777777777777777") - static constexpr auto prefix_codepoint = U'o'; - static constexpr auto prefix = "o"sv; + static constexpr auto scope_qualifier = "octal integer"sv; + static constexpr auto is_digit = impl::is_octal_digit; + static constexpr auto is_signed = false; + static constexpr auto max_digits = 21; // strlen("777777777777777777777") + static constexpr auto prefix_codepoint = U'o'; + static constexpr auto prefix = "o"sv; + static constexpr auto full_prefix = "0o"sv; }; template <> struct parse_integer_traits<10> { - static constexpr auto scope_qualifier = "decimal integer"sv; - static constexpr auto is_digit = impl::is_decimal_digit; - static constexpr auto is_signed = true; - static constexpr auto min_buffer_length = 19; // strlen("9223372036854775807") + static constexpr auto scope_qualifier = "decimal integer"sv; + static constexpr auto is_digit = impl::is_decimal_digit; + static constexpr auto is_signed = true; + static constexpr auto max_digits = 19; // strlen("9223372036854775807") + static constexpr auto full_prefix = ""sv; }; template <> struct parse_integer_traits<16> { - static constexpr auto scope_qualifier = "hexadecimal integer"sv; - static constexpr auto is_digit = impl::is_hexadecimal_digit; - static constexpr auto is_signed = false; - static constexpr auto min_buffer_length = 16; // strlen("7FFFFFFFFFFFFFFF") - static constexpr auto prefix_codepoint = U'x'; - static constexpr auto prefix = "x"sv; + static constexpr auto scope_qualifier = "hexadecimal integer"sv; + static constexpr auto is_digit = impl::is_hexadecimal_digit; + static constexpr auto is_signed = false; + static constexpr auto max_digits = 16; // strlen("7FFFFFFFFFFFFFFF") + static constexpr auto prefix_codepoint = U'x'; + static constexpr auto prefix = "x"sv; + static constexpr auto full_prefix = "0x"sv; }; TOML_PURE_GETTER @@ -1758,12 +1762,6 @@ TOML_IMPL_NAMESPACE_START char first_integer_part = '\0'; while (!is_eof() && !is_value_terminator(*cp)) { - if TOML_UNLIKELY(length == sizeof(chars)) - set_error_and_return_default("exceeds maximum length of "sv, - static_cast(sizeof(chars)), - " characters"sv, - (seen_exponent ? ""sv : " (consider using exponent notation)"sv)); - if (*cp == U'_') { if (!prev || !is_decimal_digit(*prev)) @@ -1773,8 +1771,13 @@ TOML_IMPL_NAMESPACE_START advance_and_return_if_error_or_eof({}); continue; } - else if (prev && *prev == U'_' && !is_decimal_digit(*cp)) + else if TOML_UNLIKELY(prev && *prev == U'_' && !is_decimal_digit(*cp)) set_error_and_return_default("underscores must be followed by digits"sv); + else if TOML_UNLIKELY(length == sizeof(chars)) + set_error_and_return_default("exceeds length limit of "sv, + static_cast(sizeof(chars)), + " digits"sv, + (seen_exponent ? ""sv : " (consider using exponent notation)"sv)); else if (*cp == U'.') { // .1 @@ -2109,10 +2112,8 @@ TOML_IMPL_NAMESPACE_START set_error_and_return_default("expected digit, saw '"sv, to_sv(*cp), "'"sv); } - // consume value chars - static constexpr size_t underscore_padding = 32; - static constexpr size_t buffer_length = traits::min_buffer_length + underscore_padding; - char chars[buffer_length]; + // consume digits + char digits[utf8_buffered_reader::max_history_length]; size_t length = {}; const utf8_codepoint* prev = {}; while (!is_eof() && !is_value_terminator(*cp)) @@ -2126,16 +2127,16 @@ TOML_IMPL_NAMESPACE_START advance_and_return_if_error_or_eof({}); continue; } - else if (prev && *prev == U'_' && !traits::is_digit(*cp)) + else if TOML_UNLIKELY(prev && *prev == U'_' && !traits::is_digit(*cp)) set_error_and_return_default("underscores must be followed by digits"sv); - else if (!traits::is_digit(*cp)) + else if TOML_UNLIKELY(!traits::is_digit(*cp)) set_error_and_return_default("expected digit, saw '"sv, to_sv(*cp), "'"sv); - else if (length == sizeof(chars)) - set_error_and_return_default("exceeds maximum length of "sv, - static_cast(sizeof(chars)), - " characters"sv); + else if TOML_UNLIKELY(length == sizeof(digits)) + set_error_and_return_default("exceeds length limit of "sv, + static_cast(sizeof(digits)), + " digits"sv); else - chars[length++] = static_cast(cp->bytes[0]); + digits[length++] = static_cast(cp->bytes[0]); prev = cp; advance_and_return_if_error({}); @@ -2148,52 +2149,75 @@ TOML_IMPL_NAMESPACE_START set_error_and_return_default("underscores must be followed by digits"sv); } - // check for leading zeroes - if constexpr (base == 10) - { - if (chars[0] == '0') - set_error_and_return_default("leading zeroes are prohibited"sv); - } - // single digits can be converted trivially if (length == 1u) { + int64_t result; + if constexpr (base == 16) - return static_cast(hex_to_dec(chars[0])); - else if constexpr (base <= 10) - return static_cast(chars[0] - '0'); + result = static_cast(hex_to_dec(digits[0])); + else + result = static_cast(digits[0] - '0'); + + if constexpr (traits::is_signed) + result *= sign; + + return result; } - // otherwise do the thing - uint64_t result = {}; + // bin, oct and hex allow leading zeroes so trim them first + const char* end = digits + length; + const char* msd = digits; + if constexpr (base != 10) { - const char* msd = chars; - const char* end = msd + length; while (msd < end && *msd == '0') msd++; if (msd == end) return 0ll; - uint64_t power = 1; - while (--end >= msd) - { - if constexpr (base == 16) - result += power * hex_to_dec(*end); - else - result += power * static_cast(*end - '0'); - power *= base; - } + } + + // decimal integers do not allow leading zeroes + else + { + if TOML_UNLIKELY(digits[0] == '0') + set_error_and_return_default("leading zeroes are prohibited"sv); } // range check - if (result > static_cast((std::numeric_limits::max)()) + (sign < 0 ? 1ull : 0ull)) + if TOML_UNLIKELY(static_cast(end - msd) > traits::max_digits) set_error_and_return_default("'"sv, - std::string_view{ chars, length }, + traits::full_prefix, + std::string_view{ digits, length }, "' is not representable in 64 bits"sv); - if constexpr (traits::is_signed) - return static_cast(result) * sign; - else - return static_cast(result); + // do the thing + { + uint64_t result = {}; + { + uint64_t power = 1; + while (--end >= msd) + { + if constexpr (base == 16) + result += power * hex_to_dec(*end); + else + result += power * static_cast(*end - '0'); + + power *= base; + } + } + + // range check + if TOML_UNLIKELY(result > static_cast((std::numeric_limits::max)()) + (sign < 0 ? 1ull : 0ull)) + set_error_and_return_default("'"sv, + traits::full_prefix, + std::string_view{ digits, length }, + "' is not representable in 64 bits"sv); + + if constexpr (traits::is_signed) + return static_cast(result) * sign; + else + return static_cast(result); + } } TOML_NODISCARD diff --git a/tests/user_feedback.cpp b/tests/user_feedback.cpp index 0f5ffd3..443efc6 100644 --- a/tests/user_feedback.cpp +++ b/tests/user_feedback.cpp @@ -260,4 +260,46 @@ b = [] { parsing_should_fail(FILE_LINE_ARGS, "#\r"sv); } + + SECTION("github/issues/134") // https://github.com/marzer/tomlplusplus/issues/134 + { + // binary + parsing_should_fail( + FILE_LINE_ARGS, + "val = 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111"sv); // uint64_t + // max + parsing_should_fail( + FILE_LINE_ARGS, + "val = 0b10000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000"sv); // int64_t + // max + // + 1 + parse_expected_value(FILE_LINE_ARGS, + "0b01111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111"sv, + INT64_MAX); // int64_t max + + // octal + parsing_should_fail(FILE_LINE_ARGS, " val = 0o1777777777777777777777"sv); // uint64_t max + parsing_should_fail(FILE_LINE_ARGS, " val = 0o1000000000000000000000"sv); // int64_t max + 1 + parse_expected_value(FILE_LINE_ARGS, " 0o0777777777777777777777"sv, INT64_MAX); + + // decimal + parsing_should_fail(FILE_LINE_ARGS, " val = 100000000000000000000"sv); + parsing_should_fail(FILE_LINE_ARGS, " val = 18446744073709551615"sv); // uint64_t max + parsing_should_fail(FILE_LINE_ARGS, " val = 10000000000000000000"sv); + parsing_should_fail(FILE_LINE_ARGS, " val = 9999999999999999999"sv); + parsing_should_fail(FILE_LINE_ARGS, " val = 9223372036854775808"sv); // int64_t max + 1 + parse_expected_value(FILE_LINE_ARGS, " 9223372036854775807"sv, INT64_MAX); + parse_expected_value(FILE_LINE_ARGS, " 1000000000000000000"sv, 1000000000000000000LL); + parse_expected_value(FILE_LINE_ARGS, " -1000000000000000000"sv, -1000000000000000000LL); + parse_expected_value(FILE_LINE_ARGS, " -9223372036854775808"sv, INT64_MIN); + parsing_should_fail(FILE_LINE_ARGS, " val = -9223372036854775809"sv); // int64_t min - 1 + parsing_should_fail(FILE_LINE_ARGS, " val = -10000000000000000000"sv); + parsing_should_fail(FILE_LINE_ARGS, " val = -18446744073709551615"sv); // -(uint64_t max) + parsing_should_fail(FILE_LINE_ARGS, " val = -100000000000000000000"sv); + + // hexadecimal + parsing_should_fail(FILE_LINE_ARGS, " val = 0xFFFFFFFFFFFFFFFF"sv); // uint64_t max + parsing_should_fail(FILE_LINE_ARGS, " val = 0x8000000000000000"sv); // int64_t max + 1 + parse_expected_value(FILE_LINE_ARGS, " 0x7FFFFFFFFFFFFFFF"sv, INT64_MAX); + } } diff --git a/toml.hpp b/toml.hpp index c17cebe..d942739 100644 --- a/toml.hpp +++ b/toml.hpp @@ -11617,40 +11617,44 @@ TOML_ANON_NAMESPACE_START template <> struct parse_integer_traits<2> { - static constexpr auto scope_qualifier = "binary integer"sv; - static constexpr auto is_digit = impl::is_binary_digit; - static constexpr auto is_signed = false; - static constexpr auto min_buffer_length = 63; - static constexpr auto prefix_codepoint = U'b'; - static constexpr auto prefix = "b"sv; + static constexpr auto scope_qualifier = "binary integer"sv; + static constexpr auto is_digit = impl::is_binary_digit; + static constexpr auto is_signed = false; + static constexpr auto max_digits = 63; + static constexpr auto prefix_codepoint = U'b'; + static constexpr auto prefix = "b"sv; + static constexpr auto full_prefix = "0b"sv; }; template <> struct parse_integer_traits<8> { - static constexpr auto scope_qualifier = "octal integer"sv; - static constexpr auto is_digit = impl::is_octal_digit; - static constexpr auto is_signed = false; - static constexpr auto min_buffer_length = 21; // strlen("777777777777777777777") - static constexpr auto prefix_codepoint = U'o'; - static constexpr auto prefix = "o"sv; + static constexpr auto scope_qualifier = "octal integer"sv; + static constexpr auto is_digit = impl::is_octal_digit; + static constexpr auto is_signed = false; + static constexpr auto max_digits = 21; // strlen("777777777777777777777") + static constexpr auto prefix_codepoint = U'o'; + static constexpr auto prefix = "o"sv; + static constexpr auto full_prefix = "0o"sv; }; template <> struct parse_integer_traits<10> { - static constexpr auto scope_qualifier = "decimal integer"sv; - static constexpr auto is_digit = impl::is_decimal_digit; - static constexpr auto is_signed = true; - static constexpr auto min_buffer_length = 19; // strlen("9223372036854775807") + static constexpr auto scope_qualifier = "decimal integer"sv; + static constexpr auto is_digit = impl::is_decimal_digit; + static constexpr auto is_signed = true; + static constexpr auto max_digits = 19; // strlen("9223372036854775807") + static constexpr auto full_prefix = ""sv; }; template <> struct parse_integer_traits<16> { - static constexpr auto scope_qualifier = "hexadecimal integer"sv; - static constexpr auto is_digit = impl::is_hexadecimal_digit; - static constexpr auto is_signed = false; - static constexpr auto min_buffer_length = 16; // strlen("7FFFFFFFFFFFFFFF") - static constexpr auto prefix_codepoint = U'x'; - static constexpr auto prefix = "x"sv; + static constexpr auto scope_qualifier = "hexadecimal integer"sv; + static constexpr auto is_digit = impl::is_hexadecimal_digit; + static constexpr auto is_signed = false; + static constexpr auto max_digits = 16; // strlen("7FFFFFFFFFFFFFFF") + static constexpr auto prefix_codepoint = U'x'; + static constexpr auto prefix = "x"sv; + static constexpr auto full_prefix = "0x"sv; }; TOML_PURE_GETTER @@ -12756,12 +12760,6 @@ TOML_IMPL_NAMESPACE_START char first_integer_part = '\0'; while (!is_eof() && !is_value_terminator(*cp)) { - if TOML_UNLIKELY(length == sizeof(chars)) - set_error_and_return_default("exceeds maximum length of "sv, - static_cast(sizeof(chars)), - " characters"sv, - (seen_exponent ? ""sv : " (consider using exponent notation)"sv)); - if (*cp == U'_') { if (!prev || !is_decimal_digit(*prev)) @@ -12771,8 +12769,13 @@ TOML_IMPL_NAMESPACE_START advance_and_return_if_error_or_eof({}); continue; } - else if (prev && *prev == U'_' && !is_decimal_digit(*cp)) + else if TOML_UNLIKELY(prev && *prev == U'_' && !is_decimal_digit(*cp)) set_error_and_return_default("underscores must be followed by digits"sv); + else if TOML_UNLIKELY(length == sizeof(chars)) + set_error_and_return_default("exceeds length limit of "sv, + static_cast(sizeof(chars)), + " digits"sv, + (seen_exponent ? ""sv : " (consider using exponent notation)"sv)); else if (*cp == U'.') { // .1 @@ -13107,10 +13110,8 @@ TOML_IMPL_NAMESPACE_START set_error_and_return_default("expected digit, saw '"sv, to_sv(*cp), "'"sv); } - // consume value chars - static constexpr size_t underscore_padding = 32; - static constexpr size_t buffer_length = traits::min_buffer_length + underscore_padding; - char chars[buffer_length]; + // consume digits + char digits[utf8_buffered_reader::max_history_length]; size_t length = {}; const utf8_codepoint* prev = {}; while (!is_eof() && !is_value_terminator(*cp)) @@ -13124,16 +13125,16 @@ TOML_IMPL_NAMESPACE_START advance_and_return_if_error_or_eof({}); continue; } - else if (prev && *prev == U'_' && !traits::is_digit(*cp)) + else if TOML_UNLIKELY(prev && *prev == U'_' && !traits::is_digit(*cp)) set_error_and_return_default("underscores must be followed by digits"sv); - else if (!traits::is_digit(*cp)) + else if TOML_UNLIKELY(!traits::is_digit(*cp)) set_error_and_return_default("expected digit, saw '"sv, to_sv(*cp), "'"sv); - else if (length == sizeof(chars)) - set_error_and_return_default("exceeds maximum length of "sv, - static_cast(sizeof(chars)), - " characters"sv); + else if TOML_UNLIKELY(length == sizeof(digits)) + set_error_and_return_default("exceeds length limit of "sv, + static_cast(sizeof(digits)), + " digits"sv); else - chars[length++] = static_cast(cp->bytes[0]); + digits[length++] = static_cast(cp->bytes[0]); prev = cp; advance_and_return_if_error({}); @@ -13146,52 +13147,75 @@ TOML_IMPL_NAMESPACE_START set_error_and_return_default("underscores must be followed by digits"sv); } - // check for leading zeroes - if constexpr (base == 10) - { - if (chars[0] == '0') - set_error_and_return_default("leading zeroes are prohibited"sv); - } - // single digits can be converted trivially if (length == 1u) { + int64_t result; + if constexpr (base == 16) - return static_cast(hex_to_dec(chars[0])); - else if constexpr (base <= 10) - return static_cast(chars[0] - '0'); + result = static_cast(hex_to_dec(digits[0])); + else + result = static_cast(digits[0] - '0'); + + if constexpr (traits::is_signed) + result *= sign; + + return result; } - // otherwise do the thing - uint64_t result = {}; + // bin, oct and hex allow leading zeroes so trim them first + const char* end = digits + length; + const char* msd = digits; + if constexpr (base != 10) { - const char* msd = chars; - const char* end = msd + length; while (msd < end && *msd == '0') msd++; if (msd == end) return 0ll; - uint64_t power = 1; - while (--end >= msd) - { - if constexpr (base == 16) - result += power * hex_to_dec(*end); - else - result += power * static_cast(*end - '0'); - power *= base; - } + } + + // decimal integers do not allow leading zeroes + else + { + if TOML_UNLIKELY(digits[0] == '0') + set_error_and_return_default("leading zeroes are prohibited"sv); } // range check - if (result > static_cast((std::numeric_limits::max)()) + (sign < 0 ? 1ull : 0ull)) + if TOML_UNLIKELY(static_cast(end - msd) > traits::max_digits) set_error_and_return_default("'"sv, - std::string_view{ chars, length }, + traits::full_prefix, + std::string_view{ digits, length }, "' is not representable in 64 bits"sv); - if constexpr (traits::is_signed) - return static_cast(result) * sign; - else - return static_cast(result); + // do the thing + { + uint64_t result = {}; + { + uint64_t power = 1; + while (--end >= msd) + { + if constexpr (base == 16) + result += power * hex_to_dec(*end); + else + result += power * static_cast(*end - '0'); + + power *= base; + } + } + + // range check + if TOML_UNLIKELY(result > static_cast((std::numeric_limits::max)()) + (sign < 0 ? 1ull : 0ull)) + set_error_and_return_default("'"sv, + traits::full_prefix, + std::string_view{ digits, length }, + "' is not representable in 64 bits"sv); + + if constexpr (traits::is_signed) + return static_cast(result) * sign; + else + return static_cast(result); + } } TOML_NODISCARD