fix crash with pathologically-nested inputs (closes #100)

also:
- fixed parse_result natvis
- added parse_result default constructor
- added nested value limit example to error printer
This commit is contained in:
Mark Gillard 2021-05-18 00:32:35 +03:00
parent c4e00f9a56
commit a29ecda102
10 changed files with 174 additions and 32 deletions

View File

@ -9,7 +9,7 @@ assignees: marzer
<!--
Replace the HTML comments below with the requested information.
DO NOT delete this template and roll your own!
Please DO NOT delete this template and roll your own!
Thanks for contributing!
-->
@ -21,7 +21,7 @@ assignees: marzer
<!--
If you're using the single-header version of the library, the version number is right at the top of the file.
Otherwise you can find it by opening toml++/toml_version.h; it'll be represented by three defines -
TOML_LANG_MAJOR, TOML_LANG_MINOR and TOML_LANG_PATCH.
TOML_LIB_MAJOR, TOML_LIB_MINOR and TOML_LIB_PATCH.
If you're not using any particular release and are instead just living large at HEAD of master, the commit hash
would be super helpful too, though it's not critical.
@ -36,14 +36,14 @@ assignees: marzer
**C++ standard mode (e.g. 17, 20, 'latest'):**
**C++ standard mode:**
<!--
The C++ standard level you were targeting, e.g. C++17
The C++ standard level you were targeting, e.g. 17, 20, 'latest'
-->
**Target arch (e.g. x64):**
**Target arch:**
<!--
The architecture you were targeting, e.g. x86, x64, ARM
-->

View File

@ -9,7 +9,7 @@ assignees: marzer
<!--
Replace the HTML comments below with the requested information.
DO NOT delete this template and roll your own!
Please DO NOT delete this template and roll your own!
Thanks for contributing!
-->

View File

@ -43,7 +43,7 @@ namespace
"[foo] [bar]"sv,
"[foo]\n[foo]"sv,
"? = 'foo' ?"sv,
"[ [foo] ]"sv
"[ [foo] ]"sv,
"########## arrays"sv,
"val = [,]"sv,
@ -68,6 +68,7 @@ namespace
"########## values"sv,
"val = _"sv,
"val = G"sv,
"PATHOLOGICALLY_NESTED"sv, // generated inline
"########## strings"sv,
"val = \" \r \""sv,
@ -136,7 +137,18 @@ int main(int /*argc*/, char** /*argv*/)
}
else
{
auto result = toml::parse(str);
toml::parse_result result;
if (str == "PATHOLOGICALLY_NESTED"sv)
{
std::string s(1000_sz, '[');
constexpr auto start = "array = "sv;
memcpy(s.data(), start.data(), start.length());
result = toml::parse(s);
}
else
result = toml::parse(str);
if (!result)
{
std::cout << result.error();

View File

@ -63,7 +63,7 @@ TOML_NAMESPACE_START
static constexpr size_t align_ =
(alignof(toml::table) < alignof(parse_error) ? alignof(parse_error) : alignof(toml::table));
alignas(align_) unsigned char bytes[size_ + 1u];
alignas(align_) unsigned char bytes[size_];
};
mutable storage_t storage_;
@ -162,6 +162,13 @@ TOML_NAMESPACE_START
/// \brief Returns the internal toml::parse_error (const lvalue overload).
[[nodiscard]] explicit operator const parse_error& () const noexcept { return error(); }
/// \brief Default constructor; the resulting parse_result is in the error state.
///
/// Sets err_ to true and placement-constructs a parse_error, built from an empty
/// std::string and a default `source_region{}`, directly into storage_.bytes.
TOML_NODISCARD_CTOR
parse_result() noexcept
: err_{ true }
{
// construct the error object in-place inside the raw aligned byte storage
::new (static_cast<void*>(storage_.bytes)) parse_error{ std::string{}, source_region{} };
}
TOML_NODISCARD_CTOR
explicit parse_result(toml::table&& tbl) noexcept
: err_{ false }

View File

@ -234,6 +234,11 @@ TOML_ANON_NAMESPACE_START
};
#endif
}
error_builder(const error_builder&) = delete;
error_builder(error_builder&&) = delete;
error_builder& operator=(const error_builder&) = delete;
error_builder& operator=(error_builder&&) = delete;
};
struct parse_scope final
@ -253,6 +258,11 @@ TOML_ANON_NAMESPACE_START
{
storage_ = parent_;
}
parse_scope(const parse_scope&) = delete;
parse_scope(parse_scope&&) = delete;
parse_scope& operator=(const parse_scope&) = delete;
parse_scope& operator=(parse_scope&&) = delete;
};
#define push_parse_scope_2(scope, line) parse_scope ps_##line{ current_scope, scope }
#define push_parse_scope_1(scope, line) push_parse_scope_2(scope, line)
@ -296,6 +306,7 @@ TOML_ANON_NAMESPACE_START
[[nodiscard]]
TOML_ATTR(pure)
TOML_ALWAYS_INLINE
operator bool() const noexcept
{
return node_ != nullptr;
@ -303,6 +314,7 @@ TOML_ANON_NAMESPACE_START
[[nodiscard]]
TOML_ATTR(pure)
TOML_ALWAYS_INLINE
toml::node* get() const noexcept
{
return node_;
@ -328,6 +340,28 @@ TOML_ANON_NAMESPACE_START
node_ptr value;
};
// Scope guard for tracking nesting depth: increments the referenced counter when
// constructed and decrements it again when destroyed, so the counter reflects
// how many guards are currently alive.
struct parse_depth_counter final
{
	// the externally-owned counter being tracked (held by reference, never owned)
	size_t& depth_;

	// non-copyable and non-movable: each guard must pair exactly one increment
	// with exactly one decrement
	parse_depth_counter(const parse_depth_counter&) = delete;
	parse_depth_counter(parse_depth_counter&&) = delete;
	parse_depth_counter& operator=(const parse_depth_counter&) = delete;
	parse_depth_counter& operator=(parse_depth_counter&&) = delete;

	// enters one nesting level
	TOML_NODISCARD_CTOR
	explicit parse_depth_counter(size_t& depth) noexcept
		: depth_{ depth }
	{
		++depth_;
	}

	// leaves the nesting level entered by the constructor
	~parse_depth_counter() noexcept
	{
		--depth_;
	}
};
}
TOML_ANON_NAMESPACE_END;
@ -383,6 +417,8 @@ TOML_IMPL_NAMESPACE_START
class parser final
{
private:
static constexpr size_t max_nested_values = TOML_MAX_NESTED_VALUES;
utf8_buffered_reader reader;
table root;
source_position prev_pos = { 1, 1 };
@ -393,6 +429,7 @@ TOML_IMPL_NAMESPACE_START
std::string recording_buffer; //for diagnostics
bool recording = false, recording_whitespace = true;
std::string_view current_scope;
size_t nested_values = {};
#if !TOML_EXCEPTIONS
mutable optional<toml::parse_error> err;
#endif
@ -1885,6 +1922,14 @@ TOML_IMPL_NAMESPACE_START
assert_or_assume(!is_value_terminator(*cp));
push_parse_scope("value"sv);
const parse_depth_counter depth_counter{ nested_values };
if (nested_values > max_nested_values)
set_error_and_return_default(
"exceeded maximum nested value depth of "sv,
static_cast<uint64_t>(max_nested_values),
" (TOML_MAX_NESTED_VALUES)"sv
);
// check if it begins with some control character
// (note that this will also fail for whitespace but we're assuming we've
// called consume_leading_whitespace() before calling parse_value())

View File

@ -386,6 +386,12 @@
#define TOML_PARSER 1
#endif
// TOML_MAX_NESTED_VALUES: maximum depth to which values may be nested.
// The default below only applies if the macro has not already been defined.
#ifndef TOML_MAX_NESTED_VALUES
#define TOML_MAX_NESTED_VALUES 256
// This refers to the depth of nested values, e.g. inline tables and arrays.
// 256 is crazy high! If you're hitting this limit with real input, TOML is probably the wrong tool for the job...
#endif
#ifndef DOXYGEN
#if defined(_WIN32) && !defined(TOML_WINDOWS_COMPAT)
#define TOML_WINDOWS_COMPAT 1

View File

@ -162,5 +162,19 @@ b = []
});
}
SECTION("github/issues/100") // https://github.com/marzer/tomlplusplus/issues/100
{
// this tests for two separate things that should fail gracefully, not crash:
// 1. pathologically-nested inputs
// 2. a particular sequence of malformed UTF-8
parsing_should_fail(FILE_LINE_ARGS, "fl =[ [[[[[[[[[[[[[[[\x36\x80\x86\x00\x00\x00\x2D\x36\x9F\x20\x00"sv);
std::string s(2048_sz, '[');
constexpr auto start = "fl =[ "sv;
memcpy(s.data(), start.data(), start.length());
parsing_should_fail(FILE_LINE_ARGS, std::string_view{ s });
}
}

View File

@ -128,12 +128,11 @@
</Type>
<Type Name="toml::v2::noex::parse_result">
<DisplayString Condition="!is_err">{*reinterpret_cast&lt;toml::v2::table*&gt;(&amp;storage)}</DisplayString>
<DisplayString Condition="is_err">{*reinterpret_cast&lt;toml::v2::noex::parse_error*&gt;(&amp;storage)}</DisplayString>
<DisplayString Condition="!err_">{*reinterpret_cast&lt;toml::v2::table*&gt;(&amp;storage_.bytes)}</DisplayString>
<DisplayString Condition="err_">{*reinterpret_cast&lt;toml::v2::noex::parse_error*&gt;(&amp;storage_.bytes)}</DisplayString>
<Expand>
<Item Name="[table]" Condition="!is_err">*reinterpret_cast&lt;toml::v2::table*&gt;(&amp;storage)</Item>
<Item Name="[error]" Condition="is_err">*reinterpret_cast&lt;toml::v2::noex::parse_error*&gt;(&amp;storage)</Item>
<Item Name="is_err" ExcludeView="simple">is_err</Item>
<Item Name="[table]" Condition="!err_">*reinterpret_cast&lt;toml::v2::table*&gt;(&amp;storage_.bytes)</Item>
<Item Name="[error]" Condition="err_">*reinterpret_cast&lt;toml::v2::noex::parse_error*&gt;(&amp;storage_.bytes)</Item>
</Expand>
</Type>

View File

@ -401,6 +401,12 @@
#define TOML_PARSER 1
#endif
// TOML_MAX_NESTED_VALUES: maximum depth to which values may be nested.
// The default below only applies if the macro has not already been defined.
#ifndef TOML_MAX_NESTED_VALUES
#define TOML_MAX_NESTED_VALUES 256
// This refers to the depth of nested values, e.g. inline tables and arrays.
// 256 is crazy high! If you're hitting this limit with real input, TOML is probably the wrong tool for the job...
#endif
#ifndef DOXYGEN
#if defined(_WIN32) && !defined(TOML_WINDOWS_COMPAT)
#define TOML_WINDOWS_COMPAT 1
@ -6257,7 +6263,7 @@ TOML_NAMESPACE_START
static constexpr size_t align_ =
(alignof(toml::table) < alignof(parse_error) ? alignof(parse_error) : alignof(toml::table));
alignas(align_) unsigned char bytes[size_ + 1u];
alignas(align_) unsigned char bytes[size_];
};
mutable storage_t storage_;
@ -6336,6 +6342,13 @@ TOML_NAMESPACE_START
[[nodiscard]] explicit operator parse_error && () noexcept { return std::move(error()); }
[[nodiscard]] explicit operator const parse_error& () const noexcept { return error(); }
// Default constructor; the resulting parse_result is in the error state.
// Sets err_ to true and placement-constructs a parse_error, built from an empty
// std::string and a default source_region{}, directly into storage_.bytes.
TOML_NODISCARD_CTOR
parse_result() noexcept
: err_{ true }
{
// construct the error object in-place inside the raw aligned byte storage
::new (static_cast<void*>(storage_.bytes)) parse_error{ std::string{}, source_region{} };
}
TOML_NODISCARD_CTOR
explicit parse_result(toml::table&& tbl) noexcept
: err_{ false }
@ -8934,6 +8947,11 @@ TOML_ANON_NAMESPACE_START
};
#endif
}
error_builder(const error_builder&) = delete;
error_builder(error_builder&&) = delete;
error_builder& operator=(const error_builder&) = delete;
error_builder& operator=(error_builder&&) = delete;
};
struct parse_scope final
@ -8953,6 +8971,11 @@ TOML_ANON_NAMESPACE_START
{
storage_ = parent_;
}
parse_scope(const parse_scope&) = delete;
parse_scope(parse_scope&&) = delete;
parse_scope& operator=(const parse_scope&) = delete;
parse_scope& operator=(parse_scope&&) = delete;
};
#define push_parse_scope_2(scope, line) parse_scope ps_##line{ current_scope, scope }
#define push_parse_scope_1(scope, line) push_parse_scope_2(scope, line)
@ -8996,6 +9019,7 @@ TOML_ANON_NAMESPACE_START
[[nodiscard]]
TOML_ATTR(pure)
TOML_ALWAYS_INLINE
operator bool() const noexcept
{
return node_ != nullptr;
@ -9003,6 +9027,7 @@ TOML_ANON_NAMESPACE_START
[[nodiscard]]
TOML_ATTR(pure)
TOML_ALWAYS_INLINE
toml::node* get() const noexcept
{
return node_;
@ -9028,6 +9053,28 @@ TOML_ANON_NAMESPACE_START
node_ptr value;
};
// Scope guard for tracking nesting depth: increments the referenced counter when
// constructed and decrements it again when destroyed, so the counter reflects
// how many guards are currently alive.
struct parse_depth_counter final
{
	// the externally-owned counter being tracked (held by reference, never owned)
	size_t& depth_;

	// non-copyable and non-movable: each guard must pair exactly one increment
	// with exactly one decrement
	parse_depth_counter(const parse_depth_counter&) = delete;
	parse_depth_counter(parse_depth_counter&&) = delete;
	parse_depth_counter& operator=(const parse_depth_counter&) = delete;
	parse_depth_counter& operator=(parse_depth_counter&&) = delete;

	// enters one nesting level
	TOML_NODISCARD_CTOR
	explicit parse_depth_counter(size_t& depth) noexcept
		: depth_{ depth }
	{
		++depth_;
	}

	// leaves the nesting level entered by the constructor
	~parse_depth_counter() noexcept
	{
		--depth_;
	}
};
}
TOML_ANON_NAMESPACE_END;
@ -9083,6 +9130,8 @@ TOML_IMPL_NAMESPACE_START
class parser final
{
private:
static constexpr size_t max_nested_values = TOML_MAX_NESTED_VALUES;
utf8_buffered_reader reader;
table root;
source_position prev_pos = { 1, 1 };
@ -9093,6 +9142,7 @@ TOML_IMPL_NAMESPACE_START
std::string recording_buffer; //for diagnostics
bool recording = false, recording_whitespace = true;
std::string_view current_scope;
size_t nested_values = {};
#if !TOML_EXCEPTIONS
mutable optional<toml::parse_error> err;
#endif
@ -10583,6 +10633,14 @@ TOML_IMPL_NAMESPACE_START
assert_or_assume(!is_value_terminator(*cp));
push_parse_scope("value"sv);
const parse_depth_counter depth_counter{ nested_values };
if (nested_values > max_nested_values)
set_error_and_return_default(
"exceeded maximum nested value depth of "sv,
static_cast<uint64_t>(max_nested_values),
" (TOML_MAX_NESTED_VALUES)"sv
);
// check if it begins with some control character
// (note that this will also fail for whitespace but we're assuming we've
// called consume_leading_whitespace() before calling parse_value())

View File

@ -191,23 +191,24 @@ def main():
if m:
defines[m.group(1)] = defined
ignore_list = ( # macros that are meant to stay public (user configs etc)
'INCLUDE_TOMLPLUSPLUS_H',
'TOML_API',
'TOML_UNRELEASED_FEATURES',
'TOML_LARGE_FILES',
'TOML_PARSER',
'TOML_WINDOWS_COMPAT',
'TOML_EXCEPTIONS',
'TOML_LIB_SINGLE_HEADER',
'TOML_LIB_MAJOR',
'TOML_LIB_MINOR',
'TOML_LIB_PATCH',
'TOML_LANG_MAJOR',
'TOML_LANG_MINOR',
'TOML_LANG_PATCH',
'TOML_UNDEF_MACROS',
'TOML_HEADER_ONLY',
'TOML_ALL_INLINE'
r'INCLUDE_TOMLPLUSPLUS_H',
r'TOML_API',
r'TOML_UNRELEASED_FEATURES',
r'TOML_LARGE_FILES',
r'TOML_PARSER',
r'TOML_WINDOWS_COMPAT',
r'TOML_EXCEPTIONS',
r'TOML_LIB_SINGLE_HEADER',
r'TOML_LIB_MAJOR',
r'TOML_LIB_MINOR',
r'TOML_LIB_PATCH',
r'TOML_LANG_MAJOR',
r'TOML_LANG_MINOR',
r'TOML_LANG_PATCH',
r'TOML_UNDEF_MACROS',
r'TOML_HEADER_ONLY',
r'TOML_ALL_INLINE',
r'TOML_MAX_NESTED_VALUES'
)
set_defines = []
for define, currently_set in defines.items():