From e37e6d2bced5b2290dd35eea3d3753a9e1069402 Mon Sep 17 00:00:00 2001 From: Mark Gillard Date: Sun, 9 Jan 2022 13:01:56 +0200 Subject: [PATCH] fixed treating non-ASCII horizontal whitespace as valid (closes #135) also: - added `parse_result::at_path()` --- CHANGELOG.md | 75 +++--- examples/error_printer.cpp | 1 + include/toml++/impl/at_path.h | 2 +- include/toml++/impl/key.h | 19 -- include/toml++/impl/parse_result.h | 386 +++++++++++++++++------------ include/toml++/impl/parser.inl | 51 +++- tests/user_feedback.cpp | 18 ++ toml.hpp | 309 +++++++++++++---------- 8 files changed, 499 insertions(+), 362 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 125ccfc..86a2d03 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,9 +18,7 @@ template: ## Unreleased This release will be a major version bump, so it's ABI breaks all around. -Any changes that might cause code or build systems to break are indicated with ⚠️. - -Highlights are indicated with ❤️. +Any changes that are likely to cause migration issues (API changes, build system breakage, etc.) are indicated with ⚠️. #### Fixes: - ⚠️ fixed `toml::table` init-list constructor requiring double-brackets @@ -28,50 +26,50 @@ Highlights are indicated with ❤️. - ⚠️ fixed incorrect `noexcept` specifications on many functions - ⚠️ fixed missing `TOML_API` on some interfaces - fixed `toml::json_formatter` not formatting inf and nan incorrectly -- fixed a number of spec conformance issues (#127, #128, #129, #130, #131, #132) (@moorereason) +- fixed a number of spec conformance issues (#127, #128, #129, #130, #131, #132, #135) (@moorereason) - fixed an illegal table redefinition edge case (#112) (@python36) - fixed documentation issues +- fixed GCC bug causing memory leak during parse failures (#123, #124) (@rsmmr, @ronalabraham) - fixed incorrect handling of vertical whitespace in keys when printing TOML to streams - fixed incorrect source position in redefinition error messages -- fixed memory leak during parse failures when compiled using GCC (#123, #124) (@rsmmr, @ronalabraham) -- fixed missing `#include ` -- fixed missing `#include ` +- fixed missing includes ``, `` - fixed parser not correctly round-tripping the format of binary and octal integers in some cases - fixed some incorrect unicode scalar sequence transformations (#125) - fixed strong exception guarantee edge-cases in `toml::table` and `toml::array` #### Additions: -- ❤️ added `operator->` to `toml::value` for class types -- ❤️ added `toml::at_path()`, `toml::node::at_path()` and `toml::node_view::at_path()` for qualified path-based lookups (#118) (@ben-crowhurst) -- ❤️ added `toml::key` - provides a facility to access the source_regions of parsed keys (#82) (@vaartis) -- ❤️ added `toml::yaml_formatter` -- ❤️ added support for Unicode 14.0 -- added `parse_benchmark` example -- added `toml::array::at()` (same semantics as `std::vector::at()`) -- added `toml::array::prune()` -- added `toml::array::replace()` (#109) (@LebJe) -- added `toml::array::resize()` param `default_init_flags` -- added `toml::date_time` converting constructors from `toml::date` and `toml::time` -- added `toml::format_flags::allow_binary_integers` -- added `toml::format_flags::allow_hexadecimal_integers` -- added `toml::format_flags::allow_octal_integers` -- added `toml::format_flags::allow_real_tabs_in_strings` -- added `toml::format_flags::allow_unicode_strings` -- added `toml::format_flags::indent_array_elements` (#120) (@W4RH4WK) -- added `toml::format_flags::indent_sub_tables` (#120) (@W4RH4WK) +- added value flags to array + table insert methods (#44) (@levicki) +- added support for Unicode 14.0 +- added support for ref categories and cv-qualifiers in `toml::node::ref()` +- added magic `toml::value_flags` constant `toml::preserve_source_value_flags` +- added clang's enum annotation attributes to all enums +- added `TOML_ENABLE_FORMATTERS` option +- added `toml::yaml_formatter` +- added `toml::value` copy+move constructor overloads with flags override +- added `toml::table::prune()` +- added `toml::table::lower_bound()` (same semantics as `std::map::lower_bound()`) +- added `toml::table::emplace_hint()` (same semantics as `std::map::emplace_hint()`) +- added `toml::table::at()` (same semantics as `std::map::at()`) +- added `toml::node_view::operator==` +- added `toml::key` - provides a facility to access the source_regions of parsed keys (#82) (@vaartis) +- added `toml::is_key<>` and toml::is_key_or_convertible<>` metafunctions - added `toml::format_flags::relaxed_float_precision` (#89) (@vaartis) - added `toml::format_flags::quote_infinities_and_nans` -- added `toml::is_key<>` and toml::is_key_or_convertible<>` metafunctions -- added `toml::node_view::operator==` -- added `toml::table::at()` (same semantics as `std::map::at()`) -- added `toml::table::emplace_hint()` (same semantics as `std::map::emplace_hint()`) -- added `toml::table::lower_bound()` (same semantics as `std::map::lower_bound()`) -- added `toml::table::prune()` -- added `toml::value` copy+move constructor overloads with flags override -- added `TOML_ENABLE_FORMATTERS` option -- added clang's enum annotation attributes to all enums -- added magic `toml::value_flags` constant `toml::preserve_source_value_flags` -- added value flags to array + table insert methods (#44) (@levicki) +- added `toml::format_flags::indent_sub_tables` (#120) (@W4RH4WK) +- added `toml::format_flags::indent_array_elements` (#120) (@W4RH4WK) +- added `toml::format_flags::allow_unicode_strings` +- added `toml::format_flags::allow_real_tabs_in_strings` +- added `toml::format_flags::allow_octal_integers` +- added `toml::format_flags::allow_hexadecimal_integers` +- added `toml::format_flags::allow_binary_integers` +- added `toml::date_time` converting constructors from `toml::date` and `toml::time` +- added `toml::at_path()`, `toml::node::at_path()` and `toml::node_view::at_path()` for qualified path-based lookups (#118) (@ben-crowhurst) +- added `toml::array::resize()` param `default_init_flags` +- added `toml::array::replace()` (#109) (@LebJe) +- added `toml::array::prune()` +- added `toml::array::at()` (same semantics as `std::vector::at()`) +- added `parse_benchmark` example +- added `operator->` to `toml::value` for class types #### Changes: - ⚠️ `toml::format_flags` is now backed by `uint64_t` (was previously `uint8_t`) @@ -83,8 +81,7 @@ Highlights are indicated with ❤️. - ⚠️ renamed `TOML_PARSER` option to `TOML_ENABLE_PARSER` (`TOML_PARSER` will continue to work but is deprecated) - ⚠️ renamed `TOML_UNRELEASED_FEATURES` to `TOML_ENABLE_UNRELEASED_FEATURES` (`TOML_UNRELEASED_FEATURES` will continue to work but is deprecated) - ⚠️ renamed `TOML_WINDOWS_COMPAT` to `TOML_ENABLE_WINDOWS_COMPAT` (`TOML_WINDOWS_COMPAT` will continue to work but is deprecated) -- ❤️ `toml::node::ref()` now supports explicit ref categories and cv-qualifiers -- ❤️ applied clang-format to all the things 🎉️ +- applied clang-format to all the things 🎉️ - exposed `TOML_NAMESPACE_START` and `TOML_NAMESPACE_END` macros to help with ADL specialization scenarios - improved performance of parser - made date/time constructors accept any integral types @@ -95,7 +92,7 @@ Highlights are indicated with ❤️. #### Removals: - ⚠️ removed `toml::format_flags::allow_value_format_flags` - ⚠️ removed `TOML_LARGE_FILES` (it is now default - explicitly setting `TOML_LARGE_FILES` to `0` will invoke an `#error`) -- removed unnecessary template machinery (esp. where ostreams were involved) +- ⚠️ removed unnecessary template machinery (esp. where ostreams were involved) - removed unnecessary uses of `final` #### Build system: diff --git a/examples/error_printer.cpp b/examples/error_printer.cpp index 98b3910..1a56bb2 100644 --- a/examples/error_printer.cpp +++ b/examples/error_printer.cpp @@ -22,6 +22,7 @@ namespace "# bar\bkek"sv, "# \xf1\x63"sv, "# val1 = 1\fval2 = 2"sv, + "foo = 1\n\u2000\nbar = 2"sv, "########## inline tables"sv, "val = {,}"sv, diff --git a/include/toml++/impl/at_path.h b/include/toml++/impl/at_path.h index 468a7f1..f287263 100644 --- a/include/toml++/impl/at_path.h +++ b/include/toml++/impl/at_path.h @@ -39,7 +39,7 @@ TOML_NAMESPACE_START /// \ecpp ///
/// Additionally, TOML allows '.' (period) characters to appear in keys if they are quoted strings. - /// This function makes no allowance for this this, instead treating all period characters as sub-table delimiters. + /// This function makes no allowance for this, instead treating all period characters as sub-table delimiters. /// If you have periods in your table keys, first consider: /// 1. Not doing that /// 2. Using node_view::operator[] instead. diff --git a/include/toml++/impl/key.h b/include/toml++/impl/key.h index f2766f2..426191a 100644 --- a/include/toml++/impl/key.h +++ b/include/toml++/impl/key.h @@ -312,25 +312,6 @@ TOML_NAMESPACE_START /// @} - /// \name Iterators (ADL) - /// @{ - - /// \brief Returns an iterator to the first character in a key's backing string. - TOML_PURE_INLINE_GETTER - friend const_iterator begin(const key& k) noexcept - { - return k.begin(); - } - - /// \brief Returns an iterator to one-past-the-last character in a key's backing string. - TOML_PURE_INLINE_GETTER - friend const_iterator end(const key& k) noexcept - { - return k.end(); - } - - /// @} - /// \brief Prints the key's underlying string out to the stream. friend std::ostream& operator<<(std::ostream& lhs, const key& rhs) { diff --git a/include/toml++/impl/parse_result.h b/include/toml++/impl/parse_result.h index 14e98ca..196ae98 100644 --- a/include/toml++/impl/parse_result.h +++ b/include/toml++/impl/parse_result.h @@ -30,7 +30,7 @@ TOML_NAMESPACE_START /// or a toml::parse_error. Most member functions assume a particular one of these two states, /// and calling them when in the wrong state will cause errors (e.g. attempting to access the /// error object when parsing was successful). \cpp - /// parse_result result = toml::parse_file("config.toml"); + /// toml::parse_result result = toml::parse_file("config.toml"); /// if (result) /// do_stuff_with_a_table(result); //implicitly converts to table& /// else @@ -45,7 +45,7 @@ TOML_NAMESPACE_START /// (error occurred at line 1, column 13 of 'config.toml') /// \eout /// - /// Getting node_views (`operator[]`) and using the iterator accessor functions (`begin(), end()` etc.) are + /// Getting node_views (`operator[]`, `at_path()`) and using the iterator accessor functions (`begin()`, `end()` etc.) are /// unconditionally safe; when parsing fails these just return 'empty' values. A ranged-for loop on a failed /// parse_result is also safe since `begin()` and `end()` return the same iterator and will not lead to any /// dereferences and iterations. @@ -88,117 +88,7 @@ TOML_NAMESPACE_START /// \brief A BidirectionalIterator for iterating over const key-value pairs in a wrapped toml::table. using const_iterator = const_table_iterator; - /// \brief Returns true if parsing succeeeded. - TOML_NODISCARD - bool succeeded() const noexcept - { - return !err_; - } - - /// \brief Returns true if parsing failed. - TOML_NODISCARD - bool failed() const noexcept - { - return err_; - } - - /// \brief Returns true if parsing succeeded. - TOML_NODISCARD - explicit operator bool() const noexcept - { - return !err_; - } - - /// \brief Returns the internal toml::table. - TOML_NODISCARD - toml::table& table() & noexcept - { - TOML_ASSERT_ASSUME(!err_); - return *get_as(storage_); - } - - /// \brief Returns the internal toml::table (rvalue overload). - TOML_NODISCARD - toml::table&& table() && noexcept - { - TOML_ASSERT_ASSUME(!err_); - return static_cast(*get_as(storage_)); - } - - /// \brief Returns the internal toml::table (const lvalue overload). - TOML_NODISCARD - const toml::table& table() const& noexcept - { - TOML_ASSERT_ASSUME(!err_); - return *get_as(storage_); - } - - /// \brief Returns the internal toml::parse_error. - TOML_NODISCARD - parse_error& error() & noexcept - { - TOML_ASSERT_ASSUME(err_); - return *get_as(storage_); - } - - /// \brief Returns the internal toml::parse_error (rvalue overload). - TOML_NODISCARD - parse_error&& error() && noexcept - { - TOML_ASSERT_ASSUME(err_); - return static_cast(*get_as(storage_)); - } - - /// \brief Returns the internal toml::parse_error (const lvalue overload). - TOML_NODISCARD - const parse_error& error() const& noexcept - { - TOML_ASSERT_ASSUME(err_); - return *get_as(storage_); - } - - /// \brief Returns the internal toml::table. - TOML_NODISCARD - operator toml::table&() noexcept - { - return table(); - } - - /// \brief Returns the internal toml::table (rvalue overload). - TOML_NODISCARD - operator toml::table&&() noexcept - { - return std::move(table()); - } - - /// \brief Returns the internal toml::table (const lvalue overload). - TOML_NODISCARD - operator const toml::table&() const noexcept - { - return table(); - } - - /// \brief Returns the internal toml::parse_error. - TOML_NODISCARD - explicit operator parse_error&() noexcept - { - return error(); - } - - /// \brief Returns the internal toml::parse_error (rvalue overload). - TOML_NODISCARD - explicit operator parse_error&&() noexcept - { - return std::move(error()); - } - - /// \brief Returns the internal toml::parse_error (const lvalue overload). - TOML_NODISCARD - explicit operator const parse_error&() const noexcept - { - return error(); - } - + /// \brief Default constructs an 'error' result. TOML_NODISCARD_CTOR parse_result() noexcept // : err_{ true } @@ -259,69 +149,134 @@ TOML_NAMESPACE_START destroy(); } - /// \brief Gets a node_view for the selected key-value pair in the wrapped table. - /// - /// \param key The key used for the lookup. - /// - /// \returns A view of the value at the given key if parsing was successful and a matching key existed, - /// or an empty node view. - /// - /// \see toml::node_view + /// \name Result state + /// @{ + + /// \brief Returns true if parsing succeeeded. TOML_NODISCARD - node_view operator[](std::string_view key) noexcept + bool succeeded() const noexcept { - return err_ ? node_view{} : table()[key]; + return !err_; } - /// \brief Gets a node_view for the selected key-value pair in the wrapped table (const overload). - /// - /// \param key The key used for the lookup. - /// - /// \returns A view of the value at the given key if parsing was successful and a matching key existed, - /// or an empty node view. - /// - /// \see toml::node_view + /// \brief Returns true if parsing failed. TOML_NODISCARD - node_view operator[](std::string_view key) const noexcept + bool failed() const noexcept { - return err_ ? node_view{} : table()[key]; + return err_; } -#if TOML_ENABLE_WINDOWS_COMPAT - - /// \brief Gets a node_view for the selected key-value pair in the wrapped table. - /// - /// \availability This overload is only available when #TOML_ENABLE_WINDOWS_COMPAT is enabled. - /// - /// \param key The key used for the lookup. - /// - /// \returns A view of the value at the given key if parsing was successful and a matching key existed, - /// or an empty node view. - /// - /// \see toml::node_view + /// \brief Returns true if parsing succeeded. TOML_NODISCARD - node_view operator[](std::wstring_view key) noexcept + explicit operator bool() const noexcept { - return err_ ? node_view{} : table()[key]; + return !err_; } - /// \brief Gets a node_view for the selected key-value pair in the wrapped table (const overload). - /// - /// \availability This overload is only available when #TOML_ENABLE_WINDOWS_COMPAT is enabled. - /// - /// \param key The key used for the lookup. - /// - /// \returns A view of the value at the given key if parsing was successful and a matching key existed, - /// or an empty node view. - /// - /// \see toml::node_view + /// @} + + /// \name Successful parses + /// @{ + + /// \brief Returns the internal toml::table. TOML_NODISCARD - node_view operator[](std::wstring_view key) const noexcept + toml::table& table() & noexcept { - return err_ ? node_view{} : table()[key]; + TOML_ASSERT_ASSUME(!err_); + return *get_as(storage_); } -#endif // TOML_ENABLE_WINDOWS_COMPAT + /// \brief Returns the internal toml::table (rvalue overload). + TOML_NODISCARD + toml::table&& table() && noexcept + { + TOML_ASSERT_ASSUME(!err_); + return static_cast(*get_as(storage_)); + } + + /// \brief Returns the internal toml::table (const lvalue overload). + TOML_NODISCARD + const toml::table& table() const& noexcept + { + TOML_ASSERT_ASSUME(!err_); + return *get_as(storage_); + } + + /// \brief Returns the internal toml::table. + TOML_NODISCARD + /* implicit */ operator toml::table&() noexcept + { + return table(); + } + + /// \brief Returns the internal toml::table (rvalue overload). + TOML_NODISCARD + /* implicit */ operator toml::table&&() noexcept + { + return std::move(table()); + } + + /// \brief Returns the internal toml::table (const lvalue overload). + TOML_NODISCARD + /* implicit */ operator const toml::table&() const noexcept + { + return table(); + } + + /// @} + + /// \name Failed parses + /// @{ + + /// \brief Returns the internal toml::parse_error. + TOML_NODISCARD + parse_error& error() & noexcept + { + TOML_ASSERT_ASSUME(err_); + return *get_as(storage_); + } + + /// \brief Returns the internal toml::parse_error (rvalue overload). + TOML_NODISCARD + parse_error&& error() && noexcept + { + TOML_ASSERT_ASSUME(err_); + return static_cast(*get_as(storage_)); + } + + /// \brief Returns the internal toml::parse_error (const lvalue overload). + TOML_NODISCARD + const parse_error& error() const& noexcept + { + TOML_ASSERT_ASSUME(err_); + return *get_as(storage_); + } + + /// \brief Returns the internal toml::parse_error. + TOML_NODISCARD + explicit operator parse_error&() noexcept + { + return error(); + } + + /// \brief Returns the internal toml::parse_error (rvalue overload). + TOML_NODISCARD + explicit operator parse_error&&() noexcept + { + return std::move(error()); + } + + /// \brief Returns the internal toml::parse_error (const lvalue overload). + TOML_NODISCARD + explicit operator const parse_error&() const noexcept + { + return error(); + } + + /// @} + + /// \name Iterators + /// @{ /// \brief Returns an iterator to the first key-value pair in the wrapped table. /// \remarks Returns a default-constructed 'nothing' iterator if the parsing failed. @@ -371,6 +326,117 @@ TOML_NAMESPACE_START return err_ ? const_table_iterator{} : table().cend(); } + /// @} + + /// \name Node views + /// @{ + + /// \brief Gets a node_view for the selected key-value pair in the wrapped table. + /// + /// \param key The key used for the lookup. + /// + /// \returns A view of the value at the given key if parsing was successful and a matching key existed, + /// or an empty node view. + /// + /// \see toml::node_view + TOML_NODISCARD + node_view operator[](std::string_view key) noexcept + { + return err_ ? node_view{} : table()[key]; + } + + /// \brief Gets a node_view for the selected key-value pair in the wrapped table (const overload). + /// + /// \param key The key used for the lookup. + /// + /// \returns A view of the value at the given key if parsing was successful and a matching key existed, + /// or an empty node view. + /// + /// \see toml::node_view + TOML_NODISCARD + node_view operator[](std::string_view key) const noexcept + { + return err_ ? node_view{} : table()[key]; + } + + /// \brief Returns a view of the subnode matching a fully-qualified "TOML path". + /// + /// \see #toml::at_path(node&, std::string_view) + TOML_NODISCARD + node_view at_path(std::string_view path) noexcept + { + return err_ ? node_view{} : table().at_path(path); + } + + /// \brief Returns a const view of the subnode matching a fully-qualified "TOML path". + /// + /// \see #toml::at_path(node&, std::string_view) + TOML_NODISCARD + node_view at_path(std::string_view path) const noexcept + { + return err_ ? node_view{} : table().at_path(path); + } + +#if TOML_ENABLE_WINDOWS_COMPAT + + /// \brief Gets a node_view for the selected key-value pair in the wrapped table. + /// + /// \availability This overload is only available when #TOML_ENABLE_WINDOWS_COMPAT is enabled. + /// + /// \param key The key used for the lookup. + /// + /// \returns A view of the value at the given key if parsing was successful and a matching key existed, + /// or an empty node view. + /// + /// \see toml::node_view + TOML_NODISCARD + node_view operator[](std::wstring_view key) noexcept + { + return err_ ? node_view{} : table()[key]; + } + + /// \brief Gets a node_view for the selected key-value pair in the wrapped table (const overload). + /// + /// \availability This overload is only available when #TOML_ENABLE_WINDOWS_COMPAT is enabled. + /// + /// \param key The key used for the lookup. + /// + /// \returns A view of the value at the given key if parsing was successful and a matching key existed, + /// or an empty node view. + /// + /// \see toml::node_view + TOML_NODISCARD + node_view operator[](std::wstring_view key) const noexcept + { + return err_ ? node_view{} : table()[key]; + } + + /// \brief Returns a view of the subnode matching a fully-qualified "TOML path". + /// + /// \availability This overload is only available when #TOML_ENABLE_WINDOWS_COMPAT is enabled. + /// + /// \see #toml::at_path(node&, std::string_view) + TOML_NODISCARD + node_view at_path(std::wstring_view path) noexcept + { + return err_ ? node_view{} : table().at_path(path); + } + + /// \brief Returns a const view of the subnode matching a fully-qualified "TOML path". + /// + /// \availability This overload is only available when #TOML_ENABLE_WINDOWS_COMPAT is enabled. + /// + /// \see #toml::at_path(node&, std::string_view) + TOML_NODISCARD + node_view at_path(std::wstring_view path) const noexcept + { + return err_ ? node_view{} : table().at_path(path); + } + +#endif // TOML_ENABLE_WINDOWS_COMPAT + + /// @} + #if TOML_ENABLE_FORMATTERS /// \brief Prints the held error or table object out to a text stream. diff --git a/include/toml++/impl/parser.inl b/include/toml++/impl/parser.inl index 0188dbb..3eca4e7 100644 --- a/include/toml++/impl/parser.inl +++ b/include/toml++/impl/parser.inl @@ -686,9 +686,9 @@ TOML_ANON_NAMESPACE_START TOML_INTERNAL_LINKAGE std::string_view to_sv(const utf8_codepoint& cp) noexcept { - if TOML_UNLIKELY(cp.value <= U'\x1F') + if (cp.value <= U'\x1F') return impl::control_char_escapes[cp.value]; - else if TOML_UNLIKELY(cp.value == U'\x7F') + else if (cp.value == U'\x7F') return "\\u007F"sv; else return std::string_view{ cp.bytes, cp.count }; @@ -703,15 +703,21 @@ TOML_ANON_NAMESPACE_START return ""sv; } + struct escaped_codepoint + { + const utf8_codepoint& cp; + }; + template TOML_ATTR(nonnull) TOML_INTERNAL_LINKAGE TOML_NEVER_INLINE void concatenate(char*& write_pos, char* const buf_end, const T& arg) noexcept { - static_assert(impl::is_one_of, std::string_view, int64_t, uint64_t, double>, - "concatenate inputs are limited to std::string_view, int64_t, uint64_t and double to keep " - "instantiations to a minimum as an anti-bloat measure (hint: to_sv will probably help)"); + static_assert( + impl::is_one_of, std::string_view, int64_t, uint64_t, double, escaped_codepoint>, + "concatenate inputs are limited to [std::string_view, int64_t, uint64_t, double, escaped_codepoint] to " + "keep instantiations at a minimum as an anti-bloat measure (hint: to_sv will probably help)"); if (write_pos >= buf_end) return; @@ -750,6 +756,25 @@ TOML_ANON_NAMESPACE_START concatenate(write_pos, buf_end, to_sv(std::move(ss).str())); #endif } + else if constexpr (std::is_same_v) + { + if (arg.cp.value <= U'\x7F') + concatenate(write_pos, buf_end, to_sv(arg.cp)); + else + { + auto val = static_cast(arg.cp.value); + const auto digits = val > 0xFFFFu ? 8u : 4u; + constexpr auto mask = uint_least32_t{ 0xFu }; + char buf[10] = { '\\', digits > 4 ? 'U' : 'u' }; + for (auto i = 2u + digits; i-- > 2u;) + { + const auto hexdig = val & mask; + buf[i] = static_cast(hexdig >= 0xAu ? ('A' + (hexdig - 0xAu)) : ('0' + hexdig)); + val >>= 4; + } + concatenate(write_pos, buf_end, std::string_view{ buf, digits + 2u }); + } + } else static_assert(impl::dependent_false, "Evaluated unreachable branch!"); } @@ -1143,6 +1168,9 @@ TOML_IMPL_NAMESPACE_START bool consumed = false; while (!is_eof() && is_horizontal_whitespace(*cp)) { + if TOML_UNLIKELY(!is_ascii_horizontal_whitespace(*cp)) + set_error_and_return_default("expected space or tab, saw '"sv, escaped_codepoint{ *cp }, "'"sv); + consumed = true; advance_and_return_if_error({}); } @@ -1155,17 +1183,19 @@ TOML_IMPL_NAMESPACE_START if TOML_UNLIKELY(is_match(*cp, U'\v', U'\f')) set_error_and_return_default( - R"(vertical tabs '\v' and form-feeds '\f' are not legal whitespace in TOML.)"sv); + R"(vertical tabs '\v' and form-feeds '\f' are not legal line breaks in TOML)"sv); if (*cp == U'\r') { advance_and_return_if_error({}); // skip \r if TOML_UNLIKELY(is_eof()) - set_error_and_return_default("expected \\n after \\r, saw EOF"sv); + set_error_and_return_default("expected '\\n' after '\\r', saw EOF"sv); if TOML_UNLIKELY(*cp != U'\n') - set_error_and_return_default("expected \\n after \\r, saw '"sv, to_sv(*cp), "'"sv); + set_error_and_return_default("expected '\\n' after '\\r', saw '"sv, + escaped_codepoint{ *cp }, + "'"sv); } else if (*cp != U'\n') return false; @@ -2807,7 +2837,7 @@ TOML_IMPL_NAMESPACE_START utf8_buffered_reader::max_history_length - 2u; if TOML_UNLIKELY(!eof_while_scanning && advance_count > max_numeric_value_length) set_error_and_return_default("numeric value too long to identify type - cannot exceed "sv, - max_numeric_value_length, + static_cast(max_numeric_value_length), " characters"sv); val.reset(new value{ parse_integer<10>() }); @@ -3359,7 +3389,8 @@ TOML_IMPL_NAMESPACE_START return_after_error({}); } - // create the key first since the key buffer will likely get overritten during value parsing (inline tables) + // create the key first since the key buffer will likely get overwritten during value parsing (inline + // tables) auto last_key = make_key(key_buffer.size() - 1u); // now we can actually parse the value diff --git a/tests/user_feedback.cpp b/tests/user_feedback.cpp index 443efc6..4d136be 100644 --- a/tests/user_feedback.cpp +++ b/tests/user_feedback.cpp @@ -302,4 +302,22 @@ b = [] parsing_should_fail(FILE_LINE_ARGS, " val = 0x8000000000000000"sv); // int64_t max + 1 parse_expected_value(FILE_LINE_ARGS, " 0x7FFFFFFFFFFFFFFF"sv, INT64_MAX); } + + SECTION("github/issues/135") // https://github.com/marzer/tomlplusplus/issues/135 + { + parsing_should_succeed(FILE_LINE_ARGS, "0=0"sv); + parsing_should_succeed(FILE_LINE_ARGS, "1=1"sv); + parsing_should_succeed(FILE_LINE_ARGS, "2=2"sv); + + parsing_should_succeed(FILE_LINE_ARGS, + "0=0\n" + "1=1\n" + "2=2\n"sv); + + parsing_should_fail(FILE_LINE_ARGS, + "0=0\n" + "\u2000\u2000\n" + "1=1\n" + "2=2\n"sv); + } } diff --git a/toml.hpp b/toml.hpp index 2bbdafc..71824ce 100644 --- a/toml.hpp +++ b/toml.hpp @@ -6196,18 +6196,6 @@ TOML_NAMESPACE_START return key_.data() + key_.length(); } - TOML_PURE_INLINE_GETTER - friend const_iterator begin(const key& k) noexcept - { - return k.begin(); - } - - TOML_PURE_INLINE_GETTER - friend const_iterator end(const key& k) noexcept - { - return k.end(); - } - friend std::ostream& operator<<(std::ostream& lhs, const key& rhs) { impl::print_to_stream(lhs, rhs.key_); @@ -8406,102 +8394,6 @@ TOML_NAMESPACE_START using const_iterator = const_table_iterator; - TOML_NODISCARD - bool succeeded() const noexcept - { - return !err_; - } - - TOML_NODISCARD - bool failed() const noexcept - { - return err_; - } - - TOML_NODISCARD - explicit operator bool() const noexcept - { - return !err_; - } - - TOML_NODISCARD - toml::table& table() & noexcept - { - TOML_ASSERT_ASSUME(!err_); - return *get_as(storage_); - } - - TOML_NODISCARD - toml::table&& table() && noexcept - { - TOML_ASSERT_ASSUME(!err_); - return static_cast(*get_as(storage_)); - } - - TOML_NODISCARD - const toml::table& table() const& noexcept - { - TOML_ASSERT_ASSUME(!err_); - return *get_as(storage_); - } - - TOML_NODISCARD - parse_error& error() & noexcept - { - TOML_ASSERT_ASSUME(err_); - return *get_as(storage_); - } - - TOML_NODISCARD - parse_error&& error() && noexcept - { - TOML_ASSERT_ASSUME(err_); - return static_cast(*get_as(storage_)); - } - - TOML_NODISCARD - const parse_error& error() const& noexcept - { - TOML_ASSERT_ASSUME(err_); - return *get_as(storage_); - } - - TOML_NODISCARD - operator toml::table&() noexcept - { - return table(); - } - - TOML_NODISCARD - operator toml::table&&() noexcept - { - return std::move(table()); - } - - TOML_NODISCARD - operator const toml::table&() const noexcept - { - return table(); - } - - TOML_NODISCARD - explicit operator parse_error&() noexcept - { - return error(); - } - - TOML_NODISCARD - explicit operator parse_error&&() noexcept - { - return std::move(error()); - } - - TOML_NODISCARD - explicit operator const parse_error&() const noexcept - { - return error(); - } - TOML_NODISCARD_CTOR parse_result() noexcept // : err_{ true } @@ -8560,32 +8452,100 @@ TOML_NAMESPACE_START } TOML_NODISCARD - node_view operator[](std::string_view key) noexcept + bool succeeded() const noexcept { - return err_ ? node_view{} : table()[key]; + return !err_; } TOML_NODISCARD - node_view operator[](std::string_view key) const noexcept + bool failed() const noexcept { - return err_ ? node_view{} : table()[key]; - } - -#if TOML_ENABLE_WINDOWS_COMPAT - - TOML_NODISCARD - node_view operator[](std::wstring_view key) noexcept - { - return err_ ? node_view{} : table()[key]; + return err_; } TOML_NODISCARD - node_view operator[](std::wstring_view key) const noexcept + explicit operator bool() const noexcept { - return err_ ? node_view{} : table()[key]; + return !err_; } -#endif // TOML_ENABLE_WINDOWS_COMPAT + TOML_NODISCARD + toml::table& table() & noexcept + { + TOML_ASSERT_ASSUME(!err_); + return *get_as(storage_); + } + + TOML_NODISCARD + toml::table&& table() && noexcept + { + TOML_ASSERT_ASSUME(!err_); + return static_cast(*get_as(storage_)); + } + + TOML_NODISCARD + const toml::table& table() const& noexcept + { + TOML_ASSERT_ASSUME(!err_); + return *get_as(storage_); + } + + TOML_NODISCARD + /* implicit */ operator toml::table&() noexcept + { + return table(); + } + + TOML_NODISCARD + /* implicit */ operator toml::table&&() noexcept + { + return std::move(table()); + } + + TOML_NODISCARD + /* implicit */ operator const toml::table&() const noexcept + { + return table(); + } + + TOML_NODISCARD + parse_error& error() & noexcept + { + TOML_ASSERT_ASSUME(err_); + return *get_as(storage_); + } + + TOML_NODISCARD + parse_error&& error() && noexcept + { + TOML_ASSERT_ASSUME(err_); + return static_cast(*get_as(storage_)); + } + + TOML_NODISCARD + const parse_error& error() const& noexcept + { + TOML_ASSERT_ASSUME(err_); + return *get_as(storage_); + } + + TOML_NODISCARD + explicit operator parse_error&() noexcept + { + return error(); + } + + TOML_NODISCARD + explicit operator parse_error&&() noexcept + { + return std::move(error()); + } + + TOML_NODISCARD + explicit operator const parse_error&() const noexcept + { + return error(); + } TOML_NODISCARD table_iterator begin() noexcept @@ -8623,6 +8583,58 @@ TOML_NAMESPACE_START return err_ ? const_table_iterator{} : table().cend(); } + TOML_NODISCARD + node_view operator[](std::string_view key) noexcept + { + return err_ ? node_view{} : table()[key]; + } + + TOML_NODISCARD + node_view operator[](std::string_view key) const noexcept + { + return err_ ? node_view{} : table()[key]; + } + + TOML_NODISCARD + node_view at_path(std::string_view path) noexcept + { + return err_ ? node_view{} : table().at_path(path); + } + + TOML_NODISCARD + node_view at_path(std::string_view path) const noexcept + { + return err_ ? node_view{} : table().at_path(path); + } + +#if TOML_ENABLE_WINDOWS_COMPAT + + TOML_NODISCARD + node_view operator[](std::wstring_view key) noexcept + { + return err_ ? node_view{} : table()[key]; + } + + TOML_NODISCARD + node_view operator[](std::wstring_view key) const noexcept + { + return err_ ? node_view{} : table()[key]; + } + + TOML_NODISCARD + node_view at_path(std::wstring_view path) noexcept + { + return err_ ? node_view{} : table().at_path(path); + } + + TOML_NODISCARD + node_view at_path(std::wstring_view path) const noexcept + { + return err_ ? node_view{} : table().at_path(path); + } + +#endif // TOML_ENABLE_WINDOWS_COMPAT + #if TOML_ENABLE_FORMATTERS friend std::ostream& operator<<(std::ostream& os, const parse_result& result) @@ -11693,9 +11705,9 @@ TOML_ANON_NAMESPACE_START TOML_INTERNAL_LINKAGE std::string_view to_sv(const utf8_codepoint& cp) noexcept { - if TOML_UNLIKELY(cp.value <= U'\x1F') + if (cp.value <= U'\x1F') return impl::control_char_escapes[cp.value]; - else if TOML_UNLIKELY(cp.value == U'\x7F') + else if (cp.value == U'\x7F') return "\\u007F"sv; else return std::string_view{ cp.bytes, cp.count }; @@ -11710,15 +11722,21 @@ TOML_ANON_NAMESPACE_START return ""sv; } + struct escaped_codepoint + { + const utf8_codepoint& cp; + }; + template TOML_ATTR(nonnull) TOML_INTERNAL_LINKAGE TOML_NEVER_INLINE void concatenate(char*& write_pos, char* const buf_end, const T& arg) noexcept { - static_assert(impl::is_one_of, std::string_view, int64_t, uint64_t, double>, - "concatenate inputs are limited to std::string_view, int64_t, uint64_t and double to keep " - "instantiations to a minimum as an anti-bloat measure (hint: to_sv will probably help)"); + static_assert( + impl::is_one_of, std::string_view, int64_t, uint64_t, double, escaped_codepoint>, + "concatenate inputs are limited to [std::string_view, int64_t, uint64_t, double, escaped_codepoint] to " + "keep instantiations at a minimum as an anti-bloat measure (hint: to_sv will probably help)"); if (write_pos >= buf_end) return; @@ -11757,6 +11775,25 @@ TOML_ANON_NAMESPACE_START concatenate(write_pos, buf_end, to_sv(std::move(ss).str())); #endif } + else if constexpr (std::is_same_v) + { + if (arg.cp.value <= U'\x7F') + concatenate(write_pos, buf_end, to_sv(arg.cp)); + else + { + auto val = static_cast(arg.cp.value); + const auto digits = val > 0xFFFFu ? 8u : 4u; + constexpr auto mask = uint_least32_t{ 0xFu }; + char buf[10] = { '\\', digits > 4 ? 'U' : 'u' }; + for (auto i = 2u + digits; i-- > 2u;) + { + const auto hexdig = val & mask; + buf[i] = static_cast(hexdig >= 0xAu ? ('A' + (hexdig - 0xAu)) : ('0' + hexdig)); + val >>= 4; + } + concatenate(write_pos, buf_end, std::string_view{ buf, digits + 2u }); + } + } else static_assert(impl::dependent_false, "Evaluated unreachable branch!"); } @@ -12150,6 +12187,9 @@ TOML_IMPL_NAMESPACE_START bool consumed = false; while (!is_eof() && is_horizontal_whitespace(*cp)) { + if TOML_UNLIKELY(!is_ascii_horizontal_whitespace(*cp)) + set_error_and_return_default("expected space or tab, saw '"sv, escaped_codepoint{ *cp }, "'"sv); + consumed = true; advance_and_return_if_error({}); } @@ -12162,17 +12202,19 @@ TOML_IMPL_NAMESPACE_START if TOML_UNLIKELY(is_match(*cp, U'\v', U'\f')) set_error_and_return_default( - R"(vertical tabs '\v' and form-feeds '\f' are not legal whitespace in TOML.)"sv); + R"(vertical tabs '\v' and form-feeds '\f' are not legal line breaks in TOML)"sv); if (*cp == U'\r') { advance_and_return_if_error({}); // skip \r if TOML_UNLIKELY(is_eof()) - set_error_and_return_default("expected \\n after \\r, saw EOF"sv); + set_error_and_return_default("expected '\\n' after '\\r', saw EOF"sv); if TOML_UNLIKELY(*cp != U'\n') - set_error_and_return_default("expected \\n after \\r, saw '"sv, to_sv(*cp), "'"sv); + set_error_and_return_default("expected '\\n' after '\\r', saw '"sv, + escaped_codepoint{ *cp }, + "'"sv); } else if (*cp != U'\n') return false; @@ -13813,7 +13855,7 @@ TOML_IMPL_NAMESPACE_START utf8_buffered_reader::max_history_length - 2u; if TOML_UNLIKELY(!eof_while_scanning && advance_count > max_numeric_value_length) set_error_and_return_default("numeric value too long to identify type - cannot exceed "sv, - max_numeric_value_length, + static_cast(max_numeric_value_length), " characters"sv); val.reset(new value{ parse_integer<10>() }); @@ -14365,7 +14407,8 @@ TOML_IMPL_NAMESPACE_START return_after_error({}); } - // create the key first since the key buffer will likely get overritten during value parsing (inline tables) + // create the key first since the key buffer will likely get overwritten during value parsing (inline + // tables) auto last_key = make_key(key_buffer.size() - 1u); // now we can actually parse the value