diff --git a/examples/error_printer.cpp b/examples/error_printer.cpp new file mode 100644 index 0000000..df4bc5b --- /dev/null +++ b/examples/error_printer.cpp @@ -0,0 +1,118 @@ +// This file is a part of toml++ and is subject to the the terms of the MIT license. +// Copyright (c) 2019-2020 Mark Gillard +// See https://github.com/marzer/tomlplusplus/blob/master/LICENSE for the full license text. +// SPDX-License-Identifier: MIT +/* + + This example is one of diagnostics; it forces a set of specific parsing + failures and prints their error messages to stdout so you can see what the + default error messages look like. + +*/ +#include +#include "utf8_console.h" +#define TOML_EXCEPTIONS 0 +#define TOML_UNRELEASED_FEATURES 0 +#include +using namespace std::string_view_literals; +using toml::operator""_sz; + +namespace +{ + inline constexpr std::string_view invalid_parses[] = + { + "########## comments"sv, + "# bar\rkek"sv, + "# bar\bkek"sv, + + "########## inline tables"sv, + "val = {,}"sv, + "val = {a='b',}"sv, // allowed when TOML_UNRELEASED_FEATURES == 1 + "val = {a='b',,}"sv, + "val = {a='b',"sv, + "val = {a='b',\n c='d'}"sv, // allowed when TOML_UNRELEASED_FEATURES == 1 + "val = {?='b'}"sv, + + "########## tables"sv, + "[foo"sv, + "[foo] ?"sv, + "[foo] [bar]"sv, + "[foo]\n[foo]"sv, + "? = 'foo' ?"sv, + + "########## arrays"sv, + "val = [,]"sv, + "val = ['a',,]"sv, + "val = ['a',"sv, + + "########## key-value pairs"sv, + "val = 'foo' ?"sv, + "val = "sv, + "val "sv, + "val ?"sv, + "val = ]"sv, + "[foo]\nbar = 'kek'\nbar = 'kek2'"sv, + "[foo]\nbar = 'kek'\nbar = 7"sv, + "[foo.bar]\n[foo]\nbar = 'kek'"sv, + "[foo]\nbar = 'kek'\nbar.kek = 7"sv, + "[foo]\nbar.? = 'kek'"sv, + + "########## values"sv, + "val = _"sv, + "val = G"sv, + + "########## strings"sv, + "val = \" \r \""sv, + R"(val = ")"sv, + R"(val = "\g")"sv, + R"(val = "\x20")"sv, // allowed when TOML_UNRELEASED_FEATURES == 1 + R"(val = "\uFFF")"sv, + R"(val = "\uFFFG")"sv, + R"(val = "\UFFFFFFF")"sv, + R"(val = "\UFFFFFGF")"sv, + R"(val = "\uD801")"sv, + R"(val = "\U00110000")"sv, + R"(val = """ """""")"sv, + R"(val = ''' '''''')"sv, + "val = '\n'"sv, + }; +} + + +int main(int /*argc*/, char** /*argv*/) +{ + std::ios_base::sync_with_stdio(false); + init_utf8_console(); + + for (auto str : invalid_parses) + { + if (str.empty()) + continue; + + if (str.substr(0_sz, 10_sz) == "##########"sv) + { + const auto substr = str.substr(11_sz); + size_t cols = 80_sz; + for (size_t i = (cols - substr.length()) / 2_sz - 1_sz; i-- > 0_sz; ) + { + std::cout.put('#'); + cols--; + } + std::cout.put(' '); + std::cout << substr; + std::cout.put(' '); + cols -= substr.length() + 2_sz; + while (cols--) + std::cout.put('#'); + } + else + { + auto result = toml::parse(str); + if (!result) + std::cout << result.error(); + } + + std::cout << "\n\n"sv; + } + return 0; +} diff --git a/examples/meson.build b/examples/meson.build index a0604df..28eced0 100644 --- a/examples/meson.build +++ b/examples/meson.build @@ -24,3 +24,10 @@ toml_generator = executable( include_directories : inc, cpp_args : args ) + +error_printer = executable( + 'error_printer', + [ 'error_printer.cpp' ], + include_directories : inc, + cpp_args : args +) diff --git a/examples/simple_parser.cpp b/examples/simple_parser.cpp index 3bdc009..1fee906 100644 --- a/examples/simple_parser.cpp +++ b/examples/simple_parser.cpp @@ -17,6 +17,7 @@ using namespace std::string_view_literals; int main(int argc, char** argv) { + std::ios_base::sync_with_stdio(false); init_utf8_console(); auto path = std::string{ argc > 1 ? argv[1] : "example.toml" }; diff --git a/examples/toml_generator.cpp b/examples/toml_generator.cpp index c77f13c..870db8d 100644 --- a/examples/toml_generator.cpp +++ b/examples/toml_generator.cpp @@ -108,6 +108,7 @@ namespace int main(int argc, char** argv) { + std::ios_base::sync_with_stdio(false); init_utf8_console(); srand(static_cast(time(nullptr))); diff --git a/examples/toml_to_json_transcoder.cpp b/examples/toml_to_json_transcoder.cpp index fb5f074..39fef07 100644 --- a/examples/toml_to_json_transcoder.cpp +++ b/examples/toml_to_json_transcoder.cpp @@ -17,6 +17,7 @@ using namespace std::string_view_literals; int main(int argc, char** argv) { + std::ios_base::sync_with_stdio(false); init_utf8_console(); // read from a file if a path argument is given diff --git a/include/toml++/toml.h b/include/toml++/toml.h index 8dcb099..906130b 100644 --- a/include/toml++/toml.h +++ b/include/toml++/toml.h @@ -3,7 +3,10 @@ //# See https://github.com/marzer/tomlplusplus/blob/master/LICENSE for the full license text. // SPDX-License-Identifier: MIT -#pragma once +//# {{ +#ifndef INCLUDE_TOMLPLUSPLUS_H +#define INCLUDE_TOMLPLUSPLUS_H +//# }} //# Note: most of these would be included transitively but //# they're listed explicitly here because this file @@ -489,3 +492,7 @@ /// - Facebook: [marzer](https://www.facebook.com/marzer) /// - LinkedIn: [marzer](https://www.linkedin.com/in/marzer/) /// + +//# {{ +#endif // INCLUDE_TOMLPLUSPLUS_H +//# }} diff --git a/include/toml++/toml_parser.hpp b/include/toml++/toml_parser.hpp index 506fb83..60faf9d 100644 --- a/include/toml++/toml_parser.hpp +++ b/include/toml++/toml_parser.hpp @@ -155,15 +155,25 @@ namespace TOML_INTERNAL_NAMESPACE struct parse_scope final { - std::vector& stack_; + std::string_view& storage_; + std::string_view parent_; TOML_NODISCARD_CTOR - explicit parse_scope(std::vector& stack) noexcept : stack_{ stack } {} - ~parse_scope() noexcept { stack_.pop_back(); } + explicit parse_scope(std::string_view& current_scope, std::string_view new_scope) noexcept + : storage_{ current_scope }, + parent_{ current_scope } + { + storage_ = new_scope; + } + + ~parse_scope() noexcept + { + storage_ = parent_; + } }; - #define push_parse_scope_2(desc, line) scope_stack.push_back(desc); parse_scope ps_##line{ scope_stack } - #define push_parse_scope_1(desc, line) push_parse_scope_2(desc, line) - #define push_parse_scope(desc) push_parse_scope_1(desc, __LINE__) + #define push_parse_scope_2(scope, line) parse_scope ps_##line{ current_scope, scope } + #define push_parse_scope_1(scope, line) push_parse_scope_2(scope, line) + #define push_parse_scope(scope) push_parse_scope_1(scope, __LINE__) struct parsed_key final { @@ -247,8 +257,8 @@ namespace toml::impl std::vector dotted_key_tables; std::vector table_arrays; std::string recording_buffer; //for diagnostics - bool recording = false; - std::vector scope_stack; + bool recording = false, recording_whitespace = true; + std::string_view current_scope; #if !TOML_EXCEPTIONS mutable optional err; #endif @@ -271,14 +281,13 @@ namespace toml::impl return; #endif - const auto current_scope = scope_stack.empty() ? ""sv : scope_stack.back(); static constexpr auto buf_size = 512_sz; char buf[buf_size]; auto write_pos = buf; const auto max_write_pos = buf + (buf_size - 1_sz); //allow for null terminator concatenate(write_pos, max_write_pos, "Error while parsing "sv); concatenate(write_pos, max_write_pos, current_scope); - concatenate(write_pos, max_write_pos, "; "sv); + concatenate(write_pos, max_write_pos, ": "sv); (concatenate(write_pos, max_write_pos, reason), ...); *write_pos = '\0'; #if TOML_EXCEPTIONS @@ -326,7 +335,10 @@ namespace toml::impl #endif if (recording && !is_eof()) - recording_buffer.append(cp->as_view()); + { + if (recording_whitespace || !(is_whitespace(*cp) || is_line_break(*cp))) + recording_buffer.append(cp->as_view()); + } } void start_recording(bool include_current = true) noexcept @@ -334,6 +346,7 @@ namespace toml::impl return_if_error(); recording = true; + recording_whitespace = true; recording_buffer.clear(); if (include_current && !is_eof()) recording_buffer.append(cp->as_view()); @@ -348,6 +361,8 @@ namespace toml::impl { if (pop_bytes >= recording_buffer.length()) recording_buffer.clear(); + else if (pop_bytes == 1_sz) + recording_buffer.pop_back(); else recording_buffer.erase( recording_buffer.begin() + static_cast(recording_buffer.length() - pop_bytes), @@ -427,16 +442,13 @@ namespace toml::impl // toml/issues/567 (disallow non-TAB control characters in comments) if (is_nontab_control_character(*cp)) set_error_and_return_default( - "control characters " - "other than TAB (U+0009) are explicitly prohibited from appearing " - "in comments."sv + "control characters other than TAB (U+0009) are explicitly prohibited"sv ); // toml/pull/720 (disallow surrogates in comments) else if (is_unicode_surrogate(*cp)) set_error_and_return_default( - "unicode surrogates (U+D800 to U+DFFF) are explicitly prohibited " - "from appearing in comments."sv + "unicode surrogates (U+D800 to U+DFFF) are explicitly prohibited"sv ); } advance_and_return_if_error({}); @@ -542,12 +554,9 @@ namespace toml::impl } } - // skip the escaped character - const auto escaped_codepoint = cp->value; - advance_and_return_if_error_or_eof({}); - + bool skipped_escaped_codepoint = false; assert_not_eof(); - switch (escaped_codepoint) + switch (const auto escaped_codepoint = *cp) { // 'regular' escape codes case U'b': str += TOML_STRING_PREFIX('\b'); break; @@ -563,7 +572,7 @@ namespace toml::impl if constexpr (!TOML_LANG_UNRELEASED) // toml/pull/709 (\xHH unicode scalar sequences) { set_error_and_return_default( - "escape sequence '\\x' is not supported in TOML 1.0.0 and earlier."sv + "escape sequence '\\x' is not supported in TOML 1.0.0 and earlier"sv ); } [[fallthrough]]; @@ -571,13 +580,18 @@ namespace toml::impl case U'U': { push_parse_scope("unicode scalar escape sequence"sv); - uint32_t place_value = escaped_codepoint == U'U' ? 0x10000000u : (escaped_codepoint == U'u' ? 0x1000u : 0x10u); + advance_and_return_if_error_or_eof({}); + skipped_escaped_codepoint = true; + + uint32_t place_value = escaped_codepoint == U'U' + ? 0x10000000u + : (escaped_codepoint == U'u' ? 0x1000u : 0x10u); uint32_t sequence_value{}; while (place_value) { set_error_and_return_if_eof({}); if (!is_hexadecimal_digit(*cp)) - set_error_and_return_default("expected hex digit, saw '\\"sv, *cp, "'"sv); + set_error_and_return_default("expected hex digit, saw '"sv, *cp, "'"sv); sequence_value += place_value * hex_to_dec(*cp); place_value /= 16u; advance_and_return_if_error({}); @@ -585,10 +599,10 @@ namespace toml::impl if (is_unicode_surrogate(sequence_value)) set_error_and_return_default( - "unicode surrogates (U+D800 - U+DFFF) are explicitly prohibited."sv + "unicode surrogates (U+D800 - U+DFFF) are explicitly prohibited"sv ); else if (sequence_value > 0x10FFFFu) - set_error_and_return_default("values greater than U+10FFFF are invalid."sv); + set_error_and_return_default("values greater than U+10FFFF are invalid"sv); else if (sequence_value <= 0x7Fu) //ascii str += static_cast(sequence_value & 0x7Fu); else if (sequence_value <= 0x7FFu) @@ -616,6 +630,10 @@ namespace toml::impl default: set_error_and_return_default("unknown escape sequence '\\"sv, *cp, "'"sv); } + + // skip the escaped character + if (!skipped_escaped_codepoint) + advance_and_return_if_error_or_eof({}); } else TOML_LIKELY { @@ -679,7 +697,7 @@ namespace toml::impl // handle escapes else if (*cp == U'\\') { - advance_and_return_if_error({}); // skip the '\' + advance_and_return_if_error_or_eof({}); // skip the '\' skipping_whitespace = false; escaped = true; continue; @@ -701,7 +719,7 @@ namespace toml::impl // handle control characters if (is_nontab_control_character(*cp)) set_error_and_return_default( - "unescaped control characters other than TAB (U+0009) are explicitly prohibited."sv + "unescaped control characters other than TAB (U+0009) are explicitly prohibited"sv ); // handle surrogates in strings (1.0.0 and later) @@ -709,7 +727,7 @@ namespace toml::impl { if (is_unicode_surrogate(*cp)) set_error_and_return_default( - "unescaped unicode surrogates (U+D800 to U+DFFF) are explicitly prohibited."sv + "unescaped unicode surrogates (U+D800 to U+DFFF) are explicitly prohibited"sv ); } @@ -826,7 +844,7 @@ namespace toml::impl // handle control characters if (is_nontab_control_character(*cp)) set_error_and_return_default( - "control characters other than TAB (U+0009) are explicitly prohibited."sv + "control characters other than TAB (U+0009) are explicitly prohibited"sv ); // handle surrogates in strings (1.0.0 and later) @@ -834,7 +852,7 @@ namespace toml::impl { if (is_unicode_surrogate(*cp)) set_error_and_return_default( - "unicode surrogates (U+D800 - U+DFFF) are explicitly prohibited."sv + "unicode surrogates (U+D800 - U+DFFF) are explicitly prohibited"sv ); } @@ -1661,7 +1679,7 @@ namespace toml::impl if (!val->ref_cast().is_homogeneous()) set_error_at( begin_pos, - "arrays cannot contain values of different types before TOML 1.0.0." + "arrays cannot contain values of different types before TOML 1.0.0" ); } } @@ -1688,7 +1706,7 @@ namespace toml::impl // underscores at the beginning else if (*cp == U'_') - set_error_and_return_default("values may not begin with underscores."sv); + set_error_and_return_default("values may not begin with underscores"sv); return_if_error({}); if (val) @@ -2124,6 +2142,7 @@ namespace toml::impl push_parse_scope("key"sv); parsed_key key; + recording_whitespace = false; while (!is_error()) { @@ -2139,7 +2158,11 @@ namespace toml::impl // "quoted key segment" else if (is_string_delimiter(*cp)) + { + recording_whitespace = true; key.segments.push_back(parse_string()); + recording_whitespace = false; + } // ??? else @@ -2151,14 +2174,8 @@ namespace toml::impl consume_leading_whitespace(); // eof or no more key to come - if (is_eof()) + if (is_eof() || *cp != U'.') break; - if (*cp != U'.') - { - if (recording) - stop_recording(1_sz); - break; - } // was a dotted key, so go around again to consume the next segment advance_and_return_if_error_or_eof({}); @@ -2179,7 +2196,8 @@ namespace toml::impl // get the key start_recording(); - auto key = parse_key(); //parse_key() calls stop_recording() + auto key = parse_key(); + stop_recording(1_sz); // skip past any whitespace that followed the key consume_leading_whitespace(); @@ -2232,7 +2250,8 @@ namespace toml::impl // get the actual key start_recording(); - key = parse_key(); //parse_key() calls stop_recording() + key = parse_key(); + stop_recording(1_sz); return_if_error({}); // skip past any whitespace that followed the key @@ -2452,6 +2471,8 @@ namespace toml::impl // "quoted keys" else if (is_bare_key_character(*cp) || is_string_delimiter(*cp)) { + push_parse_scope("key-value pair"sv); + parse_key_value_pair_and_insert(current_table); // handle the rest of the line after the kvp @@ -2530,10 +2551,7 @@ namespace toml::impl #endif if (cp) - { - scope_stack.reserve(20_sz); parse_document(); - } update_region_ends(root); } diff --git a/python/generate_single_header.py b/python/generate_single_header.py index 5994e83..6ed5a63 100644 --- a/python/generate_single_header.py +++ b/python/generate_single_header.py @@ -53,9 +53,6 @@ class Preprocessor: self.processed_includes.append(incl) text = read_all_text_from_file(path.join(get_script_folder(), '..', 'include', 'toml++', incl)) text = re.sub(r'//[#!]\s*[{][{].*?//[#!]\s*[}][}]', '', text, 0, re.I | re.S) - text = re.sub(r'^\s*#\s*pragma\s+once\s*$', '', text, 0, re.I | re.M) - text = re.sub(r'^\s*//\s*clang-format\s+.+?$', '', text, 0, re.I | re.M) - text = re.sub(r'^\s*//\s*SPDX-License-Identifier:.+?$', '', text, 0, re.I | re.M) self.current_level += 1 text = re.sub(r'^\s*#\s*include\s+"(.+?)"', lambda m : self.preprocess(m), text, 0, re.I | re.M) self.current_level -= 1 @@ -83,6 +80,9 @@ def main(): # preprocess header(s) source_text = Preprocessor()('toml.h') source_text = re.sub('\r\n', '\n', source_text, 0, re.I | re.M) # convert windows newlines + source_text = re.sub(r'^\s*#\s*pragma\s+once\s*$', '', source_text, 0, re.I | re.M) # 'pragma once' + source_text = re.sub(r'^\s*//\s*clang-format\s+.+?$', '', source_text, 0, re.I | re.M) # clang-format directives + source_text = re.sub(r'^\s*//\s*SPDX-License-Identifier:.+?$', '', source_text, 0, re.I | re.M) # spdx source_text = re.sub('(?:(?:\n|^)[ \t]*//[/#!<]+[^\n]*)+\n', '\n', source_text, 0, re.I | re.M) # remove 'magic' comment blocks source_text = re.sub('(?://[/#!<].*?)\n', '\n', source_text, 0, re.I | re.M) # remove 'magic' comments source_text = re.sub('([^ \t])[ \t]+\n', '\\1\n', source_text, 0, re.I | re.M) # remove trailing whitespace @@ -169,8 +169,8 @@ v0.5.0: https://github.com/toml-lang/toml/blob/master/versions/en/toml-v0.5 print('//', file=output_file) print(make_divider(), file=output_file) print('''// clang-format off -#ifndef TOMLPLUSPLUS_SINGLE_HEADER_H -#define TOMLPLUSPLUS_SINGLE_HEADER_H +#ifndef INCLUDE_TOMLPLUSPLUS_H +#define INCLUDE_TOMLPLUSPLUS_H #ifdef __GNUC__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunknown-pragmas" @@ -182,7 +182,7 @@ v0.5.0: https://github.com/toml-lang/toml/blob/master/versions/en/toml-v0.5 #ifdef __GNUC__ #pragma GCC diagnostic pop #endif -#endif // TOMLPLUSPLUS_SINGLE_HEADER_H +#endif // INCLUDE_TOMLPLUSPLUS_H // clang-format on''', file=output_file) diff --git a/toml.hpp b/toml.hpp index fc3e052..55ae6d5 100644 --- a/toml.hpp +++ b/toml.hpp @@ -42,8 +42,8 @@ // //---------------------------------------------------------------------------------------------------------------------- // clang-format off -#ifndef TOMLPLUSPLUS_SINGLE_HEADER_H -#define TOMLPLUSPLUS_SINGLE_HEADER_H +#ifndef INCLUDE_TOMLPLUSPLUS_H +#define INCLUDE_TOMLPLUSPLUS_H #ifdef __GNUC__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunknown-pragmas" @@ -6802,15 +6802,25 @@ namespace TOML_INTERNAL_NAMESPACE struct parse_scope final { - std::vector& stack_; + std::string_view& storage_; + std::string_view parent_; TOML_NODISCARD_CTOR - explicit parse_scope(std::vector& stack) noexcept : stack_{ stack } {} - ~parse_scope() noexcept { stack_.pop_back(); } + explicit parse_scope(std::string_view& current_scope, std::string_view new_scope) noexcept + : storage_{ current_scope }, + parent_{ current_scope } + { + storage_ = new_scope; + } + + ~parse_scope() noexcept + { + storage_ = parent_; + } }; - #define push_parse_scope_2(desc, line) scope_stack.push_back(desc); parse_scope ps_##line{ scope_stack } - #define push_parse_scope_1(desc, line) push_parse_scope_2(desc, line) - #define push_parse_scope(desc) push_parse_scope_1(desc, __LINE__) + #define push_parse_scope_2(scope, line) parse_scope ps_##line{ current_scope, scope } + #define push_parse_scope_1(scope, line) push_parse_scope_2(scope, line) + #define push_parse_scope(scope) push_parse_scope_1(scope, __LINE__) struct parsed_key final { @@ -6894,8 +6904,8 @@ namespace toml::impl std::vector dotted_key_tables; std::vector table_arrays; std::string recording_buffer; //for diagnostics - bool recording = false; - std::vector scope_stack; + bool recording = false, recording_whitespace = true; + std::string_view current_scope; #if !TOML_EXCEPTIONS mutable optional err; #endif @@ -6918,14 +6928,13 @@ namespace toml::impl return; #endif - const auto current_scope = scope_stack.empty() ? ""sv : scope_stack.back(); static constexpr auto buf_size = 512_sz; char buf[buf_size]; auto write_pos = buf; const auto max_write_pos = buf + (buf_size - 1_sz); //allow for null terminator concatenate(write_pos, max_write_pos, "Error while parsing "sv); concatenate(write_pos, max_write_pos, current_scope); - concatenate(write_pos, max_write_pos, "; "sv); + concatenate(write_pos, max_write_pos, ": "sv); (concatenate(write_pos, max_write_pos, reason), ...); *write_pos = '\0'; #if TOML_EXCEPTIONS @@ -6973,7 +6982,10 @@ namespace toml::impl #endif if (recording && !is_eof()) - recording_buffer.append(cp->as_view()); + { + if (recording_whitespace || !(is_whitespace(*cp) || is_line_break(*cp))) + recording_buffer.append(cp->as_view()); + } } void start_recording(bool include_current = true) noexcept @@ -6981,6 +6993,7 @@ namespace toml::impl return_if_error(); recording = true; + recording_whitespace = true; recording_buffer.clear(); if (include_current && !is_eof()) recording_buffer.append(cp->as_view()); @@ -6995,6 +7008,8 @@ namespace toml::impl { if (pop_bytes >= recording_buffer.length()) recording_buffer.clear(); + else if (pop_bytes == 1_sz) + recording_buffer.pop_back(); else recording_buffer.erase( recording_buffer.begin() + static_cast(recording_buffer.length() - pop_bytes), @@ -7074,16 +7089,13 @@ namespace toml::impl // toml/issues/567 (disallow non-TAB control characters in comments) if (is_nontab_control_character(*cp)) set_error_and_return_default( - "control characters " - "other than TAB (U+0009) are explicitly prohibited from appearing " - "in comments."sv + "control characters other than TAB (U+0009) are explicitly prohibited"sv ); // toml/pull/720 (disallow surrogates in comments) else if (is_unicode_surrogate(*cp)) set_error_and_return_default( - "unicode surrogates (U+D800 to U+DFFF) are explicitly prohibited " - "from appearing in comments."sv + "unicode surrogates (U+D800 to U+DFFF) are explicitly prohibited"sv ); } advance_and_return_if_error({}); @@ -7189,12 +7201,9 @@ namespace toml::impl } } - // skip the escaped character - const auto escaped_codepoint = cp->value; - advance_and_return_if_error_or_eof({}); - + bool skipped_escaped_codepoint = false; assert_not_eof(); - switch (escaped_codepoint) + switch (const auto escaped_codepoint = *cp) { // 'regular' escape codes case U'b': str += TOML_STRING_PREFIX('\b'); break; @@ -7210,7 +7219,7 @@ namespace toml::impl if constexpr (!TOML_LANG_UNRELEASED) // toml/pull/709 (\xHH unicode scalar sequences) { set_error_and_return_default( - "escape sequence '\\x' is not supported in TOML 1.0.0 and earlier."sv + "escape sequence '\\x' is not supported in TOML 1.0.0 and earlier"sv ); } [[fallthrough]]; @@ -7218,13 +7227,18 @@ namespace toml::impl case U'U': { push_parse_scope("unicode scalar escape sequence"sv); - uint32_t place_value = escaped_codepoint == U'U' ? 0x10000000u : (escaped_codepoint == U'u' ? 0x1000u : 0x10u); + advance_and_return_if_error_or_eof({}); + skipped_escaped_codepoint = true; + + uint32_t place_value = escaped_codepoint == U'U' + ? 0x10000000u + : (escaped_codepoint == U'u' ? 0x1000u : 0x10u); uint32_t sequence_value{}; while (place_value) { set_error_and_return_if_eof({}); if (!is_hexadecimal_digit(*cp)) - set_error_and_return_default("expected hex digit, saw '\\"sv, *cp, "'"sv); + set_error_and_return_default("expected hex digit, saw '"sv, *cp, "'"sv); sequence_value += place_value * hex_to_dec(*cp); place_value /= 16u; advance_and_return_if_error({}); @@ -7232,10 +7246,10 @@ namespace toml::impl if (is_unicode_surrogate(sequence_value)) set_error_and_return_default( - "unicode surrogates (U+D800 - U+DFFF) are explicitly prohibited."sv + "unicode surrogates (U+D800 - U+DFFF) are explicitly prohibited"sv ); else if (sequence_value > 0x10FFFFu) - set_error_and_return_default("values greater than U+10FFFF are invalid."sv); + set_error_and_return_default("values greater than U+10FFFF are invalid"sv); else if (sequence_value <= 0x7Fu) //ascii str += static_cast(sequence_value & 0x7Fu); else if (sequence_value <= 0x7FFu) @@ -7263,6 +7277,10 @@ namespace toml::impl default: set_error_and_return_default("unknown escape sequence '\\"sv, *cp, "'"sv); } + + // skip the escaped character + if (!skipped_escaped_codepoint) + advance_and_return_if_error_or_eof({}); } else TOML_LIKELY { @@ -7326,7 +7344,7 @@ namespace toml::impl // handle escapes else if (*cp == U'\\') { - advance_and_return_if_error({}); // skip the '\' + advance_and_return_if_error_or_eof({}); // skip the '\' skipping_whitespace = false; escaped = true; continue; @@ -7348,7 +7366,7 @@ namespace toml::impl // handle control characters if (is_nontab_control_character(*cp)) set_error_and_return_default( - "unescaped control characters other than TAB (U+0009) are explicitly prohibited."sv + "unescaped control characters other than TAB (U+0009) are explicitly prohibited"sv ); // handle surrogates in strings (1.0.0 and later) @@ -7356,7 +7374,7 @@ namespace toml::impl { if (is_unicode_surrogate(*cp)) set_error_and_return_default( - "unescaped unicode surrogates (U+D800 to U+DFFF) are explicitly prohibited."sv + "unescaped unicode surrogates (U+D800 to U+DFFF) are explicitly prohibited"sv ); } @@ -7473,7 +7491,7 @@ namespace toml::impl // handle control characters if (is_nontab_control_character(*cp)) set_error_and_return_default( - "control characters other than TAB (U+0009) are explicitly prohibited."sv + "control characters other than TAB (U+0009) are explicitly prohibited"sv ); // handle surrogates in strings (1.0.0 and later) @@ -7481,7 +7499,7 @@ namespace toml::impl { if (is_unicode_surrogate(*cp)) set_error_and_return_default( - "unicode surrogates (U+D800 - U+DFFF) are explicitly prohibited."sv + "unicode surrogates (U+D800 - U+DFFF) are explicitly prohibited"sv ); } @@ -8306,7 +8324,7 @@ namespace toml::impl if (!val->ref_cast().is_homogeneous()) set_error_at( begin_pos, - "arrays cannot contain values of different types before TOML 1.0.0." + "arrays cannot contain values of different types before TOML 1.0.0" ); } } @@ -8333,7 +8351,7 @@ namespace toml::impl // underscores at the beginning else if (*cp == U'_') - set_error_and_return_default("values may not begin with underscores."sv); + set_error_and_return_default("values may not begin with underscores"sv); return_if_error({}); if (val) @@ -8769,6 +8787,7 @@ namespace toml::impl push_parse_scope("key"sv); parsed_key key; + recording_whitespace = false; while (!is_error()) { @@ -8784,7 +8803,11 @@ namespace toml::impl // "quoted key segment" else if (is_string_delimiter(*cp)) + { + recording_whitespace = true; key.segments.push_back(parse_string()); + recording_whitespace = false; + } // ??? else @@ -8796,14 +8819,8 @@ namespace toml::impl consume_leading_whitespace(); // eof or no more key to come - if (is_eof()) + if (is_eof() || *cp != U'.') break; - if (*cp != U'.') - { - if (recording) - stop_recording(1_sz); - break; - } // was a dotted key, so go around again to consume the next segment advance_and_return_if_error_or_eof({}); @@ -8824,7 +8841,8 @@ namespace toml::impl // get the key start_recording(); - auto key = parse_key(); //parse_key() calls stop_recording() + auto key = parse_key(); + stop_recording(1_sz); // skip past any whitespace that followed the key consume_leading_whitespace(); @@ -8877,7 +8895,8 @@ namespace toml::impl // get the actual key start_recording(); - key = parse_key(); //parse_key() calls stop_recording() + key = parse_key(); + stop_recording(1_sz); return_if_error({}); // skip past any whitespace that followed the key @@ -9096,6 +9115,8 @@ namespace toml::impl // "quoted keys" else if (is_bare_key_character(*cp) || is_string_delimiter(*cp)) { + push_parse_scope("key-value pair"sv); + parse_key_value_pair_and_insert(current_table); // handle the rest of the line after the kvp @@ -9173,10 +9194,7 @@ namespace toml::impl #endif if (cp) - { - scope_stack.reserve(20_sz); parse_document(); - } update_region_ends(root); } @@ -9607,5 +9625,5 @@ namespace toml #ifdef __GNUC__ #pragma GCC diagnostic pop #endif -#endif // TOMLPLUSPLUS_SINGLE_HEADER_H +#endif // INCLUDE_TOMLPLUSPLUS_H // clang-format on diff --git a/vs/error_printer.vcxproj b/vs/error_printer.vcxproj new file mode 100644 index 0000000..c81021e --- /dev/null +++ b/vs/error_printer.vcxproj @@ -0,0 +1,71 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 16.0 + {DAB4634D-8145-4860-AE45-5198E76FF324} + 10.0 + + + + Application + true + v142 + MultiByte + + + Application + false + v142 + true + MultiByte + + + Application + true + v142 + MultiByte + + + Application + false + v142 + true + MultiByte + + + + + + + + $(SolutionDir)..\examples\ + + + + + + + + + + + + \ No newline at end of file diff --git a/vs/simple_parser.vcxproj b/vs/simple_parser.vcxproj index a333a95..896c776 100644 --- a/vs/simple_parser.vcxproj +++ b/vs/simple_parser.vcxproj @@ -63,6 +63,7 @@ + diff --git a/vs/toml++.sln b/vs/toml++.sln index 8cc745c..83f19fe 100644 --- a/vs/toml++.sln +++ b/vs/toml++.sln @@ -47,6 +47,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "simple_parser", "simple_par EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "toml_generator", "toml_generator.vcxproj", "{23CE3B73-FEE7-436C-9B4E-3DFB202EE9A2}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "error_printer", "error_printer.vcxproj", "{DAB4634D-8145-4860-AE45-5198E76FF324}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Win32 = Debug|Win32 @@ -215,6 +217,14 @@ Global {23CE3B73-FEE7-436C-9B4E-3DFB202EE9A2}.Release|Win32.Build.0 = Release|Win32 {23CE3B73-FEE7-436C-9B4E-3DFB202EE9A2}.Release|x64.ActiveCfg = Release|x64 {23CE3B73-FEE7-436C-9B4E-3DFB202EE9A2}.Release|x64.Build.0 = Release|x64 + {DAB4634D-8145-4860-AE45-5198E76FF324}.Debug|Win32.ActiveCfg = Debug|Win32 + {DAB4634D-8145-4860-AE45-5198E76FF324}.Debug|Win32.Build.0 = Debug|Win32 + {DAB4634D-8145-4860-AE45-5198E76FF324}.Debug|x64.ActiveCfg = Debug|x64 + {DAB4634D-8145-4860-AE45-5198E76FF324}.Debug|x64.Build.0 = Debug|x64 + {DAB4634D-8145-4860-AE45-5198E76FF324}.Release|Win32.ActiveCfg = Release|Win32 + {DAB4634D-8145-4860-AE45-5198E76FF324}.Release|Win32.Build.0 = Release|Win32 + {DAB4634D-8145-4860-AE45-5198E76FF324}.Release|x64.ActiveCfg = Release|x64 + {DAB4634D-8145-4860-AE45-5198E76FF324}.Release|x64.Build.0 = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -239,6 +249,7 @@ Global {0CAD095A-C9F2-49FC-9C9F-4508498BE488} = {4E25CF88-D7D8-4A9C-A52E-0D78281E82EC} {259FCEE5-3442-4076-9547-2BA793ECA1CB} = {412816A5-9D22-4A30-BCDF-ABFB54BB3735} {23CE3B73-FEE7-436C-9B4E-3DFB202EE9A2} = {412816A5-9D22-4A30-BCDF-ABFB54BB3735} + {DAB4634D-8145-4860-AE45-5198E76FF324} = {412816A5-9D22-4A30-BCDF-ABFB54BB3735} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {0926DDCC-88CD-4839-A82D-D9B99E02A0B1} diff --git a/vs/toml_generator.vcxproj b/vs/toml_generator.vcxproj index e412e8d..e2107bd 100644 --- a/vs/toml_generator.vcxproj +++ b/vs/toml_generator.vcxproj @@ -65,5 +65,8 @@ + + + \ No newline at end of file diff --git a/vs/toml_to_json_transcoder.vcxproj b/vs/toml_to_json_transcoder.vcxproj index 5f1f58b..9f57e2f 100644 --- a/vs/toml_to_json_transcoder.vcxproj +++ b/vs/toml_to_json_transcoder.vcxproj @@ -65,5 +65,8 @@ + + + \ No newline at end of file