From 761690d4a6353d1b4595f5dc3e6d166b1d06c7aa Mon Sep 17 00:00:00 2001 From: Mark Gillard Date: Thu, 25 Jun 2020 17:33:01 +0300 Subject: [PATCH] fixed BOM check causing EOF on very short iostream inputs also fixed a number of small parsing conformance issues --- .gitmodules | 6 + examples/error_printer.cpp | 2 + include/toml++/toml_parser.hpp | 54 +++- include/toml++/toml_utf8_streams.h | 19 +- include/toml++/toml_version.h | 2 +- meson.build | 2 +- python/ci_single_header_check.py | 51 +--- python/generate_conformance_tests.py | 126 ++++++++ python/generate_documentation.py | 98 +------ python/generate_single_header.py | 57 +--- python/generate_unicode_functions.py | 49 +--- python/utils.py | 105 +++++++ tests/conformance.cpp | 340 ++++++++++++++++++++++ tests/meson.build | 1 + tests/parsing_floats.cpp | 53 ++-- tests/parsing_tables.cpp | 2 + tests/tests.h | 18 +- tests/unicode_generated.cpp | 4 +- toml.hpp | 74 +++-- vs/test_char.vcxproj | 1 + vs/test_char8.vcxproj | 1 + vs/test_char8_noexcept.vcxproj | 1 + vs/test_char8_strict.vcxproj | 1 + vs/test_char8_strict_noexcept.vcxproj | 1 + vs/test_char_noexcept.vcxproj | 1 + vs/test_char_strict.vcxproj | 1 + vs/test_char_strict_noexcept.vcxproj | 1 + vs/test_x86_char.vcxproj | 1 + vs/test_x86_char8.vcxproj | 1 + vs/test_x86_char8_noexcept.vcxproj | 1 + vs/test_x86_char8_strict.vcxproj | 1 + vs/test_x86_char8_strict_noexcept.vcxproj | 1 + vs/test_x86_char_noexcept.vcxproj | 1 + vs/test_x86_char_strict.vcxproj | 1 + vs/test_x86_char_strict_noexcept.vcxproj | 1 + vs/toml++.vcxproj | 2 + vs/toml++.vcxproj.filters | 6 + 37 files changed, 793 insertions(+), 294 deletions(-) create mode 100644 python/generate_conformance_tests.py create mode 100644 python/utils.py create mode 100644 tests/conformance.cpp diff --git a/.gitmodules b/.gitmodules index 99fcad6..870ae96 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,3 +7,9 @@ [submodule "extern/tloptional"] path = extern/tloptional url = 
https://github.com/TartanLlama/optional.git +[submodule "extern/toml-test"] + path = extern/toml-test + url = https://github.com/BurntSushi/toml-test.git +[submodule "extern/toml-spec-tests"] + path = extern/toml-spec-tests + url = https://github.com/iarna/toml-spec-tests.git diff --git a/examples/error_printer.cpp b/examples/error_printer.cpp index 9e95ed9..db20718 100644 --- a/examples/error_printer.cpp +++ b/examples/error_printer.cpp @@ -34,11 +34,13 @@ namespace "val = {?='b'}"sv, "########## tables"sv, + "[]"sv, "[foo"sv, "[foo] ?"sv, "[foo] [bar]"sv, "[foo]\n[foo]"sv, "? = 'foo' ?"sv, + "[ [foo] ]"sv, "########## arrays"sv, "val = [,]"sv, diff --git a/include/toml++/toml_parser.hpp b/include/toml++/toml_parser.hpp index d6eef85..fd39a10 100644 --- a/include/toml++/toml_parser.hpp +++ b/include/toml++/toml_parser.hpp @@ -1022,6 +1022,7 @@ namespace toml::impl size_t length = {}; const utf8_codepoint* prev = {}; bool seen_decimal = false, seen_exponent = false; + char first_integer_part = '\0'; while (!is_eof() && !is_value_terminator(*cp)) { if (*cp == U'_') @@ -1037,8 +1038,14 @@ namespace toml::impl set_error_and_return_default("underscores must be followed by digits."sv); else if (*cp == U'.') { + // .1 + // -.1 + // +.1 (no integer part) + if (!first_integer_part) + set_error_and_return_default("expected decimal digit, saw '.'"sv); + // 1.0e+.10 (exponent cannot have '.') - if (seen_exponent) + else if (seen_exponent) set_error_and_return_default("expected exponent decimal digit or sign, saw '.'"sv); // 1.0.e+.10 @@ -1051,8 +1058,11 @@ namespace toml::impl } else if (is_match(*cp, U'e', U'E')) { + if (prev && !is_decimal_digit(*prev)) + set_error_and_return_default("expected decimal digit, saw '"sv, *cp, '\''); + // 1.0ee+10 (multiple 'e') - if (seen_exponent) + else if (seen_exponent) set_error_and_return_default("expected decimal digit, saw '"sv, *cp, '\''); seen_decimal = true; // implied @@ -1068,11 +1078,20 @@ namespace toml::impl else if
(!is_match(*prev, U'e', U'E')) set_error_and_return_default("expected exponent digit, saw '"sv, *cp, '\''); } - else if (!is_decimal_digit(*cp)) - set_error_and_return_default("expected decimal digit, saw '"sv, *cp, '\''); else if (length == sizeof(chars)) set_error_and_return_default("exceeds maximum length of "sv, sizeof(chars), " characters."sv); - + else if (is_decimal_digit(*cp)) + { + if (!seen_decimal) + { + if (!first_integer_part) + first_integer_part = static_cast<char>(cp->bytes[0]); + else if (first_integer_part == '0') + set_error_and_return_default("leading zeroes are prohibited"sv); + } + } + else + set_error_and_return_default("expected decimal digit, saw '"sv, *cp, '\''); chars[length++] = static_cast<char>(cp->bytes[0]); prev = cp; @@ -1087,10 +1106,10 @@ namespace toml::impl set_error_and_return_if_eof({}); set_error_and_return_default("underscores must be followed by digits."sv); } - else if (is_match(*prev, U'e', U'E', U'+', U'-')) + else if (is_match(*prev, U'e', U'E', U'+', U'-', U'.')) { set_error_and_return_if_eof({}); - set_error_and_return_default("expected exponent digit, saw '"sv, *cp, '\''); + set_error_and_return_default("expected decimal digit, saw '"sv, *cp, '\''); } } @@ -2288,17 +2307,30 @@ namespace toml::impl // skip first '[' advance_and_return_if_error_or_eof({}); + // skip past any whitespace that followed the '[' + const bool had_leading_whitespace = consume_leading_whitespace(); + set_error_and_return_if_eof({}); + // skip second '[' (if present) if (*cp == U'[') { + if (had_leading_whitespace) + set_error_and_return_default( + "[[array-of-table]] brackets must be contiguous (i.e.
[ [ this ] ] is prohibited)"sv + ); + is_arr = true; advance_and_return_if_error_or_eof({}); + + // skip past any whitespace that followed the '[' + consume_leading_whitespace(); + set_error_and_return_if_eof({}); } - // skip past any whitespace that followed the '[' - consume_leading_whitespace(); - set_error_and_return_if_eof({}); - + // check for a premature closing ']' + if (*cp == U']') + set_error_and_return_default("tables with blank bare keys are explicitly prohibited"sv); + // get the actual key start_recording(); key = parse_key(); diff --git a/include/toml++/toml_utf8_streams.h b/include/toml++/toml_utf8_streams.h index 5830ab7..9afd06c 100644 --- a/include/toml++/toml_utf8_streams.h +++ b/include/toml++/toml_utf8_streams.h @@ -93,22 +93,17 @@ namespace toml::impl explicit utf8_byte_stream(std::basic_istream<Char>& stream) : source{ &stream } { - if (!*source) + if (!source->good()) // eof, fail, bad return; - using stream_traits = typename std::remove_pointer_t<decltype(source)>::traits_type; const auto initial_pos = source->tellg(); - size_t bom_pos{}; Char bom[3]; - for (; bom_pos < 3_sz && *source; bom_pos++) - { - const auto next = source->get(); - if (next == stream_traits::eof()) - break; - bom[bom_pos] = static_cast<Char>(next); - } - if (!*source || bom_pos < 3_sz || memcmp(utf8_byte_order_mark.data(), bom, 3_sz) != 0) - source->seekg(initial_pos); + source->read(bom, 3); + if (source->bad() || (source->gcount() == 3 && memcmp(utf8_byte_order_mark.data(), bom, 3_sz) == 0)) + return; + + source->clear(); + source->seekg(initial_pos, std::ios::beg); } [[nodiscard]] TOML_ALWAYS_INLINE diff --git a/include/toml++/toml_version.h b/include/toml++/toml_version.h index eef050b..0ae8132 100644 --- a/include/toml++/toml_version.h +++ b/include/toml++/toml_version.h @@ -7,7 +7,7 @@ #define TOML_LIB_MAJOR 1 #define TOML_LIB_MINOR 3 -#define TOML_LIB_PATCH 2 +#define TOML_LIB_PATCH 3 #define TOML_LANG_MAJOR 1 #define TOML_LANG_MINOR 0 diff --git a/meson.build b/meson.build index
acdab76..39fd095 100644 --- a/meson.build +++ b/meson.build @@ -1,7 +1,7 @@ project( 'tomlplusplus', 'cpp', - version : '1.3.2', + version : '1.3.3', license : 'MIT', default_options : [ 'cpp_std=c++17', diff --git a/python/ci_single_header_check.py b/python/ci_single_header_check.py index e0120d7..5a43bde 100644 --- a/python/ci_single_header_check.py +++ b/python/ci_single_header_check.py @@ -5,46 +5,18 @@ # SPDX-License-Identifier: MIT import sys -import os import os.path as path -import traceback +import utils import hashlib -import subprocess -from shutil import which - - - -def is_tool(name): - return which(name) is not None - - - -def get_script_folder(): - return path.dirname(path.realpath(sys.argv[0])) - - - -def read_all_text_from_file(path): - print("Reading {}".format(path)) - with open(path, 'r', encoding='utf-8') as file: - text = file.read() - return text - - - -def run_python_script(script_path, *args): - subprocess.check_call( - ['py' if is_tool('py') else 'python3', script_path] + [arg for arg in args] - ) def main(): - hpp_path = path.join(get_script_folder(), '..', 'toml.hpp') - hash1 = hashlib.sha1(read_all_text_from_file(hpp_path).encode('utf-8')).hexdigest() + hpp_path = path.join(utils.get_script_folder(), '..', 'toml.hpp') + hash1 = hashlib.sha1(utils.read_all_text_from_file(hpp_path).encode('utf-8')).hexdigest() print("Hash 1: {}".format(hash1)) - run_python_script('generate_single_header.py') - hash2 = hashlib.sha1(read_all_text_from_file(hpp_path).encode('utf-8')).hexdigest() + utils.run_python_script('generate_single_header.py') + hash2 = hashlib.sha1(utils.read_all_text_from_file(hpp_path).encode('utf-8')).hexdigest() print("Hash 2: {}".format(hash2)) if (hash1 != hash2): print( @@ -58,15 +30,4 @@ def main(): if __name__ == '__main__': - try: - sys.exit(main()) - except Exception as err: - print( - 'Fatal error: [{}] {}'.format( - type(err).__name__, - str(err) - ), - file=sys.stderr - ) - traceback.print_exc(file=sys.stderr) - 
sys.exit(-1) + utils.run(main) diff --git a/python/generate_conformance_tests.py b/python/generate_conformance_tests.py new file mode 100644 index 0000000..8712b33 --- /dev/null +++ b/python/generate_conformance_tests.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python3 +# This file is a part of toml++ and is subject to the terms of the MIT license. +# Copyright (c) 2019-2020 Mark Gillard +# See https://github.com/marzer/tomlplusplus/blob/master/LICENSE for the full license text. +# SPDX-License-Identifier: MIT + +import sys +import os.path as path +import utils +import io +import re + + + +def open_test_file(name): + test_file_path = path.join(utils.get_script_folder(), '..', 'tests', name) + print("Writing to {}".format(test_file_path)) + return open(test_file_path, 'w', encoding='utf-8', newline='\n') + + + +def emit_preamble(test_file): + write = lambda txt: print(txt, file=test_file) + write('#include "tests.h"') + write('using namespace toml::impl;') + write('') + write('#if !TOML_UNRELEASED_FEATURES // todo: improve conformance script to remove this') + write('') + + + +def sanitize(s): + return re.sub(r'[ -]+', '_', s, 0, re.I | re.M) + + + +def emit_invalid_tests(test_file, name, source_folder, skip_list=None): + constants_buf = io.StringIO('', newline='\n') + constants = lambda txt: print(txt, file=constants_buf) + tests_buf = io.StringIO('', newline='\n') + tests = lambda txt: print(txt, file=tests_buf) + + constants('namespace // invalid test data for {}'.format(name)) + constants('{') + + tests('TEST_CASE("conformance - invalid inputs from {}")'.format(name)) + tests('{') + + #files = [path.splitext(path.split(f)[1])[0] for f in utils.get_all_files(source_folder,all="*.toml")] + files = [path.split(f) for f in utils.get_all_files(source_folder, all="*.toml")] + files = [(f[0], *path.splitext(f[1])) for f in files] + for dir,file,ext in files: + if skip_list and file in skip_list: + continue + identifier = sanitize(file) + constants('\t static constexpr
auto {} = S(R"({})"sv);'.format( + identifier, + utils.read_all_text_from_file(path.join(dir, file + ext)).strip() + )) + + tests('\tparsing_should_fail(FILE_LINE_ARGS, {});'.format(identifier)) + + constants('}') + tests('}') + + write = lambda txt: print(txt, file=test_file) + write(constants_buf.getvalue()) + write(tests_buf.getvalue()) + +def emit_appendix(test_file): + write = lambda txt: print(txt, file=test_file) + write('') + write('#endif // !TOML_UNRELEASED_FEATURES') + + + +def main(): + extern_root = path.join(utils.get_script_folder(), '..', 'extern') + with open_test_file('conformance.cpp') as test_file: + emit_preamble(test_file) + emit_invalid_tests( + test_file, + 'BurntSushi/toml-test', + path.join(extern_root, 'toml-test', 'tests', 'invalid'), + ( + # false negatives after TOML 0.4.0 + 'array-mixed-types-arrays-and-ints', + 'array-mixed-types-ints-and-floats', + 'array-mixed-types-strings-and-ints' + ) + ) + emit_invalid_tests( + test_file, + 'iarna/toml-spec-tests', + path.join(extern_root, 'toml-spec-tests', 'errors'), + ( + # I handle these internally, they get broken by I/O + 'comment-control-1', + 'comment-control-2', + 'comment-control-3', + 'comment-control-4', + 'string-basic-control-1', + 'string-basic-control-2', + 'string-basic-control-3', + 'string-basic-control-4', + 'string-basic-multiline-control-1', + 'string-basic-multiline-control-2', + 'string-basic-multiline-control-3', + 'string-basic-multiline-control-4', + 'string-literal-control-1', + 'string-literal-control-2', + 'string-literal-control-3', + 'string-literal-control-4', + 'string-literal-multiline-control-1', + 'string-literal-multiline-control-2', + 'string-literal-multiline-control-3', + 'string-literal-multiline-control-4', + + ) + ) + emit_appendix(test_file) + + + +if __name__ == '__main__': + utils.run(main) diff --git a/python/generate_documentation.py b/python/generate_documentation.py index b414012..4c94b87 100644 --- a/python/generate_documentation.py +++ 
b/python/generate_documentation.py @@ -5,17 +5,15 @@ # SPDX-License-Identifier: MIT import sys -import re import os import os.path as path +import utils +import re import traceback -import datetime import subprocess import random -import concurrent.futures -import shutil +import concurrent.futures as futures import html -import fnmatch import bs4 as soup @@ -90,46 +88,6 @@ string_literals = [ '_toml' ] -def is_tool(name): - return shutil.which(name) is not None - - - -def is_collection(val): - if isinstance(val, (list, tuple, dict, set)): - return True - return False - - - -def read_all_text_from_file(path): - print("Reading {}".format(path)) - with open(path, 'r', encoding='utf-8') as file: - text = file.read() - return text - - - -def get_all_files(dir, all=None, any=None): - files = [f for f in [path.join(dir, f) for f in os.listdir(dir)] if path.isfile(f)] - if (files and all is not None): - if (not is_collection(all)): - all = (all,) - all = [f for f in all if f is not None] - for fil in all: - files = fnmatch.filter(files, fil) - - if (files and any is not None): - if (not is_collection(any)): - any = (any,) - any = [f for f in any if f is not None] - if any: - results = set() - for fil in any: - results.update(fnmatch.filter(files, fil)) - files = [f for f in results] - return files - class HTMLDocument(object): @@ -186,7 +144,7 @@ class HTMLDocument(object): def html_find_parent(tag, names, cutoff=None): - if not is_collection(names): + if not utils.is_collection(names): names = [ names ] parent = tag.parent while (parent is not None): @@ -224,7 +182,7 @@ def html_shallow_search(starting_tag, names, filter = None): if isinstance(starting_tag, soup.NavigableString): return [] - if not is_collection(names): + if not utils.is_collection(names): names = [ names ] if starting_tag.name in names: @@ -947,23 +905,9 @@ def postprocess_file(dir, file, fixes): -def delete_directory(dir_path): - if (path.exists(dir_path)): - print('Deleting {}'.format(dir_path)) - 
shutil.rmtree(dir_path) -def get_script_folder(): - return path.dirname(path.realpath(sys.argv[0])) - - - -def run_python_script(script_path, *args): - subprocess.check_call( - ['py' if is_tool('py') else 'python3', script_path] + [arg for arg in args] - ) - def preprocess_xml(xml_dir): @@ -975,7 +919,7 @@ def main(): global _threadError num_threads = os.cpu_count() * 2 - root_dir = path.join(get_script_folder(), '..') + root_dir = path.join(utils.get_script_folder(), '..') docs_dir = path.join(root_dir, 'docs') xml_dir = path.join(docs_dir, 'xml') html_dir = path.join(docs_dir, 'html') @@ -983,8 +927,8 @@ def main(): doxygen = path.join(mcss_dir, 'documentation', 'doxygen.py') # delete any previously generated html and xml - delete_directory(xml_dir) - delete_directory(html_dir) + utils.delete_directory(xml_dir) + utils.delete_directory(html_dir) # run doxygen subprocess.check_call( ['doxygen', 'Doxyfile'], shell=True, cwd=docs_dir ) @@ -993,10 +937,10 @@ def main(): preprocess_xml(xml_dir) # run doxygen.py (m.css) - run_python_script(doxygen, path.join(docs_dir, 'Doxyfile-mcss'), '--no-doxygen') + utils.run_python_script(doxygen, path.join(docs_dir, 'Doxyfile-mcss'), '--no-doxygen') # delete xml - delete_directory(xml_dir) + utils.delete_directory(xml_dir) # post-process html files fixes = [ @@ -1012,11 +956,11 @@ def main(): , ExtDocLinksFix() , EnableIfFix() ] - files = [path.split(f) for f in get_all_files(html_dir, any=('*.html', '*.htm'))] + files = [path.split(f) for f in utils.get_all_files(html_dir, any=('*.html', '*.htm'))] if files: - with concurrent.futures.ThreadPoolExecutor(max_workers=min(len(files), num_threads)) as executor: + with futures.ThreadPoolExecutor(max_workers=min(len(files), num_threads)) as executor: jobs = { executor.submit(postprocess_file, dir, file, fixes) : file for dir, file in files } - for job in concurrent.futures.as_completed(jobs): + for job in futures.as_completed(jobs): if _threadError: executor.shutdown(False) break @@ 
-1024,20 +968,8 @@ def main(): file = jobs[job] print('Finished processing {}.'.format(file)) if _threadError: - sys.exit(1) + return 1 if __name__ == '__main__': - try: - main() - except Exception as err: - print( - 'Error: [{}] {}'.format( - type(err).__name__, - str(err) - ), - file=sys.stderr - ) - traceback.print_exc(file=sys.stderr) - sys.exit(1) - sys.exit() + utils.run(main) diff --git a/python/generate_single_header.py b/python/generate_single_header.py index 5c29acb..e0bac5c 100644 --- a/python/generate_single_header.py +++ b/python/generate_single_header.py @@ -5,45 +5,19 @@ # SPDX-License-Identifier: MIT import sys -import re -import os import os.path as path -import traceback - - - - -def get_script_folder(): - return path.dirname(path.realpath(sys.argv[0])) - - - -def read_all_text_from_file(path): - print("Reading {}".format(path)) - with open(path, 'r', encoding='utf-8') as file: - text = file.read() - return text - - - -def repeat_pattern(pattern, count): - if len(pattern) == 1: - return pattern * count - - text = '' - for i in range(0, count): - text = text + pattern[i % len(pattern)] - return text +import utils +import re def make_divider(text = None, text_col = 40, pattern = '-'): if (text is None): - return "//" + repeat_pattern(pattern, 118) + return "//" + utils.repeat_pattern(pattern, 118) else: - text = "//{} {} ".format(repeat_pattern(pattern, text_col - 2), text); + text = "//{} {} ".format(utils.repeat_pattern(pattern, text_col - 2), text); if (len(text) < 120): - return text + repeat_pattern(pattern, 120 - len(text)) + return text + utils.repeat_pattern(pattern, 120 - len(text)) else: return text @@ -62,7 +36,7 @@ class Preprocessor: return '' self.processed_includes.append(incl) - text = read_all_text_from_file(path.join(get_script_folder(), '..', 'include', 'toml++', incl)).strip() + '\n' + text = utils.read_all_text_from_file(path.join(utils.get_script_folder(), '..', 'include', 'toml++', incl)).strip() + '\n' text = re.sub('\r\n', 
'\n', text, 0, re.I | re.M) # convert windows newlines text = re.sub(r'//[#!]\s*[{][{].*?//[#!]\s*[}][}]*?\n', '', text, 0, re.I | re.S) # strip {{ }} blocks self.current_level += 1 @@ -160,10 +134,10 @@ TOML language specifications: Latest: https://github.com/toml-lang/toml/blob/master/README.md v1.0.0-rc.1: https://toml.io/en/v1.0.0-rc.1 v0.5.0: https://toml.io/en/v0.5.0''') - preamble.append(read_all_text_from_file(path.join(get_script_folder(), '..', 'LICENSE'))) + preamble.append(utils.read_all_text_from_file(path.join(utils.get_script_folder(), '..', 'LICENSE'))) # write the output file - output_file_path = path.join(get_script_folder(), '..', 'toml.hpp') + output_file_path = path.join(utils.get_script_folder(), '..', 'toml.hpp') print("Writing to {}".format(output_file_path)) with open(output_file_path,'w', encoding='utf-8', newline='\n') as output_file: if (len(preamble) > 0): @@ -192,17 +166,6 @@ v0.5.0: https://toml.io/en/v0.5.0''') + if __name__ == '__main__': - try: - main() - except Exception as err: - print( - 'Fatal error: [{}] {}'.format( - type(err).__name__, - str(err) - ), - file=sys.stderr - ) - traceback.print_exc(file=sys.stderr) - sys.exit(1) - sys.exit() + utils.run(main) diff --git a/python/generate_unicode_functions.py b/python/generate_unicode_functions.py index a13eb5f..f77e8ea 100644 --- a/python/generate_unicode_functions.py +++ b/python/generate_unicode_functions.py @@ -7,15 +7,15 @@ # godbolt session for experimenting with this script: https://godbolt.org/z/Vp-zzE import sys -import re -import os import os.path as path +import utils +import re import math import requests -import traceback import bisect + #### SETTINGS / MISC ################################################################################################## @@ -97,17 +97,6 @@ def is_pow2(v): -def in_collection(target, collection): - for v in collection: - if isinstance(v, (list, tuple, dict, set, range)): - if target in v: - return True - elif v == target: - return 
True - return False - - - def binary_search(elements, value): index = bisect.bisect_left(elements, value) if index < len(elements) and elements[index] == value: @@ -178,7 +167,6 @@ def ceil(val): def calc_child_size(span_size): - if span_size <= G.word_size: return span_size elif span_size <= G.word_size * G.word_size: @@ -218,6 +206,7 @@ def chunks(l, n): #### SPARSE RANGE ##################################################################################################### + class SparseRange: def __init__(self, *inital_values): @@ -463,6 +452,7 @@ class SparseRange: #### CODEPOINT CHUNK ################################################################################################## + class CodepointChunk: class __Data: @@ -1098,11 +1088,6 @@ def emit_character_function(name, header_file, test_file, codepoints, *character -def get_script_folder(): - return path.dirname(path.realpath(sys.argv[0])) - - - def append_codepoint(codepoints, codepoint, category): # if (0xD800 <= codepoint <= 0xF8FF # surrogates & private use area # or 0x40000 <= codepoint <= 0xDFFFF # planes 4-13 @@ -1138,7 +1123,7 @@ def write_to_files(codepoints, header_file, test_file): emit_character_function('is_hexadecimal_digit', header_file, test_file, codepoints, ('a', 'f'), ('A', 'F'), ('0', '9')) - both(' #if TOML_LANG_UNRELEASED // toml/issues/687 (unicode bare keys)') + both('#if TOML_LANG_UNRELEASED // toml/issues/687 (unicode bare keys)') both('') unicode_exclusions = SparseRange() unicode_exclusions.add(0, 127) # ascii block @@ -1146,7 +1131,7 @@ def write_to_files(codepoints, header_file, test_file): emit_category_function('is_unicode_letter', header_file, test_file, codepoints, ('Ll', 'Lm', 'Lo', 'Lt', 'Lu'), unicode_exclusions) emit_category_function('is_unicode_number', header_file, test_file, codepoints, ('Nd', 'Nl'), unicode_exclusions) emit_category_function('is_unicode_combining_mark', header_file, test_file, codepoints, ('Mn', 'Mc'), unicode_exclusions) - both(' #endif // 
TOML_LANG_UNRELEASED') + both('#endif // TOML_LANG_UNRELEASED') header('} // toml::impl') @@ -1155,7 +1140,7 @@ def main(): # get unicode character database codepoint_list = '' - codepoint_file_path = path.join(get_script_folder(), 'UnicodeData.txt') + codepoint_file_path = path.join(utils.get_script_folder(), 'UnicodeData.txt') if (not path.exists(codepoint_file_path)): print("Couldn't find unicode database file, will download") response = requests.get( @@ -1197,8 +1182,8 @@ def main(): codepoints.sort(key=lambda r:r[0]) # write the output files - header_file_path = path.join(get_script_folder(), '..', 'include', 'toml++', 'toml_utf8_generated.h') - test_file_path = path.join(get_script_folder(), '..', 'tests', 'unicode_generated.cpp') + header_file_path = path.join(utils.get_script_folder(), '..', 'include', 'toml++', 'toml_utf8_generated.h') + test_file_path = path.join(utils.get_script_folder(), '..', 'tests', 'unicode_generated.cpp') print("Writing to {}".format(header_file_path)) with open(header_file_path, 'w', encoding='utf-8', newline='\n') as header_file: if G.generate_tests: @@ -1211,16 +1196,4 @@ def main(): if __name__ == '__main__': - try: - main() - except Exception as err: - print( - 'Fatal error: [{}] {}'.format( - type(err).__name__, - str(err) - ), - file=sys.stderr - ) - traceback.print_exc(file=sys.stderr) - sys.exit(1) - sys.exit() + utils.run(main) diff --git a/python/utils.py b/python/utils.py new file mode 100644 index 0000000..5d28431 --- /dev/null +++ b/python/utils.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python3 +# This file is a part of toml++ and is subject to the terms of the MIT license. +# Copyright (c) 2019-2020 Mark Gillard +# See https://github.com/marzer/tomlplusplus/blob/master/LICENSE for the full license text.
+# SPDX-License-Identifier: MIT + +import sys +import os +import os.path as path +import subprocess +import traceback +import shutil +import fnmatch + + + +def is_tool(name): + return shutil.which(name) is not None + + + +def get_script_folder(): + return path.dirname(path.realpath(sys.argv[0])) + + + +def read_all_text_from_file(path): + print("Reading {}".format(path)) + with open(path, 'r', encoding='utf-8') as file: + text = file.read() + return text + + + +def run_python_script(script_path, *args): + subprocess.check_call( + ['py' if is_tool('py') else 'python3', script_path] + [arg for arg in args] + ) + + + +def repeat_pattern(pattern, count): + if len(pattern) == 1: + return pattern * count + + text = '' + for i in range(0, count): + text = text + pattern[i % len(pattern)] + return text + + + +def delete_directory(dir_path): + if (path.exists(dir_path)): + print('Deleting {}'.format(dir_path)) + shutil.rmtree(dir_path) + + + +def is_collection(val): + if isinstance(val, (list, tuple, dict, set, range)): + return True + return False + + + +def get_all_files(dir, all=None, any=None): + files = [f for f in [path.join(dir, f) for f in os.listdir(dir)] if path.isfile(f)] + if (files and all is not None): + if (not is_collection(all)): + all = (all,) + all = [f for f in all if f is not None] + for fil in all: + files = fnmatch.filter(files, fil) + + if (files and any is not None): + if (not is_collection(any)): + any = (any,) + any = [f for f in any if f is not None] + if any: + results = set() + for fil in any: + results.update(fnmatch.filter(files, fil)) + files = [f for f in results] + return files + + + +def run(main_func): + try: + result = main_func() + if result is None: + sys.exit(0) + else: + sys.exit(int(result)) + except Exception as err: + print( + 'Fatal error: [{}] {}'.format( + type(err).__name__, + str(err) + ), + file=sys.stderr + ) + traceback.print_exc(file=sys.stderr) + sys.exit(-1) diff --git a/tests/conformance.cpp b/tests/conformance.cpp 
new file mode 100644 index 0000000..97ab1b1 --- /dev/null +++ b/tests/conformance.cpp @@ -0,0 +1,340 @@ +#include "tests.h" +using namespace toml::impl; + +#if !TOML_UNRELEASED_FEATURES // todo: improve conformance script to remove this + +namespace // invalid test data for BurntSushi/toml-test +{ + static constexpr auto datetime_malformed_no_leads = S(R"(no-leads = 1987-7-05T17:45:00Z)"sv); + static constexpr auto datetime_malformed_no_secs = S(R"(no-secs = 1987-07-05T17:45Z)"sv); + static constexpr auto datetime_malformed_no_t = S(R"(no-t = 1987-07-0517:45:00Z)"sv); + static constexpr auto datetime_malformed_with_milli = S(R"(with-milli = 1987-07-5T17:45:00.12Z)"sv); + static constexpr auto duplicate_key_table = S(R"([fruit] +type = "apple" + +[fruit.type] +apple = "yes")"sv); + static constexpr auto duplicate_keys = S(R"(dupe = false +dupe = true)"sv); + static constexpr auto duplicate_tables = S(R"([a] +[a])"sv); + static constexpr auto empty_implicit_table = S(R"([naughty..naughty])"sv); + static constexpr auto empty_table = S(R"([])"sv); + static constexpr auto float_leading_zero_neg = S(R"(leading-zero = -03.14)"sv); + static constexpr auto float_leading_zero_pos = S(R"(leading-zero = +03.14)"sv); + static constexpr auto float_leading_zero = S(R"(leading-zero = 03.14)"sv); + static constexpr auto float_no_leading_zero = S(R"(answer = .12345 +neganswer = -.12345)"sv); + static constexpr auto float_no_trailing_digits = S(R"(answer = 1. 
+neganswer = -1.)"sv); + static constexpr auto float_underscore_after_point = S(R"(bad = 1._2)"sv); + static constexpr auto float_underscore_after = S(R"(bad = 1.2_)"sv); + static constexpr auto float_underscore_before_point = S(R"(bad = 1_.2)"sv); + static constexpr auto float_underscore_before = S(R"(bad = _1.2)"sv); + static constexpr auto inline_table_linebreak = S(R"(simple = { a = 1 +})"sv); + static constexpr auto integer_leading_zero_neg = S(R"(leading-zero = -012)"sv); + static constexpr auto integer_leading_zero_pos = S(R"(leading-zero = +012)"sv); + static constexpr auto integer_leading_zero = S(R"(leading-zero = 012)"sv); + static constexpr auto integer_underscore_after = S(R"(bad = 123_)"sv); + static constexpr auto integer_underscore_before = S(R"(bad = _123)"sv); + static constexpr auto integer_underscore_double = S(R"(bad = 1__23)"sv); + static constexpr auto key_after_array = S(R"([[agencies]] owner = "S Cjelli")"sv); + static constexpr auto key_after_table = S(R"([error] this = "should not be here")"sv); + static constexpr auto key_empty = S(R"(= 1)"sv); + static constexpr auto key_hash = S(R"(a# = 1)"sv); + static constexpr auto key_newline = S(R"(a += 1)"sv); + static constexpr auto key_no_eol = S(R"(a = 1 b = 2)"sv); + static constexpr auto key_open_bracket = S(R"([abc = 1)"sv); + static constexpr auto key_single_open_bracket = S(R"([)"sv); + static constexpr auto key_space = S(R"(a b = 1)"sv); + static constexpr auto key_start_bracket = S(R"([a] +[xyz = 5 +[b])"sv); + static constexpr auto key_two_equals = S(R"(key= = 1)"sv); + static constexpr auto llbrace = S(R"([ [table]])"sv); + static constexpr auto multi_line_inline_table = S(R"(json_like = { + first = "Tom", + last = "Preston-Werner" +})"sv); + static constexpr auto multi_line_string_no_close = S(R"(invalid = """ + this will fail)"sv); + static constexpr auto rrbrace = S(R"([[table] ])"sv); + static constexpr auto string_bad_byte_escape = S(R"(naughty = "\xAg")"sv); + static constexpr 
auto string_bad_codepoint = S(R"(invalid-codepoint = "This string contains a non scalar unicode codepoint \uD801")"sv); + static constexpr auto string_bad_escape = S(R"(invalid-escape = "This string has a bad \a escape character.")"sv); + static constexpr auto string_bad_slash_escape = S(R"(invalid-escape = "This string has a bad \/ escape character.")"sv); + static constexpr auto string_bad_uni_esc = S(R"(str = "val\ue")"sv); + static constexpr auto string_byte_escapes = S(R"(answer = "\x33")"sv); + static constexpr auto string_no_close = S(R"(no-ending-quote = "One time, at band camp)"sv); + static constexpr auto table_array_implicit = S(R"(# This test is a bit tricky. It should fail because the first use of +# `[[albums.songs]]` without first declaring `albums` implies that `albums` +# must be a table. The alternative would be quite weird. Namely, it wouldn't +# comply with the TOML spec: "Each double-bracketed sub-table will belong to +# the most *recently* defined table element *above* it." +# +# This is in contrast to the *valid* test, table-array-implicit where +# `[[albums.songs]]` works by itself, so long as `[[albums]]` isn't declared +# later. (Although, `[albums]` could be.) 
+[[albums.songs]] +name = "Glory Days" + +[[albums]] +name = "Born in the USA")"sv); + static constexpr auto table_array_malformed_bracket = S(R"([[albums] +name = "Born to Run")"sv); + static constexpr auto table_array_malformed_empty = S(R"([[]] +name = "Born to Run")"sv); + static constexpr auto table_empty = S(R"([])"sv); + static constexpr auto table_nested_brackets_close = S(R"([a]b] +zyx = 42)"sv); + static constexpr auto table_nested_brackets_open = S(R"([a[b] +zyx = 42)"sv); + static constexpr auto table_whitespace = S(R"([invalid key])"sv); + static constexpr auto table_with_pound = S(R"([key#group] +answer = 42)"sv); + static constexpr auto text_after_array_entries = S(R"(array = [ + "Is there life after an array separator?", No + "Entry" +])"sv); + static constexpr auto text_after_integer = S(R"(answer = 42 the ultimate answer?)"sv); + static constexpr auto text_after_string = S(R"(string = "Is there life after strings?" No.)"sv); + static constexpr auto text_after_table = S(R"([error] this shouldn't be here)"sv); + static constexpr auto text_before_array_separator = S(R"(array = [ + "Is there life before an array separator?" 
No, + "Entry" +])"sv); + static constexpr auto text_in_array = S(R"(array = [ + "Entry 1", + I don't belong, + "Entry 2", +])"sv); +} + +TEST_CASE("conformance - invalid inputs from BurntSushi/toml-test") +{ + parsing_should_fail(FILE_LINE_ARGS, datetime_malformed_no_leads); + parsing_should_fail(FILE_LINE_ARGS, datetime_malformed_no_secs); + parsing_should_fail(FILE_LINE_ARGS, datetime_malformed_no_t); + parsing_should_fail(FILE_LINE_ARGS, datetime_malformed_with_milli); + parsing_should_fail(FILE_LINE_ARGS, duplicate_key_table); + parsing_should_fail(FILE_LINE_ARGS, duplicate_keys); + parsing_should_fail(FILE_LINE_ARGS, duplicate_tables); + parsing_should_fail(FILE_LINE_ARGS, empty_implicit_table); + parsing_should_fail(FILE_LINE_ARGS, empty_table); + parsing_should_fail(FILE_LINE_ARGS, float_leading_zero_neg); + parsing_should_fail(FILE_LINE_ARGS, float_leading_zero_pos); + parsing_should_fail(FILE_LINE_ARGS, float_leading_zero); + parsing_should_fail(FILE_LINE_ARGS, float_no_leading_zero); + parsing_should_fail(FILE_LINE_ARGS, float_no_trailing_digits); + parsing_should_fail(FILE_LINE_ARGS, float_underscore_after_point); + parsing_should_fail(FILE_LINE_ARGS, float_underscore_after); + parsing_should_fail(FILE_LINE_ARGS, float_underscore_before_point); + parsing_should_fail(FILE_LINE_ARGS, float_underscore_before); + parsing_should_fail(FILE_LINE_ARGS, inline_table_linebreak); + parsing_should_fail(FILE_LINE_ARGS, integer_leading_zero_neg); + parsing_should_fail(FILE_LINE_ARGS, integer_leading_zero_pos); + parsing_should_fail(FILE_LINE_ARGS, integer_leading_zero); + parsing_should_fail(FILE_LINE_ARGS, integer_underscore_after); + parsing_should_fail(FILE_LINE_ARGS, integer_underscore_before); + parsing_should_fail(FILE_LINE_ARGS, integer_underscore_double); + parsing_should_fail(FILE_LINE_ARGS, key_after_array); + parsing_should_fail(FILE_LINE_ARGS, key_after_table); + parsing_should_fail(FILE_LINE_ARGS, key_empty); + parsing_should_fail(FILE_LINE_ARGS, 
key_hash); + parsing_should_fail(FILE_LINE_ARGS, key_newline); + parsing_should_fail(FILE_LINE_ARGS, key_no_eol); + parsing_should_fail(FILE_LINE_ARGS, key_open_bracket); + parsing_should_fail(FILE_LINE_ARGS, key_single_open_bracket); + parsing_should_fail(FILE_LINE_ARGS, key_space); + parsing_should_fail(FILE_LINE_ARGS, key_start_bracket); + parsing_should_fail(FILE_LINE_ARGS, key_two_equals); + parsing_should_fail(FILE_LINE_ARGS, llbrace); + parsing_should_fail(FILE_LINE_ARGS, multi_line_inline_table); + parsing_should_fail(FILE_LINE_ARGS, multi_line_string_no_close); + parsing_should_fail(FILE_LINE_ARGS, rrbrace); + parsing_should_fail(FILE_LINE_ARGS, string_bad_byte_escape); + parsing_should_fail(FILE_LINE_ARGS, string_bad_codepoint); + parsing_should_fail(FILE_LINE_ARGS, string_bad_escape); + parsing_should_fail(FILE_LINE_ARGS, string_bad_slash_escape); + parsing_should_fail(FILE_LINE_ARGS, string_bad_uni_esc); + parsing_should_fail(FILE_LINE_ARGS, string_byte_escapes); + parsing_should_fail(FILE_LINE_ARGS, string_no_close); + parsing_should_fail(FILE_LINE_ARGS, table_array_implicit); + parsing_should_fail(FILE_LINE_ARGS, table_array_malformed_bracket); + parsing_should_fail(FILE_LINE_ARGS, table_array_malformed_empty); + parsing_should_fail(FILE_LINE_ARGS, table_empty); + parsing_should_fail(FILE_LINE_ARGS, table_nested_brackets_close); + parsing_should_fail(FILE_LINE_ARGS, table_nested_brackets_open); + parsing_should_fail(FILE_LINE_ARGS, table_whitespace); + parsing_should_fail(FILE_LINE_ARGS, table_with_pound); + parsing_should_fail(FILE_LINE_ARGS, text_after_array_entries); + parsing_should_fail(FILE_LINE_ARGS, text_after_integer); + parsing_should_fail(FILE_LINE_ARGS, text_after_string); + parsing_should_fail(FILE_LINE_ARGS, text_after_table); + parsing_should_fail(FILE_LINE_ARGS, text_before_array_separator); + parsing_should_fail(FILE_LINE_ARGS, text_in_array); +} + +namespace // invalid test data for iarna/toml-spec-tests +{ + static constexpr auto 
array_of_tables_1 = S(R"(# INVALID TOML DOC +fruit = [] + +[[fruit]] # Not allowed)"sv); + static constexpr auto array_of_tables_2 = S(R"(# INVALID TOML DOC +[[fruit]] + name = "apple" + + [[fruit.variety]] + name = "red delicious" + + # This table conflicts with the previous table + [fruit.variety] + name = "granny smith")"sv); + static constexpr auto bare_key_1 = S(R"(bare!key = 123)"sv); + static constexpr auto bare_key_2 = S(R"(barekey + = 123)"sv); + static constexpr auto bare_key_3 = S(R"(barekey =)"sv); + static constexpr auto inline_table_imutable_1 = S(R"([product] +type = { name = "Nail" } +type.edible = false # INVALID)"sv); + static constexpr auto inline_table_imutable_2 = S(R"([product] +type.name = "Nail" +type = { edible = false } # INVALID)"sv); + static constexpr auto inline_table_trailing_comma = S(R"(abc = { abc = 123, })"sv); + static constexpr auto int_0_padded = S(R"(int = 0123)"sv); + static constexpr auto int_signed_bin = S(R"(bin = +0b10)"sv); + static constexpr auto int_signed_hex = S(R"(hex = +0xab)"sv); + static constexpr auto int_signed_oct = S(R"(oct = +0o23)"sv); + static constexpr auto key_value_pair_1 = S(R"(key = # INVALID)"sv); + static constexpr auto key_value_pair_2 = S(R"(first = "Tom" last = "Preston-Werner" # INVALID)"sv); + static constexpr auto multiple_dot_key = S(R"(# THE FOLLOWING IS INVALID + +# This defines the value of fruit.apple to be an integer. +fruit.apple = 1 + +# But then this treats fruit.apple like it's a table. +# You can't turn an integer into a table. 
+fruit.apple.smooth = true)"sv); + static constexpr auto multiple_key = S(R"(# DO NOT DO THIS +name = "Tom" +name = "Pradyun")"sv); + static constexpr auto no_key_name = S(R"(= "no key name" # INVALID)"sv); + static constexpr auto string_basic_multiline_invalid_backslash = S(R"(a = """ + foo \ \n + bar""")"sv); + static constexpr auto string_basic_multiline_out_of_range_unicode_escape_1 = S(R"(a = """\UFFFFFFFF""")"sv); + static constexpr auto string_basic_multiline_out_of_range_unicode_escape_2 = S(R"(a = """\U00D80000""")"sv); + static constexpr auto string_basic_multiline_quotes = S(R"(str5 = """Here are three quotation marks: """.""")"sv); + static constexpr auto string_basic_multiline_unknown_escape = S(R"(a = """\@""")"sv); + static constexpr auto string_basic_out_of_range_unicode_escape_1 = S(R"(a = "\UFFFFFFFF")"sv); + static constexpr auto string_basic_out_of_range_unicode_escape_2 = S(R"(a = "\U00D80000")"sv); + static constexpr auto string_basic_unknown_escape = S(R"(a = "\@")"sv); + static constexpr auto string_literal_multiline_quotes = S(R"(apos15 = '''Here are fifteen apostrophes: '''''''''''''''''' # INVALID)"sv); + static constexpr auto table_1 = S(R"(# DO NOT DO THIS + +[fruit] +apple = "red" + +[fruit] +orange = "orange")"sv); + static constexpr auto table_2 = S(R"(# DO NOT DO THIS EITHER + +[fruit] +apple = "red" + +[fruit.apple] +texture = "smooth")"sv); + static constexpr auto table_3 = S(R"([fruit] +apple.color = "red" +apple.taste.sweet = true + +[fruit.apple] # INVALID)"sv); + static constexpr auto table_4 = S(R"([fruit] +apple.color = "red" +apple.taste.sweet = true + +[fruit.apple.taste] # INVALID)"sv); + static constexpr auto table_invalid_1 = S(R"([fruit.physical] # subtable, but to which parent element should it belong? 
+ color = "red" + shape = "round" + +[[fruit]] # parser must throw an error upon discovering that "fruit" is + # an array rather than a table + name = "apple")"sv); + static constexpr auto table_invalid_2 = S(R"(# INVALID TOML DOC +fruit = [] + +[[fruit]] # Not allowed)"sv); + static constexpr auto table_invalid_3 = S(R"(# INVALID TOML DOC +[[fruit]] + name = "apple" + + [[fruit.variety]] + name = "red delicious" + + # INVALID: This table conflicts with the previous array of tables + [fruit.variety] + name = "granny smith" + + [fruit.physical] + color = "red" + shape = "round")"sv); + static constexpr auto table_invalid_4 = S(R"(# INVALID TOML DOC +[[fruit]] + name = "apple" + + [[fruit.variety]] + name = "red delicious" + + [fruit.physical] + color = "red" + shape = "round" + + # INVALID: This array of tables conflicts with the previous table + [[fruit.physical]] + color = "green")"sv); +} + +TEST_CASE("conformance - invalid inputs from iarna/toml-spec-tests") +{ + parsing_should_fail(FILE_LINE_ARGS, array_of_tables_1); + parsing_should_fail(FILE_LINE_ARGS, array_of_tables_2); + parsing_should_fail(FILE_LINE_ARGS, bare_key_1); + parsing_should_fail(FILE_LINE_ARGS, bare_key_2); + parsing_should_fail(FILE_LINE_ARGS, bare_key_3); + parsing_should_fail(FILE_LINE_ARGS, inline_table_imutable_1); + parsing_should_fail(FILE_LINE_ARGS, inline_table_imutable_2); + parsing_should_fail(FILE_LINE_ARGS, inline_table_trailing_comma); + parsing_should_fail(FILE_LINE_ARGS, int_0_padded); + parsing_should_fail(FILE_LINE_ARGS, int_signed_bin); + parsing_should_fail(FILE_LINE_ARGS, int_signed_hex); + parsing_should_fail(FILE_LINE_ARGS, int_signed_oct); + parsing_should_fail(FILE_LINE_ARGS, key_value_pair_1); + parsing_should_fail(FILE_LINE_ARGS, key_value_pair_2); + parsing_should_fail(FILE_LINE_ARGS, multiple_dot_key); + parsing_should_fail(FILE_LINE_ARGS, multiple_key); + parsing_should_fail(FILE_LINE_ARGS, no_key_name); + parsing_should_fail(FILE_LINE_ARGS, 
string_basic_multiline_invalid_backslash); + parsing_should_fail(FILE_LINE_ARGS, string_basic_multiline_out_of_range_unicode_escape_1); + parsing_should_fail(FILE_LINE_ARGS, string_basic_multiline_out_of_range_unicode_escape_2); + parsing_should_fail(FILE_LINE_ARGS, string_basic_multiline_quotes); + parsing_should_fail(FILE_LINE_ARGS, string_basic_multiline_unknown_escape); + parsing_should_fail(FILE_LINE_ARGS, string_basic_out_of_range_unicode_escape_1); + parsing_should_fail(FILE_LINE_ARGS, string_basic_out_of_range_unicode_escape_2); + parsing_should_fail(FILE_LINE_ARGS, string_basic_unknown_escape); + parsing_should_fail(FILE_LINE_ARGS, string_literal_multiline_quotes); + parsing_should_fail(FILE_LINE_ARGS, table_1); + parsing_should_fail(FILE_LINE_ARGS, table_2); + parsing_should_fail(FILE_LINE_ARGS, table_3); + parsing_should_fail(FILE_LINE_ARGS, table_4); + parsing_should_fail(FILE_LINE_ARGS, table_invalid_1); + parsing_should_fail(FILE_LINE_ARGS, table_invalid_2); + parsing_should_fail(FILE_LINE_ARGS, table_invalid_3); + parsing_should_fail(FILE_LINE_ARGS, table_invalid_4); +} + + +#endif // !TOML_UNRELEASED_FEATURES diff --git a/tests/meson.build b/tests/meson.build index a68059c..41a134d 100644 --- a/tests/meson.build +++ b/tests/meson.build @@ -1,4 +1,5 @@ test_sources = [ + 'conformance.cpp', 'impl_toml.cpp', 'impl_catch2.cpp', 'tests.cpp', diff --git a/tests/parsing_floats.cpp b/tests/parsing_floats.cpp index e9fc92e..a4a9063 100644 --- a/tests/parsing_floats.cpp +++ b/tests/parsing_floats.cpp @@ -51,6 +51,35 @@ TEST_CASE("parsing - floats") } ); + // "A float consists of an integer part followed by a fractional part and/or an exponent part" + // (i.e. omitting the leading digits before the '.' 
is not legal in TOML) + parsing_should_fail(FILE_LINE_ARGS, S("flt = .1"sv)); + parsing_should_fail(FILE_LINE_ARGS, S("flt = +.1"sv)); + parsing_should_fail(FILE_LINE_ARGS, S("flt = -.1"sv)); + parsing_should_fail(FILE_LINE_ARGS, S("flt = .1e1"sv)); + parsing_should_fail(FILE_LINE_ARGS, S("flt = .1e+1"sv)); + parsing_should_fail(FILE_LINE_ARGS, S("flt = .1e-1"sv)); + parsing_should_fail(FILE_LINE_ARGS, S("flt = +.1e1"sv)); + parsing_should_fail(FILE_LINE_ARGS, S("flt = +.1e+1"sv)); + parsing_should_fail(FILE_LINE_ARGS, S("flt = +.1e-1"sv)); + parsing_should_fail(FILE_LINE_ARGS, S("flt = -.1e1"sv)); + parsing_should_fail(FILE_LINE_ARGS, S("flt = -.1e+1"sv)); + parsing_should_fail(FILE_LINE_ARGS, S("flt = -.1e-1"sv)); + + // likewise, so is omitting digits _after_ the '.' + parsing_should_fail(FILE_LINE_ARGS, S("flt = 1."sv)); + parsing_should_fail(FILE_LINE_ARGS, S("flt = +1."sv)); + parsing_should_fail(FILE_LINE_ARGS, S("flt = -1."sv)); + parsing_should_fail(FILE_LINE_ARGS, S("flt = 1.e1"sv)); + parsing_should_fail(FILE_LINE_ARGS, S("flt = 1.e+1"sv)); + parsing_should_fail(FILE_LINE_ARGS, S("flt = 1.e-1"sv)); + parsing_should_fail(FILE_LINE_ARGS, S("flt = +1.e1"sv)); + parsing_should_fail(FILE_LINE_ARGS, S("flt = +1.e+1"sv)); + parsing_should_fail(FILE_LINE_ARGS, S("flt = +1.e-1"sv)); + parsing_should_fail(FILE_LINE_ARGS, S("flt = -1.e1"sv)); + parsing_should_fail(FILE_LINE_ARGS, S("flt = -1.e+1"sv)); + parsing_should_fail(FILE_LINE_ARGS, S("flt = -1.e-1"sv)); + // value tests parse_expected_value( FILE_LINE_ARGS, "1e1"sv, 1e1); parse_expected_value( FILE_LINE_ARGS, "1e+1"sv, 1e+1); @@ -69,30 +98,6 @@ TEST_CASE("parsing - floats") parse_expected_value( FILE_LINE_ARGS, "-1.0"sv, -1.0); parse_expected_value( FILE_LINE_ARGS, "-1.0e1"sv, -1.0e1); parse_expected_value( FILE_LINE_ARGS, "-1.0e-1"sv, -1.0e-1); - parse_expected_value( FILE_LINE_ARGS, ".1"sv, .1); - parse_expected_value( FILE_LINE_ARGS, "+.1"sv, +.1); - parse_expected_value( FILE_LINE_ARGS, "-.1"sv, -.1); - 
parse_expected_value( FILE_LINE_ARGS, "1."sv, 1.); - parse_expected_value( FILE_LINE_ARGS, "+1."sv, +1.); - parse_expected_value( FILE_LINE_ARGS, "-1."sv, -1.); - parse_expected_value( FILE_LINE_ARGS, "1.e1"sv, 1.e1); - parse_expected_value( FILE_LINE_ARGS, "1.e+1"sv, 1.e+1); - parse_expected_value( FILE_LINE_ARGS, "1.e-1"sv, 1.e-1); - parse_expected_value( FILE_LINE_ARGS, "+1.e1"sv, +1.e1); - parse_expected_value( FILE_LINE_ARGS, "+1.e+1"sv, +1.e+1); - parse_expected_value( FILE_LINE_ARGS, "+1.e-1"sv, +1.e-1); - parse_expected_value( FILE_LINE_ARGS, "-1.e1"sv, -1.e1); - parse_expected_value( FILE_LINE_ARGS, "-1.e+1"sv, -1.e+1); - parse_expected_value( FILE_LINE_ARGS, "-1.e-1"sv, -1.e-1); - parse_expected_value( FILE_LINE_ARGS, ".1e1"sv, .1e1); - parse_expected_value( FILE_LINE_ARGS, ".1e+1"sv, .1e+1); - parse_expected_value( FILE_LINE_ARGS, ".1e-1"sv, .1e-1); - parse_expected_value( FILE_LINE_ARGS, "+.1e1"sv, +.1e1); - parse_expected_value( FILE_LINE_ARGS, "+.1e+1"sv, +.1e+1); - parse_expected_value( FILE_LINE_ARGS, "+.1e-1"sv, +.1e-1); - parse_expected_value( FILE_LINE_ARGS, "-.1e1"sv, -.1e1); - parse_expected_value( FILE_LINE_ARGS, "-.1e+1"sv, -.1e+1); - parse_expected_value( FILE_LINE_ARGS, "-.1e-1"sv, -.1e-1); parse_expected_value( FILE_LINE_ARGS, "0.1"sv, 0.1); parse_expected_value( FILE_LINE_ARGS, "0.001"sv, 0.001); parse_expected_value( FILE_LINE_ARGS, "0.100"sv, 0.100); diff --git a/tests/parsing_tables.cpp b/tests/parsing_tables.cpp index 4321299..9879944 100644 --- a/tests/parsing_tables.cpp +++ b/tests/parsing_tables.cpp @@ -162,6 +162,8 @@ apple.taste.sweet = true REQUIRE(tbl[S("fruit")][S("orange")].as()); } ); + + parsing_should_fail(FILE_LINE_ARGS, S(R"([])"sv)); } TEST_CASE("parsing - inline tables") diff --git a/tests/tests.h b/tests/tests.h index ccb603c..f3c4e8a 100644 --- a/tests/tests.h +++ b/tests/tests.h @@ -193,13 +193,17 @@ inline bool parsing_should_fail( return false; }; - return run_tests([=]() { (void)toml::parse(toml_str); }) - && 
run_tests([=]() - { - std::basic_stringstream, std::allocator> ss; - ss.write(toml_str.data(), static_cast(toml_str.length())); - (void)toml::parse(ss); - }); + auto result = run_tests([=]() + { + [[maybe_unused]] auto res = toml::parse(toml_str); + }); + result = result && run_tests([=]() + { + std::basic_stringstream, std::allocator> ss; + ss.write(toml_str.data(), static_cast(toml_str.length())); + [[maybe_unused]] auto res = toml::parse(ss); + }); + return result; #else diff --git a/tests/unicode_generated.cpp b/tests/unicode_generated.cpp index b895356..260a0e1 100644 --- a/tests/unicode_generated.cpp +++ b/tests/unicode_generated.cpp @@ -24,7 +24,7 @@ TEST_CASE("unicode - is_hexadecimal_digit") REQUIRE(not_in(fn, r)); } - #if TOML_LANG_UNRELEASED // toml/issues/687 (unicode bare keys) +#if TOML_LANG_UNRELEASED // toml/issues/687 (unicode bare keys) TEST_CASE("unicode - is_unicode_letter") { @@ -704,4 +704,4 @@ TEST_CASE("unicode - is_unicode_combining_mark") REQUIRE(!fn(v)); } - #endif // TOML_LANG_UNRELEASED +#endif // TOML_LANG_UNRELEASED diff --git a/toml.hpp b/toml.hpp index 59bd8e9..0fb5f32 100644 --- a/toml.hpp +++ b/toml.hpp @@ -1,6 +1,6 @@ //---------------------------------------------------------------------------------------------------------------------- // -// toml++ v1.3.2 +// toml++ v1.3.3 // https://github.com/marzer/tomlplusplus // SPDX-License-Identifier: MIT // @@ -380,7 +380,7 @@ #define TOML_LIB_MAJOR 1 #define TOML_LIB_MINOR 3 -#define TOML_LIB_PATCH 2 +#define TOML_LIB_PATCH 3 #define TOML_LANG_MAJOR 1 #define TOML_LANG_MINOR 0 @@ -5246,22 +5246,17 @@ namespace toml::impl explicit utf8_byte_stream(std::basic_istream& stream) : source{ &stream } { - if (!*source) + if (!source->good()) // eof, fail, bad return; - using stream_traits = typename std::remove_pointer_t::traits_type; const auto initial_pos = source->tellg(); - size_t bom_pos{}; Char bom[3]; - for (; bom_pos < 3_sz && *source; bom_pos++) - { - const auto next = source->get(); 
- if (next == stream_traits::eof()) - break; - bom[bom_pos] = static_cast(next); - } - if (!*source || bom_pos < 3_sz || memcmp(utf8_byte_order_mark.data(), bom, 3_sz) != 0) - source->seekg(initial_pos); + source->read(bom, 3); + if (source->bad() || (source->gcount() == 3 && memcmp(utf8_byte_order_mark.data(), bom, 3_sz) == 0)) + return; + + source->clear(); + source->seekg(initial_pos, std::ios::beg); } [[nodiscard]] TOML_ALWAYS_INLINE @@ -7724,6 +7719,7 @@ namespace toml::impl size_t length = {}; const utf8_codepoint* prev = {}; bool seen_decimal = false, seen_exponent = false; + char first_integer_part = '\0'; while (!is_eof() && !is_value_terminator(*cp)) { if (*cp == U'_') @@ -7739,8 +7735,14 @@ namespace toml::impl set_error_and_return_default("underscores must be followed by digits."sv); else if (*cp == U'.') { + // .1 + // -.1 + // +.1 (no integer part) + if (!first_integer_part) + set_error_and_return_default("expected decimal digit, saw '.'"sv); + // 1.0e+.10 (exponent cannot have '.') - if (seen_exponent) + else if (seen_exponent) set_error_and_return_default("expected exponent decimal digit or sign, saw '.'"sv); // 1.0.e+.10 @@ -7753,8 +7755,11 @@ namespace toml::impl } else if (is_match(*cp, U'e', U'E')) { + if (prev && !is_decimal_digit(*prev)) + set_error_and_return_default("expected decimal digit, saw '"sv, *cp, '\''); + // 1.0ee+10 (multiple 'e') - if (seen_exponent) + else if (seen_exponent) set_error_and_return_default("expected decimal digit, saw '"sv, *cp, '\''); seen_decimal = true; // implied @@ -7770,10 +7775,20 @@ namespace toml::impl else if (!is_match(*prev, U'e', U'E')) set_error_and_return_default("expected exponent digit, saw '"sv, *cp, '\''); } - else if (!is_decimal_digit(*cp)) - set_error_and_return_default("expected decimal digit, saw '"sv, *cp, '\''); else if (length == sizeof(chars)) set_error_and_return_default("exceeds maximum length of "sv, sizeof(chars), " characters."sv); + else if (is_decimal_digit(*cp)) + { + if 
(!seen_decimal) + { + if (!first_integer_part) + first_integer_part = static_cast(cp->bytes[0]); + else if (first_integer_part == '0') + set_error_and_return_default("leading zeroes are prohibited"sv); + } + } + else + set_error_and_return_default("expected decimal digit, saw '"sv, *cp, '\''); chars[length++] = static_cast(cp->bytes[0]); prev = cp; @@ -7788,10 +7803,10 @@ namespace toml::impl set_error_and_return_if_eof({}); set_error_and_return_default("underscores must be followed by digits."sv); } - else if (is_match(*prev, U'e', U'E', U'+', U'-')) + else if (is_match(*prev, U'e', U'E', U'+', U'-', U'.')) { set_error_and_return_if_eof({}); - set_error_and_return_default("expected exponent digit, saw '"sv, *cp, '\''); + set_error_and_return_default("expected decimal digit, saw '"sv, *cp, '\''); } } @@ -8988,16 +9003,29 @@ namespace toml::impl // skip first '[' advance_and_return_if_error_or_eof({}); + // skip past any whitespace that followed the '[' + const bool had_leading_whitespace = consume_leading_whitespace(); + set_error_and_return_if_eof({}); + // skip second '[' (if present) if (*cp == U'[') { + if (had_leading_whitespace) + set_error_and_return_default( + "[[array-of-table]] brackets must be contiguous (i.e. 
[ [ this ] ] is prohibited)"sv + ); + is_arr = true; advance_and_return_if_error_or_eof({}); + + // skip past any whitespace that followed the '[' + consume_leading_whitespace(); + set_error_and_return_if_eof({}); } - // skip past any whitespace that followed the '[' - consume_leading_whitespace(); - set_error_and_return_if_eof({}); + // check for a premature closing ']' + if (*cp == U']') + set_error_and_return_default("tables with blank bare keys are explicitly prohibited"sv); // get the actual key start_recording(); diff --git a/vs/test_char.vcxproj b/vs/test_char.vcxproj index e323d40..cc43b14 100644 --- a/vs/test_char.vcxproj +++ b/vs/test_char.vcxproj @@ -61,6 +61,7 @@ ..\tests\ + NotUsing diff --git a/vs/test_char8.vcxproj b/vs/test_char8.vcxproj index 7a566c4..6b43a47 100644 --- a/vs/test_char8.vcxproj +++ b/vs/test_char8.vcxproj @@ -61,6 +61,7 @@ ..\tests\ + NotUsing diff --git a/vs/test_char8_noexcept.vcxproj b/vs/test_char8_noexcept.vcxproj index ad3496f..1d20551 100644 --- a/vs/test_char8_noexcept.vcxproj +++ b/vs/test_char8_noexcept.vcxproj @@ -63,6 +63,7 @@ ..\tests\ + NotUsing diff --git a/vs/test_char8_strict.vcxproj b/vs/test_char8_strict.vcxproj index 56fa6c9..a03ef61 100644 --- a/vs/test_char8_strict.vcxproj +++ b/vs/test_char8_strict.vcxproj @@ -61,6 +61,7 @@ ..\tests\ + NotUsing diff --git a/vs/test_char8_strict_noexcept.vcxproj b/vs/test_char8_strict_noexcept.vcxproj index 972bb88..2f4737d 100644 --- a/vs/test_char8_strict_noexcept.vcxproj +++ b/vs/test_char8_strict_noexcept.vcxproj @@ -63,6 +63,7 @@ ..\tests\ + NotUsing diff --git a/vs/test_char_noexcept.vcxproj b/vs/test_char_noexcept.vcxproj index f42a203..cbbd7be 100644 --- a/vs/test_char_noexcept.vcxproj +++ b/vs/test_char_noexcept.vcxproj @@ -63,6 +63,7 @@ ..\tests\ + NotUsing diff --git a/vs/test_char_strict.vcxproj b/vs/test_char_strict.vcxproj index 746a9ee..fc0642a 100644 --- a/vs/test_char_strict.vcxproj +++ b/vs/test_char_strict.vcxproj @@ -61,6 +61,7 @@ ..\tests\ + NotUsing diff 
--git a/vs/test_char_strict_noexcept.vcxproj b/vs/test_char_strict_noexcept.vcxproj index 50bfb89..f7d14ff 100644 --- a/vs/test_char_strict_noexcept.vcxproj +++ b/vs/test_char_strict_noexcept.vcxproj @@ -63,6 +63,7 @@ ..\tests\ + NotUsing diff --git a/vs/test_x86_char.vcxproj b/vs/test_x86_char.vcxproj index 6a6338c..7b75f12 100644 --- a/vs/test_x86_char.vcxproj +++ b/vs/test_x86_char.vcxproj @@ -61,6 +61,7 @@ ..\tests\ + NotUsing diff --git a/vs/test_x86_char8.vcxproj b/vs/test_x86_char8.vcxproj index 8584ab0..70cf70d 100644 --- a/vs/test_x86_char8.vcxproj +++ b/vs/test_x86_char8.vcxproj @@ -61,6 +61,7 @@ ..\tests\ + NotUsing diff --git a/vs/test_x86_char8_noexcept.vcxproj b/vs/test_x86_char8_noexcept.vcxproj index ff2a4e2..cb23cc7 100644 --- a/vs/test_x86_char8_noexcept.vcxproj +++ b/vs/test_x86_char8_noexcept.vcxproj @@ -63,6 +63,7 @@ ..\tests\ + NotUsing diff --git a/vs/test_x86_char8_strict.vcxproj b/vs/test_x86_char8_strict.vcxproj index 73d0861..8832d8b 100644 --- a/vs/test_x86_char8_strict.vcxproj +++ b/vs/test_x86_char8_strict.vcxproj @@ -61,6 +61,7 @@ ..\tests\ + NotUsing diff --git a/vs/test_x86_char8_strict_noexcept.vcxproj b/vs/test_x86_char8_strict_noexcept.vcxproj index 0a1baf1..9076670 100644 --- a/vs/test_x86_char8_strict_noexcept.vcxproj +++ b/vs/test_x86_char8_strict_noexcept.vcxproj @@ -63,6 +63,7 @@ ..\tests\ + NotUsing diff --git a/vs/test_x86_char_noexcept.vcxproj b/vs/test_x86_char_noexcept.vcxproj index 3ecaf81..c6f9db0 100644 --- a/vs/test_x86_char_noexcept.vcxproj +++ b/vs/test_x86_char_noexcept.vcxproj @@ -63,6 +63,7 @@ ..\tests\ + NotUsing diff --git a/vs/test_x86_char_strict.vcxproj b/vs/test_x86_char_strict.vcxproj index 55030f6..c104f9b 100644 --- a/vs/test_x86_char_strict.vcxproj +++ b/vs/test_x86_char_strict.vcxproj @@ -61,6 +61,7 @@ ..\tests\ + NotUsing diff --git a/vs/test_x86_char_strict_noexcept.vcxproj b/vs/test_x86_char_strict_noexcept.vcxproj index c1ddd67..c70c370 100644 --- a/vs/test_x86_char_strict_noexcept.vcxproj +++ 
b/vs/test_x86_char_strict_noexcept.vcxproj @@ -63,6 +63,7 @@ ..\tests\ + NotUsing diff --git a/vs/toml++.vcxproj b/vs/toml++.vcxproj index e26b683..4fc4735 100644 --- a/vs/toml++.vcxproj +++ b/vs/toml++.vcxproj @@ -94,9 +94,11 @@ + + diff --git a/vs/toml++.vcxproj.filters b/vs/toml++.vcxproj.filters index b671843..c250b79 100644 --- a/vs/toml++.vcxproj.filters +++ b/vs/toml++.vcxproj.filters @@ -114,6 +114,12 @@ + + python + + + python +