tomlplusplus/tests/parsing_strings.cpp

// This file is a part of toml++ and is subject to the terms of the MIT license.
// Copyright (c) 2019-2020 Mark Gillard <mark.gillard@outlook.com.au>
// See https://github.com/marzer/tomlplusplus/blob/master/LICENSE for the full license text.
// SPDX-License-Identifier: MIT

#include "tests.h"

// Exercises string parsing: basic, multi-line basic, literal and multi-line literal strings,
// escape sequences (\", \t, \n, \uXXXX, \UXXXXXXXX, \xHH), and malformed inputs that must be rejected.
// Every TOML document below lives inside a raw string literal, so its whitespace and line breaks
// are part of the test input and must not be reformatted.
TEST_CASE("parsing - strings")
{
	// Basic strings with escapes, plus multi-line basic strings.
	// The expectations show that only the first newline after the opening delimiter is dropped
	// (str1 loses its single leading newline; str2 keeps the second one).
	parsing_should_succeed(
		FILE_LINE_ARGS,
		S(R"(
str = "I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF."

str1 = """
Roses are red
Violets are blue"""

str2 = """

Roses are red
Violets are blue"""
)"sv),
		// validation callback; receives the parsed table (see parsing_should_succeed in tests.h)
		[](table&& tbl)
		{
			CHECK(tbl[S("str")] == S("I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF."sv));
			CHECK(tbl[S("str1")] == S("Roses are red\nViolets are blue"sv));
			CHECK(tbl[S("str2")] == S("\nRoses are red\nViolets are blue"sv));
		}
	);

	// Multi-line basic strings, continued:
	//  - str2 and str3 use line-ending backslashes; both must come out identical to the single-line
	//    str1, i.e. the backslash swallows the newline and the whitespace that follows it.
	//  - str4 to str7 place quotation marks inside the string, including directly next to the
	//    delimiters (str7) and in escaped runs (str5, str6).
	parsing_should_succeed(
		FILE_LINE_ARGS,
		S(R"(
# The following strings are byte-for-byte equivalent:
str1 = "The quick brown fox jumps over the lazy dog."

str2 = """
The quick brown \


  fox jumps over \
    the lazy dog."""

str3 = """\
       The quick brown \
       fox jumps over \
       the lazy dog.\
       """

str4 = """Here are two quotation marks: "". Simple enough."""
# str5 = """Here are three quotation marks: """."""  # INVALID
str5 = """Here are three quotation marks: ""\"."""
str6 = """Here are fifteen quotation marks: ""\"""\"""\"""\"""\"."""

# "This," she said, "is just a pointless statement."
str7 = """"This," she said, "is just a pointless statement.""""
)"sv),
		[](table&& tbl)
		{
			// shared expectation for the three equivalent spellings str1, str2 and str3
			static constexpr auto quick_brown_fox = S("The quick brown fox jumps over the lazy dog."sv);
			CHECK(tbl[S("str1")] == quick_brown_fox);
			CHECK(tbl[S("str2")] == quick_brown_fox);
			CHECK(tbl[S("str3")] == quick_brown_fox);
			CHECK(tbl[S("str4")] == S(R"(Here are two quotation marks: "". Simple enough.)"sv));
			CHECK(tbl[S("str5")] == S(R"(Here are three quotation marks: """.)"sv));
			CHECK(tbl[S("str6")] == S(R"(Here are fifteen quotation marks: """"""""""""""".)"sv));
			CHECK(tbl[S("str7")] == S(R"("This," she said, "is just a pointless statement.")"sv));
		}
	);

	// The unescaped form of str5 (left commented out in the document above) must be rejected.
	parsing_should_fail(FILE_LINE_ARGS, S(R"(str5 = """Here are three quotation marks: """.""")"sv));

	// Literal strings (single-quoted): the expectations are the source text verbatim, so backslashes
	// are not treated as escapes. For the multi-line literal form only the first newline after the
	// opening delimiter is dropped (lines vs. lines2).
	parsing_should_succeed(
		FILE_LINE_ARGS,
		S(R"(
# What you see is what you get.
winpath  = 'C:\Users\nodejs\templates'
winpath2 = '\\ServerX\admin$\system32\'
quoted   = 'Tom "Dubs" Preston-Werner'
regex    = '<\i\c*\s*>'
regex2 = '''I [dw]on't need \d{2} apples'''
lines  = '''
The first newline is
trimmed in raw strings.
   All other whitespace
   is preserved.
'''
lines2  = '''

The first newline is
trimmed in raw strings.
   All other whitespace
   is preserved.
'''
)"sv),
		[](table&& tbl)
		{
			CHECK(tbl[S("winpath")] == S(R"(C:\Users\nodejs\templates)"sv));
			CHECK(tbl[S("winpath2")] == S(R"(\\ServerX\admin$\system32\)"sv));
			CHECK(tbl[S("quoted")] == S(R"(Tom "Dubs" Preston-Werner)"sv));
			CHECK(tbl[S("regex")] == S(R"(<\i\c*\s*>)"sv));
			CHECK(tbl[S("regex2")] == S(R"(I [dw]on't need \d{2} apples)"sv));
			// the expected values below span several source lines; their line breaks are significant
			CHECK(tbl[S("lines")] == S(R"(The first newline is
trimmed in raw strings.
   All other whitespace
   is preserved.
)"sv));
			CHECK(tbl[S("lines2")] == S(R"(
The first newline is
trimmed in raw strings.
   All other whitespace
   is preserved.
)"sv));
		}
	);

	// Quote characters inside the "other" kind of string: fifteen quotation marks inside a
	// multi-line literal string, fifteen apostrophes inside a basic string, and an apostrophe
	// directly after the opening delimiter of a multi-line literal string (str).
	parsing_should_succeed(
		FILE_LINE_ARGS,
		S(R"(
quot15 = '''Here are fifteen quotation marks: """""""""""""""'''

# apos15 = '''Here are fifteen apostrophes: ''''''''''''''''''  # INVALID
apos15 = "Here are fifteen apostrophes: '''''''''''''''"

# 'That's still pointless', she said.
str = ''''That's still pointless', she said.'''
)"sv),
		[](table&& tbl)
		{
			CHECK(tbl[S("quot15")] == S(R"(Here are fifteen quotation marks: """"""""""""""")"sv));
			CHECK(tbl[S("apos15")] == S(R"(Here are fifteen apostrophes: ''''''''''''''')"sv));
			CHECK(tbl[S("str")] == S(R"('That's still pointless', she said.)"sv));
		}
	);

	// The multi-line literal spelling of apos15 (left commented out in the document above) must be rejected.
	parsing_should_fail(FILE_LINE_ARGS, S(R"(apos15 = '''Here are fifteen apostrophes: ''''''''''''''''''  # INVALID)"sv));

	// value tests
	// Each call passes the TOML text of one value and the string it is expected to produce.
	// The same sentence is checked in all four string syntaxes first, followed by non-ASCII
	// content, embedded quotes and '#' characters, and \u escapes.
	parse_expected_value(
		FILE_LINE_ARGS,
		R"("The quick brown fox jumps over the lazy dog")"sv,
		 S("The quick brown fox jumps over the lazy dog"sv));
	parse_expected_value(
		FILE_LINE_ARGS,
		R"('The quick brown fox jumps over the lazy dog')"sv,
		 S("The quick brown fox jumps over the lazy dog"sv));
	parse_expected_value(
		FILE_LINE_ARGS,
	  R"("""The quick brown fox jumps over the lazy dog""")"sv,
		 S("The quick brown fox jumps over the lazy dog"sv));
	parse_expected_value(
		FILE_LINE_ARGS,
	  R"('''The quick brown fox jumps over the lazy dog''')"sv,
		 S("The quick brown fox jumps over the lazy dog"sv));
	parse_expected_value(
		FILE_LINE_ARGS,
		R"("Ýôú'ℓℓ λáƭè ₥è áƒƭèř ƭλïƨ - #")"sv,
	   S(R"(Ýôú'ℓℓ λáƭè ₥è áƒƭèř ƭλïƨ - #)"sv));
	parse_expected_value(
		FILE_LINE_ARGS,
		R"(" Âñδ ωλèñ \"'ƨ ářè ïñ ƭλè ƨƭřïñϱ, áℓôñϱ ωïƭλ # \"")"sv,
	   S(R"( Âñδ ωλèñ "'ƨ ářè ïñ ƭλè ƨƭřïñϱ, áℓôñϱ ωïƭλ # ")"sv));
	parse_expected_value(
		FILE_LINE_ARGS,
		R"("Ýôú δôñ'ƭ ƭλïñƙ ƨô₥è úƨèř ωôñ'ƭ δô ƭλáƭ?")"sv,
	   S(R"(Ýôú δôñ'ƭ ƭλïñƙ ƨô₥è úƨèř ωôñ'ƭ δô ƭλáƭ?)"sv));
	parse_expected_value(
		FILE_LINE_ARGS,
		R"("\"\u03B1\u03B2\u03B3\"")"sv,
	     S("\"\u03B1\u03B2\u03B3\""sv));

	// toml/pull/709 (\xHH unicode scalars)
	// Accepted only when TOML_LANG_UNRELEASED is enabled; otherwise the same escapes must be rejected.
	#if TOML_LANG_UNRELEASED
		parse_expected_value(
			FILE_LINE_ARGS,
			R"("\x00\x10\x20\x30\x40\x50\x60\x70\x80\x90\x11\xFF\xEE")"sv,
			 S("\u0000\u0010\u0020\u0030\u0040\u0050\u0060\u0070\u0080\u0090\u0011\u00FF\u00EE"sv));
	#else
		parsing_should_fail(FILE_LINE_ARGS, R"(str = "\x00\x10\x20\x30\x40\x50\x60\x70\x80\x90\x11\xFF\xEE")"sv);
	#endif

	// check 8-digit \U scalars with insufficient digits
	// (every length from seven digits down to one)
	parsing_should_fail(FILE_LINE_ARGS,R"(str = "\U1234567")"sv);
	parsing_should_fail(FILE_LINE_ARGS,R"(str = "\U123456")"sv);
	parsing_should_fail(FILE_LINE_ARGS,R"(str = "\U12345")"sv);
	parsing_should_fail(FILE_LINE_ARGS,R"(str = "\U1234")"sv);
	parsing_should_fail(FILE_LINE_ARGS,R"(str = "\U123")"sv);
	parsing_should_fail(FILE_LINE_ARGS,R"(str = "\U12")"sv);
	parsing_should_fail(FILE_LINE_ARGS,R"(str = "\U1")"sv);

	// check 4-digit \u scalars with insufficient digits
	parsing_should_fail(FILE_LINE_ARGS,R"(str = "\u123")"sv);
	parsing_should_fail(FILE_LINE_ARGS,R"(str = "\u12")"sv);
	parsing_should_fail(FILE_LINE_ARGS,R"(str = "\u1")"sv);

	// check 2-digit \x scalars with insufficient digits
	// (not guarded by TOML_LANG_UNRELEASED, so failure is expected in both configurations)
	parsing_should_fail(FILE_LINE_ARGS, R"(str = "\x1")"sv);

	// ML string examples from https://github.com/toml-lang/toml/issues/725
	// These probe quote characters sitting directly next to a multi-line delimiter: one or two
	// extra quotes become part of the value, while a run of six next to other content must fail.
	// A bare run of six (empty string) is valid. The last two cases mix escaped and unescaped quotes.
	parse_expected_value(FILE_LINE_ARGS,  R"( """ """          )"sv,	S(R"( )"sv));
	parse_expected_value(FILE_LINE_ARGS,  R"( """ """"         )"sv,	S(R"( ")"sv));
	parse_expected_value(FILE_LINE_ARGS,  R"( """ """""        )"sv,	S(R"( "")"sv));
	parsing_should_fail(FILE_LINE_ARGS, R"(v= """ """"""       )"sv);
	parse_expected_value(FILE_LINE_ARGS,  R"( ''' '''          )"sv,	S(R"( )"sv));
	parse_expected_value(FILE_LINE_ARGS,  R"( ''' ''''         )"sv,	S(R"( ')"sv));
	parse_expected_value(FILE_LINE_ARGS,  R"( ''' '''''        )"sv,	S(R"( '')"sv));
	parsing_should_fail(FILE_LINE_ARGS, R"(v= ''' ''''''       )"sv);
	parse_expected_value(FILE_LINE_ARGS,  R"( """"""           )"sv,	S(R"()"sv));
	parse_expected_value(FILE_LINE_ARGS,  R"( """" """         )"sv,	S(R"(" )"sv));
	parse_expected_value(FILE_LINE_ARGS,  R"( """"" """        )"sv,	S(R"("" )"sv));
	parsing_should_fail(FILE_LINE_ARGS, R"(v= """""" """       )"sv);
	parse_expected_value(FILE_LINE_ARGS,  R"( ''''''           )"sv,	S(R"()"sv));
	parse_expected_value(FILE_LINE_ARGS,  R"( '''' '''         )"sv,	S(R"(' )"sv));
	parse_expected_value(FILE_LINE_ARGS,  R"( ''''' '''        )"sv,	S(R"('' )"sv));
	parsing_should_fail(FILE_LINE_ARGS, R"(v= '''''' '''       )"sv);
	parse_expected_value(FILE_LINE_ARGS,  R"( """""\""""""     )"sv,	S(R"(""""")"sv));
	parse_expected_value(FILE_LINE_ARGS,  R"( """""\"""\"""""" )"sv,	S(R"("""""""")"sv));
}