From 5e683e9a73c2894104bf93410db7fbc72902cbdc Mon Sep 17 00:00:00 2001 From: Mark Gillard Date: Sat, 18 Apr 2020 16:14:07 +0300 Subject: [PATCH] fixed `is_unicode_XXXXXX` functions being wrong in some cases also: - added tests for unicode functions - changed `TOML_LIKELY` semantics to work with gcc-style intrinsics - greatly improved unicode-related codegen - parser refactoring --- include/toml++/toml.h | 13 +- include/toml++/toml_default_formatter.hpp | 6 +- include/toml++/toml_parser.hpp | 129 +- include/toml++/toml_preprocessor.h | 16 +- include/toml++/toml_print_to_stream.h | 8 +- include/toml++/toml_utf8.h | 18 +- include/toml++/toml_utf8_generated.h | 1666 +++++++++-------- include/toml++/toml_utf8_streams.h | 12 +- python/generate_unicode_functions.py | 1417 +++++++++----- tests/manipulating_arrays.cpp | 2 +- tests/manipulating_tables.cpp | 4 +- tests/manipulating_values.cpp | 2 +- tests/meson.build | 2 + tests/parsing_arrays.cpp | 4 +- tests/parsing_booleans.cpp | 2 +- tests/parsing_comments.cpp | 4 +- tests/parsing_dates_and_times.cpp | 2 +- tests/parsing_floats.cpp | 6 +- tests/parsing_integers.cpp | 8 +- tests/parsing_key_value_pairs.cpp | 14 +- tests/parsing_spec_example.cpp | 2 +- tests/parsing_strings.cpp | 8 +- tests/parsing_tables.cpp | 14 +- tests/tests.cpp | 12 +- tests/tests.h | 34 +- tests/unicode.cpp | 86 + tests/unicode.h | 84 + tests/unicode_generated.cpp | 2040 +++++++++++++++++++++ toml.hpp | 1773 +++++++++--------- vs/test_char.vcxproj | 5 +- vs/test_char8.vcxproj | 2 + vs/test_char8_noexcept.vcxproj | 2 + vs/test_char8_strict.vcxproj | 2 + vs/test_char8_strict_noexcept.vcxproj | 2 + vs/test_char_noexcept.vcxproj | 2 + vs/test_char_strict.vcxproj | 2 + vs/test_char_strict_noexcept.vcxproj | 2 + vs/test_x86_char.vcxproj | 2 + vs/test_x86_char8.vcxproj | 2 + vs/test_x86_char8_noexcept.vcxproj | 2 + vs/test_x86_char8_strict.vcxproj | 2 + vs/test_x86_char8_strict_noexcept.vcxproj | 2 + vs/test_x86_char_noexcept.vcxproj | 2 + 
vs/test_x86_char_strict.vcxproj | 2 + vs/test_x86_char_strict_noexcept.vcxproj | 2 + 45 files changed, 5126 insertions(+), 2297 deletions(-) create mode 100644 tests/unicode.cpp create mode 100644 tests/unicode.h create mode 100644 tests/unicode_generated.cpp diff --git a/include/toml++/toml.h b/include/toml++/toml.h index 906130b..47b71f3 100644 --- a/include/toml++/toml.h +++ b/include/toml++/toml.h @@ -195,7 +195,7 @@ /// return 0; /// } /// \ecpp -/// +/// /// Instances of toml::parse_error can be printed directly to streams: /// \cpp /// try @@ -217,7 +217,16 @@ /// /// If the default error formatting is not be suitable for your use-case you can access the error's /// toml::source_region and description directly from the error object (as in the examples above). -/// +/// +/// \m_class{m-note m-warning} +/// +/// \parblock +///

Don't forget <fstream>!

+/// Not everyone who uses the library is going to work directly from files, so not everybody is forced to pay +/// the compilation overhead of including `<fstream>`. You need to explicitly include it if you're going to be calling +/// toml::parse_file(). +/// \endparblock +/// /// \see /// - toml::parse_file() /// - toml::parse_result diff --git a/include/toml++/toml_default_formatter.hpp b/include/toml++/toml_default_formatter.hpp index dbec2b1..c400286 100644 --- a/include/toml++/toml_default_formatter.hpp +++ b/include/toml++/toml_default_formatter.hpp @@ -49,11 +49,11 @@ namespace toml::impl s += TOML_STRING_PREFIX('"'); for (auto c : str) { - if (c >= TOML_STRING_PREFIX('\x00') && c <= TOML_STRING_PREFIX('\x1F')) TOML_UNLIKELY + if TOML_UNLIKELY(c >= TOML_STRING_PREFIX('\x00') && c <= TOML_STRING_PREFIX('\x1F')) s.append(low_character_escape_table[c]); - else if (c == TOML_STRING_PREFIX('\x7F')) TOML_UNLIKELY + else if TOML_UNLIKELY(c == TOML_STRING_PREFIX('\x7F')) s.append(TOML_STRING_PREFIX("\\u007F"sv)); - else if (c == TOML_STRING_PREFIX('"')) TOML_UNLIKELY + else if TOML_UNLIKELY(c == TOML_STRING_PREFIX('"')) s.append(TOML_STRING_PREFIX("\\\""sv)); else s += c; diff --git a/include/toml++/toml_parser.hpp b/include/toml++/toml_parser.hpp index 60faf9d..375216b 100644 --- a/include/toml++/toml_parser.hpp +++ b/include/toml++/toml_parser.hpp @@ -95,9 +95,9 @@ namespace TOML_INTERNAL_NAMESPACE else if constexpr (std::is_same_v) { string_view cp_view; - if (arg.value <= U'\x1F') TOML_UNLIKELY + if TOML_UNLIKELY(arg.value <= U'\x1F') cp_view = low_character_escape_table[arg.value]; - else if (arg.value == U'\x7F') TOML_UNLIKELY + else if TOML_UNLIKELY(arg.value == U'\x7F') cp_view = TOML_STRING_PREFIX("\\u007F"sv); else cp_view = arg.template as_view(); @@ -635,7 +635,7 @@ namespace toml::impl if (!skipped_escaped_codepoint) advance_and_return_if_error_or_eof({}); } - else TOML_LIKELY + else { // handle closing delimiters if (*cp == U'"') @@ -1661,11 +1661,24 @@ 
namespace toml::impl assert_or_assume(!is_value_terminator(*cp)); push_parse_scope("value"sv); + // check if it begins with some control character + // (note that this will also fail for whitespace but we're assuming we've + // called consume_leading_whitespace() before calling parse_value()) + if TOML_UNLIKELY(is_control_character(*cp)) + set_error_and_return_default("unexpected control character"sv); + + // underscores at the beginning + else if (*cp == U'_') + set_error_and_return_default("values may not begin with underscores"sv); + const auto begin_pos = cp->position; std::unique_ptr val; do { + assert_or_assume(!is_control_character(*cp)); + assert_or_assume(*cp != U'_'); + // detect the value type and parse accordingly, // starting with value types that can be detected // unambiguously from just one character. @@ -1704,10 +1717,6 @@ namespace toml::impl else if (is_match(*cp, U'i', U'n', U'I', U'N')) val = std::make_unique>(parse_inf_or_nan()); - // underscores at the beginning - else if (*cp == U'_') - set_error_and_return_default("values may not begin with underscores"sv); - return_if_error({}); if (val) break; @@ -1760,68 +1769,76 @@ namespace toml::impl bool eof_while_scanning = false; const auto scan = [&]() TOML_MAY_THROW { - while (advance_count < utf8_buffered_reader::max_history_length) + if (is_eof()) + return; + assert_or_assume(!is_value_terminator(*cp)); + + do { - if (!cp || is_value_terminator(*cp)) + if (const auto c = **cp; c != U'_') { - eof_while_scanning = !cp; - break; - } + chars[char_count++] = c; - if (*cp != U'_') - { - chars[char_count++] = *cp; - switch (*cp) + if (is_decimal_digit(c)) + add_trait(has_digits); + else if (is_ascii_letter(c)) { - case U'B': [[fallthrough]]; - case U'b': - if (char_count == 2_sz && has_any(begins_zero)) - add_trait(has_b); - break; + assert_or_assume((c >= U'a' && c <= U'z') || (c >= U'A' && c <= U'Z')); + switch (static_cast(c | 32u)) + { + case U'b': + if (char_count == 2_sz && has_any(begins_zero)) 
+ add_trait(has_b); + break; - case U'E': [[fallthrough]]; - case U'e': - if (char_count > 1_sz - && has_none(has_b | has_o | has_p | has_t | has_x | has_z | has_colon) - && (has_none(has_plus | has_minus) || has_any(begins_sign))) - add_trait(has_e); - break; + case U'e': + if (char_count > 1_sz + && has_none(has_b | has_o | has_p | has_t | has_x | has_z | has_colon) + && (has_none(has_plus | has_minus) || has_any(begins_sign))) + add_trait(has_e); + break; - case U'O': [[fallthrough]]; - case U'o': - if (char_count == 2_sz && has_any(begins_zero)) - add_trait(has_o); - break; + case U'o': + if (char_count == 2_sz && has_any(begins_zero)) + add_trait(has_o); + break; - case U'P': [[fallthrough]]; - case U'p': - if (has_any(has_x)) - add_trait(has_p); - break; + case U'p': + if (has_any(has_x)) + add_trait(has_p); + break; - case U'X': [[fallthrough]]; - case U'x': - if ((char_count == 2_sz && has_any(begins_zero)) - || (char_count == 3_sz && has_any(begins_sign) && chars[1] == U'0')) - add_trait(has_x); - break; + case U'x': + if ((char_count == 2_sz && has_any(begins_zero)) + || (char_count == 3_sz && has_any(begins_sign) && chars[1] == U'0')) + add_trait(has_x); + break; - case U'T': add_trait(has_t); break; - case U'Z': add_trait(has_z); break; - case U'+': add_trait(has_plus); break; - case U'-': add_trait(has_minus); break; - case U'.': add_trait(has_dot); break; - case U':': add_trait(has_colon); break; - - default: - if (is_decimal_digit(*cp)) - add_trait(has_digits); + case U't': add_trait(has_t); break; + case U'z': add_trait(has_z); break; + } + } + else if (c <= U':') + { + assert_or_assume(c < U'0' || c > U'9'); + switch (c) + { + case U'+': add_trait(has_plus); break; + case U'-': add_trait(has_minus); break; + case U'.': add_trait(has_dot); break; + case U':': add_trait(has_colon); break; + } } } advance_and_return_if_error(); advance_count++; + eof_while_scanning = is_eof(); } + while (advance_count < utf8_buffered_reader::max_history_length + && 
!is_eof() + && !is_value_terminator(*cp) + ); }; scan(); return_if_error({}); @@ -1831,7 +1848,7 @@ namespace toml::impl && traits == (bdigit_msk | has_minus) && chars[4] == U'-' && chars[7] == U'-' - && cp + && !is_eof() && *cp == U' ') { const auto pre_advance_count = advance_count; @@ -1850,7 +1867,7 @@ namespace toml::impl advance_and_return_if_error({}); advance_count++; - if (!cp || !is_decimal_digit(*cp)) + if (is_eof() || !is_decimal_digit(*cp)) backpedal(); else { diff --git a/include/toml++/toml_preprocessor.h b/include/toml++/toml_preprocessor.h index ead1d49..14317d9 100644 --- a/include/toml++/toml_preprocessor.h +++ b/include/toml++/toml_preprocessor.h @@ -96,6 +96,8 @@ #else #define TOML_COMPILER_EXCEPTIONS 0 #endif + #define TOML_LIKELY(...) (__builtin_expect(!!(__VA_ARGS__), 1) ) + #define TOML_UNLIKELY(...) (__builtin_expect(!!(__VA_ARGS__), 0) ) //floating-point from_chars and to_chars are not implemented in any version of clang as of 1/1/2020 #ifndef TOML_FLOAT_CHARCONV @@ -158,10 +160,8 @@ #else #define TOML_COMPILER_EXCEPTIONS 0 #endif - - // these pass the __has_attribute() test but cause warnings on if/else branches =/ - #define TOML_LIKELY - #define TOML_UNLIKELY + #define TOML_LIKELY(...) (__builtin_expect(!!(__VA_ARGS__), 1) ) + #define TOML_UNLIKELY(...) (__builtin_expect(!!(__VA_ARGS__), 0) ) // floating-point from_chars and to_chars are not implemented in any version of gcc as of 1/1/2020 #ifndef TOML_FLOAT_CHARCONV @@ -284,10 +284,10 @@ #if !TOML_DOXYGEN && !defined(__INTELLISENSE__) #if !defined(TOML_LIKELY) && __has_cpp_attribute(likely) - #define TOML_LIKELY [[likely]] + #define TOML_LIKELY(...) (__VA_ARGS__) [[likely]] #endif #if !defined(TOML_UNLIKELY) && __has_cpp_attribute(unlikely) - #define TOML_UNLIKELY [[unlikely]] + #define TOML_UNLIKELY(...) 
(__VA_ARGS__) [[unlikely]] #endif #if __has_cpp_attribute(nodiscard) >= 201907L #define TOML_NODISCARD_CTOR [[nodiscard]] @@ -295,10 +295,10 @@ #endif #ifndef TOML_LIKELY - #define TOML_LIKELY + #define TOML_LIKELY(...) (__VA_ARGS__) #endif #ifndef TOML_UNLIKELY - #define TOML_UNLIKELY + #define TOML_UNLIKELY(...) (__VA_ARGS__) #endif #ifndef TOML_NODISCARD_CTOR #define TOML_NODISCARD_CTOR diff --git a/include/toml++/toml_print_to_stream.h b/include/toml++/toml_print_to_stream.h index b16e22d..78d5a83 100644 --- a/include/toml++/toml_print_to_stream.h +++ b/include/toml++/toml_print_to_stream.h @@ -343,13 +343,13 @@ namespace toml::impl static_assert(sizeof(Char) == 1); for (auto c : str) { - if (c >= TOML_STRING_PREFIX('\x00') && c <= TOML_STRING_PREFIX('\x1F')) TOML_UNLIKELY + if TOML_UNLIKELY(c >= TOML_STRING_PREFIX('\x00') && c <= TOML_STRING_PREFIX('\x1F')) print_to_stream(low_character_escape_table[c], stream); - else if (c == TOML_STRING_PREFIX('\x7F')) TOML_UNLIKELY + else if TOML_UNLIKELY(c == TOML_STRING_PREFIX('\x7F')) print_to_stream(TOML_STRING_PREFIX("\\u007F"sv), stream); - else if (c == TOML_STRING_PREFIX('"')) TOML_UNLIKELY + else if TOML_UNLIKELY(c == TOML_STRING_PREFIX('"')) print_to_stream(TOML_STRING_PREFIX("\\\""sv), stream); - else if (c == TOML_STRING_PREFIX('\\')) TOML_UNLIKELY + else if TOML_UNLIKELY(c == TOML_STRING_PREFIX('\\')) print_to_stream(TOML_STRING_PREFIX("\\\\"sv), stream); else print_to_stream(c, stream); diff --git a/include/toml++/toml_utf8.h b/include/toml++/toml_utf8.h index 8e9fe27..ecdd265 100644 --- a/include/toml++/toml_utf8.h +++ b/include/toml++/toml_utf8.h @@ -121,16 +121,6 @@ namespace toml::impl return (codepoint >= U'0' && codepoint <= U'9'); } - [[nodiscard]] - TOML_GNU_ATTR(const) - constexpr bool is_hexadecimal_digit(char32_t codepoint) noexcept - { - return (codepoint >= U'a' && codepoint <= U'f') - || (codepoint >= U'A' && codepoint <= U'F') - || is_decimal_digit(codepoint) - ; - } - [[nodiscard]] 
TOML_GNU_ATTR(const) TOML_ALWAYS_INLINE @@ -185,6 +175,14 @@ namespace toml::impl ; } + [[nodiscard]] + TOML_GNU_ATTR(const) + TOML_ALWAYS_INLINE + constexpr bool is_control_character(char32_t codepoint) noexcept + { + return codepoint <= U'\u001F' || codepoint == U'\u007F'; + } + [[nodiscard]] TOML_GNU_ATTR(const) TOML_ALWAYS_INLINE diff --git a/include/toml++/toml_utf8_generated.h b/include/toml++/toml_utf8_generated.h index 3c01ccb..007a7fb 100644 --- a/include/toml++/toml_utf8_generated.h +++ b/include/toml++/toml_utf8_generated.h @@ -1,922 +1,916 @@ //# This file is a part of toml++ and is subject to the the terms of the MIT license. //# Copyright (c) 2019-2020 Mark Gillard //# See https://github.com/marzer/tomlplusplus/blob/master/LICENSE for the full license text. +// SPDX-License-Identifier: MIT //#----- //# this file was generated by generate_unicode_functions.py - do not modify it directly -// SPDX-License-Identifier: MIT #pragma once #include "toml_preprocessor.h" -#if TOML_LANG_UNRELEASED // toml/issues/687 (unicode bare keys) - -#define TOML_ASSUME_CODEPOINT_BETWEEN(first, last) \ - TOML_ASSUME(codepoint >= first); \ - TOML_ASSUME(codepoint <= last) - namespace toml::impl { - //# Returns true if a codepoint belongs to any of these categories: Ll, Lm, Lo, Lt, Lu + //# Returns true if a codepoint matches any of: + //# 0 - 9, A - F, a - f [[nodiscard]] TOML_GNU_ATTR(const) - constexpr bool is_unicode_letter(char32_t codepoint) noexcept + constexpr bool is_hexadecimal_digit(char32_t cp) noexcept { - if (codepoint < U'\u00AA' || codepoint > U'\U00031349') + return cp >= U'0' && cp <= U'f' && (1ull << (static_cast(cp) - 0x30ull)) & 0x7E0000007E03FFull; + } + + #if TOML_LANG_UNRELEASED // toml/issues/687 (unicode bare keys) + + //# Returns true if a codepoint belongs to any of these categories: + //# Ll, Lm, Lo, Lt, Lu + [[nodiscard]] + TOML_GNU_ATTR(const) + constexpr bool is_unicode_letter(char32_t cp) noexcept + { + if (cp < U'\u00AA' || cp > 
U'\U00031349') return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\u00AA', U'\U00031349'); - switch ((static_cast(codepoint) - 0xAAu) / 3147u) + + const auto child_index_0 = (static_cast(cp) - 0xAAull) / 0xC4Bull; + if ((1ull << child_index_0) & 0x8A7FFC004001CFA0ull) + return true; + if ((1ull << child_index_0) & 0x26180C0000ull) + return false; + switch (child_index_0) { - case 0: + case 0x00: // [0] 00AA - 0CF4 { - if (codepoint > U'\u0CF2') + if (cp > U'\u0CF2') return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\u00AA', U'\u0CF2'); - switch ((static_cast(codepoint) - 0xAAu) / 63u) + TOML_ASSUME(cp >= U'\u00AA'); + + constexpr uint_least64_t lookup_table_1[] = { - case 0: return (1ull << (static_cast(codepoint) - 0xAAull)) & 0x7FFFDFFFFFC10801ull; - case 1: return codepoint != U'\u00F7'; - case 8: return (1ull << (static_cast(codepoint) - 0x2A2ull)) & 0x4000FFF0FFFFFFFFull; - case 9: return (1u << (static_cast(codepoint) - 0x2E1u)) & 0x280Fu; - case 10: return false; - case 11: return (1ull << (static_cast(codepoint) - 0x370ull)) & 0x3FFFD740BCDFull; - case 12: return codepoint != U'\u03A2'; - case 13: return codepoint != U'\u03F6'; - case 15: return codepoint <= U'\u0481' || codepoint >= U'\u048A'; - case 18: return codepoint != U'\u0530'; - case 19: return codepoint <= U'\u0559' || codepoint >= U'\u0560'; - case 21: return codepoint <= U'\u05EA' || codepoint >= U'\u05EF'; - case 23: return codepoint != U'\u0653'; - case 25: return (1ull << (static_cast(codepoint) - 0x6D1ull)) & 0x4E0060300017ull; - case 26: return (1ull << (static_cast(codepoint) - 0x710ull)) & 0x60000000FFFFFFFDull; - case 28: return (1ull << (static_cast(codepoint) - 0x78Eull)) & 0x7000000800FFFFFFull; - case 29: return (1ull << (static_cast(codepoint) - 0x7CDull)) & 0x7FF821803FFFFFFFull; - case 30: return (1ull << (static_cast(codepoint) - 0x80Cull)) & 0x7FF00000110043FFull; - case 31: return codepoint <= U'\u0858' || codepoint >= U'\u0860'; - case 32: return codepoint != U'\u088A'; - case 
34: return codepoint <= U'\u0939' || codepoint >= U'\u093D'; - case 35: return (1ull << (static_cast(codepoint) - 0x950ull)) & 0x21FFFE0003FF01ull; - case 36: return (1ull << (static_cast(codepoint) - 0x986ull)) & 0x8F17F7FFFFE67Full; - case 37: return (1ull << (static_cast(codepoint) - 0x9CEull)) & 0x400C000EC001ull; - case 38: return (1ull << (static_cast(codepoint) - 0xA05ull)) & 0x1B6FEFFFFFCC3Full; - case 39: return (1u << (static_cast(codepoint) - 0xA59u)) & 0xE00002Fu; - case 40: return (1ull << (static_cast(codepoint) - 0xA85ull)) & 0x11F6FEFFFFFDDFFull; - case 41: return (1ull << (static_cast(codepoint) - 0xAD0ull)) & 0x20000030001ull; - case 42: return (1ull << (static_cast(codepoint) - 0xB05ull)) & 0x11F6FEFFFFFCCFFull; - case 43: return (1u << (static_cast(codepoint) - 0xB5Cu)) & 0x20003Bu; - case 44: return (1ull << (static_cast(codepoint) - 0xB83ull)) & 0x7FF8E31AC7B8FDull; - case 46: return (1ull << (static_cast(codepoint) - 0xC05ull)) & 0x1FFFEFFFFFEEFFull; - case 47: return (1ull << (static_cast(codepoint) - 0xC3Dull)) & 0x1838000001ull; - case 48: return (1ull << (static_cast(codepoint) - 0xC80ull)) & 0x1EFFDFFFFFDDFE1ull; - case 49: return (1ull << (static_cast(codepoint) - 0xCB9ull)) & 0x30001A000000011ull; + 0xFFFFDFFFFFC10801ull, 0xFFFFFFFFFFFFDFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0x07C000FFF0FFFFFFull, 0x0000000000000014ull, 0x0000000000000000ull, 0xFEFFFFF5D02F37C0ull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFEFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFF00FFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFC09FFFFFFFFFBFull, 0x000000007FFFFFFFull, + 0xFFFFFFC000000000ull, 0xFFC00000000001E1ull, 0x00000001FFFFFFFFull, 0xFFFFFFFFFFFFFFB0ull, + 0x18000BFFFFFFFFFFull, 0xFFFFFF4000270030ull, 0xFFFFFFF80000003Full, 0x0FFFFFFFFFFFFFFFull, + 0xFFFFFFFF00000080ull, 0x44010FFFFFC10C01ull, 0xFFC07FFFFFC00000ull, 0xFFC0000000000001ull, 
+ 0x000000003FFFF7FFull, 0xFFFFFFFFFC000000ull, 0x00FFC0400008FFFFull, 0x7FFFFE67F87FFF80ull, + 0x00EC00100008F17Full, 0x7FFFFE61F80400C0ull, 0x001780000000DB7Full, 0x7FFFFEEFF8000700ull, + 0x00C000400008FB7Full, 0x7FFFFE67F8008000ull, 0x00EC00000008FB7Full, 0xC6358F71FA000080ull, + 0x000000400000FFF1ull, 0x7FFFFF77F8000000ull, 0x00C1C0000008FFFFull, 0x7FFFFF77F8400000ull, + 0x00D000000008FBFFull, 0x0000000000000180ull, + }; + return lookup_table_1[(static_cast(cp) - 0xAAull) / 0x40ull] + & (0x1ull << ((static_cast(cp) - 0xAAull) % 0x40ull)); + } + case 0x01: // [1] 0CF5 - 193F + { + if (cp < U'\u0D04' || cp > U'\u191E') + return false; + + constexpr uint_least64_t lookup_table_1[] = + { + 0x027FFFFFFFFFDDFFull, 0x0FC0000038070400ull, 0xF2FFBFFFFFC7FFFEull, 0xE000000000000007ull, + 0xF000DFFFFFFFFFFFull, 0x6000000000000007ull, 0xF200DFFAFFFFFF7Dull, 0x100000000F000005ull, + 0xF000000000000000ull, 0x000001FFFFFFFFEFull, 0x00000000000001F0ull, 0xF000000000000000ull, + 0x0800007FFFFFFFFFull, 0x3FFE1C0623C3F000ull, 0xFFFFFFFFF0000400ull, 0xFF7FFFFFFFFFF20Bull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFF3D7F3DFull, 0xD7F3DFFFFFFFF3DFull, 0xFFFFFFFFFFF7FFF3ull, + 0xFFFFFFFFFFF3DFFFull, 0xF0000000007FFFFFull, 0xFFFFFFFFF0000FFFull, 0xE3F3FFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xEFFFF9FFFFFFFFFFull, 0xFFFFFFFFF07FFFFFull, 0xF01FE07FFFFFFFFFull, + 0xF0003FFFF0003DFFull, 0xF0001DFFF0003FFFull, 0x0000FFFFFFFFFFFFull, 0x0000000001080000ull, + 0xFFFFFFFFF0000000ull, 0xF01FFFFFFFFFFFFFull, 0xFFFFF05FFFFFFFF9ull, 0xF003FFFFFFFFFFFFull, + 0x0000000007FFFFFFull, + }; + return lookup_table_1[(static_cast(cp) - 0xD04ull) / 0x40ull] + & (0x1ull << ((static_cast(cp) - 0xD04ull) % 0x40ull)); + } + case 0x02: // 
[2] 1940 - 258A + { + if (cp < U'\u1950' || cp > U'\u2184') + return false; + + constexpr uint_least64_t lookup_table_1[] = + { + 0xFFFF001F3FFFFFFFull, 0x03FFFFFF0FFFFFFFull, 0xFFFF000000000000ull, 0xFFFFFFFFFFFF007Full, + 0x000000000000001Full, 0x0000000000800000ull, 0xFFE0000000000000ull, 0x0FE0000FFFFFFFFFull, + 0xFFF8000000000000ull, 0xFFFFFC00C001FFFFull, 0xFFFF0000003FFFFFull, 0xE0000000000FFFFFull, + 0x01FF3FFFFFFFFC00ull, 0x0000E7FFFFFFFFFFull, 0xFFFF046FDE000000ull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0x0000FFFFFFFFFFFFull, 0xFFFF000000000000ull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0x3F3FFFFFFFFF3F3Full, + 0xFFFF3FFFFFFFAAFFull, 0x1FDC5FDFFFFFFFFFull, 0x00001FDC1FFF0FCFull, 0x0000000000000000ull, + 0x0000800200000000ull, 0x0000000000001FFFull, 0xFC84000000000000ull, 0x43E0F3FFBD503E2Full, + 0x0018000000000000ull, + }; + return lookup_table_1[(static_cast(cp) - 0x1950ull) / 0x40ull] + & (0x1ull << ((static_cast(cp) - 0x1950ull) % 0x40ull)); + } + case 0x03: // [3] 258B - 31D5 + { + if (cp < U'\u2C00' || cp > U'\u31BF') + return false; + + constexpr uint_least64_t lookup_table_1[] = + { + 0xFFFF7FFFFFFFFFFFull, 0xFFFFFFFF7FFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0x000C781FFFFFFFFFull, + 0xFFFF20BFFFFFFFFFull, 0x000080FFFFFFFFFFull, 0x7F7F7F7F007FFFFFull, 0x000000007F7F7F7Full, + 0x0000800000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x183E000000000060ull, 0xFFFFFFFFFFFFFFFEull, 0xFFFFFFFEE07FFFFFull, 0xF7FFFFFFFFFFFFFFull, + 0xFFFEFFFFFFFFFFE0ull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFF00007FFFull, + }; + return lookup_table_1[(static_cast(cp) - 0x2C00ull) / 0x40ull] + & (0x1ull << (static_cast(cp) % 0x40ull)); + } + case 0x04: return (cp >= U'\u31F0' && cp <= U'\u31FF') || (cp >= U'\u3400' && cp <= U'\u3E20'); + case 0x06: return (cp >= U'\u4A6C' && cp <= U'\u4DBE') || 
(cp >= U'\u4E00' && cp <= U'\u56B6'); + case 0x0C: return (cp >= U'\u942E' && cp <= U'\u9FFB') || (cp >= U'\uA000' && cp <= U'\uA078'); + case 0x0D: // [13] A079 - ACC3 + { + TOML_ASSUME(cp >= U'\uA079' && cp <= U'\uACC3'); + + constexpr uint_least64_t lookup_table_1[] = + { + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0x00000000000FFFFFull, 0xFFFFFFFFFF800000ull, 0xFFFFFFFFFFFFFF9Full, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0x0006007FFF8FFFFFull, 0x003FFFFFFFFFFF80ull, + 0xFFFFFF9FFFFFFFC0ull, 0x00001FFFFFFFFFFFull, 0xFFFFFE7FC0000000ull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFCFFFFull, 0xF00000000003FE7Full, 0x000003FFFFFBDDFFull, 0x07FFFFFFFFFFFF80ull, + 0x07FFFFFFFFFFFE00ull, 0x7E00000000000000ull, 0xFF801FFFFFFE0034ull, 0xFFFFFF8000003FFFull, + 0x03FFFFFFFFFFF80Full, 0x007FEF8000400000ull, 0x0000FFFFFFFFFFBEull, 0x3FFFFF800007FB80ull, + 0x317FFFFFFFFFFFE2ull, 0x0E03FF9C0000029Full, 0xFFBFBF803F3F3F00ull, 0xFF81FFFBFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0x000003FFFFFFFFFFull, 0xFFFFFFFFFFFFFF80ull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0x00000000000007FFull, + }; + return lookup_table_1[(static_cast(cp) - 0xA079ull) / 0x40ull] + & (0x1ull << ((static_cast(cp) - 0xA079ull) % 0x40ull)); + } + case 0x11: return (cp >= U'\uD1A5' && cp <= U'\uD7A2') || (cp >= U'\uD7B0' && cp <= U'\uD7C6') + || (cp >= U'\uD7CB' && cp <= U'\uD7FB'); + case 0x14: // [20] F686 - 102D0 + { + if (cp < U'\uF900') + return false; + TOML_ASSUME(cp <= U'\U000102D0'); + + constexpr uint_least64_t lookup_table_1[] = + { + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 
0xFFFFFFFFFFFFFFFFull, 0xFFFF3FFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0x0000000003FFFFFFull, + 0x5F7FFDFFA0F8007Full, 0xFFFFFFFFFFFFFFDBull, 0x0003FFFFFFFFFFFFull, 0xFFFFFFFFFFF80000ull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0x3FFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFF0000ull, 0xFFFFFFFFFFFCFFFFull, 0x0FFF0000000000FFull, + 0x0000000000000000ull, 0xFFDF000000000000ull, 0xFFFFFFFFFFFFFFFFull, 0x1FFFFFFFFFFFFFFFull, + 0x07FFFFFE00000000ull, 0xFFFFFFC007FFFFFEull, 0x7FFFFFFFFFFFFFFFull, 0x000000001CFCFCFCull, + 0xB7FFFF7FFFFFEFFFull, 0x000000003FFF3FFFull, 0xFFFFFFFFFFFFFFFFull, 0x07FFFFFFFFFFFFFFull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0xFFFFFFFF1FFFFFFFull, 0x000000000001FFFFull, + }; + return lookup_table_1[(static_cast(cp) - 0xF900ull) / 0x40ull] + & (0x1ull << (static_cast(cp) % 0x40ull)); + } + case 0x15: // [21] 102D1 - 10F1B + { + if (cp < U'\U00010300') + return false; + TOML_ASSUME(cp <= U'\U00010F1B'); + + constexpr uint_least64_t lookup_table_1[] = + { + 0xFFFFE000FFFFFFFFull, 0x003FFFFFFFFF03FDull, 0xFFFFFFFF3FFFFFFFull, 0x000000000000FF0Full, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFF00003FFFFFFFull, 0x0FFFFFFFFF0FFFFFull, + 0xFFFF00FFFFFFFFFFull, 0x0000000FFFFFFFFFull, 0x0000000000000000ull, 0x0000000000000000ull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0x007FFFFFFFFFFFFFull, 0x000000FF003FFFFFull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x91BFFFFFFFFFFD3Full, 0x007FFFFF003FFFFFull, 0x000000007FFFFFFFull, 0x0037FFFF00000000ull, + 0x03FFFFFF003FFFFFull, 0x0000000000000000ull, 0xC0FFFFFFFFFFFFFFull, 0x0000000000000000ull, + 0x003FFFFFFEEF0001ull, 0x1FFFFFFF00000000ull, 0x000000001FFFFFFFull, 0x0000001FFFFFFEFFull, + 0x003FFFFFFFFFFFFFull, 0x0007FFFF003FFFFFull, 0x000000000003FFFFull, 0x0000000000000000ull, + 
0xFFFFFFFFFFFFFFFFull, 0x00000000000001FFull, 0x0007FFFFFFFFFFFFull, 0x0007FFFFFFFFFFFFull, + 0x0000000FFFFFFFFFull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x000303FFFFFFFFFFull, 0x0000000000000000ull, + 0x000000000FFFFFFFull, + }; + return lookup_table_1[(static_cast(cp) - 0x10300ull) / 0x40ull] + & (0x1ull << (static_cast(cp) % 0x40ull)); + } + case 0x16: // [22] 10F1C - 11B66 + { + if (cp > U'\U00011AF8') + return false; + TOML_ASSUME(cp >= U'\U00010F1C'); + + constexpr uint_least64_t lookup_table_1[] = + { + 0x000003FFFFF00801ull, 0x0000000000000000ull, 0x000001FFFFF00000ull, 0xFFFFFF8007FFFFF0ull, + 0x000000000FFFFFFFull, 0xFFFFFF8000000000ull, 0xFFF00000000FFFFFull, 0xFFFFFF8000001FFFull, + 0xFFF00900000007FFull, 0xFFFFFF80047FFFFFull, 0x400001E0007FFFFFull, 0xFFBFFFF000000001ull, + 0x000000000000FFFFull, 0xFFFBD7F000000000ull, 0xFFFFFFFFFFF01FFBull, 0xFF99FE0000000007ull, + 0x001000023EDFDFFFull, 0x000000000000003Eull, 0x0000000000000000ull, 0xFFFFFFF000000000ull, + 0x0000780001FFFFFFull, 0xFFFFFFF000000038ull, 0x00000B00000FFFFFull, 0x0000000000000000ull, + 0x0000000000000000ull, 0xFFFFFFF000000000ull, 0xF00000000007FFFFull, 0xFFFFFFF000000000ull, + 0x00000100000FFFFFull, 0xFFFFFFF000000000ull, 0x0000000010007FFFull, 0x7FFFFFF000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0xFFFFFFF000000000ull, + 0x000000000000FFFFull, 0x0000000000000000ull, 0xFFFFFFFFFFFFFFF0ull, 0xF6FF27F80000000Full, + 0x00000028000FFFFFull, 0x0000000000000000ull, 0x001FFFFFFFFFCFF0ull, 0xFFFF8010000000A0ull, + 0x00100000407FFFFFull, 0x00003FFFFFFFFFFFull, 0xFFFFFFF000000002ull, 0x000000001FFFFFFFull, + }; + return lookup_table_1[(static_cast(cp) - 0x10F1Cull) / 0x40ull] + & (0x1ull << ((static_cast(cp) - 0x10F1Cull) % 0x40ull)); + } + case 0x17: // [23] 11B67 - 127B1 + { + if (cp < U'\U00011C00' || cp > U'\U00012543') + return false; + + constexpr uint_least64_t 
lookup_table_1[] = + { + 0x00007FFFFFFFFDFFull, 0xFFFC000000000001ull, 0x000000000000FFFFull, 0x0000000000000000ull, + 0x0001FFFFFFFFFB7Full, 0xFFFFFDBF00000040ull, 0x00000000010003FFull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0007FFFF00000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0001000000000000ull, 0x0000000000000000ull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0x0000000003FFFFFFull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0x000000000000000Full, + }; + return lookup_table_1[(static_cast(cp) - 0x11C00ull) / 0x40ull] + & (0x1ull << (static_cast(cp) % 0x40ull)); + } + case 0x18: return cp >= U'\U00013000'; + case 0x19: return cp <= U'\U0001342E'; + case 0x1A: return (cp >= U'\U00014400' && cp <= U'\U00014646'); + case 0x1D: // [29] 16529 - 17173 + { + if (cp < U'\U00016800') + return false; + TOML_ASSUME(cp <= U'\U00017173'); + + constexpr uint_least64_t lookup_table_1[] = + { + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0x01FFFFFFFFFFFFFFull, 0x000000007FFFFFFFull, 0x0000000000000000ull, 0x00003FFFFFFF0000ull, + 0x0000FFFFFFFFFFFFull, 0xE0FFFFF80000000Full, 0x000000000000FFFFull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0xFFFFFFFFFFFFFFFFull, 0x0000000000000000ull, 
0x0000000000000000ull, + 0xFFFFFFFFFFFFFFFFull, 0x00000000000107FFull, 0x00000000FFF80000ull, 0x0000000B00000000ull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0x000FFFFFFFFFFFFFull, + }; + return lookup_table_1[(static_cast(cp) - 0x16800ull) / 0x40ull] + & (0x1ull << (static_cast(cp) % 0x40ull)); + } + case 0x1F: return (cp >= U'\U00017DBF' && cp <= U'\U000187F6') || (cp >= U'\U00018800' && cp <= U'\U00018A09'); + case 0x20: return (cp >= U'\U00018A0A' && cp <= U'\U00018CD5') || (cp >= U'\U00018D00' && cp <= U'\U00018D07'); + case 0x23: // [35] 1AEEB - 1BB35 + { + if (cp < U'\U0001B000' || cp > U'\U0001B2FB') + return false; + + constexpr uint_least64_t lookup_table_1[] = + { + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0x000000007FFFFFFFull, 0xFFFF00F000070000ull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0x0FFFFFFFFFFFFFFFull, + }; + return lookup_table_1[(static_cast(cp) - 0x1B000ull) / 0x40ull] + & (0x1ull << (static_cast(cp) % 0x40ull)); + } + case 0x24: // [36] 1BB36 - 1C780 + { + if (cp < U'\U0001BC00' || cp > U'\U0001BC99') + return false; + + switch ((static_cast(cp) - 0x1BC00ull) / 0x40ull) + { + case 0x01: return (cp <= U'\U0001BC7C' && (1ull << (static_cast(cp) - 0x1BC40ull)) & 0x1FFF07FFFFFFFFFFull); + case 0x02: return (1u << (static_cast(cp) - 0x1BC80u)) & 0x3FF01FFu; default: return true; } - //# chunk summary: 1922 codepoints from 124 ranges (spanning a search area of 3145) } - case 1: + case 0x26: // [38] 1D3CC - 1E016 { - if (codepoint < U'\u0D04' || codepoint > U'\u191E') + if (cp < U'\U0001D400' || cp > U'\U0001D7CB') return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\u0D04', U'\u191E'); - switch ((static_cast(codepoint) - 0xD04u) / 64u) + + constexpr uint_least64_t lookup_table_1[] = { - case 0: return (1ull << (static_cast(codepoint) - 
0xD04ull)) & 0x27FFFFFFFFFDDFFull; - case 1: return (1ull << (static_cast(codepoint) - 0xD4Eull)) & 0x3F000000E01C1ull; - case 2: return (1ull << (static_cast(codepoint) - 0xD85ull)) & 0x797FDFFFFFE3FFFFull; - case 3: return codepoint <= U'\u0DC6' || codepoint >= U'\u0E01'; - case 4: return (1ull << (static_cast(codepoint) - 0xE04ull)) & 0xF000DFFFFFFFFFFFull; - case 5: return codepoint <= U'\u0E46' || codepoint >= U'\u0E81'; - case 6: return (1ull << (static_cast(codepoint) - 0xE84ull)) & 0xF200DFFAFFFFFF7Dull; - case 7: return (1ull << (static_cast(codepoint) - 0xEC4ull)) & 0x100000000F000005ull; - case 9: return codepoint != U'\u0F48'; - case 12: return codepoint <= U'\u102A' || codepoint >= U'\u103F'; - case 13: return (1ull << (static_cast(codepoint) - 0x1050ull)) & 0x3FFE1C0623C3Full; - case 14: return codepoint <= U'\u108E' || codepoint >= U'\u10A0'; - case 15: return (1ull << (static_cast(codepoint) - 0x10C4ull)) & 0xFF7FFFFFFFFFF20Bull; - case 21: return (1ull << (static_cast(codepoint) - 0x1244ull)) & 0xFFFFFFFFF3D7F3DFull; - case 22: return (1ull << (static_cast(codepoint) - 0x1284ull)) & 0xD7F3DFFFFFFFF3DFull; - case 23: return (1ull << (static_cast(codepoint) - 0x12C4ull)) & 0xFFFFFFFFFFF7FFF3ull; - case 24: return (1ull << (static_cast(codepoint) - 0x1304ull)) & 0xFFFFFFFFFFF3DFFFull; - case 25: return codepoint <= U'\u135A' || codepoint >= U'\u1380'; - case 26: return codepoint <= U'\u138F' || codepoint >= U'\u13A0'; - case 27: return (1ull << (static_cast(codepoint) - 0x13C4ull)) & 0xE3F3FFFFFFFFFFFFull; - case 37: return (1ull << (static_cast(codepoint) - 0x1644ull)) & 0xEFFFF9FFFFFFFFFFull; - case 38: return codepoint <= U'\u169A' || codepoint >= U'\u16A0'; - case 39: return (1ull << (static_cast(codepoint) - 0x16C4ull)) & 0xF01FE07FFFFFFFFFull; - case 40: return (1ull << (static_cast(codepoint) - 0x1704ull)) & 0xF0003FFFF0003DFFull; - case 41: return (1ull << (static_cast(codepoint) - 0x1744ull)) & 0xF0001DFFF0003FFFull; - case 43: return 
codepoint <= U'\u17D7' || codepoint >= U'\u17DC'; - case 45: return codepoint <= U'\u1878' || codepoint >= U'\u1880'; - case 46: return (1ull << (static_cast(codepoint) - 0x1884ull)) & 0xFFFFF05FFFFFFFF9ull; - case 47: return codepoint <= U'\u18F5' || codepoint >= U'\u1900'; - default: return true; - } - //# chunk summary: 2239 codepoints from 83 ranges (spanning a search area of 3099) + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFDFFFFFull, 0xEBFFDE64DFFFFFFFull, 0xFFFFFFFFFFFFFFEFull, + 0x7BFFFFFFDFDFE7BFull, 0xFFFFFFFFFFFDFC5Full, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFF3FFFFFFFFFull, 0xF7FFFFFFF7FFFFFDull, + 0xFFDFFFFFFFDFFFFFull, 0xFFFF7FFFFFFF7FFFull, 0xFFFFFDFFFFFFFDFFull, 0x0000000000000FF7ull, + }; + return lookup_table_1[(static_cast(cp) - 0x1D400ull) / 0x40ull] + & (0x1ull << (static_cast(cp) % 0x40ull)); } - case 2: + case 0x27: // [39] 1E017 - 1EC61 { - if (codepoint < U'\u1950' || codepoint > U'\u2184') + if (cp < U'\U0001E100' || cp > U'\U0001E94B') return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\u1950', U'\u2184'); - switch ((static_cast(codepoint) - 0x1950u) / 64u) + + constexpr uint_least64_t lookup_table_1[] = { - case 0: return (1ull << (static_cast(codepoint) - 0x1950ull)) & 0xFFFF001F3FFFFFFFull; - case 1: return codepoint <= U'\u19AB' || codepoint >= U'\u19B0'; - case 3: return codepoint <= U'\u1A16' || codepoint >= U'\u1A20'; - case 7: return codepoint <= U'\u1B33' || codepoint >= U'\u1B45'; - case 9: return (1ull << (static_cast(codepoint) - 0x1B90ull)) & 0xFFFFFC00C001FFFFull; - case 10: return codepoint <= U'\u1BE5' || codepoint >= U'\u1C00'; - case 11: return codepoint <= U'\u1C23' || codepoint >= U'\u1C4D'; - case 12: return codepoint <= U'\u1C7D' || codepoint >= U'\u1C80'; - case 13: return codepoint <= U'\u1CBA' || codepoint >= U'\u1CBD'; - case 14: return (1ull << (static_cast(codepoint) - 0x1CE9ull)) & 0x7FFF8237EFull; - case 23: return (1ull << (static_cast(codepoint) - 
0x1F10ull)) & 0x3F3FFFFFFFFF3F3Full; - case 24: return (1ull << (static_cast(codepoint) - 0x1F50ull)) & 0xFFFF3FFFFFFFAAFFull; - case 25: return (1ull << (static_cast(codepoint) - 0x1F90ull)) & 0x1FDC5FDFFFFFFFFFull; - case 26: return (1ull << (static_cast(codepoint) - 0x1FD0ull)) & 0x1FDC1FFF0FCFull; - case 27: return false; - case 28: return codepoint <= U'\u2071' || codepoint >= U'\u207F'; - case 30: return (1u << (static_cast(codepoint) - 0x2102u)) & 0x3F21u; - case 31: return (1ull << (static_cast(codepoint) - 0x2110ull)) & 0x43E0F3FFBD503E2Full; - default: return true; - } - //# chunk summary: 1184 codepoints from 59 ranges (spanning a search area of 2101) + 0x3F801FFFFFFFFFFFull, 0x0000000000004000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x00000FFFFFFFFFFFull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0x000000000000001Full, + 0xFFFFFFFFFFFFFFFFull, 0x000000000000080Full, + }; + return lookup_table_1[(static_cast(cp) - 0x1E100ull) / 0x40ull] + & (0x1ull << (static_cast(cp) % 0x40ull)); } - case 3: + case 0x28: // [40] 1EC62 - 1F8AC { - if (codepoint < U'\u2C00' || codepoint > U'\u31BF') + if (cp < U'\U0001EE00' || cp > U'\U0001EEBB') return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\u2C00', U'\u31BF'); - switch ((static_cast(codepoint) - 0x2C00u) / 64u) + + switch ((static_cast(cp) - 0x1EE00ull) / 0x40ull) { - case 0: return codepoint != U'\u2C2F'; - case 1: return codepoint != U'\u2C5F'; 
- case 2: return true; - case 3: return (1ull << (static_cast(codepoint) - 0x2CC0ull)) & 0xC781FFFFFFFFFull; - case 4: return (1ull << (static_cast(codepoint) - 0x2D00ull)) & 0xFFFF20BFFFFFFFFFull; - case 5: return codepoint <= U'\u2D67' || codepoint >= U'\u2D6F'; - case 6: return (1ull << (static_cast(codepoint) - 0x2D80ull)) & 0x7F7F7F7F007FFFFFull; - case 7: return (1u << (static_cast(codepoint) - 0x2DC0u)) & 0x7F7F7F7Fu; - case 8: return true; - case 16: return (1ull << (static_cast(codepoint) - 0x3005ull)) & 0xC1F00000000003ull; - case 17: return true; - case 18: return (1ull << (static_cast(codepoint) - 0x3080ull)) & 0xFFFFFFFEE07FFFFFull; - case 19: return codepoint != U'\u30FB'; - case 20: return codepoint != U'\u3100'; - case 21: return true; - case 22: return codepoint <= U'\u318E' || codepoint >= U'\u31A0'; - default: return false; - } - //# chunk summary: 771 codepoints from 30 ranges (spanning a search area of 1472) - } - case 4: return codepoint <= U'\u31FF' || codepoint >= U'\u3400'; - case 6: return codepoint <= U'\u4DBE' || codepoint >= U'\u4E00'; - case 12: return codepoint <= U'\u9FFB' || codepoint >= U'\uA000'; - case 13: - { - if (codepoint < U'\uA079' || codepoint > U'\uACC3') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\uA079', U'\uACC3'); - switch ((static_cast(codepoint) - 0xA079u) / 63u) - { - case 18: return codepoint <= U'\uA4FD' || codepoint >= U'\uA500'; - case 22: return codepoint <= U'\uA60C' || codepoint >= U'\uA610'; - case 23: return codepoint <= U'\uA62B' || codepoint >= U'\uA640'; - case 24: return codepoint <= U'\uA66E' || codepoint >= U'\uA67F'; - case 26: return codepoint <= U'\uA6E5' || codepoint >= U'\uA717'; - case 27: return codepoint <= U'\uA71F' || codepoint >= U'\uA722'; - case 28: return codepoint <= U'\uA788' || codepoint >= U'\uA78B'; - case 29: return codepoint <= U'\uA7BF' || codepoint >= U'\uA7C2'; - case 30: return (1ull << (static_cast(codepoint) - 0xA7F5ull)) & 0x1FFFBDDFFFull; - case 31: return 
codepoint <= U'\uA822' || codepoint >= U'\uA840'; - case 32: return codepoint <= U'\uA873' || codepoint >= U'\uA882'; - case 34: return (1ull << (static_cast(codepoint) - 0xA8F2ull)) & 0xFFF001A3Full; - case 35: return codepoint <= U'\uA925' || codepoint >= U'\uA930'; - case 36: return codepoint <= U'\uA97C' || codepoint >= U'\uA984'; - case 37: return codepoint <= U'\uA9B2' || codepoint >= U'\uA9CF'; - case 38: return (1ull << (static_cast(codepoint) - 0xA9E0ull)) & 0x3FFFF7C00FFDFull; - case 39: return (1ull << (static_cast(codepoint) - 0xAA12ull)) & 0x3FDC000007FFFFFull; - case 40: return (1ull << (static_cast(codepoint) - 0xAA60ull)) & 0xFFFFC47FFFFFull; - case 41: return (1ull << (static_cast(codepoint) - 0xAA90ull)) & 0x53E62FFFFFFFFull; - case 42: return (1ull << (static_cast(codepoint) - 0xAADBull)) & 0x7CFC00380FFE7ull; - case 43: return (1ull << (static_cast(codepoint) - 0xAB0Eull)) & 0x7FFFFFFDFDFC01F9ull; - case 44: return (1ull << (static_cast(codepoint) - 0xAB4Dull)) & 0x7FFFFFF81FFFBFFFull; - case 46: return codepoint <= U'\uABE2' || codepoint >= U'\uAC00'; - default: return true; - } - //# chunk summary: 2554 codepoints from 52 ranges (spanning a search area of 3147) - } - case 17: return codepoint <= U'\uD7A2' || (codepoint >= U'\uD7B0' && codepoint <= U'\uD7C6') - || codepoint >= U'\uD7CB'; - case 18: return false; - case 19: return false; - case 20: - { - if (codepoint < U'\uF900' || codepoint > U'\U000102D0') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\uF900', U'\U000102D0'); - switch ((static_cast(codepoint) - 0xF900u) / 63u) - { - case 5: return codepoint <= U'\uFA6D' || codepoint >= U'\uFA70'; - case 8: return (1ull << (static_cast(codepoint) - 0xFB00ull)) & 0x7FFDFFA0F8007Full; - case 9: return (1ull << (static_cast(codepoint) - 0xFB38ull)) & 0x3FFFFFFFFFFFDB5Full; - case 17: return codepoint <= U'\uFD3D' || codepoint >= U'\uFD50'; - case 18: return codepoint <= U'\uFD8F' || codepoint >= U'\uFD92'; - case 21: return false; - case 
22: return codepoint != U'\uFE6A'; - case 24: return codepoint <= U'\uFEFC' || codepoint >= U'\uFF21'; - case 25: return codepoint <= U'\uFF3A' || codepoint >= U'\uFF41'; - case 27: return (1ull << (static_cast(codepoint) - 0xFFA5ull)) & 0xE7E7E7E3FFFFFFull; - case 28: return codepoint != U'\uFFE4'; - case 29: return (1ull << (static_cast(codepoint) - 0x10023ull)) & 0x7FFE7FFF6FFFFEFull; - case 33: return false; - case 34: return false; - case 35: return false; - case 36: return false; - case 37: return false; - case 39: return codepoint <= U'\U0001029C' || codepoint >= U'\U000102A0'; - default: return true; - } - //# chunk summary: 1710 codepoints from 34 ranges (spanning a search area of 2513) - } - case 21: - { - if (codepoint < U'\U00010300' || codepoint > U'\U00010F1B') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\U00010300', U'\U00010F1B'); - switch ((static_cast(codepoint) - 0x10300u) / 64u) - { - case 0: return codepoint <= U'\U0001031F' || codepoint >= U'\U0001032D'; - case 1: return (1ull << (static_cast(codepoint) - 0x10340ull)) & 0x3FFFFFFFFF03FDull; - case 2: return codepoint <= U'\U0001039D' || codepoint >= U'\U000103A0'; - case 3: return codepoint <= U'\U000103C3' || codepoint >= U'\U000103C8'; - case 6: return codepoint <= U'\U0001049D' || codepoint >= U'\U000104B0'; - case 7: return codepoint <= U'\U000104D3' || codepoint >= U'\U000104D8'; - case 8: return codepoint <= U'\U00010527' || codepoint >= U'\U00010530'; - case 10: return false; - case 11: return false; - case 17: return codepoint <= U'\U00010755' || codepoint >= U'\U00010760'; - case 18: return false; - case 19: return false; - case 20: return (1ull << (static_cast(codepoint) - 0x10800ull)) & 0x91BFFFFFFFFFFD3Full; - case 21: return codepoint <= U'\U00010855' || codepoint >= U'\U00010860'; - case 23: return codepoint != U'\U000108C0'; - case 24: return codepoint <= U'\U00010915' || codepoint >= U'\U00010920'; - case 25: return false; - case 26: return codepoint <= U'\U000109B7' || 
codepoint >= U'\U000109BE'; - case 27: return false; - case 28: return (1ull << (static_cast(codepoint) - 0x10A00ull)) & 0x3FFFFFFEEF0001ull; - case 31: return codepoint != U'\U00010AC8'; - case 33: return codepoint <= U'\U00010B55' || codepoint >= U'\U00010B60'; - case 35: return false; - case 41: return false; - case 42: return false; - case 43: return false; - case 44: return false; - case 45: return false; - case 46: return codepoint <= U'\U00010EA9' || codepoint >= U'\U00010EB0'; - case 47: return false; - default: return true; - } - //# chunk summary: 1620 codepoints from 48 ranges (spanning a search area of 3100) - } - case 22: - { - if (codepoint < U'\U00010F1C' || codepoint > U'\U00011AF8') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\U00010F1C', U'\U00011AF8'); - switch ((static_cast(codepoint) - 0x10F1Cu) / 64u) - { - case 0: return (1ull << (static_cast(codepoint) - 0x10F1Cull)) & 0x3FFFFF00801ull; - case 1: return false; - case 3: return codepoint <= U'\U00010FF6' || codepoint >= U'\U00011003'; - case 6: return codepoint <= U'\U000110AF' || codepoint >= U'\U000110D0'; - case 7: return codepoint <= U'\U000110E8' || codepoint >= U'\U00011103'; - case 8: return (1ull << (static_cast(codepoint) - 0x1111Cull)) & 0xFFF00900000007FFull; - case 9: return (1ull << (static_cast(codepoint) - 0x1115Cull)) & 0xFFFFFF80047FFFFFull; - case 10: return (1ull << (static_cast(codepoint) - 0x1119Cull)) & 0x400001E0007FFFFFull; - case 11: return (1ull << (static_cast(codepoint) - 0x111DCull)) & 0xFFBFFFF000000001ull; - case 13: return (1u << (static_cast(codepoint) - 0x11280u)) & 0xFFFBD7Fu; - case 14: return (1ull << (static_cast(codepoint) - 0x1129Cull)) & 0xFFFFFFFFFFF01FFBull; - case 15: return (1ull << (static_cast(codepoint) - 0x112DCull)) & 0xFF99FE0000000007ull; - case 16: return (1ull << (static_cast(codepoint) - 0x1131Cull)) & 0x1000023EDFDFFFull; - case 18: return false; - case 20: return codepoint <= U'\U00011434' || codepoint >= U'\U00011447'; - case 
21: return codepoint <= U'\U00011461' || codepoint >= U'\U00011480'; - case 22: return (1ull << (static_cast(codepoint) - 0x1149Cull)) & 0xB00000FFFFFull; - case 23: return false; - case 24: return false; - case 26: return codepoint <= U'\U000115AE' || codepoint >= U'\U000115D8'; - case 28: return codepoint <= U'\U0001162F' || codepoint >= U'\U00011644'; - case 30: return codepoint <= U'\U000116AA' || codepoint >= U'\U000116B8'; - case 32: return false; - case 33: return false; - case 34: return false; - case 37: return false; - case 39: return (1ull << (static_cast(codepoint) - 0x118DCull)) & 0xF6FF27F80000000Full; - case 40: return (1ull << (static_cast(codepoint) - 0x1191Cull)) & 0x28000FFFFFull; - case 41: return false; - case 42: return codepoint <= U'\U000119A7' || codepoint >= U'\U000119AA'; - case 43: return (1ull << (static_cast(codepoint) - 0x119E1ull)) & 0x7FFFC0080000005ull; - case 44: return (1ull << (static_cast(codepoint) - 0x11A1Cull)) & 0x100000407FFFFFull; - case 46: return codepoint <= U'\U00011A9D' || codepoint >= U'\U00011AC0'; - default: return true; - } - //# chunk summary: 1130 codepoints from 67 ranges (spanning a search area of 3037) - } - case 23: - { - if (codepoint < U'\U00011C00' || codepoint > U'\U00012543') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\U00011C00', U'\U00012543'); - switch ((static_cast(codepoint) - 0x11C00u) / 63u) - { - case 0: return codepoint != U'\U00011C09'; - case 1: return codepoint <= U'\U00011C40' || codepoint >= U'\U00011C72'; - case 3: return false; - case 4: return (1ull << (static_cast(codepoint) - 0x11D00ull)) & 0x1FFFFFFFFFB7Full; - case 5: return (1ull << (static_cast(codepoint) - 0x11D46ull)) & 0xFFFF6FC000001ull; - case 6: return codepoint <= U'\U00011D89' || codepoint >= U'\U00011D98'; - case 7: return false; - case 8: return false; - case 9: return false; - case 10: return false; - case 12: return false; - case 13: return false; - case 15: return false; - case 31: return false; - case 32: 
return false; - case 33: return false; - default: return true; - } - //# chunk summary: 1304 codepoints from 16 ranges (spanning a search area of 2372) - } - case 27: return false; - case 28: return false; - case 29: - { - if (codepoint < U'\U00016800' || codepoint > U'\U00017173') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\U00016800', U'\U00017173'); - switch ((static_cast(codepoint) - 0x16800u) / 64u) - { - case 10: return false; - case 13: return (1ull << (static_cast(codepoint) - 0x16B40ull)) & 0xE0FFFFF80000000Full; - case 15: return false; - case 16: return false; - case 17: return false; - case 18: return false; - case 19: return false; - case 20: return false; - case 21: return false; - case 22: return false; - case 23: return false; - case 24: return false; - case 26: return false; - case 27: return false; - case 29: return codepoint <= U'\U00016F4A' || codepoint >= U'\U00016F50'; - case 31: return codepoint != U'\U00016FC0'; - default: return true; - } - //# chunk summary: 1250 codepoints from 14 ranges (spanning a search area of 2420) - } - case 31: return codepoint <= U'\U000187F6' || codepoint >= U'\U00018800'; - case 32: return codepoint <= U'\U00018CD5' || codepoint >= U'\U00018D00'; - case 33: return false; - case 34: return false; - case 35: return codepoint <= U'\U0001B11E' || (codepoint >= U'\U0001B150' && codepoint <= U'\U0001B152') - || (codepoint >= U'\U0001B164' && codepoint <= U'\U0001B167') || codepoint >= U'\U0001B170'; - case 36: return codepoint <= U'\U0001BC6A' || (codepoint >= U'\U0001BC70' && codepoint <= U'\U0001BC7C') - || (codepoint >= U'\U0001BC80' && codepoint <= U'\U0001BC88') || codepoint >= U'\U0001BC90'; - case 37: return false; - case 38: - { - if (codepoint < U'\U0001D400' || codepoint > U'\U0001D7CB') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\U0001D400', U'\U0001D7CB'); - switch ((static_cast(codepoint) - 0x1D400u) / 61u) - { - case 1: return codepoint != U'\U0001D455'; - case 2: return (1ull << 
(static_cast(codepoint) - 0x1D47Aull)) & 0x1FF79937FFFFFFFFull; - case 3: return (1ull << (static_cast(codepoint) - 0x1D4B7ull)) & 0x1FFFFFFFFFFFDFD7ull; - case 4: return (1ull << (static_cast(codepoint) - 0x1D4F4ull)) & 0x1FFFFDFDFE7BFFFFull; - case 5: return (1ull << (static_cast(codepoint) - 0x1D531ull)) & 0x1FFFFFFEFE2FBDFFull; - case 11: return (1ull << (static_cast(codepoint) - 0x1D69Full)) & 0xFFFFFFBFFFFFE7Full; - case 12: return (1ull << (static_cast(codepoint) - 0x1D6DCull)) & 0x1DFFFFFF7FFFFFFFull; - case 13: return (1ull << (static_cast(codepoint) - 0x1D719ull)) & 0x1FBFFFFFEFFFFFFFull; - case 14: return (1ull << (static_cast(codepoint) - 0x1D756ull)) & 0x1FF7FFFFFDFFFFFFull; - case 15: return (1ull << (static_cast(codepoint) - 0x1D793ull)) & 0x1FEFFFFFFBFFFFFull; - default: return true; - } - //# chunk summary: 936 codepoints from 30 ranges (spanning a search area of 972) - } - case 39: - { - if (codepoint < U'\U0001E100' || codepoint > U'\U0001E94B') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\U0001E100', U'\U0001E94B'); - switch ((static_cast(codepoint) - 0x1E100u) / 63u) - { - case 0: return codepoint <= U'\U0001E12C' || codepoint >= U'\U0001E137'; - case 1: return true; - case 7: return true; - case 28: return true; - case 29: return true; - case 30: return true; - case 31: return true; - case 32: return true; - case 33: return codepoint <= U'\U0001E943' || codepoint >= U'\U0001E94B'; - default: return false; - } - //# chunk summary: 363 codepoints from 7 ranges (spanning a search area of 2124) - } - case 40: - { - if (codepoint < U'\U0001EE00' || codepoint > U'\U0001EEBB') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\U0001EE00', U'\U0001EEBB'); - switch ((static_cast(codepoint) - 0x1EE00u) / 63u) - { - case 0: return (1ull << (static_cast(codepoint) - 0x1EE00ull)) & 0xAF7FE96FFFFFFEFull; - case 1: return (1ull << (static_cast(codepoint) - 0x1EE42ull)) & 0x7BDFDE5AAA5BAA1ull; - case 2: return (1ull << (static_cast(codepoint) - 
0x1EE7Eull)) & 0x3FFFEFB83FFFEFFDull; + case 0x00: return (cp <= U'\U0001EE3B' && (1ull << (static_cast(cp) - 0x1EE00ull)) & 0xAF7FE96FFFFFFEFull); + case 0x01: return (cp >= U'\U0001EE42' && cp <= U'\U0001EE7E' && (1ull << (static_cast(cp) - 0x1EE42ull)) & 0x17BDFDE5AAA5BAA1ull); + case 0x02: return (1ull << (static_cast(cp) - 0x1EE80ull)) & 0xFFFFBEE0FFFFBFFull; TOML_NO_DEFAULT_CASE; } - //# chunk summary: 141 codepoints from 33 ranges (spanning a search area of 188) } - case 55: return codepoint <= U'\U0002A6DC' || codepoint >= U'\U0002A700'; - case 56: return codepoint <= U'\U0002B733' || (codepoint >= U'\U0002B740' && codepoint <= U'\U0002B81C') - || codepoint >= U'\U0002B820'; - case 58: return codepoint <= U'\U0002CEA0' || codepoint >= U'\U0002CEB0'; - default: return true; + case 0x29: return cp >= U'\U00020000'; + case 0x37: return (cp >= U'\U0002A4C7' && cp <= U'\U0002A6DC') || (cp >= U'\U0002A700' && cp <= U'\U0002B111'); + case 0x38: return (cp >= U'\U0002B112' && cp <= U'\U0002B733') || (cp >= U'\U0002B740' && cp <= U'\U0002B81C') + || (cp >= U'\U0002B820' && cp <= U'\U0002BD5C'); + case 0x3A: return (cp >= U'\U0002C9A8' && cp <= U'\U0002CEA0') || (cp >= U'\U0002CEB0' && cp <= U'\U0002D5F2'); + case 0x3C: return cp <= U'\U0002EBDF'; + case 0x3D: return (cp >= U'\U0002F800' && cp <= U'\U0002FA1D'); + case 0x3E: return cp >= U'\U00030000'; + TOML_NO_DEFAULT_CASE; } - //# chunk summary: 131178 codepoints from 620 ranges (spanning a search area of 201376) } - //# Returns true if a codepoint belongs to any of these categories: Nd, Nl + //# Returns true if a codepoint belongs to any of these categories: + //# Nd, Nl [[nodiscard]] TOML_GNU_ATTR(const) - constexpr bool is_unicode_number(char32_t codepoint) noexcept + constexpr bool is_unicode_number(char32_t cp) noexcept { - if (codepoint < U'\u0660' || codepoint > U'\U0001FBF9') + if (cp < U'\u0660' || cp > U'\U0001FBF9') return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\u0660', U'\U0001FBF9'); - switch 
((static_cast(codepoint) - 0x660u) / 2007u) + + const auto child_index_0 = (static_cast(cp) - 0x660ull) / 0x7D7ull; + if ((1ull << child_index_0) & 0x47FFDFE07FCFFFD0ull) + return false; + switch (child_index_0) { - case 0: + case 0x00: // [0] 0660 - 0E36 { - if (codepoint > U'\u0DEF') + if (cp > U'\u0DEF') return false; - - return ((static_cast(codepoint) - 0x660u) / 63u) & 0x55555025ull; - //# chunk summary: 130 codepoints from 13 ranges (spanning a search area of 1936) - } - case 1: - { - if (codepoint < U'\u0E50' || codepoint > U'\u1099') - return false; - - return ((static_cast(codepoint) - 0xE50u) / 59u) & 0x30Dull; - //# chunk summary: 50 codepoints from 5 ranges (spanning a search area of 586) - } - case 2: - { - if (codepoint < U'\u16EE' || codepoint > U'\u1C59') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\u16EE', U'\u1C59'); - switch ((static_cast(codepoint) - 0x16EEu) / 64u) + TOML_ASSUME(cp >= U'\u0660'); + + constexpr uint_least64_t lookup_table_1[] = { - case 0: return true; - case 3: return true; - case 4: return true; - case 9: return true; - case 11: return true; - case 14: return codepoint <= U'\u1A89' || codepoint >= U'\u1A90'; - case 17: return true; - case 19: return true; - case 21: return codepoint <= U'\u1C49' || codepoint >= U'\u1C50'; - default: return false; - } - //# chunk summary: 103 codepoints from 11 ranges (spanning a search area of 1388) + 0x00000000000003FFull, 0x0000000000000000ull, 0x0000000003FF0000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x000003FF00000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x000000000000FFC0ull, 0x0000000000000000ull, 0x000000000000FFC0ull, 0x0000000000000000ull, + 0x000000000000FFC0ull, 0x0000000000000000ull, 0x000000000000FFC0ull, 0x0000000000000000ull, + 0x000000000000FFC0ull, 0x0000000000000000ull, 0x000000000000FFC0ull, 0x0000000000000000ull, + 0x000000000000FFC0ull, 
0x0000000000000000ull, 0x000000000000FFC0ull, 0x0000000000000000ull, + 0x000000000000FFC0ull, 0x0000000000000000ull, 0x000000000000FFC0ull, + }; + return lookup_table_1[(static_cast(cp) - 0x660ull) / 0x40ull] + & (0x1ull << ((static_cast(cp) - 0x660ull) % 0x40ull)); } - case 3: return codepoint <= U'\u2182' || codepoint >= U'\u2185'; - case 5: return (1ull << (static_cast(codepoint) - 0x3007ull)) & 0xE0007FC000001ull; - case 20: + case 0x01: // [1] 0E37 - 160D { - if (codepoint < U'\uA620' || codepoint > U'\uAA59') + if (cp < U'\u0E50' || cp > U'\u1099') return false; - - return ((static_cast(codepoint) - 0xA620u) / 64u) & 0x1CC09ull; - //# chunk summary: 70 codepoints from 7 ranges (spanning a search area of 1082) - } - case 21: return true; - case 31: return true; - case 32: - { - if (codepoint < U'\U00010140' || codepoint > U'\U000104A9') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\U00010140', U'\U000104A9'); - switch ((static_cast(codepoint) - 0x10140u) / 63u) + + constexpr uint_least64_t lookup_table_1[] = { - case 0: return true; - case 8: return codepoint <= U'\U00010341' || codepoint >= U'\U0001034A'; - case 10: return true; - case 13: return true; - default: return false; - } - //# chunk summary: 70 codepoints from 5 ranges (spanning a search area of 874) + 0x00000000000003FFull, 0x0000000000000000ull, 0x00000000000003FFull, 0x0000000003FF0000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x03FF000000000000ull, + 0x0000000000000000ull, 0x00000000000003FFull, + }; + return lookup_table_1[(static_cast(cp) - 0xE50ull) / 0x40ull] + & (0x1ull << ((static_cast(cp) - 0xE50ull) % 0x40ull)); } - case 33: return codepoint <= U'\U00010D39' || codepoint >= U'\U00011066'; - case 34: + case 0x02: // [2] 160E - 1DE4 { - if (codepoint < U'\U000110F0' || codepoint > U'\U00011739') + if (cp < U'\u16EE' || cp > U'\u1C59') return false; - - return ((static_cast(codepoint) - 0x110F0u) / 62u) & 0x341610Bull; - //# chunk summary: 90 
codepoints from 9 ranges (spanning a search area of 1610) + + constexpr uint_least64_t lookup_table_1[] = + { + 0x0000000000000007ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0FFC000000000000ull, + 0x00000FFC00000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x00000003FF000000ull, 0x0000000000000000ull, 0x00000FFC00000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x00000FFC0FFC0000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x00000FFC00000000ull, 0x0000000000000000ull, 0x0000000000000FFCull, + 0x0000000000000000ull, 0x00000FFC0FFC0000ull, + }; + return lookup_table_1[(static_cast(cp) - 0x16EEull) / 0x40ull] + & (0x1ull << ((static_cast(cp) - 0x16EEull) % 0x40ull)); } - case 35: + case 0x03: return (cp >= U'\u2160' && cp <= U'\u2188' && (1ull << (static_cast(cp) - 0x2160ull)) & 0x1E7FFFFFFFFull); + case 0x05: return (cp >= U'\u3007' && cp <= U'\u303A' && (1ull << (static_cast(cp) - 0x3007ull)) & 0xE0007FC000001ull); + case 0x14: // [20] A32C - AB02 { - if (codepoint < U'\U000118E0' || codepoint > U'\U00011DA9') + if (cp < U'\uA620' || cp > U'\uAA59') return false; - - return ((static_cast(codepoint) - 0x118E0u) / 62u) & 0xC4003ull; - //# chunk summary: 50 codepoints from 5 ranges (spanning a search area of 1226) + + constexpr uint_least64_t lookup_table_1[] = + { + 0x00000000000003FFull, 0x0000000000000000ull, 0x0000000000000000ull, 0x000000000000FFC0ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x03FF000000000000ull, 0x000003FF00000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x03FF000000000000ull, 0x0000000003FF0000ull, + 0x03FF000000000000ull, + }; + return lookup_table_1[(static_cast(cp) - 0xA620ull) / 0x40ull] + & (0x1ull << ((static_cast(cp) - 0xA620ull) % 0x40ull)); } - case 36: return true; - case 45: return codepoint <= U'\U00016A69' || codepoint >= U'\U00016B50'; - 
case 59: return true; - case 60: return codepoint <= U'\U0001E149' || codepoint >= U'\U0001E2F0'; - case 61: return true; - case 63: return true; - default: return false; + case 0x15: return (cp >= U'\uABF0' && cp <= U'\uABF9'); + case 0x1F: return (cp >= U'\uFF10' && cp <= U'\uFF19'); + case 0x20: // [32] 10140 - 10916 + { + if (cp > U'\U000104A9') + return false; + TOML_ASSUME(cp >= U'\U00010140'); + + constexpr uint_least64_t lookup_table_1[] = + { + 0x001FFFFFFFFFFFFFull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000402ull, 0x0000000000000000ull, 0x00000000003E0000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x000003FF00000000ull, + }; + return lookup_table_1[(static_cast(cp) - 0x10140ull) / 0x40ull] + & (0x1ull << (static_cast(cp) % 0x40ull)); + } + case 0x21: return (cp >= U'\U00010D30' && cp <= U'\U00010D39') || (cp >= U'\U00011066' && cp <= U'\U0001106F'); + case 0x22: // [34] 110EE - 118C4 + { + if (cp < U'\U000110F0' || cp > U'\U00011739') + return false; + + constexpr uint_least64_t lookup_table_1[] = + { + 0x00000000000003FFull, 0x000000000000FFC0ull, 0x0000000000000000ull, 0x000003FF00000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x00000000000003FFull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x000003FF00000000ull, 0x0000000000000000ull, 0x000003FF00000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x000003FF00000000ull, 0x0000000000000000ull, 0x0000000003FF0000ull, + 0x0000000000000000ull, 0x00000000000003FFull, + }; + return lookup_table_1[(static_cast(cp) - 0x110F0ull) / 0x40ull] + & (0x1ull << ((static_cast(cp) - 0x110F0ull) % 0x40ull)); + } + case 0x23: // [35] 118C5 - 1209B + { + if (cp < U'\U000118E0' || cp > 
U'\U00011DA9') + return false; + + constexpr uint_least64_t lookup_table_1[] = + { + 0x00000000000003FFull, 0x03FF000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x03FF000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x03FF000000000000ull, 0x0000000000000000ull, 0x00000000000003FFull, + }; + return lookup_table_1[(static_cast(cp) - 0x118E0ull) / 0x40ull] + & (0x1ull << ((static_cast(cp) - 0x118E0ull) % 0x40ull)); + } + case 0x24: return (cp >= U'\U00012400' && cp <= U'\U0001246E'); + case 0x2D: return (cp >= U'\U00016A60' && cp <= U'\U00016A69') || (cp >= U'\U00016B50' && cp <= U'\U00016B59'); + case 0x3B: return (cp >= U'\U0001D7CE' && cp <= U'\U0001D7FF'); + case 0x3C: return (cp >= U'\U0001E140' && cp <= U'\U0001E149') || (cp >= U'\U0001E2F0' && cp <= U'\U0001E2F9'); + case 0x3D: return (cp >= U'\U0001E950' && cp <= U'\U0001E959'); + case 0x3F: return cp >= U'\U0001FBF0'; + TOML_NO_DEFAULT_CASE; } - //# chunk summary: 876 codepoints from 72 ranges (spanning a search area of 128410) } - //# Returns true if a codepoint belongs to any of these categories: Mn, Mc + //# Returns true if a codepoint belongs to any of these categories: + //# Mn, Mc [[nodiscard]] TOML_GNU_ATTR(const) - constexpr bool is_unicode_combining_mark(char32_t codepoint) noexcept + constexpr bool is_unicode_combining_mark(char32_t cp) noexcept { - if (codepoint < U'\u0300' || codepoint > U'\U000E01EF') + if (cp < U'\u0300' || cp > U'\U000E01EF') return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\u0300', U'\U000E01EF'); - switch ((static_cast(codepoint) - 0x300u) / 14332u) + + const auto child_index_0 = (static_cast(cp) - 0x300ull) / 0x37FCull; + if ((1ull << child_index_0) & 0x7FFFFFFFFFFFFE02ull) + return false; + switch 
(child_index_0) { - case 0: + case 0x00: // [0] 0300 - 3AFB { - if (codepoint > U'\u309A') + if (cp > U'\u309A') return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\u0300', U'\u309A'); - switch ((static_cast(codepoint) - 0x300u) / 1460u) + TOML_ASSUME(cp >= U'\u0300'); + + const auto child_index_1 = (static_cast(cp) - 0x300ull) / 0xB7ull; + if ((1ull << child_index_1) & 0x63FFFDC00FB00002ull) + return false; + switch (child_index_1) { - case 0: + case 0x00: return cp <= U'\u036F'; + case 0x02: return (cp >= U'\u0483' && cp <= U'\u0487'); + case 0x03: return (cp >= U'\u0591' && cp <= U'\u05C7' && (1ull << (static_cast(cp) - 0x591ull)) & 0x5B5FFFFFFFFFFFull); + case 0x04: return (cp >= U'\u0610' && cp <= U'\u061A') || (cp >= U'\u064B' && cp <= U'\u065F') + || cp == U'\u0670'; + case 0x05: // [5] 0693 - 0749 { - if (codepoint > U'\u085B') + if (cp < U'\u06D6') return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\u0300', U'\u085B'); - switch ((static_cast(codepoint) - 0x300u) / 63u) + TOML_ASSUME(cp <= U'\u0749'); + + switch ((static_cast(cp) - 0x6D6ull) / 0x40ull) { - case 0: return true; - case 1: return true; - case 6: return true; - case 10: return true; - case 11: return (1u << (static_cast(codepoint) - 0x5B5u)) & 0x5B5FFu; - case 12: return true; - case 13: return codepoint <= U'\u065F' || codepoint >= U'\u0670'; - case 15: return (1u << (static_cast(codepoint) - 0x6D6u)) & 0xF67E7Fu; - case 16: return true; - case 17: return true; - case 18: return true; - case 19: return codepoint <= U'\u07B0' || codepoint >= U'\u07EB'; - case 20: return (1ull << (static_cast(codepoint) - 0x7ECull)) & 0x6EFFBC00000200FFull; - case 21: return codepoint <= U'\u082D' || codepoint >= U'\u0859'; - default: return false; - } - //# chunk summary: 293 codepoints from 24 ranges (spanning a search area of 1372) - } - case 1: - { - if (codepoint < U'\u08D3' || codepoint > U'\u0E4E') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\u08D3', U'\u0E4E'); - switch ((static_cast(codepoint) - 
0x8D3u) / 64u) - { - case 0: return codepoint != U'\u08E2'; - case 1: return (1u << (static_cast(codepoint) - 0x93Au)) & 0x1BFFFF7u; - case 2: return (1ull << (static_cast(codepoint) - 0x953ull)) & 0x1C0000001801Full; - case 3: return (1u << (static_cast(codepoint) - 0x9BCu)) & 0x399FDu; - case 4: return (1ull << (static_cast(codepoint) - 0x9D7ull)) & 0x1C8000001801ull; - case 5: return (1u << (static_cast(codepoint) - 0xA3Cu)) & 0x23987Du; - case 6: return (1u << (static_cast(codepoint) - 0xA70u)) & 0xE0023u; - case 7: return (1u << (static_cast(codepoint) - 0xABCu)) & 0x3BBFDu; - case 8: return (1ull << (static_cast(codepoint) - 0xAE2ull)) & 0x3BF000003ull; - case 9: return (1u << (static_cast(codepoint) - 0xB3Cu)) & 0x399FDu; - case 10: return (1ull << (static_cast(codepoint) - 0xB55ull)) & 0x200000006007ull; - case 11: return (1u << (static_cast(codepoint) - 0xBBEu)) & 0xF71Fu; - case 12: return codepoint <= U'\u0BD7' || codepoint >= U'\u0C00'; - case 13: return (1u << (static_cast(codepoint) - 0xC3Eu)) & 0xF77Fu; - case 14: return (1ull << (static_cast(codepoint) - 0xC55ull)) & 0x700000006003ull; - case 15: return (1u << (static_cast(codepoint) - 0xCBCu)) & 0x3DDFDu; - case 16: return (1ull << (static_cast(codepoint) - 0xCD5ull)) & 0x780000006003ull; - case 17: return (1u << (static_cast(codepoint) - 0xD3Bu)) & 0x7BBFBu; - case 18: return (1ull << (static_cast(codepoint) - 0xD57ull)) & 0x1C0000001801ull; - case 19: return codepoint <= U'\u0DCA' || codepoint >= U'\u0DCF'; - case 20: return (1ull << (static_cast(codepoint) - 0xDD3ull)) & 0x180001FEBull; - case 21: return (1u << (static_cast(codepoint) - 0xE31u)) & 0x3FC003F9u; + case 0x00: return (cp <= U'\u0711' && (1ull << (static_cast(cp) - 0x6D6ull)) & 0x800000000F67E7Full); + case 0x01: return cp >= U'\u0730'; TOML_NO_DEFAULT_CASE; } - //# chunk summary: 282 codepoints from 70 ranges (spanning a search area of 1404) } - case 2: + case 0x06: // [6] 074A - 0800 { - if (codepoint < U'\u0EB1' || codepoint > 
U'\u135F') + if (cp > U'\u07FD') return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\u0EB1', U'\u135F'); - switch ((static_cast(codepoint) - 0xEB1u) / 64u) + TOML_ASSUME(cp >= U'\u074A'); + + switch ((static_cast(cp) - 0x74Aull) / 0x40ull) { - case 0: return (1u << (static_cast(codepoint) - 0xEB1u)) & 0x1F800FF9u; - case 1: return true; - case 2: return (1u << (static_cast(codepoint) - 0xF35u)) & 0x615u; - case 3: return (1ull << (static_cast(codepoint) - 0xF71ull)) & 0xFFFFFF7FF06FFFFFull; - case 4: return codepoint <= U'\u0FBC' || codepoint >= U'\u0FC6'; - case 5: return true; - case 6: return (1ull << (static_cast(codepoint) - 0x1031ull)) & 0x1FCEE1E000003FFFull; - case 7: return (1ull << (static_cast(codepoint) - 0x1071ull)) & 0x1E005FFE000Full; - case 18: return true; - default: return false; - } - //# chunk summary: 154 codepoints from 23 ranges (spanning a search area of 1199) - } - case 3: - { - if (codepoint < U'\u1712' || codepoint > U'\u193B') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\u1712', U'\u193B'); - switch ((static_cast(codepoint) - 0x1712u) / 62u) - { - case 0: return codepoint <= U'\u1714' || codepoint >= U'\u1732'; - case 1: return codepoint <= U'\u1753' || codepoint >= U'\u1772'; - case 3: return codepoint <= U'\u17D3' || codepoint >= U'\u17DD'; - case 6: return codepoint <= U'\u1886' || codepoint >= U'\u18A9'; - case 7: return false; - case 8: return codepoint <= U'\u192B' || codepoint >= U'\u1930'; - default: return true; - } - //# chunk summary: 73 codepoints from 11 ranges (spanning a search area of 554) - } - case 4: - { - if (codepoint < U'\u1A17' || codepoint > U'\u1DFF') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\u1A17', U'\u1DFF'); - switch ((static_cast(codepoint) - 0x1A17u) / 63u) - { - case 0: return codepoint <= U'\u1A1B' || codepoint >= U'\u1A55'; - case 1: return (1ull << (static_cast(codepoint) - 0x1A56ull)) & 0x27FFFFFFDFFull; - case 2: return codepoint != U'\u1A95'; - case 5: return codepoint <= U'\u1B73' 
|| codepoint >= U'\u1B80'; - case 9: return false; - case 10: return false; - case 11: return (1ull << (static_cast(codepoint) - 0x1CD0ull)) & 0x39021FFFFF7ull; - case 12: return false; - case 13: return false; - case 15: return codepoint != U'\u1DFA'; - default: return true; - } - //# chunk summary: 234 codepoints from 20 ranges (spanning a search area of 1001) - } - case 5: return (1ull << (static_cast(codepoint) - 0x20D0ull)) & 0x1FFE21FFFull; - case 7: - { - if (codepoint < U'\u2CEF') - return false; - - return ((static_cast(codepoint) - 0x2CEFu) / 63u) & 0x601Dull; - //# chunk summary: 44 codepoints from 5 ranges (spanning a search area of 940) - } - default: return false; - } - //# chunk summary: 1106 codepoints from 156 ranges (spanning a search area of 11675) - } - case 2: - { - if (codepoint < U'\uA66F' || codepoint > U'\uAAEF') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\uA66F', U'\uAAEF'); - switch ((static_cast(codepoint) - 0xA66Fu) / 61u) - { - case 0: return (1ull << (static_cast(codepoint) - 0xA66Full)) & 0x1800000007FE1ull; - case 1: return false; - case 3: return false; - case 4: return false; - case 5: return false; - case 6: return (1u << (static_cast(codepoint) - 0xA802u)) & 0x211u; - case 7: return codepoint <= U'\uA827' || codepoint >= U'\uA82C'; - case 10: return codepoint <= U'\uA8F1' || codepoint >= U'\uA8FF'; - case 11: return codepoint <= U'\uA92D' || codepoint >= U'\uA947'; - case 12: return codepoint <= U'\uA953' || codepoint >= U'\uA980'; - case 16: return (1ull << (static_cast(codepoint) - 0xAA43ull)) & 0x100000000000601ull; - case 17: return (1ull << (static_cast(codepoint) - 0xAA7Cull)) & 0x19D0000000000003ull; - case 18: return (1ull << (static_cast(codepoint) - 0xAABEull)) & 0x3E0000000000Bull; - default: return true; - } - //# chunk summary: 137 codepoints from 28 ranges (spanning a search area of 1153) - } - case 3: return codepoint <= U'\uAAF6' || (codepoint >= U'\uABE3' && codepoint <= U'\uABEA') - || codepoint >= 
U'\uABEC'; - case 4: - { - if (codepoint < U'\uFB1E' || codepoint > U'\U00011A99') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\uFB1E', U'\U00011A99'); - switch ((static_cast(codepoint) - 0xFB1Eu) / 1008u) - { - case 0: return codepoint == U'\uFB1E' || (codepoint >= U'\uFE00' && codepoint <= U'\uFE0F') - || codepoint >= U'\uFE20'; - case 1: return codepoint <= U'\U000101FD' || codepoint >= U'\U000102E0'; - case 3: return (1ull << (static_cast(codepoint) - 0x10A01ull)) & 0x4380000000007837ull; - case 4: return codepoint <= U'\U00010AE6' || (codepoint >= U'\U00010D24' && codepoint <= U'\U00010D27') - || codepoint >= U'\U00010EAB'; - case 5: - { - if (codepoint < U'\U00010F46' || codepoint > U'\U0001123E') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\U00010F46', U'\U0001123E'); - switch ((static_cast(codepoint) - 0x10F46u) / 64u) - { - case 1: return false; - case 4: return codepoint <= U'\U00011046' || codepoint >= U'\U0001107F'; - case 7: return codepoint <= U'\U00011134' || codepoint >= U'\U00011145'; - case 8: return (1ull << (static_cast(codepoint) - 0x11146ull)) & 0x1C00200000000001ull; - case 10: return codepoint != U'\U000111C6'; - case 11: return codepoint <= U'\U00011237' || codepoint >= U'\U0001123E'; - default: return true; - } - //# chunk summary: 100 codepoints from 15 ranges (spanning a search area of 761) - } - case 6: - { - if (codepoint < U'\U000112DF' || codepoint > U'\U000116AD') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\U000112DF', U'\U000116AD'); - switch ((static_cast(codepoint) - 0x112DFu) / 61u) - { - case 0: return codepoint <= U'\U000112EA' || codepoint >= U'\U00011300'; - case 1: return (1u << (static_cast(codepoint) - 0x1133Bu)) & 0x100733FBu; - case 2: return (1u << (static_cast(codepoint) - 0x11362u)) & 0x7C7F3u; - case 3: return false; - case 4: return false; - case 8: return false; - case 9: return false; - case 10: return false; - case 11: return codepoint <= U'\U000115B5' || codepoint >= U'\U000115B8'; - 
case 12: return codepoint <= U'\U000115C0' || codepoint >= U'\U000115DC'; - default: return true; - } - //# chunk summary: 122 codepoints from 18 ranges (spanning a search area of 975) - } - case 7: - { - if (codepoint < U'\U000116AE') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\U000116AE', U'\U00011A99'); - switch ((static_cast(codepoint) - 0x116AEu) / 63u) - { - case 0: return true; - case 1: return true; - case 6: return true; - case 10: return (1u << (static_cast(codepoint) - 0x11930u)) & 0xD79BFu; - case 12: return codepoint <= U'\U000119D7' || codepoint >= U'\U000119DA'; - case 13: return codepoint <= U'\U000119E4' || codepoint >= U'\U00011A01'; - case 14: return (1ull << (static_cast(codepoint) - 0x11A33ull)) & 0x1FFC0100F7Full; - case 15: return true; - default: return false; - } - //# chunk summary: 119 codepoints from 17 ranges (spanning a search area of 1004) - } - default: return true; - } - //# chunk summary: 402 codepoints from 63 ranges (spanning a search area of 8060) - } - case 5: - { - if (codepoint < U'\U00011C2F' || codepoint > U'\U00011EF6') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\U00011C2F', U'\U00011EF6'); - switch ((static_cast(codepoint) - 0x11C2Fu) / 60u) - { - case 0: return codepoint != U'\U00011C37'; - case 1: return true; - case 2: return codepoint != U'\U00011CA8'; - case 4: return (1u << (static_cast(codepoint) - 0x11D31u)) & 0x5FDA3Fu; - case 5: return (1u << (static_cast(codepoint) - 0x11D8Au)) & 0x1EDFu; - case 6: return true; - case 11: return true; - default: return false; - } - //# chunk summary: 85 codepoints from 13 ranges (spanning a search area of 712) - } - case 6: - { - if (codepoint < U'\U00016AF0' || codepoint > U'\U00016FF1') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\U00016AF0', U'\U00016FF1'); - switch ((static_cast(codepoint) - 0x16AF0u) / 62u) - { - case 0: return true; - case 1: return true; - case 18: return codepoint != U'\U00016F4C'; - case 19: return true; - case 20: return 
codepoint <= U'\U00016FE4' || codepoint >= U'\U00016FF0'; - default: return false; - } - //# chunk summary: 75 codepoints from 7 ranges (spanning a search area of 1282) - } - case 7: return true; - case 8: - { - if (codepoint < U'\U0001D165' || codepoint > U'\U0001E94A') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\U0001D165', U'\U0001E94A'); - switch ((static_cast(codepoint) - 0x1D165u) / 765u) - { - case 0: - { - if (codepoint > U'\U0001D244') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\U0001D165', U'\U0001D244'); - switch ((static_cast(codepoint) - 0x1D165u) / 56u) - { - case 0: return (1ull << (static_cast(codepoint) - 0x1D165ull)) & 0x7F3FC03F1Full; - case 2: return false; - default: return true; - } - //# chunk summary: 33 codepoints from 6 ranges (spanning a search area of 224) - } - case 2: return codepoint <= U'\U0001DA36' || codepoint >= U'\U0001DA3B'; - case 3: - { - if (codepoint < U'\U0001DA5C' || codepoint > U'\U0001DAAF') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\U0001DA5C', U'\U0001DAAF'); - switch ((static_cast(codepoint) - 0x1DA5Cu) / 42u) - { - case 0: return (1ull << (static_cast(codepoint) - 0x1DA5Cull)) & 0x1000201FFFFull; - case 1: return codepoint != U'\U0001DA86'; + case 0x00: return cp == U'\u074A'; + case 0x01: return (cp >= U'\u07A6' && cp <= U'\u07B0'); + case 0x02: return (cp >= U'\u07EB' && (1u << (static_cast(cp) - 0x7EBu)) & 0x401FFu); TOML_NO_DEFAULT_CASE; } - //# chunk summary: 39 codepoints from 5 ranges (spanning a search area of 84) } - case 4: return (1ull << (static_cast(codepoint) - 0x1E000ull)) & 0x7DBF9FFFF7Full; - case 5: return codepoint <= U'\U0001E136' || codepoint >= U'\U0001E2EC'; - case 7: return codepoint <= U'\U0001E8D6' || codepoint >= U'\U0001E944'; - default: return false; + case 0x07: // [7] 0801 - 08B7 + { + if (cp < U'\u0816' || cp > U'\u085B') + return false; + + switch ((static_cast(cp) - 0x816ull) / 0x40ull) + { + case 0x00: return (cp <= U'\u082D' && (1u << (static_cast(cp) - 
0x816u)) & 0xFBBFEFu); + case 0x01: return cp >= U'\u0859'; + TOML_NO_DEFAULT_CASE; + } + } + case 0x08: // [8] 08B8 - 096E + { + if (cp < U'\u08D3' || cp > U'\u0963') + return false; + + switch ((static_cast(cp) - 0x8D3ull) / 0x40ull) + { + case 0x00: return (cp <= U'\u0903' && (1ull << (static_cast(cp) - 0x8D3ull)) & 0x1FFFFFFFF7FFFull); + case 0x01: return (cp >= U'\u093A' && (1u << (static_cast(cp) - 0x93Au)) & 0x1BFFFF7u); + case 0x02: return (1u << (static_cast(cp) - 0x953u)) & 0x1801Fu; + TOML_NO_DEFAULT_CASE; + } + } + case 0x09: // [9] 096F - 0A25 + { + if (cp < U'\u0981' || cp > U'\u0A03') + return false; + + switch ((static_cast(cp) - 0x981ull) / 0x40ull) + { + case 0x00: return (1ull << (static_cast(cp) - 0x981ull)) & 0xE800000000000007ull; + case 0x01: return (cp <= U'\u09FE' && (1ull << (static_cast(cp) - 0x9C1ull)) & 0x2000000600401CCFull); + default: return true; + } + } + case 0x0A: // [10] 0A26 - 0ADC + { + if (cp < U'\u0A3C' || cp > U'\u0ACD') + return false; + + switch ((static_cast(cp) - 0xA3Cull) / 0x40ull) + { + case 0x00: return (cp <= U'\u0A75' && (1ull << (static_cast(cp) - 0xA3Cull)) & 0x23000000023987Dull); + case 0x01: return (cp >= U'\u0A81' && cp <= U'\u0A83'); + case 0x02: return (1u << (static_cast(cp) - 0xABCu)) & 0x3BBFDu; + TOML_NO_DEFAULT_CASE; + } + } + case 0x0B: // [11] 0ADD - 0B93 + { + if (cp < U'\u0AE2' || cp > U'\u0B82') + return false; + + switch ((static_cast(cp) - 0xAE2ull) / 0x40ull) + { + case 0x00: return (cp <= U'\u0B03' && (1ull << (static_cast(cp) - 0xAE2ull)) & 0x3BF000003ull); + case 0x01: return (cp >= U'\u0B3C' && cp <= U'\u0B57' && (1u << (static_cast(cp) - 0xB3Cu)) & 0xE0399FDu); + case 0x02: return (1ull << (static_cast(cp) - 0xB62ull)) & 0x100000003ull; + TOML_NO_DEFAULT_CASE; + } + } + case 0x0C: // [12] 0B94 - 0C4A + { + if (cp < U'\u0BBE') + return false; + TOML_ASSUME(cp <= U'\u0C4A'); + + switch ((static_cast(cp) - 0xBBEull) / 0x40ull) + { + case 0x00: return (cp <= U'\u0BD7' && (1u << 
(static_cast(cp) - 0xBBEu)) & 0x200F71Fu); + case 0x01: return (cp >= U'\u0C00' && cp <= U'\u0C04'); + case 0x02: return (1u << (static_cast(cp) - 0xC3Eu)) & 0x177Fu; + TOML_NO_DEFAULT_CASE; + } + } + case 0x0D: // [13] 0C4B - 0D01 + { + TOML_ASSUME(cp >= U'\u0C4B' && cp <= U'\u0D01'); + + switch ((static_cast(cp) - 0xC4Bull) / 0x40ull) + { + case 0x00: return (cp <= U'\u0C83' && (1ull << (static_cast(cp) - 0xC4Bull)) & 0x1C0000001800C07ull); + case 0x01: return (cp >= U'\u0CBC' && (1u << (static_cast(cp) - 0xCBCu)) & 0x5DFDu); + case 0x02: return (1ull << (static_cast(cp) - 0xCCBull)) & 0x60000001800C07ull; + TOML_NO_DEFAULT_CASE; + } + } + case 0x0E: // [14] 0D02 - 0DB8 + { + if (cp > U'\u0D83') + return false; + TOML_ASSUME(cp >= U'\u0D02'); + + switch ((static_cast(cp) - 0xD02ull) / 0x40ull) + { + case 0x00: return (1ull << (static_cast(cp) - 0xD02ull)) & 0xF600000000000003ull; + case 0x01: return (1ull << (static_cast(cp) - 0xD42ull)) & 0x8000000300200F77ull; + default: return true; + } + } + case 0x0F: // [15] 0DB9 - 0E6F + { + if (cp < U'\u0DCA' || cp > U'\u0E4E') + return false; + + switch ((static_cast(cp) - 0xDCAull) / 0x40ull) + { + case 0x00: return (cp <= U'\u0DF3' && (1ull << (static_cast(cp) - 0xDCAull)) & 0x300003FD7E1ull); + case 0x01: return (cp >= U'\u0E31' && (1u << (static_cast(cp) - 0xE31u)) & 0x1C003F9u); + default: return true; + } + } + case 0x10: // [16] 0E70 - 0F26 + { + if (cp < U'\u0EB1' || cp > U'\u0F19') + return false; + + switch ((static_cast(cp) - 0xEB1ull) / 0x40ull) + { + case 0x00: return (cp <= U'\u0ECD' && (1u << (static_cast(cp) - 0xEB1u)) & 0x1F800FF9u); + case 0x01: return cp >= U'\u0F18'; + TOML_NO_DEFAULT_CASE; + } + } + case 0x11: // [17] 0F27 - 0FDD + { + if (cp < U'\u0F35' || cp > U'\u0FC6') + return false; + + switch ((static_cast(cp) - 0xF35ull) / 0x40ull) + { + case 0x00: return (1ull << (static_cast(cp) - 0xF35ull)) & 0xF000000000000615ull; + case 0x01: return (1ull << (static_cast(cp) - 0xF75ull)) & 
0xFFFFFFF7FF06FFFFull; + case 0x02: return (1u << (static_cast(cp) - 0xFB5u)) & 0x200FFu; + TOML_NO_DEFAULT_CASE; + } + } + case 0x12: // [18] 0FDE - 1094 + { + if (cp < U'\u102B' || cp > U'\u108F') + return false; + + switch ((static_cast(cp) - 0x102Bull) / 0x40ull) + { + case 0x00: return (1ull << (static_cast(cp) - 0x102Bull)) & 0xF3B87800000FFFFFull; + case 0x01: return (1ull << (static_cast(cp) - 0x106Bull)) & 0x17FF8003C7ull; + TOML_NO_DEFAULT_CASE; + } + } + case 0x13: return (cp >= U'\u109A' && cp <= U'\u109D'); + case 0x16: return (cp >= U'\u135D' && cp <= U'\u135F'); + case 0x1C: // [28] 1704 - 17BA + { + if (cp < U'\u1712') + return false; + TOML_ASSUME(cp <= U'\u17BA'); + + switch ((static_cast(cp) - 0x1712ull) / 0x40ull) + { + case 0x00: return (cp <= U'\u1734' && (1ull << (static_cast(cp) - 0x1712ull)) & 0x700000007ull); + case 0x01: return (cp <= U'\u1773' && (1ull << (static_cast(cp) - 0x1752ull)) & 0x300000003ull); + case 0x02: return cp >= U'\u17B4'; + TOML_NO_DEFAULT_CASE; + } + } + case 0x1D: return (cp >= U'\u17BB' && cp <= U'\u17D3') || (cp >= U'\u180B' && cp <= U'\u180D') + || cp == U'\u17DD'; + case 0x1E: return (cp >= U'\u1885' && cp <= U'\u1886') || (cp >= U'\u1920' && cp <= U'\u1928') + || cp == U'\u18A9'; + case 0x1F: return (cp <= U'\u193B' && (1u << (static_cast(cp) - 0x1929u)) & 0x7FF87u); + case 0x20: // [32] 19E0 - 1A96 + { + if (cp < U'\u1A17' || cp > U'\u1A7F') + return false; + + switch ((static_cast(cp) - 0x1A17ull) / 0x40ull) + { + case 0x00: return (1ull << (static_cast(cp) - 0x1A17ull)) & 0xC00000000000001Full; + case 0x01: return (1ull << (static_cast(cp) - 0x1A57ull)) & 0x13FFFFFFEFFull; + TOML_NO_DEFAULT_CASE; + } + } + case 0x21: // [33] 1A97 - 1B4D + { + if (cp < U'\u1AB0' || cp > U'\u1B44') + return false; + + switch ((static_cast(cp) - 0x1AB0ull) / 0x40ull) + { + case 0x00: return (cp <= U'\u1AC0' && (1u << (static_cast(cp) - 0x1AB0u)) & 0x1BFFFu); + case 0x01: return (cp >= U'\u1B00' && cp <= U'\u1B04'); + case 0x02: 
return cp >= U'\u1B34'; + TOML_NO_DEFAULT_CASE; + } + } + case 0x22: // [34] 1B4E - 1C04 + { + if (cp < U'\u1B6B' || cp > U'\u1BF3') + return false; + + switch ((static_cast(cp) - 0x1B6Bull) / 0x40ull) + { + case 0x00: return (1ull << (static_cast(cp) - 0x1B6Bull)) & 0xFFC0000000E001FFull; + case 0x01: return (1ull << (static_cast(cp) - 0x1BABull)) & 0xF800000000000007ull; + default: return true; + } + } + case 0x23: return (cp >= U'\u1C24' && cp <= U'\u1C37'); + case 0x24: return (cp >= U'\u1CD0' && cp <= U'\u1CF9' && (1ull << (static_cast(cp) - 0x1CD0ull)) & 0x39021FFFFF7ull); + case 0x25: return (cp >= U'\u1DC0' && cp <= U'\u1DFF' && (1ull << (static_cast(cp) - 0x1DC0ull)) & 0xFBFFFFFFFFFFFFFFull); + case 0x29: return (cp >= U'\u20D0' && cp <= U'\u20F0' && (1ull << (static_cast(cp) - 0x20D0ull)) & 0x1FFE21FFFull); + case 0x3A: return (cp >= U'\u2CEF' && cp <= U'\u2CF1'); + case 0x3B: return (cp >= U'\u2DE0' && cp <= U'\u2DE3') || cp == U'\u2D7F'; + case 0x3C: return cp <= U'\u2DFF'; + case 0x3F: return (cp >= U'\u302A' && cp <= U'\u302F') || (cp >= U'\u3099' && cp <= U'\u309A'); + TOML_NO_DEFAULT_CASE; } - //# chunk summary: 223 codepoints from 21 ranges (spanning a search area of 6118) } - case 63: return true; - default: return false; + case 0x02: // [2] 72F8 - AAF3 + { + if (cp < U'\uA66F' || cp > U'\uAAEF') + return false; + + constexpr uint_least64_t lookup_table_1[] = + { + 0x0001800000007FE1ull, 0x0000000000000000ull, 0x0000000000000006ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x21F0000010880000ull, 0x0000000000000000ull, + 0x0000000000060000ull, 0xFFFE0000007FFFE0ull, 0x7F80000000010007ull, 0x0000001FFF000000ull, + 0x00000000001E0000ull, 0x004000000003FFF0ull, 0xFC00000000000000ull, 0x00000000601000FFull, + 0x0000000000007000ull, 0xF00000000005833Aull, 0x0000000000000001ull, + }; + return lookup_table_1[(static_cast(cp) - 0xA66Full) / 0x40ull] + & (0x1ull << ((static_cast(cp) - 0xA66Full) % 0x40ull)); + } + case 0x03: 
return (cp >= U'\uAAF5' && cp <= U'\uAAF6') || (cp >= U'\uABE3' && cp <= U'\uABEA') + || (cp >= U'\uABEC' && cp <= U'\uABED'); + case 0x04: // [4] E2F0 - 11AEB + { + if (cp < U'\uFB1E' || cp > U'\U00011A99') + return false; + + constexpr uint_least64_t lookup_table_1[] = + { + 0x0000000000000001ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0003FFFC00000000ull, + 0x000000000003FFFCull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000080000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000004ull, + 0x0000000000000000ull, 0x000000001F000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0003C1B800000000ull, + 0x000000021C000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000180ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x00000000000003C0ull, 
0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000006000ull, 0x0000000000000000ull, + 0x0007FF0000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000001C00000000ull, + 0x000001FFFC000000ull, 0x0000001E00000000ull, 0x000000001FFC0000ull, 0x0000001C00000000ull, + 0x00000180007FFE00ull, 0x0000001C00200000ull, 0x00037807FFE00000ull, 0x0000000000000000ull, + 0x0000000103FFC000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000003C00001FFEull, + 0x0200E67F60000000ull, 0x00000000007C7F30ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x000001FFFF800000ull, 0x0000000000000001ull, 0x0000003FFFFC0000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0xC0000007FCFE0000ull, 0x0000000000000000ull, + 0x00000007FFFC0000ull, 0x0000000000000000ull, 0x0000000003FFE000ull, 0x8000000000000000ull, + 0x0000000000003FFFull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x000000001FFFC000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x00000035E6FC0000ull, 0x0000000000000000ull, 0xF3F8000000000000ull, 0x00001FF800000047ull, + 0x3FF80201EFE00000ull, 0x0FFFF00000000000ull, + }; + return lookup_table_1[(static_cast(cp) - 0xFB1Eull) / 0x40ull] + & (0x1ull << ((static_cast(cp) - 0xFB1Eull) % 0x40ull)); + } + case 0x05: // [5] 11AEC - 152E7 + { + if (cp < U'\U00011C2F' || cp > U'\U00011EF6') + return false; + + constexpr uint_least64_t lookup_table_1[] = + { + 0x000000000001FEFFull, 0xFDFFFFF800000000ull, 0x00000000000000FFull, 0x0000000000000000ull, + 0x00000000017F68FCull, 0x000001F6F8000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x00000000000000F0ull, + }; + return lookup_table_1[(static_cast(cp) - 0x11C2Full) / 0x40ull] + & (0x1ull << ((static_cast(cp) - 0x11C2Full) % 0x40ull)); + } + case 0x06: // [6] 152E8 - 18AE3 + { + if (cp < 
U'\U00016AF0' || cp > U'\U00016FF1') + return false; + + constexpr uint_least64_t lookup_table_1[] = + { + 0x000000000000001Full, 0x000000000000007Full, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0xFFFFFFFE80000000ull, 0x0000000780FFFFFFull, 0x0010000000000000ull, + 0x0000000000000003ull, + }; + return lookup_table_1[(static_cast(cp) - 0x16AF0ull) / 0x40ull] + & (0x1ull << ((static_cast(cp) - 0x16AF0ull) % 0x40ull)); + } + case 0x07: return (cp >= U'\U0001BC9D' && cp <= U'\U0001BC9E'); + case 0x08: // [8] 1C2E0 - 1FADB + { + if (cp < U'\U0001D165' || cp > U'\U0001E94A') + return false; + + constexpr uint_least64_t lookup_table_1[] = + { + 0x0000007F3FC03F1Full, 0x00000000000001E0ull, 0x0000000000000000ull, 0x00000000E0000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0xFFFFFFFFF8000000ull, 0xFFFFFFFFFFC3FFFFull, + 0xF7C00000800100FFull, 0x00000000000007FFull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 
0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0xDFCFFFFBF8000000ull, 0x000000000000003Eull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x000000000003F800ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000780ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0003F80000000000ull, 0x0000000000000000ull, 0x0000003F80000000ull, + }; + return lookup_table_1[(static_cast(cp) - 0x1D165ull) / 0x40ull] + & (0x1ull << ((static_cast(cp) - 0x1D165ull) % 0x40ull)); + } + case 0x3F: return cp >= U'\U000E0100'; + TOML_NO_DEFAULT_CASE; } - //# chunk summary: 2282 codepoints from 293 ranges (spanning a search area of 917232) } -} - -#undef TOML_ASSUME_CODEPOINT_BETWEEN - -#endif // TOML_LANG_UNRELEASED + #endif // TOML_LANG_UNRELEASED +} // toml::impl diff --git a/include/toml++/toml_utf8_streams.h b/include/toml++/toml_utf8_streams.h index 0f12e21..2178e56 100644 --- a/include/toml++/toml_utf8_streams.h +++ b/include/toml++/toml_utf8_streams.h @@ -162,6 +162,14 @@ namespace toml::impl { return value; } + + [[nodiscard]] + TOML_GNU_ATTR(pure) + TOML_ALWAYS_INLINE + constexpr const char32_t& operator* () 
const noexcept + { + return value; + } }; static_assert(std::is_trivial_v); static_assert(std::is_standard_layout_v); @@ -399,13 +407,13 @@ namespace toml::impl else { // first character read from stream - if (!history.count && !head) TOML_UNLIKELY + if TOML_UNLIKELY(!history.count && !head) head = reader.read_next(); // subsequent characters and not eof else if (head) { - if (history.count < history_buffer_size) TOML_UNLIKELY + if TOML_UNLIKELY(history.count < history_buffer_size) history.buffer[history.count++] = *head; else history.buffer[(history.first++ + history_buffer_size) % history_buffer_size] = *head; diff --git a/python/generate_unicode_functions.py b/python/generate_unicode_functions.py index e34df0a..de452b8 100644 --- a/python/generate_unicode_functions.py +++ b/python/generate_unicode_functions.py @@ -4,6 +4,8 @@ # See https://github.com/marzer/tomlplusplus/blob/master/LICENSE for the full license text. # SPDX-License-Identifier: MIT +# godbolt session for experimenting with this script: https://godbolt.org/z/Vrcvqv + import sys import re import os @@ -11,40 +13,40 @@ import os.path as path import math import requests import traceback +import bisect -#### SETTINGS / MISC ######################################### +#### SETTINGS / MISC ################################################################################################## -class Settings: - binary_bitmasks = False - switch_case_limits = [64, 8] +class G: # G for Globals + hoist_constant_children = True + bitmask_expressions = True + elide_switches = False + lookup_tables = True + depth_limit = 0 + word_size = 64 def make_literal(codepoint): - if (codepoint > 0xFFFF): + if (32 <= codepoint < 127 and chr(codepoint).isprintable()): + return "U'{}'".format(chr(codepoint)) + elif (codepoint > 0xFFFF): return "U'\\U{:08X}'".format(codepoint) else: return "U'\\u{:04X}'".format(codepoint) -def make_bitmask(codepoint, bits = 64): - if (Settings.binary_bitmasks): - if (bits > 32): - return 
"0b{:064b}ull".format(codepoint) - else: - return "0b{:032b}u".format(codepoint) - else: - if (bits > 32): - return "0x{:X}ull".format(codepoint) - else: - return "0x{:X}u".format(codepoint) +def make_bitmask_literal(val, bits = 0): + if not bits: + bits = 64 if (val >> 32) > 0 else 32 + return "0x{:X}{}".format(val, 'ull' if bits > 32 else 'u') -def make_mask_from_indices(indices): +def make_bitmask_from_indices(indices): mask = 0 for i in indices: mask = mask | (1 << i) @@ -52,9 +54,22 @@ def make_mask_from_indices(indices): +def make_bitmask_index_test_expression(index, bitmask, index_offset = 0, bits = 0, cast = True): + if not bits: + bits = 64 if (bitmask >> 32) > 0 else 32 + suffix = 'ull' if bits >= 64 else 'u' + s = 'static_cast({})'.format(bits, index) if cast else str(index) + if index_offset != 0: + s = '({} {} 0x{:X}{})'.format(s, '-' if index_offset < 0 else '+', abs(index_offset), suffix) + return '(1{} << {}) & {}'.format(suffix, s, make_bitmask_literal(bitmask, bits)) + + + def range_first(r): if isinstance(r, int): return r + elif isinstance(r, range): + return r.start else: return r[0] @@ -63,455 +78,930 @@ def range_first(r): def range_last(r): if isinstance(r, int): return r + elif isinstance(r, range): + return r.stop - 1 # wrong for abs(step) != 1 but I don't do that in this script else: return r[1] +def range_union(first1, last1, first2, last2): + if last1 < first2-1 or last2 < first1-1: + return None + return (min(first1, first2), max(last1, last2)) + + + def is_pow2(v): return v & (v-1) == 0 -def calculate_subdivisions(span_size, level): - - # if it's a relatively small span, divide it such the effective size of each subchunk - # would be less than or equal to 64 so we'll generate bitmask ops - if (64 < span_size <= 4096): - return int(math.ceil(span_size / 64)) - - case_limit = Settings.switch_case_limits[min(len(Settings.switch_case_limits)-1, level)] - - # try to find a divisor that will yield a power-of-2 size - subdivs = case_limit - 
while (subdivs > 1): - subdiv_size = int(math.ceil(span_size / float(subdivs))) - if (subdiv_size > 1 and subdiv_size < span_size and is_pow2(subdiv_size)): - return subdivs - subdivs -= 1 - - # couldn't find divisor that would yield a power-of-2 size - subdivs = case_limit - while (subdivs > 1): - subdiv_size = int(math.ceil(span_size / float(subdivs))) - if (subdiv_size > 1 and subdiv_size < span_size): - return subdivs - subdivs /= 2 - - return subdivs +def in_collection(target, collection): + for v in collection: + if isinstance(v, (list, tuple, dict, set, range)): + if target in v: + return True + elif v == target: + return True + return False -#### CHUNK ################################################### +def binary_search(elements, value): + index = bisect.bisect_left(elements, value) + if index < len(elements) and elements[index] == value: + return index + return None -class Chunk: - def __init__(self, first, last, level=0): - self.first = int(first) - self.last = int(last) - self.level = level - self.span_size = (self.last - self.first) + 1 - self.count = 0 - self.ranges = [] - self.subchunks = None - self.subchunk_size = 0 - self.first_set = self.last + 1 - self.last_set = -1 - self.first_unset = self.first - self.all_div_by = None - self.all_div_by_add = None +def indent_with_tabs(text, count = 1): + if count == 0: + return text + indent = '\t' * count + return indent + ('\n' + indent).join(text.split('\n')) - def low_range_mask(self): - if self.count == 0: - return 0 - mask = 0 - bits = 0 - prev_last_unset = -1 - for r in self.ranges: - first = range_first(r) - last = range_last(r) - count = (last - first) + 1 - while (prev_last_unset >= 0 and prev_last_unset < first and bits < 64): - prev_last_unset += 1 - bits += 1 - if (bits >= 64): - break - while (count > 0 and bits < 64): - mask |= (1 << bits) - bits += 1 - count -= 1 - if (bits >= 64): - break - prev_last_unset = last + 1 - return mask +def compound_or(*bools): + if 'true' in bools: + return 
'true' + s = ' || '.join(bools) + if len(bools) > 1: + s = '({})'.format(s) + return s + +def compound_and(*bools): + if 'false' in bools: + return 'false' + s = ' && '.join(bools) + return s + + + +def ceil(val): + return int(math.ceil(val)) + + + +def calc_child_size(span_size): + + if span_size <= G.word_size: + return span_size + elif span_size <= G.word_size * G.word_size: + return G.word_size + else: + return ceil(span_size / float(G.word_size)) + + + +def largest(*collections): + if not collections: + return None + result = None + for c in collections: + if result is None or len(result) < len(c): + result = c + return result + + + +def smallest(*collections): + if not collections: + return None + result = None + for c in collections: + if result is None or len(result) < len(c): + result = c + return result + + + +def chunks(l, n): + n = max(1, n) + return (l[i:i+n] for i in range(0, len(l), n)) + + +#### SPARSE RANGE ##################################################################################################### + + +class SparseRange: + + def __init__(self, *inital_values): + self.__values = set() + self.__ranges = [] + self.__count = None + self.__first = None + self.__last = None + for v in inital_values: + self.add(v) + + def __add_value(self, val): + if not isinstance(val, int): + raise Exception('values must be integers') + self.__values.add(val) + + def __add_collection(self, col): + for val in col: + self.__add_value(val) + + def __add_range(self, first, last): + if (not isinstance(first, int)) or (not isinstance(last, int)): + raise Exception('ranges must be integral') + if last < first: + raise Exception('reverse ranges are not allowed') + elif first == last: + self.__add_value(first) + else: + self.__ranges.append((first, last)) + def add(self, first, last = None): - f = int(first) - num_added = 0 - if (last is None or first == last): - self.ranges.append(f) - self.count += 1 - self.last_set = max(self.last_set, f) - if (self.first_unset == 
f): - self.first_unset = f + 1 - else: - l = int(last) - self.ranges.append((f, l)) - self.count += (l - f) + 1 - self.last_set = max(self.last_set, l) - if (self.first_unset == f): - self.first_unset = l + 1 - self.first_set = min(self.first_set, f) - - - def analyze(self): - if (self.count > 0 and (self.first != self.first_set or self.last != self.last_set)): - raise Exception('cannot call analyze() on an untrimmed Chunk') - - self.all_div_by = None - self.all_div_by_add = None - if (self.span_size <= 1): - return - for div in range(2, 51): - for add in range(0, 50): - divisible = None - for r in self.ranges: - first = range_first(r) - last = range_last(r) - if (last < self.first_set): - continue - if (first > self.last_set): - break - first = max(first, self.first_set) - last = min(last, self.last_set) - - if (divisible is None): - divisible = True - for cp in range(first, last+1): - divisible = divisible and (((cp + add) % div) == 0) - if not divisible: - break - if not divisible: - break - - if divisible is not None and divisible: - self.all_div_by = div - if add != 0: - self.all_div_by_add = add - return - - - - def trim(self): - if (self.subchunks is not None - or self.count == 0 - or (self.first_set == self.first and self.last_set == self.last)): - return - - self.first = self.first_set - self.last = self.last_set - self.span_size = (self.last - self.first) + 1 - - - def subdivide(self): - if (self.count > 0 and (self.first != self.first_set or self.last != self.last_set)): - raise Exception('cannot call subdivide() on an untrimmed Chunk') - - if (self.subchunks is not None - or self.count >= self.span_size - 1 - or self.count <= 1 - or (self.last_set - self.first_set) + 1 <= 64 - or self.count == (self.last - self.first_set) + 1 - or self.count == (self.first_unset - self.first) - or self.count == (self.last_set - self.first_set) + 1 - or (len(self.ranges) == 2 and range_first(self.ranges[0]) == self.first and range_last(self.ranges[1]) == self.last) - or 
len(self.ranges) <= 4 - or self.all_div_by is not None - ): - return - subchunk_count = calculate_subdivisions(self.span_size, self.level) - if (subchunk_count <= 1): - return - subchunk_size = int(math.ceil(self.span_size / float(subchunk_count))) - if (subchunk_size <= 4): - return - - self.subchunks = [] - self.subchunk_size = subchunk_size - for subchunk in range(subchunk_count): - self.subchunks.append( - Chunk( - self.first + (subchunk * self.subchunk_size), - min(self.first + (((subchunk + 1) * self.subchunk_size) - 1), self.last), - self.level + 1 - ) - ) - for r in self.ranges: - if (isinstance(r, int)): - subchunk = int((r - self.first) / self.subchunk_size) - self.subchunks[subchunk].add(r) + if self.__count is not None: + raise Exception('finish() has been called') + if last is None: + if isinstance(first, range): + if first.step != 1: + raise Exception('ranges must be contiguous') + self.__add_range(first.start, first.stop-1) + elif isinstance(first, (list, tuple, dict, set)): + self.__add_collection(first) else: - start_chunk = int((r[0] - self.first) / self.subchunk_size) - end_chunk = int((r[1] - self.first) / self.subchunk_size) - for subchunk in range(start_chunk, end_chunk+1): - self.subchunks[subchunk].add( - max(r[0], self.subchunks[subchunk].first), - min(r[1], self.subchunks[subchunk].last), - ) - #self.ranges = None - for subchunk in self.subchunks: - subchunk.trim() - subchunk.analyze() - subchunk.subdivide() + self.__add_value(first) + else: + self.__add_range(first, last) + def finished(self): + return self.__count is not None + + def finish(self): + if self.finished(): + raise Exception('finish() has already been called') + + self.__count = 0 + if len(self.__ranges) == 0 and len(self.__values) == 0: + return + + # convert sparse values to a list, sort them and convert contiguous spans into ranges + self.__values = [v for v in self.__values] + if len(self.__values) > 0: + self.__values.sort() + current_range = None + temp_values = [] + 
for v in self.__values: + if current_range is None: + current_range = [v, v] + elif v == current_range[1] + 1: + current_range[1] = v + else: + if (current_range[1] > current_range[0]): + self.__ranges.append((current_range[0], current_range[1])) + else: + temp_values.append(current_range[0]) + current_range = [v, v] + if (current_range[1] > current_range[0]): + self.__ranges.append((current_range[0], current_range[1])) + else: + temp_values.append(current_range[0]) + self.__values = temp_values + + # see if any of the remaining sparse values belong to any of the ranges or can be appended to one + if len(self.__values) > 0 and len(self.__ranges) > 0: + temp_values = [] + for v in self.__values: + matched = False + for r in range(0, len(self.__ranges)): + if v >= self.__ranges[r][0] and v <= self.__ranges[r][1]: + matched = True + break + elif v == self.__ranges[r][0] - 1: + self.__ranges[r] = (v, self.__ranges[r][1]) + matched = True + break + elif v == self.__ranges[r][1] + 1: + self.__ranges[r] = (self.__ranges[r][0], v) + matched = True + break + if not matched: + temp_values.append(v) + self.__values = temp_values + + # merge overlapping ranges, remove ranges completely contained by others + if len(self.__ranges) > 1: + while True: + pass_changed = False + for r1 in range(0, len(self.__ranges)): + for r2 in range(r1+1, len(self.__ranges)): + if self.__ranges[r1] is None or self.__ranges[r2] is None: + continue + union = range_union(self.__ranges[r1][0], self.__ranges[r1][1], self.__ranges[r2][0], self.__ranges[r2][1]) + if union is not None: + self.__ranges[r1] = union + self.__ranges[r2] = None + pass_changed = True + break + if pass_changed: + break; + if not pass_changed: + break; + self.__ranges = [r for r in self.__ranges if r is not None] + + # combine the sets of ranges and sparse values into a sorted list + self.__sparse_value_count = len(self.__values) + self.__contiguous_subrange_count = len(self.__ranges) + self.__values = self.__values + 
self.__ranges + self.__values.sort(key=range_first) + + # finalize + self.__ranges = None + self.__sparse_values = None + self.__first = range_first(self.__values[0]) + self.__last = range_last(self.__values[-1]) + for v in self.__values: + self.__count += (range_last(v) - range_first(v)) + 1 + + def __len__(self): + return self.__count if self.__count is not None else 0 + + def __bool__(self): + return self.__count is not None and self.__count > 0 + + def __contains__(self, val): + if not self.finished(): + raise Exception('finish() has not been called') + if not isinstance(val, int): + raise Exception('values must be integers') + if self.__count > 0 and self.__first <= val and self.__last >= val: + if self.__sparse_values is None: + self.__sparse_values = [v for v in self] + return binary_search(self.__sparse_values, val) is not None + return False + + def stringify(self, formatter = None, joiner = ", "): + if not self.finished(): + raise Exception('finish() has not been called') + if formatter is None: + return joiner.join(str(v) for v in self.__values) + else: + s = "" + for v in self.__values: + if len(s) > 0: + s += joiner + if isinstance(v, int): + s += formatter(v) + else: + s += formatter(v[0]) + " - " + formatter(v[1]) + return s + + def __str__(self): + return self.stringify() + + class __Iterator: + def __init__(self, values): + self.__values = values + self.__idx = 0 + self.__subidx = 0 + + def __iter__(self): + return self + + def __next__(self): + if not self.__values or self.__idx >= len(self.__values): + raise StopIteration + elem = self.__values[self.__idx] + if isinstance(elem, tuple): + val = elem[0] + self.__subidx + if val == elem[1]: + self.__idx = self.__idx + 1 + self.__subidx = 0 + else: + self.__subidx = self.__subidx + 1 + return val + else: + self.__idx = self.__idx + 1 + self.__subidx = 0 + return elem + + def __iter__(self): + if not self.finished(): + raise Exception('finish() has not been called') + return 
self.__Iterator(self.__values) + + def first(self): + if not self.finished(): + raise Exception('finish() has not been called') + return self.__first + + def last(self): + if not self.finished(): + raise Exception('finish() has not been called') + return self.__last + + def contiguous(self): + if not self.finished(): + raise Exception('finish() has not been called') + return self.__count > 0 and self.__count == (self.__last - self.__first + 1) + + def contiguous_subrange_count(self): + if not self.finished(): + raise Exception('finish() has not been called') + return self.__contiguous_subrange_count + + def contiguous_subranges(self): + if not self.finished(): + raise Exception('finish() has not been called') + for v in self.__values: + if isinstance(v, tuple): + yield v + + def sparse_value_count(self): + if not self.finished(): + raise Exception('finish() has not been called') + return self.__sparse_value_count + + def sparse_values(self): + if not self.finished(): + raise Exception('finish() has not been called') + for v in self.__values: + if not isinstance(v, tuple): + yield v + + + +#### CODEPOINT CHUNK ################################################################################################## + + +class CodepointChunk: + + class __Data: + def __init__(self, level = 0): + self.range = SparseRange() + self.level = level + self.span_first = None + self.span_last = None + + def __init__(self, data=None): + self.__finished = False + self.__children = None + self.__expr = None + self.__expr_clamp_low = False + self.__expr_clamp_high = False + if data is not None: + if not isinstance(data, self.__Data): + raise Exception("nope") + self.__data = data + self.__finish() + else: + self.__data = self.__Data() + + def range(self): + return self.__data.range + + def __bool__(self): + return bool(self.range()) + + def __len__(self): + return len(self.range()) + + def first(self): + return self.range().first() + + def last(self): + return self.range().last() + + def 
first_lit(self): + return make_literal(self.first()) + + def last_lit(self): + return make_literal(self.last()) + + def span_first(self): + return self.__data.span_first + + def span_last(self): + return self.__data.span_last + + def span_first_lit(self): + return make_literal(self.span_first()) + + def span_last_lit(self): + return make_literal(self.span_last()) + + def span_size(self): + return (self.span_last() - self.span_first()) + 1 + + def level(self): + return self.__data.level + + def root(self): + return self.level() == 0 def always_returns_true(self): - return self.count == self.span_size - + return self and len(self) == self.span_size() def always_returns_false(self): - return self.count == 0 + return not self + + def has_expression(self): + return self.__expr is not None + + def makes_lookup_table(self): + return (G.lookup_tables + and (self.last() - self.first() + 1) >= 512 + and (self.last() - self.first() + 1) <= 8192 + and not self.range().contiguous() + # and (len(self) / float(self.last() - self.first() + 1)) > 0.10 + ) + + def child_selector(self): + s = 'static_cast(cp)' + if (self.first() > 0): + s = '({} - 0x{:X}ull)'.format(s, self.first()) + return s + ' / 0x{:X}ull'.format(self.__children[0].span_size()) + + def expression(self, clamp = False): + if self.__expr is None: + return None + if not clamp or not (self.__expr_clamp_low or self.__expr_clamp_high): + return self.__expr + return '{}{}{}'.format( + 'cp >= {} && '.format(self.span_first_lit()) if self.__expr_clamp_low else '', + 'cp <= {} && '.format(self.span_last_lit()) if self.__expr_clamp_high else '', + self.__expr + ) + + def add(self, first, last = None): + if self.__finished: + raise Exception('the chunk is read-only') + self.range().add(first, last) + + def __finish(self): + if self.__finished: + return + if not self.range().finished(): + self.range().finish() + self.__finished = True + if self.root(): + self.__data.span_first = self.first() + self.__data.span_last = 
self.last() + if self.range(): + assert self.first() >= self.span_first() + assert self.last() <= self.span_last() + + # try to figure out a return expression if possible. + + # false + if self.always_returns_false(): + self.__expr = 'false' + + # true + elif self.always_returns_true(): + self.__expr = '(cp >= {} && cp <= {})'.format(self.first_lit(), self.last_lit()) if self.root() else 'true' + + # cp != A + elif (len(self) == self.span_size() - 1): + gap = None + for i in range(self.span_first(), self.span_last()+1): + if i not in self.range(): + gap = i + break + self.__expr = 'cp != ' + make_literal(gap) + self.__expr_clamp_low = gap > self.span_first() + self.__expr_clamp_high = gap < self.span_last() + + # cp == A + # cp >= A + # cp >= A && cp <= B + elif self.range().contiguous(): + if len(self) == 1: + self.__expr = 'cp == ' + self.first_lit() + elif (self.first() > self.span_first()) and (self.last() < self.span_last()): + self.__expr = '(cp >= {} && cp <= {})'.format(self.first_lit(), self.last_lit()) + elif self.last() < self.span_last(): + assert self.first() == self.span_first() + self.__expr = 'cp <= ' + self.last_lit() + self.__expr_clamp_high = True + else: + assert self.first() > self.span_first() + assert self.last() == self.span_last(), "{} {}".format(self.last(), self.span_last()) + self.__expr = 'cp >= ' + self.first_lit() + self.__expr_clamp_low = True + + if self.__expr is not None: + return + + # cp % A == 0 + # (cp + A) % B == 0 + for div in range(2, 11): + for add in range(0, div): + ok = True + for i in range(self.first(), self.last() + 1): + if (i + add) % div == 0: + ok = ok and i in self.range() + else: + ok = ok and i not in self.range() + if not ok: + break; + if ok: + s = 'static_cast(cp)' + if (add): + s = '({} + {}u)'.format(s, add) + s = '({} % {}u) == 0u'.format(s, div) + + self.__expr_clamp_low = self.root() + self.__expr_clamp_high = self.root() + if (self.first() > self.span_first() or self.last() < self.span_last()): + if 
(self.last() < self.span_last()): + s = 'cp <= {} && {}'.format(self.last_lit(), s) + self.__expr_clamp_high = False + if (self.first() > self.span_first()): + s = 'cp >= {} && {}'.format(self.first_lit(), s) + self.__expr_clamp_low = False + s = '({})'.format(s) + self.__expr = s - def print_subchunk_case(self, subchunk_index, output_file, level, indent): - print("{}\tcase {}: ".format(indent, subchunk_index), end='', file=output_file) - if (self.subchunks[subchunk_index].count == self.subchunks[subchunk_index].span_size): - self.subchunks[subchunk_index].print(output_file, level + 1, (self.first, self.last)) + break + if self.__expr: + break + + if self.__expr is not None: + return + + # cp & A + if G.bitmask_expressions and (self.last() - self.first() + 1) <= G.word_size: + bitmask = 0 + for i in self.range(): + shift = i - self.first() + if shift >= G.word_size: + break + bitmask |= 1 << shift + s = make_bitmask_index_test_expression('cp', bitmask, -self.first()) + self.__expr_clamp_low = self.root() + self.__expr_clamp_high = self.root() + if (self.first() > self.span_first() or self.last() < self.span_last()): + if (self.last() < self.span_last()): + s = 'cp <= {} && {}'.format(self.last_lit(), s) + self.__expr_clamp_high = False + if (self.first() > self.span_first()): + s = 'cp >= {} && {}'.format(self.first_lit(), s) + self.__expr_clamp_low = False + s = '({})'.format(s) + self.__expr = s + + + if self.__expr is not None: + return + + child_first = self.first() + child_last = self.last() + child_span = child_last - child_first + 1 + subdivision_allowed = ( + (G.depth_limit <= 0 or (self.level()+1) < G.depth_limit) + and child_span > 4 + and calc_child_size(child_span) < child_span + ) + + # (cp >= A && cp <= B) || cp == C || cp == D ... 
+ if ((self.range().sparse_value_count() + self.range().contiguous_subrange_count() <= 3) + or not subdivision_allowed): + self.__expr_clamp_low = True + self.__expr_clamp_high = True + bools = [] + for f, l in self.range().contiguous_subranges(): + bools.append('(cp >= {} && cp <= {})'.format(make_literal(f), make_literal(l))) + self.__expr_clamp_low = self.__expr_clamp_low and f > self.span_first() + self.__expr_clamp_high = self.__expr_clamp_high and l < self.span_last() + for v in self.range().sparse_values(): + bools.append('cp == ' + make_literal(v)) + self.__expr_clamp_low = self.__expr_clamp_low and v > self.span_first() + self.__expr_clamp_high = self.__expr_clamp_high and v < self.span_last() + self.__expr = '\n\t\t|| '.join([' || '.join(b) for b in chunks(bools, 2)]) + + + if self.__expr is not None: + return + + # haven't been able to make an expression so check if the chunk + # can be made into a lookup table + if self.makes_lookup_table(): + return + + # couldn't figure out a return expression or make a lookup table, so subdivide + + child_node_max_size = calc_child_size(child_span) + child_nodes = ceil(child_span / float(child_node_max_size)) + self.__children = [None] * child_nodes + for i in self.range(): + relative_value = i - child_first + assert relative_value >= 0 + child_index = int(relative_value / float(child_node_max_size)) + data = self.__children[child_index] + if data is None: + data = self.__Data(self.level() + 1) + data.span_first = child_first + child_index * child_node_max_size + data.span_last = min(data.span_first + child_node_max_size - 1, child_last) + self.__children[child_index] = data + assert i >= data.span_first + assert i <= data.span_last + data.range.add(i) + for i in range(0, child_nodes): + if self.__children[i] is not None: + self.__children[i] = CodepointChunk(self.__children[i]) + for child_index in range(0, child_nodes): + child = self.__children[child_index] + if child is None: + data = self.__Data(self.level() + 
1) + data.span_first = child_first + child_index * child_node_max_size + data.span_last = min(data.span_first + child_node_max_size - 1, child_last) + self.__children[child_index] = CodepointChunk(data) + + def __str__(self): + self.__finish() + if self.has_expression(): + return 'return {};'.format(self.expression(self.root())) else: - if (self.subchunks[subchunk_index].subchunks is not None and self.subchunks[subchunk_index].span_size > 64): - print("\n{}\t{{".format(indent), file=output_file) - self.subchunks[subchunk_index].print(output_file, level + 1, (self.first, self.last)) - if (self.subchunks[subchunk_index].subchunks is not None and self.subchunks[subchunk_index].span_size > 64): - print("{}\t}}".format(indent), file=output_file) - - - def return_value_string(self): - - # return true; (completely full range) - if (self.always_returns_true()): - return 'true' - - # return false; (completely empty range) - elif (self.always_returns_false()): - return 'false' - - # return cp == A - elif (self.count == 1): - return 'codepoint == {}'.format(make_literal(self.ranges[0])) - - # return cp != A - elif (self.count == self.span_size - 1): - return 'codepoint != {}'.format(make_literal(self.first_unset)) - - # return cp < A - elif (self.count == (self.first_unset - self.first)): - return 'codepoint < {}'.format(make_literal(self.first_unset)) - - # return cp >= A - elif (self.count == (self.last - self.first_set) + 1): - return 'codepoint >= {}'.format(make_literal(self.first_set)) - - # return cp >= A && cp <= B - elif (self.count == (self.last_set - self.first_set) + 1): - return 'codepoint >= {} && codepoint <= {}'.format(make_literal(self.first_set), make_literal(self.last_set)) - - # return cp <= A || cp >= B - elif (len(self.ranges) == 2 and range_first(self.ranges[0]) == self.first and range_last(self.ranges[1]) == self.last): - return 'codepoint <= {} || codepoint >= {}'.format(make_literal(range_last(self.ranges[0])), 
make_literal(range_first(self.ranges[1]))) - - # return cp % X == 0 - elif (self.all_div_by is not None): - if (self.all_div_by_add is not None): - return '(static_cast(codepoint) {} {}ull) % {}ull == 0ull'.format( - '-' if self.all_div_by_add < 0 else '+', - abs(self.all_div_by_add), - self.all_div_by - ) + s = '' + exclusions = [] + assumptions = [] + if self.first() > 0 and (self.root() or self.first() > self.span_first()): + exclusions.append('cp < ' + self.first_lit()) else: - return 'static_cast(codepoint) % {}ull == 0ull'.format(self.all_div_by) - - # return cp & A (32-bit) - elif ((self.last_set - self.first_set) + 1 <= 32): - if (self.first_set == self.first): - return '(1u << (static_cast(codepoint) - 0x{:X}u)) & {}'.format(self.first_set, make_bitmask(self.low_range_mask(), 32)) + assumptions.append('cp >= ' + self.first_lit()) + if self.root() or self.last() < self.span_last(): + exclusions.append('cp > ' + self.last_lit()) else: - return 'codepoint >= {} && ((1u << (static_cast(codepoint) - 0x{:X}u)) & {})'.format( - make_literal(self.first_set), self.first_set, make_bitmask(self.low_range_mask(), 32)) + assumptions.append('cp <= ' + self.last_lit()) + if exclusions: + s += 'if ({})\n\treturn false;\n'.format(' || '.join(exclusions)) + if assumptions: + s += 'TOML_ASSUME({});\n'.format(compound_and(*assumptions)) + if exclusions or assumptions: + s += '\n' - # return cp & A (64-bit) - elif ((self.last_set - self.first_set) + 1 <= 64): - if (self.first_set == self.first): - return '(1ull << (static_cast(codepoint) - 0x{:X}ull)) & {}'.format(self.first_set, make_bitmask(self.low_range_mask())) - else: - return 'codepoint >= {} && ((1ull << (static_cast(codepoint) - 0x{:X}ull)) & {})'.format( - make_literal(self.first_set), self.first_set, make_bitmask(self.low_range_mask())) + if (self.makes_lookup_table()): + table_name = 'lookup_table_' + str(self.level()) + s += 'constexpr uint_least{}_t {}[] = \n{{'.format(G.word_size, table_name) + fmt_str = 
"\t0x{{:0{}X}}{{}},".format(int(G.word_size/4)) + idx = -1 + for v in range(self.first(), self.last() + 1, G.word_size): + idx += 1 + if (G.word_size >= 256 or ((idx % int(min(256 / G.word_size, 6))) == 0)): + s += '\n' + mask = 0 + for i in range(v, min(v + G.word_size, self.last() + 1)): + if i in self.range(): + mask = mask | (1 << (i - v)) + s += fmt_str.format(mask, 'ull' if G.word_size > 32 else 'u') + element_selector = '(static_cast(cp) - {}) / {}'.format( + G.word_size, + make_bitmask_literal(self.first(), G.word_size), + make_bitmask_literal(G.word_size, G.word_size) + ) + bit_selector = 'static_cast(cp)'.format(G.word_size) + if (self.first() % G.word_size != 0): + bit_selector = '({} - {})'.format(bit_selector, make_bitmask_literal(self.first(), G.word_size)) + bit_selector = '{} % {}'.format(bit_selector, make_bitmask_literal(G.word_size, G.word_size)) + s += '\n};' + s += '\nreturn {}[{}]\n\t& ({} << ({}));'.format( + table_name, + element_selector, + make_bitmask_literal(1, G.word_size), + bit_selector + ) + return s - return None - - - def print(self, output_file, level = 0, parent_range = None): - indent = '\t\t' + ('\t' * (2 * level)) - if (parent_range is None): - parent_range = (0, 0x7FFFFFFF) - - rvs = self.return_value_string() - - # return ______; - if (rvs is not None): - print("return {};".format(rvs), file=output_file) - - # switch (cp) - elif (self.subchunks is not None): - - # guard against non-exhaustive ranges (we may have been trimmed) - if (self.first > parent_range[0] and self.last < parent_range[1]): - print("{}if (codepoint < {} || codepoint > {})\n{}\treturn false;\n".format(indent, make_literal(self.first), make_literal(self.last), indent), file=output_file) - elif (self.first > parent_range[0]): - print("{}if (codepoint < {})\n{}\treturn false;\n".format(indent, make_literal(self.first), indent), file=output_file) - elif (self.last < parent_range[1]): - print("{}if (codepoint > {})\n{}\treturn false;\n".format(indent, 
make_literal(self.last), indent), file=output_file) - - # see if we can avoid emitting a switch altogether, or reduce its scope always_true = [] always_false = [] - not_always_true_or_false = [] - for subchunk_index in range(len(self.subchunks)): - even = (subchunk_index % 2) == 0 - if self.subchunks[subchunk_index].always_returns_true(): - always_true.append(subchunk_index) - elif self.subchunks[subchunk_index].always_returns_false(): - always_false.append(subchunk_index) + expressions_or_switches = [] + selector_references = 0 + for i in range(0, len(self.__children)): + if self.__children[i].always_returns_false(): + always_false.append((i,self.__children[i])) + elif self.__children[i].always_returns_true(): + always_true.append((i,self.__children[i])) else: - not_always_true_or_false.append(subchunk_index) + expressions_or_switches.append((i,self.__children[i])) - selector = '(static_cast(codepoint) - 0x{:X}u) / {}u'.format(self.first, self.subchunk_size) + hoist_constants = G.hoist_constant_children and G.bitmask_expressions + always_true_selector = None + if (hoist_constants and 2 <= len(always_true) <= G.word_size): + always_true_selector = make_bitmask_index_test_expression( + '@@SELECTOR@@', + make_bitmask_from_indices([c[0] for c in always_true]), + 0, + G.word_size, + False) + selector_references += 1 + always_true = [] - # return selector & mask - if (len(always_true) + len(always_false) == len(self.subchunks) and len(self.subchunks) <= 64): - print("{}return ({}) & {};".format(indent, selector, make_bitmask(make_mask_from_indices(always_true))), file=output_file) + always_false_selector = None + if (hoist_constants and 2 <= len(always_false) <= G.word_size): + always_false_selector = make_bitmask_index_test_expression( + '@@SELECTOR@@', + make_bitmask_from_indices([c[0] for c in always_false]), + 0, + G.word_size, + False) + selector_references += 1 + always_false = [] - # return selector == A ? 
true : selector & mask - #elif (len(not_always_true_or_false) == 1 - # and (len(always_true) + len(always_false)) == len(self.subchunks)-1 - # and len(self.subchunks) <= 64): - # print('{}const auto selector = {}; //kek'.format(indent, selector), file=output_file) - # print('{}return selector == {}u ? true : selector & {};'.format( - # indent, - # not_always_true_or_false[0], - # make_bitmask(make_mask_from_indices(always_true)) - # ), - # file=output_file - # ) + default = None + default_check = None + if (len(always_false) > len(always_true)): + default = False + default_check = lambda c: c.always_returns_false() + elif (always_true and len(always_true) >= len(always_false)): + default = True + default_check = lambda c: c.always_returns_true() - # switch(selector) - else: - print("{}TOML_ASSUME_CODEPOINT_BETWEEN({}, {});".format(indent, make_literal(self.first), make_literal(self.last)), file=output_file) - print("{}switch ({})\n{}{{".format(indent, selector, indent), file=output_file) - if (len(always_true) == 0 and len(always_false) == 0): - for subchunk_index in range(len(self.subchunks)): - self.print_subchunk_case(subchunk_index, output_file, level, indent) - print("{}\tTOML_NO_DEFAULT_CASE;".format(indent), file=output_file) - elif (len(always_true) > len(always_false)): - for subchunk_index in range(len(self.subchunks)): - if not self.subchunks[subchunk_index].always_returns_true(): - self.print_subchunk_case(subchunk_index, output_file, level, indent) - print("{}\tdefault: return true;".format(indent), file=output_file) + emittables = [] + emittables_all_have_expressions = True + defaulted = 0 + for i in range(0, len(self.__children)): + if ((always_true_selector and self.__children[i].always_returns_true()) + or (always_false_selector and self.__children[i].always_returns_false())): + continue + if (default_check and default_check(self.__children[i])): + defaulted += 1 + continue + emittables.append((i,self.__children[i])) + 
emittables_all_have_expressions = emittables_all_have_expressions and self.__children[i].has_expression() + if defaulted == 0: + default = None + + requires_switch = not G.elide_switches or len(emittables) >= 2 or not emittables_all_have_expressions + if requires_switch: + selector_references += 1 + + selector = self.child_selector() + selector_name = 'child_index_{}'.format(self.level()) + if selector_references > 1: + s += 'const auto {} = {};\n'.format(selector_name, selector) + + return_trues = [] + if always_true_selector: + return_trues.append(always_true_selector) + elif always_false_selector and not expressions_or_switches: + return_trues.append('!({})'.format(always_false_selector)) + always_false_selector = None + if not requires_switch: + return_trues += [e[1].expression() for e in emittables if e[1].has_expression()] + + return_falses = [] + if always_false_selector: + return_falses.append(always_false_selector) + + for l, v in [(return_trues, True), (return_falses, False)]: + if not l: + continue + ret = '\n\t|| '.join(l) + if (return_trues and return_falses) or requires_switch or default is not None: + s += 'if ({})\n\treturn {};\n'.format(ret, 'true' if v else 'false') else: - for subchunk_index in range(len(self.subchunks)): - if not self.subchunks[subchunk_index].always_returns_false(): - self.print_subchunk_case(subchunk_index, output_file, level, indent) - print("{}\tdefault: return false;".format(indent), file=output_file) - print("{}}}".format(indent), file=output_file) - print("{}//# chunk summary: {} codepoints from {} ranges (spanning a search area of {})".format(indent, self.count, len(self.ranges), self.span_size), file=output_file) - - # return cp == A || cp == B ... 
- else: - print("return", end='', file=output_file) - line_weight = 0 - first_line = True - for range_idx in range(0, len(self.ranges)): - r = self.ranges[range_idx] - range_weight = (1 if ( - isinstance(r, int) - or (range_idx == 0 and r[0] == self.first) - or (range_idx == (len(self.ranges)-1) and r[1] == self.last)) - else 2 - ) - needs_space = True - if ((line_weight + range_weight) > (4 - (1 if first_line else 0))): - print("\n\t{}".format(indent), end='', file=output_file) - line_weight = range_weight - needs_space = False - first_line = False - else: - line_weight += range_weight - if (needs_space): - print(" ", end='', file=output_file) - if (range_idx > 0): - print("|| ", end='', file=output_file) - if (isinstance(r, int)): - print("codepoint == {}".format(make_literal(r)), end='', file=output_file) - elif (range_idx == 0 and r[0] == self.first): - print("codepoint <= {}".format(make_literal(r[1])), end='', file=output_file) - elif (range_idx == (len(self.ranges)-1) and r[1] == self.last): - print("codepoint >= {}".format(make_literal(r[0])), end='', file=output_file) - else: - print("{}codepoint >= {} && codepoint <= {}{}".format( - '(' if len(self.ranges) > 1 else '', - make_literal(r[0]), - make_literal(r[1]), - ')' if len(self.ranges) > 1 else '' - ), - end='', - file=output_file + s += 'return {}{}{};'.format( + '' if v else '!(', + ret, + '' if v else ')' ) - print(";", file=output_file) - - -#### FUNCTION GENERATOR ##################################### - - - -def emit_function(name, categories, file, codepoints): - - # divide the codepoints up into chunks of ranges - root_chunk = Chunk(codepoints[0][0], codepoints[-1][0]) - first_codepoint = -1 - last_codepoint = -1 - for codepoint, category in codepoints: - if (category in categories): - if (first_codepoint == -1): - first_codepoint = codepoint - last_codepoint = codepoint - elif (last_codepoint == codepoint-1): - last_codepoint = codepoint + if requires_switch: + s += "switch (@@SELECTOR@@)\n" + s 
+= "{\n" + emitted = 0 + for i, c in emittables: + s += '\tcase 0x{:02X}:{}{}{}'.format( + i, + ' ' if c.has_expression() else ' // [{}] {:04X} - {:04X}\n\t{{\n'.format(i, c.span_first(), c.span_last()), + indent_with_tabs(str(c), 0 if c.has_expression() else 2), + '\n' if c.has_expression() else '\n\t}\n', + ) + emitted += 1 + s += '\t{};\n'.format('TOML_NO_DEFAULT_CASE' if default is None else 'default: return '+str(default).lower()) + s += "}" + if (emitted <= 1): + s += "\n/* FIX ME: switch has only {} case{}! */".format(emitted, 's' if emitted > 1 else '') else: - root_chunk.add(first_codepoint, last_codepoint) - first_codepoint = codepoint - last_codepoint = codepoint - if (first_codepoint != -1): - root_chunk.add(first_codepoint, last_codepoint) - root_chunk.trim() - root_chunk.analyze() - root_chunk.subdivide() - - # write the function - - print('\n\t//# Returns true if a codepoint belongs to any of these categories: {}'.format(', '.join(categories)), file=file) - print('\t[[nodiscard]]', file=file) - print('\tTOML_GNU_ATTR(const)', file=file) - print('\tconstexpr bool {}(char32_t codepoint) noexcept\n\t{{'.format(name), file=file) - root_chunk.print(file) - print('\t}', file=file) + if default is not None: + s += 'return '+str(default).lower()+';' + s += "\n/* CHECK ME */" + return s.replace('@@SELECTOR@@', selector_name if selector_references > 1 else selector) -#### MAIN #################################################### +##### FUNCTION GENERATORS ############################################################################################# + + + +def emit_function(name, header_file, test_file, codepoints, test_func, description): + root_chunk = CodepointChunk() + for cp in codepoints: + if test_func is None or test_func(cp): + root_chunk.add(cp[0]) + + header = lambda txt: print(txt, file=header_file) + header(" //# " + ("\n\t//# ".join(description.split('\n')))) + header(' [[nodiscard]]') + header(' TOML_GNU_ATTR(const)') + header(' constexpr bool 
{}(char32_t cp) noexcept'.format(name)) + header(' {') + header(indent_with_tabs(str(root_chunk), 2)) + header(' }') + header('') + + if not test_file: + return + test = lambda txt: print(txt, file=test_file) + test(' //----- {} {}'.format(name, '-' * (80 - len(name) - 4))) + test(' {') + test(' INFO("{}"sv)'.format(name)) + test(' static constexpr auto fn = {};'.format(name)) + + if root_chunk.range().contiguous_subrange_count(): + test('') + test(' // contiguous ranges of values which should return true') + for f, l in root_chunk.range().contiguous_subranges(): + test(' REQUIRE(in(fn, {{ {}, {} }}));'.format(make_literal(f), make_literal(l))) + if root_chunk.range().sparse_value_count(): + test('') + test(' // individual values which should return true') + for v in root_chunk.range().sparse_values(): + test(' REQUIRE(fn({}));'.format(make_literal(v))) + + + unicode_max = 0x10FFFF + if len(root_chunk.range()) < (unicode_max + 1): + exclusive_values = SparseRange() + low_iter = iter(root_chunk.range()) + high_iter = iter(root_chunk.range()) + try: + high = next(high_iter) + while True: + low = next(low_iter) + high = next(high_iter) + if low+1 < high: + exclusive_values.add(low+1, high-1) + except StopIteration: + pass + if root_chunk.range().first() > 0: + exclusive_values.add(0, root_chunk.range().first()-1) + if root_chunk.range().last() < unicode_max: + exclusive_values.add(root_chunk.range().last()+1, unicode_max) + exclusive_values.finish() + if exclusive_values.contiguous_subrange_count(): + test('') + test(' // contiguous ranges of values which should return false') + for f, l in exclusive_values.contiguous_subranges(): + test(' REQUIRE(not_in(fn, {{ {}, {} }}));'.format(make_literal(f), make_literal(l))) + if exclusive_values.sparse_value_count(): + test('') + test(' // individual values which should return false') + for v in exclusive_values.sparse_values(): + test(' REQUIRE(!fn({}));'.format(make_literal(v))) + + test(' }') + test('') + + + +def 
emit_category_function(name, header_file, test_file, codepoints, categories, exclusions = None): + emit_function( + name, header_file, test_file, codepoints, + lambda cp: (True if exclusions is None else cp[0] not in exclusions) and cp[1] in categories, + 'Returns true if a codepoint belongs to any of these categories:\n\t{}'.format(', '.join(categories)) + ) + + + +def emit_character_function(name, header_file, test_file, codepoints, *characters): + rng = SparseRange() + for c in characters: + if isinstance(c, int): + rng.add(c) + elif isinstance(c, str): + rng.add(ord(c)) + elif isinstance(c, tuple) and len(c) == 2: + rng.add( + ord(c[0]) if isinstance(c[0], str) else c[0], + ord(c[1]) if isinstance(c[1], str) else c[1] + ) + else: + raise Exception("Invalid argument") + rng.finish() + emit_function( + name, header_file, test_file, codepoints, + lambda cp: cp[0] in rng, + 'Returns true if a codepoint matches {}:\n\t{}'.format( + 'any of' if len(rng) > 1 else '', + rng.stringify(lambda v: chr(v) if 32 < v < 127 and chr(v).isprintable() else ('U+{:08X}'.format(v) if v > 0xFFFF else 'U+{:04X}'.format(v))) + ) + ) + + + +#### MAIN ############################################################################################################# @@ -521,16 +1011,56 @@ def get_script_folder(): def append_codepoint(codepoints, codepoint, category): - if (codepoint <= 128 # ASCII range (handled separately in C++) - or 0xD800 <= codepoint <= 0xF8FF # surrogates & private use area - or 0x40000 <= codepoint <= 0xDFFFF # planes 4-13 - or 0xF0000 <= codepoint <= 0x10FFFD # planes 15-16 - or 0xFFFE <= (codepoint & 0xFFFF) <= 0xFFFF # noncharacters - ): return + # if (0xD800 <= codepoint <= 0xF8FF # surrogates & private use area + # or 0x40000 <= codepoint <= 0xDFFFF # planes 4-13 + # or 0xF0000 <= codepoint <= 0x10FFFD # planes 15-16 + # or 0xFFFE <= (codepoint & 0xFFFF) <= 0xFFFF # noncharacters + # ): return codepoints.append((codepoint, category)) +def write_to_files(codepoints, 
header_file, test_file): + header = lambda txt: print(txt, file=header_file) + test = lambda txt: print(txt, file=test_file) + both = lambda txt: (header(txt), test(txt)) + + header('//# This file is a part of toml++ and is subject to the the terms of the MIT license.') + header('//# Copyright (c) 2019-2020 Mark Gillard ') + header('//# See https://github.com/marzer/tomlplusplus/blob/master/LICENSE for the full license text.') + header('// SPDX-License-Identifier: MIT') + header('//#-----') + header('//# this file was generated by generate_unicode_functions.py - do not modify it directly') + header('') + header('#pragma once') + header('#include "toml_preprocessor.h"') + header('') + header('namespace toml::impl') + header('{') + + test('#include "tests.h"') + test('#include "unicode.h"') + test('using namespace toml::impl;') + test('') + test('TEST_CASE("unicode - generated functions")') + test('{') + + emit_character_function('is_hexadecimal_digit', header_file, test_file, codepoints, ('a', 'f'), ('A', 'F'), ('0', '9')) + + both(' #if TOML_LANG_UNRELEASED // toml/issues/687 (unicode bare keys)') + both('') + unicode_exclusions = SparseRange() + unicode_exclusions.add(0, 127) # ascii block + unicode_exclusions.finish() + emit_category_function('is_unicode_letter', header_file, test_file, codepoints, ('Ll', 'Lm', 'Lo', 'Lt', 'Lu'), unicode_exclusions) + emit_category_function('is_unicode_number', header_file, test_file, codepoints, ('Nd', 'Nl'), unicode_exclusions) + emit_category_function('is_unicode_combining_mark', header_file, test_file, codepoints, ('Mn', 'Mc'), unicode_exclusions) + both(' #endif // TOML_LANG_UNRELEASED') + + header('} // toml::impl') + test('}') + + def main(): # get unicode character database @@ -550,7 +1080,6 @@ def main(): with open(codepoint_file_path, 'r', encoding='utf-8') as codepoint_file: codepoint_list = codepoint_file.read() - # parse the database file into codepoints re_codepoint = 
re.compile(r'^([0-9a-fA-F]+);(.+?);([a-zA-Z]+);') current_range_start = -1 @@ -577,40 +1106,16 @@ def main(): print("Extracted {} of {} codepoints from unicode database file.".format(len(codepoints), parsed_codepoints)) codepoints.sort(key=lambda r:r[0]) - # write the output file - output_file_path = path.join(get_script_folder(), '..', 'include', 'toml++', 'toml_utf8_generated.h') - print("Writing to {}".format(output_file_path)) - with open(output_file_path, 'w', encoding='utf-8', newline='\n') as output_file: - print( -'''//# This file is a part of toml++ and is subject to the the terms of the MIT license. -//# Copyright (c) 2019-2020 Mark Gillard -//# See https://github.com/marzer/tomlplusplus/blob/master/LICENSE for the full license text. -//#----- -//# this file was generated by generate_unicode_functions.py - do not modify it directly -// SPDX-License-Identifier: MIT - -#pragma once -#include "toml_common.h" - -#if TOML_LANG_UNRELEASED // toml/issues/687 (unicode bare keys) - -#define TOML_ASSUME_CODEPOINT_BETWEEN(first, last) \\ - TOML_ASSUME(codepoint >= first); \\ - TOML_ASSUME(codepoint <= last) - -namespace toml::impl -{''', file=output_file, end='') - emit_function('is_unicode_letter', ('Ll', 'Lm', 'Lo', 'Lt', 'Lu'), output_file, codepoints) - emit_function('is_unicode_number', ('Nd', 'Nl'), output_file, codepoints) - emit_function('is_unicode_combining_mark', ('Mn', 'Mc'), output_file, codepoints) - print( -'''} + # write the output files + header_file_path = path.join(get_script_folder(), '..', 'include', 'toml++', 'toml_utf8_generated.h') + test_file_path = path.join(get_script_folder(), '..', 'tests', 'unicode_generated.cpp') + print("Writing to {}".format(header_file_path)) + with open(header_file_path, 'w', encoding='utf-8', newline='\n') as header_file: + print("Writing to {}".format(test_file_path)) + with open(test_file_path, 'w', encoding='utf-8', newline='\n') as test_file: + write_to_files(codepoints, header_file, test_file) -#undef 
TOML_ASSUME_CODEPOINT_BETWEEN - -#endif // TOML_LANG_UNRELEASED -''', file=output_file, end='') if __name__ == '__main__': try: diff --git a/tests/manipulating_arrays.cpp b/tests/manipulating_arrays.cpp index 7586aea..d7fdf6e 100644 --- a/tests/manipulating_arrays.cpp +++ b/tests/manipulating_arrays.cpp @@ -7,7 +7,7 @@ TEST_CASE("arrays - moving") parsing_should_succeed( FILE_LINE_ARGS, S(R"(test = [ "foo" ])"sv), - [&](table&& tbl) noexcept + [&](table&& tbl) { CHECK(tbl.source().begin == source_position{ 1, 1 }); CHECK(tbl.source().end == source_position{ 1, 17 }); diff --git a/tests/manipulating_tables.cpp b/tests/manipulating_tables.cpp index e34a20b..885ec02 100644 --- a/tests/manipulating_tables.cpp +++ b/tests/manipulating_tables.cpp @@ -7,7 +7,7 @@ TEST_CASE("tables - moving") parsing_should_succeed( FILE_LINE_ARGS, S(R"(test = { val1 = "foo" })"sv), - [&](table&& tbl) noexcept + [&](table&& tbl) { CHECK(tbl.source().begin == source_position{ 1, 1 }); CHECK(tbl.source().end == source_position{ 1, 24 }); @@ -136,7 +136,7 @@ TEST_CASE("tables - equality") namespace { template - static auto advance(T iter, ptrdiff_t offset) noexcept + static auto advance(T iter, ptrdiff_t offset) { while (offset > 0) { diff --git a/tests/manipulating_values.cpp b/tests/manipulating_values.cpp index 4fa8f07..497f000 100644 --- a/tests/manipulating_values.cpp +++ b/tests/manipulating_values.cpp @@ -2,7 +2,7 @@ TEST_CASE("values - printing") { - static constexpr auto print_value = [](auto&& raw) noexcept + static constexpr auto print_value = [](auto&& raw) { auto val = toml::value{ std::forward(raw) }; std::stringstream ss; diff --git a/tests/meson.build b/tests/meson.build index 5a0a42b..ec977d5 100644 --- a/tests/meson.build +++ b/tests/meson.build @@ -15,6 +15,8 @@ test_sources = [ 'manipulating_arrays.cpp', 'manipulating_tables.cpp', 'manipulating_values.cpp', + 'unicode.cpp', + 'unicode_generated.cpp', ] compiler_supports_char8_strings = compiler.compiles(''' diff --git 
a/tests/parsing_arrays.cpp b/tests/parsing_arrays.cpp index 2100e36..6090877 100644 --- a/tests/parsing_arrays.cpp +++ b/tests/parsing_arrays.cpp @@ -18,7 +18,7 @@ nested_array_of_int = [ [ 1, 2 ], [3, 4, 5] ] nested_mixed_array = [ [ 1, 2 ], ["a", "b", "c"] ] string_array = [ "all", 'strings', """are the same""", '''type''' ] )"sv), - [](table&& tbl) noexcept + [](table&& tbl) { REQUIRE(tbl[S("integers")].as()); CHECK(tbl[S("integers")].as()->is_homogeneous()); @@ -106,7 +106,7 @@ contributors = [ { name = "Baz Qux", email = "bazqux@example.com", url = "https://example.com/bazqux" } ] )"sv), - [](table&& tbl) noexcept + [](table&& tbl) { REQUIRE(tbl[S("numbers")].as()); CHECK(!tbl[S("numbers")].as()->is_homogeneous()); diff --git a/tests/parsing_booleans.cpp b/tests/parsing_booleans.cpp index 8f1a9b8..533dd83 100644 --- a/tests/parsing_booleans.cpp +++ b/tests/parsing_booleans.cpp @@ -8,7 +8,7 @@ TEST_CASE("parsing - booleans") bool1 = true bool2 = false )"sv), - [](table&& tbl) noexcept + [](table&& tbl) { CHECK(tbl[S("bool1")] == true); CHECK(tbl[S("bool2")] == false); diff --git a/tests/parsing_comments.cpp b/tests/parsing_comments.cpp index ec2d20e..a1dcea6 100644 --- a/tests/parsing_comments.cpp +++ b/tests/parsing_comments.cpp @@ -9,7 +9,7 @@ TEST_CASE("parsing - comments") key = "value" # This is a comment at the end of a line another = "# This is not a comment" )"sv), - [](table&& tbl) noexcept + [](table&& tbl) { CHECK(tbl.size() == 2); CHECK(tbl[S("key")] == S("value"sv)); @@ -20,7 +20,7 @@ another = "# This is not a comment" parsing_should_succeed( FILE_LINE_ARGS, S(R"(# this = "looks like a KVP but is commented out)"sv), - [](table&& tbl) noexcept + [](table&& tbl) { CHECK(tbl.size() == 0); } diff --git a/tests/parsing_dates_and_times.cpp b/tests/parsing_dates_and_times.cpp index 7bdf0b1..61b0026 100644 --- a/tests/parsing_dates_and_times.cpp +++ b/tests/parsing_dates_and_times.cpp @@ -18,7 +18,7 @@ ld1 = 1979-05-27 lt1 = 07:32:00 lt2 = 00:32:00.999999 
)"sv), - [](table&& tbl) noexcept + [](table&& tbl) { static constexpr auto odt1 = date_time{ { 1979, 5, 27 }, { 7, 32 }, {} }; CHECK(tbl[S("odt1")] == odt1); diff --git a/tests/parsing_floats.cpp b/tests/parsing_floats.cpp index cc99e1f..d5cb751 100644 --- a/tests/parsing_floats.cpp +++ b/tests/parsing_floats.cpp @@ -22,7 +22,7 @@ flt7 = 6.626e-34 flt8 = 224_617.445_991_228 )"sv), - [](table&& tbl) noexcept + [](table&& tbl) { CHECK(tbl[S("flt1")] == 1.0); CHECK(tbl[S("flt2")] == 3.1415); @@ -44,7 +44,7 @@ flt8 = 224_617.445_991_228 parsing_should_succeed( FILE_LINE_ARGS, S(R"(zeroes = [-0.0, +0.0])"sv), - [](table&& tbl) noexcept + [](table&& tbl) { CHECK(tbl[S("zeroes")][0] == -0.0); CHECK(tbl[S("zeroes")][1] == +0.0); @@ -188,7 +188,7 @@ sf4 = nan # actual sNaN/qNaN encoding is implementation specific sf5 = +nan # same as `nan` sf6 = -nan # valid, actual encoding is implementation specific )"sv), - [](table&& tbl) noexcept + [](table&& tbl) { CHECK(tbl[S("sf1")] == std::numeric_limits::infinity()); CHECK(tbl[S("sf2")] == std::numeric_limits::infinity()); diff --git a/tests/parsing_integers.cpp b/tests/parsing_integers.cpp index c9d61cd..876e12f 100644 --- a/tests/parsing_integers.cpp +++ b/tests/parsing_integers.cpp @@ -13,7 +13,7 @@ int5 = 1_000 int6 = 5_349_221 int7 = 1_2_3_4_5 # VALID but discouraged )"sv), - [](table&& tbl) noexcept + [](table&& tbl) { CHECK(tbl[S("int1")] == 99); CHECK(tbl[S("int2")] == 42); @@ -43,7 +43,7 @@ int7 = 1_2_3_4_5 # VALID but discouraged parsing_should_succeed( FILE_LINE_ARGS, S("zeroes = [-0, +0]"sv), - [](table&& tbl) noexcept + [](table&& tbl) { CHECK(tbl[S("zeroes")][0] == 0); CHECK(tbl[S("zeroes")][1] == 0); @@ -87,7 +87,7 @@ oct2 = 0o755 # useful for Unix file permissions # binary with prefix `0b` bin1 = 0b11010110 )"sv), - [](table&& tbl) noexcept + [](table&& tbl) { CHECK(tbl[S("hex1")] == 0xDEADBEEF); CHECK(tbl[S("hex2")] == 0xDEADBEEF); @@ -118,7 +118,7 @@ oct1 = 0o0001234567 oct2 = 0o000755 bin1 = 0b0000011010110 
)"sv), - [](table&& tbl) noexcept + [](table&& tbl) { CHECK(tbl[S("hex1")] == 0xDEADBEEF); CHECK(tbl[S("hex2")] == 0xDEADBEEF); diff --git a/tests/parsing_key_value_pairs.cpp b/tests/parsing_key_value_pairs.cpp index c7a456b..d8feba4 100644 --- a/tests/parsing_key_value_pairs.cpp +++ b/tests/parsing_key_value_pairs.cpp @@ -11,7 +11,7 @@ bare-key = "value" 1234 = "value" "" = "blank" )"sv), - [](table&& tbl) noexcept + [](table&& tbl) { CHECK(tbl.size() == 5); CHECK(tbl[S("key")] == S("value"sv)); @@ -34,7 +34,7 @@ bare-key = "value" 'quoted "value"' = "value" '' = 'blank' )"sv), - [](table&& tbl) noexcept + [](table&& tbl) { CHECK(tbl[S("127.0.0.1")] == S("value"sv)); CHECK(tbl[S("character encoding")] == S("value"sv)); @@ -66,7 +66,7 @@ physical.shape = "round" site."google.com" = true 3.14159 = "pi" )"sv), - [](table&& tbl) noexcept + [](table&& tbl) { CHECK(tbl.size() == 4); CHECK(tbl[S("name")] == S("Orange"sv)); @@ -84,7 +84,7 @@ site."google.com" = true fruit.apple.smooth = true fruit.orange = 2 )"sv), - [](table&& tbl) noexcept + [](table&& tbl) { CHECK(tbl[S("fruit")][S("apple")][S("smooth")] == true); CHECK(tbl[S("fruit")][S("orange")] == 2); @@ -111,7 +111,7 @@ orange.skin = "thick" apple.color = "red" orange.color = "orange" )"sv), - [](table&& tbl) noexcept + [](table&& tbl) { CHECK(tbl[S("apple")][S("type")] == S("fruit"sv)); CHECK(tbl[S("apple")][S("skin")] == S("thin"sv)); @@ -135,7 +135,7 @@ orange.type = "fruit" orange.skin = "thick" orange.color = "orange" )"sv), - [](table&& tbl) noexcept + [](table&& tbl) { CHECK(tbl[S("apple")][S("type")] == S("fruit"sv)); CHECK(tbl[S("apple")][S("skin")] == S("thin"sv)); @@ -154,7 +154,7 @@ orange.color = "orange" key+1 = 0 ʎǝʞ2 = 0 )"sv), - [](table&& tbl) noexcept + [](table&& tbl) { CHECK(tbl.size() == 2); CHECK(tbl[S("key+1")] == 0); diff --git a/tests/parsing_spec_example.cpp b/tests/parsing_spec_example.cpp index 83b1e30..abcdd21 100644 --- a/tests/parsing_spec_example.cpp +++ 
b/tests/parsing_spec_example.cpp @@ -43,7 +43,7 @@ hosts = [ parsing_should_succeed( FILE_LINE_ARGS, toml_text, - [](table&& tbl) noexcept + [](table&& tbl) { CHECK(tbl.size() == 5); diff --git a/tests/parsing_strings.cpp b/tests/parsing_strings.cpp index 7ee9145..f0b6b7d 100644 --- a/tests/parsing_strings.cpp +++ b/tests/parsing_strings.cpp @@ -16,7 +16,7 @@ str2 = """ Roses are red Violets are blue""" )"sv), - [](table&& tbl) noexcept + [](table&& tbl) { CHECK(tbl[S("str")] == S("I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF."sv)); CHECK(tbl[S("str1")] == S("Roses are red\nViolets are blue"sv)); @@ -51,7 +51,7 @@ str6 = """Here are fifteen quotation marks: ""\"""\"""\"""\"""\".""" # "This," she said, "is just a pointless statement." str7 = """"This," she said, "is just a pointless statement."""" )"sv), - [](table&& tbl) noexcept + [](table&& tbl) { static constexpr auto quick_brown_fox = S("The quick brown fox jumps over the lazy dog."sv); CHECK(tbl[S("str1")] == quick_brown_fox); @@ -89,7 +89,7 @@ trimmed in raw strings. is preserved. ''' )"sv), - [](table&& tbl) noexcept + [](table&& tbl) { CHECK(tbl[S("winpath")] == S(R"(C:\Users\nodejs\templates)"sv)); CHECK(tbl[S("winpath2")] == S(R"(\\ServerX\admin$\system32\)"sv)); @@ -121,7 +121,7 @@ apos15 = "Here are fifteen apostrophes: '''''''''''''''" # 'That's still pointless', she said. str = ''''That's still pointless', she said.''' )"sv), - [](table&& tbl) noexcept + [](table&& tbl) { CHECK(tbl[S("quot15")] == S(R"(Here are fifteen quotation marks: """"""""""""""")"sv)); CHECK(tbl[S("apos15")] == S(R"(Here are fifteen apostrophes: ''''''''''''''')"sv)); diff --git a/tests/parsing_tables.cpp b/tests/parsing_tables.cpp index c8fc740..4321299 100644 --- a/tests/parsing_tables.cpp +++ b/tests/parsing_tables.cpp @@ -38,7 +38,7 @@ apple.taste.sweet = true smooth = true )"sv), - [](table&& tbl) noexcept + [](table&& tbl) { REQUIRE(tbl[S("table")].as()); CHECK(tbl[S("table")].as
()->size() == 0_sz); @@ -128,7 +128,7 @@ apple.taste.sweet = true [animal] [fruit.orange] )"sv), - [](table&& tbl) noexcept + [](table&& tbl) { REQUIRE(tbl[S("animal")].as
()); CHECK(tbl[S("animal")].as
()->size() == 0_sz); @@ -150,7 +150,7 @@ apple.taste.sweet = true [fruit.orange] [animal] )"sv), - [](table&& tbl) noexcept + [](table&& tbl) { REQUIRE(tbl[S("animal")].as
()); CHECK(tbl[S("animal")].as
()->size() == 0_sz); @@ -176,7 +176,7 @@ animal = { type.name = "pug" } [product] type = { name = "Nail" } )"sv), - [](table&& tbl) noexcept + [](table&& tbl) { REQUIRE(tbl[S("name")].as
()); CHECK(tbl[S("name")].as
()->size() == 2_sz); @@ -223,7 +223,7 @@ test = { val1 = "foo", val2 = [ 3 ], val3 = "bar" } )"sv), - [](table&& tbl) noexcept + [](table&& tbl) { REQUIRE(tbl[S("test")].as
()); CHECK(tbl[S("test")].as
()->size() == 3_sz); @@ -248,7 +248,7 @@ name = { last = "Preston-Werner", } )"sv), - [](table&& tbl) noexcept + [](table&& tbl) { REQUIRE(tbl[S("name")].as
()); CHECK(tbl[S("name")].as
()->size() == 2_sz); @@ -317,7 +317,7 @@ color = "gray" name = "plantain" )"sv), - [](table&& tbl) noexcept + [](table&& tbl) { REQUIRE(tbl[S("points")].as()); CHECK(tbl[S("points")].as()->size() == 3_sz); diff --git a/tests/tests.cpp b/tests/tests.cpp index 4671316..8e4f054 100644 --- a/tests/tests.cpp +++ b/tests/tests.cpp @@ -1,11 +1,11 @@ #include "tests.h" -template void parse_expected_value(std::string_view, uint32_t, std::string_view, const int&) noexcept; -template void parse_expected_value(std::string_view, uint32_t, std::string_view, const unsigned int&) noexcept; -template void parse_expected_value(std::string_view, uint32_t, std::string_view, const bool&) noexcept; -template void parse_expected_value(std::string_view, uint32_t, std::string_view, const float&) noexcept; -template void parse_expected_value(std::string_view, uint32_t, std::string_view, const double&) noexcept; -template void parse_expected_value(std::string_view, uint32_t, std::string_view, const toml::string_view&) noexcept; +template void parse_expected_value(std::string_view, uint32_t, std::string_view, const int&); +template void parse_expected_value(std::string_view, uint32_t, std::string_view, const unsigned int&); +template void parse_expected_value(std::string_view, uint32_t, std::string_view, const bool&); +template void parse_expected_value(std::string_view, uint32_t, std::string_view, const float&); +template void parse_expected_value(std::string_view, uint32_t, std::string_view, const double&); +template void parse_expected_value(std::string_view, uint32_t, std::string_view, const toml::string_view&); namespace std { diff --git a/tests/tests.h b/tests/tests.h index 10d6b83..9766e45 100644 --- a/tests/tests.h +++ b/tests/tests.h @@ -46,14 +46,14 @@ inline void parsing_should_succeed( uint32_t test_line, std::basic_string_view toml_str, Func&& func = {}, - std::string_view source_path = {}) noexcept + std::string_view source_path = {}) { INFO( "["sv << test_file << ", line "sv << 
test_line << "] "sv << "parsing_should_succeed('"sv << std::string_view(reinterpret_cast(toml_str.data()), toml_str.length()) << "')"sv ) - constexpr auto validate_table = [](table&& tabl, std::string_view path) noexcept -> table&& + constexpr auto validate_table = [](table&& tabl, std::string_view path) -> table&& { INFO("Validating table source information"sv) CHECK(tabl.source().begin != source_position{}); @@ -155,7 +155,7 @@ template inline void parsing_should_fail( std::string_view test_file, uint32_t test_line, - std::basic_string_view toml_str) noexcept + std::basic_string_view toml_str) { INFO( "["sv << test_file << ", line "sv << test_line << "] "sv @@ -164,7 +164,7 @@ inline void parsing_should_fail( #if TOML_EXCEPTIONS - static constexpr auto run_tests = [](auto&& fn) noexcept + static constexpr auto run_tests = [](auto&& fn) { try { @@ -196,7 +196,7 @@ inline void parsing_should_fail( #else - static constexpr auto run_tests = [](auto&& fn) noexcept + static constexpr auto run_tests = [](auto&& fn) { parse_result result = fn(); if (result) @@ -212,8 +212,8 @@ inline void parsing_should_fail( } }; - if (run_tests([=]() noexcept { return toml::parse(toml_str); })) - run_tests([=]() noexcept + if (run_tests([=]() { return toml::parse(toml_str); })) + run_tests([=]() { std::basic_stringstream, std::allocator> ss; ss.write(toml_str.data(), static_cast(toml_str.length())); @@ -228,7 +228,7 @@ inline void parse_expected_value( std::string_view test_file, uint32_t test_line, std::string_view value_str, - const T& expected) noexcept + const T& expected) { INFO("["sv << test_file << ", line "sv << test_line << "] "sv << "parse_expected_value('"sv << value_str << "')"sv) @@ -238,7 +238,7 @@ inline void parse_expected_value( val.append(key); val.append(value_str); - static constexpr auto is_val = [](char32_t codepoint) noexcept + static constexpr auto is_val = [](char32_t codepoint) { if constexpr (std::is_same_v>) return codepoint == U'"' || codepoint == U'\''; @@ 
-287,7 +287,7 @@ inline void parse_expected_value( test_file, test_line, std::string_view{ val }, - [&](table&& tbl) noexcept + [&](table&& tbl) { REQUIRE(tbl.size() == 1); auto nv = tbl[S("val"sv)]; @@ -347,7 +347,7 @@ inline void parse_expected_value( test_file, test_line, std::string_view{ str }, - [&](table&& tbl) noexcept + [&](table&& tbl) { REQUIRE(tbl.size() == 1); auto nv = tbl[S("val"sv)]; @@ -368,12 +368,12 @@ inline void parse_expected_value( } // manually instantiate some templates to reduce test compilation time (chosen using ClangBuildAnalyzer) -extern template void parse_expected_value(std::string_view, uint32_t, std::string_view, const int&) noexcept; -extern template void parse_expected_value(std::string_view, uint32_t, std::string_view, const unsigned int&) noexcept; -extern template void parse_expected_value(std::string_view, uint32_t, std::string_view, const bool&) noexcept; -extern template void parse_expected_value(std::string_view, uint32_t, std::string_view, const float&) noexcept; -extern template void parse_expected_value(std::string_view, uint32_t, std::string_view, const double&) noexcept; -extern template void parse_expected_value(std::string_view, uint32_t, std::string_view, const toml::string_view&) noexcept; +extern template void parse_expected_value(std::string_view, uint32_t, std::string_view, const int&); +extern template void parse_expected_value(std::string_view, uint32_t, std::string_view, const unsigned int&); +extern template void parse_expected_value(std::string_view, uint32_t, std::string_view, const bool&); +extern template void parse_expected_value(std::string_view, uint32_t, std::string_view, const float&); +extern template void parse_expected_value(std::string_view, uint32_t, std::string_view, const double&); +extern template void parse_expected_value(std::string_view, uint32_t, std::string_view, const toml::string_view&); namespace std { extern template class unique_ptr; diff --git a/tests/unicode.cpp 
b/tests/unicode.cpp new file mode 100644 index 0000000..22b4459 --- /dev/null +++ b/tests/unicode.cpp @@ -0,0 +1,86 @@ +#include "tests.h" +#include "unicode.h" +using namespace toml::impl; + +TEST_CASE("unicode - is_ascii_letter") +{ + constexpr auto fn = is_ascii_letter; + REQUIRE(not_in(fn, { U'\0', U'@' })); + REQUIRE(in_only(fn, { U'A', U'Z' })); + REQUIRE(not_in(fn, { U'[', U'`' })); + REQUIRE(in_only(fn, { U'a', U'z' })); + REQUIRE(not_in(fn, { U'{', unimax })); +} + +TEST_CASE("unicode - is_ascii_whitespace") +{ + constexpr auto fn = is_ascii_whitespace; + REQUIRE(not_in(fn, { U'\0', U'\u0008' })); + REQUIRE(in_only(fn, U'\t' )); + REQUIRE(not_in(fn, { U'\n', U'\u001F' })); + REQUIRE(in_only(fn, U' ' )); + REQUIRE(not_in(fn, { U'!', unimax })); +} + +TEST_CASE("unicode - is_ascii_line_break") +{ + constexpr auto fn = is_ascii_line_break; + REQUIRE(not_in(fn, { U'\0', U'\t' })); + REQUIRE(in_only(fn, { U'\n', U'\r' })); + REQUIRE(not_in(fn, { U'\u000E', unimax })); +} + +TEST_CASE("unicode - is_decimal_digit") +{ + constexpr auto fn = is_decimal_digit; + REQUIRE(not_in(fn, { U'\0', U'/' })); + REQUIRE(in_only(fn, { U'0', U'9' })); + REQUIRE(not_in(fn, { U':', unimax })); +} + +TEST_CASE("unicode - is_string_delimiter") +{ + constexpr auto fn = is_string_delimiter; + REQUIRE(not_in(fn, { U'\0', U'!' 
})); + REQUIRE(in_only(fn, U'"' )); + REQUIRE(not_in(fn, { U'#', U'&' })); + REQUIRE(in_only(fn, U'\'' )); + REQUIRE(not_in(fn, { U'(', unimax })); +} + +TEST_CASE("unicode - is_unicode_whitespace") +{ + constexpr auto fn = is_unicode_whitespace; + REQUIRE(not_in(fn, { U'\0', U'\u009F' })); + REQUIRE(in_only(fn, U'\u00A0' )); + REQUIRE(not_in(fn, { U'\u00A1', U'\u167F' })); + REQUIRE(in_only(fn, U'\u1680' )); + REQUIRE(not_in(fn, { U'\u1681', U'\u1FFF' })); + REQUIRE(in_only(fn, { U'\u2000', U'\u200A' })); + REQUIRE(not_in(fn, { U'\u200B', U'\u202E' })); + REQUIRE(in_only(fn, U'\u202F' )); + REQUIRE(not_in(fn, { U'\u2030', U'\u205E' })); + REQUIRE(in_only(fn, U'\u205F' )); + REQUIRE(not_in(fn, { U'\u2060', U'\u2FFF' })); + REQUIRE(in_only(fn, U'\u3000' )); + REQUIRE(not_in(fn, { U'\u3001', unimax })); +} + + +TEST_CASE("unicode - is_unicode_line_break") +{ + constexpr auto fn = is_unicode_line_break; + REQUIRE(not_in(fn, { U'\0', U'\u0084' })); + REQUIRE(in_only(fn, U'\u0085' )); + REQUIRE(not_in(fn, { U'\u0086', U'\u2027' })); + REQUIRE(in_only(fn, { U'\u2028', U'\u2029' })); + REQUIRE(not_in(fn, { U'\u202A', unimax })); +} + +TEST_CASE("unicode - is_unicode_surrogate") +{ + constexpr auto fn = is_unicode_surrogate; + REQUIRE(not_in(fn, { U'\0', 0xD7FFu })); + REQUIRE(in_only(fn, { 0xD800u, 0xDFFF })); + REQUIRE(not_in(fn, { 0xE000, unimax })); +} diff --git a/tests/unicode.h b/tests/unicode.h new file mode 100644 index 0000000..98d13d8 --- /dev/null +++ b/tests/unicode.h @@ -0,0 +1,84 @@ +#pragma once +#include "tests.h" + +using func_type = bool(char32_t); +inline constexpr func_type* funcs[] = +{ + // these must be mutually-exclusive + + impl::is_ascii_letter, + impl::is_ascii_whitespace, + impl::is_ascii_line_break, + impl::is_decimal_digit, + impl::is_string_delimiter, + impl::is_unicode_whitespace, + impl::is_unicode_line_break, + impl::is_unicode_surrogate, + #if TOML_LANG_UNRELEASED + impl::is_unicode_letter, + impl::is_unicode_number, + 
impl::is_unicode_combining_mark, + #endif +}; + +template +inline bool in_only(func_type* fptr, T cp) noexcept +{ + if (!fptr(static_cast(cp))) + return false; + for (auto fn : funcs) + { + if (fn == fptr) + continue; + if (fn(static_cast(cp))) + return false; + } + return true; +} + +inline constexpr uint32_t unimax = 0x10FFFFu; + +struct codepoint_range +{ + char32_t first; + char32_t last; + + template + codepoint_range(T first_, U last_) noexcept + : first{ static_cast(first_) }, + last{ static_cast(last_) } + { + if (last < first) + std::swap(first, last); + } + + template + codepoint_range(T first_) noexcept + : first{ static_cast(first_) }, + last{ first } + {} +}; + +inline bool in(func_type* fptr, codepoint_range range) noexcept +{ + for (auto cp = range.first; cp <= range.last; cp++) + if (!fptr(cp)) + return false; + return true; +} + +inline bool in_only(func_type* fptr, codepoint_range range) noexcept +{ + for (auto cp = range.first; cp <= range.last; cp++) + if (!in_only(fptr, cp)) + return false; + return true; +} + +inline bool not_in(func_type* fptr, codepoint_range range) noexcept +{ + for (auto cp = range.first; cp <= range.last; cp++) + if (fptr(cp)) + return false; + return true; +} diff --git a/tests/unicode_generated.cpp b/tests/unicode_generated.cpp new file mode 100644 index 0000000..93c2a1c --- /dev/null +++ b/tests/unicode_generated.cpp @@ -0,0 +1,2040 @@ +#include "tests.h" +#include "unicode.h" +using namespace toml::impl; + +TEST_CASE("unicode - generated functions") +{ + //----- is_hexadecimal_digit -------------------------------------------------------- + { + INFO("is_hexadecimal_digit"sv) + static constexpr auto fn = is_hexadecimal_digit; + + // contiguous ranges of values which should return true + REQUIRE(in(fn, { U'0', U'9' })); + REQUIRE(in(fn, { U'A', U'F' })); + REQUIRE(in(fn, { U'a', U'f' })); + + // contiguous ranges of values which should return false + REQUIRE(not_in(fn, { U'\u0000', U'/' })); + REQUIRE(not_in(fn, { U':', 
U'@' })); + REQUIRE(not_in(fn, { U'G', U'`' })); + REQUIRE(not_in(fn, { U'g', U'\U0010FFFF' })); + } + + #if TOML_LANG_UNRELEASED // toml/issues/687 (unicode bare keys) + + //----- is_unicode_letter ----------------------------------------------------------- + { + INFO("is_unicode_letter"sv) + static constexpr auto fn = is_unicode_letter; + + // contiguous ranges of values which should return true + REQUIRE(in(fn, { U'\u00C0', U'\u00D6' })); + REQUIRE(in(fn, { U'\u00D8', U'\u00F6' })); + REQUIRE(in(fn, { U'\u00F8', U'\u02C1' })); + REQUIRE(in(fn, { U'\u02C6', U'\u02D1' })); + REQUIRE(in(fn, { U'\u02E0', U'\u02E4' })); + REQUIRE(in(fn, { U'\u0370', U'\u0374' })); + REQUIRE(in(fn, { U'\u0376', U'\u0377' })); + REQUIRE(in(fn, { U'\u037A', U'\u037D' })); + REQUIRE(in(fn, { U'\u0388', U'\u038A' })); + REQUIRE(in(fn, { U'\u038E', U'\u03A1' })); + REQUIRE(in(fn, { U'\u03A3', U'\u03F5' })); + REQUIRE(in(fn, { U'\u03F7', U'\u0481' })); + REQUIRE(in(fn, { U'\u048A', U'\u052F' })); + REQUIRE(in(fn, { U'\u0531', U'\u0556' })); + REQUIRE(in(fn, { U'\u0560', U'\u0588' })); + REQUIRE(in(fn, { U'\u05D0', U'\u05EA' })); + REQUIRE(in(fn, { U'\u05EF', U'\u05F2' })); + REQUIRE(in(fn, { U'\u0620', U'\u064A' })); + REQUIRE(in(fn, { U'\u066E', U'\u066F' })); + REQUIRE(in(fn, { U'\u0671', U'\u06D3' })); + REQUIRE(in(fn, { U'\u06E5', U'\u06E6' })); + REQUIRE(in(fn, { U'\u06EE', U'\u06EF' })); + REQUIRE(in(fn, { U'\u06FA', U'\u06FC' })); + REQUIRE(in(fn, { U'\u0712', U'\u072F' })); + REQUIRE(in(fn, { U'\u074D', U'\u07A5' })); + REQUIRE(in(fn, { U'\u07CA', U'\u07EA' })); + REQUIRE(in(fn, { U'\u07F4', U'\u07F5' })); + REQUIRE(in(fn, { U'\u0800', U'\u0815' })); + REQUIRE(in(fn, { U'\u0840', U'\u0858' })); + REQUIRE(in(fn, { U'\u0860', U'\u086A' })); + REQUIRE(in(fn, { U'\u08A0', U'\u08B4' })); + REQUIRE(in(fn, { U'\u08B6', U'\u08C7' })); + REQUIRE(in(fn, { U'\u0904', U'\u0939' })); + REQUIRE(in(fn, { U'\u0958', U'\u0961' })); + REQUIRE(in(fn, { U'\u0971', U'\u0980' })); + REQUIRE(in(fn, { 
U'\u0985', U'\u098C' })); + REQUIRE(in(fn, { U'\u098F', U'\u0990' })); + REQUIRE(in(fn, { U'\u0993', U'\u09A8' })); + REQUIRE(in(fn, { U'\u09AA', U'\u09B0' })); + REQUIRE(in(fn, { U'\u09B6', U'\u09B9' })); + REQUIRE(in(fn, { U'\u09DC', U'\u09DD' })); + REQUIRE(in(fn, { U'\u09DF', U'\u09E1' })); + REQUIRE(in(fn, { U'\u09F0', U'\u09F1' })); + REQUIRE(in(fn, { U'\u0A05', U'\u0A0A' })); + REQUIRE(in(fn, { U'\u0A0F', U'\u0A10' })); + REQUIRE(in(fn, { U'\u0A13', U'\u0A28' })); + REQUIRE(in(fn, { U'\u0A2A', U'\u0A30' })); + REQUIRE(in(fn, { U'\u0A32', U'\u0A33' })); + REQUIRE(in(fn, { U'\u0A35', U'\u0A36' })); + REQUIRE(in(fn, { U'\u0A38', U'\u0A39' })); + REQUIRE(in(fn, { U'\u0A59', U'\u0A5C' })); + REQUIRE(in(fn, { U'\u0A72', U'\u0A74' })); + REQUIRE(in(fn, { U'\u0A85', U'\u0A8D' })); + REQUIRE(in(fn, { U'\u0A8F', U'\u0A91' })); + REQUIRE(in(fn, { U'\u0A93', U'\u0AA8' })); + REQUIRE(in(fn, { U'\u0AAA', U'\u0AB0' })); + REQUIRE(in(fn, { U'\u0AB2', U'\u0AB3' })); + REQUIRE(in(fn, { U'\u0AB5', U'\u0AB9' })); + REQUIRE(in(fn, { U'\u0AE0', U'\u0AE1' })); + REQUIRE(in(fn, { U'\u0B05', U'\u0B0C' })); + REQUIRE(in(fn, { U'\u0B0F', U'\u0B10' })); + REQUIRE(in(fn, { U'\u0B13', U'\u0B28' })); + REQUIRE(in(fn, { U'\u0B2A', U'\u0B30' })); + REQUIRE(in(fn, { U'\u0B32', U'\u0B33' })); + REQUIRE(in(fn, { U'\u0B35', U'\u0B39' })); + REQUIRE(in(fn, { U'\u0B5C', U'\u0B5D' })); + REQUIRE(in(fn, { U'\u0B5F', U'\u0B61' })); + REQUIRE(in(fn, { U'\u0B85', U'\u0B8A' })); + REQUIRE(in(fn, { U'\u0B8E', U'\u0B90' })); + REQUIRE(in(fn, { U'\u0B92', U'\u0B95' })); + REQUIRE(in(fn, { U'\u0B99', U'\u0B9A' })); + REQUIRE(in(fn, { U'\u0B9E', U'\u0B9F' })); + REQUIRE(in(fn, { U'\u0BA3', U'\u0BA4' })); + REQUIRE(in(fn, { U'\u0BA8', U'\u0BAA' })); + REQUIRE(in(fn, { U'\u0BAE', U'\u0BB9' })); + REQUIRE(in(fn, { U'\u0C05', U'\u0C0C' })); + REQUIRE(in(fn, { U'\u0C0E', U'\u0C10' })); + REQUIRE(in(fn, { U'\u0C12', U'\u0C28' })); + REQUIRE(in(fn, { U'\u0C2A', U'\u0C39' })); + REQUIRE(in(fn, { U'\u0C58', 
U'\u0C5A' })); + REQUIRE(in(fn, { U'\u0C60', U'\u0C61' })); + REQUIRE(in(fn, { U'\u0C85', U'\u0C8C' })); + REQUIRE(in(fn, { U'\u0C8E', U'\u0C90' })); + REQUIRE(in(fn, { U'\u0C92', U'\u0CA8' })); + REQUIRE(in(fn, { U'\u0CAA', U'\u0CB3' })); + REQUIRE(in(fn, { U'\u0CB5', U'\u0CB9' })); + REQUIRE(in(fn, { U'\u0CE0', U'\u0CE1' })); + REQUIRE(in(fn, { U'\u0CF1', U'\u0CF2' })); + REQUIRE(in(fn, { U'\u0D04', U'\u0D0C' })); + REQUIRE(in(fn, { U'\u0D0E', U'\u0D10' })); + REQUIRE(in(fn, { U'\u0D12', U'\u0D3A' })); + REQUIRE(in(fn, { U'\u0D54', U'\u0D56' })); + REQUIRE(in(fn, { U'\u0D5F', U'\u0D61' })); + REQUIRE(in(fn, { U'\u0D7A', U'\u0D7F' })); + REQUIRE(in(fn, { U'\u0D85', U'\u0D96' })); + REQUIRE(in(fn, { U'\u0D9A', U'\u0DB1' })); + REQUIRE(in(fn, { U'\u0DB3', U'\u0DBB' })); + REQUIRE(in(fn, { U'\u0DC0', U'\u0DC6' })); + REQUIRE(in(fn, { U'\u0E01', U'\u0E30' })); + REQUIRE(in(fn, { U'\u0E32', U'\u0E33' })); + REQUIRE(in(fn, { U'\u0E40', U'\u0E46' })); + REQUIRE(in(fn, { U'\u0E81', U'\u0E82' })); + REQUIRE(in(fn, { U'\u0E86', U'\u0E8A' })); + REQUIRE(in(fn, { U'\u0E8C', U'\u0EA3' })); + REQUIRE(in(fn, { U'\u0EA7', U'\u0EB0' })); + REQUIRE(in(fn, { U'\u0EB2', U'\u0EB3' })); + REQUIRE(in(fn, { U'\u0EC0', U'\u0EC4' })); + REQUIRE(in(fn, { U'\u0EDC', U'\u0EDF' })); + REQUIRE(in(fn, { U'\u0F40', U'\u0F47' })); + REQUIRE(in(fn, { U'\u0F49', U'\u0F6C' })); + REQUIRE(in(fn, { U'\u0F88', U'\u0F8C' })); + REQUIRE(in(fn, { U'\u1000', U'\u102A' })); + REQUIRE(in(fn, { U'\u1050', U'\u1055' })); + REQUIRE(in(fn, { U'\u105A', U'\u105D' })); + REQUIRE(in(fn, { U'\u1065', U'\u1066' })); + REQUIRE(in(fn, { U'\u106E', U'\u1070' })); + REQUIRE(in(fn, { U'\u1075', U'\u1081' })); + REQUIRE(in(fn, { U'\u10A0', U'\u10C5' })); + REQUIRE(in(fn, { U'\u10D0', U'\u10FA' })); + REQUIRE(in(fn, { U'\u10FC', U'\u1248' })); + REQUIRE(in(fn, { U'\u124A', U'\u124D' })); + REQUIRE(in(fn, { U'\u1250', U'\u1256' })); + REQUIRE(in(fn, { U'\u125A', U'\u125D' })); + REQUIRE(in(fn, { U'\u1260', U'\u1288' })); + 
REQUIRE(in(fn, { U'\u128A', U'\u128D' })); + REQUIRE(in(fn, { U'\u1290', U'\u12B0' })); + REQUIRE(in(fn, { U'\u12B2', U'\u12B5' })); + REQUIRE(in(fn, { U'\u12B8', U'\u12BE' })); + REQUIRE(in(fn, { U'\u12C2', U'\u12C5' })); + REQUIRE(in(fn, { U'\u12C8', U'\u12D6' })); + REQUIRE(in(fn, { U'\u12D8', U'\u1310' })); + REQUIRE(in(fn, { U'\u1312', U'\u1315' })); + REQUIRE(in(fn, { U'\u1318', U'\u135A' })); + REQUIRE(in(fn, { U'\u1380', U'\u138F' })); + REQUIRE(in(fn, { U'\u13A0', U'\u13F5' })); + REQUIRE(in(fn, { U'\u13F8', U'\u13FD' })); + REQUIRE(in(fn, { U'\u1401', U'\u166C' })); + REQUIRE(in(fn, { U'\u166F', U'\u167F' })); + REQUIRE(in(fn, { U'\u1681', U'\u169A' })); + REQUIRE(in(fn, { U'\u16A0', U'\u16EA' })); + REQUIRE(in(fn, { U'\u16F1', U'\u16F8' })); + REQUIRE(in(fn, { U'\u1700', U'\u170C' })); + REQUIRE(in(fn, { U'\u170E', U'\u1711' })); + REQUIRE(in(fn, { U'\u1720', U'\u1731' })); + REQUIRE(in(fn, { U'\u1740', U'\u1751' })); + REQUIRE(in(fn, { U'\u1760', U'\u176C' })); + REQUIRE(in(fn, { U'\u176E', U'\u1770' })); + REQUIRE(in(fn, { U'\u1780', U'\u17B3' })); + REQUIRE(in(fn, { U'\u1820', U'\u1878' })); + REQUIRE(in(fn, { U'\u1880', U'\u1884' })); + REQUIRE(in(fn, { U'\u1887', U'\u18A8' })); + REQUIRE(in(fn, { U'\u18B0', U'\u18F5' })); + REQUIRE(in(fn, { U'\u1900', U'\u191E' })); + REQUIRE(in(fn, { U'\u1950', U'\u196D' })); + REQUIRE(in(fn, { U'\u1970', U'\u1974' })); + REQUIRE(in(fn, { U'\u1980', U'\u19AB' })); + REQUIRE(in(fn, { U'\u19B0', U'\u19C9' })); + REQUIRE(in(fn, { U'\u1A00', U'\u1A16' })); + REQUIRE(in(fn, { U'\u1A20', U'\u1A54' })); + REQUIRE(in(fn, { U'\u1B05', U'\u1B33' })); + REQUIRE(in(fn, { U'\u1B45', U'\u1B4B' })); + REQUIRE(in(fn, { U'\u1B83', U'\u1BA0' })); + REQUIRE(in(fn, { U'\u1BAE', U'\u1BAF' })); + REQUIRE(in(fn, { U'\u1BBA', U'\u1BE5' })); + REQUIRE(in(fn, { U'\u1C00', U'\u1C23' })); + REQUIRE(in(fn, { U'\u1C4D', U'\u1C4F' })); + REQUIRE(in(fn, { U'\u1C5A', U'\u1C7D' })); + REQUIRE(in(fn, { U'\u1C80', U'\u1C88' })); + REQUIRE(in(fn, { 
U'\u1C90', U'\u1CBA' })); + REQUIRE(in(fn, { U'\u1CBD', U'\u1CBF' })); + REQUIRE(in(fn, { U'\u1CE9', U'\u1CEC' })); + REQUIRE(in(fn, { U'\u1CEE', U'\u1CF3' })); + REQUIRE(in(fn, { U'\u1CF5', U'\u1CF6' })); + REQUIRE(in(fn, { U'\u1D00', U'\u1DBF' })); + REQUIRE(in(fn, { U'\u1E00', U'\u1F15' })); + REQUIRE(in(fn, { U'\u1F18', U'\u1F1D' })); + REQUIRE(in(fn, { U'\u1F20', U'\u1F45' })); + REQUIRE(in(fn, { U'\u1F48', U'\u1F4D' })); + REQUIRE(in(fn, { U'\u1F50', U'\u1F57' })); + REQUIRE(in(fn, { U'\u1F5F', U'\u1F7D' })); + REQUIRE(in(fn, { U'\u1F80', U'\u1FB4' })); + REQUIRE(in(fn, { U'\u1FB6', U'\u1FBC' })); + REQUIRE(in(fn, { U'\u1FC2', U'\u1FC4' })); + REQUIRE(in(fn, { U'\u1FC6', U'\u1FCC' })); + REQUIRE(in(fn, { U'\u1FD0', U'\u1FD3' })); + REQUIRE(in(fn, { U'\u1FD6', U'\u1FDB' })); + REQUIRE(in(fn, { U'\u1FE0', U'\u1FEC' })); + REQUIRE(in(fn, { U'\u1FF2', U'\u1FF4' })); + REQUIRE(in(fn, { U'\u1FF6', U'\u1FFC' })); + REQUIRE(in(fn, { U'\u2090', U'\u209C' })); + REQUIRE(in(fn, { U'\u210A', U'\u2113' })); + REQUIRE(in(fn, { U'\u2119', U'\u211D' })); + REQUIRE(in(fn, { U'\u212A', U'\u212D' })); + REQUIRE(in(fn, { U'\u212F', U'\u2139' })); + REQUIRE(in(fn, { U'\u213C', U'\u213F' })); + REQUIRE(in(fn, { U'\u2145', U'\u2149' })); + REQUIRE(in(fn, { U'\u2183', U'\u2184' })); + REQUIRE(in(fn, { U'\u2C00', U'\u2C2E' })); + REQUIRE(in(fn, { U'\u2C30', U'\u2C5E' })); + REQUIRE(in(fn, { U'\u2C60', U'\u2CE4' })); + REQUIRE(in(fn, { U'\u2CEB', U'\u2CEE' })); + REQUIRE(in(fn, { U'\u2CF2', U'\u2CF3' })); + REQUIRE(in(fn, { U'\u2D00', U'\u2D25' })); + REQUIRE(in(fn, { U'\u2D30', U'\u2D67' })); + REQUIRE(in(fn, { U'\u2D80', U'\u2D96' })); + REQUIRE(in(fn, { U'\u2DA0', U'\u2DA6' })); + REQUIRE(in(fn, { U'\u2DA8', U'\u2DAE' })); + REQUIRE(in(fn, { U'\u2DB0', U'\u2DB6' })); + REQUIRE(in(fn, { U'\u2DB8', U'\u2DBE' })); + REQUIRE(in(fn, { U'\u2DC0', U'\u2DC6' })); + REQUIRE(in(fn, { U'\u2DC8', U'\u2DCE' })); + REQUIRE(in(fn, { U'\u2DD0', U'\u2DD6' })); + REQUIRE(in(fn, { U'\u2DD8', 
U'\u2DDE' })); + REQUIRE(in(fn, { U'\u3005', U'\u3006' })); + REQUIRE(in(fn, { U'\u3031', U'\u3035' })); + REQUIRE(in(fn, { U'\u303B', U'\u303C' })); + REQUIRE(in(fn, { U'\u3041', U'\u3096' })); + REQUIRE(in(fn, { U'\u309D', U'\u309F' })); + REQUIRE(in(fn, { U'\u30A1', U'\u30FA' })); + REQUIRE(in(fn, { U'\u30FC', U'\u30FF' })); + REQUIRE(in(fn, { U'\u3105', U'\u312F' })); + REQUIRE(in(fn, { U'\u3131', U'\u318E' })); + REQUIRE(in(fn, { U'\u31A0', U'\u31BF' })); + REQUIRE(in(fn, { U'\u31F0', U'\u31FF' })); + REQUIRE(in(fn, { U'\u3400', U'\u4DBE' })); + REQUIRE(in(fn, { U'\u4E00', U'\u9FFB' })); + REQUIRE(in(fn, { U'\uA000', U'\uA48C' })); + REQUIRE(in(fn, { U'\uA4D0', U'\uA4FD' })); + REQUIRE(in(fn, { U'\uA500', U'\uA60C' })); + REQUIRE(in(fn, { U'\uA610', U'\uA61F' })); + REQUIRE(in(fn, { U'\uA62A', U'\uA62B' })); + REQUIRE(in(fn, { U'\uA640', U'\uA66E' })); + REQUIRE(in(fn, { U'\uA67F', U'\uA69D' })); + REQUIRE(in(fn, { U'\uA6A0', U'\uA6E5' })); + REQUIRE(in(fn, { U'\uA717', U'\uA71F' })); + REQUIRE(in(fn, { U'\uA722', U'\uA788' })); + REQUIRE(in(fn, { U'\uA78B', U'\uA7BF' })); + REQUIRE(in(fn, { U'\uA7C2', U'\uA7CA' })); + REQUIRE(in(fn, { U'\uA7F5', U'\uA801' })); + REQUIRE(in(fn, { U'\uA803', U'\uA805' })); + REQUIRE(in(fn, { U'\uA807', U'\uA80A' })); + REQUIRE(in(fn, { U'\uA80C', U'\uA822' })); + REQUIRE(in(fn, { U'\uA840', U'\uA873' })); + REQUIRE(in(fn, { U'\uA882', U'\uA8B3' })); + REQUIRE(in(fn, { U'\uA8F2', U'\uA8F7' })); + REQUIRE(in(fn, { U'\uA8FD', U'\uA8FE' })); + REQUIRE(in(fn, { U'\uA90A', U'\uA925' })); + REQUIRE(in(fn, { U'\uA930', U'\uA946' })); + REQUIRE(in(fn, { U'\uA960', U'\uA97C' })); + REQUIRE(in(fn, { U'\uA984', U'\uA9B2' })); + REQUIRE(in(fn, { U'\uA9E0', U'\uA9E4' })); + REQUIRE(in(fn, { U'\uA9E6', U'\uA9EF' })); + REQUIRE(in(fn, { U'\uA9FA', U'\uA9FE' })); + REQUIRE(in(fn, { U'\uAA00', U'\uAA28' })); + REQUIRE(in(fn, { U'\uAA40', U'\uAA42' })); + REQUIRE(in(fn, { U'\uAA44', U'\uAA4B' })); + REQUIRE(in(fn, { U'\uAA60', U'\uAA76' })); + 
REQUIRE(in(fn, { U'\uAA7E', U'\uAAAF' })); + REQUIRE(in(fn, { U'\uAAB5', U'\uAAB6' })); + REQUIRE(in(fn, { U'\uAAB9', U'\uAABD' })); + REQUIRE(in(fn, { U'\uAADB', U'\uAADD' })); + REQUIRE(in(fn, { U'\uAAE0', U'\uAAEA' })); + REQUIRE(in(fn, { U'\uAAF2', U'\uAAF4' })); + REQUIRE(in(fn, { U'\uAB01', U'\uAB06' })); + REQUIRE(in(fn, { U'\uAB09', U'\uAB0E' })); + REQUIRE(in(fn, { U'\uAB11', U'\uAB16' })); + REQUIRE(in(fn, { U'\uAB20', U'\uAB26' })); + REQUIRE(in(fn, { U'\uAB28', U'\uAB2E' })); + REQUIRE(in(fn, { U'\uAB30', U'\uAB5A' })); + REQUIRE(in(fn, { U'\uAB5C', U'\uAB69' })); + REQUIRE(in(fn, { U'\uAB70', U'\uABE2' })); + REQUIRE(in(fn, { U'\uAC00', U'\uD7A2' })); + REQUIRE(in(fn, { U'\uD7B0', U'\uD7C6' })); + REQUIRE(in(fn, { U'\uD7CB', U'\uD7FB' })); + REQUIRE(in(fn, { U'\uF900', U'\uFA6D' })); + REQUIRE(in(fn, { U'\uFA70', U'\uFAD9' })); + REQUIRE(in(fn, { U'\uFB00', U'\uFB06' })); + REQUIRE(in(fn, { U'\uFB13', U'\uFB17' })); + REQUIRE(in(fn, { U'\uFB1F', U'\uFB28' })); + REQUIRE(in(fn, { U'\uFB2A', U'\uFB36' })); + REQUIRE(in(fn, { U'\uFB38', U'\uFB3C' })); + REQUIRE(in(fn, { U'\uFB40', U'\uFB41' })); + REQUIRE(in(fn, { U'\uFB43', U'\uFB44' })); + REQUIRE(in(fn, { U'\uFB46', U'\uFBB1' })); + REQUIRE(in(fn, { U'\uFBD3', U'\uFD3D' })); + REQUIRE(in(fn, { U'\uFD50', U'\uFD8F' })); + REQUIRE(in(fn, { U'\uFD92', U'\uFDC7' })); + REQUIRE(in(fn, { U'\uFDF0', U'\uFDFB' })); + REQUIRE(in(fn, { U'\uFE70', U'\uFE74' })); + REQUIRE(in(fn, { U'\uFE76', U'\uFEFC' })); + REQUIRE(in(fn, { U'\uFF21', U'\uFF3A' })); + REQUIRE(in(fn, { U'\uFF41', U'\uFF5A' })); + REQUIRE(in(fn, { U'\uFF66', U'\uFFBE' })); + REQUIRE(in(fn, { U'\uFFC2', U'\uFFC7' })); + REQUIRE(in(fn, { U'\uFFCA', U'\uFFCF' })); + REQUIRE(in(fn, { U'\uFFD2', U'\uFFD7' })); + REQUIRE(in(fn, { U'\uFFDA', U'\uFFDC' })); + REQUIRE(in(fn, { U'\U00010000', U'\U0001000B' })); + REQUIRE(in(fn, { U'\U0001000D', U'\U00010026' })); + REQUIRE(in(fn, { U'\U00010028', U'\U0001003A' })); + REQUIRE(in(fn, { U'\U0001003C', 
U'\U0001003D' })); + REQUIRE(in(fn, { U'\U0001003F', U'\U0001004D' })); + REQUIRE(in(fn, { U'\U00010050', U'\U0001005D' })); + REQUIRE(in(fn, { U'\U00010080', U'\U000100FA' })); + REQUIRE(in(fn, { U'\U00010280', U'\U0001029C' })); + REQUIRE(in(fn, { U'\U000102A0', U'\U000102D0' })); + REQUIRE(in(fn, { U'\U00010300', U'\U0001031F' })); + REQUIRE(in(fn, { U'\U0001032D', U'\U00010340' })); + REQUIRE(in(fn, { U'\U00010342', U'\U00010349' })); + REQUIRE(in(fn, { U'\U00010350', U'\U00010375' })); + REQUIRE(in(fn, { U'\U00010380', U'\U0001039D' })); + REQUIRE(in(fn, { U'\U000103A0', U'\U000103C3' })); + REQUIRE(in(fn, { U'\U000103C8', U'\U000103CF' })); + REQUIRE(in(fn, { U'\U00010400', U'\U0001049D' })); + REQUIRE(in(fn, { U'\U000104B0', U'\U000104D3' })); + REQUIRE(in(fn, { U'\U000104D8', U'\U000104FB' })); + REQUIRE(in(fn, { U'\U00010500', U'\U00010527' })); + REQUIRE(in(fn, { U'\U00010530', U'\U00010563' })); + REQUIRE(in(fn, { U'\U00010600', U'\U00010736' })); + REQUIRE(in(fn, { U'\U00010740', U'\U00010755' })); + REQUIRE(in(fn, { U'\U00010760', U'\U00010767' })); + REQUIRE(in(fn, { U'\U00010800', U'\U00010805' })); + REQUIRE(in(fn, { U'\U0001080A', U'\U00010835' })); + REQUIRE(in(fn, { U'\U00010837', U'\U00010838' })); + REQUIRE(in(fn, { U'\U0001083F', U'\U00010855' })); + REQUIRE(in(fn, { U'\U00010860', U'\U00010876' })); + REQUIRE(in(fn, { U'\U00010880', U'\U0001089E' })); + REQUIRE(in(fn, { U'\U000108E0', U'\U000108F2' })); + REQUIRE(in(fn, { U'\U000108F4', U'\U000108F5' })); + REQUIRE(in(fn, { U'\U00010900', U'\U00010915' })); + REQUIRE(in(fn, { U'\U00010920', U'\U00010939' })); + REQUIRE(in(fn, { U'\U00010980', U'\U000109B7' })); + REQUIRE(in(fn, { U'\U000109BE', U'\U000109BF' })); + REQUIRE(in(fn, { U'\U00010A10', U'\U00010A13' })); + REQUIRE(in(fn, { U'\U00010A15', U'\U00010A17' })); + REQUIRE(in(fn, { U'\U00010A19', U'\U00010A35' })); + REQUIRE(in(fn, { U'\U00010A60', U'\U00010A7C' })); + REQUIRE(in(fn, { U'\U00010A80', U'\U00010A9C' })); + REQUIRE(in(fn, { 
U'\U00010AC0', U'\U00010AC7' })); + REQUIRE(in(fn, { U'\U00010AC9', U'\U00010AE4' })); + REQUIRE(in(fn, { U'\U00010B00', U'\U00010B35' })); + REQUIRE(in(fn, { U'\U00010B40', U'\U00010B55' })); + REQUIRE(in(fn, { U'\U00010B60', U'\U00010B72' })); + REQUIRE(in(fn, { U'\U00010B80', U'\U00010B91' })); + REQUIRE(in(fn, { U'\U00010C00', U'\U00010C48' })); + REQUIRE(in(fn, { U'\U00010C80', U'\U00010CB2' })); + REQUIRE(in(fn, { U'\U00010CC0', U'\U00010CF2' })); + REQUIRE(in(fn, { U'\U00010D00', U'\U00010D23' })); + REQUIRE(in(fn, { U'\U00010E80', U'\U00010EA9' })); + REQUIRE(in(fn, { U'\U00010EB0', U'\U00010EB1' })); + REQUIRE(in(fn, { U'\U00010F00', U'\U00010F1C' })); + REQUIRE(in(fn, { U'\U00010F30', U'\U00010F45' })); + REQUIRE(in(fn, { U'\U00010FB0', U'\U00010FC4' })); + REQUIRE(in(fn, { U'\U00010FE0', U'\U00010FF6' })); + REQUIRE(in(fn, { U'\U00011003', U'\U00011037' })); + REQUIRE(in(fn, { U'\U00011083', U'\U000110AF' })); + REQUIRE(in(fn, { U'\U000110D0', U'\U000110E8' })); + REQUIRE(in(fn, { U'\U00011103', U'\U00011126' })); + REQUIRE(in(fn, { U'\U00011150', U'\U00011172' })); + REQUIRE(in(fn, { U'\U00011183', U'\U000111B2' })); + REQUIRE(in(fn, { U'\U000111C1', U'\U000111C4' })); + REQUIRE(in(fn, { U'\U00011200', U'\U00011211' })); + REQUIRE(in(fn, { U'\U00011213', U'\U0001122B' })); + REQUIRE(in(fn, { U'\U00011280', U'\U00011286' })); + REQUIRE(in(fn, { U'\U0001128A', U'\U0001128D' })); + REQUIRE(in(fn, { U'\U0001128F', U'\U0001129D' })); + REQUIRE(in(fn, { U'\U0001129F', U'\U000112A8' })); + REQUIRE(in(fn, { U'\U000112B0', U'\U000112DE' })); + REQUIRE(in(fn, { U'\U00011305', U'\U0001130C' })); + REQUIRE(in(fn, { U'\U0001130F', U'\U00011310' })); + REQUIRE(in(fn, { U'\U00011313', U'\U00011328' })); + REQUIRE(in(fn, { U'\U0001132A', U'\U00011330' })); + REQUIRE(in(fn, { U'\U00011332', U'\U00011333' })); + REQUIRE(in(fn, { U'\U00011335', U'\U00011339' })); + REQUIRE(in(fn, { U'\U0001135D', U'\U00011361' })); + REQUIRE(in(fn, { U'\U00011400', U'\U00011434' })); + 
REQUIRE(in(fn, { U'\U00011447', U'\U0001144A' })); + REQUIRE(in(fn, { U'\U0001145F', U'\U00011461' })); + REQUIRE(in(fn, { U'\U00011480', U'\U000114AF' })); + REQUIRE(in(fn, { U'\U000114C4', U'\U000114C5' })); + REQUIRE(in(fn, { U'\U00011580', U'\U000115AE' })); + REQUIRE(in(fn, { U'\U000115D8', U'\U000115DB' })); + REQUIRE(in(fn, { U'\U00011600', U'\U0001162F' })); + REQUIRE(in(fn, { U'\U00011680', U'\U000116AA' })); + REQUIRE(in(fn, { U'\U00011700', U'\U0001171A' })); + REQUIRE(in(fn, { U'\U00011800', U'\U0001182B' })); + REQUIRE(in(fn, { U'\U000118A0', U'\U000118DF' })); + REQUIRE(in(fn, { U'\U000118FF', U'\U00011906' })); + REQUIRE(in(fn, { U'\U0001190C', U'\U00011913' })); + REQUIRE(in(fn, { U'\U00011915', U'\U00011916' })); + REQUIRE(in(fn, { U'\U00011918', U'\U0001192F' })); + REQUIRE(in(fn, { U'\U000119A0', U'\U000119A7' })); + REQUIRE(in(fn, { U'\U000119AA', U'\U000119D0' })); + REQUIRE(in(fn, { U'\U00011A0B', U'\U00011A32' })); + REQUIRE(in(fn, { U'\U00011A5C', U'\U00011A89' })); + REQUIRE(in(fn, { U'\U00011AC0', U'\U00011AF8' })); + REQUIRE(in(fn, { U'\U00011C00', U'\U00011C08' })); + REQUIRE(in(fn, { U'\U00011C0A', U'\U00011C2E' })); + REQUIRE(in(fn, { U'\U00011C72', U'\U00011C8F' })); + REQUIRE(in(fn, { U'\U00011D00', U'\U00011D06' })); + REQUIRE(in(fn, { U'\U00011D08', U'\U00011D09' })); + REQUIRE(in(fn, { U'\U00011D0B', U'\U00011D30' })); + REQUIRE(in(fn, { U'\U00011D60', U'\U00011D65' })); + REQUIRE(in(fn, { U'\U00011D67', U'\U00011D68' })); + REQUIRE(in(fn, { U'\U00011D6A', U'\U00011D89' })); + REQUIRE(in(fn, { U'\U00011EE0', U'\U00011EF2' })); + REQUIRE(in(fn, { U'\U00012000', U'\U00012399' })); + REQUIRE(in(fn, { U'\U00012480', U'\U00012543' })); + REQUIRE(in(fn, { U'\U00013000', U'\U0001342E' })); + REQUIRE(in(fn, { U'\U00014400', U'\U00014646' })); + REQUIRE(in(fn, { U'\U00016800', U'\U00016A38' })); + REQUIRE(in(fn, { U'\U00016A40', U'\U00016A5E' })); + REQUIRE(in(fn, { U'\U00016AD0', U'\U00016AED' })); + REQUIRE(in(fn, { U'\U00016B00', 
U'\U00016B2F' })); + REQUIRE(in(fn, { U'\U00016B40', U'\U00016B43' })); + REQUIRE(in(fn, { U'\U00016B63', U'\U00016B77' })); + REQUIRE(in(fn, { U'\U00016B7D', U'\U00016B8F' })); + REQUIRE(in(fn, { U'\U00016E40', U'\U00016E7F' })); + REQUIRE(in(fn, { U'\U00016F00', U'\U00016F4A' })); + REQUIRE(in(fn, { U'\U00016F93', U'\U00016F9F' })); + REQUIRE(in(fn, { U'\U00016FE0', U'\U00016FE1' })); + REQUIRE(in(fn, { U'\U00017000', U'\U000187F6' })); + REQUIRE(in(fn, { U'\U00018800', U'\U00018CD5' })); + REQUIRE(in(fn, { U'\U00018D00', U'\U00018D07' })); + REQUIRE(in(fn, { U'\U0001B000', U'\U0001B11E' })); + REQUIRE(in(fn, { U'\U0001B150', U'\U0001B152' })); + REQUIRE(in(fn, { U'\U0001B164', U'\U0001B167' })); + REQUIRE(in(fn, { U'\U0001B170', U'\U0001B2FB' })); + REQUIRE(in(fn, { U'\U0001BC00', U'\U0001BC6A' })); + REQUIRE(in(fn, { U'\U0001BC70', U'\U0001BC7C' })); + REQUIRE(in(fn, { U'\U0001BC80', U'\U0001BC88' })); + REQUIRE(in(fn, { U'\U0001BC90', U'\U0001BC99' })); + REQUIRE(in(fn, { U'\U0001D400', U'\U0001D454' })); + REQUIRE(in(fn, { U'\U0001D456', U'\U0001D49C' })); + REQUIRE(in(fn, { U'\U0001D49E', U'\U0001D49F' })); + REQUIRE(in(fn, { U'\U0001D4A5', U'\U0001D4A6' })); + REQUIRE(in(fn, { U'\U0001D4A9', U'\U0001D4AC' })); + REQUIRE(in(fn, { U'\U0001D4AE', U'\U0001D4B9' })); + REQUIRE(in(fn, { U'\U0001D4BD', U'\U0001D4C3' })); + REQUIRE(in(fn, { U'\U0001D4C5', U'\U0001D505' })); + REQUIRE(in(fn, { U'\U0001D507', U'\U0001D50A' })); + REQUIRE(in(fn, { U'\U0001D50D', U'\U0001D514' })); + REQUIRE(in(fn, { U'\U0001D516', U'\U0001D51C' })); + REQUIRE(in(fn, { U'\U0001D51E', U'\U0001D539' })); + REQUIRE(in(fn, { U'\U0001D53B', U'\U0001D53E' })); + REQUIRE(in(fn, { U'\U0001D540', U'\U0001D544' })); + REQUIRE(in(fn, { U'\U0001D54A', U'\U0001D550' })); + REQUIRE(in(fn, { U'\U0001D552', U'\U0001D6A5' })); + REQUIRE(in(fn, { U'\U0001D6A8', U'\U0001D6C0' })); + REQUIRE(in(fn, { U'\U0001D6C2', U'\U0001D6DA' })); + REQUIRE(in(fn, { U'\U0001D6DC', U'\U0001D6FA' })); + REQUIRE(in(fn, { 
U'\U0001D6FC', U'\U0001D714' })); + REQUIRE(in(fn, { U'\U0001D716', U'\U0001D734' })); + REQUIRE(in(fn, { U'\U0001D736', U'\U0001D74E' })); + REQUIRE(in(fn, { U'\U0001D750', U'\U0001D76E' })); + REQUIRE(in(fn, { U'\U0001D770', U'\U0001D788' })); + REQUIRE(in(fn, { U'\U0001D78A', U'\U0001D7A8' })); + REQUIRE(in(fn, { U'\U0001D7AA', U'\U0001D7C2' })); + REQUIRE(in(fn, { U'\U0001D7C4', U'\U0001D7CB' })); + REQUIRE(in(fn, { U'\U0001E100', U'\U0001E12C' })); + REQUIRE(in(fn, { U'\U0001E137', U'\U0001E13D' })); + REQUIRE(in(fn, { U'\U0001E2C0', U'\U0001E2EB' })); + REQUIRE(in(fn, { U'\U0001E800', U'\U0001E8C4' })); + REQUIRE(in(fn, { U'\U0001E900', U'\U0001E943' })); + REQUIRE(in(fn, { U'\U0001EE00', U'\U0001EE03' })); + REQUIRE(in(fn, { U'\U0001EE05', U'\U0001EE1F' })); + REQUIRE(in(fn, { U'\U0001EE21', U'\U0001EE22' })); + REQUIRE(in(fn, { U'\U0001EE29', U'\U0001EE32' })); + REQUIRE(in(fn, { U'\U0001EE34', U'\U0001EE37' })); + REQUIRE(in(fn, { U'\U0001EE4D', U'\U0001EE4F' })); + REQUIRE(in(fn, { U'\U0001EE51', U'\U0001EE52' })); + REQUIRE(in(fn, { U'\U0001EE61', U'\U0001EE62' })); + REQUIRE(in(fn, { U'\U0001EE67', U'\U0001EE6A' })); + REQUIRE(in(fn, { U'\U0001EE6C', U'\U0001EE72' })); + REQUIRE(in(fn, { U'\U0001EE74', U'\U0001EE77' })); + REQUIRE(in(fn, { U'\U0001EE79', U'\U0001EE7C' })); + REQUIRE(in(fn, { U'\U0001EE80', U'\U0001EE89' })); + REQUIRE(in(fn, { U'\U0001EE8B', U'\U0001EE9B' })); + REQUIRE(in(fn, { U'\U0001EEA1', U'\U0001EEA3' })); + REQUIRE(in(fn, { U'\U0001EEA5', U'\U0001EEA9' })); + REQUIRE(in(fn, { U'\U0001EEAB', U'\U0001EEBB' })); + REQUIRE(in(fn, { U'\U00020000', U'\U0002A6DC' })); + REQUIRE(in(fn, { U'\U0002A700', U'\U0002B733' })); + REQUIRE(in(fn, { U'\U0002B740', U'\U0002B81C' })); + REQUIRE(in(fn, { U'\U0002B820', U'\U0002CEA0' })); + REQUIRE(in(fn, { U'\U0002CEB0', U'\U0002EBDF' })); + REQUIRE(in(fn, { U'\U0002F800', U'\U0002FA1D' })); + REQUIRE(in(fn, { U'\U00030000', U'\U00031349' })); + + // individual values which should return true + 
REQUIRE(fn(U'\u00AA')); + REQUIRE(fn(U'\u00B5')); + REQUIRE(fn(U'\u00BA')); + REQUIRE(fn(U'\u02EC')); + REQUIRE(fn(U'\u02EE')); + REQUIRE(fn(U'\u037F')); + REQUIRE(fn(U'\u0386')); + REQUIRE(fn(U'\u038C')); + REQUIRE(fn(U'\u0559')); + REQUIRE(fn(U'\u06D5')); + REQUIRE(fn(U'\u06FF')); + REQUIRE(fn(U'\u0710')); + REQUIRE(fn(U'\u07B1')); + REQUIRE(fn(U'\u07FA')); + REQUIRE(fn(U'\u081A')); + REQUIRE(fn(U'\u0824')); + REQUIRE(fn(U'\u0828')); + REQUIRE(fn(U'\u093D')); + REQUIRE(fn(U'\u0950')); + REQUIRE(fn(U'\u09B2')); + REQUIRE(fn(U'\u09BD')); + REQUIRE(fn(U'\u09CE')); + REQUIRE(fn(U'\u09FC')); + REQUIRE(fn(U'\u0A5E')); + REQUIRE(fn(U'\u0ABD')); + REQUIRE(fn(U'\u0AD0')); + REQUIRE(fn(U'\u0AF9')); + REQUIRE(fn(U'\u0B3D')); + REQUIRE(fn(U'\u0B71')); + REQUIRE(fn(U'\u0B83')); + REQUIRE(fn(U'\u0B9C')); + REQUIRE(fn(U'\u0BD0')); + REQUIRE(fn(U'\u0C3D')); + REQUIRE(fn(U'\u0C80')); + REQUIRE(fn(U'\u0CBD')); + REQUIRE(fn(U'\u0CDE')); + REQUIRE(fn(U'\u0D3D')); + REQUIRE(fn(U'\u0D4E')); + REQUIRE(fn(U'\u0DBD')); + REQUIRE(fn(U'\u0E84')); + REQUIRE(fn(U'\u0EA5')); + REQUIRE(fn(U'\u0EBD')); + REQUIRE(fn(U'\u0EC6')); + REQUIRE(fn(U'\u0F00')); + REQUIRE(fn(U'\u103F')); + REQUIRE(fn(U'\u1061')); + REQUIRE(fn(U'\u108E')); + REQUIRE(fn(U'\u10C7')); + REQUIRE(fn(U'\u10CD')); + REQUIRE(fn(U'\u1258')); + REQUIRE(fn(U'\u12C0')); + REQUIRE(fn(U'\u17D7')); + REQUIRE(fn(U'\u17DC')); + REQUIRE(fn(U'\u18AA')); + REQUIRE(fn(U'\u1AA7')); + REQUIRE(fn(U'\u1CFA')); + REQUIRE(fn(U'\u1F59')); + REQUIRE(fn(U'\u1F5B')); + REQUIRE(fn(U'\u1F5D')); + REQUIRE(fn(U'\u1FBE')); + REQUIRE(fn(U'\u2071')); + REQUIRE(fn(U'\u207F')); + REQUIRE(fn(U'\u2102')); + REQUIRE(fn(U'\u2107')); + REQUIRE(fn(U'\u2115')); + REQUIRE(fn(U'\u2124')); + REQUIRE(fn(U'\u2126')); + REQUIRE(fn(U'\u2128')); + REQUIRE(fn(U'\u214E')); + REQUIRE(fn(U'\u2D27')); + REQUIRE(fn(U'\u2D2D')); + REQUIRE(fn(U'\u2D6F')); + REQUIRE(fn(U'\u2E2F')); + REQUIRE(fn(U'\uA8FB')); + REQUIRE(fn(U'\uA9CF')); + REQUIRE(fn(U'\uAA7A')); + REQUIRE(fn(U'\uAAB1')); 
+ REQUIRE(fn(U'\uAAC0')); + REQUIRE(fn(U'\uAAC2')); + REQUIRE(fn(U'\uFB1D')); + REQUIRE(fn(U'\uFB3E')); + REQUIRE(fn(U'\U00010808')); + REQUIRE(fn(U'\U0001083C')); + REQUIRE(fn(U'\U00010A00')); + REQUIRE(fn(U'\U00010F27')); + REQUIRE(fn(U'\U00011144')); + REQUIRE(fn(U'\U00011147')); + REQUIRE(fn(U'\U00011176')); + REQUIRE(fn(U'\U000111DA')); + REQUIRE(fn(U'\U000111DC')); + REQUIRE(fn(U'\U00011288')); + REQUIRE(fn(U'\U0001133D')); + REQUIRE(fn(U'\U00011350')); + REQUIRE(fn(U'\U000114C7')); + REQUIRE(fn(U'\U00011644')); + REQUIRE(fn(U'\U000116B8')); + REQUIRE(fn(U'\U00011909')); + REQUIRE(fn(U'\U0001193F')); + REQUIRE(fn(U'\U00011941')); + REQUIRE(fn(U'\U000119E1')); + REQUIRE(fn(U'\U000119E3')); + REQUIRE(fn(U'\U00011A00')); + REQUIRE(fn(U'\U00011A3A')); + REQUIRE(fn(U'\U00011A50')); + REQUIRE(fn(U'\U00011A9D')); + REQUIRE(fn(U'\U00011C40')); + REQUIRE(fn(U'\U00011D46')); + REQUIRE(fn(U'\U00011D98')); + REQUIRE(fn(U'\U00011FB0')); + REQUIRE(fn(U'\U00016F50')); + REQUIRE(fn(U'\U00016FE3')); + REQUIRE(fn(U'\U0001D4A2')); + REQUIRE(fn(U'\U0001D4BB')); + REQUIRE(fn(U'\U0001D546')); + REQUIRE(fn(U'\U0001E14E')); + REQUIRE(fn(U'\U0001E94B')); + REQUIRE(fn(U'\U0001EE24')); + REQUIRE(fn(U'\U0001EE27')); + REQUIRE(fn(U'\U0001EE39')); + REQUIRE(fn(U'\U0001EE3B')); + REQUIRE(fn(U'\U0001EE42')); + REQUIRE(fn(U'\U0001EE47')); + REQUIRE(fn(U'\U0001EE49')); + REQUIRE(fn(U'\U0001EE4B')); + REQUIRE(fn(U'\U0001EE54')); + REQUIRE(fn(U'\U0001EE57')); + REQUIRE(fn(U'\U0001EE59')); + REQUIRE(fn(U'\U0001EE5B')); + REQUIRE(fn(U'\U0001EE5D')); + REQUIRE(fn(U'\U0001EE5F')); + REQUIRE(fn(U'\U0001EE64')); + REQUIRE(fn(U'\U0001EE7E')); + + // contiguous ranges of values which should return false + REQUIRE(not_in(fn, { U'\u0000', U'\u00A9' })); + REQUIRE(not_in(fn, { U'\u00AB', U'\u00B4' })); + REQUIRE(not_in(fn, { U'\u00B6', U'\u00B9' })); + REQUIRE(not_in(fn, { U'\u00BB', U'\u00BF' })); + REQUIRE(not_in(fn, { U'\u02C2', U'\u02C5' })); + REQUIRE(not_in(fn, { U'\u02D2', U'\u02DF' })); + 
REQUIRE(not_in(fn, { U'\u02E5', U'\u02EB' })); + REQUIRE(not_in(fn, { U'\u02EF', U'\u036F' })); + REQUIRE(not_in(fn, { U'\u0378', U'\u0379' })); + REQUIRE(not_in(fn, { U'\u0380', U'\u0385' })); + REQUIRE(not_in(fn, { U'\u0482', U'\u0489' })); + REQUIRE(not_in(fn, { U'\u0557', U'\u0558' })); + REQUIRE(not_in(fn, { U'\u055A', U'\u055F' })); + REQUIRE(not_in(fn, { U'\u0589', U'\u05CF' })); + REQUIRE(not_in(fn, { U'\u05EB', U'\u05EE' })); + REQUIRE(not_in(fn, { U'\u05F3', U'\u061F' })); + REQUIRE(not_in(fn, { U'\u064B', U'\u066D' })); + REQUIRE(not_in(fn, { U'\u06D6', U'\u06E4' })); + REQUIRE(not_in(fn, { U'\u06E7', U'\u06ED' })); + REQUIRE(not_in(fn, { U'\u06F0', U'\u06F9' })); + REQUIRE(not_in(fn, { U'\u06FD', U'\u06FE' })); + REQUIRE(not_in(fn, { U'\u0700', U'\u070F' })); + REQUIRE(not_in(fn, { U'\u0730', U'\u074C' })); + REQUIRE(not_in(fn, { U'\u07A6', U'\u07B0' })); + REQUIRE(not_in(fn, { U'\u07B2', U'\u07C9' })); + REQUIRE(not_in(fn, { U'\u07EB', U'\u07F3' })); + REQUIRE(not_in(fn, { U'\u07F6', U'\u07F9' })); + REQUIRE(not_in(fn, { U'\u07FB', U'\u07FF' })); + REQUIRE(not_in(fn, { U'\u0816', U'\u0819' })); + REQUIRE(not_in(fn, { U'\u081B', U'\u0823' })); + REQUIRE(not_in(fn, { U'\u0825', U'\u0827' })); + REQUIRE(not_in(fn, { U'\u0829', U'\u083F' })); + REQUIRE(not_in(fn, { U'\u0859', U'\u085F' })); + REQUIRE(not_in(fn, { U'\u086B', U'\u089F' })); + REQUIRE(not_in(fn, { U'\u08C8', U'\u0903' })); + REQUIRE(not_in(fn, { U'\u093A', U'\u093C' })); + REQUIRE(not_in(fn, { U'\u093E', U'\u094F' })); + REQUIRE(not_in(fn, { U'\u0951', U'\u0957' })); + REQUIRE(not_in(fn, { U'\u0962', U'\u0970' })); + REQUIRE(not_in(fn, { U'\u0981', U'\u0984' })); + REQUIRE(not_in(fn, { U'\u098D', U'\u098E' })); + REQUIRE(not_in(fn, { U'\u0991', U'\u0992' })); + REQUIRE(not_in(fn, { U'\u09B3', U'\u09B5' })); + REQUIRE(not_in(fn, { U'\u09BA', U'\u09BC' })); + REQUIRE(not_in(fn, { U'\u09BE', U'\u09CD' })); + REQUIRE(not_in(fn, { U'\u09CF', U'\u09DB' })); + REQUIRE(not_in(fn, { U'\u09E2', 
U'\u09EF' })); + REQUIRE(not_in(fn, { U'\u09F2', U'\u09FB' })); + REQUIRE(not_in(fn, { U'\u09FD', U'\u0A04' })); + REQUIRE(not_in(fn, { U'\u0A0B', U'\u0A0E' })); + REQUIRE(not_in(fn, { U'\u0A11', U'\u0A12' })); + REQUIRE(not_in(fn, { U'\u0A3A', U'\u0A58' })); + REQUIRE(not_in(fn, { U'\u0A5F', U'\u0A71' })); + REQUIRE(not_in(fn, { U'\u0A75', U'\u0A84' })); + REQUIRE(not_in(fn, { U'\u0ABA', U'\u0ABC' })); + REQUIRE(not_in(fn, { U'\u0ABE', U'\u0ACF' })); + REQUIRE(not_in(fn, { U'\u0AD1', U'\u0ADF' })); + REQUIRE(not_in(fn, { U'\u0AE2', U'\u0AF8' })); + REQUIRE(not_in(fn, { U'\u0AFA', U'\u0B04' })); + REQUIRE(not_in(fn, { U'\u0B0D', U'\u0B0E' })); + REQUIRE(not_in(fn, { U'\u0B11', U'\u0B12' })); + REQUIRE(not_in(fn, { U'\u0B3A', U'\u0B3C' })); + REQUIRE(not_in(fn, { U'\u0B3E', U'\u0B5B' })); + REQUIRE(not_in(fn, { U'\u0B62', U'\u0B70' })); + REQUIRE(not_in(fn, { U'\u0B72', U'\u0B82' })); + REQUIRE(not_in(fn, { U'\u0B8B', U'\u0B8D' })); + REQUIRE(not_in(fn, { U'\u0B96', U'\u0B98' })); + REQUIRE(not_in(fn, { U'\u0BA0', U'\u0BA2' })); + REQUIRE(not_in(fn, { U'\u0BA5', U'\u0BA7' })); + REQUIRE(not_in(fn, { U'\u0BAB', U'\u0BAD' })); + REQUIRE(not_in(fn, { U'\u0BBA', U'\u0BCF' })); + REQUIRE(not_in(fn, { U'\u0BD1', U'\u0C04' })); + REQUIRE(not_in(fn, { U'\u0C3A', U'\u0C3C' })); + REQUIRE(not_in(fn, { U'\u0C3E', U'\u0C57' })); + REQUIRE(not_in(fn, { U'\u0C5B', U'\u0C5F' })); + REQUIRE(not_in(fn, { U'\u0C62', U'\u0C7F' })); + REQUIRE(not_in(fn, { U'\u0C81', U'\u0C84' })); + REQUIRE(not_in(fn, { U'\u0CBA', U'\u0CBC' })); + REQUIRE(not_in(fn, { U'\u0CBE', U'\u0CDD' })); + REQUIRE(not_in(fn, { U'\u0CE2', U'\u0CF0' })); + REQUIRE(not_in(fn, { U'\u0CF3', U'\u0D03' })); + REQUIRE(not_in(fn, { U'\u0D3B', U'\u0D3C' })); + REQUIRE(not_in(fn, { U'\u0D3E', U'\u0D4D' })); + REQUIRE(not_in(fn, { U'\u0D4F', U'\u0D53' })); + REQUIRE(not_in(fn, { U'\u0D57', U'\u0D5E' })); + REQUIRE(not_in(fn, { U'\u0D62', U'\u0D79' })); + REQUIRE(not_in(fn, { U'\u0D80', U'\u0D84' })); + REQUIRE(not_in(fn, { 
U'\u0D97', U'\u0D99' })); + REQUIRE(not_in(fn, { U'\u0DBE', U'\u0DBF' })); + REQUIRE(not_in(fn, { U'\u0DC7', U'\u0E00' })); + REQUIRE(not_in(fn, { U'\u0E34', U'\u0E3F' })); + REQUIRE(not_in(fn, { U'\u0E47', U'\u0E80' })); + REQUIRE(not_in(fn, { U'\u0EB4', U'\u0EBC' })); + REQUIRE(not_in(fn, { U'\u0EBE', U'\u0EBF' })); + REQUIRE(not_in(fn, { U'\u0EC7', U'\u0EDB' })); + REQUIRE(not_in(fn, { U'\u0EE0', U'\u0EFF' })); + REQUIRE(not_in(fn, { U'\u0F01', U'\u0F3F' })); + REQUIRE(not_in(fn, { U'\u0F6D', U'\u0F87' })); + REQUIRE(not_in(fn, { U'\u0F8D', U'\u0FFF' })); + REQUIRE(not_in(fn, { U'\u102B', U'\u103E' })); + REQUIRE(not_in(fn, { U'\u1040', U'\u104F' })); + REQUIRE(not_in(fn, { U'\u1056', U'\u1059' })); + REQUIRE(not_in(fn, { U'\u105E', U'\u1060' })); + REQUIRE(not_in(fn, { U'\u1062', U'\u1064' })); + REQUIRE(not_in(fn, { U'\u1067', U'\u106D' })); + REQUIRE(not_in(fn, { U'\u1071', U'\u1074' })); + REQUIRE(not_in(fn, { U'\u1082', U'\u108D' })); + REQUIRE(not_in(fn, { U'\u108F', U'\u109F' })); + REQUIRE(not_in(fn, { U'\u10C8', U'\u10CC' })); + REQUIRE(not_in(fn, { U'\u10CE', U'\u10CF' })); + REQUIRE(not_in(fn, { U'\u124E', U'\u124F' })); + REQUIRE(not_in(fn, { U'\u125E', U'\u125F' })); + REQUIRE(not_in(fn, { U'\u128E', U'\u128F' })); + REQUIRE(not_in(fn, { U'\u12B6', U'\u12B7' })); + REQUIRE(not_in(fn, { U'\u12C6', U'\u12C7' })); + REQUIRE(not_in(fn, { U'\u1316', U'\u1317' })); + REQUIRE(not_in(fn, { U'\u135B', U'\u137F' })); + REQUIRE(not_in(fn, { U'\u1390', U'\u139F' })); + REQUIRE(not_in(fn, { U'\u13F6', U'\u13F7' })); + REQUIRE(not_in(fn, { U'\u13FE', U'\u1400' })); + REQUIRE(not_in(fn, { U'\u166D', U'\u166E' })); + REQUIRE(not_in(fn, { U'\u169B', U'\u169F' })); + REQUIRE(not_in(fn, { U'\u16EB', U'\u16F0' })); + REQUIRE(not_in(fn, { U'\u16F9', U'\u16FF' })); + REQUIRE(not_in(fn, { U'\u1712', U'\u171F' })); + REQUIRE(not_in(fn, { U'\u1732', U'\u173F' })); + REQUIRE(not_in(fn, { U'\u1752', U'\u175F' })); + REQUIRE(not_in(fn, { U'\u1771', U'\u177F' })); + 
REQUIRE(not_in(fn, { U'\u17B4', U'\u17D6' })); + REQUIRE(not_in(fn, { U'\u17D8', U'\u17DB' })); + REQUIRE(not_in(fn, { U'\u17DD', U'\u181F' })); + REQUIRE(not_in(fn, { U'\u1879', U'\u187F' })); + REQUIRE(not_in(fn, { U'\u1885', U'\u1886' })); + REQUIRE(not_in(fn, { U'\u18AB', U'\u18AF' })); + REQUIRE(not_in(fn, { U'\u18F6', U'\u18FF' })); + REQUIRE(not_in(fn, { U'\u191F', U'\u194F' })); + REQUIRE(not_in(fn, { U'\u196E', U'\u196F' })); + REQUIRE(not_in(fn, { U'\u1975', U'\u197F' })); + REQUIRE(not_in(fn, { U'\u19AC', U'\u19AF' })); + REQUIRE(not_in(fn, { U'\u19CA', U'\u19FF' })); + REQUIRE(not_in(fn, { U'\u1A17', U'\u1A1F' })); + REQUIRE(not_in(fn, { U'\u1A55', U'\u1AA6' })); + REQUIRE(not_in(fn, { U'\u1AA8', U'\u1B04' })); + REQUIRE(not_in(fn, { U'\u1B34', U'\u1B44' })); + REQUIRE(not_in(fn, { U'\u1B4C', U'\u1B82' })); + REQUIRE(not_in(fn, { U'\u1BA1', U'\u1BAD' })); + REQUIRE(not_in(fn, { U'\u1BB0', U'\u1BB9' })); + REQUIRE(not_in(fn, { U'\u1BE6', U'\u1BFF' })); + REQUIRE(not_in(fn, { U'\u1C24', U'\u1C4C' })); + REQUIRE(not_in(fn, { U'\u1C50', U'\u1C59' })); + REQUIRE(not_in(fn, { U'\u1C7E', U'\u1C7F' })); + REQUIRE(not_in(fn, { U'\u1C89', U'\u1C8F' })); + REQUIRE(not_in(fn, { U'\u1CBB', U'\u1CBC' })); + REQUIRE(not_in(fn, { U'\u1CC0', U'\u1CE8' })); + REQUIRE(not_in(fn, { U'\u1CF7', U'\u1CF9' })); + REQUIRE(not_in(fn, { U'\u1CFB', U'\u1CFF' })); + REQUIRE(not_in(fn, { U'\u1DC0', U'\u1DFF' })); + REQUIRE(not_in(fn, { U'\u1F16', U'\u1F17' })); + REQUIRE(not_in(fn, { U'\u1F1E', U'\u1F1F' })); + REQUIRE(not_in(fn, { U'\u1F46', U'\u1F47' })); + REQUIRE(not_in(fn, { U'\u1F4E', U'\u1F4F' })); + REQUIRE(not_in(fn, { U'\u1F7E', U'\u1F7F' })); + REQUIRE(not_in(fn, { U'\u1FBF', U'\u1FC1' })); + REQUIRE(not_in(fn, { U'\u1FCD', U'\u1FCF' })); + REQUIRE(not_in(fn, { U'\u1FD4', U'\u1FD5' })); + REQUIRE(not_in(fn, { U'\u1FDC', U'\u1FDF' })); + REQUIRE(not_in(fn, { U'\u1FED', U'\u1FF1' })); + REQUIRE(not_in(fn, { U'\u1FFD', U'\u2070' })); + REQUIRE(not_in(fn, { U'\u2072', 
U'\u207E' })); + REQUIRE(not_in(fn, { U'\u2080', U'\u208F' })); + REQUIRE(not_in(fn, { U'\u209D', U'\u2101' })); + REQUIRE(not_in(fn, { U'\u2103', U'\u2106' })); + REQUIRE(not_in(fn, { U'\u2108', U'\u2109' })); + REQUIRE(not_in(fn, { U'\u2116', U'\u2118' })); + REQUIRE(not_in(fn, { U'\u211E', U'\u2123' })); + REQUIRE(not_in(fn, { U'\u213A', U'\u213B' })); + REQUIRE(not_in(fn, { U'\u2140', U'\u2144' })); + REQUIRE(not_in(fn, { U'\u214A', U'\u214D' })); + REQUIRE(not_in(fn, { U'\u214F', U'\u2182' })); + REQUIRE(not_in(fn, { U'\u2185', U'\u2BFF' })); + REQUIRE(not_in(fn, { U'\u2CE5', U'\u2CEA' })); + REQUIRE(not_in(fn, { U'\u2CEF', U'\u2CF1' })); + REQUIRE(not_in(fn, { U'\u2CF4', U'\u2CFF' })); + REQUIRE(not_in(fn, { U'\u2D28', U'\u2D2C' })); + REQUIRE(not_in(fn, { U'\u2D2E', U'\u2D2F' })); + REQUIRE(not_in(fn, { U'\u2D68', U'\u2D6E' })); + REQUIRE(not_in(fn, { U'\u2D70', U'\u2D7F' })); + REQUIRE(not_in(fn, { U'\u2D97', U'\u2D9F' })); + REQUIRE(not_in(fn, { U'\u2DDF', U'\u2E2E' })); + REQUIRE(not_in(fn, { U'\u2E30', U'\u3004' })); + REQUIRE(not_in(fn, { U'\u3007', U'\u3030' })); + REQUIRE(not_in(fn, { U'\u3036', U'\u303A' })); + REQUIRE(not_in(fn, { U'\u303D', U'\u3040' })); + REQUIRE(not_in(fn, { U'\u3097', U'\u309C' })); + REQUIRE(not_in(fn, { U'\u3100', U'\u3104' })); + REQUIRE(not_in(fn, { U'\u318F', U'\u319F' })); + REQUIRE(not_in(fn, { U'\u31C0', U'\u31EF' })); + REQUIRE(not_in(fn, { U'\u3200', U'\u33FF' })); + REQUIRE(not_in(fn, { U'\u4DBF', U'\u4DFF' })); + REQUIRE(not_in(fn, { U'\u9FFC', U'\u9FFF' })); + REQUIRE(not_in(fn, { U'\uA48D', U'\uA4CF' })); + REQUIRE(not_in(fn, { U'\uA4FE', U'\uA4FF' })); + REQUIRE(not_in(fn, { U'\uA60D', U'\uA60F' })); + REQUIRE(not_in(fn, { U'\uA620', U'\uA629' })); + REQUIRE(not_in(fn, { U'\uA62C', U'\uA63F' })); + REQUIRE(not_in(fn, { U'\uA66F', U'\uA67E' })); + REQUIRE(not_in(fn, { U'\uA69E', U'\uA69F' })); + REQUIRE(not_in(fn, { U'\uA6E6', U'\uA716' })); + REQUIRE(not_in(fn, { U'\uA720', U'\uA721' })); + REQUIRE(not_in(fn, { 
U'\uA789', U'\uA78A' })); + REQUIRE(not_in(fn, { U'\uA7C0', U'\uA7C1' })); + REQUIRE(not_in(fn, { U'\uA7CB', U'\uA7F4' })); + REQUIRE(not_in(fn, { U'\uA823', U'\uA83F' })); + REQUIRE(not_in(fn, { U'\uA874', U'\uA881' })); + REQUIRE(not_in(fn, { U'\uA8B4', U'\uA8F1' })); + REQUIRE(not_in(fn, { U'\uA8F8', U'\uA8FA' })); + REQUIRE(not_in(fn, { U'\uA8FF', U'\uA909' })); + REQUIRE(not_in(fn, { U'\uA926', U'\uA92F' })); + REQUIRE(not_in(fn, { U'\uA947', U'\uA95F' })); + REQUIRE(not_in(fn, { U'\uA97D', U'\uA983' })); + REQUIRE(not_in(fn, { U'\uA9B3', U'\uA9CE' })); + REQUIRE(not_in(fn, { U'\uA9D0', U'\uA9DF' })); + REQUIRE(not_in(fn, { U'\uA9F0', U'\uA9F9' })); + REQUIRE(not_in(fn, { U'\uAA29', U'\uAA3F' })); + REQUIRE(not_in(fn, { U'\uAA4C', U'\uAA5F' })); + REQUIRE(not_in(fn, { U'\uAA77', U'\uAA79' })); + REQUIRE(not_in(fn, { U'\uAA7B', U'\uAA7D' })); + REQUIRE(not_in(fn, { U'\uAAB2', U'\uAAB4' })); + REQUIRE(not_in(fn, { U'\uAAB7', U'\uAAB8' })); + REQUIRE(not_in(fn, { U'\uAABE', U'\uAABF' })); + REQUIRE(not_in(fn, { U'\uAAC3', U'\uAADA' })); + REQUIRE(not_in(fn, { U'\uAADE', U'\uAADF' })); + REQUIRE(not_in(fn, { U'\uAAEB', U'\uAAF1' })); + REQUIRE(not_in(fn, { U'\uAAF5', U'\uAB00' })); + REQUIRE(not_in(fn, { U'\uAB07', U'\uAB08' })); + REQUIRE(not_in(fn, { U'\uAB0F', U'\uAB10' })); + REQUIRE(not_in(fn, { U'\uAB17', U'\uAB1F' })); + REQUIRE(not_in(fn, { U'\uAB6A', U'\uAB6F' })); + REQUIRE(not_in(fn, { U'\uABE3', U'\uABFF' })); + REQUIRE(not_in(fn, { U'\uD7A3', U'\uD7AF' })); + REQUIRE(not_in(fn, { U'\uD7C7', U'\uD7CA' })); + REQUIRE(not_in(fn, { U'\uD7FC', U'\uF8FF' })); + REQUIRE(not_in(fn, { U'\uFA6E', U'\uFA6F' })); + REQUIRE(not_in(fn, { U'\uFADA', U'\uFAFF' })); + REQUIRE(not_in(fn, { U'\uFB07', U'\uFB12' })); + REQUIRE(not_in(fn, { U'\uFB18', U'\uFB1C' })); + REQUIRE(not_in(fn, { U'\uFBB2', U'\uFBD2' })); + REQUIRE(not_in(fn, { U'\uFD3E', U'\uFD4F' })); + REQUIRE(not_in(fn, { U'\uFD90', U'\uFD91' })); + REQUIRE(not_in(fn, { U'\uFDC8', U'\uFDEF' })); + 
REQUIRE(not_in(fn, { U'\uFDFC', U'\uFE6F' })); + REQUIRE(not_in(fn, { U'\uFEFD', U'\uFF20' })); + REQUIRE(not_in(fn, { U'\uFF3B', U'\uFF40' })); + REQUIRE(not_in(fn, { U'\uFF5B', U'\uFF65' })); + REQUIRE(not_in(fn, { U'\uFFBF', U'\uFFC1' })); + REQUIRE(not_in(fn, { U'\uFFC8', U'\uFFC9' })); + REQUIRE(not_in(fn, { U'\uFFD0', U'\uFFD1' })); + REQUIRE(not_in(fn, { U'\uFFD8', U'\uFFD9' })); + REQUIRE(not_in(fn, { U'\uFFDD', U'\uFFFF' })); + REQUIRE(not_in(fn, { U'\U0001004E', U'\U0001004F' })); + REQUIRE(not_in(fn, { U'\U0001005E', U'\U0001007F' })); + REQUIRE(not_in(fn, { U'\U000100FB', U'\U0001027F' })); + REQUIRE(not_in(fn, { U'\U0001029D', U'\U0001029F' })); + REQUIRE(not_in(fn, { U'\U000102D1', U'\U000102FF' })); + REQUIRE(not_in(fn, { U'\U00010320', U'\U0001032C' })); + REQUIRE(not_in(fn, { U'\U0001034A', U'\U0001034F' })); + REQUIRE(not_in(fn, { U'\U00010376', U'\U0001037F' })); + REQUIRE(not_in(fn, { U'\U0001039E', U'\U0001039F' })); + REQUIRE(not_in(fn, { U'\U000103C4', U'\U000103C7' })); + REQUIRE(not_in(fn, { U'\U000103D0', U'\U000103FF' })); + REQUIRE(not_in(fn, { U'\U0001049E', U'\U000104AF' })); + REQUIRE(not_in(fn, { U'\U000104D4', U'\U000104D7' })); + REQUIRE(not_in(fn, { U'\U000104FC', U'\U000104FF' })); + REQUIRE(not_in(fn, { U'\U00010528', U'\U0001052F' })); + REQUIRE(not_in(fn, { U'\U00010564', U'\U000105FF' })); + REQUIRE(not_in(fn, { U'\U00010737', U'\U0001073F' })); + REQUIRE(not_in(fn, { U'\U00010756', U'\U0001075F' })); + REQUIRE(not_in(fn, { U'\U00010768', U'\U000107FF' })); + REQUIRE(not_in(fn, { U'\U00010806', U'\U00010807' })); + REQUIRE(not_in(fn, { U'\U00010839', U'\U0001083B' })); + REQUIRE(not_in(fn, { U'\U0001083D', U'\U0001083E' })); + REQUIRE(not_in(fn, { U'\U00010856', U'\U0001085F' })); + REQUIRE(not_in(fn, { U'\U00010877', U'\U0001087F' })); + REQUIRE(not_in(fn, { U'\U0001089F', U'\U000108DF' })); + REQUIRE(not_in(fn, { U'\U000108F6', U'\U000108FF' })); + REQUIRE(not_in(fn, { U'\U00010916', U'\U0001091F' })); + REQUIRE(not_in(fn, 
{ U'\U0001093A', U'\U0001097F' })); + REQUIRE(not_in(fn, { U'\U000109B8', U'\U000109BD' })); + REQUIRE(not_in(fn, { U'\U000109C0', U'\U000109FF' })); + REQUIRE(not_in(fn, { U'\U00010A01', U'\U00010A0F' })); + REQUIRE(not_in(fn, { U'\U00010A36', U'\U00010A5F' })); + REQUIRE(not_in(fn, { U'\U00010A7D', U'\U00010A7F' })); + REQUIRE(not_in(fn, { U'\U00010A9D', U'\U00010ABF' })); + REQUIRE(not_in(fn, { U'\U00010AE5', U'\U00010AFF' })); + REQUIRE(not_in(fn, { U'\U00010B36', U'\U00010B3F' })); + REQUIRE(not_in(fn, { U'\U00010B56', U'\U00010B5F' })); + REQUIRE(not_in(fn, { U'\U00010B73', U'\U00010B7F' })); + REQUIRE(not_in(fn, { U'\U00010B92', U'\U00010BFF' })); + REQUIRE(not_in(fn, { U'\U00010C49', U'\U00010C7F' })); + REQUIRE(not_in(fn, { U'\U00010CB3', U'\U00010CBF' })); + REQUIRE(not_in(fn, { U'\U00010CF3', U'\U00010CFF' })); + REQUIRE(not_in(fn, { U'\U00010D24', U'\U00010E7F' })); + REQUIRE(not_in(fn, { U'\U00010EAA', U'\U00010EAF' })); + REQUIRE(not_in(fn, { U'\U00010EB2', U'\U00010EFF' })); + REQUIRE(not_in(fn, { U'\U00010F1D', U'\U00010F26' })); + REQUIRE(not_in(fn, { U'\U00010F28', U'\U00010F2F' })); + REQUIRE(not_in(fn, { U'\U00010F46', U'\U00010FAF' })); + REQUIRE(not_in(fn, { U'\U00010FC5', U'\U00010FDF' })); + REQUIRE(not_in(fn, { U'\U00010FF7', U'\U00011002' })); + REQUIRE(not_in(fn, { U'\U00011038', U'\U00011082' })); + REQUIRE(not_in(fn, { U'\U000110B0', U'\U000110CF' })); + REQUIRE(not_in(fn, { U'\U000110E9', U'\U00011102' })); + REQUIRE(not_in(fn, { U'\U00011127', U'\U00011143' })); + REQUIRE(not_in(fn, { U'\U00011145', U'\U00011146' })); + REQUIRE(not_in(fn, { U'\U00011148', U'\U0001114F' })); + REQUIRE(not_in(fn, { U'\U00011173', U'\U00011175' })); + REQUIRE(not_in(fn, { U'\U00011177', U'\U00011182' })); + REQUIRE(not_in(fn, { U'\U000111B3', U'\U000111C0' })); + REQUIRE(not_in(fn, { U'\U000111C5', U'\U000111D9' })); + REQUIRE(not_in(fn, { U'\U000111DD', U'\U000111FF' })); + REQUIRE(not_in(fn, { U'\U0001122C', U'\U0001127F' })); + REQUIRE(not_in(fn, { 
U'\U000112A9', U'\U000112AF' })); + REQUIRE(not_in(fn, { U'\U000112DF', U'\U00011304' })); + REQUIRE(not_in(fn, { U'\U0001130D', U'\U0001130E' })); + REQUIRE(not_in(fn, { U'\U00011311', U'\U00011312' })); + REQUIRE(not_in(fn, { U'\U0001133A', U'\U0001133C' })); + REQUIRE(not_in(fn, { U'\U0001133E', U'\U0001134F' })); + REQUIRE(not_in(fn, { U'\U00011351', U'\U0001135C' })); + REQUIRE(not_in(fn, { U'\U00011362', U'\U000113FF' })); + REQUIRE(not_in(fn, { U'\U00011435', U'\U00011446' })); + REQUIRE(not_in(fn, { U'\U0001144B', U'\U0001145E' })); + REQUIRE(not_in(fn, { U'\U00011462', U'\U0001147F' })); + REQUIRE(not_in(fn, { U'\U000114B0', U'\U000114C3' })); + REQUIRE(not_in(fn, { U'\U000114C8', U'\U0001157F' })); + REQUIRE(not_in(fn, { U'\U000115AF', U'\U000115D7' })); + REQUIRE(not_in(fn, { U'\U000115DC', U'\U000115FF' })); + REQUIRE(not_in(fn, { U'\U00011630', U'\U00011643' })); + REQUIRE(not_in(fn, { U'\U00011645', U'\U0001167F' })); + REQUIRE(not_in(fn, { U'\U000116AB', U'\U000116B7' })); + REQUIRE(not_in(fn, { U'\U000116B9', U'\U000116FF' })); + REQUIRE(not_in(fn, { U'\U0001171B', U'\U000117FF' })); + REQUIRE(not_in(fn, { U'\U0001182C', U'\U0001189F' })); + REQUIRE(not_in(fn, { U'\U000118E0', U'\U000118FE' })); + REQUIRE(not_in(fn, { U'\U00011907', U'\U00011908' })); + REQUIRE(not_in(fn, { U'\U0001190A', U'\U0001190B' })); + REQUIRE(not_in(fn, { U'\U00011930', U'\U0001193E' })); + REQUIRE(not_in(fn, { U'\U00011942', U'\U0001199F' })); + REQUIRE(not_in(fn, { U'\U000119A8', U'\U000119A9' })); + REQUIRE(not_in(fn, { U'\U000119D1', U'\U000119E0' })); + REQUIRE(not_in(fn, { U'\U000119E4', U'\U000119FF' })); + REQUIRE(not_in(fn, { U'\U00011A01', U'\U00011A0A' })); + REQUIRE(not_in(fn, { U'\U00011A33', U'\U00011A39' })); + REQUIRE(not_in(fn, { U'\U00011A3B', U'\U00011A4F' })); + REQUIRE(not_in(fn, { U'\U00011A51', U'\U00011A5B' })); + REQUIRE(not_in(fn, { U'\U00011A8A', U'\U00011A9C' })); + REQUIRE(not_in(fn, { U'\U00011A9E', U'\U00011ABF' })); + REQUIRE(not_in(fn, { 
U'\U00011AF9', U'\U00011BFF' })); + REQUIRE(not_in(fn, { U'\U00011C2F', U'\U00011C3F' })); + REQUIRE(not_in(fn, { U'\U00011C41', U'\U00011C71' })); + REQUIRE(not_in(fn, { U'\U00011C90', U'\U00011CFF' })); + REQUIRE(not_in(fn, { U'\U00011D31', U'\U00011D45' })); + REQUIRE(not_in(fn, { U'\U00011D47', U'\U00011D5F' })); + REQUIRE(not_in(fn, { U'\U00011D8A', U'\U00011D97' })); + REQUIRE(not_in(fn, { U'\U00011D99', U'\U00011EDF' })); + REQUIRE(not_in(fn, { U'\U00011EF3', U'\U00011FAF' })); + REQUIRE(not_in(fn, { U'\U00011FB1', U'\U00011FFF' })); + REQUIRE(not_in(fn, { U'\U0001239A', U'\U0001247F' })); + REQUIRE(not_in(fn, { U'\U00012544', U'\U00012FFF' })); + REQUIRE(not_in(fn, { U'\U0001342F', U'\U000143FF' })); + REQUIRE(not_in(fn, { U'\U00014647', U'\U000167FF' })); + REQUIRE(not_in(fn, { U'\U00016A39', U'\U00016A3F' })); + REQUIRE(not_in(fn, { U'\U00016A5F', U'\U00016ACF' })); + REQUIRE(not_in(fn, { U'\U00016AEE', U'\U00016AFF' })); + REQUIRE(not_in(fn, { U'\U00016B30', U'\U00016B3F' })); + REQUIRE(not_in(fn, { U'\U00016B44', U'\U00016B62' })); + REQUIRE(not_in(fn, { U'\U00016B78', U'\U00016B7C' })); + REQUIRE(not_in(fn, { U'\U00016B90', U'\U00016E3F' })); + REQUIRE(not_in(fn, { U'\U00016E80', U'\U00016EFF' })); + REQUIRE(not_in(fn, { U'\U00016F4B', U'\U00016F4F' })); + REQUIRE(not_in(fn, { U'\U00016F51', U'\U00016F92' })); + REQUIRE(not_in(fn, { U'\U00016FA0', U'\U00016FDF' })); + REQUIRE(not_in(fn, { U'\U00016FE4', U'\U00016FFF' })); + REQUIRE(not_in(fn, { U'\U000187F7', U'\U000187FF' })); + REQUIRE(not_in(fn, { U'\U00018CD6', U'\U00018CFF' })); + REQUIRE(not_in(fn, { U'\U00018D08', U'\U0001AFFF' })); + REQUIRE(not_in(fn, { U'\U0001B11F', U'\U0001B14F' })); + REQUIRE(not_in(fn, { U'\U0001B153', U'\U0001B163' })); + REQUIRE(not_in(fn, { U'\U0001B168', U'\U0001B16F' })); + REQUIRE(not_in(fn, { U'\U0001B2FC', U'\U0001BBFF' })); + REQUIRE(not_in(fn, { U'\U0001BC6B', U'\U0001BC6F' })); + REQUIRE(not_in(fn, { U'\U0001BC7D', U'\U0001BC7F' })); + REQUIRE(not_in(fn, { 
U'\U0001BC89', U'\U0001BC8F' })); + REQUIRE(not_in(fn, { U'\U0001BC9A', U'\U0001D3FF' })); + REQUIRE(not_in(fn, { U'\U0001D4A0', U'\U0001D4A1' })); + REQUIRE(not_in(fn, { U'\U0001D4A3', U'\U0001D4A4' })); + REQUIRE(not_in(fn, { U'\U0001D4A7', U'\U0001D4A8' })); + REQUIRE(not_in(fn, { U'\U0001D50B', U'\U0001D50C' })); + REQUIRE(not_in(fn, { U'\U0001D547', U'\U0001D549' })); + REQUIRE(not_in(fn, { U'\U0001D6A6', U'\U0001D6A7' })); + REQUIRE(not_in(fn, { U'\U0001D7CC', U'\U0001E0FF' })); + REQUIRE(not_in(fn, { U'\U0001E12D', U'\U0001E136' })); + REQUIRE(not_in(fn, { U'\U0001E13E', U'\U0001E14D' })); + REQUIRE(not_in(fn, { U'\U0001E14F', U'\U0001E2BF' })); + REQUIRE(not_in(fn, { U'\U0001E2EC', U'\U0001E7FF' })); + REQUIRE(not_in(fn, { U'\U0001E8C5', U'\U0001E8FF' })); + REQUIRE(not_in(fn, { U'\U0001E944', U'\U0001E94A' })); + REQUIRE(not_in(fn, { U'\U0001E94C', U'\U0001EDFF' })); + REQUIRE(not_in(fn, { U'\U0001EE25', U'\U0001EE26' })); + REQUIRE(not_in(fn, { U'\U0001EE3C', U'\U0001EE41' })); + REQUIRE(not_in(fn, { U'\U0001EE43', U'\U0001EE46' })); + REQUIRE(not_in(fn, { U'\U0001EE55', U'\U0001EE56' })); + REQUIRE(not_in(fn, { U'\U0001EE65', U'\U0001EE66' })); + REQUIRE(not_in(fn, { U'\U0001EE9C', U'\U0001EEA0' })); + REQUIRE(not_in(fn, { U'\U0001EEBC', U'\U0001FFFF' })); + REQUIRE(not_in(fn, { U'\U0002A6DD', U'\U0002A6FF' })); + REQUIRE(not_in(fn, { U'\U0002B734', U'\U0002B73F' })); + REQUIRE(not_in(fn, { U'\U0002B81D', U'\U0002B81F' })); + REQUIRE(not_in(fn, { U'\U0002CEA1', U'\U0002CEAF' })); + REQUIRE(not_in(fn, { U'\U0002EBE0', U'\U0002F7FF' })); + REQUIRE(not_in(fn, { U'\U0002FA1E', U'\U0002FFFF' })); + REQUIRE(not_in(fn, { U'\U0003134A', U'\U0010FFFF' })); + + // individual values which should return false + REQUIRE(!fn(U'\u00D7')); + REQUIRE(!fn(U'\u00F7')); + REQUIRE(!fn(U'\u02ED')); + REQUIRE(!fn(U'\u0375')); + REQUIRE(!fn(U'\u037E')); + REQUIRE(!fn(U'\u0387')); + REQUIRE(!fn(U'\u038B')); + REQUIRE(!fn(U'\u038D')); + REQUIRE(!fn(U'\u03A2')); + 
REQUIRE(!fn(U'\u03F6')); + REQUIRE(!fn(U'\u0530')); + REQUIRE(!fn(U'\u0670')); + REQUIRE(!fn(U'\u06D4')); + REQUIRE(!fn(U'\u0711')); + REQUIRE(!fn(U'\u08B5')); + REQUIRE(!fn(U'\u09A9')); + REQUIRE(!fn(U'\u09B1')); + REQUIRE(!fn(U'\u09DE')); + REQUIRE(!fn(U'\u0A29')); + REQUIRE(!fn(U'\u0A31')); + REQUIRE(!fn(U'\u0A34')); + REQUIRE(!fn(U'\u0A37')); + REQUIRE(!fn(U'\u0A5D')); + REQUIRE(!fn(U'\u0A8E')); + REQUIRE(!fn(U'\u0A92')); + REQUIRE(!fn(U'\u0AA9')); + REQUIRE(!fn(U'\u0AB1')); + REQUIRE(!fn(U'\u0AB4')); + REQUIRE(!fn(U'\u0B29')); + REQUIRE(!fn(U'\u0B31')); + REQUIRE(!fn(U'\u0B34')); + REQUIRE(!fn(U'\u0B5E')); + REQUIRE(!fn(U'\u0B84')); + REQUIRE(!fn(U'\u0B91')); + REQUIRE(!fn(U'\u0B9B')); + REQUIRE(!fn(U'\u0B9D')); + REQUIRE(!fn(U'\u0C0D')); + REQUIRE(!fn(U'\u0C11')); + REQUIRE(!fn(U'\u0C29')); + REQUIRE(!fn(U'\u0C8D')); + REQUIRE(!fn(U'\u0C91')); + REQUIRE(!fn(U'\u0CA9')); + REQUIRE(!fn(U'\u0CB4')); + REQUIRE(!fn(U'\u0CDF')); + REQUIRE(!fn(U'\u0D0D')); + REQUIRE(!fn(U'\u0D11')); + REQUIRE(!fn(U'\u0DB2')); + REQUIRE(!fn(U'\u0DBC')); + REQUIRE(!fn(U'\u0E31')); + REQUIRE(!fn(U'\u0E83')); + REQUIRE(!fn(U'\u0E85')); + REQUIRE(!fn(U'\u0E8B')); + REQUIRE(!fn(U'\u0EA4')); + REQUIRE(!fn(U'\u0EA6')); + REQUIRE(!fn(U'\u0EB1')); + REQUIRE(!fn(U'\u0EC5')); + REQUIRE(!fn(U'\u0F48')); + REQUIRE(!fn(U'\u10C6')); + REQUIRE(!fn(U'\u10FB')); + REQUIRE(!fn(U'\u1249')); + REQUIRE(!fn(U'\u1257')); + REQUIRE(!fn(U'\u1259')); + REQUIRE(!fn(U'\u1289')); + REQUIRE(!fn(U'\u12B1')); + REQUIRE(!fn(U'\u12BF')); + REQUIRE(!fn(U'\u12C1')); + REQUIRE(!fn(U'\u12D7')); + REQUIRE(!fn(U'\u1311')); + REQUIRE(!fn(U'\u1680')); + REQUIRE(!fn(U'\u170D')); + REQUIRE(!fn(U'\u176D')); + REQUIRE(!fn(U'\u18A9')); + REQUIRE(!fn(U'\u1CED')); + REQUIRE(!fn(U'\u1CF4')); + REQUIRE(!fn(U'\u1F58')); + REQUIRE(!fn(U'\u1F5A')); + REQUIRE(!fn(U'\u1F5C')); + REQUIRE(!fn(U'\u1F5E')); + REQUIRE(!fn(U'\u1FB5')); + REQUIRE(!fn(U'\u1FBD')); + REQUIRE(!fn(U'\u1FC5')); + REQUIRE(!fn(U'\u1FF5')); + REQUIRE(!fn(U'\u2114')); + 
REQUIRE(!fn(U'\u2125')); + REQUIRE(!fn(U'\u2127')); + REQUIRE(!fn(U'\u2129')); + REQUIRE(!fn(U'\u212E')); + REQUIRE(!fn(U'\u2C2F')); + REQUIRE(!fn(U'\u2C5F')); + REQUIRE(!fn(U'\u2D26')); + REQUIRE(!fn(U'\u2DA7')); + REQUIRE(!fn(U'\u2DAF')); + REQUIRE(!fn(U'\u2DB7')); + REQUIRE(!fn(U'\u2DBF')); + REQUIRE(!fn(U'\u2DC7')); + REQUIRE(!fn(U'\u2DCF')); + REQUIRE(!fn(U'\u2DD7')); + REQUIRE(!fn(U'\u30A0')); + REQUIRE(!fn(U'\u30FB')); + REQUIRE(!fn(U'\u3130')); + REQUIRE(!fn(U'\uA802')); + REQUIRE(!fn(U'\uA806')); + REQUIRE(!fn(U'\uA80B')); + REQUIRE(!fn(U'\uA8FC')); + REQUIRE(!fn(U'\uA9E5')); + REQUIRE(!fn(U'\uA9FF')); + REQUIRE(!fn(U'\uAA43')); + REQUIRE(!fn(U'\uAAB0')); + REQUIRE(!fn(U'\uAAC1')); + REQUIRE(!fn(U'\uAB27')); + REQUIRE(!fn(U'\uAB2F')); + REQUIRE(!fn(U'\uAB5B')); + REQUIRE(!fn(U'\uFB1E')); + REQUIRE(!fn(U'\uFB29')); + REQUIRE(!fn(U'\uFB37')); + REQUIRE(!fn(U'\uFB3D')); + REQUIRE(!fn(U'\uFB3F')); + REQUIRE(!fn(U'\uFB42')); + REQUIRE(!fn(U'\uFB45')); + REQUIRE(!fn(U'\uFE75')); + REQUIRE(!fn(U'\U0001000C')); + REQUIRE(!fn(U'\U00010027')); + REQUIRE(!fn(U'\U0001003B')); + REQUIRE(!fn(U'\U0001003E')); + REQUIRE(!fn(U'\U00010341')); + REQUIRE(!fn(U'\U00010809')); + REQUIRE(!fn(U'\U00010836')); + REQUIRE(!fn(U'\U000108F3')); + REQUIRE(!fn(U'\U00010A14')); + REQUIRE(!fn(U'\U00010A18')); + REQUIRE(!fn(U'\U00010AC8')); + REQUIRE(!fn(U'\U000111DB')); + REQUIRE(!fn(U'\U00011212')); + REQUIRE(!fn(U'\U00011287')); + REQUIRE(!fn(U'\U00011289')); + REQUIRE(!fn(U'\U0001128E')); + REQUIRE(!fn(U'\U0001129E')); + REQUIRE(!fn(U'\U00011329')); + REQUIRE(!fn(U'\U00011331')); + REQUIRE(!fn(U'\U00011334')); + REQUIRE(!fn(U'\U000114C6')); + REQUIRE(!fn(U'\U00011914')); + REQUIRE(!fn(U'\U00011917')); + REQUIRE(!fn(U'\U00011940')); + REQUIRE(!fn(U'\U000119E2')); + REQUIRE(!fn(U'\U00011C09')); + REQUIRE(!fn(U'\U00011D07')); + REQUIRE(!fn(U'\U00011D0A')); + REQUIRE(!fn(U'\U00011D66')); + REQUIRE(!fn(U'\U00011D69')); + REQUIRE(!fn(U'\U00016FE2')); + REQUIRE(!fn(U'\U0001D455')); + 
REQUIRE(!fn(U'\U0001D49D')); + REQUIRE(!fn(U'\U0001D4AD')); + REQUIRE(!fn(U'\U0001D4BA')); + REQUIRE(!fn(U'\U0001D4BC')); + REQUIRE(!fn(U'\U0001D4C4')); + REQUIRE(!fn(U'\U0001D506')); + REQUIRE(!fn(U'\U0001D515')); + REQUIRE(!fn(U'\U0001D51D')); + REQUIRE(!fn(U'\U0001D53A')); + REQUIRE(!fn(U'\U0001D53F')); + REQUIRE(!fn(U'\U0001D545')); + REQUIRE(!fn(U'\U0001D551')); + REQUIRE(!fn(U'\U0001D6C1')); + REQUIRE(!fn(U'\U0001D6DB')); + REQUIRE(!fn(U'\U0001D6FB')); + REQUIRE(!fn(U'\U0001D715')); + REQUIRE(!fn(U'\U0001D735')); + REQUIRE(!fn(U'\U0001D74F')); + REQUIRE(!fn(U'\U0001D76F')); + REQUIRE(!fn(U'\U0001D789')); + REQUIRE(!fn(U'\U0001D7A9')); + REQUIRE(!fn(U'\U0001D7C3')); + REQUIRE(!fn(U'\U0001EE04')); + REQUIRE(!fn(U'\U0001EE20')); + REQUIRE(!fn(U'\U0001EE23')); + REQUIRE(!fn(U'\U0001EE28')); + REQUIRE(!fn(U'\U0001EE33')); + REQUIRE(!fn(U'\U0001EE38')); + REQUIRE(!fn(U'\U0001EE3A')); + REQUIRE(!fn(U'\U0001EE48')); + REQUIRE(!fn(U'\U0001EE4A')); + REQUIRE(!fn(U'\U0001EE4C')); + REQUIRE(!fn(U'\U0001EE50')); + REQUIRE(!fn(U'\U0001EE53')); + REQUIRE(!fn(U'\U0001EE58')); + REQUIRE(!fn(U'\U0001EE5A')); + REQUIRE(!fn(U'\U0001EE5C')); + REQUIRE(!fn(U'\U0001EE5E')); + REQUIRE(!fn(U'\U0001EE60')); + REQUIRE(!fn(U'\U0001EE63')); + REQUIRE(!fn(U'\U0001EE6B')); + REQUIRE(!fn(U'\U0001EE73')); + REQUIRE(!fn(U'\U0001EE78')); + REQUIRE(!fn(U'\U0001EE7D')); + REQUIRE(!fn(U'\U0001EE7F')); + REQUIRE(!fn(U'\U0001EE8A')); + REQUIRE(!fn(U'\U0001EEA4')); + REQUIRE(!fn(U'\U0001EEAA')); + } + + //----- is_unicode_number ----------------------------------------------------------- + { + INFO("is_unicode_number"sv) + static constexpr auto fn = is_unicode_number; + + // contiguous ranges of values which should return true + REQUIRE(in(fn, { U'\u0660', U'\u0669' })); + REQUIRE(in(fn, { U'\u06F0', U'\u06F9' })); + REQUIRE(in(fn, { U'\u07C0', U'\u07C9' })); + REQUIRE(in(fn, { U'\u0966', U'\u096F' })); + REQUIRE(in(fn, { U'\u09E6', U'\u09EF' })); + REQUIRE(in(fn, { U'\u0A66', U'\u0A6F' })); + 
REQUIRE(in(fn, { U'\u0AE6', U'\u0AEF' })); + REQUIRE(in(fn, { U'\u0B66', U'\u0B6F' })); + REQUIRE(in(fn, { U'\u0BE6', U'\u0BEF' })); + REQUIRE(in(fn, { U'\u0C66', U'\u0C6F' })); + REQUIRE(in(fn, { U'\u0CE6', U'\u0CEF' })); + REQUIRE(in(fn, { U'\u0D66', U'\u0D6F' })); + REQUIRE(in(fn, { U'\u0DE6', U'\u0DEF' })); + REQUIRE(in(fn, { U'\u0E50', U'\u0E59' })); + REQUIRE(in(fn, { U'\u0ED0', U'\u0ED9' })); + REQUIRE(in(fn, { U'\u0F20', U'\u0F29' })); + REQUIRE(in(fn, { U'\u1040', U'\u1049' })); + REQUIRE(in(fn, { U'\u1090', U'\u1099' })); + REQUIRE(in(fn, { U'\u16EE', U'\u16F0' })); + REQUIRE(in(fn, { U'\u17E0', U'\u17E9' })); + REQUIRE(in(fn, { U'\u1810', U'\u1819' })); + REQUIRE(in(fn, { U'\u1946', U'\u194F' })); + REQUIRE(in(fn, { U'\u19D0', U'\u19D9' })); + REQUIRE(in(fn, { U'\u1A80', U'\u1A89' })); + REQUIRE(in(fn, { U'\u1A90', U'\u1A99' })); + REQUIRE(in(fn, { U'\u1B50', U'\u1B59' })); + REQUIRE(in(fn, { U'\u1BB0', U'\u1BB9' })); + REQUIRE(in(fn, { U'\u1C40', U'\u1C49' })); + REQUIRE(in(fn, { U'\u1C50', U'\u1C59' })); + REQUIRE(in(fn, { U'\u2160', U'\u2182' })); + REQUIRE(in(fn, { U'\u2185', U'\u2188' })); + REQUIRE(in(fn, { U'\u3021', U'\u3029' })); + REQUIRE(in(fn, { U'\u3038', U'\u303A' })); + REQUIRE(in(fn, { U'\uA620', U'\uA629' })); + REQUIRE(in(fn, { U'\uA6E6', U'\uA6EF' })); + REQUIRE(in(fn, { U'\uA8D0', U'\uA8D9' })); + REQUIRE(in(fn, { U'\uA900', U'\uA909' })); + REQUIRE(in(fn, { U'\uA9D0', U'\uA9D9' })); + REQUIRE(in(fn, { U'\uA9F0', U'\uA9F9' })); + REQUIRE(in(fn, { U'\uAA50', U'\uAA59' })); + REQUIRE(in(fn, { U'\uABF0', U'\uABF9' })); + REQUIRE(in(fn, { U'\uFF10', U'\uFF19' })); + REQUIRE(in(fn, { U'\U00010140', U'\U00010174' })); + REQUIRE(in(fn, { U'\U000103D1', U'\U000103D5' })); + REQUIRE(in(fn, { U'\U000104A0', U'\U000104A9' })); + REQUIRE(in(fn, { U'\U00010D30', U'\U00010D39' })); + REQUIRE(in(fn, { U'\U00011066', U'\U0001106F' })); + REQUIRE(in(fn, { U'\U000110F0', U'\U000110F9' })); + REQUIRE(in(fn, { U'\U00011136', U'\U0001113F' })); + 
REQUIRE(in(fn, { U'\U000111D0', U'\U000111D9' })); + REQUIRE(in(fn, { U'\U000112F0', U'\U000112F9' })); + REQUIRE(in(fn, { U'\U00011450', U'\U00011459' })); + REQUIRE(in(fn, { U'\U000114D0', U'\U000114D9' })); + REQUIRE(in(fn, { U'\U00011650', U'\U00011659' })); + REQUIRE(in(fn, { U'\U000116C0', U'\U000116C9' })); + REQUIRE(in(fn, { U'\U00011730', U'\U00011739' })); + REQUIRE(in(fn, { U'\U000118E0', U'\U000118E9' })); + REQUIRE(in(fn, { U'\U00011950', U'\U00011959' })); + REQUIRE(in(fn, { U'\U00011C50', U'\U00011C59' })); + REQUIRE(in(fn, { U'\U00011D50', U'\U00011D59' })); + REQUIRE(in(fn, { U'\U00011DA0', U'\U00011DA9' })); + REQUIRE(in(fn, { U'\U00012400', U'\U0001246E' })); + REQUIRE(in(fn, { U'\U00016A60', U'\U00016A69' })); + REQUIRE(in(fn, { U'\U00016B50', U'\U00016B59' })); + REQUIRE(in(fn, { U'\U0001D7CE', U'\U0001D7FF' })); + REQUIRE(in(fn, { U'\U0001E140', U'\U0001E149' })); + REQUIRE(in(fn, { U'\U0001E2F0', U'\U0001E2F9' })); + REQUIRE(in(fn, { U'\U0001E950', U'\U0001E959' })); + REQUIRE(in(fn, { U'\U0001FBF0', U'\U0001FBF9' })); + + // individual values which should return true + REQUIRE(fn(U'\u3007')); + REQUIRE(fn(U'\U00010341')); + REQUIRE(fn(U'\U0001034A')); + + // contiguous ranges of values which should return false + REQUIRE(not_in(fn, { U'\u0000', U'\u065F' })); + REQUIRE(not_in(fn, { U'\u066A', U'\u06EF' })); + REQUIRE(not_in(fn, { U'\u06FA', U'\u07BF' })); + REQUIRE(not_in(fn, { U'\u07CA', U'\u0965' })); + REQUIRE(not_in(fn, { U'\u0970', U'\u09E5' })); + REQUIRE(not_in(fn, { U'\u09F0', U'\u0A65' })); + REQUIRE(not_in(fn, { U'\u0A70', U'\u0AE5' })); + REQUIRE(not_in(fn, { U'\u0AF0', U'\u0B65' })); + REQUIRE(not_in(fn, { U'\u0B70', U'\u0BE5' })); + REQUIRE(not_in(fn, { U'\u0BF0', U'\u0C65' })); + REQUIRE(not_in(fn, { U'\u0C70', U'\u0CE5' })); + REQUIRE(not_in(fn, { U'\u0CF0', U'\u0D65' })); + REQUIRE(not_in(fn, { U'\u0D70', U'\u0DE5' })); + REQUIRE(not_in(fn, { U'\u0DF0', U'\u0E4F' })); + REQUIRE(not_in(fn, { U'\u0E5A', U'\u0ECF' })); + 
REQUIRE(not_in(fn, { U'\u0EDA', U'\u0F1F' })); + REQUIRE(not_in(fn, { U'\u0F2A', U'\u103F' })); + REQUIRE(not_in(fn, { U'\u104A', U'\u108F' })); + REQUIRE(not_in(fn, { U'\u109A', U'\u16ED' })); + REQUIRE(not_in(fn, { U'\u16F1', U'\u17DF' })); + REQUIRE(not_in(fn, { U'\u17EA', U'\u180F' })); + REQUIRE(not_in(fn, { U'\u181A', U'\u1945' })); + REQUIRE(not_in(fn, { U'\u1950', U'\u19CF' })); + REQUIRE(not_in(fn, { U'\u19DA', U'\u1A7F' })); + REQUIRE(not_in(fn, { U'\u1A8A', U'\u1A8F' })); + REQUIRE(not_in(fn, { U'\u1A9A', U'\u1B4F' })); + REQUIRE(not_in(fn, { U'\u1B5A', U'\u1BAF' })); + REQUIRE(not_in(fn, { U'\u1BBA', U'\u1C3F' })); + REQUIRE(not_in(fn, { U'\u1C4A', U'\u1C4F' })); + REQUIRE(not_in(fn, { U'\u1C5A', U'\u215F' })); + REQUIRE(not_in(fn, { U'\u2183', U'\u2184' })); + REQUIRE(not_in(fn, { U'\u2189', U'\u3006' })); + REQUIRE(not_in(fn, { U'\u3008', U'\u3020' })); + REQUIRE(not_in(fn, { U'\u302A', U'\u3037' })); + REQUIRE(not_in(fn, { U'\u303B', U'\uA61F' })); + REQUIRE(not_in(fn, { U'\uA62A', U'\uA6E5' })); + REQUIRE(not_in(fn, { U'\uA6F0', U'\uA8CF' })); + REQUIRE(not_in(fn, { U'\uA8DA', U'\uA8FF' })); + REQUIRE(not_in(fn, { U'\uA90A', U'\uA9CF' })); + REQUIRE(not_in(fn, { U'\uA9DA', U'\uA9EF' })); + REQUIRE(not_in(fn, { U'\uA9FA', U'\uAA4F' })); + REQUIRE(not_in(fn, { U'\uAA5A', U'\uABEF' })); + REQUIRE(not_in(fn, { U'\uABFA', U'\uFF0F' })); + REQUIRE(not_in(fn, { U'\uFF1A', U'\U0001013F' })); + REQUIRE(not_in(fn, { U'\U00010175', U'\U00010340' })); + REQUIRE(not_in(fn, { U'\U00010342', U'\U00010349' })); + REQUIRE(not_in(fn, { U'\U0001034B', U'\U000103D0' })); + REQUIRE(not_in(fn, { U'\U000103D6', U'\U0001049F' })); + REQUIRE(not_in(fn, { U'\U000104AA', U'\U00010D2F' })); + REQUIRE(not_in(fn, { U'\U00010D3A', U'\U00011065' })); + REQUIRE(not_in(fn, { U'\U00011070', U'\U000110EF' })); + REQUIRE(not_in(fn, { U'\U000110FA', U'\U00011135' })); + REQUIRE(not_in(fn, { U'\U00011140', U'\U000111CF' })); + REQUIRE(not_in(fn, { U'\U000111DA', U'\U000112EF' })); + 
REQUIRE(not_in(fn, { U'\U000112FA', U'\U0001144F' })); + REQUIRE(not_in(fn, { U'\U0001145A', U'\U000114CF' })); + REQUIRE(not_in(fn, { U'\U000114DA', U'\U0001164F' })); + REQUIRE(not_in(fn, { U'\U0001165A', U'\U000116BF' })); + REQUIRE(not_in(fn, { U'\U000116CA', U'\U0001172F' })); + REQUIRE(not_in(fn, { U'\U0001173A', U'\U000118DF' })); + REQUIRE(not_in(fn, { U'\U000118EA', U'\U0001194F' })); + REQUIRE(not_in(fn, { U'\U0001195A', U'\U00011C4F' })); + REQUIRE(not_in(fn, { U'\U00011C5A', U'\U00011D4F' })); + REQUIRE(not_in(fn, { U'\U00011D5A', U'\U00011D9F' })); + REQUIRE(not_in(fn, { U'\U00011DAA', U'\U000123FF' })); + REQUIRE(not_in(fn, { U'\U0001246F', U'\U00016A5F' })); + REQUIRE(not_in(fn, { U'\U00016A6A', U'\U00016B4F' })); + REQUIRE(not_in(fn, { U'\U00016B5A', U'\U0001D7CD' })); + REQUIRE(not_in(fn, { U'\U0001D800', U'\U0001E13F' })); + REQUIRE(not_in(fn, { U'\U0001E14A', U'\U0001E2EF' })); + REQUIRE(not_in(fn, { U'\U0001E2FA', U'\U0001E94F' })); + REQUIRE(not_in(fn, { U'\U0001E95A', U'\U0001FBEF' })); + REQUIRE(not_in(fn, { U'\U0001FBFA', U'\U0010FFFF' })); + } + + //----- is_unicode_combining_mark --------------------------------------------------- + { + INFO("is_unicode_combining_mark"sv) + static constexpr auto fn = is_unicode_combining_mark; + + // contiguous ranges of values which should return true + REQUIRE(in(fn, { U'\u0300', U'\u036F' })); + REQUIRE(in(fn, { U'\u0483', U'\u0487' })); + REQUIRE(in(fn, { U'\u0591', U'\u05BD' })); + REQUIRE(in(fn, { U'\u05C1', U'\u05C2' })); + REQUIRE(in(fn, { U'\u05C4', U'\u05C5' })); + REQUIRE(in(fn, { U'\u0610', U'\u061A' })); + REQUIRE(in(fn, { U'\u064B', U'\u065F' })); + REQUIRE(in(fn, { U'\u06D6', U'\u06DC' })); + REQUIRE(in(fn, { U'\u06DF', U'\u06E4' })); + REQUIRE(in(fn, { U'\u06E7', U'\u06E8' })); + REQUIRE(in(fn, { U'\u06EA', U'\u06ED' })); + REQUIRE(in(fn, { U'\u0730', U'\u074A' })); + REQUIRE(in(fn, { U'\u07A6', U'\u07B0' })); + REQUIRE(in(fn, { U'\u07EB', U'\u07F3' })); + REQUIRE(in(fn, { U'\u0816', 
U'\u0819' })); + REQUIRE(in(fn, { U'\u081B', U'\u0823' })); + REQUIRE(in(fn, { U'\u0825', U'\u0827' })); + REQUIRE(in(fn, { U'\u0829', U'\u082D' })); + REQUIRE(in(fn, { U'\u0859', U'\u085B' })); + REQUIRE(in(fn, { U'\u08D3', U'\u08E1' })); + REQUIRE(in(fn, { U'\u08E3', U'\u0903' })); + REQUIRE(in(fn, { U'\u093A', U'\u093C' })); + REQUIRE(in(fn, { U'\u093E', U'\u094F' })); + REQUIRE(in(fn, { U'\u0951', U'\u0957' })); + REQUIRE(in(fn, { U'\u0962', U'\u0963' })); + REQUIRE(in(fn, { U'\u0981', U'\u0983' })); + REQUIRE(in(fn, { U'\u09BE', U'\u09C4' })); + REQUIRE(in(fn, { U'\u09C7', U'\u09C8' })); + REQUIRE(in(fn, { U'\u09CB', U'\u09CD' })); + REQUIRE(in(fn, { U'\u09E2', U'\u09E3' })); + REQUIRE(in(fn, { U'\u0A01', U'\u0A03' })); + REQUIRE(in(fn, { U'\u0A3E', U'\u0A42' })); + REQUIRE(in(fn, { U'\u0A47', U'\u0A48' })); + REQUIRE(in(fn, { U'\u0A4B', U'\u0A4D' })); + REQUIRE(in(fn, { U'\u0A70', U'\u0A71' })); + REQUIRE(in(fn, { U'\u0A81', U'\u0A83' })); + REQUIRE(in(fn, { U'\u0ABE', U'\u0AC5' })); + REQUIRE(in(fn, { U'\u0AC7', U'\u0AC9' })); + REQUIRE(in(fn, { U'\u0ACB', U'\u0ACD' })); + REQUIRE(in(fn, { U'\u0AE2', U'\u0AE3' })); + REQUIRE(in(fn, { U'\u0AFA', U'\u0AFF' })); + REQUIRE(in(fn, { U'\u0B01', U'\u0B03' })); + REQUIRE(in(fn, { U'\u0B3E', U'\u0B44' })); + REQUIRE(in(fn, { U'\u0B47', U'\u0B48' })); + REQUIRE(in(fn, { U'\u0B4B', U'\u0B4D' })); + REQUIRE(in(fn, { U'\u0B55', U'\u0B57' })); + REQUIRE(in(fn, { U'\u0B62', U'\u0B63' })); + REQUIRE(in(fn, { U'\u0BBE', U'\u0BC2' })); + REQUIRE(in(fn, { U'\u0BC6', U'\u0BC8' })); + REQUIRE(in(fn, { U'\u0BCA', U'\u0BCD' })); + REQUIRE(in(fn, { U'\u0C00', U'\u0C04' })); + REQUIRE(in(fn, { U'\u0C3E', U'\u0C44' })); + REQUIRE(in(fn, { U'\u0C46', U'\u0C48' })); + REQUIRE(in(fn, { U'\u0C4A', U'\u0C4D' })); + REQUIRE(in(fn, { U'\u0C55', U'\u0C56' })); + REQUIRE(in(fn, { U'\u0C62', U'\u0C63' })); + REQUIRE(in(fn, { U'\u0C81', U'\u0C83' })); + REQUIRE(in(fn, { U'\u0CBE', U'\u0CC4' })); + REQUIRE(in(fn, { U'\u0CC6', U'\u0CC8' })); + 
REQUIRE(in(fn, { U'\u0CCA', U'\u0CCD' })); + REQUIRE(in(fn, { U'\u0CD5', U'\u0CD6' })); + REQUIRE(in(fn, { U'\u0CE2', U'\u0CE3' })); + REQUIRE(in(fn, { U'\u0D00', U'\u0D03' })); + REQUIRE(in(fn, { U'\u0D3B', U'\u0D3C' })); + REQUIRE(in(fn, { U'\u0D3E', U'\u0D44' })); + REQUIRE(in(fn, { U'\u0D46', U'\u0D48' })); + REQUIRE(in(fn, { U'\u0D4A', U'\u0D4D' })); + REQUIRE(in(fn, { U'\u0D62', U'\u0D63' })); + REQUIRE(in(fn, { U'\u0D81', U'\u0D83' })); + REQUIRE(in(fn, { U'\u0DCF', U'\u0DD4' })); + REQUIRE(in(fn, { U'\u0DD8', U'\u0DDF' })); + REQUIRE(in(fn, { U'\u0DF2', U'\u0DF3' })); + REQUIRE(in(fn, { U'\u0E34', U'\u0E3A' })); + REQUIRE(in(fn, { U'\u0E47', U'\u0E4E' })); + REQUIRE(in(fn, { U'\u0EB4', U'\u0EBC' })); + REQUIRE(in(fn, { U'\u0EC8', U'\u0ECD' })); + REQUIRE(in(fn, { U'\u0F18', U'\u0F19' })); + REQUIRE(in(fn, { U'\u0F3E', U'\u0F3F' })); + REQUIRE(in(fn, { U'\u0F71', U'\u0F84' })); + REQUIRE(in(fn, { U'\u0F86', U'\u0F87' })); + REQUIRE(in(fn, { U'\u0F8D', U'\u0F97' })); + REQUIRE(in(fn, { U'\u0F99', U'\u0FBC' })); + REQUIRE(in(fn, { U'\u102B', U'\u103E' })); + REQUIRE(in(fn, { U'\u1056', U'\u1059' })); + REQUIRE(in(fn, { U'\u105E', U'\u1060' })); + REQUIRE(in(fn, { U'\u1062', U'\u1064' })); + REQUIRE(in(fn, { U'\u1067', U'\u106D' })); + REQUIRE(in(fn, { U'\u1071', U'\u1074' })); + REQUIRE(in(fn, { U'\u1082', U'\u108D' })); + REQUIRE(in(fn, { U'\u109A', U'\u109D' })); + REQUIRE(in(fn, { U'\u135D', U'\u135F' })); + REQUIRE(in(fn, { U'\u1712', U'\u1714' })); + REQUIRE(in(fn, { U'\u1732', U'\u1734' })); + REQUIRE(in(fn, { U'\u1752', U'\u1753' })); + REQUIRE(in(fn, { U'\u1772', U'\u1773' })); + REQUIRE(in(fn, { U'\u17B4', U'\u17D3' })); + REQUIRE(in(fn, { U'\u180B', U'\u180D' })); + REQUIRE(in(fn, { U'\u1885', U'\u1886' })); + REQUIRE(in(fn, { U'\u1920', U'\u192B' })); + REQUIRE(in(fn, { U'\u1930', U'\u193B' })); + REQUIRE(in(fn, { U'\u1A17', U'\u1A1B' })); + REQUIRE(in(fn, { U'\u1A55', U'\u1A5E' })); + REQUIRE(in(fn, { U'\u1A60', U'\u1A7C' })); + REQUIRE(in(fn, { 
U'\u1AB0', U'\u1ABD' })); + REQUIRE(in(fn, { U'\u1ABF', U'\u1AC0' })); + REQUIRE(in(fn, { U'\u1B00', U'\u1B04' })); + REQUIRE(in(fn, { U'\u1B34', U'\u1B44' })); + REQUIRE(in(fn, { U'\u1B6B', U'\u1B73' })); + REQUIRE(in(fn, { U'\u1B80', U'\u1B82' })); + REQUIRE(in(fn, { U'\u1BA1', U'\u1BAD' })); + REQUIRE(in(fn, { U'\u1BE6', U'\u1BF3' })); + REQUIRE(in(fn, { U'\u1C24', U'\u1C37' })); + REQUIRE(in(fn, { U'\u1CD0', U'\u1CD2' })); + REQUIRE(in(fn, { U'\u1CD4', U'\u1CE8' })); + REQUIRE(in(fn, { U'\u1CF7', U'\u1CF9' })); + REQUIRE(in(fn, { U'\u1DC0', U'\u1DF9' })); + REQUIRE(in(fn, { U'\u1DFB', U'\u1DFF' })); + REQUIRE(in(fn, { U'\u20D0', U'\u20DC' })); + REQUIRE(in(fn, { U'\u20E5', U'\u20F0' })); + REQUIRE(in(fn, { U'\u2CEF', U'\u2CF1' })); + REQUIRE(in(fn, { U'\u2DE0', U'\u2DFF' })); + REQUIRE(in(fn, { U'\u302A', U'\u302F' })); + REQUIRE(in(fn, { U'\u3099', U'\u309A' })); + REQUIRE(in(fn, { U'\uA674', U'\uA67D' })); + REQUIRE(in(fn, { U'\uA69E', U'\uA69F' })); + REQUIRE(in(fn, { U'\uA6F0', U'\uA6F1' })); + REQUIRE(in(fn, { U'\uA823', U'\uA827' })); + REQUIRE(in(fn, { U'\uA880', U'\uA881' })); + REQUIRE(in(fn, { U'\uA8B4', U'\uA8C5' })); + REQUIRE(in(fn, { U'\uA8E0', U'\uA8F1' })); + REQUIRE(in(fn, { U'\uA926', U'\uA92D' })); + REQUIRE(in(fn, { U'\uA947', U'\uA953' })); + REQUIRE(in(fn, { U'\uA980', U'\uA983' })); + REQUIRE(in(fn, { U'\uA9B3', U'\uA9C0' })); + REQUIRE(in(fn, { U'\uAA29', U'\uAA36' })); + REQUIRE(in(fn, { U'\uAA4C', U'\uAA4D' })); + REQUIRE(in(fn, { U'\uAA7B', U'\uAA7D' })); + REQUIRE(in(fn, { U'\uAAB2', U'\uAAB4' })); + REQUIRE(in(fn, { U'\uAAB7', U'\uAAB8' })); + REQUIRE(in(fn, { U'\uAABE', U'\uAABF' })); + REQUIRE(in(fn, { U'\uAAEB', U'\uAAEF' })); + REQUIRE(in(fn, { U'\uAAF5', U'\uAAF6' })); + REQUIRE(in(fn, { U'\uABE3', U'\uABEA' })); + REQUIRE(in(fn, { U'\uABEC', U'\uABED' })); + REQUIRE(in(fn, { U'\uFE00', U'\uFE0F' })); + REQUIRE(in(fn, { U'\uFE20', U'\uFE2F' })); + REQUIRE(in(fn, { U'\U00010376', U'\U0001037A' })); + REQUIRE(in(fn, { 
U'\U00010A01', U'\U00010A03' })); + REQUIRE(in(fn, { U'\U00010A05', U'\U00010A06' })); + REQUIRE(in(fn, { U'\U00010A0C', U'\U00010A0F' })); + REQUIRE(in(fn, { U'\U00010A38', U'\U00010A3A' })); + REQUIRE(in(fn, { U'\U00010AE5', U'\U00010AE6' })); + REQUIRE(in(fn, { U'\U00010D24', U'\U00010D27' })); + REQUIRE(in(fn, { U'\U00010EAB', U'\U00010EAC' })); + REQUIRE(in(fn, { U'\U00010F46', U'\U00010F50' })); + REQUIRE(in(fn, { U'\U00011000', U'\U00011002' })); + REQUIRE(in(fn, { U'\U00011038', U'\U00011046' })); + REQUIRE(in(fn, { U'\U0001107F', U'\U00011082' })); + REQUIRE(in(fn, { U'\U000110B0', U'\U000110BA' })); + REQUIRE(in(fn, { U'\U00011100', U'\U00011102' })); + REQUIRE(in(fn, { U'\U00011127', U'\U00011134' })); + REQUIRE(in(fn, { U'\U00011145', U'\U00011146' })); + REQUIRE(in(fn, { U'\U00011180', U'\U00011182' })); + REQUIRE(in(fn, { U'\U000111B3', U'\U000111C0' })); + REQUIRE(in(fn, { U'\U000111C9', U'\U000111CC' })); + REQUIRE(in(fn, { U'\U000111CE', U'\U000111CF' })); + REQUIRE(in(fn, { U'\U0001122C', U'\U00011237' })); + REQUIRE(in(fn, { U'\U000112DF', U'\U000112EA' })); + REQUIRE(in(fn, { U'\U00011300', U'\U00011303' })); + REQUIRE(in(fn, { U'\U0001133B', U'\U0001133C' })); + REQUIRE(in(fn, { U'\U0001133E', U'\U00011344' })); + REQUIRE(in(fn, { U'\U00011347', U'\U00011348' })); + REQUIRE(in(fn, { U'\U0001134B', U'\U0001134D' })); + REQUIRE(in(fn, { U'\U00011362', U'\U00011363' })); + REQUIRE(in(fn, { U'\U00011366', U'\U0001136C' })); + REQUIRE(in(fn, { U'\U00011370', U'\U00011374' })); + REQUIRE(in(fn, { U'\U00011435', U'\U00011446' })); + REQUIRE(in(fn, { U'\U000114B0', U'\U000114C3' })); + REQUIRE(in(fn, { U'\U000115AF', U'\U000115B5' })); + REQUIRE(in(fn, { U'\U000115B8', U'\U000115C0' })); + REQUIRE(in(fn, { U'\U000115DC', U'\U000115DD' })); + REQUIRE(in(fn, { U'\U00011630', U'\U00011640' })); + REQUIRE(in(fn, { U'\U000116AB', U'\U000116B7' })); + REQUIRE(in(fn, { U'\U0001171D', U'\U0001172B' })); + REQUIRE(in(fn, { U'\U0001182C', U'\U0001183A' })); + 
REQUIRE(in(fn, { U'\U00011930', U'\U00011935' })); + REQUIRE(in(fn, { U'\U00011937', U'\U00011938' })); + REQUIRE(in(fn, { U'\U0001193B', U'\U0001193E' })); + REQUIRE(in(fn, { U'\U00011942', U'\U00011943' })); + REQUIRE(in(fn, { U'\U000119D1', U'\U000119D7' })); + REQUIRE(in(fn, { U'\U000119DA', U'\U000119E0' })); + REQUIRE(in(fn, { U'\U00011A01', U'\U00011A0A' })); + REQUIRE(in(fn, { U'\U00011A33', U'\U00011A39' })); + REQUIRE(in(fn, { U'\U00011A3B', U'\U00011A3E' })); + REQUIRE(in(fn, { U'\U00011A51', U'\U00011A5B' })); + REQUIRE(in(fn, { U'\U00011A8A', U'\U00011A99' })); + REQUIRE(in(fn, { U'\U00011C2F', U'\U00011C36' })); + REQUIRE(in(fn, { U'\U00011C38', U'\U00011C3F' })); + REQUIRE(in(fn, { U'\U00011C92', U'\U00011CA7' })); + REQUIRE(in(fn, { U'\U00011CA9', U'\U00011CB6' })); + REQUIRE(in(fn, { U'\U00011D31', U'\U00011D36' })); + REQUIRE(in(fn, { U'\U00011D3C', U'\U00011D3D' })); + REQUIRE(in(fn, { U'\U00011D3F', U'\U00011D45' })); + REQUIRE(in(fn, { U'\U00011D8A', U'\U00011D8E' })); + REQUIRE(in(fn, { U'\U00011D90', U'\U00011D91' })); + REQUIRE(in(fn, { U'\U00011D93', U'\U00011D97' })); + REQUIRE(in(fn, { U'\U00011EF3', U'\U00011EF6' })); + REQUIRE(in(fn, { U'\U00016AF0', U'\U00016AF4' })); + REQUIRE(in(fn, { U'\U00016B30', U'\U00016B36' })); + REQUIRE(in(fn, { U'\U00016F51', U'\U00016F87' })); + REQUIRE(in(fn, { U'\U00016F8F', U'\U00016F92' })); + REQUIRE(in(fn, { U'\U00016FF0', U'\U00016FF1' })); + REQUIRE(in(fn, { U'\U0001BC9D', U'\U0001BC9E' })); + REQUIRE(in(fn, { U'\U0001D165', U'\U0001D169' })); + REQUIRE(in(fn, { U'\U0001D16D', U'\U0001D172' })); + REQUIRE(in(fn, { U'\U0001D17B', U'\U0001D182' })); + REQUIRE(in(fn, { U'\U0001D185', U'\U0001D18B' })); + REQUIRE(in(fn, { U'\U0001D1AA', U'\U0001D1AD' })); + REQUIRE(in(fn, { U'\U0001D242', U'\U0001D244' })); + REQUIRE(in(fn, { U'\U0001DA00', U'\U0001DA36' })); + REQUIRE(in(fn, { U'\U0001DA3B', U'\U0001DA6C' })); + REQUIRE(in(fn, { U'\U0001DA9B', U'\U0001DA9F' })); + REQUIRE(in(fn, { U'\U0001DAA1', 
U'\U0001DAAF' })); + REQUIRE(in(fn, { U'\U0001E000', U'\U0001E006' })); + REQUIRE(in(fn, { U'\U0001E008', U'\U0001E018' })); + REQUIRE(in(fn, { U'\U0001E01B', U'\U0001E021' })); + REQUIRE(in(fn, { U'\U0001E023', U'\U0001E024' })); + REQUIRE(in(fn, { U'\U0001E026', U'\U0001E02A' })); + REQUIRE(in(fn, { U'\U0001E130', U'\U0001E136' })); + REQUIRE(in(fn, { U'\U0001E2EC', U'\U0001E2EF' })); + REQUIRE(in(fn, { U'\U0001E8D0', U'\U0001E8D6' })); + REQUIRE(in(fn, { U'\U0001E944', U'\U0001E94A' })); + REQUIRE(in(fn, { U'\U000E0100', U'\U000E01EF' })); + + // individual values which should return true + REQUIRE(fn(U'\u05BF')); + REQUIRE(fn(U'\u05C7')); + REQUIRE(fn(U'\u0670')); + REQUIRE(fn(U'\u0711')); + REQUIRE(fn(U'\u07FD')); + REQUIRE(fn(U'\u09BC')); + REQUIRE(fn(U'\u09D7')); + REQUIRE(fn(U'\u09FE')); + REQUIRE(fn(U'\u0A3C')); + REQUIRE(fn(U'\u0A51')); + REQUIRE(fn(U'\u0A75')); + REQUIRE(fn(U'\u0ABC')); + REQUIRE(fn(U'\u0B3C')); + REQUIRE(fn(U'\u0B82')); + REQUIRE(fn(U'\u0BD7')); + REQUIRE(fn(U'\u0CBC')); + REQUIRE(fn(U'\u0D57')); + REQUIRE(fn(U'\u0DCA')); + REQUIRE(fn(U'\u0DD6')); + REQUIRE(fn(U'\u0E31')); + REQUIRE(fn(U'\u0EB1')); + REQUIRE(fn(U'\u0F35')); + REQUIRE(fn(U'\u0F37')); + REQUIRE(fn(U'\u0F39')); + REQUIRE(fn(U'\u0FC6')); + REQUIRE(fn(U'\u108F')); + REQUIRE(fn(U'\u17DD')); + REQUIRE(fn(U'\u18A9')); + REQUIRE(fn(U'\u1A7F')); + REQUIRE(fn(U'\u1CED')); + REQUIRE(fn(U'\u1CF4')); + REQUIRE(fn(U'\u20E1')); + REQUIRE(fn(U'\u2D7F')); + REQUIRE(fn(U'\uA66F')); + REQUIRE(fn(U'\uA802')); + REQUIRE(fn(U'\uA806')); + REQUIRE(fn(U'\uA80B')); + REQUIRE(fn(U'\uA82C')); + REQUIRE(fn(U'\uA8FF')); + REQUIRE(fn(U'\uA9E5')); + REQUIRE(fn(U'\uAA43')); + REQUIRE(fn(U'\uAAB0')); + REQUIRE(fn(U'\uAAC1')); + REQUIRE(fn(U'\uFB1E')); + REQUIRE(fn(U'\U000101FD')); + REQUIRE(fn(U'\U000102E0')); + REQUIRE(fn(U'\U00010A3F')); + REQUIRE(fn(U'\U00011173')); + REQUIRE(fn(U'\U0001123E')); + REQUIRE(fn(U'\U00011357')); + REQUIRE(fn(U'\U0001145E')); + REQUIRE(fn(U'\U00011940')); + 
REQUIRE(fn(U'\U000119E4')); + REQUIRE(fn(U'\U00011A47')); + REQUIRE(fn(U'\U00011D3A')); + REQUIRE(fn(U'\U00011D47')); + REQUIRE(fn(U'\U00016F4F')); + REQUIRE(fn(U'\U00016FE4')); + REQUIRE(fn(U'\U0001DA75')); + REQUIRE(fn(U'\U0001DA84')); + + // contiguous ranges of values which should return false + REQUIRE(not_in(fn, { U'\u0000', U'\u02FF' })); + REQUIRE(not_in(fn, { U'\u0370', U'\u0482' })); + REQUIRE(not_in(fn, { U'\u0488', U'\u0590' })); + REQUIRE(not_in(fn, { U'\u05C8', U'\u060F' })); + REQUIRE(not_in(fn, { U'\u061B', U'\u064A' })); + REQUIRE(not_in(fn, { U'\u0660', U'\u066F' })); + REQUIRE(not_in(fn, { U'\u0671', U'\u06D5' })); + REQUIRE(not_in(fn, { U'\u06DD', U'\u06DE' })); + REQUIRE(not_in(fn, { U'\u06E5', U'\u06E6' })); + REQUIRE(not_in(fn, { U'\u06EE', U'\u0710' })); + REQUIRE(not_in(fn, { U'\u0712', U'\u072F' })); + REQUIRE(not_in(fn, { U'\u074B', U'\u07A5' })); + REQUIRE(not_in(fn, { U'\u07B1', U'\u07EA' })); + REQUIRE(not_in(fn, { U'\u07F4', U'\u07FC' })); + REQUIRE(not_in(fn, { U'\u07FE', U'\u0815' })); + REQUIRE(not_in(fn, { U'\u082E', U'\u0858' })); + REQUIRE(not_in(fn, { U'\u085C', U'\u08D2' })); + REQUIRE(not_in(fn, { U'\u0904', U'\u0939' })); + REQUIRE(not_in(fn, { U'\u0958', U'\u0961' })); + REQUIRE(not_in(fn, { U'\u0964', U'\u0980' })); + REQUIRE(not_in(fn, { U'\u0984', U'\u09BB' })); + REQUIRE(not_in(fn, { U'\u09C5', U'\u09C6' })); + REQUIRE(not_in(fn, { U'\u09C9', U'\u09CA' })); + REQUIRE(not_in(fn, { U'\u09CE', U'\u09D6' })); + REQUIRE(not_in(fn, { U'\u09D8', U'\u09E1' })); + REQUIRE(not_in(fn, { U'\u09E4', U'\u09FD' })); + REQUIRE(not_in(fn, { U'\u09FF', U'\u0A00' })); + REQUIRE(not_in(fn, { U'\u0A04', U'\u0A3B' })); + REQUIRE(not_in(fn, { U'\u0A43', U'\u0A46' })); + REQUIRE(not_in(fn, { U'\u0A49', U'\u0A4A' })); + REQUIRE(not_in(fn, { U'\u0A4E', U'\u0A50' })); + REQUIRE(not_in(fn, { U'\u0A52', U'\u0A6F' })); + REQUIRE(not_in(fn, { U'\u0A72', U'\u0A74' })); + REQUIRE(not_in(fn, { U'\u0A76', U'\u0A80' })); + REQUIRE(not_in(fn, { U'\u0A84', 
U'\u0ABB' })); + REQUIRE(not_in(fn, { U'\u0ACE', U'\u0AE1' })); + REQUIRE(not_in(fn, { U'\u0AE4', U'\u0AF9' })); + REQUIRE(not_in(fn, { U'\u0B04', U'\u0B3B' })); + REQUIRE(not_in(fn, { U'\u0B45', U'\u0B46' })); + REQUIRE(not_in(fn, { U'\u0B49', U'\u0B4A' })); + REQUIRE(not_in(fn, { U'\u0B4E', U'\u0B54' })); + REQUIRE(not_in(fn, { U'\u0B58', U'\u0B61' })); + REQUIRE(not_in(fn, { U'\u0B64', U'\u0B81' })); + REQUIRE(not_in(fn, { U'\u0B83', U'\u0BBD' })); + REQUIRE(not_in(fn, { U'\u0BC3', U'\u0BC5' })); + REQUIRE(not_in(fn, { U'\u0BCE', U'\u0BD6' })); + REQUIRE(not_in(fn, { U'\u0BD8', U'\u0BFF' })); + REQUIRE(not_in(fn, { U'\u0C05', U'\u0C3D' })); + REQUIRE(not_in(fn, { U'\u0C4E', U'\u0C54' })); + REQUIRE(not_in(fn, { U'\u0C57', U'\u0C61' })); + REQUIRE(not_in(fn, { U'\u0C64', U'\u0C80' })); + REQUIRE(not_in(fn, { U'\u0C84', U'\u0CBB' })); + REQUIRE(not_in(fn, { U'\u0CCE', U'\u0CD4' })); + REQUIRE(not_in(fn, { U'\u0CD7', U'\u0CE1' })); + REQUIRE(not_in(fn, { U'\u0CE4', U'\u0CFF' })); + REQUIRE(not_in(fn, { U'\u0D04', U'\u0D3A' })); + REQUIRE(not_in(fn, { U'\u0D4E', U'\u0D56' })); + REQUIRE(not_in(fn, { U'\u0D58', U'\u0D61' })); + REQUIRE(not_in(fn, { U'\u0D64', U'\u0D80' })); + REQUIRE(not_in(fn, { U'\u0D84', U'\u0DC9' })); + REQUIRE(not_in(fn, { U'\u0DCB', U'\u0DCE' })); + REQUIRE(not_in(fn, { U'\u0DE0', U'\u0DF1' })); + REQUIRE(not_in(fn, { U'\u0DF4', U'\u0E30' })); + REQUIRE(not_in(fn, { U'\u0E32', U'\u0E33' })); + REQUIRE(not_in(fn, { U'\u0E3B', U'\u0E46' })); + REQUIRE(not_in(fn, { U'\u0E4F', U'\u0EB0' })); + REQUIRE(not_in(fn, { U'\u0EB2', U'\u0EB3' })); + REQUIRE(not_in(fn, { U'\u0EBD', U'\u0EC7' })); + REQUIRE(not_in(fn, { U'\u0ECE', U'\u0F17' })); + REQUIRE(not_in(fn, { U'\u0F1A', U'\u0F34' })); + REQUIRE(not_in(fn, { U'\u0F3A', U'\u0F3D' })); + REQUIRE(not_in(fn, { U'\u0F40', U'\u0F70' })); + REQUIRE(not_in(fn, { U'\u0F88', U'\u0F8C' })); + REQUIRE(not_in(fn, { U'\u0FBD', U'\u0FC5' })); + REQUIRE(not_in(fn, { U'\u0FC7', U'\u102A' })); + REQUIRE(not_in(fn, { 
U'\u103F', U'\u1055' })); + REQUIRE(not_in(fn, { U'\u105A', U'\u105D' })); + REQUIRE(not_in(fn, { U'\u1065', U'\u1066' })); + REQUIRE(not_in(fn, { U'\u106E', U'\u1070' })); + REQUIRE(not_in(fn, { U'\u1075', U'\u1081' })); + REQUIRE(not_in(fn, { U'\u1090', U'\u1099' })); + REQUIRE(not_in(fn, { U'\u109E', U'\u135C' })); + REQUIRE(not_in(fn, { U'\u1360', U'\u1711' })); + REQUIRE(not_in(fn, { U'\u1715', U'\u1731' })); + REQUIRE(not_in(fn, { U'\u1735', U'\u1751' })); + REQUIRE(not_in(fn, { U'\u1754', U'\u1771' })); + REQUIRE(not_in(fn, { U'\u1774', U'\u17B3' })); + REQUIRE(not_in(fn, { U'\u17D4', U'\u17DC' })); + REQUIRE(not_in(fn, { U'\u17DE', U'\u180A' })); + REQUIRE(not_in(fn, { U'\u180E', U'\u1884' })); + REQUIRE(not_in(fn, { U'\u1887', U'\u18A8' })); + REQUIRE(not_in(fn, { U'\u18AA', U'\u191F' })); + REQUIRE(not_in(fn, { U'\u192C', U'\u192F' })); + REQUIRE(not_in(fn, { U'\u193C', U'\u1A16' })); + REQUIRE(not_in(fn, { U'\u1A1C', U'\u1A54' })); + REQUIRE(not_in(fn, { U'\u1A7D', U'\u1A7E' })); + REQUIRE(not_in(fn, { U'\u1A80', U'\u1AAF' })); + REQUIRE(not_in(fn, { U'\u1AC1', U'\u1AFF' })); + REQUIRE(not_in(fn, { U'\u1B05', U'\u1B33' })); + REQUIRE(not_in(fn, { U'\u1B45', U'\u1B6A' })); + REQUIRE(not_in(fn, { U'\u1B74', U'\u1B7F' })); + REQUIRE(not_in(fn, { U'\u1B83', U'\u1BA0' })); + REQUIRE(not_in(fn, { U'\u1BAE', U'\u1BE5' })); + REQUIRE(not_in(fn, { U'\u1BF4', U'\u1C23' })); + REQUIRE(not_in(fn, { U'\u1C38', U'\u1CCF' })); + REQUIRE(not_in(fn, { U'\u1CE9', U'\u1CEC' })); + REQUIRE(not_in(fn, { U'\u1CEE', U'\u1CF3' })); + REQUIRE(not_in(fn, { U'\u1CF5', U'\u1CF6' })); + REQUIRE(not_in(fn, { U'\u1CFA', U'\u1DBF' })); + REQUIRE(not_in(fn, { U'\u1E00', U'\u20CF' })); + REQUIRE(not_in(fn, { U'\u20DD', U'\u20E0' })); + REQUIRE(not_in(fn, { U'\u20E2', U'\u20E4' })); + REQUIRE(not_in(fn, { U'\u20F1', U'\u2CEE' })); + REQUIRE(not_in(fn, { U'\u2CF2', U'\u2D7E' })); + REQUIRE(not_in(fn, { U'\u2D80', U'\u2DDF' })); + REQUIRE(not_in(fn, { U'\u2E00', U'\u3029' })); + 
REQUIRE(not_in(fn, { U'\u3030', U'\u3098' })); + REQUIRE(not_in(fn, { U'\u309B', U'\uA66E' })); + REQUIRE(not_in(fn, { U'\uA670', U'\uA673' })); + REQUIRE(not_in(fn, { U'\uA67E', U'\uA69D' })); + REQUIRE(not_in(fn, { U'\uA6A0', U'\uA6EF' })); + REQUIRE(not_in(fn, { U'\uA6F2', U'\uA801' })); + REQUIRE(not_in(fn, { U'\uA803', U'\uA805' })); + REQUIRE(not_in(fn, { U'\uA807', U'\uA80A' })); + REQUIRE(not_in(fn, { U'\uA80C', U'\uA822' })); + REQUIRE(not_in(fn, { U'\uA828', U'\uA82B' })); + REQUIRE(not_in(fn, { U'\uA82D', U'\uA87F' })); + REQUIRE(not_in(fn, { U'\uA882', U'\uA8B3' })); + REQUIRE(not_in(fn, { U'\uA8C6', U'\uA8DF' })); + REQUIRE(not_in(fn, { U'\uA8F2', U'\uA8FE' })); + REQUIRE(not_in(fn, { U'\uA900', U'\uA925' })); + REQUIRE(not_in(fn, { U'\uA92E', U'\uA946' })); + REQUIRE(not_in(fn, { U'\uA954', U'\uA97F' })); + REQUIRE(not_in(fn, { U'\uA984', U'\uA9B2' })); + REQUIRE(not_in(fn, { U'\uA9C1', U'\uA9E4' })); + REQUIRE(not_in(fn, { U'\uA9E6', U'\uAA28' })); + REQUIRE(not_in(fn, { U'\uAA37', U'\uAA42' })); + REQUIRE(not_in(fn, { U'\uAA44', U'\uAA4B' })); + REQUIRE(not_in(fn, { U'\uAA4E', U'\uAA7A' })); + REQUIRE(not_in(fn, { U'\uAA7E', U'\uAAAF' })); + REQUIRE(not_in(fn, { U'\uAAB5', U'\uAAB6' })); + REQUIRE(not_in(fn, { U'\uAAB9', U'\uAABD' })); + REQUIRE(not_in(fn, { U'\uAAC2', U'\uAAEA' })); + REQUIRE(not_in(fn, { U'\uAAF0', U'\uAAF4' })); + REQUIRE(not_in(fn, { U'\uAAF7', U'\uABE2' })); + REQUIRE(not_in(fn, { U'\uABEE', U'\uFB1D' })); + REQUIRE(not_in(fn, { U'\uFB1F', U'\uFDFF' })); + REQUIRE(not_in(fn, { U'\uFE10', U'\uFE1F' })); + REQUIRE(not_in(fn, { U'\uFE30', U'\U000101FC' })); + REQUIRE(not_in(fn, { U'\U000101FE', U'\U000102DF' })); + REQUIRE(not_in(fn, { U'\U000102E1', U'\U00010375' })); + REQUIRE(not_in(fn, { U'\U0001037B', U'\U00010A00' })); + REQUIRE(not_in(fn, { U'\U00010A07', U'\U00010A0B' })); + REQUIRE(not_in(fn, { U'\U00010A10', U'\U00010A37' })); + REQUIRE(not_in(fn, { U'\U00010A3B', U'\U00010A3E' })); + REQUIRE(not_in(fn, { U'\U00010A40', 
U'\U00010AE4' })); + REQUIRE(not_in(fn, { U'\U00010AE7', U'\U00010D23' })); + REQUIRE(not_in(fn, { U'\U00010D28', U'\U00010EAA' })); + REQUIRE(not_in(fn, { U'\U00010EAD', U'\U00010F45' })); + REQUIRE(not_in(fn, { U'\U00010F51', U'\U00010FFF' })); + REQUIRE(not_in(fn, { U'\U00011003', U'\U00011037' })); + REQUIRE(not_in(fn, { U'\U00011047', U'\U0001107E' })); + REQUIRE(not_in(fn, { U'\U00011083', U'\U000110AF' })); + REQUIRE(not_in(fn, { U'\U000110BB', U'\U000110FF' })); + REQUIRE(not_in(fn, { U'\U00011103', U'\U00011126' })); + REQUIRE(not_in(fn, { U'\U00011135', U'\U00011144' })); + REQUIRE(not_in(fn, { U'\U00011147', U'\U00011172' })); + REQUIRE(not_in(fn, { U'\U00011174', U'\U0001117F' })); + REQUIRE(not_in(fn, { U'\U00011183', U'\U000111B2' })); + REQUIRE(not_in(fn, { U'\U000111C1', U'\U000111C8' })); + REQUIRE(not_in(fn, { U'\U000111D0', U'\U0001122B' })); + REQUIRE(not_in(fn, { U'\U00011238', U'\U0001123D' })); + REQUIRE(not_in(fn, { U'\U0001123F', U'\U000112DE' })); + REQUIRE(not_in(fn, { U'\U000112EB', U'\U000112FF' })); + REQUIRE(not_in(fn, { U'\U00011304', U'\U0001133A' })); + REQUIRE(not_in(fn, { U'\U00011345', U'\U00011346' })); + REQUIRE(not_in(fn, { U'\U00011349', U'\U0001134A' })); + REQUIRE(not_in(fn, { U'\U0001134E', U'\U00011356' })); + REQUIRE(not_in(fn, { U'\U00011358', U'\U00011361' })); + REQUIRE(not_in(fn, { U'\U00011364', U'\U00011365' })); + REQUIRE(not_in(fn, { U'\U0001136D', U'\U0001136F' })); + REQUIRE(not_in(fn, { U'\U00011375', U'\U00011434' })); + REQUIRE(not_in(fn, { U'\U00011447', U'\U0001145D' })); + REQUIRE(not_in(fn, { U'\U0001145F', U'\U000114AF' })); + REQUIRE(not_in(fn, { U'\U000114C4', U'\U000115AE' })); + REQUIRE(not_in(fn, { U'\U000115B6', U'\U000115B7' })); + REQUIRE(not_in(fn, { U'\U000115C1', U'\U000115DB' })); + REQUIRE(not_in(fn, { U'\U000115DE', U'\U0001162F' })); + REQUIRE(not_in(fn, { U'\U00011641', U'\U000116AA' })); + REQUIRE(not_in(fn, { U'\U000116B8', U'\U0001171C' })); + REQUIRE(not_in(fn, { U'\U0001172C', 
U'\U0001182B' })); + REQUIRE(not_in(fn, { U'\U0001183B', U'\U0001192F' })); + REQUIRE(not_in(fn, { U'\U00011939', U'\U0001193A' })); + REQUIRE(not_in(fn, { U'\U00011944', U'\U000119D0' })); + REQUIRE(not_in(fn, { U'\U000119D8', U'\U000119D9' })); + REQUIRE(not_in(fn, { U'\U000119E1', U'\U000119E3' })); + REQUIRE(not_in(fn, { U'\U000119E5', U'\U00011A00' })); + REQUIRE(not_in(fn, { U'\U00011A0B', U'\U00011A32' })); + REQUIRE(not_in(fn, { U'\U00011A3F', U'\U00011A46' })); + REQUIRE(not_in(fn, { U'\U00011A48', U'\U00011A50' })); + REQUIRE(not_in(fn, { U'\U00011A5C', U'\U00011A89' })); + REQUIRE(not_in(fn, { U'\U00011A9A', U'\U00011C2E' })); + REQUIRE(not_in(fn, { U'\U00011C40', U'\U00011C91' })); + REQUIRE(not_in(fn, { U'\U00011CB7', U'\U00011D30' })); + REQUIRE(not_in(fn, { U'\U00011D37', U'\U00011D39' })); + REQUIRE(not_in(fn, { U'\U00011D48', U'\U00011D89' })); + REQUIRE(not_in(fn, { U'\U00011D98', U'\U00011EF2' })); + REQUIRE(not_in(fn, { U'\U00011EF7', U'\U00016AEF' })); + REQUIRE(not_in(fn, { U'\U00016AF5', U'\U00016B2F' })); + REQUIRE(not_in(fn, { U'\U00016B37', U'\U00016F4E' })); + REQUIRE(not_in(fn, { U'\U00016F88', U'\U00016F8E' })); + REQUIRE(not_in(fn, { U'\U00016F93', U'\U00016FE3' })); + REQUIRE(not_in(fn, { U'\U00016FE5', U'\U00016FEF' })); + REQUIRE(not_in(fn, { U'\U00016FF2', U'\U0001BC9C' })); + REQUIRE(not_in(fn, { U'\U0001BC9F', U'\U0001D164' })); + REQUIRE(not_in(fn, { U'\U0001D16A', U'\U0001D16C' })); + REQUIRE(not_in(fn, { U'\U0001D173', U'\U0001D17A' })); + REQUIRE(not_in(fn, { U'\U0001D183', U'\U0001D184' })); + REQUIRE(not_in(fn, { U'\U0001D18C', U'\U0001D1A9' })); + REQUIRE(not_in(fn, { U'\U0001D1AE', U'\U0001D241' })); + REQUIRE(not_in(fn, { U'\U0001D245', U'\U0001D9FF' })); + REQUIRE(not_in(fn, { U'\U0001DA37', U'\U0001DA3A' })); + REQUIRE(not_in(fn, { U'\U0001DA6D', U'\U0001DA74' })); + REQUIRE(not_in(fn, { U'\U0001DA76', U'\U0001DA83' })); + REQUIRE(not_in(fn, { U'\U0001DA85', U'\U0001DA9A' })); + REQUIRE(not_in(fn, { U'\U0001DAB0', 
U'\U0001DFFF' })); + REQUIRE(not_in(fn, { U'\U0001E019', U'\U0001E01A' })); + REQUIRE(not_in(fn, { U'\U0001E02B', U'\U0001E12F' })); + REQUIRE(not_in(fn, { U'\U0001E137', U'\U0001E2EB' })); + REQUIRE(not_in(fn, { U'\U0001E2F0', U'\U0001E8CF' })); + REQUIRE(not_in(fn, { U'\U0001E8D7', U'\U0001E943' })); + REQUIRE(not_in(fn, { U'\U0001E94B', U'\U000E00FF' })); + REQUIRE(not_in(fn, { U'\U000E01F0', U'\U0010FFFF' })); + + // individual values which should return false + REQUIRE(!fn(U'\u05BE')); + REQUIRE(!fn(U'\u05C0')); + REQUIRE(!fn(U'\u05C3')); + REQUIRE(!fn(U'\u05C6')); + REQUIRE(!fn(U'\u06E9')); + REQUIRE(!fn(U'\u081A')); + REQUIRE(!fn(U'\u0824')); + REQUIRE(!fn(U'\u0828')); + REQUIRE(!fn(U'\u08E2')); + REQUIRE(!fn(U'\u093D')); + REQUIRE(!fn(U'\u0950')); + REQUIRE(!fn(U'\u09BD')); + REQUIRE(!fn(U'\u0A3D')); + REQUIRE(!fn(U'\u0ABD')); + REQUIRE(!fn(U'\u0AC6')); + REQUIRE(!fn(U'\u0ACA')); + REQUIRE(!fn(U'\u0B00')); + REQUIRE(!fn(U'\u0B3D')); + REQUIRE(!fn(U'\u0BC9')); + REQUIRE(!fn(U'\u0C45')); + REQUIRE(!fn(U'\u0C49')); + REQUIRE(!fn(U'\u0CBD')); + REQUIRE(!fn(U'\u0CC5')); + REQUIRE(!fn(U'\u0CC9')); + REQUIRE(!fn(U'\u0D3D')); + REQUIRE(!fn(U'\u0D45')); + REQUIRE(!fn(U'\u0D49')); + REQUIRE(!fn(U'\u0DD5')); + REQUIRE(!fn(U'\u0DD7')); + REQUIRE(!fn(U'\u0F36')); + REQUIRE(!fn(U'\u0F38')); + REQUIRE(!fn(U'\u0F85')); + REQUIRE(!fn(U'\u0F98')); + REQUIRE(!fn(U'\u1061')); + REQUIRE(!fn(U'\u108E')); + REQUIRE(!fn(U'\u1A5F')); + REQUIRE(!fn(U'\u1ABE')); + REQUIRE(!fn(U'\u1CD3')); + REQUIRE(!fn(U'\u1DFA')); + REQUIRE(!fn(U'\uAAB1')); + REQUIRE(!fn(U'\uAAC0')); + REQUIRE(!fn(U'\uABEB')); + REQUIRE(!fn(U'\U00010A04')); + REQUIRE(!fn(U'\U000111CD')); + REQUIRE(!fn(U'\U0001133D')); + REQUIRE(!fn(U'\U00011936')); + REQUIRE(!fn(U'\U0001193F')); + REQUIRE(!fn(U'\U00011941')); + REQUIRE(!fn(U'\U00011A3A')); + REQUIRE(!fn(U'\U00011C37')); + REQUIRE(!fn(U'\U00011CA8')); + REQUIRE(!fn(U'\U00011D3B')); + REQUIRE(!fn(U'\U00011D3E')); + REQUIRE(!fn(U'\U00011D46')); + 
REQUIRE(!fn(U'\U00011D8F')); + REQUIRE(!fn(U'\U00011D92')); + REQUIRE(!fn(U'\U00016F50')); + REQUIRE(!fn(U'\U0001DAA0')); + REQUIRE(!fn(U'\U0001E007')); + REQUIRE(!fn(U'\U0001E022')); + REQUIRE(!fn(U'\U0001E025')); + } + + #endif // TOML_LANG_UNRELEASED +} diff --git a/toml.hpp b/toml.hpp index 55ae6d5..878df28 100644 --- a/toml.hpp +++ b/toml.hpp @@ -130,6 +130,8 @@ #else #define TOML_COMPILER_EXCEPTIONS 0 #endif + #define TOML_LIKELY(...) (__builtin_expect(!!(__VA_ARGS__), 1) ) + #define TOML_UNLIKELY(...) (__builtin_expect(!!(__VA_ARGS__), 0) ) //floating-point from_chars and to_chars are not implemented in any version of clang as of 1/1/2020 #ifndef TOML_FLOAT_CHARCONV @@ -192,10 +194,8 @@ #else #define TOML_COMPILER_EXCEPTIONS 0 #endif - - // these pass the __has_attribute() test but cause warnings on if/else branches =/ - #define TOML_LIKELY - #define TOML_UNLIKELY + #define TOML_LIKELY(...) (__builtin_expect(!!(__VA_ARGS__), 1) ) + #define TOML_UNLIKELY(...) (__builtin_expect(!!(__VA_ARGS__), 0) ) // floating-point from_chars and to_chars are not implemented in any version of gcc as of 1/1/2020 #ifndef TOML_FLOAT_CHARCONV @@ -304,20 +304,20 @@ #endif #if !TOML_DOXYGEN && !defined(__INTELLISENSE__) #if !defined(TOML_LIKELY) && __has_cpp_attribute(likely) - #define TOML_LIKELY [[likely]] + #define TOML_LIKELY(...) (__VA_ARGS__) [[likely]] #endif #if !defined(TOML_UNLIKELY) && __has_cpp_attribute(unlikely) - #define TOML_UNLIKELY [[unlikely]] + #define TOML_UNLIKELY(...) (__VA_ARGS__) [[unlikely]] #endif #if __has_cpp_attribute(nodiscard) >= 201907L #define TOML_NODISCARD_CTOR [[nodiscard]] #endif #endif #ifndef TOML_LIKELY - #define TOML_LIKELY + #define TOML_LIKELY(...) (__VA_ARGS__) #endif #ifndef TOML_UNLIKELY - #define TOML_UNLIKELY + #define TOML_UNLIKELY(...) 
(__VA_ARGS__) #endif #ifndef TOML_NODISCARD_CTOR #define TOML_NODISCARD_CTOR @@ -1521,13 +1521,13 @@ namespace toml::impl static_assert(sizeof(Char) == 1); for (auto c : str) { - if (c >= TOML_STRING_PREFIX('\x00') && c <= TOML_STRING_PREFIX('\x1F')) TOML_UNLIKELY + if TOML_UNLIKELY(c >= TOML_STRING_PREFIX('\x00') && c <= TOML_STRING_PREFIX('\x1F')) print_to_stream(low_character_escape_table[c], stream); - else if (c == TOML_STRING_PREFIX('\x7F')) TOML_UNLIKELY + else if TOML_UNLIKELY(c == TOML_STRING_PREFIX('\x7F')) print_to_stream(TOML_STRING_PREFIX("\\u007F"sv), stream); - else if (c == TOML_STRING_PREFIX('"')) TOML_UNLIKELY + else if TOML_UNLIKELY(c == TOML_STRING_PREFIX('"')) print_to_stream(TOML_STRING_PREFIX("\\\""sv), stream); - else if (c == TOML_STRING_PREFIX('\\')) TOML_UNLIKELY + else if TOML_UNLIKELY(c == TOML_STRING_PREFIX('\\')) print_to_stream(TOML_STRING_PREFIX("\\\\"sv), stream); else print_to_stream(c, stream); @@ -3333,874 +3333,904 @@ namespace toml //-------------------------------------- ↓ toml_utf8_generated.h ----------------------------------------------------- #pragma region -#if TOML_LANG_UNRELEASED // toml/issues/687 (unicode bare keys) - -#define TOML_ASSUME_CODEPOINT_BETWEEN(first, last) \ - TOML_ASSUME(codepoint >= first); \ - TOML_ASSUME(codepoint <= last) - namespace toml::impl { [[nodiscard]] TOML_GNU_ATTR(const) - constexpr bool is_unicode_letter(char32_t codepoint) noexcept + constexpr bool is_hexadecimal_digit(char32_t cp) noexcept { - if (codepoint < U'\u00AA' || codepoint > U'\U00031349') + return cp >= U'0' && cp <= U'f' && (1ull << (static_cast(cp) - 0x30ull)) & 0x7E0000007E03FFull; + } + + #if TOML_LANG_UNRELEASED // toml/issues/687 (unicode bare keys) + + [[nodiscard]] + TOML_GNU_ATTR(const) + constexpr bool is_unicode_letter(char32_t cp) noexcept + { + if (cp < U'\u00AA' || cp > U'\U00031349') return false; - TOML_ASSUME_CODEPOINT_BETWEEN(U'\u00AA', U'\U00031349'); - switch ((static_cast(codepoint) - 0xAAu) / 3147u) + 
const auto child_index_0 = (static_cast(cp) - 0xAAull) / 0xC4Bull; + if ((1ull << child_index_0) & 0x8A7FFC004001CFA0ull) + return true; + if ((1ull << child_index_0) & 0x26180C0000ull) + return false; + switch (child_index_0) { - case 0: + case 0x00: // [0] 00AA - 0CF4 { - if (codepoint > U'\u0CF2') + if (cp > U'\u0CF2') + return false; + TOML_ASSUME(cp >= U'\u00AA'); + + constexpr uint_least64_t lookup_table_1[] = + { + 0xFFFFDFFFFFC10801ull, 0xFFFFFFFFFFFFDFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0x07C000FFF0FFFFFFull, 0x0000000000000014ull, 0x0000000000000000ull, 0xFEFFFFF5D02F37C0ull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFEFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFF00FFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFC09FFFFFFFFFBFull, 0x000000007FFFFFFFull, + 0xFFFFFFC000000000ull, 0xFFC00000000001E1ull, 0x00000001FFFFFFFFull, 0xFFFFFFFFFFFFFFB0ull, + 0x18000BFFFFFFFFFFull, 0xFFFFFF4000270030ull, 0xFFFFFFF80000003Full, 0x0FFFFFFFFFFFFFFFull, + 0xFFFFFFFF00000080ull, 0x44010FFFFFC10C01ull, 0xFFC07FFFFFC00000ull, 0xFFC0000000000001ull, + 0x000000003FFFF7FFull, 0xFFFFFFFFFC000000ull, 0x00FFC0400008FFFFull, 0x7FFFFE67F87FFF80ull, + 0x00EC00100008F17Full, 0x7FFFFE61F80400C0ull, 0x001780000000DB7Full, 0x7FFFFEEFF8000700ull, + 0x00C000400008FB7Full, 0x7FFFFE67F8008000ull, 0x00EC00000008FB7Full, 0xC6358F71FA000080ull, + 0x000000400000FFF1ull, 0x7FFFFF77F8000000ull, 0x00C1C0000008FFFFull, 0x7FFFFF77F8400000ull, + 0x00D000000008FBFFull, 0x0000000000000180ull, + }; + return lookup_table_1[(static_cast(cp) - 0xAAull) / 0x40ull] + & (0x1ull << ((static_cast(cp) - 0xAAull) % 0x40ull)); + } + case 0x01: // [1] 0CF5 - 193F + { + if (cp < U'\u0D04' || cp > U'\u191E') return false; - TOML_ASSUME_CODEPOINT_BETWEEN(U'\u00AA', U'\u0CF2'); - switch ((static_cast(codepoint) - 0xAAu) / 63u) + constexpr uint_least64_t lookup_table_1[] = { - case 0: return (1ull << 
(static_cast(codepoint) - 0xAAull)) & 0x7FFFDFFFFFC10801ull; - case 1: return codepoint != U'\u00F7'; - case 8: return (1ull << (static_cast(codepoint) - 0x2A2ull)) & 0x4000FFF0FFFFFFFFull; - case 9: return (1u << (static_cast(codepoint) - 0x2E1u)) & 0x280Fu; - case 10: return false; - case 11: return (1ull << (static_cast(codepoint) - 0x370ull)) & 0x3FFFD740BCDFull; - case 12: return codepoint != U'\u03A2'; - case 13: return codepoint != U'\u03F6'; - case 15: return codepoint <= U'\u0481' || codepoint >= U'\u048A'; - case 18: return codepoint != U'\u0530'; - case 19: return codepoint <= U'\u0559' || codepoint >= U'\u0560'; - case 21: return codepoint <= U'\u05EA' || codepoint >= U'\u05EF'; - case 23: return codepoint != U'\u0653'; - case 25: return (1ull << (static_cast(codepoint) - 0x6D1ull)) & 0x4E0060300017ull; - case 26: return (1ull << (static_cast(codepoint) - 0x710ull)) & 0x60000000FFFFFFFDull; - case 28: return (1ull << (static_cast(codepoint) - 0x78Eull)) & 0x7000000800FFFFFFull; - case 29: return (1ull << (static_cast(codepoint) - 0x7CDull)) & 0x7FF821803FFFFFFFull; - case 30: return (1ull << (static_cast(codepoint) - 0x80Cull)) & 0x7FF00000110043FFull; - case 31: return codepoint <= U'\u0858' || codepoint >= U'\u0860'; - case 32: return codepoint != U'\u088A'; - case 34: return codepoint <= U'\u0939' || codepoint >= U'\u093D'; - case 35: return (1ull << (static_cast(codepoint) - 0x950ull)) & 0x21FFFE0003FF01ull; - case 36: return (1ull << (static_cast(codepoint) - 0x986ull)) & 0x8F17F7FFFFE67Full; - case 37: return (1ull << (static_cast(codepoint) - 0x9CEull)) & 0x400C000EC001ull; - case 38: return (1ull << (static_cast(codepoint) - 0xA05ull)) & 0x1B6FEFFFFFCC3Full; - case 39: return (1u << (static_cast(codepoint) - 0xA59u)) & 0xE00002Fu; - case 40: return (1ull << (static_cast(codepoint) - 0xA85ull)) & 0x11F6FEFFFFFDDFFull; - case 41: return (1ull << (static_cast(codepoint) - 0xAD0ull)) & 0x20000030001ull; - case 42: return (1ull << 
(static_cast(codepoint) - 0xB05ull)) & 0x11F6FEFFFFFCCFFull; - case 43: return (1u << (static_cast(codepoint) - 0xB5Cu)) & 0x20003Bu; - case 44: return (1ull << (static_cast(codepoint) - 0xB83ull)) & 0x7FF8E31AC7B8FDull; - case 46: return (1ull << (static_cast(codepoint) - 0xC05ull)) & 0x1FFFEFFFFFEEFFull; - case 47: return (1ull << (static_cast(codepoint) - 0xC3Dull)) & 0x1838000001ull; - case 48: return (1ull << (static_cast(codepoint) - 0xC80ull)) & 0x1EFFDFFFFFDDFE1ull; - case 49: return (1ull << (static_cast(codepoint) - 0xCB9ull)) & 0x30001A000000011ull; + 0x027FFFFFFFFFDDFFull, 0x0FC0000038070400ull, 0xF2FFBFFFFFC7FFFEull, 0xE000000000000007ull, + 0xF000DFFFFFFFFFFFull, 0x6000000000000007ull, 0xF200DFFAFFFFFF7Dull, 0x100000000F000005ull, + 0xF000000000000000ull, 0x000001FFFFFFFFEFull, 0x00000000000001F0ull, 0xF000000000000000ull, + 0x0800007FFFFFFFFFull, 0x3FFE1C0623C3F000ull, 0xFFFFFFFFF0000400ull, 0xFF7FFFFFFFFFF20Bull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFF3D7F3DFull, 0xD7F3DFFFFFFFF3DFull, 0xFFFFFFFFFFF7FFF3ull, + 0xFFFFFFFFFFF3DFFFull, 0xF0000000007FFFFFull, 0xFFFFFFFFF0000FFFull, 0xE3F3FFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xEFFFF9FFFFFFFFFFull, 0xFFFFFFFFF07FFFFFull, 0xF01FE07FFFFFFFFFull, + 0xF0003FFFF0003DFFull, 0xF0001DFFF0003FFFull, 0x0000FFFFFFFFFFFFull, 0x0000000001080000ull, + 0xFFFFFFFFF0000000ull, 0xF01FFFFFFFFFFFFFull, 0xFFFFF05FFFFFFFF9ull, 0xF003FFFFFFFFFFFFull, + 0x0000000007FFFFFFull, + }; + return lookup_table_1[(static_cast(cp) - 0xD04ull) / 0x40ull] + & (0x1ull << ((static_cast(cp) - 0xD04ull) % 0x40ull)); + } + case 0x02: // [2] 1940 - 258A + { + if (cp < U'\u1950' || cp > U'\u2184') + return false; + + constexpr uint_least64_t lookup_table_1[] = + { + 
0xFFFF001F3FFFFFFFull, 0x03FFFFFF0FFFFFFFull, 0xFFFF000000000000ull, 0xFFFFFFFFFFFF007Full, + 0x000000000000001Full, 0x0000000000800000ull, 0xFFE0000000000000ull, 0x0FE0000FFFFFFFFFull, + 0xFFF8000000000000ull, 0xFFFFFC00C001FFFFull, 0xFFFF0000003FFFFFull, 0xE0000000000FFFFFull, + 0x01FF3FFFFFFFFC00ull, 0x0000E7FFFFFFFFFFull, 0xFFFF046FDE000000ull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0x0000FFFFFFFFFFFFull, 0xFFFF000000000000ull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0x3F3FFFFFFFFF3F3Full, + 0xFFFF3FFFFFFFAAFFull, 0x1FDC5FDFFFFFFFFFull, 0x00001FDC1FFF0FCFull, 0x0000000000000000ull, + 0x0000800200000000ull, 0x0000000000001FFFull, 0xFC84000000000000ull, 0x43E0F3FFBD503E2Full, + 0x0018000000000000ull, + }; + return lookup_table_1[(static_cast(cp) - 0x1950ull) / 0x40ull] + & (0x1ull << ((static_cast(cp) - 0x1950ull) % 0x40ull)); + } + case 0x03: // [3] 258B - 31D5 + { + if (cp < U'\u2C00' || cp > U'\u31BF') + return false; + + constexpr uint_least64_t lookup_table_1[] = + { + 0xFFFF7FFFFFFFFFFFull, 0xFFFFFFFF7FFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0x000C781FFFFFFFFFull, + 0xFFFF20BFFFFFFFFFull, 0x000080FFFFFFFFFFull, 0x7F7F7F7F007FFFFFull, 0x000000007F7F7F7Full, + 0x0000800000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x183E000000000060ull, 0xFFFFFFFFFFFFFFFEull, 0xFFFFFFFEE07FFFFFull, 0xF7FFFFFFFFFFFFFFull, + 0xFFFEFFFFFFFFFFE0ull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFF00007FFFull, + }; + return lookup_table_1[(static_cast(cp) - 0x2C00ull) / 0x40ull] + & (0x1ull << (static_cast(cp) % 0x40ull)); + } + case 0x04: return (cp >= U'\u31F0' && cp <= U'\u31FF') || (cp >= U'\u3400' && cp <= U'\u3E20'); + case 0x06: return (cp >= U'\u4A6C' && cp <= U'\u4DBE') || (cp >= U'\u4E00' && cp <= U'\u56B6'); + case 0x0C: return (cp >= U'\u942E' && cp <= U'\u9FFB') || (cp >= U'\uA000' && cp <= 
U'\uA078'); + case 0x0D: // [13] A079 - ACC3 + { + TOML_ASSUME(cp >= U'\uA079' && cp <= U'\uACC3'); + + constexpr uint_least64_t lookup_table_1[] = + { + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0x00000000000FFFFFull, 0xFFFFFFFFFF800000ull, 0xFFFFFFFFFFFFFF9Full, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0x0006007FFF8FFFFFull, 0x003FFFFFFFFFFF80ull, + 0xFFFFFF9FFFFFFFC0ull, 0x00001FFFFFFFFFFFull, 0xFFFFFE7FC0000000ull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFCFFFFull, 0xF00000000003FE7Full, 0x000003FFFFFBDDFFull, 0x07FFFFFFFFFFFF80ull, + 0x07FFFFFFFFFFFE00ull, 0x7E00000000000000ull, 0xFF801FFFFFFE0034ull, 0xFFFFFF8000003FFFull, + 0x03FFFFFFFFFFF80Full, 0x007FEF8000400000ull, 0x0000FFFFFFFFFFBEull, 0x3FFFFF800007FB80ull, + 0x317FFFFFFFFFFFE2ull, 0x0E03FF9C0000029Full, 0xFFBFBF803F3F3F00ull, 0xFF81FFFBFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0x000003FFFFFFFFFFull, 0xFFFFFFFFFFFFFF80ull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0x00000000000007FFull, + }; + return lookup_table_1[(static_cast(cp) - 0xA079ull) / 0x40ull] + & (0x1ull << ((static_cast(cp) - 0xA079ull) % 0x40ull)); + } + case 0x11: return (cp >= U'\uD1A5' && cp <= U'\uD7A2') || (cp >= U'\uD7B0' && cp <= U'\uD7C6') + || (cp >= U'\uD7CB' && cp <= U'\uD7FB'); + case 0x14: // [20] F686 - 102D0 + { + if (cp < U'\uF900') + return false; + TOML_ASSUME(cp <= U'\U000102D0'); + + constexpr uint_least64_t lookup_table_1[] = + { + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFF3FFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0x0000000003FFFFFFull, + 0x5F7FFDFFA0F8007Full, 
0xFFFFFFFFFFFFFFDBull, 0x0003FFFFFFFFFFFFull, 0xFFFFFFFFFFF80000ull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0x3FFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFF0000ull, 0xFFFFFFFFFFFCFFFFull, 0x0FFF0000000000FFull, + 0x0000000000000000ull, 0xFFDF000000000000ull, 0xFFFFFFFFFFFFFFFFull, 0x1FFFFFFFFFFFFFFFull, + 0x07FFFFFE00000000ull, 0xFFFFFFC007FFFFFEull, 0x7FFFFFFFFFFFFFFFull, 0x000000001CFCFCFCull, + 0xB7FFFF7FFFFFEFFFull, 0x000000003FFF3FFFull, 0xFFFFFFFFFFFFFFFFull, 0x07FFFFFFFFFFFFFFull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0xFFFFFFFF1FFFFFFFull, 0x000000000001FFFFull, + }; + return lookup_table_1[(static_cast(cp) - 0xF900ull) / 0x40ull] + & (0x1ull << (static_cast(cp) % 0x40ull)); + } + case 0x15: // [21] 102D1 - 10F1B + { + if (cp < U'\U00010300') + return false; + TOML_ASSUME(cp <= U'\U00010F1B'); + + constexpr uint_least64_t lookup_table_1[] = + { + 0xFFFFE000FFFFFFFFull, 0x003FFFFFFFFF03FDull, 0xFFFFFFFF3FFFFFFFull, 0x000000000000FF0Full, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFF00003FFFFFFFull, 0x0FFFFFFFFF0FFFFFull, + 0xFFFF00FFFFFFFFFFull, 0x0000000FFFFFFFFFull, 0x0000000000000000ull, 0x0000000000000000ull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0x007FFFFFFFFFFFFFull, 0x000000FF003FFFFFull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x91BFFFFFFFFFFD3Full, 0x007FFFFF003FFFFFull, 0x000000007FFFFFFFull, 0x0037FFFF00000000ull, + 0x03FFFFFF003FFFFFull, 0x0000000000000000ull, 0xC0FFFFFFFFFFFFFFull, 0x0000000000000000ull, + 0x003FFFFFFEEF0001ull, 0x1FFFFFFF00000000ull, 0x000000001FFFFFFFull, 0x0000001FFFFFFEFFull, + 0x003FFFFFFFFFFFFFull, 0x0007FFFF003FFFFFull, 0x000000000003FFFFull, 0x0000000000000000ull, + 0xFFFFFFFFFFFFFFFFull, 0x00000000000001FFull, 0x0007FFFFFFFFFFFFull, 0x0007FFFFFFFFFFFFull, + 0x0000000FFFFFFFFFull, 
0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x000303FFFFFFFFFFull, 0x0000000000000000ull, + 0x000000000FFFFFFFull, + }; + return lookup_table_1[(static_cast(cp) - 0x10300ull) / 0x40ull] + & (0x1ull << (static_cast(cp) % 0x40ull)); + } + case 0x16: // [22] 10F1C - 11B66 + { + if (cp > U'\U00011AF8') + return false; + TOML_ASSUME(cp >= U'\U00010F1C'); + + constexpr uint_least64_t lookup_table_1[] = + { + 0x000003FFFFF00801ull, 0x0000000000000000ull, 0x000001FFFFF00000ull, 0xFFFFFF8007FFFFF0ull, + 0x000000000FFFFFFFull, 0xFFFFFF8000000000ull, 0xFFF00000000FFFFFull, 0xFFFFFF8000001FFFull, + 0xFFF00900000007FFull, 0xFFFFFF80047FFFFFull, 0x400001E0007FFFFFull, 0xFFBFFFF000000001ull, + 0x000000000000FFFFull, 0xFFFBD7F000000000ull, 0xFFFFFFFFFFF01FFBull, 0xFF99FE0000000007ull, + 0x001000023EDFDFFFull, 0x000000000000003Eull, 0x0000000000000000ull, 0xFFFFFFF000000000ull, + 0x0000780001FFFFFFull, 0xFFFFFFF000000038ull, 0x00000B00000FFFFFull, 0x0000000000000000ull, + 0x0000000000000000ull, 0xFFFFFFF000000000ull, 0xF00000000007FFFFull, 0xFFFFFFF000000000ull, + 0x00000100000FFFFFull, 0xFFFFFFF000000000ull, 0x0000000010007FFFull, 0x7FFFFFF000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0xFFFFFFF000000000ull, + 0x000000000000FFFFull, 0x0000000000000000ull, 0xFFFFFFFFFFFFFFF0ull, 0xF6FF27F80000000Full, + 0x00000028000FFFFFull, 0x0000000000000000ull, 0x001FFFFFFFFFCFF0ull, 0xFFFF8010000000A0ull, + 0x00100000407FFFFFull, 0x00003FFFFFFFFFFFull, 0xFFFFFFF000000002ull, 0x000000001FFFFFFFull, + }; + return lookup_table_1[(static_cast(cp) - 0x10F1Cull) / 0x40ull] + & (0x1ull << ((static_cast(cp) - 0x10F1Cull) % 0x40ull)); + } + case 0x17: // [23] 11B67 - 127B1 + { + if (cp < U'\U00011C00' || cp > U'\U00012543') + return false; + + constexpr uint_least64_t lookup_table_1[] = + { + 0x00007FFFFFFFFDFFull, 0xFFFC000000000001ull, 0x000000000000FFFFull, 0x0000000000000000ull, + 
0x0001FFFFFFFFFB7Full, 0xFFFFFDBF00000040ull, 0x00000000010003FFull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0007FFFF00000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0001000000000000ull, 0x0000000000000000ull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0x0000000003FFFFFFull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0x000000000000000Full, + }; + return lookup_table_1[(static_cast(cp) - 0x11C00ull) / 0x40ull] + & (0x1ull << (static_cast(cp) % 0x40ull)); + } + case 0x18: return cp >= U'\U00013000'; + case 0x19: return cp <= U'\U0001342E'; + case 0x1A: return (cp >= U'\U00014400' && cp <= U'\U00014646'); + case 0x1D: // [29] 16529 - 17173 + { + if (cp < U'\U00016800') + return false; + TOML_ASSUME(cp <= U'\U00017173'); + + constexpr uint_least64_t lookup_table_1[] = + { + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0x01FFFFFFFFFFFFFFull, 0x000000007FFFFFFFull, 0x0000000000000000ull, 0x00003FFFFFFF0000ull, + 0x0000FFFFFFFFFFFFull, 0xE0FFFFF80000000Full, 0x000000000000FFFFull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0xFFFFFFFFFFFFFFFFull, 0x0000000000000000ull, 0x0000000000000000ull, + 0xFFFFFFFFFFFFFFFFull, 0x00000000000107FFull, 0x00000000FFF80000ull, 0x0000000B00000000ull, + 
0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0x000FFFFFFFFFFFFFull, + }; + return lookup_table_1[(static_cast(cp) - 0x16800ull) / 0x40ull] + & (0x1ull << (static_cast(cp) % 0x40ull)); + } + case 0x1F: return (cp >= U'\U00017DBF' && cp <= U'\U000187F6') || (cp >= U'\U00018800' && cp <= U'\U00018A09'); + case 0x20: return (cp >= U'\U00018A0A' && cp <= U'\U00018CD5') || (cp >= U'\U00018D00' && cp <= U'\U00018D07'); + case 0x23: // [35] 1AEEB - 1BB35 + { + if (cp < U'\U0001B000' || cp > U'\U0001B2FB') + return false; + + constexpr uint_least64_t lookup_table_1[] = + { + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0x000000007FFFFFFFull, 0xFFFF00F000070000ull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0x0FFFFFFFFFFFFFFFull, + }; + return lookup_table_1[(static_cast(cp) - 0x1B000ull) / 0x40ull] + & (0x1ull << (static_cast(cp) % 0x40ull)); + } + case 0x24: // [36] 1BB36 - 1C780 + { + if (cp < U'\U0001BC00' || cp > U'\U0001BC99') + return false; + + switch ((static_cast(cp) - 0x1BC00ull) / 0x40ull) + { + case 0x01: return (cp <= U'\U0001BC7C' && (1ull << (static_cast(cp) - 0x1BC40ull)) & 0x1FFF07FFFFFFFFFFull); + case 0x02: return (1u << (static_cast(cp) - 0x1BC80u)) & 0x3FF01FFu; default: return true; } } - case 1: + case 0x26: // [38] 1D3CC - 1E016 { - if (codepoint < U'\u0D04' || codepoint > U'\u191E') + if (cp < U'\U0001D400' || cp > U'\U0001D7CB') return false; - TOML_ASSUME_CODEPOINT_BETWEEN(U'\u0D04', U'\u191E'); - switch ((static_cast(codepoint) - 0xD04u) / 64u) + constexpr uint_least64_t lookup_table_1[] = { - case 0: return (1ull << (static_cast(codepoint) - 0xD04ull)) & 0x27FFFFFFFFFDDFFull; - case 1: return (1ull << (static_cast(codepoint) - 0xD4Eull)) & 0x3F000000E01C1ull; - case 2: return (1ull << (static_cast(codepoint) - 0xD85ull)) & 0x797FDFFFFFE3FFFFull; - case 
3: return codepoint <= U'\u0DC6' || codepoint >= U'\u0E01'; - case 4: return (1ull << (static_cast(codepoint) - 0xE04ull)) & 0xF000DFFFFFFFFFFFull; - case 5: return codepoint <= U'\u0E46' || codepoint >= U'\u0E81'; - case 6: return (1ull << (static_cast(codepoint) - 0xE84ull)) & 0xF200DFFAFFFFFF7Dull; - case 7: return (1ull << (static_cast(codepoint) - 0xEC4ull)) & 0x100000000F000005ull; - case 9: return codepoint != U'\u0F48'; - case 12: return codepoint <= U'\u102A' || codepoint >= U'\u103F'; - case 13: return (1ull << (static_cast(codepoint) - 0x1050ull)) & 0x3FFE1C0623C3Full; - case 14: return codepoint <= U'\u108E' || codepoint >= U'\u10A0'; - case 15: return (1ull << (static_cast(codepoint) - 0x10C4ull)) & 0xFF7FFFFFFFFFF20Bull; - case 21: return (1ull << (static_cast(codepoint) - 0x1244ull)) & 0xFFFFFFFFF3D7F3DFull; - case 22: return (1ull << (static_cast(codepoint) - 0x1284ull)) & 0xD7F3DFFFFFFFF3DFull; - case 23: return (1ull << (static_cast(codepoint) - 0x12C4ull)) & 0xFFFFFFFFFFF7FFF3ull; - case 24: return (1ull << (static_cast(codepoint) - 0x1304ull)) & 0xFFFFFFFFFFF3DFFFull; - case 25: return codepoint <= U'\u135A' || codepoint >= U'\u1380'; - case 26: return codepoint <= U'\u138F' || codepoint >= U'\u13A0'; - case 27: return (1ull << (static_cast(codepoint) - 0x13C4ull)) & 0xE3F3FFFFFFFFFFFFull; - case 37: return (1ull << (static_cast(codepoint) - 0x1644ull)) & 0xEFFFF9FFFFFFFFFFull; - case 38: return codepoint <= U'\u169A' || codepoint >= U'\u16A0'; - case 39: return (1ull << (static_cast(codepoint) - 0x16C4ull)) & 0xF01FE07FFFFFFFFFull; - case 40: return (1ull << (static_cast(codepoint) - 0x1704ull)) & 0xF0003FFFF0003DFFull; - case 41: return (1ull << (static_cast(codepoint) - 0x1744ull)) & 0xF0001DFFF0003FFFull; - case 43: return codepoint <= U'\u17D7' || codepoint >= U'\u17DC'; - case 45: return codepoint <= U'\u1878' || codepoint >= U'\u1880'; - case 46: return (1ull << (static_cast(codepoint) - 0x1884ull)) & 0xFFFFF05FFFFFFFF9ull; - case 47: 
return codepoint <= U'\u18F5' || codepoint >= U'\u1900'; - default: return true; - } + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFDFFFFFull, 0xEBFFDE64DFFFFFFFull, 0xFFFFFFFFFFFFFFEFull, + 0x7BFFFFFFDFDFE7BFull, 0xFFFFFFFFFFFDFC5Full, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFF3FFFFFFFFFull, 0xF7FFFFFFF7FFFFFDull, + 0xFFDFFFFFFFDFFFFFull, 0xFFFF7FFFFFFF7FFFull, 0xFFFFFDFFFFFFFDFFull, 0x0000000000000FF7ull, + }; + return lookup_table_1[(static_cast(cp) - 0x1D400ull) / 0x40ull] + & (0x1ull << (static_cast(cp) % 0x40ull)); } - case 2: + case 0x27: // [39] 1E017 - 1EC61 { - if (codepoint < U'\u1950' || codepoint > U'\u2184') + if (cp < U'\U0001E100' || cp > U'\U0001E94B') return false; - TOML_ASSUME_CODEPOINT_BETWEEN(U'\u1950', U'\u2184'); - switch ((static_cast(codepoint) - 0x1950u) / 64u) + constexpr uint_least64_t lookup_table_1[] = { - case 0: return (1ull << (static_cast(codepoint) - 0x1950ull)) & 0xFFFF001F3FFFFFFFull; - case 1: return codepoint <= U'\u19AB' || codepoint >= U'\u19B0'; - case 3: return codepoint <= U'\u1A16' || codepoint >= U'\u1A20'; - case 7: return codepoint <= U'\u1B33' || codepoint >= U'\u1B45'; - case 9: return (1ull << (static_cast(codepoint) - 0x1B90ull)) & 0xFFFFFC00C001FFFFull; - case 10: return codepoint <= U'\u1BE5' || codepoint >= U'\u1C00'; - case 11: return codepoint <= U'\u1C23' || codepoint >= U'\u1C4D'; - case 12: return codepoint <= U'\u1C7D' || codepoint >= U'\u1C80'; - case 13: return codepoint <= U'\u1CBA' || codepoint >= U'\u1CBD'; - case 14: return (1ull << (static_cast(codepoint) - 0x1CE9ull)) & 0x7FFF8237EFull; - case 23: return (1ull << (static_cast(codepoint) - 0x1F10ull)) & 0x3F3FFFFFFFFF3F3Full; - case 24: return (1ull << (static_cast(codepoint) - 0x1F50ull)) & 0xFFFF3FFFFFFFAAFFull; - case 25: return (1ull << (static_cast(codepoint) - 0x1F90ull)) & 0x1FDC5FDFFFFFFFFFull; - case 26: return (1ull << (static_cast(codepoint) - 0x1FD0ull)) & 0x1FDC1FFF0FCFull; - case 27: 
return false; - case 28: return codepoint <= U'\u2071' || codepoint >= U'\u207F'; - case 30: return (1u << (static_cast(codepoint) - 0x2102u)) & 0x3F21u; - case 31: return (1ull << (static_cast(codepoint) - 0x2110ull)) & 0x43E0F3FFBD503E2Full; - default: return true; - } + 0x3F801FFFFFFFFFFFull, 0x0000000000004000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x00000FFFFFFFFFFFull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0x000000000000001Full, + 0xFFFFFFFFFFFFFFFFull, 0x000000000000080Full, + }; + return lookup_table_1[(static_cast(cp) - 0x1E100ull) / 0x40ull] + & (0x1ull << (static_cast(cp) % 0x40ull)); } - case 3: + case 0x28: // [40] 1EC62 - 1F8AC { - if (codepoint < U'\u2C00' || codepoint > U'\u31BF') + if (cp < U'\U0001EE00' || cp > U'\U0001EEBB') return false; - TOML_ASSUME_CODEPOINT_BETWEEN(U'\u2C00', U'\u31BF'); - switch ((static_cast(codepoint) - 0x2C00u) / 64u) + switch ((static_cast(cp) - 0x1EE00ull) / 0x40ull) { - case 0: return codepoint != U'\u2C2F'; - case 1: return codepoint != U'\u2C5F'; - case 2: return true; - case 3: return (1ull << (static_cast(codepoint) - 0x2CC0ull)) & 0xC781FFFFFFFFFull; - case 4: return (1ull << (static_cast(codepoint) - 0x2D00ull)) & 0xFFFF20BFFFFFFFFFull; - case 5: return codepoint <= U'\u2D67' || codepoint >= U'\u2D6F'; - case 6: return (1ull << (static_cast(codepoint) - 0x2D80ull)) & 0x7F7F7F7F007FFFFFull; - case 7: return (1u << (static_cast(codepoint) - 
0x2DC0u)) & 0x7F7F7F7Fu; - case 8: return true; - case 16: return (1ull << (static_cast(codepoint) - 0x3005ull)) & 0xC1F00000000003ull; - case 17: return true; - case 18: return (1ull << (static_cast(codepoint) - 0x3080ull)) & 0xFFFFFFFEE07FFFFFull; - case 19: return codepoint != U'\u30FB'; - case 20: return codepoint != U'\u3100'; - case 21: return true; - case 22: return codepoint <= U'\u318E' || codepoint >= U'\u31A0'; - default: return false; - } - } - case 4: return codepoint <= U'\u31FF' || codepoint >= U'\u3400'; - case 6: return codepoint <= U'\u4DBE' || codepoint >= U'\u4E00'; - case 12: return codepoint <= U'\u9FFB' || codepoint >= U'\uA000'; - case 13: - { - if (codepoint < U'\uA079' || codepoint > U'\uACC3') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\uA079', U'\uACC3'); - switch ((static_cast(codepoint) - 0xA079u) / 63u) - { - case 18: return codepoint <= U'\uA4FD' || codepoint >= U'\uA500'; - case 22: return codepoint <= U'\uA60C' || codepoint >= U'\uA610'; - case 23: return codepoint <= U'\uA62B' || codepoint >= U'\uA640'; - case 24: return codepoint <= U'\uA66E' || codepoint >= U'\uA67F'; - case 26: return codepoint <= U'\uA6E5' || codepoint >= U'\uA717'; - case 27: return codepoint <= U'\uA71F' || codepoint >= U'\uA722'; - case 28: return codepoint <= U'\uA788' || codepoint >= U'\uA78B'; - case 29: return codepoint <= U'\uA7BF' || codepoint >= U'\uA7C2'; - case 30: return (1ull << (static_cast(codepoint) - 0xA7F5ull)) & 0x1FFFBDDFFFull; - case 31: return codepoint <= U'\uA822' || codepoint >= U'\uA840'; - case 32: return codepoint <= U'\uA873' || codepoint >= U'\uA882'; - case 34: return (1ull << (static_cast(codepoint) - 0xA8F2ull)) & 0xFFF001A3Full; - case 35: return codepoint <= U'\uA925' || codepoint >= U'\uA930'; - case 36: return codepoint <= U'\uA97C' || codepoint >= U'\uA984'; - case 37: return codepoint <= U'\uA9B2' || codepoint >= U'\uA9CF'; - case 38: return (1ull << (static_cast(codepoint) - 0xA9E0ull)) & 0x3FFFF7C00FFDFull; - 
case 39: return (1ull << (static_cast(codepoint) - 0xAA12ull)) & 0x3FDC000007FFFFFull; - case 40: return (1ull << (static_cast(codepoint) - 0xAA60ull)) & 0xFFFFC47FFFFFull; - case 41: return (1ull << (static_cast(codepoint) - 0xAA90ull)) & 0x53E62FFFFFFFFull; - case 42: return (1ull << (static_cast(codepoint) - 0xAADBull)) & 0x7CFC00380FFE7ull; - case 43: return (1ull << (static_cast(codepoint) - 0xAB0Eull)) & 0x7FFFFFFDFDFC01F9ull; - case 44: return (1ull << (static_cast(codepoint) - 0xAB4Dull)) & 0x7FFFFFF81FFFBFFFull; - case 46: return codepoint <= U'\uABE2' || codepoint >= U'\uAC00'; - default: return true; - } - } - case 17: return codepoint <= U'\uD7A2' || (codepoint >= U'\uD7B0' && codepoint <= U'\uD7C6') - || codepoint >= U'\uD7CB'; - case 18: return false; - case 19: return false; - case 20: - { - if (codepoint < U'\uF900' || codepoint > U'\U000102D0') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\uF900', U'\U000102D0'); - switch ((static_cast(codepoint) - 0xF900u) / 63u) - { - case 5: return codepoint <= U'\uFA6D' || codepoint >= U'\uFA70'; - case 8: return (1ull << (static_cast(codepoint) - 0xFB00ull)) & 0x7FFDFFA0F8007Full; - case 9: return (1ull << (static_cast(codepoint) - 0xFB38ull)) & 0x3FFFFFFFFFFFDB5Full; - case 17: return codepoint <= U'\uFD3D' || codepoint >= U'\uFD50'; - case 18: return codepoint <= U'\uFD8F' || codepoint >= U'\uFD92'; - case 21: return false; - case 22: return codepoint != U'\uFE6A'; - case 24: return codepoint <= U'\uFEFC' || codepoint >= U'\uFF21'; - case 25: return codepoint <= U'\uFF3A' || codepoint >= U'\uFF41'; - case 27: return (1ull << (static_cast(codepoint) - 0xFFA5ull)) & 0xE7E7E7E3FFFFFFull; - case 28: return codepoint != U'\uFFE4'; - case 29: return (1ull << (static_cast(codepoint) - 0x10023ull)) & 0x7FFE7FFF6FFFFEFull; - case 33: return false; - case 34: return false; - case 35: return false; - case 36: return false; - case 37: return false; - case 39: return codepoint <= U'\U0001029C' || codepoint >= 
U'\U000102A0'; - default: return true; - } - } - case 21: - { - if (codepoint < U'\U00010300' || codepoint > U'\U00010F1B') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\U00010300', U'\U00010F1B'); - switch ((static_cast(codepoint) - 0x10300u) / 64u) - { - case 0: return codepoint <= U'\U0001031F' || codepoint >= U'\U0001032D'; - case 1: return (1ull << (static_cast(codepoint) - 0x10340ull)) & 0x3FFFFFFFFF03FDull; - case 2: return codepoint <= U'\U0001039D' || codepoint >= U'\U000103A0'; - case 3: return codepoint <= U'\U000103C3' || codepoint >= U'\U000103C8'; - case 6: return codepoint <= U'\U0001049D' || codepoint >= U'\U000104B0'; - case 7: return codepoint <= U'\U000104D3' || codepoint >= U'\U000104D8'; - case 8: return codepoint <= U'\U00010527' || codepoint >= U'\U00010530'; - case 10: return false; - case 11: return false; - case 17: return codepoint <= U'\U00010755' || codepoint >= U'\U00010760'; - case 18: return false; - case 19: return false; - case 20: return (1ull << (static_cast(codepoint) - 0x10800ull)) & 0x91BFFFFFFFFFFD3Full; - case 21: return codepoint <= U'\U00010855' || codepoint >= U'\U00010860'; - case 23: return codepoint != U'\U000108C0'; - case 24: return codepoint <= U'\U00010915' || codepoint >= U'\U00010920'; - case 25: return false; - case 26: return codepoint <= U'\U000109B7' || codepoint >= U'\U000109BE'; - case 27: return false; - case 28: return (1ull << (static_cast(codepoint) - 0x10A00ull)) & 0x3FFFFFFEEF0001ull; - case 31: return codepoint != U'\U00010AC8'; - case 33: return codepoint <= U'\U00010B55' || codepoint >= U'\U00010B60'; - case 35: return false; - case 41: return false; - case 42: return false; - case 43: return false; - case 44: return false; - case 45: return false; - case 46: return codepoint <= U'\U00010EA9' || codepoint >= U'\U00010EB0'; - case 47: return false; - default: return true; - } - } - case 22: - { - if (codepoint < U'\U00010F1C' || codepoint > U'\U00011AF8') - return false; - - 
TOML_ASSUME_CODEPOINT_BETWEEN(U'\U00010F1C', U'\U00011AF8'); - switch ((static_cast(codepoint) - 0x10F1Cu) / 64u) - { - case 0: return (1ull << (static_cast(codepoint) - 0x10F1Cull)) & 0x3FFFFF00801ull; - case 1: return false; - case 3: return codepoint <= U'\U00010FF6' || codepoint >= U'\U00011003'; - case 6: return codepoint <= U'\U000110AF' || codepoint >= U'\U000110D0'; - case 7: return codepoint <= U'\U000110E8' || codepoint >= U'\U00011103'; - case 8: return (1ull << (static_cast(codepoint) - 0x1111Cull)) & 0xFFF00900000007FFull; - case 9: return (1ull << (static_cast(codepoint) - 0x1115Cull)) & 0xFFFFFF80047FFFFFull; - case 10: return (1ull << (static_cast(codepoint) - 0x1119Cull)) & 0x400001E0007FFFFFull; - case 11: return (1ull << (static_cast(codepoint) - 0x111DCull)) & 0xFFBFFFF000000001ull; - case 13: return (1u << (static_cast(codepoint) - 0x11280u)) & 0xFFFBD7Fu; - case 14: return (1ull << (static_cast(codepoint) - 0x1129Cull)) & 0xFFFFFFFFFFF01FFBull; - case 15: return (1ull << (static_cast(codepoint) - 0x112DCull)) & 0xFF99FE0000000007ull; - case 16: return (1ull << (static_cast(codepoint) - 0x1131Cull)) & 0x1000023EDFDFFFull; - case 18: return false; - case 20: return codepoint <= U'\U00011434' || codepoint >= U'\U00011447'; - case 21: return codepoint <= U'\U00011461' || codepoint >= U'\U00011480'; - case 22: return (1ull << (static_cast(codepoint) - 0x1149Cull)) & 0xB00000FFFFFull; - case 23: return false; - case 24: return false; - case 26: return codepoint <= U'\U000115AE' || codepoint >= U'\U000115D8'; - case 28: return codepoint <= U'\U0001162F' || codepoint >= U'\U00011644'; - case 30: return codepoint <= U'\U000116AA' || codepoint >= U'\U000116B8'; - case 32: return false; - case 33: return false; - case 34: return false; - case 37: return false; - case 39: return (1ull << (static_cast(codepoint) - 0x118DCull)) & 0xF6FF27F80000000Full; - case 40: return (1ull << (static_cast(codepoint) - 0x1191Cull)) & 0x28000FFFFFull; - case 41: return 
false; - case 42: return codepoint <= U'\U000119A7' || codepoint >= U'\U000119AA'; - case 43: return (1ull << (static_cast(codepoint) - 0x119E1ull)) & 0x7FFFC0080000005ull; - case 44: return (1ull << (static_cast(codepoint) - 0x11A1Cull)) & 0x100000407FFFFFull; - case 46: return codepoint <= U'\U00011A9D' || codepoint >= U'\U00011AC0'; - default: return true; - } - } - case 23: - { - if (codepoint < U'\U00011C00' || codepoint > U'\U00012543') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\U00011C00', U'\U00012543'); - switch ((static_cast(codepoint) - 0x11C00u) / 63u) - { - case 0: return codepoint != U'\U00011C09'; - case 1: return codepoint <= U'\U00011C40' || codepoint >= U'\U00011C72'; - case 3: return false; - case 4: return (1ull << (static_cast(codepoint) - 0x11D00ull)) & 0x1FFFFFFFFFB7Full; - case 5: return (1ull << (static_cast(codepoint) - 0x11D46ull)) & 0xFFFF6FC000001ull; - case 6: return codepoint <= U'\U00011D89' || codepoint >= U'\U00011D98'; - case 7: return false; - case 8: return false; - case 9: return false; - case 10: return false; - case 12: return false; - case 13: return false; - case 15: return false; - case 31: return false; - case 32: return false; - case 33: return false; - default: return true; - } - } - case 27: return false; - case 28: return false; - case 29: - { - if (codepoint < U'\U00016800' || codepoint > U'\U00017173') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\U00016800', U'\U00017173'); - switch ((static_cast(codepoint) - 0x16800u) / 64u) - { - case 10: return false; - case 13: return (1ull << (static_cast(codepoint) - 0x16B40ull)) & 0xE0FFFFF80000000Full; - case 15: return false; - case 16: return false; - case 17: return false; - case 18: return false; - case 19: return false; - case 20: return false; - case 21: return false; - case 22: return false; - case 23: return false; - case 24: return false; - case 26: return false; - case 27: return false; - case 29: return codepoint <= U'\U00016F4A' || codepoint >= 
U'\U00016F50'; - case 31: return codepoint != U'\U00016FC0'; - default: return true; - } - } - case 31: return codepoint <= U'\U000187F6' || codepoint >= U'\U00018800'; - case 32: return codepoint <= U'\U00018CD5' || codepoint >= U'\U00018D00'; - case 33: return false; - case 34: return false; - case 35: return codepoint <= U'\U0001B11E' || (codepoint >= U'\U0001B150' && codepoint <= U'\U0001B152') - || (codepoint >= U'\U0001B164' && codepoint <= U'\U0001B167') || codepoint >= U'\U0001B170'; - case 36: return codepoint <= U'\U0001BC6A' || (codepoint >= U'\U0001BC70' && codepoint <= U'\U0001BC7C') - || (codepoint >= U'\U0001BC80' && codepoint <= U'\U0001BC88') || codepoint >= U'\U0001BC90'; - case 37: return false; - case 38: - { - if (codepoint < U'\U0001D400' || codepoint > U'\U0001D7CB') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\U0001D400', U'\U0001D7CB'); - switch ((static_cast(codepoint) - 0x1D400u) / 61u) - { - case 1: return codepoint != U'\U0001D455'; - case 2: return (1ull << (static_cast(codepoint) - 0x1D47Aull)) & 0x1FF79937FFFFFFFFull; - case 3: return (1ull << (static_cast(codepoint) - 0x1D4B7ull)) & 0x1FFFFFFFFFFFDFD7ull; - case 4: return (1ull << (static_cast(codepoint) - 0x1D4F4ull)) & 0x1FFFFDFDFE7BFFFFull; - case 5: return (1ull << (static_cast(codepoint) - 0x1D531ull)) & 0x1FFFFFFEFE2FBDFFull; - case 11: return (1ull << (static_cast(codepoint) - 0x1D69Full)) & 0xFFFFFFBFFFFFE7Full; - case 12: return (1ull << (static_cast(codepoint) - 0x1D6DCull)) & 0x1DFFFFFF7FFFFFFFull; - case 13: return (1ull << (static_cast(codepoint) - 0x1D719ull)) & 0x1FBFFFFFEFFFFFFFull; - case 14: return (1ull << (static_cast(codepoint) - 0x1D756ull)) & 0x1FF7FFFFFDFFFFFFull; - case 15: return (1ull << (static_cast(codepoint) - 0x1D793ull)) & 0x1FEFFFFFFBFFFFFull; - default: return true; - } - } - case 39: - { - if (codepoint < U'\U0001E100' || codepoint > U'\U0001E94B') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\U0001E100', U'\U0001E94B'); - switch 
((static_cast(codepoint) - 0x1E100u) / 63u) - { - case 0: return codepoint <= U'\U0001E12C' || codepoint >= U'\U0001E137'; - case 1: return true; - case 7: return true; - case 28: return true; - case 29: return true; - case 30: return true; - case 31: return true; - case 32: return true; - case 33: return codepoint <= U'\U0001E943' || codepoint >= U'\U0001E94B'; - default: return false; - } - } - case 40: - { - if (codepoint < U'\U0001EE00' || codepoint > U'\U0001EEBB') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\U0001EE00', U'\U0001EEBB'); - switch ((static_cast(codepoint) - 0x1EE00u) / 63u) - { - case 0: return (1ull << (static_cast(codepoint) - 0x1EE00ull)) & 0xAF7FE96FFFFFFEFull; - case 1: return (1ull << (static_cast(codepoint) - 0x1EE42ull)) & 0x7BDFDE5AAA5BAA1ull; - case 2: return (1ull << (static_cast(codepoint) - 0x1EE7Eull)) & 0x3FFFEFB83FFFEFFDull; + case 0x00: return (cp <= U'\U0001EE3B' && (1ull << (static_cast(cp) - 0x1EE00ull)) & 0xAF7FE96FFFFFFEFull); + case 0x01: return (cp >= U'\U0001EE42' && cp <= U'\U0001EE7E' && (1ull << (static_cast(cp) - 0x1EE42ull)) & 0x17BDFDE5AAA5BAA1ull); + case 0x02: return (1ull << (static_cast(cp) - 0x1EE80ull)) & 0xFFFFBEE0FFFFBFFull; TOML_NO_DEFAULT_CASE; } } - case 55: return codepoint <= U'\U0002A6DC' || codepoint >= U'\U0002A700'; - case 56: return codepoint <= U'\U0002B733' || (codepoint >= U'\U0002B740' && codepoint <= U'\U0002B81C') - || codepoint >= U'\U0002B820'; - case 58: return codepoint <= U'\U0002CEA0' || codepoint >= U'\U0002CEB0'; - default: return true; + case 0x29: return cp >= U'\U00020000'; + case 0x37: return (cp >= U'\U0002A4C7' && cp <= U'\U0002A6DC') || (cp >= U'\U0002A700' && cp <= U'\U0002B111'); + case 0x38: return (cp >= U'\U0002B112' && cp <= U'\U0002B733') || (cp >= U'\U0002B740' && cp <= U'\U0002B81C') + || (cp >= U'\U0002B820' && cp <= U'\U0002BD5C'); + case 0x3A: return (cp >= U'\U0002C9A8' && cp <= U'\U0002CEA0') || (cp >= U'\U0002CEB0' && cp <= U'\U0002D5F2'); + case 0x3C: 
return cp <= U'\U0002EBDF'; + case 0x3D: return (cp >= U'\U0002F800' && cp <= U'\U0002FA1D'); + case 0x3E: return cp >= U'\U00030000'; + TOML_NO_DEFAULT_CASE; } } [[nodiscard]] TOML_GNU_ATTR(const) - constexpr bool is_unicode_number(char32_t codepoint) noexcept + constexpr bool is_unicode_number(char32_t cp) noexcept { - if (codepoint < U'\u0660' || codepoint > U'\U0001FBF9') + if (cp < U'\u0660' || cp > U'\U0001FBF9') return false; - TOML_ASSUME_CODEPOINT_BETWEEN(U'\u0660', U'\U0001FBF9'); - switch ((static_cast(codepoint) - 0x660u) / 2007u) + const auto child_index_0 = (static_cast(cp) - 0x660ull) / 0x7D7ull; + if ((1ull << child_index_0) & 0x47FFDFE07FCFFFD0ull) + return false; + switch (child_index_0) { - case 0: + case 0x00: // [0] 0660 - 0E36 { - if (codepoint > U'\u0DEF') + if (cp > U'\u0DEF') return false; + TOML_ASSUME(cp >= U'\u0660'); - return ((static_cast(codepoint) - 0x660u) / 63u) & 0x55555025ull; - } - case 1: - { - if (codepoint < U'\u0E50' || codepoint > U'\u1099') - return false; - - return ((static_cast(codepoint) - 0xE50u) / 59u) & 0x30Dull; - } - case 2: - { - if (codepoint < U'\u16EE' || codepoint > U'\u1C59') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\u16EE', U'\u1C59'); - switch ((static_cast(codepoint) - 0x16EEu) / 64u) + constexpr uint_least64_t lookup_table_1[] = { - case 0: return true; - case 3: return true; - case 4: return true; - case 9: return true; - case 11: return true; - case 14: return codepoint <= U'\u1A89' || codepoint >= U'\u1A90'; - case 17: return true; - case 19: return true; - case 21: return codepoint <= U'\u1C49' || codepoint >= U'\u1C50'; - default: return false; - } + 0x00000000000003FFull, 0x0000000000000000ull, 0x0000000003FF0000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x000003FF00000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x000000000000FFC0ull, 0x0000000000000000ull, 
0x000000000000FFC0ull, 0x0000000000000000ull, + 0x000000000000FFC0ull, 0x0000000000000000ull, 0x000000000000FFC0ull, 0x0000000000000000ull, + 0x000000000000FFC0ull, 0x0000000000000000ull, 0x000000000000FFC0ull, 0x0000000000000000ull, + 0x000000000000FFC0ull, 0x0000000000000000ull, 0x000000000000FFC0ull, 0x0000000000000000ull, + 0x000000000000FFC0ull, 0x0000000000000000ull, 0x000000000000FFC0ull, + }; + return lookup_table_1[(static_cast(cp) - 0x660ull) / 0x40ull] + & (0x1ull << ((static_cast(cp) - 0x660ull) % 0x40ull)); } - case 3: return codepoint <= U'\u2182' || codepoint >= U'\u2185'; - case 5: return (1ull << (static_cast(codepoint) - 0x3007ull)) & 0xE0007FC000001ull; - case 20: + case 0x01: // [1] 0E37 - 160D { - if (codepoint < U'\uA620' || codepoint > U'\uAA59') + if (cp < U'\u0E50' || cp > U'\u1099') return false; - return ((static_cast(codepoint) - 0xA620u) / 64u) & 0x1CC09ull; - } - case 21: return true; - case 31: return true; - case 32: - { - if (codepoint < U'\U00010140' || codepoint > U'\U000104A9') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\U00010140', U'\U000104A9'); - switch ((static_cast(codepoint) - 0x10140u) / 63u) + constexpr uint_least64_t lookup_table_1[] = { - case 0: return true; - case 8: return codepoint <= U'\U00010341' || codepoint >= U'\U0001034A'; - case 10: return true; - case 13: return true; - default: return false; - } + 0x00000000000003FFull, 0x0000000000000000ull, 0x00000000000003FFull, 0x0000000003FF0000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x03FF000000000000ull, + 0x0000000000000000ull, 0x00000000000003FFull, + }; + return lookup_table_1[(static_cast(cp) - 0xE50ull) / 0x40ull] + & (0x1ull << ((static_cast(cp) - 0xE50ull) % 0x40ull)); } - case 33: return codepoint <= U'\U00010D39' || codepoint >= U'\U00011066'; - case 34: + case 0x02: // [2] 160E - 1DE4 { - if (codepoint < U'\U000110F0' || codepoint > U'\U00011739') + if (cp < U'\u16EE' || cp > U'\u1C59') return false; - return 
((static_cast(codepoint) - 0x110F0u) / 62u) & 0x341610Bull; + constexpr uint_least64_t lookup_table_1[] = + { + 0x0000000000000007ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0FFC000000000000ull, + 0x00000FFC00000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x00000003FF000000ull, 0x0000000000000000ull, 0x00000FFC00000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x00000FFC0FFC0000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x00000FFC00000000ull, 0x0000000000000000ull, 0x0000000000000FFCull, + 0x0000000000000000ull, 0x00000FFC0FFC0000ull, + }; + return lookup_table_1[(static_cast(cp) - 0x16EEull) / 0x40ull] + & (0x1ull << ((static_cast(cp) - 0x16EEull) % 0x40ull)); } - case 35: + case 0x03: return (cp >= U'\u2160' && cp <= U'\u2188' && (1ull << (static_cast(cp) - 0x2160ull)) & 0x1E7FFFFFFFFull); + case 0x05: return (cp >= U'\u3007' && cp <= U'\u303A' && (1ull << (static_cast(cp) - 0x3007ull)) & 0xE0007FC000001ull); + case 0x14: // [20] A32C - AB02 { - if (codepoint < U'\U000118E0' || codepoint > U'\U00011DA9') + if (cp < U'\uA620' || cp > U'\uAA59') return false; - return ((static_cast(codepoint) - 0x118E0u) / 62u) & 0xC4003ull; + constexpr uint_least64_t lookup_table_1[] = + { + 0x00000000000003FFull, 0x0000000000000000ull, 0x0000000000000000ull, 0x000000000000FFC0ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x03FF000000000000ull, 0x000003FF00000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x03FF000000000000ull, 0x0000000003FF0000ull, + 0x03FF000000000000ull, + }; + return lookup_table_1[(static_cast(cp) - 0xA620ull) / 0x40ull] + & (0x1ull << ((static_cast(cp) - 0xA620ull) % 0x40ull)); } - case 36: return true; - case 45: return codepoint <= U'\U00016A69' || codepoint >= U'\U00016B50'; - case 59: return true; - case 60: return codepoint <= U'\U0001E149' || codepoint >= 
U'\U0001E2F0'; - case 61: return true; - case 63: return true; - default: return false; + case 0x15: return (cp >= U'\uABF0' && cp <= U'\uABF9'); + case 0x1F: return (cp >= U'\uFF10' && cp <= U'\uFF19'); + case 0x20: // [32] 10140 - 10916 + { + if (cp > U'\U000104A9') + return false; + TOML_ASSUME(cp >= U'\U00010140'); + + constexpr uint_least64_t lookup_table_1[] = + { + 0x001FFFFFFFFFFFFFull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000402ull, 0x0000000000000000ull, 0x00000000003E0000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x000003FF00000000ull, + }; + return lookup_table_1[(static_cast(cp) - 0x10140ull) / 0x40ull] + & (0x1ull << (static_cast(cp) % 0x40ull)); + } + case 0x21: return (cp >= U'\U00010D30' && cp <= U'\U00010D39') || (cp >= U'\U00011066' && cp <= U'\U0001106F'); + case 0x22: // [34] 110EE - 118C4 + { + if (cp < U'\U000110F0' || cp > U'\U00011739') + return false; + + constexpr uint_least64_t lookup_table_1[] = + { + 0x00000000000003FFull, 0x000000000000FFC0ull, 0x0000000000000000ull, 0x000003FF00000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x00000000000003FFull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x000003FF00000000ull, 0x0000000000000000ull, 0x000003FF00000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x000003FF00000000ull, 0x0000000000000000ull, 0x0000000003FF0000ull, + 0x0000000000000000ull, 0x00000000000003FFull, + }; + return lookup_table_1[(static_cast(cp) - 0x110F0ull) / 0x40ull] + & (0x1ull << ((static_cast(cp) - 0x110F0ull) % 0x40ull)); + } + case 0x23: // [35] 118C5 - 1209B + { + if (cp < U'\U000118E0' || cp > U'\U00011DA9') + return false; + + constexpr uint_least64_t lookup_table_1[] = + { + 
0x00000000000003FFull, 0x03FF000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x03FF000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x03FF000000000000ull, 0x0000000000000000ull, 0x00000000000003FFull, + }; + return lookup_table_1[(static_cast(cp) - 0x118E0ull) / 0x40ull] + & (0x1ull << ((static_cast(cp) - 0x118E0ull) % 0x40ull)); + } + case 0x24: return (cp >= U'\U00012400' && cp <= U'\U0001246E'); + case 0x2D: return (cp >= U'\U00016A60' && cp <= U'\U00016A69') || (cp >= U'\U00016B50' && cp <= U'\U00016B59'); + case 0x3B: return (cp >= U'\U0001D7CE' && cp <= U'\U0001D7FF'); + case 0x3C: return (cp >= U'\U0001E140' && cp <= U'\U0001E149') || (cp >= U'\U0001E2F0' && cp <= U'\U0001E2F9'); + case 0x3D: return (cp >= U'\U0001E950' && cp <= U'\U0001E959'); + case 0x3F: return cp >= U'\U0001FBF0'; + TOML_NO_DEFAULT_CASE; } } [[nodiscard]] TOML_GNU_ATTR(const) - constexpr bool is_unicode_combining_mark(char32_t codepoint) noexcept + constexpr bool is_unicode_combining_mark(char32_t cp) noexcept { - if (codepoint < U'\u0300' || codepoint > U'\U000E01EF') + if (cp < U'\u0300' || cp > U'\U000E01EF') return false; - TOML_ASSUME_CODEPOINT_BETWEEN(U'\u0300', U'\U000E01EF'); - switch ((static_cast(codepoint) - 0x300u) / 14332u) + const auto child_index_0 = (static_cast(cp) - 0x300ull) / 0x37FCull; + if ((1ull << child_index_0) & 0x7FFFFFFFFFFFFE02ull) + return false; + switch (child_index_0) { - case 0: + case 0x00: // [0] 0300 - 3AFB { - if (codepoint > U'\u309A') + if (cp > U'\u309A') return false; + TOML_ASSUME(cp >= U'\u0300'); - TOML_ASSUME_CODEPOINT_BETWEEN(U'\u0300', U'\u309A'); - switch ((static_cast(codepoint) - 0x300u) / 1460u) + const auto child_index_1 = (static_cast(cp) - 0x300ull) / 0xB7ull; + if ((1ull 
<< child_index_1) & 0x63FFFDC00FB00002ull) + return false; + switch (child_index_1) { - case 0: + case 0x00: return cp <= U'\u036F'; + case 0x02: return (cp >= U'\u0483' && cp <= U'\u0487'); + case 0x03: return (cp >= U'\u0591' && cp <= U'\u05C7' && (1ull << (static_cast(cp) - 0x591ull)) & 0x5B5FFFFFFFFFFFull); + case 0x04: return (cp >= U'\u0610' && cp <= U'\u061A') || (cp >= U'\u064B' && cp <= U'\u065F') + || cp == U'\u0670'; + case 0x05: // [5] 0693 - 0749 { - if (codepoint > U'\u085B') + if (cp < U'\u06D6') return false; + TOML_ASSUME(cp <= U'\u0749'); - TOML_ASSUME_CODEPOINT_BETWEEN(U'\u0300', U'\u085B'); - switch ((static_cast(codepoint) - 0x300u) / 63u) + switch ((static_cast(cp) - 0x6D6ull) / 0x40ull) { - case 0: return true; - case 1: return true; - case 6: return true; - case 10: return true; - case 11: return (1u << (static_cast(codepoint) - 0x5B5u)) & 0x5B5FFu; - case 12: return true; - case 13: return codepoint <= U'\u065F' || codepoint >= U'\u0670'; - case 15: return (1u << (static_cast(codepoint) - 0x6D6u)) & 0xF67E7Fu; - case 16: return true; - case 17: return true; - case 18: return true; - case 19: return codepoint <= U'\u07B0' || codepoint >= U'\u07EB'; - case 20: return (1ull << (static_cast(codepoint) - 0x7ECull)) & 0x6EFFBC00000200FFull; - case 21: return codepoint <= U'\u082D' || codepoint >= U'\u0859'; - default: return false; - } - } - case 1: - { - if (codepoint < U'\u08D3' || codepoint > U'\u0E4E') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\u08D3', U'\u0E4E'); - switch ((static_cast(codepoint) - 0x8D3u) / 64u) - { - case 0: return codepoint != U'\u08E2'; - case 1: return (1u << (static_cast(codepoint) - 0x93Au)) & 0x1BFFFF7u; - case 2: return (1ull << (static_cast(codepoint) - 0x953ull)) & 0x1C0000001801Full; - case 3: return (1u << (static_cast(codepoint) - 0x9BCu)) & 0x399FDu; - case 4: return (1ull << (static_cast(codepoint) - 0x9D7ull)) & 0x1C8000001801ull; - case 5: return (1u << (static_cast(codepoint) - 0xA3Cu)) & 
0x23987Du; - case 6: return (1u << (static_cast(codepoint) - 0xA70u)) & 0xE0023u; - case 7: return (1u << (static_cast(codepoint) - 0xABCu)) & 0x3BBFDu; - case 8: return (1ull << (static_cast(codepoint) - 0xAE2ull)) & 0x3BF000003ull; - case 9: return (1u << (static_cast(codepoint) - 0xB3Cu)) & 0x399FDu; - case 10: return (1ull << (static_cast(codepoint) - 0xB55ull)) & 0x200000006007ull; - case 11: return (1u << (static_cast(codepoint) - 0xBBEu)) & 0xF71Fu; - case 12: return codepoint <= U'\u0BD7' || codepoint >= U'\u0C00'; - case 13: return (1u << (static_cast(codepoint) - 0xC3Eu)) & 0xF77Fu; - case 14: return (1ull << (static_cast(codepoint) - 0xC55ull)) & 0x700000006003ull; - case 15: return (1u << (static_cast(codepoint) - 0xCBCu)) & 0x3DDFDu; - case 16: return (1ull << (static_cast(codepoint) - 0xCD5ull)) & 0x780000006003ull; - case 17: return (1u << (static_cast(codepoint) - 0xD3Bu)) & 0x7BBFBu; - case 18: return (1ull << (static_cast(codepoint) - 0xD57ull)) & 0x1C0000001801ull; - case 19: return codepoint <= U'\u0DCA' || codepoint >= U'\u0DCF'; - case 20: return (1ull << (static_cast(codepoint) - 0xDD3ull)) & 0x180001FEBull; - case 21: return (1u << (static_cast(codepoint) - 0xE31u)) & 0x3FC003F9u; + case 0x00: return (cp <= U'\u0711' && (1ull << (static_cast(cp) - 0x6D6ull)) & 0x800000000F67E7Full); + case 0x01: return cp >= U'\u0730'; TOML_NO_DEFAULT_CASE; } } - case 2: + case 0x06: // [6] 074A - 0800 { - if (codepoint < U'\u0EB1' || codepoint > U'\u135F') + if (cp > U'\u07FD') return false; + TOML_ASSUME(cp >= U'\u074A'); - TOML_ASSUME_CODEPOINT_BETWEEN(U'\u0EB1', U'\u135F'); - switch ((static_cast(codepoint) - 0xEB1u) / 64u) + switch ((static_cast(cp) - 0x74Aull) / 0x40ull) { - case 0: return (1u << (static_cast(codepoint) - 0xEB1u)) & 0x1F800FF9u; - case 1: return true; - case 2: return (1u << (static_cast(codepoint) - 0xF35u)) & 0x615u; - case 3: return (1ull << (static_cast(codepoint) - 0xF71ull)) & 0xFFFFFF7FF06FFFFFull; - case 4: return codepoint <= 
U'\u0FBC' || codepoint >= U'\u0FC6'; - case 5: return true; - case 6: return (1ull << (static_cast(codepoint) - 0x1031ull)) & 0x1FCEE1E000003FFFull; - case 7: return (1ull << (static_cast(codepoint) - 0x1071ull)) & 0x1E005FFE000Full; - case 18: return true; - default: return false; - } - } - case 3: - { - if (codepoint < U'\u1712' || codepoint > U'\u193B') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\u1712', U'\u193B'); - switch ((static_cast(codepoint) - 0x1712u) / 62u) - { - case 0: return codepoint <= U'\u1714' || codepoint >= U'\u1732'; - case 1: return codepoint <= U'\u1753' || codepoint >= U'\u1772'; - case 3: return codepoint <= U'\u17D3' || codepoint >= U'\u17DD'; - case 6: return codepoint <= U'\u1886' || codepoint >= U'\u18A9'; - case 7: return false; - case 8: return codepoint <= U'\u192B' || codepoint >= U'\u1930'; - default: return true; - } - } - case 4: - { - if (codepoint < U'\u1A17' || codepoint > U'\u1DFF') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\u1A17', U'\u1DFF'); - switch ((static_cast(codepoint) - 0x1A17u) / 63u) - { - case 0: return codepoint <= U'\u1A1B' || codepoint >= U'\u1A55'; - case 1: return (1ull << (static_cast(codepoint) - 0x1A56ull)) & 0x27FFFFFFDFFull; - case 2: return codepoint != U'\u1A95'; - case 5: return codepoint <= U'\u1B73' || codepoint >= U'\u1B80'; - case 9: return false; - case 10: return false; - case 11: return (1ull << (static_cast(codepoint) - 0x1CD0ull)) & 0x39021FFFFF7ull; - case 12: return false; - case 13: return false; - case 15: return codepoint != U'\u1DFA'; - default: return true; - } - } - case 5: return (1ull << (static_cast(codepoint) - 0x20D0ull)) & 0x1FFE21FFFull; - case 7: - { - if (codepoint < U'\u2CEF') - return false; - - return ((static_cast(codepoint) - 0x2CEFu) / 63u) & 0x601Dull; - } - default: return false; - } - } - case 2: - { - if (codepoint < U'\uA66F' || codepoint > U'\uAAEF') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\uA66F', U'\uAAEF'); - switch 
((static_cast(codepoint) - 0xA66Fu) / 61u) - { - case 0: return (1ull << (static_cast(codepoint) - 0xA66Full)) & 0x1800000007FE1ull; - case 1: return false; - case 3: return false; - case 4: return false; - case 5: return false; - case 6: return (1u << (static_cast(codepoint) - 0xA802u)) & 0x211u; - case 7: return codepoint <= U'\uA827' || codepoint >= U'\uA82C'; - case 10: return codepoint <= U'\uA8F1' || codepoint >= U'\uA8FF'; - case 11: return codepoint <= U'\uA92D' || codepoint >= U'\uA947'; - case 12: return codepoint <= U'\uA953' || codepoint >= U'\uA980'; - case 16: return (1ull << (static_cast(codepoint) - 0xAA43ull)) & 0x100000000000601ull; - case 17: return (1ull << (static_cast(codepoint) - 0xAA7Cull)) & 0x19D0000000000003ull; - case 18: return (1ull << (static_cast(codepoint) - 0xAABEull)) & 0x3E0000000000Bull; - default: return true; - } - } - case 3: return codepoint <= U'\uAAF6' || (codepoint >= U'\uABE3' && codepoint <= U'\uABEA') - || codepoint >= U'\uABEC'; - case 4: - { - if (codepoint < U'\uFB1E' || codepoint > U'\U00011A99') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\uFB1E', U'\U00011A99'); - switch ((static_cast(codepoint) - 0xFB1Eu) / 1008u) - { - case 0: return codepoint == U'\uFB1E' || (codepoint >= U'\uFE00' && codepoint <= U'\uFE0F') - || codepoint >= U'\uFE20'; - case 1: return codepoint <= U'\U000101FD' || codepoint >= U'\U000102E0'; - case 3: return (1ull << (static_cast(codepoint) - 0x10A01ull)) & 0x4380000000007837ull; - case 4: return codepoint <= U'\U00010AE6' || (codepoint >= U'\U00010D24' && codepoint <= U'\U00010D27') - || codepoint >= U'\U00010EAB'; - case 5: - { - if (codepoint < U'\U00010F46' || codepoint > U'\U0001123E') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\U00010F46', U'\U0001123E'); - switch ((static_cast(codepoint) - 0x10F46u) / 64u) - { - case 1: return false; - case 4: return codepoint <= U'\U00011046' || codepoint >= U'\U0001107F'; - case 7: return codepoint <= U'\U00011134' || codepoint >= 
U'\U00011145'; - case 8: return (1ull << (static_cast(codepoint) - 0x11146ull)) & 0x1C00200000000001ull; - case 10: return codepoint != U'\U000111C6'; - case 11: return codepoint <= U'\U00011237' || codepoint >= U'\U0001123E'; - default: return true; - } - } - case 6: - { - if (codepoint < U'\U000112DF' || codepoint > U'\U000116AD') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\U000112DF', U'\U000116AD'); - switch ((static_cast(codepoint) - 0x112DFu) / 61u) - { - case 0: return codepoint <= U'\U000112EA' || codepoint >= U'\U00011300'; - case 1: return (1u << (static_cast(codepoint) - 0x1133Bu)) & 0x100733FBu; - case 2: return (1u << (static_cast(codepoint) - 0x11362u)) & 0x7C7F3u; - case 3: return false; - case 4: return false; - case 8: return false; - case 9: return false; - case 10: return false; - case 11: return codepoint <= U'\U000115B5' || codepoint >= U'\U000115B8'; - case 12: return codepoint <= U'\U000115C0' || codepoint >= U'\U000115DC'; - default: return true; - } - } - case 7: - { - if (codepoint < U'\U000116AE') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\U000116AE', U'\U00011A99'); - switch ((static_cast(codepoint) - 0x116AEu) / 63u) - { - case 0: return true; - case 1: return true; - case 6: return true; - case 10: return (1u << (static_cast(codepoint) - 0x11930u)) & 0xD79BFu; - case 12: return codepoint <= U'\U000119D7' || codepoint >= U'\U000119DA'; - case 13: return codepoint <= U'\U000119E4' || codepoint >= U'\U00011A01'; - case 14: return (1ull << (static_cast(codepoint) - 0x11A33ull)) & 0x1FFC0100F7Full; - case 15: return true; - default: return false; - } - } - default: return true; - } - } - case 5: - { - if (codepoint < U'\U00011C2F' || codepoint > U'\U00011EF6') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\U00011C2F', U'\U00011EF6'); - switch ((static_cast(codepoint) - 0x11C2Fu) / 60u) - { - case 0: return codepoint != U'\U00011C37'; - case 1: return true; - case 2: return codepoint != U'\U00011CA8'; - case 4: 
return (1u << (static_cast(codepoint) - 0x11D31u)) & 0x5FDA3Fu; - case 5: return (1u << (static_cast(codepoint) - 0x11D8Au)) & 0x1EDFu; - case 6: return true; - case 11: return true; - default: return false; - } - } - case 6: - { - if (codepoint < U'\U00016AF0' || codepoint > U'\U00016FF1') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\U00016AF0', U'\U00016FF1'); - switch ((static_cast(codepoint) - 0x16AF0u) / 62u) - { - case 0: return true; - case 1: return true; - case 18: return codepoint != U'\U00016F4C'; - case 19: return true; - case 20: return codepoint <= U'\U00016FE4' || codepoint >= U'\U00016FF0'; - default: return false; - } - } - case 7: return true; - case 8: - { - if (codepoint < U'\U0001D165' || codepoint > U'\U0001E94A') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\U0001D165', U'\U0001E94A'); - switch ((static_cast(codepoint) - 0x1D165u) / 765u) - { - case 0: - { - if (codepoint > U'\U0001D244') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\U0001D165', U'\U0001D244'); - switch ((static_cast(codepoint) - 0x1D165u) / 56u) - { - case 0: return (1ull << (static_cast(codepoint) - 0x1D165ull)) & 0x7F3FC03F1Full; - case 2: return false; - default: return true; - } - } - case 2: return codepoint <= U'\U0001DA36' || codepoint >= U'\U0001DA3B'; - case 3: - { - if (codepoint < U'\U0001DA5C' || codepoint > U'\U0001DAAF') - return false; - - TOML_ASSUME_CODEPOINT_BETWEEN(U'\U0001DA5C', U'\U0001DAAF'); - switch ((static_cast(codepoint) - 0x1DA5Cu) / 42u) - { - case 0: return (1ull << (static_cast(codepoint) - 0x1DA5Cull)) & 0x1000201FFFFull; - case 1: return codepoint != U'\U0001DA86'; + case 0x00: return cp == U'\u074A'; + case 0x01: return (cp >= U'\u07A6' && cp <= U'\u07B0'); + case 0x02: return (cp >= U'\u07EB' && (1u << (static_cast(cp) - 0x7EBu)) & 0x401FFu); TOML_NO_DEFAULT_CASE; } } - case 4: return (1ull << (static_cast(codepoint) - 0x1E000ull)) & 0x7DBF9FFFF7Full; - case 5: return codepoint <= U'\U0001E136' || codepoint >= 
U'\U0001E2EC'; - case 7: return codepoint <= U'\U0001E8D6' || codepoint >= U'\U0001E944'; - default: return false; + case 0x07: // [7] 0801 - 08B7 + { + if (cp < U'\u0816' || cp > U'\u085B') + return false; + + switch ((static_cast(cp) - 0x816ull) / 0x40ull) + { + case 0x00: return (cp <= U'\u082D' && (1u << (static_cast(cp) - 0x816u)) & 0xFBBFEFu); + case 0x01: return cp >= U'\u0859'; + TOML_NO_DEFAULT_CASE; + } + } + case 0x08: // [8] 08B8 - 096E + { + if (cp < U'\u08D3' || cp > U'\u0963') + return false; + + switch ((static_cast(cp) - 0x8D3ull) / 0x40ull) + { + case 0x00: return (cp <= U'\u0903' && (1ull << (static_cast(cp) - 0x8D3ull)) & 0x1FFFFFFFF7FFFull); + case 0x01: return (cp >= U'\u093A' && (1u << (static_cast(cp) - 0x93Au)) & 0x1BFFFF7u); + case 0x02: return (1u << (static_cast(cp) - 0x953u)) & 0x1801Fu; + TOML_NO_DEFAULT_CASE; + } + } + case 0x09: // [9] 096F - 0A25 + { + if (cp < U'\u0981' || cp > U'\u0A03') + return false; + + switch ((static_cast(cp) - 0x981ull) / 0x40ull) + { + case 0x00: return (1ull << (static_cast(cp) - 0x981ull)) & 0xE800000000000007ull; + case 0x01: return (cp <= U'\u09FE' && (1ull << (static_cast(cp) - 0x9C1ull)) & 0x2000000600401CCFull); + default: return true; + } + } + case 0x0A: // [10] 0A26 - 0ADC + { + if (cp < U'\u0A3C' || cp > U'\u0ACD') + return false; + + switch ((static_cast(cp) - 0xA3Cull) / 0x40ull) + { + case 0x00: return (cp <= U'\u0A75' && (1ull << (static_cast(cp) - 0xA3Cull)) & 0x23000000023987Dull); + case 0x01: return (cp >= U'\u0A81' && cp <= U'\u0A83'); + case 0x02: return (1u << (static_cast(cp) - 0xABCu)) & 0x3BBFDu; + TOML_NO_DEFAULT_CASE; + } + } + case 0x0B: // [11] 0ADD - 0B93 + { + if (cp < U'\u0AE2' || cp > U'\u0B82') + return false; + + switch ((static_cast(cp) - 0xAE2ull) / 0x40ull) + { + case 0x00: return (cp <= U'\u0B03' && (1ull << (static_cast(cp) - 0xAE2ull)) & 0x3BF000003ull); + case 0x01: return (cp >= U'\u0B3C' && cp <= U'\u0B57' && (1u << (static_cast(cp) - 0xB3Cu)) & 0xE0399FDu); + 
case 0x02: return (1ull << (static_cast(cp) - 0xB62ull)) & 0x100000003ull; + TOML_NO_DEFAULT_CASE; + } + } + case 0x0C: // [12] 0B94 - 0C4A + { + if (cp < U'\u0BBE') + return false; + TOML_ASSUME(cp <= U'\u0C4A'); + + switch ((static_cast(cp) - 0xBBEull) / 0x40ull) + { + case 0x00: return (cp <= U'\u0BD7' && (1u << (static_cast(cp) - 0xBBEu)) & 0x200F71Fu); + case 0x01: return (cp >= U'\u0C00' && cp <= U'\u0C04'); + case 0x02: return (1u << (static_cast(cp) - 0xC3Eu)) & 0x177Fu; + TOML_NO_DEFAULT_CASE; + } + } + case 0x0D: // [13] 0C4B - 0D01 + { + TOML_ASSUME(cp >= U'\u0C4B' && cp <= U'\u0D01'); + + switch ((static_cast(cp) - 0xC4Bull) / 0x40ull) + { + case 0x00: return (cp <= U'\u0C83' && (1ull << (static_cast(cp) - 0xC4Bull)) & 0x1C0000001800C07ull); + case 0x01: return (cp >= U'\u0CBC' && (1u << (static_cast(cp) - 0xCBCu)) & 0x5DFDu); + case 0x02: return (1ull << (static_cast(cp) - 0xCCBull)) & 0x60000001800C07ull; + TOML_NO_DEFAULT_CASE; + } + } + case 0x0E: // [14] 0D02 - 0DB8 + { + if (cp > U'\u0D83') + return false; + TOML_ASSUME(cp >= U'\u0D02'); + + switch ((static_cast(cp) - 0xD02ull) / 0x40ull) + { + case 0x00: return (1ull << (static_cast(cp) - 0xD02ull)) & 0xF600000000000003ull; + case 0x01: return (1ull << (static_cast(cp) - 0xD42ull)) & 0x8000000300200F77ull; + default: return true; + } + } + case 0x0F: // [15] 0DB9 - 0E6F + { + if (cp < U'\u0DCA' || cp > U'\u0E4E') + return false; + + switch ((static_cast(cp) - 0xDCAull) / 0x40ull) + { + case 0x00: return (cp <= U'\u0DF3' && (1ull << (static_cast(cp) - 0xDCAull)) & 0x300003FD7E1ull); + case 0x01: return (cp >= U'\u0E31' && (1u << (static_cast(cp) - 0xE31u)) & 0x1C003F9u); + default: return true; + } + } + case 0x10: // [16] 0E70 - 0F26 + { + if (cp < U'\u0EB1' || cp > U'\u0F19') + return false; + + switch ((static_cast(cp) - 0xEB1ull) / 0x40ull) + { + case 0x00: return (cp <= U'\u0ECD' && (1u << (static_cast(cp) - 0xEB1u)) & 0x1F800FF9u); + case 0x01: return cp >= U'\u0F18'; + TOML_NO_DEFAULT_CASE; 
+ } + } + case 0x11: // [17] 0F27 - 0FDD + { + if (cp < U'\u0F35' || cp > U'\u0FC6') + return false; + + switch ((static_cast(cp) - 0xF35ull) / 0x40ull) + { + case 0x00: return (1ull << (static_cast(cp) - 0xF35ull)) & 0xF000000000000615ull; + case 0x01: return (1ull << (static_cast(cp) - 0xF75ull)) & 0xFFFFFFF7FF06FFFFull; + case 0x02: return (1u << (static_cast(cp) - 0xFB5u)) & 0x200FFu; + TOML_NO_DEFAULT_CASE; + } + } + case 0x12: // [18] 0FDE - 1094 + { + if (cp < U'\u102B' || cp > U'\u108F') + return false; + + switch ((static_cast(cp) - 0x102Bull) / 0x40ull) + { + case 0x00: return (1ull << (static_cast(cp) - 0x102Bull)) & 0xF3B87800000FFFFFull; + case 0x01: return (1ull << (static_cast(cp) - 0x106Bull)) & 0x17FF8003C7ull; + TOML_NO_DEFAULT_CASE; + } + } + case 0x13: return (cp >= U'\u109A' && cp <= U'\u109D'); + case 0x16: return (cp >= U'\u135D' && cp <= U'\u135F'); + case 0x1C: // [28] 1704 - 17BA + { + if (cp < U'\u1712') + return false; + TOML_ASSUME(cp <= U'\u17BA'); + + switch ((static_cast(cp) - 0x1712ull) / 0x40ull) + { + case 0x00: return (cp <= U'\u1734' && (1ull << (static_cast(cp) - 0x1712ull)) & 0x700000007ull); + case 0x01: return (cp <= U'\u1773' && (1ull << (static_cast(cp) - 0x1752ull)) & 0x300000003ull); + case 0x02: return cp >= U'\u17B4'; + TOML_NO_DEFAULT_CASE; + } + } + case 0x1D: return (cp >= U'\u17BB' && cp <= U'\u17D3') || (cp >= U'\u180B' && cp <= U'\u180D') + || cp == U'\u17DD'; + case 0x1E: return (cp >= U'\u1885' && cp <= U'\u1886') || (cp >= U'\u1920' && cp <= U'\u1928') + || cp == U'\u18A9'; + case 0x1F: return (cp <= U'\u193B' && (1u << (static_cast(cp) - 0x1929u)) & 0x7FF87u); + case 0x20: // [32] 19E0 - 1A96 + { + if (cp < U'\u1A17' || cp > U'\u1A7F') + return false; + + switch ((static_cast(cp) - 0x1A17ull) / 0x40ull) + { + case 0x00: return (1ull << (static_cast(cp) - 0x1A17ull)) & 0xC00000000000001Full; + case 0x01: return (1ull << (static_cast(cp) - 0x1A57ull)) & 0x13FFFFFFEFFull; + TOML_NO_DEFAULT_CASE; + } + } + case 
0x21: // [33] 1A97 - 1B4D + { + if (cp < U'\u1AB0' || cp > U'\u1B44') + return false; + + switch ((static_cast(cp) - 0x1AB0ull) / 0x40ull) + { + case 0x00: return (cp <= U'\u1AC0' && (1u << (static_cast(cp) - 0x1AB0u)) & 0x1BFFFu); + case 0x01: return (cp >= U'\u1B00' && cp <= U'\u1B04'); + case 0x02: return cp >= U'\u1B34'; + TOML_NO_DEFAULT_CASE; + } + } + case 0x22: // [34] 1B4E - 1C04 + { + if (cp < U'\u1B6B' || cp > U'\u1BF3') + return false; + + switch ((static_cast(cp) - 0x1B6Bull) / 0x40ull) + { + case 0x00: return (1ull << (static_cast(cp) - 0x1B6Bull)) & 0xFFC0000000E001FFull; + case 0x01: return (1ull << (static_cast(cp) - 0x1BABull)) & 0xF800000000000007ull; + default: return true; + } + } + case 0x23: return (cp >= U'\u1C24' && cp <= U'\u1C37'); + case 0x24: return (cp >= U'\u1CD0' && cp <= U'\u1CF9' && (1ull << (static_cast(cp) - 0x1CD0ull)) & 0x39021FFFFF7ull); + case 0x25: return (cp >= U'\u1DC0' && cp <= U'\u1DFF' && (1ull << (static_cast(cp) - 0x1DC0ull)) & 0xFBFFFFFFFFFFFFFFull); + case 0x29: return (cp >= U'\u20D0' && cp <= U'\u20F0' && (1ull << (static_cast(cp) - 0x20D0ull)) & 0x1FFE21FFFull); + case 0x3A: return (cp >= U'\u2CEF' && cp <= U'\u2CF1'); + case 0x3B: return (cp >= U'\u2DE0' && cp <= U'\u2DE3') || cp == U'\u2D7F'; + case 0x3C: return cp <= U'\u2DFF'; + case 0x3F: return (cp >= U'\u302A' && cp <= U'\u302F') || (cp >= U'\u3099' && cp <= U'\u309A'); + TOML_NO_DEFAULT_CASE; } } - case 63: return true; - default: return false; + case 0x02: // [2] 72F8 - AAF3 + { + if (cp < U'\uA66F' || cp > U'\uAAEF') + return false; + + constexpr uint_least64_t lookup_table_1[] = + { + 0x0001800000007FE1ull, 0x0000000000000000ull, 0x0000000000000006ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x21F0000010880000ull, 0x0000000000000000ull, + 0x0000000000060000ull, 0xFFFE0000007FFFE0ull, 0x7F80000000010007ull, 0x0000001FFF000000ull, + 0x00000000001E0000ull, 0x004000000003FFF0ull, 0xFC00000000000000ull, 0x00000000601000FFull, 
+ 0x0000000000007000ull, 0xF00000000005833Aull, 0x0000000000000001ull, + }; + return lookup_table_1[(static_cast(cp) - 0xA66Full) / 0x40ull] + & (0x1ull << ((static_cast(cp) - 0xA66Full) % 0x40ull)); + } + case 0x03: return (cp >= U'\uAAF5' && cp <= U'\uAAF6') || (cp >= U'\uABE3' && cp <= U'\uABEA') + || (cp >= U'\uABEC' && cp <= U'\uABED'); + case 0x04: // [4] E2F0 - 11AEB + { + if (cp < U'\uFB1E' || cp > U'\U00011A99') + return false; + + constexpr uint_least64_t lookup_table_1[] = + { + 0x0000000000000001ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0003FFFC00000000ull, + 0x000000000003FFFCull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000080000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000004ull, + 0x0000000000000000ull, 0x000000001F000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0003C1B800000000ull, + 0x000000021C000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000180ull, + 
0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x00000000000003C0ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000006000ull, 0x0000000000000000ull, + 0x0007FF0000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000001C00000000ull, + 0x000001FFFC000000ull, 0x0000001E00000000ull, 0x000000001FFC0000ull, 0x0000001C00000000ull, + 0x00000180007FFE00ull, 0x0000001C00200000ull, 0x00037807FFE00000ull, 0x0000000000000000ull, + 0x0000000103FFC000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000003C00001FFEull, + 0x0200E67F60000000ull, 0x00000000007C7F30ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x000001FFFF800000ull, 0x0000000000000001ull, 0x0000003FFFFC0000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0xC0000007FCFE0000ull, 0x0000000000000000ull, + 0x00000007FFFC0000ull, 0x0000000000000000ull, 0x0000000003FFE000ull, 0x8000000000000000ull, + 0x0000000000003FFFull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x000000001FFFC000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x00000035E6FC0000ull, 0x0000000000000000ull, 0xF3F8000000000000ull, 0x00001FF800000047ull, + 0x3FF80201EFE00000ull, 0x0FFFF00000000000ull, + }; + return lookup_table_1[(static_cast(cp) - 0xFB1Eull) / 0x40ull] + & (0x1ull << ((static_cast(cp) - 0xFB1Eull) % 0x40ull)); + } + case 0x05: // [5] 11AEC - 152E7 + { + if (cp < U'\U00011C2F' || cp > U'\U00011EF6') + return false; + + constexpr uint_least64_t lookup_table_1[] = + { + 0x000000000001FEFFull, 0xFDFFFFF800000000ull, 0x00000000000000FFull, 0x0000000000000000ull, + 0x00000000017F68FCull, 0x000001F6F8000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 
0x0000000000000000ull, 0x00000000000000F0ull, + }; + return lookup_table_1[(static_cast(cp) - 0x11C2Full) / 0x40ull] + & (0x1ull << ((static_cast(cp) - 0x11C2Full) % 0x40ull)); + } + case 0x06: // [6] 152E8 - 18AE3 + { + if (cp < U'\U00016AF0' || cp > U'\U00016FF1') + return false; + + constexpr uint_least64_t lookup_table_1[] = + { + 0x000000000000001Full, 0x000000000000007Full, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0xFFFFFFFE80000000ull, 0x0000000780FFFFFFull, 0x0010000000000000ull, + 0x0000000000000003ull, + }; + return lookup_table_1[(static_cast(cp) - 0x16AF0ull) / 0x40ull] + & (0x1ull << ((static_cast(cp) - 0x16AF0ull) % 0x40ull)); + } + case 0x07: return (cp >= U'\U0001BC9D' && cp <= U'\U0001BC9E'); + case 0x08: // [8] 1C2E0 - 1FADB + { + if (cp < U'\U0001D165' || cp > U'\U0001E94A') + return false; + + constexpr uint_least64_t lookup_table_1[] = + { + 0x0000007F3FC03F1Full, 0x00000000000001E0ull, 0x0000000000000000ull, 0x00000000E0000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 
0xFFFFFFFFF8000000ull, 0xFFFFFFFFFFC3FFFFull, + 0xF7C00000800100FFull, 0x00000000000007FFull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0xDFCFFFFBF8000000ull, 0x000000000000003Eull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x000000000003F800ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000780ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, + 0x0000000000000000ull, 0x0003F80000000000ull, 0x0000000000000000ull, 0x0000003F80000000ull, + }; + return lookup_table_1[(static_cast(cp) - 0x1D165ull) / 0x40ull] + & (0x1ull << ((static_cast(cp) - 0x1D165ull) % 0x40ull)); + } + case 0x3F: return cp >= U'\U000E0100'; + TOML_NO_DEFAULT_CASE; } } -} -#undef TOML_ASSUME_CODEPOINT_BETWEEN - -#endif // TOML_LANG_UNRELEASED + #endif // TOML_LANG_UNRELEASED +} // toml::impl #pragma endregion //-------------------------------------- ↑ toml_utf8_generated.h ----------------------------------------------------- @@ -4322,16 +4352,6 @@ namespace toml::impl return (codepoint >= U'0' && 
codepoint <= U'9'); } - [[nodiscard]] - TOML_GNU_ATTR(const) - constexpr bool is_hexadecimal_digit(char32_t codepoint) noexcept - { - return (codepoint >= U'a' && codepoint <= U'f') - || (codepoint >= U'A' && codepoint <= U'F') - || is_decimal_digit(codepoint) - ; - } - [[nodiscard]] TOML_GNU_ATTR(const) TOML_ALWAYS_INLINE @@ -4386,6 +4406,14 @@ namespace toml::impl ; } + [[nodiscard]] + TOML_GNU_ATTR(const) + TOML_ALWAYS_INLINE + constexpr bool is_control_character(char32_t codepoint) noexcept + { + return codepoint <= U'\u001F' || codepoint == U'\u007F'; + } + [[nodiscard]] TOML_GNU_ATTR(const) TOML_ALWAYS_INLINE @@ -5454,6 +5482,14 @@ namespace toml::impl { return value; } + + [[nodiscard]] + TOML_GNU_ATTR(pure) + TOML_ALWAYS_INLINE + constexpr const char32_t& operator* () const noexcept + { + return value; + } }; static_assert(std::is_trivial_v); static_assert(std::is_standard_layout_v); @@ -5691,13 +5727,13 @@ namespace toml::impl else { // first character read from stream - if (!history.count && !head) TOML_UNLIKELY + if TOML_UNLIKELY(!history.count && !head) head = reader.read_next(); // subsequent characters and not eof else if (head) { - if (history.count < history_buffer_size) TOML_UNLIKELY + if TOML_UNLIKELY(history.count < history_buffer_size) history.buffer[history.count++] = *head; else history.buffer[(history.first++ + history_buffer_size) % history_buffer_size] = *head; @@ -6545,11 +6581,11 @@ namespace toml::impl s += TOML_STRING_PREFIX('"'); for (auto c : str) { - if (c >= TOML_STRING_PREFIX('\x00') && c <= TOML_STRING_PREFIX('\x1F')) TOML_UNLIKELY + if TOML_UNLIKELY(c >= TOML_STRING_PREFIX('\x00') && c <= TOML_STRING_PREFIX('\x1F')) s.append(low_character_escape_table[c]); - else if (c == TOML_STRING_PREFIX('\x7F')) TOML_UNLIKELY + else if TOML_UNLIKELY(c == TOML_STRING_PREFIX('\x7F')) s.append(TOML_STRING_PREFIX("\\u007F"sv)); - else if (c == TOML_STRING_PREFIX('"')) TOML_UNLIKELY + else if TOML_UNLIKELY(c == TOML_STRING_PREFIX('"')) 
s.append(TOML_STRING_PREFIX("\\\""sv)); else s += c; @@ -6742,9 +6778,9 @@ namespace TOML_INTERNAL_NAMESPACE else if constexpr (std::is_same_v) { string_view cp_view; - if (arg.value <= U'\x1F') TOML_UNLIKELY + if TOML_UNLIKELY(arg.value <= U'\x1F') cp_view = low_character_escape_table[arg.value]; - else if (arg.value == U'\x7F') TOML_UNLIKELY + else if TOML_UNLIKELY(arg.value == U'\x7F') cp_view = TOML_STRING_PREFIX("\\u007F"sv); else cp_view = arg.template as_view(); @@ -7282,7 +7318,7 @@ namespace toml::impl if (!skipped_escaped_codepoint) advance_and_return_if_error_or_eof({}); } - else TOML_LIKELY + else { // handle closing delimiters if (*cp == U'"') @@ -8306,11 +8342,24 @@ namespace toml::impl assert_or_assume(!is_value_terminator(*cp)); push_parse_scope("value"sv); + // check if it begins with some control character + // (note that this will also fail for whitespace but we're assuming we've + // called consume_leading_whitespace() before calling parse_value()) + if TOML_UNLIKELY(is_control_character(*cp)) + set_error_and_return_default("unexpected control character"sv); + + // underscores at the beginning + else if (*cp == U'_') + set_error_and_return_default("values may not begin with underscores"sv); + const auto begin_pos = cp->position; std::unique_ptr val; do { + assert_or_assume(!is_control_character(*cp)); + assert_or_assume(*cp != U'_'); + // detect the value type and parse accordingly, // starting with value types that can be detected // unambiguously from just one character. 
@@ -8349,10 +8398,6 @@ namespace toml::impl else if (is_match(*cp, U'i', U'n', U'I', U'N')) val = std::make_unique>(parse_inf_or_nan()); - // underscores at the beginning - else if (*cp == U'_') - set_error_and_return_default("values may not begin with underscores"sv); - return_if_error({}); if (val) break; @@ -8405,68 +8450,76 @@ namespace toml::impl bool eof_while_scanning = false; const auto scan = [&]() TOML_MAY_THROW { - while (advance_count < utf8_buffered_reader::max_history_length) + if (is_eof()) + return; + assert_or_assume(!is_value_terminator(*cp)); + + do { - if (!cp || is_value_terminator(*cp)) + if (const auto c = **cp; c != U'_') { - eof_while_scanning = !cp; - break; - } + chars[char_count++] = c; - if (*cp != U'_') - { - chars[char_count++] = *cp; - switch (*cp) + if (is_decimal_digit(c)) + add_trait(has_digits); + else if (is_ascii_letter(c)) { - case U'B': [[fallthrough]]; - case U'b': - if (char_count == 2_sz && has_any(begins_zero)) - add_trait(has_b); - break; + assert_or_assume((c >= U'a' && c <= U'z') || (c >= U'A' && c <= U'Z')); + switch (static_cast(c | 32u)) + { + case U'b': + if (char_count == 2_sz && has_any(begins_zero)) + add_trait(has_b); + break; - case U'E': [[fallthrough]]; - case U'e': - if (char_count > 1_sz - && has_none(has_b | has_o | has_p | has_t | has_x | has_z | has_colon) - && (has_none(has_plus | has_minus) || has_any(begins_sign))) - add_trait(has_e); - break; + case U'e': + if (char_count > 1_sz + && has_none(has_b | has_o | has_p | has_t | has_x | has_z | has_colon) + && (has_none(has_plus | has_minus) || has_any(begins_sign))) + add_trait(has_e); + break; - case U'O': [[fallthrough]]; - case U'o': - if (char_count == 2_sz && has_any(begins_zero)) - add_trait(has_o); - break; + case U'o': + if (char_count == 2_sz && has_any(begins_zero)) + add_trait(has_o); + break; - case U'P': [[fallthrough]]; - case U'p': - if (has_any(has_x)) - add_trait(has_p); - break; + case U'p': + if (has_any(has_x)) + add_trait(has_p); + 
break; - case U'X': [[fallthrough]]; - case U'x': - if ((char_count == 2_sz && has_any(begins_zero)) - || (char_count == 3_sz && has_any(begins_sign) && chars[1] == U'0')) - add_trait(has_x); - break; + case U'x': + if ((char_count == 2_sz && has_any(begins_zero)) + || (char_count == 3_sz && has_any(begins_sign) && chars[1] == U'0')) + add_trait(has_x); + break; - case U'T': add_trait(has_t); break; - case U'Z': add_trait(has_z); break; - case U'+': add_trait(has_plus); break; - case U'-': add_trait(has_minus); break; - case U'.': add_trait(has_dot); break; - case U':': add_trait(has_colon); break; - - default: - if (is_decimal_digit(*cp)) - add_trait(has_digits); + case U't': add_trait(has_t); break; + case U'z': add_trait(has_z); break; + } + } + else if (c <= U':') + { + assert_or_assume(c < U'0' || c > U'9'); + switch (c) + { + case U'+': add_trait(has_plus); break; + case U'-': add_trait(has_minus); break; + case U'.': add_trait(has_dot); break; + case U':': add_trait(has_colon); break; + } } } advance_and_return_if_error(); advance_count++; + eof_while_scanning = is_eof(); } + while (advance_count < utf8_buffered_reader::max_history_length + && !is_eof() + && !is_value_terminator(*cp) + ); }; scan(); return_if_error({}); @@ -8476,7 +8529,7 @@ namespace toml::impl && traits == (bdigit_msk | has_minus) && chars[4] == U'-' && chars[7] == U'-' - && cp + && !is_eof() && *cp == U' ') { const auto pre_advance_count = advance_count; @@ -8495,7 +8548,7 @@ namespace toml::impl advance_and_return_if_error({}); advance_count++; - if (!cp || !is_decimal_digit(*cp)) + if (is_eof() || !is_decimal_digit(*cp)) backpedal(); else { diff --git a/vs/test_char.vcxproj b/vs/test_char.vcxproj index e5ced5b..cc1336e 100644 --- a/vs/test_char.vcxproj +++ b/vs/test_char.vcxproj @@ -51,7 +51,7 @@ ..\tests;%(AdditionalIncludeDirectories) TOML_CHAR_8_STRINGS=0;%(PreprocessorDefinitions) - TOML_UNRELEASED_FEATURES=1;%(PreprocessorDefinitions) + 
TOML_UNRELEASED_FEATURES=1;%(PreprocessorDefinitions) Use tests.h USING_PCH=1;%(PreprocessorDefinitions) @@ -84,12 +84,15 @@ Create + + + \ No newline at end of file diff --git a/vs/test_char8.vcxproj b/vs/test_char8.vcxproj index f25b7a0..961c61b 100644 --- a/vs/test_char8.vcxproj +++ b/vs/test_char8.vcxproj @@ -84,6 +84,8 @@ Create + + diff --git a/vs/test_char8_noexcept.vcxproj b/vs/test_char8_noexcept.vcxproj index c465631..ed8def0 100644 --- a/vs/test_char8_noexcept.vcxproj +++ b/vs/test_char8_noexcept.vcxproj @@ -86,6 +86,8 @@ Create + + diff --git a/vs/test_char8_strict.vcxproj b/vs/test_char8_strict.vcxproj index 696a694..be4945e 100644 --- a/vs/test_char8_strict.vcxproj +++ b/vs/test_char8_strict.vcxproj @@ -84,6 +84,8 @@ Create + + diff --git a/vs/test_char8_strict_noexcept.vcxproj b/vs/test_char8_strict_noexcept.vcxproj index 582c17b..2f41f25 100644 --- a/vs/test_char8_strict_noexcept.vcxproj +++ b/vs/test_char8_strict_noexcept.vcxproj @@ -86,6 +86,8 @@ Create + + diff --git a/vs/test_char_noexcept.vcxproj b/vs/test_char_noexcept.vcxproj index e7641e6..0cee6f1 100644 --- a/vs/test_char_noexcept.vcxproj +++ b/vs/test_char_noexcept.vcxproj @@ -86,6 +86,8 @@ Create + + diff --git a/vs/test_char_strict.vcxproj b/vs/test_char_strict.vcxproj index b838e68..cf735b9 100644 --- a/vs/test_char_strict.vcxproj +++ b/vs/test_char_strict.vcxproj @@ -84,6 +84,8 @@ Create + + diff --git a/vs/test_char_strict_noexcept.vcxproj b/vs/test_char_strict_noexcept.vcxproj index d18b7fd..7bd810e 100644 --- a/vs/test_char_strict_noexcept.vcxproj +++ b/vs/test_char_strict_noexcept.vcxproj @@ -86,6 +86,8 @@ Create + + diff --git a/vs/test_x86_char.vcxproj b/vs/test_x86_char.vcxproj index c8a7cd9..ef9157b 100644 --- a/vs/test_x86_char.vcxproj +++ b/vs/test_x86_char.vcxproj @@ -84,6 +84,8 @@ Create + + diff --git a/vs/test_x86_char8.vcxproj b/vs/test_x86_char8.vcxproj index b27348a..9f441f1 100644 --- a/vs/test_x86_char8.vcxproj +++ b/vs/test_x86_char8.vcxproj @@ -84,6 +84,8 @@ Create 
+ + diff --git a/vs/test_x86_char8_noexcept.vcxproj b/vs/test_x86_char8_noexcept.vcxproj index ba6f311..f93aa25 100644 --- a/vs/test_x86_char8_noexcept.vcxproj +++ b/vs/test_x86_char8_noexcept.vcxproj @@ -86,6 +86,8 @@ Create + + diff --git a/vs/test_x86_char8_strict.vcxproj b/vs/test_x86_char8_strict.vcxproj index c0f5f37..f8a3565 100644 --- a/vs/test_x86_char8_strict.vcxproj +++ b/vs/test_x86_char8_strict.vcxproj @@ -84,6 +84,8 @@ Create + + diff --git a/vs/test_x86_char8_strict_noexcept.vcxproj b/vs/test_x86_char8_strict_noexcept.vcxproj index fdc51bf..84958c7 100644 --- a/vs/test_x86_char8_strict_noexcept.vcxproj +++ b/vs/test_x86_char8_strict_noexcept.vcxproj @@ -86,6 +86,8 @@ Create + + diff --git a/vs/test_x86_char_noexcept.vcxproj b/vs/test_x86_char_noexcept.vcxproj index fd9d489..fb0e1c7 100644 --- a/vs/test_x86_char_noexcept.vcxproj +++ b/vs/test_x86_char_noexcept.vcxproj @@ -86,6 +86,8 @@ Create + + diff --git a/vs/test_x86_char_strict.vcxproj b/vs/test_x86_char_strict.vcxproj index f7322aa..baa7a9e 100644 --- a/vs/test_x86_char_strict.vcxproj +++ b/vs/test_x86_char_strict.vcxproj @@ -84,6 +84,8 @@ Create + + diff --git a/vs/test_x86_char_strict_noexcept.vcxproj b/vs/test_x86_char_strict_noexcept.vcxproj index a68a2ee..764b4e4 100644 --- a/vs/test_x86_char_strict_noexcept.vcxproj +++ b/vs/test_x86_char_strict_noexcept.vcxproj @@ -86,6 +86,8 @@ Create + +