tomlplusplus/tests/unicode.cpp
2021-01-02 17:48:47 +02:00

216 lines
5.8 KiB
C++

// This file is a part of toml++ and is subject to the terms of the MIT license.
// Copyright (c) Mark Gillard <mark.gillard@outlook.com.au>
// See https://github.com/marzer/tomlplusplus/blob/master/LICENSE for the full license text.
// SPDX-License-Identifier: MIT
#include "tests.h"
using namespace toml::impl;
// Signature shared by every code point classifier exercised in this file:
// takes a single char32_t, returns bool, and is noexcept.
using func_type = bool(char32_t) noexcept;
// Table of classifiers consulted by in_only(): a code point accepted by one entry
// is expected to be rejected by every other entry.
inline constexpr func_type* funcs[] =
{
    // these must be mutually-exclusive
    impl::is_ascii_letter,
    impl::is_ascii_whitespace,
    impl::is_ascii_line_break<true>,
    impl::is_decimal_digit,
    impl::is_string_delimiter,
    impl::is_non_ascii_whitespace,
    impl::is_non_ascii_line_break,
    impl::is_unicode_surrogate,

    // only part of the table when TOML_LANG_UNRELEASED evaluates to true
#if TOML_LANG_UNRELEASED
    impl::is_non_ascii_letter,
    impl::is_non_ascii_number,
    impl::is_combining_mark,
#endif
};
// Returns true when `fptr` accepts `cp` and every other classifier in `funcs` rejects it.
// `cp` may be of any type that static_cast can convert to char32_t.
template <typename T>
inline bool in_only(func_type* fptr, T cp) noexcept
{
    const auto codepoint = static_cast<char32_t>(cp);

    // the classifier under test has to claim the code point in the first place
    if (!fptr(codepoint))
        return false;

    // ...and no sibling classifier in the table may claim it as well
    for (auto other : funcs)
    {
        if (other != fptr && other(codepoint))
            return false;
    }
    return true;
}
// Upper bound of every full-range sweep in this file (U+10FFFF, the last Unicode code point).
inline constexpr uint32_t unimax{ 0x10FFFFu };
// An inclusive range of code points, [first, last], consumed by the range helpers
// in(), in_only() and not_in().
struct codepoint_range
{
    char32_t first; // lowest code point in the range
    char32_t last;  // highest code point in the range (inclusive)

    // Builds a range from two endpoints of any types convertible to char32_t via
    // static_cast. The endpoints may be given in either order; they are stored so
    // that first <= last.
    template <typename T, typename U>
    TOML_NODISCARD_CTOR
    constexpr codepoint_range(T first_, U last_) noexcept
        : first{ static_cast<char32_t>(first_) },
          last{ static_cast<char32_t>(last_) }
    {
        // Swapped by hand rather than with std::swap: std::swap only became constexpr
        // in C++20, so calling it here would make a reversed range unusable in a
        // constant expression under C++17.
        if (last < first)
        {
            const char32_t lower = last;
            last = first;
            first = lower;
        }
    }

    // Builds a range holding exactly one code point. Deliberately not explicit so a
    // single code point can be passed wherever a codepoint_range is expected.
    template <typename T>
    TOML_NODISCARD_CTOR
    constexpr codepoint_range(T first_) noexcept
        : first{ static_cast<char32_t>(first_) },
          last{ first } // 'first' is declared before 'last', so it is already initialised here
    {}
};
// Returns true when `fptr` accepts every code point in the inclusive `range`
// (and trivially true when range.last < range.first).
inline bool in(func_type* fptr, codepoint_range range) noexcept
{
    // The cursor is 64 bits wide on purpose: with a 32-bit char32_t cursor the
    // condition `cp <= range.last` can never become false when range.last is
    // 0xFFFFFFFF, because the increment wraps back to 0 and the loop never ends.
    const uint64_t last = range.last;
    for (uint64_t cp = range.first; cp <= last; ++cp)
    {
        if (!fptr(static_cast<char32_t>(cp)))
            return false;
    }
    return true;
}
// Returns true when every code point in the inclusive `range` passes the single
// code point in_only() check, i.e. is accepted by `fptr` and by no other classifier
// in `funcs` (trivially true when range.last < range.first).
inline bool in_only(func_type* fptr, codepoint_range range) noexcept
{
    // The cursor is 64 bits wide on purpose: with a 32-bit char32_t cursor the
    // condition `cp <= range.last` can never become false when range.last is
    // 0xFFFFFFFF, because the increment wraps back to 0 and the loop never ends.
    const uint64_t last = range.last;
    for (uint64_t cp = range.first; cp <= last; ++cp)
    {
        // cast back to char32_t so the per-code-point template overload is selected
        if (!in_only(fptr, static_cast<char32_t>(cp)))
            return false;
    }
    return true;
}
// Returns true when `fptr` rejects every code point in the inclusive `range`
// (and trivially true when range.last < range.first).
inline bool not_in(func_type* fptr, codepoint_range range) noexcept
{
    // The cursor is 64 bits wide on purpose: with a 32-bit char32_t cursor the
    // condition `cp <= range.last` can never become false when range.last is
    // 0xFFFFFFFF, because the increment wraps back to 0 and the loop never ends.
    const uint64_t last = range.last;
    for (uint64_t cp = range.first; cp <= last; ++cp)
    {
        if (fptr(static_cast<char32_t>(cp)))
            return false;
    }
    return true;
}
// Sweeps every code point from U+0000 through unimax: is_ascii_letter must accept
// exactly 'A'-'Z' and 'a'-'z', and (via in_only) no other classifier listed in funcs
// may accept those letters.
TEST_CASE("unicode - is_ascii_letter")
{
static constexpr auto fn = is_ascii_letter;
// everything before 'A'
REQUIRE(not_in(fn, { U'\0', U'@' }));
REQUIRE(in_only(fn, { U'A', U'Z' }));
// the six code points between 'Z' and 'a'
REQUIRE(not_in(fn, { U'[', U'`' }));
REQUIRE(in_only(fn, { U'a', U'z' }));
// everything after 'z'
REQUIRE(not_in(fn, { U'{', unimax }));
}
// Sweeps every code point from U+0000 through unimax: is_ascii_whitespace must accept
// exactly tab (U+0009) and space (U+0020), and (via in_only) no other classifier
// listed in funcs may accept them.
TEST_CASE("unicode - is_ascii_whitespace")
{
static constexpr auto fn = is_ascii_whitespace;
REQUIRE(not_in(fn, { U'\0', U'\u0008' }));
REQUIRE(in_only(fn, U'\t' ));
// U+000A..U+001F: the code points between tab and space
REQUIRE(not_in(fn, { U'\n', U'\u001F' }));
REQUIRE(in_only(fn, U' ' ));
REQUIRE(not_in(fn, { U'!', unimax }));
}
// Sweeps every code point from U+0000 through unimax: is_ascii_line_break<true> must
// accept exactly the contiguous run '\n'..'\r' (U+000A..U+000D, which also covers
// vertical tab and form feed), and (via in_only) no other classifier listed in funcs
// may accept those code points.
TEST_CASE("unicode - is_ascii_line_break")
{
static constexpr auto fn = is_ascii_line_break<true>;
REQUIRE(not_in(fn, { U'\0', U'\t' }));
REQUIRE(in_only(fn, { U'\n', U'\r' }));
REQUIRE(not_in(fn, { U'\u000E', unimax }));
}
// Sweeps every code point from U+0000 through unimax: is_binary_digit must accept
// exactly '0' and '1'.
// The positive range uses in() rather than in_only(): is_binary_digit is not one of
// the classifiers listed in funcs, so only membership is checked, not exclusivity.
TEST_CASE("unicode - is_binary_digit")
{
static constexpr auto fn = is_binary_digit;
REQUIRE(not_in(fn, { U'\0', U'/' }));
REQUIRE(in(fn, { U'0', U'1' }));
REQUIRE(not_in(fn, { U'2', unimax }));
}
// Sweeps every code point from U+0000 through unimax: is_octal_digit must accept
// exactly '0'-'7'.
// The positive range uses in() rather than in_only(): is_octal_digit is not one of
// the classifiers listed in funcs, so only membership is checked, not exclusivity.
TEST_CASE("unicode - is_octal_digit")
{
static constexpr auto fn = is_octal_digit;
REQUIRE(not_in(fn, { U'\0', U'/' }));
REQUIRE(in(fn, { U'0', U'7' }));
REQUIRE(not_in(fn, { U'8', unimax }));
}
// Sweeps every code point from U+0000 through unimax: is_decimal_digit must accept
// exactly '0'-'9'.
// is_decimal_digit is one of the classifiers listed in funcs, which are required to be
// mutually exclusive, so the positive range is checked with in_only() like every other
// classifier in that table; in_only() is a strict superset of the in() check.
TEST_CASE("unicode - is_decimal_digit")
{
    static constexpr auto fn = is_decimal_digit;
    // everything before '0'
    REQUIRE(not_in(fn, { U'\0', U'/' }));
    REQUIRE(in_only(fn, { U'0', U'9' }));
    // everything after '9'
    REQUIRE(not_in(fn, { U':', unimax }));
}
// hex_to_dec must map every hexadecimal digit character to its numeric value (0-15),
// for both upper-case and lower-case letters.
TEST_CASE("unicode - hex_to_dec")
{
// decimal digits '0'-'9'
REQUIRE(hex_to_dec(U'0') == 0x0u);
REQUIRE(hex_to_dec(U'1') == 0x1u);
REQUIRE(hex_to_dec(U'2') == 0x2u);
REQUIRE(hex_to_dec(U'3') == 0x3u);
REQUIRE(hex_to_dec(U'4') == 0x4u);
REQUIRE(hex_to_dec(U'5') == 0x5u);
REQUIRE(hex_to_dec(U'6') == 0x6u);
REQUIRE(hex_to_dec(U'7') == 0x7u);
REQUIRE(hex_to_dec(U'8') == 0x8u);
REQUIRE(hex_to_dec(U'9') == 0x9u);
// upper-case letters 'A'-'F'
REQUIRE(hex_to_dec(U'A') == 0xAu);
REQUIRE(hex_to_dec(U'B') == 0xBu);
REQUIRE(hex_to_dec(U'C') == 0xCu);
REQUIRE(hex_to_dec(U'D') == 0xDu);
REQUIRE(hex_to_dec(U'E') == 0xEu);
REQUIRE(hex_to_dec(U'F') == 0xFu);
// lower-case letters 'a'-'f' must give the same values as their upper-case forms
REQUIRE(hex_to_dec(U'a') == 0xau);
REQUIRE(hex_to_dec(U'b') == 0xbu);
REQUIRE(hex_to_dec(U'c') == 0xcu);
REQUIRE(hex_to_dec(U'd') == 0xdu);
REQUIRE(hex_to_dec(U'e') == 0xeu);
REQUIRE(hex_to_dec(U'f') == 0xfu);
}
// Sweeps every code point from U+0000 through unimax: is_string_delimiter must accept
// exactly the double quote (U+0022) and the single quote (U+0027), and (via in_only)
// no other classifier listed in funcs may accept them.
TEST_CASE("unicode - is_string_delimiter")
{
static constexpr auto fn = is_string_delimiter;
REQUIRE(not_in(fn, { U'\0', U'!' }));
REQUIRE(in_only(fn, U'"' ));
// '#', '$', '%', '&': the code points between the two quote characters
REQUIRE(not_in(fn, { U'#', U'&' }));
REQUIRE(in_only(fn, U'\'' ));
REQUIRE(not_in(fn, { U'(', unimax }));
}
// Sweeps every code point from U+0000 through unimax: is_non_ascii_whitespace must
// accept exactly U+00A0, U+1680, U+2000..U+200A, U+202F, U+205F and U+3000, and
// (via in_only) no other classifier listed in funcs may accept them. Each accepted
// code point or run is bracketed by not_in checks over the gaps on either side.
TEST_CASE("unicode - is_non_ascii_whitespace")
{
static constexpr auto fn = is_non_ascii_whitespace;
REQUIRE(not_in(fn, { U'\0', U'\u009F' }));
REQUIRE(in_only(fn, U'\u00A0' )); // no-break space
REQUIRE(not_in(fn, { U'\u00A1', U'\u167F' }));
REQUIRE(in_only(fn, U'\u1680' )); // ogham space mark
REQUIRE(not_in(fn, { U'\u1681', U'\u1FFF' }));
REQUIRE(in_only(fn, { U'\u2000', U'\u200A' })); // en quad .. hair space
REQUIRE(not_in(fn, { U'\u200B', U'\u202E' }));
REQUIRE(in_only(fn, U'\u202F' )); // narrow no-break space
REQUIRE(not_in(fn, { U'\u2030', U'\u205E' }));
REQUIRE(in_only(fn, U'\u205F' )); // medium mathematical space
REQUIRE(not_in(fn, { U'\u2060', U'\u2FFF' }));
REQUIRE(in_only(fn, U'\u3000' )); // ideographic space
REQUIRE(not_in(fn, { U'\u3001', unimax }));
}
// Sweeps every code point from U+0000 through unimax: is_non_ascii_line_break must
// accept exactly U+0085, U+2028 and U+2029, and (via in_only) no other classifier
// listed in funcs may accept them.
TEST_CASE("unicode - is_non_ascii_line_break")
{
static constexpr auto fn = is_non_ascii_line_break;
REQUIRE(not_in(fn, { U'\0', U'\u0084' }));
REQUIRE(in_only(fn, U'\u0085' )); // next line (NEL)
REQUIRE(not_in(fn, { U'\u0086', U'\u2027' }));
REQUIRE(in_only(fn, { U'\u2028', U'\u2029' })); // line separator, paragraph separator
REQUIRE(not_in(fn, { U'\u202A', unimax }));
}
// Sweeps every code point from U+0000 through unimax: is_unicode_surrogate must accept
// exactly 0xD800..0xDFFF, and (via in_only) no other classifier listed in funcs may
// accept them.
// The endpoints around the surrogate block are written as integer literals and
// converted by codepoint_range's static_cast; NOTE(review): presumably because
// \u escapes naming surrogate code points are rejected by the compiler - confirm.
TEST_CASE("unicode - is_unicode_surrogate")
{
static constexpr auto fn = is_unicode_surrogate;
REQUIRE(not_in(fn, { U'\0', 0xD7FFu }));
REQUIRE(in_only(fn, { 0xD800u, 0xDFFF }));
REQUIRE(not_in(fn, { 0xE000, unimax }));
}