tomlplusplus/include/toml++/impl/unicode.inl
Mark Gillard b41e12f736 fixed incorrect unicode scalar sequence transformations (#125)
also:
- fixed extended-precision fractional times causing parse error instead of truncating per the spec (closes #127)
- fixed some non-spec vertical whitespace being accepted as line breaks (closes #128)
- added `format_flags::allow_unicode_strings`
2022-01-04 23:58:22 +02:00

60 lines
1.5 KiB
C++

//# This file is a part of toml++ and is subject to the terms of the MIT license.
//# Copyright (c) Mark Gillard <mark.gillard@outlook.com.au>
//# See https://github.com/marzer/tomlplusplus/blob/master/LICENSE for the full license text.
// SPDX-License-Identifier: MIT
#pragma once
//# {{
#include "preprocessor.h"
#if !TOML_IMPLEMENTATION
#error This is an implementation-only header.
#endif
//# }}
#include "unicode.h"
#include "simd.h"
#include "header_start.h"
TOML_IMPL_NAMESPACE_START
{
// Reports whether every character in a buffer is plain ASCII.
//
// str: pointer to the first character of the buffer.
// len: number of characters to inspect.
//
// Returns true when no inspected character exceeds 127 once read as an
// unsigned char (an empty buffer therefore yields true), false otherwise.
TOML_EXTERNAL_LINKAGE
bool is_ascii(const char* str, size_t len) noexcept
{
	const char* const last = str + len;

#if TOML_HAS_SSE2 && (128 % CHAR_BIT) == 0
	{
		// How many chars fit into one 128-bit register.
		constexpr size_t lane_count = 128u / CHAR_BIT;

		// Length of the longest prefix that is a whole number of registers.
		const size_t vector_len = (len / lane_count) * lane_count;
		if (vector_len != 0u)
		{
			// OR all full-width blocks together; a set high bit in any block
			// survives in the accumulator, so one check after the loop suffices.
			const char* const vector_end = str + vector_len;
			__m128i accumulated			 = _mm_setzero_si128();
			while (str < vector_end)
			{
				// Unaligned load: nothing here guarantees the buffer's alignment.
				const __m128i block = _mm_loadu_si128(reinterpret_cast<const __m128i*>(str));
				accumulated			= _mm_or_si128(accumulated, block);
				str += lane_count;
			}

			// Signed per-byte compare (0 > byte) marks every byte whose high bit is set.
			const __m128i non_ascii_lanes = _mm_cmpgt_epi8(_mm_setzero_si128(), accumulated);

#if TOML_HAS_SSE4_1
			// testz yields non-zero only when no byte was marked.
			const bool all_ascii = _mm_testz_si128(non_ascii_lanes, non_ascii_lanes) != 0;
#else
			// Without SSE4.1: invert the marks and require all 16 bytes to be clean.
			const __m128i clean_lanes = _mm_cmpeq_epi8(non_ascii_lanes, _mm_setzero_si128());
			const bool all_ascii	  = _mm_movemask_epi8(clean_lanes) == 0xFFFF;
#endif
			if (!all_ascii)
				return false;
		}
	}
#endif

	// Scalar pass over whatever the vector loop left unconsumed
	// (that is the entire buffer when the SSE2 path is compiled out).
	while (str < last)
	{
		if (static_cast<unsigned char>(*str) > 127u)
			return false;
		++str;
	}
	return true;
}
}
TOML_IMPL_NAMESPACE_END;
#include "header_end.h"