mirror of
https://github.com/marzer/tomlplusplus.git
synced 2024-11-02 02:26:28 +00:00
fd07301bae
also: - fixed a bunch of doxygen parsing issues - added `#define` leak detection to the single-header script - renamed `TOML_ALL_INLINE` to `TOML_HEADER_ONLY` (the old one still works too) - simplified abi namespace definitions
387 lines
9.8 KiB
C++
387 lines
9.8 KiB
C++
//# This file is a part of toml++ and is subject to the terms of the MIT license.
|
|
//# Copyright (c) 2019-2020 Mark Gillard <mark.gillard@outlook.com.au>
|
|
//# See https://github.com/marzer/tomlplusplus/blob/master/LICENSE for the full license text.
|
|
// SPDX-License-Identifier: MIT
|
|
|
|
#pragma once
|
|
//# {{
|
|
#include "toml_preprocessor.h"
|
|
#if !TOML_PARSER
|
|
#error This header cannot not be included when TOML_PARSER is disabled.
|
|
#endif
|
|
//# }}
|
|
#include "toml_utf8.h"
|
|
#include "toml_parse_error.h"
|
|
|
|
TOML_PUSH_WARNINGS
|
|
TOML_DISABLE_PADDING_WARNINGS
|
|
TOML_DISABLE_MISC_WARNINGS
|
|
|
|
TOML_IMPL_NAMESPACE_START
|
|
{
|
|
template <typename T>
|
|
class utf8_byte_stream;
|
|
|
|
inline constexpr auto utf8_byte_order_mark = "\xEF\xBB\xBF"sv;
|
|
|
|
template <typename Char>
|
|
class TOML_API utf8_byte_stream<std::basic_string_view<Char>> final
|
|
{
|
|
static_assert(sizeof(Char) == 1_sz);
|
|
|
|
private:
|
|
std::basic_string_view<Char> source;
|
|
size_t position = {};
|
|
|
|
public:
|
|
explicit constexpr utf8_byte_stream(std::basic_string_view<Char> sv) noexcept
|
|
: source{ sv }
|
|
{
|
|
// trim trailing nulls
|
|
size_t actual_len = source.length();
|
|
for (size_t i = actual_len; i --> 0_sz;)
|
|
{
|
|
if (source[i] != Char{}) // not '\0'
|
|
{
|
|
actual_len = i + 1_sz;
|
|
break;
|
|
}
|
|
}
|
|
if (source.length() != actual_len) // not '\0'
|
|
source = source.substr(0_sz, actual_len);
|
|
|
|
// skip bom
|
|
if (source.length() >= 3_sz && memcmp(utf8_byte_order_mark.data(), source.data(), 3_sz) == 0)
|
|
position += 3_sz;
|
|
}
|
|
|
|
[[nodiscard]]
|
|
TOML_ALWAYS_INLINE
|
|
constexpr bool eof() const noexcept
|
|
{
|
|
return position >= source.length();
|
|
}
|
|
|
|
[[nodiscard]]
|
|
TOML_ALWAYS_INLINE
|
|
constexpr bool peek_eof() const noexcept
|
|
{
|
|
return eof();
|
|
}
|
|
|
|
[[nodiscard]]
|
|
TOML_ALWAYS_INLINE
|
|
constexpr bool error() const noexcept
|
|
{
|
|
return false;
|
|
}
|
|
|
|
[[nodiscard]]
|
|
constexpr unsigned int operator() () noexcept
|
|
{
|
|
if (position >= source.length())
|
|
return 0xFFFFFFFFu;
|
|
return static_cast<unsigned int>(static_cast<uint8_t>(source[position++]));
|
|
}
|
|
};
|
|
|
|
template <typename Char>
|
|
class TOML_API utf8_byte_stream<std::basic_istream<Char>> final
|
|
{
|
|
static_assert(sizeof(Char) == 1_sz);
|
|
|
|
private:
|
|
std::basic_istream<Char>* source;
|
|
|
|
public:
|
|
explicit utf8_byte_stream(std::basic_istream<Char>& stream)
|
|
: source{ &stream }
|
|
{
|
|
if (!source->good()) // eof, fail, bad
|
|
return;
|
|
|
|
const auto initial_pos = source->tellg();
|
|
Char bom[3];
|
|
source->read(bom, 3);
|
|
if (source->bad() || (source->gcount() == 3 && memcmp(utf8_byte_order_mark.data(), bom, 3_sz) == 0))
|
|
return;
|
|
|
|
source->clear();
|
|
source->seekg(initial_pos, std::ios::beg);
|
|
}
|
|
|
|
[[nodiscard]]
|
|
TOML_ALWAYS_INLINE
|
|
bool eof() const noexcept
|
|
{
|
|
return source->eof();
|
|
}
|
|
|
|
[[nodiscard]]
|
|
TOML_ALWAYS_INLINE
|
|
bool peek_eof() const
|
|
{
|
|
using stream_traits = typename std::remove_pointer_t<decltype(source)>::traits_type;
|
|
return eof() || source->peek() == stream_traits::eof();
|
|
}
|
|
|
|
[[nodiscard]]
|
|
TOML_ALWAYS_INLINE
|
|
bool error() const noexcept
|
|
{
|
|
return !(*source);
|
|
}
|
|
|
|
[[nodiscard]]
|
|
unsigned int operator() ()
|
|
{
|
|
auto val = source->get();
|
|
if (val == std::basic_istream<Char>::traits_type::eof())
|
|
return 0xFFFFFFFFu;
|
|
return static_cast<unsigned int>(val);
|
|
}
|
|
};
|
|
|
|
TOML_ABI_NAMESPACE_BOOL(TOML_LARGE_FILES, lf, sf)
|
|
|
|
struct utf8_codepoint final
|
|
{
|
|
char32_t value;
|
|
char bytes[4];
|
|
source_position position;
|
|
|
|
[[nodiscard]]
|
|
std::string_view as_view() const noexcept
|
|
{
|
|
return bytes[3]
|
|
? std::string_view{ bytes, 4_sz }
|
|
: std::string_view{ bytes };
|
|
}
|
|
|
|
[[nodiscard]] TOML_ATTR(pure) constexpr operator char32_t& () noexcept { return value; }
|
|
[[nodiscard]] TOML_ATTR(pure) constexpr operator const char32_t& () const noexcept { return value; }
|
|
[[nodiscard]] TOML_ATTR(pure) constexpr const char32_t& operator* () const noexcept { return value; }
|
|
};
|
|
static_assert(std::is_trivial_v<utf8_codepoint>);
|
|
static_assert(std::is_standard_layout_v<utf8_codepoint>);
|
|
|
|
TOML_ABI_NAMESPACE_END // TOML_LARGE_FILES
|
|
|
|
TOML_ABI_NAMESPACE_BOOL(TOML_EXCEPTIONS, ex, noex)
|
|
|
|
// Error-reporting shims that let the same function bodies be written for both
// build modes:
//  - with exceptions enabled, TOML_ERROR(...) expands to `throw parse_error(...)`
//    and TOML_ERROR_CHECK expands to a no-op expression;
//  - without exceptions, TOML_ERROR(...) expands to `err.emplace(...)` and
//    TOML_ERROR_CHECK expands to `if (err) return nullptr`, so both require a
//    variable named `err` at the point of use and an enclosing function for
//    which `return nullptr` is valid.
#if TOML_EXCEPTIONS
|
|
#define TOML_ERROR_CHECK (void)0
|
|
#define TOML_ERROR throw parse_error
|
|
#else
|
|
#define TOML_ERROR_CHECK if (err) return nullptr
|
|
#define TOML_ERROR err.emplace
|
|
#endif
|
|
|
|
struct TOML_INTERFACE utf8_reader_interface
|
|
{
|
|
[[nodiscard]]
|
|
virtual const source_path_ptr& source_path() const noexcept = 0;
|
|
|
|
[[nodiscard]]
|
|
virtual const utf8_codepoint* read_next() = 0;
|
|
|
|
[[nodiscard]]
|
|
virtual bool peek_eof() const = 0;
|
|
|
|
#if !TOML_EXCEPTIONS
|
|
|
|
[[nodiscard]]
|
|
virtual optional<parse_error>&& error() noexcept = 0;
|
|
|
|
#endif
|
|
|
|
virtual ~utf8_reader_interface() noexcept = default;
|
|
};
|
|
|
|
template <typename T>
|
|
class TOML_EMPTY_BASES TOML_API utf8_reader final
|
|
: public utf8_reader_interface
|
|
{
|
|
private:
|
|
utf8_byte_stream<T> stream;
|
|
utf8_decoder decoder;
|
|
utf8_codepoint codepoints[2];
|
|
size_t cp_idx = 1;
|
|
uint8_t current_byte_count{};
|
|
source_path_ptr source_path_;
|
|
#if !TOML_EXCEPTIONS
|
|
optional<parse_error> err;
|
|
#endif
|
|
|
|
public:
|
|
|
|
template <typename U, typename String = std::string_view>
|
|
explicit utf8_reader(U && source, String&& source_path = {})
|
|
noexcept(std::is_nothrow_constructible_v<utf8_byte_stream<T>, U&&>)
|
|
: stream{ std::forward<U>(source) }
|
|
{
|
|
std::memset(codepoints, 0, sizeof(codepoints));
|
|
codepoints[0].position = { 1, 1 };
|
|
codepoints[1].position = { 1, 1 };
|
|
|
|
if (!source_path.empty())
|
|
source_path_ = std::make_shared<const std::string>(std::forward<String>(source_path));
|
|
}
|
|
|
|
[[nodiscard]]
|
|
const source_path_ptr& source_path() const noexcept override
|
|
{
|
|
return source_path_;
|
|
}
|
|
|
|
[[nodiscard]]
|
|
const utf8_codepoint* read_next() override
|
|
{
|
|
TOML_ERROR_CHECK;
|
|
|
|
auto& prev = codepoints[(cp_idx - 1_sz) % 2_sz];
|
|
|
|
if (stream.eof())
|
|
return nullptr;
|
|
else if (stream.error())
|
|
TOML_ERROR("An error occurred while reading from the underlying stream", prev.position, source_path_ );
|
|
else if (decoder.error())
|
|
TOML_ERROR( "Encountered invalid utf-8 sequence", prev.position, source_path_ );
|
|
|
|
TOML_ERROR_CHECK;
|
|
|
|
while (true)
|
|
{
|
|
uint8_t next_byte;
|
|
{
|
|
unsigned int next_byte_raw{ 0xFFFFFFFFu };
|
|
if constexpr (noexcept(stream()) || !TOML_EXCEPTIONS)
|
|
{
|
|
next_byte_raw = stream();
|
|
}
|
|
#if TOML_EXCEPTIONS
|
|
else
|
|
{
|
|
try
|
|
{
|
|
next_byte_raw = stream();
|
|
}
|
|
catch (const std::exception& exc)
|
|
{
|
|
throw parse_error{ exc.what(), prev.position, source_path_ };
|
|
}
|
|
catch (...)
|
|
{
|
|
throw parse_error{ "An unspecified error occurred", prev.position, source_path_ };
|
|
}
|
|
}
|
|
#endif
|
|
|
|
if (next_byte_raw >= 256u)
|
|
{
|
|
if (stream.eof())
|
|
{
|
|
if (decoder.needs_more_input())
|
|
TOML_ERROR("Encountered EOF during incomplete utf-8 code point sequence",
|
|
prev.position, source_path_);
|
|
return nullptr;
|
|
}
|
|
else
|
|
TOML_ERROR("An error occurred while reading from the underlying stream",
|
|
prev.position, source_path_);
|
|
}
|
|
|
|
TOML_ERROR_CHECK;
|
|
next_byte = static_cast<uint8_t>(next_byte_raw);
|
|
}
|
|
|
|
decoder(next_byte);
|
|
if (decoder.error())
|
|
TOML_ERROR( "Encountered invalid utf-8 sequence", prev.position, source_path_ );
|
|
|
|
TOML_ERROR_CHECK;
|
|
|
|
auto& current = codepoints[cp_idx % 2_sz];
|
|
current.bytes[current_byte_count++] = static_cast<char>(next_byte);
|
|
if (decoder.has_code_point())
|
|
{
|
|
//store codepoint
|
|
current.value = decoder.codepoint;
|
|
|
|
//reset prev (will be the next 'current')
|
|
std::memset(prev.bytes, 0, sizeof(prev.bytes));
|
|
current_byte_count = {};
|
|
if (is_line_break<false>(current.value))
|
|
prev.position = { static_cast<source_index>(current.position.line + 1), 1 };
|
|
else
|
|
prev.position = { current.position.line, static_cast<source_index>(current.position.column + 1) };
|
|
cp_idx++;
|
|
return ¤t;
|
|
}
|
|
}
|
|
|
|
TOML_UNREACHABLE;
|
|
}
|
|
|
|
[[nodiscard]]
|
|
bool peek_eof() const override
|
|
{
|
|
return stream.peek_eof();
|
|
}
|
|
|
|
#if !TOML_EXCEPTIONS
|
|
|
|
[[nodiscard]]
|
|
optional<parse_error>&& error() noexcept override
|
|
{
|
|
return std::move(err);
|
|
}
|
|
|
|
#endif
|
|
};
|
|
|
|
template <typename Char>
|
|
utf8_reader(std::basic_string_view<Char>, std::string_view) -> utf8_reader<std::basic_string_view<Char>>;
|
|
template <typename Char>
|
|
utf8_reader(std::basic_string_view<Char>, std::string&&) -> utf8_reader<std::basic_string_view<Char>>;
|
|
template <typename Char>
|
|
utf8_reader(std::basic_istream<Char>&, std::string_view) -> utf8_reader<std::basic_istream<Char>>;
|
|
template <typename Char>
|
|
utf8_reader(std::basic_istream<Char>&, std::string&&) -> utf8_reader<std::basic_istream<Char>>;
|
|
|
|
// Reader that wraps another utf8_reader_interface and keeps a bounded history
// of code points, with a step_back() operation on top of the reader interface.
// Member functions are only declared here; they are defined elsewhere.
class TOML_EMPTY_BASES TOML_API utf8_buffered_reader final
	: public utf8_reader_interface
{
	public:
		// Upper bound on the remembered history, counting the head code point.
		static constexpr size_t max_history_length = 72;

	private:
		static constexpr size_t history_buffer_size = max_history_length - 1; //'head' is stored in the reader

		utf8_reader_interface& reader;	// the wrapped reader (referenced, not owned)

		struct
		{
			utf8_codepoint buffer[history_buffer_size];
			size_t count;
			size_t first;
		}
		history = {};

		const utf8_codepoint* head = nullptr;
		size_t negative_offset = 0;

	public:
		explicit utf8_buffered_reader(utf8_reader_interface& reader_) noexcept;

		const source_path_ptr& source_path() const noexcept override;

		const utf8_codepoint* read_next() override;

		// NOTE(review): presumably moves back `count` code points within the
		// history and returns the one landed on; confirm against the definition.
		const utf8_codepoint* step_back(size_t count) noexcept;

		bool peek_eof() const override;

		#if !TOML_EXCEPTIONS
		optional<parse_error>&& error() noexcept override;
		#endif
};
|
|
|
|
TOML_ABI_NAMESPACE_END // TOML_EXCEPTIONS
|
|
}
|
|
TOML_IMPL_NAMESPACE_END
|
|
|
|
TOML_POP_WARNINGS // TOML_DISABLE_PADDING_WARNINGS, TOML_DISABLE_MISC_WARNINGS
|