// Formatting library for C++ - scanning API proof of concept // // Copyright (c) 2019 - present, Victor Zverovich // All rights reserved. // // For the license information refer to format.h. #include #include #include #include #include "fmt/format-inl.h" FMT_BEGIN_NAMESPACE namespace detail { inline auto is_whitespace(char c) -> bool { return c == ' ' || c == '\n'; } // If c is a hex digit returns its numeric value, otherwise -1. inline auto to_hex_digit(char c) -> int { if (c >= '0' && c <= '9') return c - '0'; if (c >= 'a' && c <= 'f') return c - 'a' + 10; if (c >= 'A' && c <= 'F') return c - 'A' + 10; return -1; } struct maybe_contiguous_range { const char* begin; const char* end; explicit operator bool() const { return begin != nullptr; } }; class scan_buffer { private: const char* ptr_; const char* end_; bool contiguous_; protected: scan_buffer(const char* ptr, const char* end, bool contiguous) : ptr_(ptr), end_(end), contiguous_(contiguous) {} ~scan_buffer() = default; void set(span buf) { ptr_ = buf.data; end_ = buf.data + buf.size; } auto ptr() const -> const char* { return ptr_; } public: scan_buffer(const scan_buffer&) = delete; void operator=(const scan_buffer&) = delete; // Fills the buffer with more input if available. virtual void consume() = 0; class sentinel {}; class iterator { private: const char** ptr_; scan_buffer* buf_; // This could be merged with ptr_. char value_; static auto get_sentinel() -> const char** { static const char* ptr = nullptr; return &ptr; } friend class scan_buffer; friend auto operator==(iterator lhs, sentinel) -> bool { return *lhs.ptr_ == nullptr; } friend auto operator!=(iterator lhs, sentinel) -> bool { return *lhs.ptr_ != nullptr; } iterator(scan_buffer* buf) : buf_(buf) { if (buf->ptr_ == buf->end_) { ptr_ = get_sentinel(); return; } ptr_ = &buf->ptr_; value_ = *buf->ptr_; } friend scan_buffer& get_buffer(iterator it) { return *it.buf_; } public: iterator() : ptr_(get_sentinel()), buf_(nullptr) {} auto operator++() -> iterator& { if (!buf_->try_consume()) ptr_ = get_sentinel(); value_ = *buf_->ptr_; return *this; } auto operator++(int) -> iterator { iterator copy = *this; ++*this; return copy; } auto operator*() const -> char { return value_; } auto base() const -> const char* { return buf_->ptr_; } friend auto to_contiguous(iterator it) -> maybe_contiguous_range; friend auto advance(iterator it, size_t n) -> iterator; }; friend auto to_contiguous(iterator it) -> maybe_contiguous_range { if (it.buf_->is_contiguous()) return {it.buf_->ptr_, it.buf_->end_}; return {nullptr, nullptr}; } friend auto advance(iterator it, size_t n) -> iterator { FMT_ASSERT(it.buf_->is_contiguous(), ""); const char*& ptr = it.buf_->ptr_; ptr += n; it.value_ = *ptr; if (ptr == it.buf_->end_) it.ptr_ = iterator::get_sentinel(); return it; } auto begin() -> iterator { return this; } auto end() -> sentinel { return {}; } auto is_contiguous() const -> bool { return contiguous_; } // Tries consuming a single code unit. Returns true iff there is more input. auto try_consume() -> bool { FMT_ASSERT(ptr_ != end_, ""); ++ptr_; if (ptr_ != end_) return true; consume(); return ptr_ != end_; } }; using scan_iterator = scan_buffer::iterator; using scan_sentinel = scan_buffer::sentinel; class string_scan_buffer final : public scan_buffer { private: void consume() override {} public: explicit string_scan_buffer(string_view s) : scan_buffer(s.begin(), s.end(), true) {} }; class file_scan_buffer final : public scan_buffer { private: template static auto get_file(F* f, int) -> glibc_file { return f; } template static auto get_file(F* f, int) -> apple_file { return f; } static auto get_file(FILE* f, ...) -> fallback_file { return f; } decltype(get_file(static_cast(nullptr), 0)) file_; // Fills the buffer if it is empty. void fill() { span buf = file_.get_read_buffer(); if (buf.size == 0) { int c = file_.get(); // Put the character back since we are only filling the buffer. if (c != EOF) file_.unget(static_cast(c)); buf = file_.get_read_buffer(); } set(buf); } void consume() override { // Consume the current buffer content. size_t n = to_unsigned(ptr() - file_.get_read_buffer().data); for (size_t i = 0; i != n; ++i) file_.get(); fill(); } public: explicit file_scan_buffer(FILE* f) : scan_buffer(nullptr, nullptr, false), file_(f) { flockfile(f); fill(); } ~file_scan_buffer() { funlockfile(file_); } }; } // namespace detail template struct scanner { // A deleted default constructor indicates a disabled scanner. scanner() = delete; }; class scan_parse_context { private: string_view format_; public: using iterator = string_view::iterator; explicit FMT_CONSTEXPR scan_parse_context(string_view format) : format_(format) {} FMT_CONSTEXPR auto begin() const -> iterator { return format_.begin(); } FMT_CONSTEXPR auto end() const -> iterator { return format_.end(); } void advance_to(iterator it) { format_.remove_prefix(detail::to_unsigned(it - begin())); } }; namespace detail { enum class scan_type { none_type, int_type, uint_type, long_long_type, ulong_long_type, string_type, string_view_type, custom_type }; template struct custom_scan_arg { void* value; void (*scan)(void* arg, scan_parse_context& parse_ctx, Context& ctx); }; } // namespace detail // A scan argument. Context is a template parameter for the compiled API where // output can be unbuffered. template class basic_scan_arg { private: using scan_type = detail::scan_type; scan_type type_; union { int* int_value_; unsigned* uint_value_; long long* long_long_value_; unsigned long long* ulong_long_value_; std::string* string_; string_view* string_view_; detail::custom_scan_arg custom_; // TODO: more types }; template static void scan_custom_arg(void* arg, scan_parse_context& parse_ctx, Context& ctx) { auto s = scanner(); parse_ctx.advance_to(s.parse(parse_ctx)); ctx.advance_to(s.scan(*static_cast(arg), ctx)); } public: FMT_CONSTEXPR basic_scan_arg() : type_(scan_type::none_type), int_value_(nullptr) {} FMT_CONSTEXPR basic_scan_arg(int& value) : type_(scan_type::int_type), int_value_(&value) {} FMT_CONSTEXPR basic_scan_arg(unsigned& value) : type_(scan_type::uint_type), uint_value_(&value) {} FMT_CONSTEXPR basic_scan_arg(long long& value) : type_(scan_type::long_long_type), long_long_value_(&value) {} FMT_CONSTEXPR basic_scan_arg(unsigned long long& value) : type_(scan_type::ulong_long_type), ulong_long_value_(&value) {} FMT_CONSTEXPR basic_scan_arg(std::string& value) : type_(scan_type::string_type), string_(&value) {} FMT_CONSTEXPR basic_scan_arg(string_view& value) : type_(scan_type::string_view_type), string_view_(&value) {} template FMT_CONSTEXPR basic_scan_arg(T& value) : type_(scan_type::custom_type) { custom_.value = &value; custom_.scan = scan_custom_arg; } constexpr explicit operator bool() const noexcept { return type_ != scan_type::none_type; } auto type() const -> detail::scan_type { return type_; } template auto visit(Visitor&& vis) -> decltype(vis(monostate())) { switch (type_) { case scan_type::none_type: break; case scan_type::int_type: return vis(*int_value_); case scan_type::uint_type: return vis(*uint_value_); case scan_type::long_long_type: return vis(*long_long_value_); case scan_type::ulong_long_type: return vis(*ulong_long_value_); case scan_type::string_type: return vis(*string_); case scan_type::string_view_type: return vis(*string_view_); case scan_type::custom_type: break; } return vis(monostate()); } auto scan_custom(const char* parse_begin, scan_parse_context& parse_ctx, Context& ctx) const -> bool { if (type_ != scan_type::custom_type) return false; parse_ctx.advance_to(parse_begin); custom_.scan(custom_.value, parse_ctx, ctx); return true; } }; class scan_context; using scan_arg = basic_scan_arg; struct scan_args { int size; const scan_arg* data; template FMT_CONSTEXPR scan_args(const std::array& store) : size(N), data(store.data()) { static_assert(N < INT_MAX, "too many arguments"); } }; class scan_context { private: detail::scan_buffer& buf_; scan_args args_; public: using iterator = detail::scan_iterator; using sentinel = detail::scan_sentinel; explicit FMT_CONSTEXPR scan_context(detail::scan_buffer& buf, scan_args args) : buf_(buf), args_(args) {} FMT_CONSTEXPR auto arg(int id) const -> scan_arg { return id < args_.size ? args_.data[id] : scan_arg(); } auto begin() const -> iterator { return buf_.begin(); } auto end() const -> sentinel { return {}; } void advance_to(iterator) { buf_.consume(); } }; namespace detail { const char* parse_scan_specs(const char* begin, const char* end, format_specs& specs, scan_type) { while (begin != end) { switch (to_ascii(*begin)) { // TODO: parse more scan format specifiers case 'x': specs.type = presentation_type::hex; ++begin; break; case '}': return begin; } } return begin; } template ::value)> auto read(scan_iterator it, T& value) -> scan_iterator { if (it == scan_sentinel()) return it; char c = *it; if (c < '0' || c > '9') report_error("invalid input"); int num_digits = 0; T n = 0, prev = 0; char prev_digit = c; do { prev = n; n = n * 10 + static_cast(c - '0'); prev_digit = c; c = *++it; ++num_digits; if (c < '0' || c > '9') break; } while (it != scan_sentinel()); // Check overflow. if (num_digits <= std::numeric_limits::digits10) { value = n; return it; } unsigned max = to_unsigned((std::numeric_limits::max)()); if (num_digits == std::numeric_limits::digits10 + 1 && prev * 10ull + unsigned(prev_digit - '0') <= max) { value = n; } else { report_error("number is too big"); } return it; } template ::value)> auto read_hex(scan_iterator it, T& value) -> scan_iterator { if (it == scan_sentinel()) return it; int digit = to_hex_digit(*it); if (digit < 0) report_error("invalid input"); int num_digits = 0; T n = 0; do { n = (n << 4) + static_cast(digit); ++num_digits; digit = to_hex_digit(*++it); if (digit < 0) break; } while (it != scan_sentinel()); // Check overflow. if (num_digits <= (std::numeric_limits::digits >> 2)) value = n; else report_error("number is too big"); return it; } template ::value)> auto read(scan_iterator it, T& value, const format_specs& specs) -> scan_iterator { if (specs.type == presentation_type::hex) return read_hex(it, value); return read(it, value); } template ::value)> auto read(scan_iterator it, T& value, const format_specs& specs = {}) -> scan_iterator { bool negative = it != scan_sentinel() && *it == '-'; if (negative) { ++it; if (it == scan_sentinel()) report_error("invalid input"); } using unsigned_type = typename std::make_unsigned::type; unsigned_type abs_value = 0; it = read(it, abs_value, specs); auto n = static_cast(abs_value); value = negative ? -n : n; return it; } auto read(scan_iterator it, std::string& value, const format_specs& = {}) -> scan_iterator { while (it != scan_sentinel() && *it != ' ') value.push_back(*it++); return it; } auto read(scan_iterator it, string_view& value, const format_specs& = {}) -> scan_iterator { auto range = to_contiguous(it); // This could also be checked at compile time in scan. if (!range) report_error("string_view requires contiguous input"); auto p = range.begin; while (p != range.end && *p != ' ') ++p; size_t size = to_unsigned(p - range.begin); value = {range.begin, size}; return advance(it, size); } auto read(scan_iterator it, monostate, const format_specs& = {}) -> scan_iterator { return it; } // An argument scanner that uses the default format, e.g. decimal for integers. struct default_arg_scanner { scan_iterator it; template FMT_INLINE auto operator()(T&& value) -> scan_iterator { return read(it, value); } }; // An argument scanner with format specifiers. struct arg_scanner { scan_iterator it; const format_specs& specs; template auto operator()(T&& value) -> scan_iterator { return read(it, value, specs); } }; struct scan_handler { private: scan_parse_context parse_ctx_; scan_context scan_ctx_; int next_arg_id_; using sentinel = scan_buffer::sentinel; public: FMT_CONSTEXPR scan_handler(string_view format, scan_buffer& buf, scan_args args) : parse_ctx_(format), scan_ctx_(buf, args), next_arg_id_(0) {} auto pos() const -> scan_buffer::iterator { return scan_ctx_.begin(); } void on_text(const char* begin, const char* end) { if (begin == end) return; auto it = scan_ctx_.begin(); for (; begin != end; ++begin, ++it) { if (it == sentinel() || *begin != *it) on_error("invalid input"); } scan_ctx_.advance_to(it); } FMT_CONSTEXPR auto on_arg_id() -> int { return on_arg_id(next_arg_id_++); } FMT_CONSTEXPR auto on_arg_id(int id) -> int { if (!scan_ctx_.arg(id)) on_error("argument index out of range"); return id; } FMT_CONSTEXPR auto on_arg_id(string_view id) -> int { if (id.data()) on_error("invalid format"); return 0; } void on_replacement_field(int arg_id, const char* begin) { scan_arg arg = scan_ctx_.arg(arg_id); if (arg.scan_custom(begin, parse_ctx_, scan_ctx_)) return; auto it = scan_ctx_.begin(); while (it != sentinel() && is_whitespace(*it)) ++it; scan_ctx_.advance_to(arg.visit(default_arg_scanner{it})); } auto on_format_specs(int arg_id, const char* begin, const char* end) -> const char* { scan_arg arg = scan_ctx_.arg(arg_id); if (arg.scan_custom(begin, parse_ctx_, scan_ctx_)) return parse_ctx_.begin(); auto specs = format_specs(); begin = parse_scan_specs(begin, end, specs, arg.type()); if (begin == end || *begin != '}') on_error("missing '}' in format string"); scan_ctx_.advance_to(arg.visit(arg_scanner{scan_ctx_.begin(), specs})); return begin; } void on_error(const char* message) { report_error(message); } }; void vscan(detail::scan_buffer& buf, string_view fmt, scan_args args) { auto h = detail::scan_handler(fmt, buf, args); detail::parse_format_string(fmt, h); } template void make_args(std::array&, std::tuple&) {} template void make_args(std::array& args, std::tuple& values) { using element_type = typename std::tuple_element>::type; static_assert(std::is_same, element_type>::value, ""); args[I] = std::get(values); make_args(args, values); } } // namespace detail template class scan_data { private: std::tuple values_; Range range_; public: scan_data() = default; scan_data(T... values) : values_(std::move(values)...) {} auto value() const -> decltype(std::get<0>(values_)) { return std::get<0>(values_); } auto values() const -> const std::tuple& { return values_; } auto make_args() -> std::array { auto args = std::array(); detail::make_args<0>(args, values_); return args; } auto range() const -> Range { return range_; } auto begin() const -> decltype(range_.begin()) { return range_.begin(); } auto end() const -> decltype(range_.end()) { return range_.end(); } }; template auto make_scan_args(T&... args) -> std::array { return {{args...}}; } class scan_error {}; // A rudimentary version of std::expected for testing the API shape. template class expected { private: T value_; bool has_value_ = true; public: expected(T value) : value_(std::move(value)) {} explicit operator bool() const { return has_value_; } auto operator->() const -> const T* { return &value_; } auto error() -> E const { return E(); } }; template using scan_result = expected, scan_error>; auto vscan(string_view input, string_view fmt, scan_args args) -> string_view::iterator { auto&& buf = detail::string_scan_buffer(input); detail::vscan(buf, fmt, args); return input.begin() + (buf.begin().base() - input.data()); } // Scans the input and stores the results (in)to args. template auto scan_to(string_view input, string_view fmt, T&... args) -> string_view::iterator { return vscan(input, fmt, make_scan_args(args...)); } template auto scan(string_view input, string_view fmt) -> scan_result { auto data = scan_data(); vscan(input, fmt, data.make_args()); return data; } template ::value)> auto scan_to(Range&& input, string_view fmt, T&... args) -> decltype(std::begin(input)) { auto it = std::begin(input); detail::vscan(get_buffer(it), fmt, make_scan_args(args...)); return it; } template auto scan_to(FILE* f, string_view fmt, T&... args) -> bool { auto&& buf = detail::file_scan_buffer(f); detail::vscan(buf, fmt, make_scan_args(args...)); return buf.begin() != buf.end(); } FMT_END_NAMESPACE