fmt/test/format-impl-test.cc

558 lines
18 KiB
C++
Raw Normal View History

2018-03-04 17:16:51 +00:00
// Formatting library for C++ - formatting library implementation tests
//
// Copyright (c) 2012 - present, Victor Zverovich
// All rights reserved.
//
// For the license information refer to format.h.
2014-09-05 14:35:00 +00:00
2016-03-04 17:04:28 +00:00
#include <algorithm>
#include <cstring>
2021-04-30 21:21:49 +00:00
// clang-format off
2021-04-30 04:09:54 +00:00
#include "test-assert.h"
2021-04-30 21:21:49 +00:00
// clang-format on
2021-04-30 04:09:54 +00:00
2021-04-30 21:21:49 +00:00
#include "fmt/format.h"
#include "gmock/gmock.h"
#include "util.h"
2014-09-05 14:35:00 +00:00
2020-05-10 14:25:42 +00:00
using fmt::detail::bigint;
using fmt::detail::fp;
using fmt::detail::max_value;
2018-08-29 16:34:57 +00:00
2019-09-07 19:54:16 +00:00
// Compile-time checks that bigint is non-copyable: it must be neither
// copy-constructible nor copy-assignable.
static_assert(!std::is_copy_constructible<bigint>::value, "");
static_assert(!std::is_copy_assignable<bigint>::value, "");
2021-04-30 04:09:54 +00:00
// Construction: a default-constructed bigint formats as an empty string,
// while a bigint built from an integer formats as its hexadecimal digits.
TEST(bigint_test, construct) {
  auto as_text = [](const bigint& value) { return fmt::to_string(value); };
  EXPECT_EQ(as_text(bigint()), "");
  EXPECT_EQ(as_text(bigint(0x42)), "42");
  EXPECT_EQ(as_text(bigint(0x123456789abcedf0)), "123456789abcedf0");
}
2021-04-30 04:09:54 +00:00
// compare() returns zero for equal values and a negative/positive result
// when the first operand is smaller/larger than the second.
TEST(bigint_test, compare) {
  // Equal operands.
  bigint base(42);
  bigint shifted(42);
  EXPECT_EQ(compare(base, shifted), 0);

  // After shifting left by 32 bits the second operand is the larger one.
  shifted <<= 32;
  EXPECT_LT(compare(base, shifted), 0);

  // Operands that differ by one, checked in both directions.
  bigint successor(43);
  EXPECT_LT(compare(base, successor), 0);
  EXPECT_GT(compare(successor, base), 0);

  // 42 * 0x100000001 equals (42 << 32) + 42, so it is larger than 42 << 32.
  bigint wide(42 * 0x100000001);
  EXPECT_LT(compare(shifted, wide), 0);
  EXPECT_GT(compare(wide, shifted), 0);
}
2021-04-30 04:09:54 +00:00
// add_compare(lhs1, lhs2, rhs) is checked against the sign expected when
// comparing lhs1 + lhs2 with rhs.
TEST(bigint_test, add_compare) {
  // Shorthand for the cases whose three operands all fit in 64 bits.
  auto sum_vs = [](uint64_t lhs1, uint64_t lhs2, uint64_t rhs) {
    return add_compare(bigint(lhs1), bigint(lhs2), bigint(rhs));
  };

  // Operands built with shifts.
  EXPECT_LT(
      add_compare(bigint(0xffffffff), bigint(0xffffffff), bigint(1) <<= 64), 0);
  EXPECT_LT(add_compare(bigint(1) <<= 32, bigint(1), bigint(1) <<= 96), 0);
  EXPECT_GT(add_compare(bigint(1) <<= 32, bigint(0), bigint(0xffffffff)), 0);
  EXPECT_GT(add_compare(bigint(0), bigint(1) <<= 32, bigint(0xffffffff)), 0);

  // Small operands.
  EXPECT_GT(sum_vs(42, 1, 42), 0);
  EXPECT_GT(sum_vs(0xffffffff, 1, 0xffffffff), 0);
  EXPECT_LT(sum_vs(10, 10, 22), 0);

  // Operands wider than 32 bits.
  EXPECT_LT(sum_vs(0x100000010, 0x100000010, 0x300000010), 0);

  // 0x1ffffffff + 0x100000002 == 0x300000001: probe just below, at, and
  // above the exact sum.
  EXPECT_GT(sum_vs(0x1ffffffff, 0x100000002, 0x300000000), 0);
  EXPECT_EQ(sum_vs(0x1ffffffff, 0x100000002, 0x300000001), 0);
  EXPECT_LT(sum_vs(0x1ffffffff, 0x100000002, 0x300000002), 0);
  EXPECT_LT(sum_vs(0x1ffffffff, 0x100000002, 0x300000003), 0);
}
2021-04-30 04:09:54 +00:00
// operator<<= with a zero, a one-bit and a multi-bit shift applied in turn
// to the same value.
TEST(bigint_test, shift_left) {
  bigint value(0x42);

  // A zero shift leaves the value unchanged.
  value <<= 0;
  EXPECT_EQ(fmt::to_string(value), "42");

  value <<= 1;
  EXPECT_EQ(fmt::to_string(value), "84");

  // 26 bits in total: 0x42 << 26 == 0x108000000.
  value <<= 25;
  EXPECT_EQ(fmt::to_string(value), "108000000");
}
2021-04-30 04:09:54 +00:00
// operator*= with small factors and with the largest 32-, 64- and 128-bit
// values multiplied by themselves.
TEST(bigint_test, multiply) {
  bigint product(0x42);
  // A zero factor is expected to trigger an assertion failure.
  EXPECT_THROW(product *= 0, assertion_failure);
  product *= 1;
  EXPECT_EQ(fmt::to_string(product), "42");
  product *= 2;
  EXPECT_EQ(fmt::to_string(product), "84");
  product *= 0x12345678;
  EXPECT_EQ(fmt::to_string(product), "962fc95e0");

  // Largest 32-bit value times itself.
  bigint big(max_value<uint32_t>());
  big *= max_value<uint32_t>();
  EXPECT_EQ(fmt::to_string(big), "fffffffe00000001");

  // Largest 64-bit value times itself.
  const auto max64 = max_value<uint64_t>();
  big = max64;
  big *= max64;
  EXPECT_EQ(fmt::to_string(big), "fffffffffffffffe0000000000000001");

  // Largest 128-bit value times itself.
  const auto max128 = (fmt::detail::uint128_t(max64) << 64) | max64;
  big = max128;
  big *= max128;
  EXPECT_EQ(fmt::to_string(big),
            "fffffffffffffffffffffffffffffffe00000000000000000000000000000001");
}
2021-04-30 04:09:54 +00:00
// square() on zero, a small value, a 36-bit value and the largest 64-bit
// value, followed by a check of assign_pow10(10).
TEST(bigint_test, square) {
  bigint zero(0);
  zero.square();
  EXPECT_EQ(fmt::to_string(zero), "0");

  bigint small(0x100);
  small.square();
  EXPECT_EQ(fmt::to_string(small), "10000");

  bigint medium(0xfffffffff);
  medium.square();
  EXPECT_EQ(fmt::to_string(medium), "ffffffffe000000001");

  bigint large(max_value<uint64_t>());
  large.square();
  EXPECT_EQ(fmt::to_string(large), "fffffffffffffffe0000000000000001");

  // 10^10 == 0x2540be400.
  bigint pow10;
  pow10.assign_pow10(10);
  EXPECT_EQ(fmt::to_string(pow10), "2540be400");
}
2021-04-30 04:09:54 +00:00
// divmod_assign with a zero divisor must raise an assertion failure,
// whether the dividend is zero or not.
TEST(bigint_test, divmod_assign_zero_divisor) {
  bigint divisor(0);
  EXPECT_THROW(bigint(0).divmod_assign(divisor), assertion_failure);
  EXPECT_THROW(bigint(42).divmod_assign(divisor), assertion_failure);
}
2021-04-30 04:09:54 +00:00
// Using the same object as dividend and divisor must raise an assertion
// failure.
TEST(bigint_test, divmod_assign_self) {
  bigint value(100);
  EXPECT_THROW(value.divmod_assign(value), assertion_failure);
}
2021-04-30 04:09:54 +00:00
// Computes (42 << 340) / pow(10, 100) and checks both the value returned by
// divmod_assign and what is left in the dividend afterwards.
TEST(bigint_test, divmod_assign_unaligned) {
  bigint dividend(42);
  dividend <<= 340;

  bigint divisor;
  divisor.assign_pow10(100);

  int result = dividend.divmod_assign(divisor);
  EXPECT_EQ(result, 9406);
  EXPECT_EQ(fmt::to_string(dividend),
            "10f8353019583bfc29ffc8f564e1b9f9d819dbb4cf783e4507eca1539220p96");
}
2021-04-30 04:09:54 +00:00
// divmod_assign returns the quotient and leaves the remainder in the object
// it is called on, as the three cases below demonstrate.
TEST(bigint_test, divmod_assign) {
  // 100 / 10: quotient 10, remainder 0.
  bigint value(100);
  int quotient = value.divmod_assign(bigint(10));
  EXPECT_EQ(quotient, 10);
  EXPECT_EQ(fmt::to_string(value), "0");

  // pow(10, 100) / (42 << 320).
  value.assign_pow10(100);
  quotient = value.divmod_assign(bigint(42) <<= 320);
  EXPECT_EQ(quotient, 111);
  EXPECT_EQ(fmt::to_string(value),
            "13ad2594c37ceb0b2784c4ce0bf38ace408e211a7caab24308a82e8f10p96");

  // 42 / 100: quotient 0, and the dividend keeps its value (0x2a).
  bigint small(42);
  value.assign_pow10(2);
  quotient = small.divmod_assign(value);
  EXPECT_EQ(quotient, 0);
  EXPECT_EQ(fmt::to_string(small), "2a");
}
template <bool is_iec559> void run_double_tests() {
2018-08-29 16:34:57 +00:00
fmt::print("warning: double is not IEC559, skipping FP tests\n");
}
template <> void run_double_tests<true>() {
// Construct from double.
EXPECT_EQ(fp(1.23), fp(0x13ae147ae147aeu, -52));
2018-08-29 16:34:57 +00:00
}
2021-04-30 04:09:54 +00:00
// Dispatches to the run_double_tests variant that matches whether double is
// an IEC 559 type on this platform.
TEST(fp_test, double_tests) {
  constexpr bool double_is_iec559 = std::numeric_limits<double>::is_iec559;
  run_double_tests<double_is_iec559>();
}
2021-04-30 04:09:54 +00:00
// normalize() moves the significand's leading digits to the top of the
// 64-bit field and lowers the exponent by the same number of bits
// (0xbeef is shifted by 48 bits here, so 42 becomes -6).
TEST(fp_test, normalize) {
  const fp input(0xbeef, 42);
  const auto result = normalize(input);
  EXPECT_EQ(result.f, 0xbeef000000000000);
  EXPECT_EQ(result.e, -6);
}
2021-04-30 04:09:54 +00:00
// fp multiplication: the resulting significand holds the upper 64 bits of
// the full product and the exponent is the sum of the exponents plus 64.
TEST(fp_test, multiply) {
  // The product fits exactly in the upper half.
  const auto exact = fp(123ULL << 32, 4) * fp(56ULL << 32, 7);
  EXPECT_EQ(exact.f, 123u * 56u);
  EXPECT_EQ(exact.e, 4 + 7 + 64);

  // The bit just below the kept half is set, so the result is rounded up.
  const auto rounded = fp(123ULL << 32, 4) * fp(567ULL << 31, 8);
  EXPECT_EQ(rounded.f, (123 * 567 + 1u) / 2);
  EXPECT_EQ(rounded.e, 4 + 8 + 64);
}
2021-04-30 04:09:54 +00:00
// For every binary exponent of double, compares the value returned by
// get_cached_power against a power of ten computed with bigint arithmetic.
// Only the leading hexadecimal digits are compared.
TEST(fp_test, get_cached_power) {
  using limits = std::numeric_limits<double>;
  for (auto bin_exp = limits::min_exponent; bin_exp <= limits::max_exponent;
       ++bin_exp) {
    int dec_exp = 0;
    auto cached = fmt::detail::get_cached_power(bin_exp, dec_exp);
    bigint exact, cache(cached.f);

    if (dec_exp < 0) {
      // Negative decimal exponent: multiply the cached significand (plus
      // one) by 10^-dec_exp and compare with 2^-e.
      cache.assign_pow10(-dec_exp);
      cache *= cached.f + 1;  // Inexact check.
      exact = 1;
      exact <<= -cached.e;
      exact.align(cache);
      const auto exact_digits = fmt::to_string(exact);
      const auto cache_digits = fmt::to_string(cache);
      EXPECT_EQ(exact_digits.size(), cache_digits.size());
      EXPECT_EQ(exact_digits.substr(0, 16), cache_digits.substr(0, 16));
      continue;
    }

    // Non-negative decimal exponent: build 10^dec_exp and apply the binary
    // exponent as a shift to whichever side needs it.
    exact.assign_pow10(dec_exp);
    if (cached.e <= 0) {
      exact <<= -cached.e;
    } else {
      cache <<= cached.e;
    }
    exact.align(cache);
    cache.align(exact);
    const auto exact_digits = fmt::to_string(exact);
    const auto cache_digits = fmt::to_string(cache);
    EXPECT_EQ(exact_digits.size(), cache_digits.size());
    EXPECT_EQ(exact_digits.substr(0, 15), cache_digits.substr(0, 15));
    // The 16th digit may be one higher in the cached value, but only if the
    // next exact digit is above '8'.
    const int diff = cache_digits[15] - exact_digits[15];
    if (diff == 1) {
      EXPECT_GT(exact_digits[16], '8');
    } else {
      EXPECT_EQ(diff, 0);
    }
  }
}
2021-04-30 04:09:54 +00:00
// Checks that float_info<T>::max_k agrees with the value derived from kappa
// and floor_log10_pow2 of a type-dependent binary exponent.
TEST(fp_test, dragonbox_max_k) {
  using fmt::detail::dragonbox::floor_log10_pow2;

  using float_info = fmt::detail::dragonbox::float_info<float>;
  const int float_exp = std::numeric_limits<float>::min_exponent -
                        fmt::detail::num_significand_bits<float>() - 1;
  EXPECT_EQ(fmt::detail::const_check(float_info::max_k),
            float_info::kappa - floor_log10_pow2(float_exp));

  using double_info = fmt::detail::dragonbox::float_info<double>;
  // Unlike the float case, the significand width is counted twice here.
  const int double_exp = std::numeric_limits<double>::min_exponent -
                         2 * fmt::detail::num_significand_bits<double>() - 1;
  EXPECT_EQ(fmt::detail::const_check(double_info::max_k),
            double_info::kappa - floor_log10_pow2(double_exp));
}
2021-04-30 04:09:54 +00:00
// get_round_direction(divisor, remainder, error): expected direction for
// clear-cut cases, ambiguous cases, invalid arguments and values close to
// the uint64_t limits.
TEST(fp_test, get_round_direction) {
  using fmt::detail::get_round_direction;
  using fmt::detail::round_direction;

  // Without an error term.
  EXPECT_EQ(get_round_direction(100, 50, 0), round_direction::down);
  EXPECT_EQ(get_round_direction(100, 51, 0), round_direction::up);

  // With an error of 10: remainders from 41 to 59 cannot be decided.
  EXPECT_EQ(get_round_direction(100, 40, 10), round_direction::down);
  EXPECT_EQ(get_round_direction(100, 60, 10), round_direction::up);
  for (size_t remainder = 41; remainder < 60; ++remainder) {
    EXPECT_EQ(get_round_direction(100, remainder, 10),
              round_direction::unknown);
  }

  // Argument combinations that must raise an assertion failure.
  EXPECT_THROW(get_round_direction(100, 100, 0), assertion_failure);
  EXPECT_THROW(get_round_direction(100, 0, 100), assertion_failure);
  EXPECT_THROW(get_round_direction(100, 0, 50), assertion_failure);

  const uint64_t max = max_value<uint64_t>();
  // Check that remainder + error doesn't overflow.
  EXPECT_EQ(get_round_direction(max, max - 1, 2), round_direction::up);
  // Check that 2 * (remainder + error) doesn't overflow.
  EXPECT_EQ(get_round_direction(max, max / 2 + 1, max / 2),
            round_direction::unknown);
  // Check that remainder - error doesn't overflow.
  EXPECT_EQ(get_round_direction(100, 40, 41), round_direction::unknown);
  // Check that 2 * (remainder - error) doesn't overflow.
  EXPECT_EQ(get_round_direction(max, max - 1, 1), round_direction::up);
}
2021-04-30 04:09:54 +00:00
// Exercises gen_digits_handler::on_digit through a small wrapper that
// supplies the output buffer and the precision.
TEST(fp_test, fixed_handler) {
  namespace digits = fmt::detail::digits;

  // gen_digits_handler writing into its own 10-char buffer.
  struct test_handler : fmt::detail::gen_digits_handler {
    char buffer[10];
    test_handler(int prec = 0) : fmt::detail::gen_digits_handler() {
      buf = buffer;
      precision = prec;
    }
  };

  // A remainder below the divisor is accepted; one equal to the divisor
  // must raise an assertion failure.
  test_handler().on_digit('0', 100, 99, 0, false);
  EXPECT_THROW(test_handler().on_digit('0', 100, 100, 0, false),
               assertion_failure);

  EXPECT_EQ(test_handler(1).on_digit('0', 100, 10, 10, false), digits::error);
  // Check that divisor - error doesn't overflow.
  EXPECT_EQ(test_handler(1).on_digit('0', 100, 10, 101, false), digits::error);
  // Check that 2 * error doesn't overflow.
  const uint64_t max = max_value<uint64_t>();
  EXPECT_EQ(test_handler(1).on_digit('0', max, 10, max - 1, false),
            digits::error);
}
2021-04-30 04:09:54 +00:00
// Smoke test: only checks that format_float can be called for a double with
// default float_specs; the produced output is not inspected.
TEST(fp_test, grisu_format_compiles_with_on_ieee_double) {
  fmt::memory_buffer buf;
  const auto specs = fmt::detail::float_specs();
  format_float(0.42, -1, specs, buf);
}
2021-04-30 21:21:49 +00:00
// format_error_code writes "<prefix>: error <code>" into the buffer, and
// falls back to "error <code>" alone when the full message would not fit
// into fmt::inline_buffer_size characters.
TEST(format_impl_test, format_error_code) {
  std::string msg = "error 42", sep = ": ";

  {
    // Whatever the buffer held before is replaced by the message.
    fmt::memory_buffer buffer;
    format_to(fmt::appender(buffer), "garbage");
    fmt::detail::format_error_code(buffer, 42, "test");
    EXPECT_EQ(to_string(buffer), "test: " + msg);
  }

  {
    // A prefix that makes the message one character too long is dropped.
    fmt::memory_buffer buffer;
    const auto too_long =
        std::string(fmt::inline_buffer_size - msg.size() - sep.size() + 1, 'x');
    fmt::detail::format_error_code(buffer, 42, too_long);
    EXPECT_EQ(msg, to_string(buffer));
  }

  const int codes[] = {42, -1};
  for (int code : codes) {
    // Test maximum buffer size.
    msg = fmt::format("error {}", code);
    fmt::memory_buffer buffer;
    auto prefix =
        std::string(fmt::inline_buffer_size - msg.size() - sep.size(), 'x');
    fmt::detail::format_error_code(buffer, code, prefix);
    EXPECT_EQ(prefix + sep + msg, to_string(buffer));
    size_t expected_size = fmt::inline_buffer_size;
    EXPECT_EQ(expected_size, buffer.size());
    buffer.resize(0);

    // Test with a message that doesn't fit into the buffer.
    prefix += 'x';
    fmt::detail::format_error_code(buffer, code, prefix);
    EXPECT_EQ(to_string(buffer), msg);
  }
}
2021-04-30 21:21:49 +00:00
// compute_width on a four-letter Cyrillic string viewed as char8_type must
// report a width of 4.
TEST(format_impl_test, compute_width) {
  using char8 = fmt::detail::char8_type;
  const auto* text = reinterpret_cast<const char8*>("ёжик");
  const auto view = fmt::basic_string_view<char8>(text);
  EXPECT_EQ(4, fmt::detail::compute_width(view));
}
2020-05-10 14:25:42 +00:00
// Tests fmt::detail::count_digits for integer type Int.
template <typename Int> void test_count_digits() {
2020-05-10 14:25:42 +00:00
for (Int i = 0; i < 10; ++i) EXPECT_EQ(1u, fmt::detail::count_digits(i));
2019-10-06 19:34:02 +00:00
for (Int i = 1, n = 1, end = max_value<Int>() / 10; n <= end; ++i) {
n *= 10;
2022-04-02 14:40:52 +00:00
EXPECT_EQ(fmt::detail::count_digits(n - 1), i);
EXPECT_EQ(fmt::detail::count_digits(n), i + 1);
}
}
2021-04-30 21:21:49 +00:00
// Runs the count_digits checks for both 32-bit and 64-bit unsigned integers.
TEST(format_impl_test, count_digits) {
test_count_digits<uint32_t>();
test_count_digits<uint64_t>();
}
// Checks fmt::detail::countl_zero for every power of two that fits in
// uint32_t and for the value just below each of them.
TEST(format_impl_test, countl_zero) {
  constexpr auto num_bits = fmt::detail::num_bits<uint32_t>();
  uint32_t n = 1u;
  // The loop below starts at 2, so the lowest bit is checked on its own.
  // Zero itself is never passed to countl_zero.
  EXPECT_EQ(fmt::detail::countl_zero(n), num_bits - 1);
  // Run up to and including i == num_bits - 1 so that the most significant
  // bit (expected result 0) is covered as well.
  for (int i = 1; i < num_bits; i++) {
    n <<= 1;  // n == 1u << i
    EXPECT_EQ(fmt::detail::countl_zero(n - 1), num_bits - i);
    EXPECT_EQ(fmt::detail::countl_zero(n), num_bits - i - 1);
  }
}
2022-04-03 21:52:35 +00:00
#if FMT_USE_FLOAT128
// Writing a __float128 holding 42 must produce "42".
TEST(format_impl_test, write_float128) {
  std::string output;
  auto out = std::back_inserter(output);
  fmt::detail::write<char>(out, __float128(42));
  EXPECT_EQ(output, "42");
}
#endif
2022-05-21 13:38:05 +00:00
// Test-only number type made of two doubles; it converts to double as the
// sum of both parts.
struct double_double {
  double a;
  double b;

  // Both parts default to zero.
  explicit constexpr double_double(double first = 0, double second = 0)
      : a(first), b(second) {}

  // Implicit conversion yields a + b.
  operator double() const { return a + b; }

  // Negation flips the sign of each part separately.
  double_double operator-() const { return double_double(-a, -b); }
};

// Orders two double_double values by the sum of their parts.
bool operator>=(const double_double& lhs, const double_double& rhs) {
  return rhs.a + rhs.b <= lhs.a + lhs.b;
}
// Test-only wrapper around a single float.
struct slow_float {
  float value;

  // The wrapped value defaults to zero.
  explicit constexpr slow_float(float initial = 0) : value(initial) {}

  // Implicit conversion returns the wrapped float.
  operator float() const { return value; }

  // Negation wraps the negated float.
  slow_float operator-() const { return slow_float(-value); }
};
2022-05-21 13:38:05 +00:00
// Standard-library trait specializations that make the two test types look
// like floating-point types.
namespace std {

// double_double: reported as floating point with 106 digits.
template <> struct is_floating_point<double_double> : true_type {};
template <> struct numeric_limits<double_double> {
  static constexpr int digits = 106;
  // is_iec559 is true for double-double in libstdc++.
  static constexpr bool is_iec559 = true;
};

// slow_float: reported as floating point, reusing the limits of float.
template <> struct is_floating_point<slow_float> : true_type {};
template <> struct numeric_limits<slow_float> : numeric_limits<float> {};

}  // namespace std
// Library-side specializations for the slow_float test type.
FMT_BEGIN_NAMESPACE
namespace detail {
// Marks slow_float as not being a "fast" float.
// NOTE(review): presumably this routes formatting away from the fast path so
// that write_dragon_even exercises the fallback algorithm - confirm against
// format.h.
template <> struct is_fast_float<slow_float> : std::false_type {};
namespace dragonbox {
// Describes slow_float with a 32-bit carrier integer and 8 exponent bits.
template <> struct float_info<slow_float> {
using carrier_uint = uint32_t;
static const int exponent_bits = 8;
};
} // namespace dragonbox
} // namespace detail
FMT_END_NAMESPACE
2022-05-21 13:38:05 +00:00
// Writing double_double(42) with default format specs must produce "42".
TEST(format_impl_test, write_double_double) {
  std::string output;
  fmt::detail::write<char>(std::back_inserter(output), double_double(42), {});
  // Specializing is_floating_point is broken in MSVC, so the result is not
  // checked there.
  if (FMT_MSC_VERSION) return;
  EXPECT_EQ(output, "42");
}
// Writing slow_float(33554450.0f) with default format specs must produce
// "33554450".
TEST(format_impl_test, write_dragon_even) {
  std::string output;
  fmt::detail::write<char>(std::back_inserter(output),
                           slow_float(33554450.0f), {});
  // Specializing is_floating_point is broken in MSVC, so the result is not
  // checked there.
  if (FMT_MSC_VERSION) return;
  EXPECT_EQ(output, "33554450");
}
2020-09-27 20:49:40 +00:00
#ifdef _WIN32
#  include <windows.h>
// Compile-time check: fmt::detail::WriteConsoleW must be assignable to a
// pointer with the type of the Windows API function ::WriteConsoleW.
TEST(format_impl_test, write_console_signature) {
  using write_console_fn = decltype(::WriteConsoleW)*;
  write_console_fn fn = fmt::detail::WriteConsoleW;
  (void)fn;
}
#endif
// A public domain branchless UTF-8 decoder by Christopher Wellons:
// https://github.com/skeeto/branchless-utf8
// Returns true when c lies in the surrogate range U+D800..U+DFFF. Every
// value in that range, and no other, has 0xD800 in all bits above the low
// eleven.
constexpr bool unicode_is_surrogate(uint32_t c) {
  return (c & 0xFFFFF800U) == 0xD800U;
}
// Encodes c as UTF-8 into s using one to four bytes, chosen by magnitude,
// and returns a pointer just past the last byte written. No validation is
// performed, so surrogates and values above U+10FFFF are encoded as well.
FMT_CONSTEXPR char* utf8_encode(char* s, uint32_t c) {
  // One byte: values below 2^7.
  if (c < (1UL << 7)) {
    s[0] = static_cast<char>(c);
    return s + 1;
  }
  // Two bytes: values below 2^11.
  if (c < (1UL << 11)) {
    s[0] = static_cast<char>(0xc0 | (c >> 6));
    s[1] = static_cast<char>(0x80 | (c & 0x3f));
    return s + 2;
  }
  // Three bytes: values below 2^16.
  if (c < (1UL << 16)) {
    s[0] = static_cast<char>(0xe0 | (c >> 12));
    s[1] = static_cast<char>(0x80 | ((c >> 6) & 0x3f));
    s[2] = static_cast<char>(0x80 | (c & 0x3f));
    return s + 3;
  }
  // Four bytes: everything else.
  s[0] = static_cast<char>(0xf0 | (c >> 18));
  s[1] = static_cast<char>(0x80 | ((c >> 12) & 0x3f));
  s[2] = static_cast<char>(0x80 | ((c >> 6) & 0x3f));
  s[3] = static_cast<char>(0x80 | (c & 0x3f));
  return s + 4;
}
// Make sure it can decode every character: each non-surrogate code point in
// U+0000..U+10FFFF (both ends included) is encoded with utf8_encode and must
// decode back to the same value, without error, consuming exactly the
// encoded bytes.
TEST(format_impl_test, utf8_decode_decode_all) {
  // The upper bound is inclusive so that the last code point, U+10FFFF, is
  // tested too.
  for (uint32_t i = 0; i <= 0x10ffff; i++) {
    if (unicode_is_surrogate(i)) continue;
    int e;
    uint32_t c;
    char buf[8] = {0};
    char* end = utf8_encode(buf, i);
    const char* res = fmt::detail::utf8_decode(buf, &c, &e);
    EXPECT_EQ(end, res);
    EXPECT_EQ(c, i);
    EXPECT_EQ(e, 0);
  }
}
// Reject everything outside of U+0000..U+10FFFF: every value from 0x110000
// up to and including 0x1FFFFF (the largest 21-bit value, i.e. the last one
// that fits the four-byte form written by utf8_encode) must be reported as
// an error while still consuming four bytes.
TEST(format_impl_test, utf8_decode_out_of_range) {
  // The upper bound is inclusive so that 0x1FFFFF itself is tested.
  for (uint32_t i = 0x110000; i <= 0x1fffff; i++) {
    int e;
    uint32_t c;
    char buf[8] = {0};
    utf8_encode(buf, i);
    const char* end = fmt::detail::utf8_decode(buf, &c, &e);
    EXPECT_NE(e, 0);
    EXPECT_EQ(end - buf, 4);
  }
}
// Does it reject all surrogate halves? Every value in U+D800..U+DFFF,
// encoded as if it were an ordinary code point, must decode with an error.
TEST(format_impl_test, utf8_decode_surrogate_halves) {
  for (uint32_t surrogate = 0xd800; surrogate < 0xe000; ++surrogate) {
    char encoded[8] = {0};
    utf8_encode(encoded, surrogate);

    int error;
    uint32_t decoded;
    fmt::detail::utf8_decode(encoded, &decoded, &error);
    EXPECT_NE(error, 0);
  }
}
// How about non-canonical encodings? Overlong two-, three- and four-byte
// forms must be flagged as errors, and the decoder must still step over the
// whole sequence.
TEST(format_impl_test, utf8_decode_non_canonical_encodings) {
  int error;
  uint32_t decoded;

  {
    // Overlong two-byte form.
    char overlong[8] = {char(0xc0), char(0xA4)};
    const char* next = fmt::detail::utf8_decode(overlong, &decoded, &error);
    EXPECT_NE(error, 0);
    EXPECT_EQ(next, overlong + 2);
  }
  {
    // Overlong three-byte form.
    char overlong[8] = {char(0xe0), char(0x80), char(0xA4)};
    const char* next = fmt::detail::utf8_decode(overlong, &decoded, &error);
    EXPECT_NE(error, 0);
    EXPECT_EQ(next, overlong + 3);
  }
  {
    // Overlong four-byte form.
    char overlong[8] = {char(0xf0), char(0x80), char(0x80), char(0xA4)};
    const char* next = fmt::detail::utf8_decode(overlong, &decoded, &error);
    EXPECT_NE(error, 0);
    EXPECT_EQ(next, overlong + 4);
  }
}
// Let's try some bogus byte sequences: each must be reported as an error,
// and the decoder must advance by the expected number of bytes.
TEST(format_impl_test, utf8_decode_bogus_byte_sequences) {
  int error;
  uint32_t decoded;

  // Invalid first byte (0xff): error, advance by one byte.
  char lead_ff[4] = {char(0xff)};
  const char* next = fmt::detail::utf8_decode(lead_ff, &decoded, &error);
  EXPECT_NE(error, 0);
  EXPECT_EQ(next - lead_ff, 1);

  // Invalid first byte (0x80): error, advance by one byte.
  char lead_80[4] = {char(0x80)};
  next = fmt::detail::utf8_decode(lead_80, &decoded, &error);
  EXPECT_NE(error, 0);
  EXPECT_EQ(next - lead_80, 1);

  // Looks like a two-byte sequence but second byte is wrong: error, advance
  // by two bytes.
  char bad_second[4] = {char(0xc0), char(0x0a)};
  next = fmt::detail::utf8_decode(bad_second, &decoded, &error);
  EXPECT_NE(error, 0);
  EXPECT_EQ(next - bad_second, 2);
}