Escape invalid code points

This commit is contained in:
Victor Zverovich 2021-08-22 15:14:13 -07:00
parent a76031e11d
commit a212ff757f
3 changed files with 24 additions and 23 deletions

View File

@ -232,18 +232,18 @@ struct singleton {
unsigned char lower_count;
};
inline auto is_printable(uint16_t x, const singleton* singleton_uppers,
size_t singleton_uppers_size,
inline auto is_printable(uint16_t x, const singleton* singletons,
size_t singletons_size,
const unsigned char* singleton_lowers,
const unsigned char* normal, size_t normal_size)
-> bool {
auto upper = x >> 8;
auto lower_start = 0;
for (size_t i = 0; i < singleton_uppers_size; ++i) {
auto su = singleton_uppers[i];
auto lower_end = lower_start + su.lower_count;
if (upper < su.upper) break;
if (upper == su.upper) {
for (size_t i = 0; i < singletons_size; ++i) {
auto s = singletons[i];
auto lower_end = lower_start + s.lower_count;
if (upper < s.upper) break;
if (upper == s.upper) {
for (auto j = lower_start; j < lower_end; ++j) {
if (singleton_lowers[j] == (x & 0xff)) return false;
}
@ -266,7 +266,7 @@ inline auto is_printable(uint16_t x, const singleton* singleton_uppers,
// Returns true iff the code point cp is printable.
// This code is generated by support/printable.py.
inline auto is_printable(uint32_t cp) -> bool {
static constexpr singleton singletons0_upper[] = {
static constexpr singleton singletons0[] = {
{0x00, 1}, {0x03, 5}, {0x05, 6}, {0x06, 3}, {0x07, 6}, {0x08, 8},
{0x09, 17}, {0x0a, 28}, {0x0b, 25}, {0x0c, 20}, {0x0d, 16}, {0x0e, 13},
{0x0f, 4}, {0x10, 3}, {0x12, 18}, {0x13, 9}, {0x16, 1}, {0x17, 5},
@ -302,7 +302,7 @@ inline auto is_printable(uint32_t cp) -> bool {
0xfe, 0xff, 0x53, 0x67, 0x75, 0xc8, 0xc9, 0xd0, 0xd1, 0xd8, 0xd9, 0xe7,
0xfe, 0xff,
};
static constexpr singleton singletons1_upper[] = {
static constexpr singleton singletons1[] = {
{0x00, 6}, {0x01, 1}, {0x03, 1}, {0x04, 2}, {0x08, 8}, {0x09, 2},
{0x0a, 5}, {0x0b, 2}, {0x0e, 4}, {0x10, 1}, {0x11, 2}, {0x12, 5},
{0x13, 17}, {0x14, 1}, {0x15, 2}, {0x17, 2}, {0x19, 13}, {0x1c, 5},
@ -395,13 +395,13 @@ inline auto is_printable(uint32_t cp) -> bool {
};
auto lower = static_cast<uint16_t>(cp);
if (cp < 0x10000) {
return is_printable(lower, singletons0_upper,
sizeof(singletons0_upper) / sizeof(*singletons0_upper),
return is_printable(lower, singletons0,
sizeof(singletons0) / sizeof(*singletons0),
singletons0_lower, normal0, sizeof(normal0));
}
if (cp < 0x20000) {
return is_printable(lower, singletons1_upper,
sizeof(singletons1_upper) / sizeof(*singletons1_upper),
return is_printable(lower, singletons1,
sizeof(singletons1) / sizeof(*singletons1),
singletons1_lower, normal1, sizeof(normal1));
}
if (0x2a6de <= cp && cp < 0x2a700) return false;
@ -412,7 +412,7 @@ inline auto is_printable(uint32_t cp) -> bool {
if (0x2fa1e <= cp && cp < 0x30000) return false;
if (0x3134b <= cp && cp < 0xe0100) return false;
if (0xe01f0 <= cp && cp < 0x110000) return false;
return true;
return cp < 0x110000;
}
inline auto needs_escape(uint32_t cp) -> bool {

View File

@ -173,29 +173,29 @@ def main():
print("""\
inline auto is_printable(uint32_t cp) -> bool {\
""")
print_singletons(singletons0u, singletons0l, 'singletons0_upper', 'singletons0_lower')
print_singletons(singletons1u, singletons1l, 'singletons1_upper', 'singletons1_lower')
print_singletons(singletons0u, singletons0l, 'singletons0', 'singletons0_lower')
print_singletons(singletons1u, singletons1l, 'singletons1', 'singletons1_lower')
print_normal(normal0, 'normal0')
print_normal(normal1, 'normal1')
print("""\
auto lower = static_cast<uint16_t>(cp);
if (cp < 0x10000) {
return is_printable(lower, singletons0_upper,
sizeof(singletons0_upper) / sizeof(*singletons0_upper),
return is_printable(lower, singletons0,
sizeof(singletons0) / sizeof(*singletons0),
singletons0_lower, normal0, sizeof(normal0));
}
if (cp < 0x20000) {
return is_printable(lower, singletons1_upper,
sizeof(singletons1_upper) / sizeof(*singletons1_upper),
return is_printable(lower, singletons1,
sizeof(singletons1) / sizeof(*singletons1),
singletons1_lower, normal1, sizeof(normal1));
}\
""")
for a, b in extra:
print(" if (0x{:x} <= cp && cp < 0x{:x}) return false;".format(a, a + b))
print("""\
return true;
}\
""")
return cp < 0x{:x};
}}\
""".format(NUM_CODEPOINTS))
if __name__ == '__main__':
main()

View File

@ -267,6 +267,7 @@ TEST(ranges_test, is_printable) {
using fmt::detail::is_printable;
EXPECT_TRUE(is_printable(0x0323));
EXPECT_FALSE(is_printable(0x0378));
EXPECT_FALSE(is_printable(0x110000));
}
TEST(ranges_test, escape_string) {