Rewrite printable.py codegen to emit C++

This commit is contained in:
Victor Zverovich 2021-08-22 09:10:10 -07:00
parent 6cf90d7cee
commit 7df2c82a8a

View File

@ -1,8 +1,11 @@
#!/usr/bin/env python3
# This script is based on
# https://github.com/rust-lang/rust/blob/master/library/core/src/unicode/printable.py
# distributed under https://github.com/rust-lang/rust/blob/master/LICENSE-MIT.
# This script uses the following Unicode tables:
# - UnicodeData.txt
# Script license: https://github.com/rust-lang/rust/blob/master/LICENSE-MIT
from collections import namedtuple
@ -112,23 +115,20 @@ def compress_normal(normal):
return compressed
def print_singletons(uppers, lowers, uppersname, lowersname):
print("#[rustfmt::skip]")
print("const {}: &[(u8, u8)] = &[".format(uppersname))
print(" static constexpr singleton {}[] = {{".format(uppersname))
for u, c in uppers:
print(" ({:#04x}, {}),".format(u, c))
print("];")
print("#[rustfmt::skip]")
print("const {}: &[u8] = &[".format(lowersname))
print(" {{{:#04x}, {}}},".format(u, c))
print(" };")
print(" static constexpr unsigned char {}[] = {{".format(lowersname))
for i in range(0, len(lowers), 8):
print(" {}".format(" ".join("{:#04x},".format(l) for l in lowers[i:i+8])))
print("];")
print(" };")
def print_normal(normal, normalname):
print("#[rustfmt::skip]")
print("const {}: &[u8] = &[".format(normalname))
print(" static constexpr unsigned char {}[] = {{".format(normalname))
for v in normal:
print(" {}".format(" ".join("{:#04x},".format(i) for i in v)))
print("];")
print(" };")
def main():
file = get_file("https://www.unicode.org/Public/UNIDATA/UnicodeData.txt")
@ -171,67 +171,66 @@ def main():
normal1 = compress_normal(normal1)
print("""\
// NOTE: The following code was generated by "src/libcore/unicode/printable.py",
// do not edit directly!
struct singleton {
unsigned char upper;
unsigned char lowercount;
};
fn check(x: u16, singletonuppers: &[(u8, u8)], singletonlowers: &[u8], normal: &[u8]) -> bool {
let xupper = (x >> 8) as u8;
let mut lowerstart = 0;
for &(upper, lowercount) in singletonuppers {
let lowerend = lowerstart + lowercount as usize;
if xupper == upper {
for &lower in &singletonlowers[lowerstart..lowerend] {
if lower == x as u8 {
return false;
}
}
} else if xupper < upper {
break;
}
lowerstart = lowerend;
inline auto check(uint16_t x, const singleton* singletonuppers,
size_t singletonuppers_size,
const unsigned char* singletonlowers,
const unsigned char* normal, size_t normal_size) -> bool {
auto xupper = x >> 8;
auto lowerstart = 0;
for (size_t i = 0; i < singletonuppers_size; ++i) {
auto su = singletonuppers[i];
auto lowerend = lowerstart + su.lowercount;
if (xupper < su.upper) break;
if (xupper == su.upper) {
for (auto j = lowerstart; j < lowerend; ++j) {
if (singletonlowers[j] == x) return false;
}
}
lowerstart = lowerend;
}
let mut x = x as i32;
let mut normal = normal.iter().cloned();
let mut current = true;
while let Some(v) = normal.next() {
let len = if v & 0x80 != 0 {
((v & 0x7f) as i32) << 8 | normal.next().unwrap() as i32
} else {
v as i32
};
x -= len;
if x < 0 {
break;
}
current = !current;
}
current
auto xsigned = static_cast<int>(x);
auto current = true;
for (size_t i = 0; i < normal_size; ++i) {
auto v = static_cast<int>(normal[i]);
auto len = v & 0x80 != 0 ? (v & 0x7f) << 8 | normal[i++] : v;
xsigned -= len;
if (xsigned < 0) break;
current = !current;
}
return current;
}
pub(crate) fn is_printable(x: char) -> bool {
let x = x as u32;
let lower = x as u16;
if x < 0x10000 {
check(lower, SINGLETONS0U, SINGLETONS0L, NORMAL0)
} else if x < 0x20000 {
check(lower, SINGLETONS1U, SINGLETONS1L, NORMAL1)
} else {\
inline auto is_printable(uint32_t cp) -> bool {\
""")
print_singletons(singletons0u, singletons0l, 'singletons0u', 'singletons0l')
print_singletons(singletons1u, singletons1l, 'singletons1u', 'singletons1l')
print_normal(normal0, 'normal0')
print_normal(normal1, 'normal1')
print("""\
auto lower = static_cast<uint16_t>(cp);
if (cp < 0x10000) {
return check(lower, singletons0u,
sizeof(singletons0u) / sizeof(*singletons0u), singletons0l,
normal0, sizeof(normal0));
}
if (cp < 0x20000) {
return check(lower, singletons1u,
sizeof(singletons1u) / sizeof(*singletons1u), singletons1l,
normal1, sizeof(normal1));
}\
""")
for a, b in extra:
print(" if 0x{:x} <= x && x < 0x{:x} {{".format(a, a + b))
print(" return false;")
print(" }")
print(" if (0x{:x} <= cp && cp < 0x{:x}) return false;".format(a, a + b))
print("""\
true
}
return true;
}\
""")
print()
print_singletons(singletons0u, singletons0l, 'SINGLETONS0U', 'SINGLETONS0L')
print_singletons(singletons1u, singletons1l, 'SINGLETONS1U', 'SINGLETONS1L')
print_normal(normal0, 'NORMAL0')
print_normal(normal1, 'NORMAL1')
if __name__ == '__main__':
main()