mirror of
https://github.com/fmtlib/fmt.git
synced 2024-11-02 11:28:20 +00:00
237 lines
6.7 KiB
Python
237 lines
6.7 KiB
Python
|
#!/usr/bin/env python
|
||
|
|
||
|
# This script uses the following Unicode tables:
# - UnicodeData.txt
|
||
|
|
||
|
|
||
|
from collections import namedtuple
|
||
|
import csv
|
||
|
import os
|
||
|
import subprocess
|
||
|
|
||
|
# Exclusive upper bound used when enumerating code points (values 0 .. 0x10FFFF).
NUM_CODEPOINTS = 0x10FFFF + 1
|
||
|
|
||
|
def to_ranges(iter):
    """Collapse a stream of integers into half-open ``(start, end)`` ranges.

    Each value that equals the exclusive end of the range being built extends
    that range by one; any other value closes the range and opens a new one.
    A new range is also forced when the value is 0x10000 or 0x20000, so no
    yielded range continues across either of those two values.

    Args:
        iter: iterable of integers. The name shadows the ``iter`` builtin and
            is kept only so existing keyword callers keep working.

    Yields:
        ``(start, end)`` tuples with ``end`` exclusive. Nothing is yielded for
        an empty input.
    """
    # Values at which a range is always restarted, even in the middle of a run.
    boundaries = (0x10000, 0x20000)

    current = None  # [start, end) being accumulated; None until the first value
    for i in iter:
        extends_run = (
            current is not None and i == current[1] and i not in boundaries
        )
        if extends_run:
            current[1] += 1
            continue
        if current is not None:
            yield tuple(current)
        current = [i, i + 1]

    # Flush the last open range, if any value was seen at all.
    if current is not None:
        yield tuple(current)
|
||
|
|
||
|
def get_escaped(codepoints):
    """Yield the ``value`` of every code point whose class is in the escape set.

    Args:
        codepoints: iterable of objects exposing ``value`` and ``class_``
            attributes. A falsy ``class_`` (``None`` or ``""``) is treated as
            ``"Cn"``.

    Yields:
        ``c.value`` for each item whose class is one of Cc, Cf, Cs, Co, Cn,
        Zl, Zp or Zs. The space character (``ord(' ')``) is never yielded,
        whatever its class.
    """
    # Built once instead of re-splitting the string for every code point.
    escaped_classes = frozenset("Cc Cf Cs Co Cn Zl Zp Zs".split())
    space = ord(' ')

    for c in codepoints:
        if c.value != space and (c.class_ or "Cn") in escaped_classes:
            yield c.value
|
||
|
|
||
|
def get_file(f):
    """Open the local copy of the file named by URL ``f``, fetching it if absent.

    The local copy is looked up under the last path component of ``f``,
    relative to the current working directory. If it does not exist, ``curl
    -O`` is run on ``f`` and the open is retried.

    Args:
        f: URL (or path) of the file; only its basename is used locally.

    Returns:
        The file object opened for reading in text mode. The caller is
        responsible for closing it.

    Raises:
        subprocess.CalledProcessError: if ``curl`` exits with a non-zero status.
        FileNotFoundError: if the file is still missing after the download.
    """
    local_name = os.path.basename(f)
    try:
        return open(local_name)
    except FileNotFoundError:
        # Not downloaded yet: fetch it, then retry the same local name.
        subprocess.run(["curl", "-O", f], check=True)
        return open(local_name)
|
||
|
|
||
|
# Record for one code point: its integer value and its class string
# (``class_`` may be None, see get_codepoints).
Codepoint = namedtuple("Codepoint", ("value", "class_"))
|
||
|
|
||
|
def get_codepoints(f):
    """Yield one ``Codepoint`` for every value from 0 to ``NUM_CODEPOINTS - 1``.

    ``f`` is read as semicolon-separated rows where field 0 is the code point
    in hexadecimal, field 1 its name and field 2 its class.

    * A row whose name ends with ``First>`` must be followed by a row whose
      name ends with ``Last>``; every value strictly between the two is
      yielded with the class of the ``First>`` row.
    * Any other gap between rows, and everything after the last row, is
      yielded with class ``None``.

    Args:
        f: iterable of text lines (for example an open file).

    Yields:
        ``Codepoint(value, class_)`` in increasing order of ``value``.

    Raises:
        ValueError: if a ``First>`` row is not immediately followed by a
            ``Last>`` row (including when the input ends after ``First>``).
    """
    reader = csv.reader(f, delimiter=";")

    # Start below zero so that code point 0 is also gap-filled when the first
    # row is not 0 (or when there are no rows at all).
    prev_codepoint = -1
    class_first = None  # class of a pending "First>" row, else None

    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines; they carry no data.
            continue

        codepoint = int(row[0], 16)
        name = row[1]
        class_ = row[2]

        if class_first is not None and not name.endswith("Last>"):
            raise ValueError("Missing Last after First")

        # Fill the gap since the previous row: with the range's class inside
        # a First/Last pair, with None otherwise.
        for c in range(prev_codepoint + 1, codepoint):
            yield Codepoint(c, class_first)

        class_first = class_ if name.endswith("First>") else None

        yield Codepoint(codepoint, class_)
        prev_codepoint = codepoint

    if class_first is not None:
        raise ValueError("Missing Last after First")

    # Everything after the last listed row has no class.
    for c in range(prev_codepoint + 1, NUM_CODEPOINTS):
        yield Codepoint(c, None)
|
||
|
|
||
|
def compress_singletons(singletons):
    """Split values into a run-length table of high parts and a list of low bytes.

    Args:
        singletons: iterable of non-negative integers.

    Returns:
        A pair ``(uppers, lowers)``:

        * ``lowers`` holds ``i & 0xff`` for every input value, in input order.
        * ``uppers`` holds one ``(upper, count)`` tuple per run of consecutive
          inputs sharing the same ``i >> 8``; ``count`` is the number of
          entries in ``lowers`` that belong to that run.
    """
    uppers = []  # (upper, number of items in lowers)
    lowers = []

    for i in singletons:
        upper = i >> 8
        if uppers and uppers[-1][0] == upper:
            # Same high part as the previous value: grow the current run.
            uppers[-1] = (upper, uppers[-1][1] + 1)
        else:
            uppers.append((upper, 1))
        lowers.append(i & 0xff)

    return uppers, lowers
|
||
|
|
||
|
def compress_normal(normal):
    """Encode ``(start, count)`` ranges as alternating gap/run lengths.

    For each range, the gap since the end of the previous range ("true"
    length) and the range's own length ("false" length) are encoded:

    * lengths 0x00..0x7f are encoded as one byte: 00, 01, ..., 7e, 7f
    * lengths 0x80..0x7fff are encoded as two bytes: 80 80, 80 81, ...,
      ff fe, ff ff (high byte has bit 7 set)

    Args:
        normal: iterable of ``(start, count)`` pairs, in increasing,
            non-overlapping order.

    Returns:
        A list with one entry per range, each a list
        ``[truelen, (truelenaux), falselen, (falselenaux)]``.

    Raises:
        ValueError: if a gap or a count is negative or does not fit in 15 bits.
            (Previously an ``assert``, which is stripped under ``-O`` and did
            not reject negative gaps.)
    """
    def encode_length(length):
        # One byte for short lengths, two bytes with the top bit set otherwise.
        if length > 0x7f:
            return [0x80 | (length >> 8), length & 0xff]
        return [length]

    compressed = []
    prev_end = 0
    for start, count in normal:
        truelen = start - prev_end
        falselen = count
        prev_end = start + count

        if not (0 <= truelen < 0x8000 and 0 <= falselen < 0x8000):
            raise ValueError(
                "range ({:#x}, {:#x}) cannot be encoded: lengths must be in 0..0x7fff"
                .format(start, count)
            )

        compressed.append(encode_length(truelen) + encode_length(falselen))

    return compressed
|
||
|
|
||
|
def print_singletons(uppers, lowers, uppersname, lowersname):
    """Print two Rust constant tables for a compressed singleton set.

    Args:
        uppers: sequence of ``(upper, count)`` pairs, printed one per line as
            the ``&[(u8, u8)]`` constant named ``uppersname``.
        lowers: sequence of integers, printed eight per line as the ``&[u8]``
            constant named ``lowersname``.
        uppersname: identifier of the first constant.
        lowersname: identifier of the second constant.
    """
    # NOTE(review): the emitted lines start with a single space exactly as in
    # the reviewed copy; confirm the intended indentation of the generated
    # Rust before regenerating checked-in output.
    print("#[rustfmt::skip]")
    print("const {}: &[(u8, u8)] = &[".format(uppersname))
    for upper, count in uppers:
        print(" ({:#04x}, {}),".format(upper, count))
    print("];")

    print("#[rustfmt::skip]")
    print("const {}: &[u8] = &[".format(lowersname))
    for start in range(0, len(lowers), 8):
        row = " ".join("{:#04x},".format(low) for low in lowers[start:start + 8])
        print(" {}".format(row))
    print("];")
|
||
|
|
||
|
def print_normal(normal, normalname):
    """Print a Rust ``&[u8]`` constant holding the encoded range entries.

    Args:
        normal: sequence of entries, each a sequence of integers; every entry
            is printed on its own line as comma-terminated hex literals.
        normalname: identifier of the constant.
    """
    # NOTE(review): the emitted lines start with a single space exactly as in
    # the reviewed copy; confirm the intended indentation of the generated
    # Rust before regenerating checked-in output.
    print("#[rustfmt::skip]")
    print("const {}: &[u8] = &[".format(normalname))
    for entry in normal:
        row = " ".join("{:#04x},".format(byte) for byte in entry)
        print(" {}".format(row))
    print("];")
|
||
|
|
||
|
def main():
    """Generate the Rust ``is_printable`` source and print it to standard output.

    Reads UnicodeData.txt (via ``get_file``), collects the ranges of escaped
    code points, and sorts each range into one of three groups:

    * ranges starting at or above ``2 * CUTOFF`` go to ``extra`` and are
      emitted as explicit ``if`` checks;
    * ranges of one or two values go to the singleton tables;
    * longer ranges go to the "normal" run-length tables.

    For the last two groups there is one table for values below ``CUTOFF``
    (suffix 0) and one for values with the ``CUTOFF`` bit set (suffix 1, stored
    with that bit cleared).
    """
    CUTOFF = 0x10000

    singletons0 = []
    singletons1 = []
    normal0 = []
    normal1 = []
    extra = []

    # get_codepoints() is lazy, so the file has to stay open until every range
    # has been consumed; the ``with`` block closes it afterwards.
    with get_file("https://www.unicode.org/Public/UNIDATA/UnicodeData.txt") as file:
        for a, b in to_ranges(get_escaped(get_codepoints(file))):
            length = b - a

            # ``>=`` (not ``>``): a short range starting exactly at 2 * CUTOFF
            # has the CUTOFF bit clear and would otherwise be stored in
            # singletons0 with an upper part that does not fit in a u8.
            if a >= 2 * CUTOFF:
                extra.append((a, length))
                continue

            if a & CUTOFF:
                singletons, normal = singletons1, normal1
            else:
                singletons, normal = singletons0, normal0

            if length <= 2:
                # to_ranges() never lets a range cross CUTOFF, so masking every
                # member is equivalent to masking only the plane-1 ones.
                singletons.extend((a + k) & ~CUTOFF for k in range(length))
            else:
                normal.append((a & ~CUTOFF, length))

    singletons0u, singletons0l = compress_singletons(singletons0)
    singletons1u, singletons1l = compress_singletons(singletons1)
    normal0 = compress_normal(normal0)
    normal1 = compress_normal(normal1)

    # NOTE(review): the Rust text below is emitted without indentation and the
    # single-line literals start with one space, exactly as in the reviewed
    # copy; confirm the intended indentation of the generated Rust before
    # regenerating checked-in output.
    print("""\
// NOTE: The following code was generated by "src/libcore/unicode/printable.py",
// do not edit directly!

fn check(x: u16, singletonuppers: &[(u8, u8)], singletonlowers: &[u8], normal: &[u8]) -> bool {
let xupper = (x >> 8) as u8;
let mut lowerstart = 0;
for &(upper, lowercount) in singletonuppers {
let lowerend = lowerstart + lowercount as usize;
if xupper == upper {
for &lower in &singletonlowers[lowerstart..lowerend] {
if lower == x as u8 {
return false;
}
}
} else if xupper < upper {
break;
}
lowerstart = lowerend;
}

let mut x = x as i32;
let mut normal = normal.iter().cloned();
let mut current = true;
while let Some(v) = normal.next() {
let len = if v & 0x80 != 0 {
((v & 0x7f) as i32) << 8 | normal.next().unwrap() as i32
} else {
v as i32
};
x -= len;
if x < 0 {
break;
}
current = !current;
}
current
}

pub(crate) fn is_printable(x: char) -> bool {
let x = x as u32;
let lower = x as u16;
if x < 0x10000 {
check(lower, SINGLETONS0U, SINGLETONS0L, NORMAL0)
} else if x < 0x20000 {
check(lower, SINGLETONS1U, SINGLETONS1L, NORMAL1)
} else {\
""")
    # One explicit range check per high range; ``extra`` holds (start, count).
    for start, count in extra:
        print(" if 0x{:x} <= x && x < 0x{:x} {{".format(start, start + count))
        print(" return false;")
        print(" }")
    print("""\
true
}
}\
""")
    print()
    print_singletons(singletons0u, singletons0l, 'SINGLETONS0U', 'SINGLETONS0L')
    print_singletons(singletons1u, singletons1l, 'SINGLETONS1U', 'SINGLETONS1L')
    print_normal(normal0, 'NORMAL0')
    print_normal(normal1, 'NORMAL1')
|
||
|
|
||
|
# Run the generator only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|