Skip to content

Commit ac8fa20

Browse files
authored
Merge pull request #96 from Jules-Bertholet/tables-fmt
Ensure `tables.rs` passes rustfmt
2 parents c49e96f + 42fd2c1 commit ac8fa20

File tree

5 files changed

+9928
-25227
lines changed

5 files changed

+9928
-25227
lines changed

.github/workflows/rust.yml

+3
Original file line number | Diff line number | Diff line change
@@ -77,6 +77,9 @@ jobs:
7777
runs-on: ubuntu-latest
7878
steps:
7979
- uses: actions/checkout@v3
80+
- uses: actions/setup-python@v5
81+
with:
82+
python-version: '3.12'
8083
- name: Regen
8184
run: cd scripts && python3 unicode.py
8285
- name: Diff tables

scripts/unicode.py

+27-32
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020
# out-of-line and check the tables.rs and normalization_tests.rs files into git.
2121
import collections
2222
import urllib.request
23+
from itertools import batched
2324

2425
UNICODE_VERSION = "15.1.0"
2526
UCD_URL = "https://www.unicode.org/Public/%s/ucd/" % UNICODE_VERSION
@@ -354,20 +355,26 @@ def is_first_and_last(first, last):
354355
return False
355356
return first[1:-8] == last[1:-7]
356357

357-
def gen_mph_data(name, d, kv_type, kv_callback):
358+
def gen_mph_data(name, d, kv_type, kv_callback, kv_row_width):
358359
(salt, keys) = minimal_perfect_hash(d)
359-
out.write("pub(crate) const %s_SALT: &[u16] = &[\n" % name.upper())
360-
for s in salt:
361-
out.write(" 0x{:x},\n".format(s))
360+
out.write(f"\npub(crate) const {name.upper()}_SALT: &[u16] = &[\n")
361+
for s_row in batched(salt, 13):
362+
out.write(" ")
363+
for s in s_row:
364+
out.write(f" 0x{s:03X},")
365+
out.write("\n")
366+
out.write("];\n")
367+
out.write(f"pub(crate) const {name.upper()}_KV: &[{kv_type}] = &[\n")
368+
for k_row in batched(keys, kv_row_width):
369+
out.write(" ")
370+
for k in k_row:
371+
out.write(f" {kv_callback(k)},")
372+
out.write("\n")
362373
out.write("];\n")
363-
out.write("pub(crate) const {}_KV: &[{}] = &[\n".format(name.upper(), kv_type))
364-
for k in keys:
365-
out.write(" {},\n".format(kv_callback(k)))
366-
out.write("];\n\n")
367374

368375
def gen_combining_class(combining_classes, out):
369376
gen_mph_data('canonical_combining_class', combining_classes, 'u32',
370-
lambda k: "0x{:X}".format(int(combining_classes[k]) | (k << 8)))
377+
lambda k: f"0x{int(combining_classes[k]) | (k << 8):07X}", 8)
371378

372379
def gen_composition_table(canon_comp, out):
373380
table = {}
@@ -376,7 +383,7 @@ def gen_composition_table(canon_comp, out):
376383
table[(c1 << 16) | c2] = c3
377384
(salt, keys) = minimal_perfect_hash(table)
378385
gen_mph_data('COMPOSITION_TABLE', table, '(u32, char)',
379-
lambda k: "(0x%s, '\\u{%s}')" % (hexify(k), hexify(table[k])))
386+
lambda k: f"(0x{k:08X}, '\\u{{{table[k]:06X}}}')", 1)
380387

381388
out.write("pub(crate) fn composition_table_astral(c1: char, c2: char) -> Option<char> {\n")
382389
out.write(" match (c1, c2) {\n")
@@ -403,7 +410,7 @@ def gen_decomposition_tables(canon_decomp, compat_decomp, cjk_compat_variants_de
403410
assert offset < 65536
404411
out.write("];\n")
405412
gen_mph_data(name + '_decomposed', table, "(u32, (u16, u16))",
406-
lambda k: "(0x{:x}, ({}, {}))".format(k, offsets[k], len(table[k])))
413+
lambda k: f"(0x{k:05X}, (0x{offsets[k]:03X}, 0x{len(table[k]):X}))", 1)
407414

408415
def gen_qc_match(prop_table, out):
409416
out.write(" match c {\n")
@@ -412,7 +419,7 @@ def gen_qc_match(prop_table, out):
412419
assert data in ('N', 'M')
413420
result = "No" if data == 'N' else "Maybe"
414421
if high:
415-
out.write(r" '\u{%s}'...'\u{%s}' => %s," % (low, high, result))
422+
out.write(r" '\u{%s}'..='\u{%s}' => %s," % (low, high, result))
416423
else:
417424
out.write(r" '\u{%s}' => %s," % (low, result))
418425
out.write("\n")
@@ -421,7 +428,7 @@ def gen_qc_match(prop_table, out):
421428
out.write(" }\n")
422429

423430
def gen_nfc_qc(prop_tables, out):
424-
out.write("#[inline]\n")
431+
out.write("\n#[inline]\n")
425432
out.write("#[allow(ellipsis_inclusive_range_patterns)]\n")
426433
out.write("pub fn qc_nfc(c: char) -> IsNormalized {\n")
427434
gen_qc_match(prop_tables['NFC_QC'], out)
@@ -450,7 +457,7 @@ def gen_nfkd_qc(prop_tables, out):
450457

451458
def gen_combining_mark(general_category_mark, out):
452459
gen_mph_data('combining_mark', general_category_mark, 'u32',
453-
lambda k: '0x{:04x}'.format(k))
460+
lambda k: '0x{:05X}'.format(k), 10)
454461

455462
def gen_public_assigned(general_category_public_assigned, out):
456463
# This could be done as a hash but the table is somewhat small.
@@ -464,17 +471,16 @@ def gen_public_assigned(general_category_public_assigned, out):
464471
out.write(" ")
465472
start = False
466473
else:
467-
out.write(" | ")
474+
out.write("\n | ")
468475
if first == last:
469-
out.write("'\\u{%s}'\n" % hexify(first))
476+
out.write("'\\u{%s}'" % hexify(first))
470477
else:
471-
out.write("'\\u{%s}'..='\\u{%s}'\n" % (hexify(first), hexify(last)))
472-
out.write(" => true,\n")
478+
out.write("'\\u{%s}'..='\\u{%s}'" % (hexify(first), hexify(last)))
479+
out.write(" => true,\n")
473480

474481
out.write(" _ => false,\n")
475482
out.write(" }\n")
476483
out.write("}\n")
477-
out.write("\n")
478484

479485
def gen_stream_safe(leading, trailing, out):
480486
# This could be done as a hash but the table is very small.
@@ -488,10 +494,9 @@ def gen_stream_safe(leading, trailing, out):
488494
out.write(" _ => 0,\n")
489495
out.write(" }\n")
490496
out.write("}\n")
491-
out.write("\n")
492497

493498
gen_mph_data('trailing_nonstarters', trailing, 'u32',
494-
lambda k: "0x{:X}".format(int(trailing[k]) | (k << 8)))
499+
lambda k: f"0x{int(trailing[k]) | (k << 8):07X}", 8)
495500

496501
def gen_tests(tests, out):
497502
out.write("""#[derive(Debug)]
@@ -579,43 +584,33 @@ def minimal_perfect_hash(d):
579584
data = UnicodeData()
580585
with open("tables.rs", "w", newline = "\n") as out:
581586
out.write(PREAMBLE)
582-
out.write("#![cfg_attr(rustfmt, rustfmt::skip)]\n")
583587
out.write("use crate::quick_check::IsNormalized;\n")
584588
out.write("use crate::quick_check::IsNormalized::*;\n")
585589
out.write("\n")
586590

587591
version = "(%s, %s, %s)" % tuple(UNICODE_VERSION.split("."))
588592
out.write("#[allow(unused)]\n")
589-
out.write("pub const UNICODE_VERSION: (u8, u8, u8) = %s;\n\n" % version)
593+
out.write("pub const UNICODE_VERSION: (u8, u8, u8) = %s;\n" % version)
590594

591595
gen_combining_class(data.combining_classes, out)
592-
out.write("\n")
593596

594597
gen_composition_table(data.canon_comp, out)
595-
out.write("\n")
596598

597599
gen_decomposition_tables(data.canon_fully_decomp, data.compat_fully_decomp, data.cjk_compat_variants_fully_decomp, out)
598600

599601
gen_combining_mark(data.general_category_mark, out)
600-
out.write("\n")
601602

602603
gen_public_assigned(data.general_category_public_assigned, out)
603-
out.write("\n")
604604

605605
gen_nfc_qc(data.norm_props, out)
606-
out.write("\n")
607606

608607
gen_nfkc_qc(data.norm_props, out)
609-
out.write("\n")
610608

611609
gen_nfd_qc(data.norm_props, out)
612-
out.write("\n")
613610

614611
gen_nfkd_qc(data.norm_props, out)
615-
out.write("\n")
616612

617613
gen_stream_safe(data.ss_leading, data.ss_trailing, out)
618-
out.write("\n")
619614

620615
with open("normalization_tests.rs", "w", newline = "\n") as out:
621616
out.write(PREAMBLE)

src/lib.rs

-2
Original file line number | Diff line number | Diff line change
@@ -72,8 +72,6 @@ mod quick_check;
7272
mod recompose;
7373
mod replace;
7474
mod stream_safe;
75-
76-
#[rustfmt::skip]
7775
mod tables;
7876

7977
#[doc(hidden)]

src/normalize.rs

+5-5
Original file line number | Diff line number | Diff line change
@@ -123,8 +123,8 @@ const L_LAST: u32 = L_BASE + L_COUNT - 1;
123123
const V_LAST: u32 = V_BASE + V_COUNT - 1;
124124
const T_LAST: u32 = T_BASE + T_COUNT - 1;
125125

126-
// Composition only occurs for `TPart`s in `U+11A8 ... U+11C2`,
127-
// i.e. `T_BASE + 1 ... T_LAST`.
126+
// Composition only occurs for `TPart`s in `U+11A8 ..= U+11C2`,
127+
// i.e. `T_BASE + 1 ..= T_LAST`.
128128
const T_FIRST: u32 = T_BASE + 1;
129129

130130
pub(crate) fn is_hangul_syllable(c: char) -> bool {
@@ -172,15 +172,15 @@ fn compose_hangul(a: char, b: char) -> Option<char> {
172172
let (a, b) = (a as u32, b as u32);
173173
match (a, b) {
174174
// Compose a leading consonant and a vowel together into an LV_Syllable
175-
(L_BASE...L_LAST, V_BASE...V_LAST) => {
175+
(L_BASE..=L_LAST, V_BASE..=V_LAST) => {
176176
let l_index = a - L_BASE;
177177
let v_index = b - V_BASE;
178178
let lv_index = l_index * N_COUNT + v_index * T_COUNT;
179179
let s = S_BASE + lv_index;
180180
Some(unsafe { char::from_u32_unchecked(s) })
181181
}
182182
// Compose an LV_Syllable and a trailing consonant into an LVT_Syllable
183-
(S_BASE...S_LAST, T_FIRST...T_LAST) if (a - S_BASE) % T_COUNT == 0 => {
183+
(S_BASE..=S_LAST, T_FIRST..=T_LAST) if (a - S_BASE) % T_COUNT == 0 => {
184184
Some(unsafe { char::from_u32_unchecked(a + (b - T_BASE)) })
185185
}
186186
_ => None,
@@ -193,7 +193,7 @@ mod tests {
193193

194194
// Regression test from a bugfix where we were composing an LV_Syllable with
195195
// T_BASE directly. (We should only compose an LV_Syllable with a character
196-
// in the range `T_BASE + 1 ... T_LAST`.)
196+
// in the range `T_BASE + 1 ..= T_LAST`.)
197197
#[test]
198198
fn test_hangul_composition() {
199199
assert_eq!(compose_hangul('\u{c8e0}', '\u{11a7}'), None);

0 commit comments

Comments
 (0)