20
20
# out-of-line and check the tables.rs and normalization_tests.rs files into git.
21
21
import collections
22
22
import urllib .request
23
+ from itertools import batched
23
24
24
25
UNICODE_VERSION = "15.1.0"
25
26
UCD_URL = "https://www.unicode.org/Public/%s/ucd/" % UNICODE_VERSION
@@ -354,20 +355,26 @@ def is_first_and_last(first, last):
354
355
return False
355
356
return first [1 :- 8 ] == last [1 :- 7 ]
356
357
357
- def gen_mph_data (name , d , kv_type , kv_callback ):
358
+ def gen_mph_data (name , d , kv_type , kv_callback , kv_row_width ):
358
359
(salt , keys ) = minimal_perfect_hash (d )
359
- out .write ("pub(crate) const %s_SALT: &[u16] = &[\n " % name .upper ())
360
- for s in salt :
361
- out .write (" 0x{:x},\n " .format (s ))
360
+ out .write (f"\n pub(crate) const { name .upper ()} _SALT: &[u16] = &[\n " )
361
+ for s_row in batched (salt , 13 ):
362
+ out .write (" " )
363
+ for s in s_row :
364
+ out .write (f" 0x{ s :03X} ," )
365
+ out .write ("\n " )
366
+ out .write ("];\n " )
367
+ out .write (f"pub(crate) const { name .upper ()} _KV: &[{ kv_type } ] = &[\n " )
368
+ for k_row in batched (keys , kv_row_width ):
369
+ out .write (" " )
370
+ for k in k_row :
371
+ out .write (f" { kv_callback (k )} ," )
372
+ out .write ("\n " )
362
373
out .write ("];\n " )
363
- out .write ("pub(crate) const {}_KV: &[{}] = &[\n " .format (name .upper (), kv_type ))
364
- for k in keys :
365
- out .write (" {},\n " .format (kv_callback (k )))
366
- out .write ("];\n \n " )
367
374
368
375
def gen_combining_class (combining_classes , out ):
369
376
gen_mph_data ('canonical_combining_class' , combining_classes , 'u32' ,
370
- lambda k : "0x{:X}" . format ( int (combining_classes [k ]) | (k << 8 )) )
377
+ lambda k : f "0x{ int (combining_classes [k ]) | (k << 8 ):07X } " , 8 )
371
378
372
379
def gen_composition_table (canon_comp , out ):
373
380
table = {}
@@ -376,7 +383,7 @@ def gen_composition_table(canon_comp, out):
376
383
table [(c1 << 16 ) | c2 ] = c3
377
384
(salt , keys ) = minimal_perfect_hash (table )
378
385
gen_mph_data ('COMPOSITION_TABLE' , table , '(u32, char)' ,
379
- lambda k : "(0x%s , '\\ u{%s} ')" % ( hexify ( k ), hexify ( table [ k ])) )
386
+ lambda k : f "(0x{ k :08X } , '\\ u{{ { table [ k ]:06X } }} ')", 1 )
380
387
381
388
out .write ("pub(crate) fn composition_table_astral(c1: char, c2: char) -> Option<char> {\n " )
382
389
out .write (" match (c1, c2) {\n " )
@@ -403,7 +410,7 @@ def gen_decomposition_tables(canon_decomp, compat_decomp, cjk_compat_variants_de
403
410
assert offset < 65536
404
411
out .write ("];\n " )
405
412
gen_mph_data (name + '_decomposed' , table , "(u32, (u16, u16))" ,
406
- lambda k : "(0x{:x }, ({}, {}))" . format ( k , offsets [k ], len (table [k ])) )
413
+ lambda k : f "(0x{ k :05X } , (0x { offsets [k ]:03X } , 0x { len (table [k ]):X } ))" , 1 )
407
414
408
415
def gen_qc_match (prop_table , out ):
409
416
out .write (" match c {\n " )
@@ -412,7 +419,7 @@ def gen_qc_match(prop_table, out):
412
419
assert data in ('N' , 'M' )
413
420
result = "No" if data == 'N' else "Maybe"
414
421
if high :
415
- out .write (r" '\u{%s}'... '\u{%s}' => %s," % (low , high , result ))
422
+ out .write (r" '\u{%s}'..= '\u{%s}' => %s," % (low , high , result ))
416
423
else :
417
424
out .write (r" '\u{%s}' => %s," % (low , result ))
418
425
out .write ("\n " )
@@ -421,7 +428,7 @@ def gen_qc_match(prop_table, out):
421
428
out .write (" }\n " )
422
429
423
430
def gen_nfc_qc (prop_tables , out ):
424
- out .write ("#[inline]\n " )
431
+ out .write ("\n #[inline]\n " )
425
432
out .write ("#[allow(ellipsis_inclusive_range_patterns)]\n " )
426
433
out .write ("pub fn qc_nfc(c: char) -> IsNormalized {\n " )
427
434
gen_qc_match (prop_tables ['NFC_QC' ], out )
@@ -450,7 +457,7 @@ def gen_nfkd_qc(prop_tables, out):
450
457
451
458
def gen_combining_mark (general_category_mark , out ):
452
459
gen_mph_data ('combining_mark' , general_category_mark , 'u32' ,
453
- lambda k : '0x{:04x }' .format (k ))
460
+ lambda k : '0x{:05X }' .format (k ), 10 )
454
461
455
462
def gen_public_assigned (general_category_public_assigned , out ):
456
463
# This could be done as a hash but the table is somewhat small.
@@ -464,17 +471,16 @@ def gen_public_assigned(general_category_public_assigned, out):
464
471
out .write (" " )
465
472
start = False
466
473
else :
467
- out .write (" | " )
474
+ out .write ("\n | " )
468
475
if first == last :
469
- out .write ("'\\ u{%s}'\n " % hexify (first ))
476
+ out .write ("'\\ u{%s}'" % hexify (first ))
470
477
else :
471
- out .write ("'\\ u{%s}'..='\\ u{%s}'\n " % (hexify (first ), hexify (last )))
472
- out .write (" => true,\n " )
478
+ out .write ("'\\ u{%s}'..='\\ u{%s}'" % (hexify (first ), hexify (last )))
479
+ out .write (" => true,\n " )
473
480
474
481
out .write (" _ => false,\n " )
475
482
out .write (" }\n " )
476
483
out .write ("}\n " )
477
- out .write ("\n " )
478
484
479
485
def gen_stream_safe (leading , trailing , out ):
480
486
# This could be done as a hash but the table is very small.
@@ -488,10 +494,9 @@ def gen_stream_safe(leading, trailing, out):
488
494
out .write (" _ => 0,\n " )
489
495
out .write (" }\n " )
490
496
out .write ("}\n " )
491
- out .write ("\n " )
492
497
493
498
gen_mph_data ('trailing_nonstarters' , trailing , 'u32' ,
494
- lambda k : "0x{:X}" . format ( int (trailing [k ]) | (k << 8 )) )
499
+ lambda k : f "0x{ int (trailing [k ]) | (k << 8 ):07X } " , 8 )
495
500
496
501
def gen_tests (tests , out ):
497
502
out .write ("""#[derive(Debug)]
@@ -579,43 +584,33 @@ def minimal_perfect_hash(d):
579
584
data = UnicodeData ()
580
585
with open ("tables.rs" , "w" , newline = "\n " ) as out :
581
586
out .write (PREAMBLE )
582
- out .write ("#![cfg_attr(rustfmt, rustfmt::skip)]\n " )
583
587
out .write ("use crate::quick_check::IsNormalized;\n " )
584
588
out .write ("use crate::quick_check::IsNormalized::*;\n " )
585
589
out .write ("\n " )
586
590
587
591
version = "(%s, %s, %s)" % tuple (UNICODE_VERSION .split ("." ))
588
592
out .write ("#[allow(unused)]\n " )
589
- out .write ("pub const UNICODE_VERSION: (u8, u8, u8) = %s;\n \n " % version )
593
+ out .write ("pub const UNICODE_VERSION: (u8, u8, u8) = %s;\n " % version )
590
594
591
595
gen_combining_class (data .combining_classes , out )
592
- out .write ("\n " )
593
596
594
597
gen_composition_table (data .canon_comp , out )
595
- out .write ("\n " )
596
598
597
599
gen_decomposition_tables (data .canon_fully_decomp , data .compat_fully_decomp , data .cjk_compat_variants_fully_decomp , out )
598
600
599
601
gen_combining_mark (data .general_category_mark , out )
600
- out .write ("\n " )
601
602
602
603
gen_public_assigned (data .general_category_public_assigned , out )
603
- out .write ("\n " )
604
604
605
605
gen_nfc_qc (data .norm_props , out )
606
- out .write ("\n " )
607
606
608
607
gen_nfkc_qc (data .norm_props , out )
609
- out .write ("\n " )
610
608
611
609
gen_nfd_qc (data .norm_props , out )
612
- out .write ("\n " )
613
610
614
611
gen_nfkd_qc (data .norm_props , out )
615
- out .write ("\n " )
616
612
617
613
gen_stream_safe (data .ss_leading , data .ss_trailing , out )
618
- out .write ("\n " )
619
614
620
615
with open ("normalization_tests.rs" , "w" , newline = "\n " ) as out :
621
616
out .write (PREAMBLE )
0 commit comments