Skip to content

Commit f77d542

Browse files
authored
Merge pull request #22 from crazymerlyn/casefolding
Update CaseFolding.txt to Unicode 16.0.0
2 parents 96b9e42 + 6a43562 commit f77d542

File tree

4 files changed

+324
-7
lines changed

4 files changed

+324
-7
lines changed

.github/workflows/rust.yml

+2
Original file line numberDiff line numberDiff line change
@@ -27,5 +27,7 @@ jobs:
2727
runs-on: ubuntu-latest
2828
steps:
2929
- uses: actions/checkout@v2
30+
- name: Verify CaseFolding.txt
31+
run: curl https://www.unicode.org/Public/16.0.0/ucd/CaseFolding.txt -o CaseFolding_online.txt && diff CaseFolding.txt CaseFolding_online.txt
3032
- name: Verify regenerated files
3133
run: cargo run --package print-table >case_folding_data.rs && diff case_folding_data.rs src/case_folding_data.rs

Cargo.toml

+1 -1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "caseless"
3-
version = "0.2.1"
3+
version = "0.2.2"
44
authors = ["Simon Sapin <simon.sapin@exyr.org>"]
55
description = "Unicode caseless matching"
66
repository = "https://github.com/unicode-rs/rust-caseless"

CaseFolding.txt

+164 -5
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
# CaseFolding-10.0.0.txt
2-
# Date: 2017-04-14, 05:40:18 GMT
3-
# © 2017 Unicode®, Inc.
1+
# CaseFolding-16.0.0.txt
2+
# Date: 2024-04-30, 21:48:11 GMT
3+
# © 2024 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
5-
# For terms of use, see http://www.unicode.org/terms_of_use.html
5+
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
66
#
77
# Unicode Character Database
8-
# For documentation, see http://www.unicode.org/reports/tr44/
8+
# For documentation, see https://www.unicode.org/reports/tr44/
99
#
1010
# Case Folding Properties
1111
#
@@ -603,6 +603,53 @@
603603
1C86; C; 044A; # CYRILLIC SMALL LETTER TALL HARD SIGN
604604
1C87; C; 0463; # CYRILLIC SMALL LETTER TALL YAT
605605
1C88; C; A64B; # CYRILLIC SMALL LETTER UNBLENDED UK
606+
1C89; C; 1C8A; # CYRILLIC CAPITAL LETTER TJE
607+
1C90; C; 10D0; # GEORGIAN MTAVRULI CAPITAL LETTER AN
608+
1C91; C; 10D1; # GEORGIAN MTAVRULI CAPITAL LETTER BAN
609+
1C92; C; 10D2; # GEORGIAN MTAVRULI CAPITAL LETTER GAN
610+
1C93; C; 10D3; # GEORGIAN MTAVRULI CAPITAL LETTER DON
611+
1C94; C; 10D4; # GEORGIAN MTAVRULI CAPITAL LETTER EN
612+
1C95; C; 10D5; # GEORGIAN MTAVRULI CAPITAL LETTER VIN
613+
1C96; C; 10D6; # GEORGIAN MTAVRULI CAPITAL LETTER ZEN
614+
1C97; C; 10D7; # GEORGIAN MTAVRULI CAPITAL LETTER TAN
615+
1C98; C; 10D8; # GEORGIAN MTAVRULI CAPITAL LETTER IN
616+
1C99; C; 10D9; # GEORGIAN MTAVRULI CAPITAL LETTER KAN
617+
1C9A; C; 10DA; # GEORGIAN MTAVRULI CAPITAL LETTER LAS
618+
1C9B; C; 10DB; # GEORGIAN MTAVRULI CAPITAL LETTER MAN
619+
1C9C; C; 10DC; # GEORGIAN MTAVRULI CAPITAL LETTER NAR
620+
1C9D; C; 10DD; # GEORGIAN MTAVRULI CAPITAL LETTER ON
621+
1C9E; C; 10DE; # GEORGIAN MTAVRULI CAPITAL LETTER PAR
622+
1C9F; C; 10DF; # GEORGIAN MTAVRULI CAPITAL LETTER ZHAR
623+
1CA0; C; 10E0; # GEORGIAN MTAVRULI CAPITAL LETTER RAE
624+
1CA1; C; 10E1; # GEORGIAN MTAVRULI CAPITAL LETTER SAN
625+
1CA2; C; 10E2; # GEORGIAN MTAVRULI CAPITAL LETTER TAR
626+
1CA3; C; 10E3; # GEORGIAN MTAVRULI CAPITAL LETTER UN
627+
1CA4; C; 10E4; # GEORGIAN MTAVRULI CAPITAL LETTER PHAR
628+
1CA5; C; 10E5; # GEORGIAN MTAVRULI CAPITAL LETTER KHAR
629+
1CA6; C; 10E6; # GEORGIAN MTAVRULI CAPITAL LETTER GHAN
630+
1CA7; C; 10E7; # GEORGIAN MTAVRULI CAPITAL LETTER QAR
631+
1CA8; C; 10E8; # GEORGIAN MTAVRULI CAPITAL LETTER SHIN
632+
1CA9; C; 10E9; # GEORGIAN MTAVRULI CAPITAL LETTER CHIN
633+
1CAA; C; 10EA; # GEORGIAN MTAVRULI CAPITAL LETTER CAN
634+
1CAB; C; 10EB; # GEORGIAN MTAVRULI CAPITAL LETTER JIL
635+
1CAC; C; 10EC; # GEORGIAN MTAVRULI CAPITAL LETTER CIL
636+
1CAD; C; 10ED; # GEORGIAN MTAVRULI CAPITAL LETTER CHAR
637+
1CAE; C; 10EE; # GEORGIAN MTAVRULI CAPITAL LETTER XAN
638+
1CAF; C; 10EF; # GEORGIAN MTAVRULI CAPITAL LETTER JHAN
639+
1CB0; C; 10F0; # GEORGIAN MTAVRULI CAPITAL LETTER HAE
640+
1CB1; C; 10F1; # GEORGIAN MTAVRULI CAPITAL LETTER HE
641+
1CB2; C; 10F2; # GEORGIAN MTAVRULI CAPITAL LETTER HIE
642+
1CB3; C; 10F3; # GEORGIAN MTAVRULI CAPITAL LETTER WE
643+
1CB4; C; 10F4; # GEORGIAN MTAVRULI CAPITAL LETTER HAR
644+
1CB5; C; 10F5; # GEORGIAN MTAVRULI CAPITAL LETTER HOE
645+
1CB6; C; 10F6; # GEORGIAN MTAVRULI CAPITAL LETTER FI
646+
1CB7; C; 10F7; # GEORGIAN MTAVRULI CAPITAL LETTER YN
647+
1CB8; C; 10F8; # GEORGIAN MTAVRULI CAPITAL LETTER ELIFI
648+
1CB9; C; 10F9; # GEORGIAN MTAVRULI CAPITAL LETTER TURNED GAN
649+
1CBA; C; 10FA; # GEORGIAN MTAVRULI CAPITAL LETTER AIN
650+
1CBD; C; 10FD; # GEORGIAN MTAVRULI CAPITAL LETTER AEN
651+
1CBE; C; 10FE; # GEORGIAN MTAVRULI CAPITAL LETTER HARD SIGN
652+
1CBF; C; 10FF; # GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
606653
1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW
607654
1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE
608655
1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW
@@ -883,6 +930,7 @@
883930
1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
884931
1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
885932
1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
933+
1FD3; S; 0390; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
886934
1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI
887935
1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
888936
1FD8; C; 1FD0; # GREEK CAPITAL LETTER IOTA WITH VRACHY
@@ -891,6 +939,7 @@
891939
1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA
892940
1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
893941
1FE3; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
942+
1FE3; S; 03B0; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
894943
1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI
895944
1FE6; F; 03C5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI
896945
1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
@@ -1004,6 +1053,7 @@
10041053
2C2C; C; 2C5C; # GLAGOLITIC CAPITAL LETTER SHTAPIC
10051054
2C2D; C; 2C5D; # GLAGOLITIC CAPITAL LETTER TROKUTASTI A
10061055
2C2E; C; 2C5E; # GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
1056+
2C2F; C; 2C5F; # GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI
10071057
2C60; C; 2C61; # LATIN CAPITAL LETTER L WITH DOUBLE BAR
10081058
2C62; C; 026B; # LATIN CAPITAL LETTER L WITH MIDDLE TILDE
10091059
2C63; C; 1D7D; # LATIN CAPITAL LETTER P WITH STROKE
@@ -1180,6 +1230,25 @@ A7B2; C; 029D; # LATIN CAPITAL LETTER J WITH CROSSED-TAIL
11801230
A7B3; C; AB53; # LATIN CAPITAL LETTER CHI
11811231
A7B4; C; A7B5; # LATIN CAPITAL LETTER BETA
11821232
A7B6; C; A7B7; # LATIN CAPITAL LETTER OMEGA
1233+
A7B8; C; A7B9; # LATIN CAPITAL LETTER U WITH STROKE
1234+
A7BA; C; A7BB; # LATIN CAPITAL LETTER GLOTTAL A
1235+
A7BC; C; A7BD; # LATIN CAPITAL LETTER GLOTTAL I
1236+
A7BE; C; A7BF; # LATIN CAPITAL LETTER GLOTTAL U
1237+
A7C0; C; A7C1; # LATIN CAPITAL LETTER OLD POLISH O
1238+
A7C2; C; A7C3; # LATIN CAPITAL LETTER ANGLICANA W
1239+
A7C4; C; A794; # LATIN CAPITAL LETTER C WITH PALATAL HOOK
1240+
A7C5; C; 0282; # LATIN CAPITAL LETTER S WITH HOOK
1241+
A7C6; C; 1D8E; # LATIN CAPITAL LETTER Z WITH PALATAL HOOK
1242+
A7C7; C; A7C8; # LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY
1243+
A7C9; C; A7CA; # LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY
1244+
A7CB; C; 0264; # LATIN CAPITAL LETTER RAMS HORN
1245+
A7CC; C; A7CD; # LATIN CAPITAL LETTER S WITH DIAGONAL STROKE
1246+
A7D0; C; A7D1; # LATIN CAPITAL LETTER CLOSED INSULAR G
1247+
A7D6; C; A7D7; # LATIN CAPITAL LETTER MIDDLE SCOTS S
1248+
A7D8; C; A7D9; # LATIN CAPITAL LETTER SIGMOID S
1249+
A7DA; C; A7DB; # LATIN CAPITAL LETTER LAMBDA
1250+
A7DC; C; 019B; # LATIN CAPITAL LETTER LAMBDA WITH STROKE
1251+
A7F5; C; A7F6; # LATIN CAPITAL LETTER REVERSED HALF H
11831252
AB70; C; 13A0; # CHEROKEE SMALL LETTER A
11841253
AB71; C; 13A1; # CHEROKEE SMALL LETTER E
11851254
AB72; C; 13A2; # CHEROKEE SMALL LETTER I
@@ -1266,6 +1335,7 @@ FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL
12661335
FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI
12671336
FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL
12681337
FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T
1338+
FB05; S; FB06; # LATIN SMALL LIGATURE LONG S T
12691339
FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST
12701340
FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW
12711341
FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH
@@ -1374,6 +1444,41 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
13741444
104D1; C; 104F9; # OSAGE CAPITAL LETTER GHA
13751445
104D2; C; 104FA; # OSAGE CAPITAL LETTER ZA
13761446
104D3; C; 104FB; # OSAGE CAPITAL LETTER ZHA
1447+
10570; C; 10597; # VITHKUQI CAPITAL LETTER A
1448+
10571; C; 10598; # VITHKUQI CAPITAL LETTER BBE
1449+
10572; C; 10599; # VITHKUQI CAPITAL LETTER BE
1450+
10573; C; 1059A; # VITHKUQI CAPITAL LETTER CE
1451+
10574; C; 1059B; # VITHKUQI CAPITAL LETTER CHE
1452+
10575; C; 1059C; # VITHKUQI CAPITAL LETTER DE
1453+
10576; C; 1059D; # VITHKUQI CAPITAL LETTER DHE
1454+
10577; C; 1059E; # VITHKUQI CAPITAL LETTER EI
1455+
10578; C; 1059F; # VITHKUQI CAPITAL LETTER E
1456+
10579; C; 105A0; # VITHKUQI CAPITAL LETTER FE
1457+
1057A; C; 105A1; # VITHKUQI CAPITAL LETTER GA
1458+
1057C; C; 105A3; # VITHKUQI CAPITAL LETTER HA
1459+
1057D; C; 105A4; # VITHKUQI CAPITAL LETTER HHA
1460+
1057E; C; 105A5; # VITHKUQI CAPITAL LETTER I
1461+
1057F; C; 105A6; # VITHKUQI CAPITAL LETTER IJE
1462+
10580; C; 105A7; # VITHKUQI CAPITAL LETTER JE
1463+
10581; C; 105A8; # VITHKUQI CAPITAL LETTER KA
1464+
10582; C; 105A9; # VITHKUQI CAPITAL LETTER LA
1465+
10583; C; 105AA; # VITHKUQI CAPITAL LETTER LLA
1466+
10584; C; 105AB; # VITHKUQI CAPITAL LETTER ME
1467+
10585; C; 105AC; # VITHKUQI CAPITAL LETTER NE
1468+
10586; C; 105AD; # VITHKUQI CAPITAL LETTER NJE
1469+
10587; C; 105AE; # VITHKUQI CAPITAL LETTER O
1470+
10588; C; 105AF; # VITHKUQI CAPITAL LETTER PE
1471+
10589; C; 105B0; # VITHKUQI CAPITAL LETTER QA
1472+
1058A; C; 105B1; # VITHKUQI CAPITAL LETTER RE
1473+
1058C; C; 105B3; # VITHKUQI CAPITAL LETTER SE
1474+
1058D; C; 105B4; # VITHKUQI CAPITAL LETTER SHE
1475+
1058E; C; 105B5; # VITHKUQI CAPITAL LETTER TE
1476+
1058F; C; 105B6; # VITHKUQI CAPITAL LETTER THE
1477+
10590; C; 105B7; # VITHKUQI CAPITAL LETTER U
1478+
10591; C; 105B8; # VITHKUQI CAPITAL LETTER VE
1479+
10592; C; 105B9; # VITHKUQI CAPITAL LETTER XE
1480+
10594; C; 105BB; # VITHKUQI CAPITAL LETTER Y
1481+
10595; C; 105BC; # VITHKUQI CAPITAL LETTER ZE
13771482
10C80; C; 10CC0; # OLD HUNGARIAN CAPITAL LETTER A
13781483
10C81; C; 10CC1; # OLD HUNGARIAN CAPITAL LETTER AA
13791484
10C82; C; 10CC2; # OLD HUNGARIAN CAPITAL LETTER EB
@@ -1425,6 +1530,28 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
14251530
10CB0; C; 10CF0; # OLD HUNGARIAN CAPITAL LETTER EZS
14261531
10CB1; C; 10CF1; # OLD HUNGARIAN CAPITAL LETTER ENT-SHAPED SIGN
14271532
10CB2; C; 10CF2; # OLD HUNGARIAN CAPITAL LETTER US
1533+
10D50; C; 10D70; # GARAY CAPITAL LETTER A
1534+
10D51; C; 10D71; # GARAY CAPITAL LETTER CA
1535+
10D52; C; 10D72; # GARAY CAPITAL LETTER MA
1536+
10D53; C; 10D73; # GARAY CAPITAL LETTER KA
1537+
10D54; C; 10D74; # GARAY CAPITAL LETTER BA
1538+
10D55; C; 10D75; # GARAY CAPITAL LETTER JA
1539+
10D56; C; 10D76; # GARAY CAPITAL LETTER SA
1540+
10D57; C; 10D77; # GARAY CAPITAL LETTER WA
1541+
10D58; C; 10D78; # GARAY CAPITAL LETTER LA
1542+
10D59; C; 10D79; # GARAY CAPITAL LETTER GA
1543+
10D5A; C; 10D7A; # GARAY CAPITAL LETTER DA
1544+
10D5B; C; 10D7B; # GARAY CAPITAL LETTER XA
1545+
10D5C; C; 10D7C; # GARAY CAPITAL LETTER YA
1546+
10D5D; C; 10D7D; # GARAY CAPITAL LETTER TA
1547+
10D5E; C; 10D7E; # GARAY CAPITAL LETTER RA
1548+
10D5F; C; 10D7F; # GARAY CAPITAL LETTER NYA
1549+
10D60; C; 10D80; # GARAY CAPITAL LETTER FA
1550+
10D61; C; 10D81; # GARAY CAPITAL LETTER NA
1551+
10D62; C; 10D82; # GARAY CAPITAL LETTER PA
1552+
10D63; C; 10D83; # GARAY CAPITAL LETTER HA
1553+
10D64; C; 10D84; # GARAY CAPITAL LETTER OLD KA
1554+
10D65; C; 10D85; # GARAY CAPITAL LETTER OLD NA
14281555
118A0; C; 118C0; # WARANG CITI CAPITAL LETTER NGAA
14291556
118A1; C; 118C1; # WARANG CITI CAPITAL LETTER A
14301557
118A2; C; 118C2; # WARANG CITI CAPITAL LETTER WI
@@ -1457,6 +1584,38 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
14571584
118BD; C; 118DD; # WARANG CITI CAPITAL LETTER SSUU
14581585
118BE; C; 118DE; # WARANG CITI CAPITAL LETTER SII
14591586
118BF; C; 118DF; # WARANG CITI CAPITAL LETTER VIYO
1587+
16E40; C; 16E60; # MEDEFAIDRIN CAPITAL LETTER M
1588+
16E41; C; 16E61; # MEDEFAIDRIN CAPITAL LETTER S
1589+
16E42; C; 16E62; # MEDEFAIDRIN CAPITAL LETTER V
1590+
16E43; C; 16E63; # MEDEFAIDRIN CAPITAL LETTER W
1591+
16E44; C; 16E64; # MEDEFAIDRIN CAPITAL LETTER ATIU
1592+
16E45; C; 16E65; # MEDEFAIDRIN CAPITAL LETTER Z
1593+
16E46; C; 16E66; # MEDEFAIDRIN CAPITAL LETTER KP
1594+
16E47; C; 16E67; # MEDEFAIDRIN CAPITAL LETTER P
1595+
16E48; C; 16E68; # MEDEFAIDRIN CAPITAL LETTER T
1596+
16E49; C; 16E69; # MEDEFAIDRIN CAPITAL LETTER G
1597+
16E4A; C; 16E6A; # MEDEFAIDRIN CAPITAL LETTER F
1598+
16E4B; C; 16E6B; # MEDEFAIDRIN CAPITAL LETTER I
1599+
16E4C; C; 16E6C; # MEDEFAIDRIN CAPITAL LETTER K
1600+
16E4D; C; 16E6D; # MEDEFAIDRIN CAPITAL LETTER A
1601+
16E4E; C; 16E6E; # MEDEFAIDRIN CAPITAL LETTER J
1602+
16E4F; C; 16E6F; # MEDEFAIDRIN CAPITAL LETTER E
1603+
16E50; C; 16E70; # MEDEFAIDRIN CAPITAL LETTER B
1604+
16E51; C; 16E71; # MEDEFAIDRIN CAPITAL LETTER C
1605+
16E52; C; 16E72; # MEDEFAIDRIN CAPITAL LETTER U
1606+
16E53; C; 16E73; # MEDEFAIDRIN CAPITAL LETTER YU
1607+
16E54; C; 16E74; # MEDEFAIDRIN CAPITAL LETTER L
1608+
16E55; C; 16E75; # MEDEFAIDRIN CAPITAL LETTER Q
1609+
16E56; C; 16E76; # MEDEFAIDRIN CAPITAL LETTER HP
1610+
16E57; C; 16E77; # MEDEFAIDRIN CAPITAL LETTER NY
1611+
16E58; C; 16E78; # MEDEFAIDRIN CAPITAL LETTER X
1612+
16E59; C; 16E79; # MEDEFAIDRIN CAPITAL LETTER D
1613+
16E5A; C; 16E7A; # MEDEFAIDRIN CAPITAL LETTER OE
1614+
16E5B; C; 16E7B; # MEDEFAIDRIN CAPITAL LETTER N
1615+
16E5C; C; 16E7C; # MEDEFAIDRIN CAPITAL LETTER R
1616+
16E5D; C; 16E7D; # MEDEFAIDRIN CAPITAL LETTER O
1617+
16E5E; C; 16E7E; # MEDEFAIDRIN CAPITAL LETTER AI
1618+
16E5F; C; 16E7F; # MEDEFAIDRIN CAPITAL LETTER Y
14601619
1E900; C; 1E922; # ADLAM CAPITAL LETTER ALIF
14611620
1E901; C; 1E923; # ADLAM CAPITAL LETTER DAALI
14621621
1E902; C; 1E924; # ADLAM CAPITAL LETTER LAAM

0 commit comments

Comments
 (0)