Skip to content

Commit 2081c29

Browse files
authored
Merge pull request #136 from Jules-Bertholet/std-tables
Use stdlib alphabetic and numeric character tables
2 parents 592ce00 + e96ec2e commit 2081c29

File tree

2 files changed

+38
-16
lines changed

scripts/unicode.py

+20-8
Original file line number | Diff line number | Diff line change
@@ -232,19 +232,27 @@ def emit_util_mod(f):
232232
233233
#[inline]
234234
fn is_alphabetic(c: char) -> bool {
235-
match c {
236-
'a' ..= 'z' | 'A' ..= 'Z' => true,
237-
c if c > '\x7f' => super::derived_property::Alphabetic(c),
238-
_ => false,
235+
if super::UNICODE_VERSION_U8 == char::UNICODE_VERSION {
236+
c.is_alphabetic()
237+
} else {
238+
match c {
239+
'a' ..= 'z' | 'A' ..= 'Z' => true,
240+
c if c > '\\x7f' => super::derived_property::Alphabetic(c),
241+
_ => false,
242+
}
239243
}
240244
}
241245
242246
#[inline]
243247
fn is_numeric(c: char) -> bool {
244-
match c {
245-
'0' ..= '9' => true,
246-
c if c > '\x7f' => super::general_category::N(c),
247-
_ => false,
248+
if super::UNICODE_VERSION_U8 == char::UNICODE_VERSION {
249+
c.is_numeric()
250+
} else {
251+
match c {
252+
'0' ..= '9' => true,
253+
c if c > '\\x7f' => super::general_category::N(c),
254+
_ => false,
255+
}
248256
}
249257
}
250258
@@ -388,6 +396,10 @@ def emit_break_module(f, break_table, break_cats, name):
388396
/// The version of [Unicode](http://www.unicode.org/)
389397
/// that this version of unicode-segmentation is based on.
390398
pub const UNICODE_VERSION: (u64, u64, u64) = (%s, %s, %s);
399+
""" % UNICODE_VERSION)
400+
401+
rf.write("""
402+
const UNICODE_VERSION_U8: (u8, u8, u8) = (%s, %s, %s);
391403
""" % UNICODE_VERSION)
392404

393405
# download and parse all the data

src/tables.rs

+18-8
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,8 @@
1616
/// that this version of unicode-segmentation is based on.
1717
pub const UNICODE_VERSION: (u64, u64, u64) = (15, 1, 0);
1818

19+
const UNICODE_VERSION_U8: (u8, u8, u8) = (15, 1, 0);
20+
1921
pub mod util {
2022
#[inline]
2123
pub fn bsearch_range_table(c: char, r: &[(char,char)]) -> bool {
@@ -29,19 +31,27 @@ pub mod util {
2931

3032
#[inline]
3133
fn is_alphabetic(c: char) -> bool {
32-
match c {
33-
'a' ..= 'z' | 'A' ..= 'Z' => true,
34-
c if c > '\x7f' => super::derived_property::Alphabetic(c),
35-
_ => false,
34+
if super::UNICODE_VERSION_U8 == char::UNICODE_VERSION {
35+
c.is_alphabetic()
36+
} else {
37+
match c {
38+
'a' ..= 'z' | 'A' ..= 'Z' => true,
39+
c if c > '\x7f' => super::derived_property::Alphabetic(c),
40+
_ => false,
41+
}
3642
}
3743
}
3844

3945
#[inline]
4046
fn is_numeric(c: char) -> bool {
41-
match c {
42-
'0' ..= '9' => true,
43-
c if c > '\x7f' => super::general_category::N(c),
44-
_ => false,
47+
if super::UNICODE_VERSION_U8 == char::UNICODE_VERSION {
48+
c.is_numeric()
49+
} else {
50+
match c {
51+
'0' ..= '9' => true,
52+
c if c > '\x7f' => super::general_category::N(c),
53+
_ => false,
54+
}
4555
}
4656
}
4757

0 commit comments

Comments
 (0)