rust-lang
diff --git a/‎library/core/src/unicode/mod.rs‎
Lines changed: 1 addition & 1 deletion b/‎library/core/src/unicode/mod.rs‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎library/core/src/unicode/unicode_data.rs‎
Lines changed: 385 additions & 822 deletions b/‎library/core/src/unicode/unicode_data.rs‎
Lines changed: 385 additions & 822 deletions
diff --git a/‎library/coretests/tests/lib.rs‎
Lines changed: 1 addition & 0 deletions b/‎library/coretests/tests/lib.rs‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎library/coretests/tests/unicode.rs‎
Lines changed: 97 additions & 0 deletions b/‎library/coretests/tests/unicode.rs‎
Lines changed: 97 additions & 0 deletions
diff --git a/‎library/coretests/tests/unicode/test_data.rs‎
Lines changed: 2902 additions & 0 deletions b/‎library/coretests/tests/unicode/test_data.rs‎
Lines changed: 2902 additions & 0 deletions
diff --git a/‎src/bootstrap/src/core/build_steps/run.rs‎
Lines changed: 4 additions & 0 deletions b/‎src/bootstrap/src/core/build_steps/run.rs‎
Lines changed: 4 additions & 0 deletions
@@ -19,7 +19,7 @@ pub(crate) use unicode_data::white_space::lookup as White_Space;
 pub(crate) mod printable;
 
 #[allow(unreachable_pub)]
-mod unicode_data;
+pub mod unicode_data;
 
 /// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of
 /// `char` and `str` methods are based on.
 
@@ -120,6 +120,7 @@
 #![feature(uint_bit_width)]
 #![feature(uint_carryless_mul)]
 #![feature(uint_gather_scatter_bits)]
+#![feature(unicode_internals)]
 #![feature(unsize)]
 #![feature(unwrap_infallible)]
 #![feature(widening_mul)]
 
@@ -1,5 +1,102 @@
+use core::unicode::unicode_data;
+use std::ops::RangeInclusive;
+
+mod test_data;
+
+/// Checks that the major component of `core::char::UNICODE_VERSION` is at least 10.
 #[test]
 pub fn version() {
     let (major, _minor, _update) = core::char::UNICODE_VERSION;
     assert!(major >= 10);
 }
+
+/// Checks a boolean property `lookup` function against a table of expected ranges.
+///
+/// Every `char` inside one of `ranges` must make `lookup` return `true`; every
+/// `char` in the gaps before, between and after the ranges must make it return
+/// `false`. The scan starts at U+0080 and runs through `char::MAX`, so code points
+/// below U+0080 are only checked if they fall inside one of `ranges`.
+///
+/// The gap computation assumes `ranges` is sorted in ascending order and
+/// non-overlapping — presumably guaranteed by the generator; verify against
+/// `test_data`.
+///
+/// # Panics
+///
+/// Panics on the first mismatch, printing the offending `char`. `#[track_caller]`
+/// attributes the failure to the calling test.
+#[track_caller]
+fn test_boolean_property(ranges: &[RangeInclusive<char>], lookup: fn(char) -> bool) {
+    // First code point of the next gap that must NOT have the property.
+    let mut start = '\u{80}';
+    for range in ranges {
+        // Gap between the previous range (or U+0080) and this range.
+        for c in start..*range.start() {
+            assert!(!lookup(c), "{c:?}");
+        }
+        // The range itself, both endpoints included.
+        for c in range.clone() {
+            assert!(lookup(c), "{c:?}");
+        }
+        // NOTE(review): `char::from_u32` returns `None` for surrogates and for values
+        // above `char::MAX`, so this `unwrap` panics if a range ever ends at U+D7FF
+        // or U+10FFFF.
+        start = char::from_u32(*range.end() as u32 + 1).unwrap();
+    }
+    // Tail after the last range, up to and including `char::MAX`.
+    for c in start..=char::MAX {
+        assert!(!lookup(c), "{c:?}");
+    }
+}
+
+/// Checks a case-mapping `lookup` function against a table of expected mappings.
+///
+/// Each `(key, val)` entry of `ranges` must satisfy `lookup(key) == val`. Every
+/// other `char` visited — the gaps before, between and after the keys, starting
+/// at U+0080 and running through `char::MAX` — must map to itself, i.e. to
+/// `[c, '\0', '\0']`.
+///
+/// The gap computation assumes the keys are sorted in ascending order without
+/// duplicates — presumably guaranteed by the generator; verify against
+/// `test_data`.
+///
+/// # Panics
+///
+/// Panics on the first mismatch, printing the offending `char`. `#[track_caller]`
+/// attributes the failure to the calling test.
+#[track_caller]
+fn test_case_mapping(ranges: &[(char, [char; 3])], lookup: fn(char) -> [char; 3]) {
+    // First code point of the next gap that must map to itself.
+    let mut start = '\u{80}';
+    for &(key, val) in ranges {
+        // Gap between the previous key (or U+0080) and this key.
+        for c in start..key {
+            assert_eq!(lookup(c), [c, '\0', '\0'], "{c:?}");
+        }
+        assert_eq!(lookup(key), val, "{key:?}");
+        // NOTE(review): `char::from_u32` returns `None` for surrogates and for values
+        // above `char::MAX`, so this `unwrap` panics if a key is ever U+D7FF or
+        // U+10FFFF.
+        start = char::from_u32(key as u32 + 1).unwrap();
+    }
+    // Tail after the last key, up to and including `char::MAX`.
+    for c in start..=char::MAX {
+        assert_eq!(lookup(c), [c, '\0', '\0'], "{c:?}");
+    }
+}
+
+// Boolean-property tests: each one passes a `test_data` range table and the
+// matching `unicode_data::*::lookup` function to `test_boolean_property`.
+#[test]
+#[cfg_attr(miri, ignore)] // Miri is too slow
+fn alphabetic() {
+    test_boolean_property(test_data::ALPHABETIC, unicode_data::alphabetic::lookup);
+}
+
+#[test]
+#[cfg_attr(miri, ignore)] // Miri is too slow
+fn case_ignorable() {
+    test_boolean_property(test_data::CASE_IGNORABLE, unicode_data::case_ignorable::lookup);
+}
+
+#[test]
+#[cfg_attr(miri, ignore)] // Miri is too slow
+fn cased() {
+    test_boolean_property(test_data::CASED, unicode_data::cased::lookup);
+}
+
+#[test]
+#[cfg_attr(miri, ignore)] // Miri is too slow
+fn grapheme_extend() {
+    test_boolean_property(test_data::GRAPHEME_EXTEND, unicode_data::grapheme_extend::lookup);
+}
+
+#[test]
+#[cfg_attr(miri, ignore)] // Miri is too slow
+fn lowercase() {
+    test_boolean_property(test_data::LOWERCASE, unicode_data::lowercase::lookup);
+}
+
+#[test]
+#[cfg_attr(miri, ignore)] // Miri is too slow
+fn n() {
+    test_boolean_property(test_data::N, unicode_data::n::lookup);
+}
+
+#[test]
+#[cfg_attr(miri, ignore)] // Miri is too slow
+fn uppercase() {
+    test_boolean_property(test_data::UPPERCASE, unicode_data::uppercase::lookup);
+}
+
+#[test]
+#[cfg_attr(miri, ignore)] // Miri is too slow
+fn white_space() {
+    test_boolean_property(test_data::WHITE_SPACE, unicode_data::white_space::lookup);
+}
+
+// Case-mapping tests: each one passes a `test_data` mapping table and the
+// matching `unicode_data::conversions` function to `test_case_mapping`.
+#[test]
+#[cfg_attr(miri, ignore)] // Miri is too slow
+fn to_lowercase() {
+    test_case_mapping(test_data::TO_LOWER, unicode_data::conversions::to_lower);
+}
+
+#[test]
+#[cfg_attr(miri, ignore)] // Miri is too slow
+fn to_uppercase() {
+    test_case_mapping(test_data::TO_UPPER, unicode_data::conversions::to_upper);
+}
@@ -358,6 +358,8 @@ impl Step for GenerateCompletions {
     }
 }
 
+/// The build step for generating the tables in `library/core/src/unicode/unicode_data.rs`
+/// and the test data in `library/coretests/tests/unicode/test_data.rs`.
 #[derive(Debug, Clone, Hash, PartialEq, Eq)]
 pub struct UnicodeTableGenerator;
 
@@ -375,7 +377,9 @@ impl Step for UnicodeTableGenerator {
 
     fn run(self, builder: &Builder<'_>) {
         let mut cmd = builder.tool_cmd(Tool::UnicodeTableGenerator);
+        // Paths of the generated files (both are checked into git), passed to the
+        // tool as arguments in this order:
         cmd.arg(builder.src.join("library/core/src/unicode/unicode_data.rs"));
+        cmd.arg(builder.src.join("library/coretests/tests/unicode/test_data.rs"));
         cmd.run(builder);
     }
 }
Original file line number	Diff line number	Diff line change
`@@ -358,6 +358,8 @@ impl Step for GenerateCompletions {`
`358`	`358`	`}`
`359`	`359`	`}`
`360`	`360`
	`361`	+/// The build step for generating the tables in `library/core/src/unicode/unicode_data.rs`
	`362`	+/// and the test data in `library/coretests/tests/unicode/test_data.rs`.
`361`	`363`	`#[derive(Debug, Clone, Hash, PartialEq, Eq)]`
`362`	`364`	`pub struct UnicodeTableGenerator;`
`363`	`365`
`@@ -375,7 +377,9 @@ impl Step for UnicodeTableGenerator {`
`375`	`377`
`376`	`378`	`fn run(self, builder: &Builder<'_>) {`
`377`	`379`	`let mut cmd = builder.tool_cmd(Tool::UnicodeTableGenerator);`
	`380`	`+ // Generated files that are checked into git:`
`378`	`381`	`cmd.arg(builder.src.join("library/core/src/unicode/unicode_data.rs"));`
	`382`	`+ cmd.arg(builder.src.join("library/coretests/tests/unicode/test_data.rs"));`
`379`	`383`	`cmd.run(builder);`
`380`	`384`	`}`
`381`	`385`	`}`