Skip to content

Commit ecd2681

Browse files
committed
Add test for standardized variation sequences
1 parent 035b5fc commit ecd2681

File tree

2 files changed

+50
-0
lines changed

2 files changed

+50
-0
lines changed

build.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@ fn main() {
1919
"emoji/emoji-variation-sequences.txt",
2020
out_path.join("emoji-variation-sequences.txt"),
2121
);
22+
unicode_data::download_file(
23+
UNICODE_VERSION,
24+
"StandardizedVariants.txt",
25+
out_path.join("StandardizedVariants.txt"),
26+
)
2227
}
2328
}
2429

src/lib.rs

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,51 @@ mod test {
213213
.collect()
214214
}
215215

216+
/// Loads every standardized variation sequence that Unicode defines.
///
/// The sequences are parsed by `read_sequences` from the copy of
/// `StandardizedVariants.txt` found in `OUT_DIR`, which is embedded into the
/// test binary at compile time.
///
/// Emoji variation sequences (also known as "presentation sequences") are
/// not part of the returned set.
#[cfg(feature = "_test-unicode-conformance")]
fn get_valid_standardized_variation_sequences() -> HashSet<String> {
    // Embedded at compile time, so the file must already exist in `OUT_DIR`
    // when this module is built.
    const STANDARDIZED_VARIANTS: &str =
        include_str!(concat!(env!("OUT_DIR"), "/StandardizedVariants.txt"));

    read_sequences(STANDARDIZED_VARIANTS)
}
227+
228+
/// Tests whether a string is a standardized variation sequence.
///
/// The check is purely selector-based: it returns `true` as soon as `s`
/// contains at least one variation selector in `U+FE00..=U+FE0D`, and
/// `false` otherwise (including for the empty string). The characters
/// surrounding the selector are not inspected.
///
/// This does not include emoji variation sequences (i.e., presentation
/// sequences): the range stops at `U+FE0D`, so the text presentation
/// selector `U+FE0E` is never matched. Use [`is_presentation_sequence`] to
/// test whether a string is a presentation sequence.
fn is_standardized_variation_sequence(s: &str) -> bool {
    // Non-specific variation selectors from
    // https://unicode.org/charts/PDF/UFE00.pdf.
    //
    // A `char` range pattern lets us scan the string once instead of once per
    // selector, and avoids building each selector via `char::from_u32`.
    s.chars().any(|c| matches!(c, '\u{FE00}'..='\u{FE0D}'))
}
240+
241+
/// Verifies that every standardized variation sequence is one that Unicode
/// actually defines.
///
/// Each string handed to the predicate by `are_all_variants_valid` is
/// accepted when it is not a standardized variation sequence at all;
/// otherwise it must be a member of the set returned by
/// `get_valid_standardized_variation_sequences`.
///
/// Emoji variation sequences (i.e., presentation sequences) are covered
/// separately by [`no_invalid_presentation_sequence`].
#[cfg(feature = "_test-unicode-conformance")]
#[test]
fn no_invalid_standardized_variation_sequence() {
    let valid_sequences = get_valid_standardized_variation_sequences();

    // Strings without a variation selector pass unconditionally; the rest
    // must be listed among the valid sequences.
    let all_valid = are_all_variants_valid(ROOT, |candidate| {
        !is_standardized_variation_sequence(candidate) || valid_sequences.contains(candidate)
    });

    assert!(
        all_valid,
        "invalid standardized variation sequence(s) (see list above)",
    );
}
260+
216261
/// https://www.unicode.org/reports/tr51/#def_text_presentation_selector.
217262
const TEXT_PRESENTATION_SELECTOR: char = '\u{FE0E}';
218263
/// https://www.unicode.org/reports/tr51/#def_emoji_presentation_selector.

0 commit comments

Comments
 (0)