diff --git a/build.rs b/build.rs index f4f1e89..635307c 100644 --- a/build.rs +++ b/build.rs @@ -27,7 +27,7 @@ enum Line<'a> { ModuleStart(&'a str), ModuleEnd, Symbol(&'a str, Option), - Variant(&'a str, char), + Variant(ModifierSet<&'a str>, char), } fn main() { @@ -97,7 +97,7 @@ fn tokenize(line: &str) -> StrResult { validate_ident(part)?; } let c = decode_char(tail.ok_or("missing char")?)?; - Line::Variant(rest, c) + Line::Variant(ModifierSet(rest), c) } else { validate_ident(head)?; let c = tail.map(decode_char).transpose()?; @@ -154,7 +154,7 @@ fn parse<'a>( let symbol = if variants.len() > 0 { if let Some(c) = c { - variants.insert(0, ("", c)); + variants.insert(0, (ModifierSet::empty(), c)); } Symbol::Multi(variants) } else { diff --git a/src/lib.rs b/src/lib.rs index 0787444..4d208f3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,13 @@ /*! Human-friendly notation for Unicode symbols. + +## Model +A [`Symbol`] is a collection of one or more _variants_. +Each variant is identified by a set of _modifiers_ (see [`ModifierSet`]) +and has a single character as its value. +The modifiers themselves can in principle be any non-empty strings +that don't contain the character `.`, but codex only defines +ones that are entirely made of ASCII alphabetical characters. */ include!("shared.rs"); @@ -26,6 +34,47 @@ impl Module { } } +impl<'a> ModifierSet<&'a str> { + /// Consume the set and iterate over its modifiers with the original lifetime, skipping empty segments between `.` separators. + pub fn to_iter(self) -> impl Iterator { + self.0.split('.').filter(|s| !s.is_empty()) + } +} + +impl Symbol { + /// Get the symbol's character for a given set of modifiers: a `Single` symbol only matches the empty set, a `Multi` symbol returns its best-matching variant. + pub fn get(&self, modifs: ModifierSet<&str>) -> Option { + match self { + Self::Single(c) => modifs.is_empty().then_some(*c), + Self::Multi(list) => modifs.best_match_in(list.iter().copied()), + } + } + + /// The variants of this symbol, each as a pair of its modifier set and its character.
+ pub fn variants(&self) -> impl Iterator, char)> { + enum Variants { + Single(std::iter::Once), + Multi(std::slice::Iter<'static, (ModifierSet<&'static str>, char)>), + } + let mut iter = match self { + Self::Single(c) => Variants::Single(std::iter::once(*c)), + Self::Multi(sl) => Variants::Multi(sl.iter()), + }; + std::iter::from_fn(move || match &mut iter { + Variants::Single(iter) => Some((ModifierSet::empty(), iter.next()?)), + Variants::Multi(iter) => iter.next().copied(), + }) + } + + /// Possible modifiers for this symbol, gathered from all of its variants. + pub fn modifiers(&self) -> impl Iterator + '_ { + self.variants() + .flat_map(|(m, _)| m.to_iter()) + .collect::>() + .into_iter() + } +} + /// A module that contains the other top-level modules. pub const ROOT: Module = Module(&[ ("emoji", Binding::new(Def::Module(EMOJI))), diff --git a/src/shared.rs b/src/shared.rs index f4327c6..9407aab 100644 --- a/src/shared.rs +++ b/src/shared.rs @@ -1,12 +1,14 @@ +use std::ops::{AddAssign, Deref}; + macro_rules! declare_types { ($(<$lt:lifetime>)? $(derive($($Der:ident),*),)? str = $s:ty, - List = $L:ident<_> + List = $List:ident<_> ) => { /// A module of definitions. $(#[derive($($Der),*)])? - pub struct Module<$($lt)?>($L<($s, Binding<$($lt)?>)>); + pub struct Module<$($lt)?>($List<($s, Binding<$($lt)?>)>); /// A definition bound in a module, with metadata. $(#[derive($($Der),*)])? @@ -38,8 +40,109 @@ macro_rules! declare_types { pub enum Symbol<$($lt)?> { /// A symbol without modifiers. Single(char), - /// A symbol with named modifiers. The symbol defaults to its first variant. - Multi($L<($s, char)>), + /// A symbol with named modifiers. + /// The symbol defaults to its first variant. + Multi($List<(ModifierSet<$s>, char)>), } }; } + +/// A set of modifiers, stored as a single string in which the modifiers are separated by the character `.`. +#[derive(Debug, Copy, Clone)] +pub struct ModifierSet(S); + +impl> ModifierSet { + /// Borrow the underlying string as a slice, giving a `ModifierSet<&str>` view of the same modifiers.
+ pub fn as_deref(&self) -> ModifierSet<&str> { + ModifierSet(&self.0) + } + + /// Construct a modifier set from a string, + /// where modifiers are separated by the character `.`. + /// + /// It is not unsafe to use this function wrongly, but it can produce + /// unexpected results down the line. Correct usage should ensure that + /// `s` does not contain any empty modifiers (i.e. the sequence `..`) + /// and that no modifier occurs twice. + pub fn new_unchecked(s: S) -> Self { + Self(s) + } + + /// Construct an empty modifier set. + pub fn empty() -> Self + where + S: Default, + { + Self(S::default()) + } + + /// Whether `self` is empty, i.e. whether the underlying string is empty. + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Add a modifier to the set, without checking that it is a valid modifier. + /// + /// It is not unsafe to use this method wrongly, but that can produce + /// unexpected results down the line. Correct usage should ensure that + /// `m` is not empty and doesn't contain the character `.`. + pub fn add_unchecked(&mut self, m: &str) + where + S: for<'a> AddAssign<&'a str>, + { + if !self.0.is_empty() { + self.0 += "."; + } + self.0 += m; + } + + /// Iterate over the list of modifiers in an arbitrary order. + pub fn iter(&self) -> impl Iterator { + self.0.split('.').filter(|s| !s.is_empty()) + } + + /// Whether the set contains the modifier `m`. + pub fn contains(&self, m: &str) -> bool { + self.iter().any(|lhs| lhs == m) + } + + /// Whether all modifiers in `self` are also present in `other`. + pub fn is_subset(&self, other: ModifierSet<&str>) -> bool { + self.iter().all(|m| other.contains(m)) + } + + /// Find the value whose modifier set is the best match for `self` in `variants`. + /// + /// To be considered a match, the modifier set must be a superset of + /// (or equal to) `self`. Among different matches, the best one is selected + /// by the following two criteria (in order): + /// 1. Number of modifiers in common with `self` (more is better). + /// 2. Total number of modifiers (fewer is better).
+ pub fn best_match_in<'a, T>( + &self, + variants: impl Iterator, T)>, + ) -> Option { + let mut best = None; + let mut best_score = None; + + // Score every candidate whose set contains all modifiers of `self` and keep the first one with the highest score. + for candidate in variants.filter(|(set, _)| self.is_subset(*set)) { + let mut matching = 0; + let mut total = 0; + for modifier in candidate.0.iter() { + if self.contains(modifier) { + matching += 1; + } + total += 1; + } + + let score = (matching, core::cmp::Reverse(total)); + if best_score.map_or(true, |b| score > b) { + best = Some(candidate.1); + best_score = Some(score); + } + } + + best + } +}