Skip to content

Commit

Permalink
Add new API
Browse files Browse the repository at this point in the history
  • Loading branch information
T0mstone committed Feb 3, 2025
1 parent a3a16a7 commit 536dc49
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 7 deletions.
6 changes: 3 additions & 3 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ enum Line<'a> {
ModuleStart(&'a str),
ModuleEnd,
Symbol(&'a str, Option<char>),
Variant(&'a str, char),
Variant(ModifierSet<&'a str>, char),
}

fn main() {
Expand Down Expand Up @@ -97,7 +97,7 @@ fn tokenize(line: &str) -> StrResult<Line> {
validate_ident(part)?;
}
let c = decode_char(tail.ok_or("missing char")?)?;
Line::Variant(rest, c)
Line::Variant(ModifierSet(rest), c)
} else {
validate_ident(head)?;
let c = tail.map(decode_char).transpose()?;
Expand Down Expand Up @@ -154,7 +154,7 @@ fn parse<'a>(

let symbol = if variants.len() > 0 {
if let Some(c) = c {
variants.insert(0, ("", c));
variants.insert(0, (ModifierSet::empty(), c));
}
Symbol::Multi(variants)
} else {
Expand Down
49 changes: 49 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
/*!
Human-friendly notation for Unicode symbols.
## Model
A [`Symbol`] is a collection of one or more _variants_.
Each variant is identified by a set of _modifiers_ (see [`ModifierSet`])
and has a single character as its value.
The modifiers themselves can in principle be any non-empty strings
that don't contain the character `.`, but codex only defines
ones that are entirely made of ASCII alphabetical characters.
*/

include!("shared.rs");
Expand All @@ -26,6 +34,47 @@ impl Module {
}
}

impl<'a> ModifierSet<&'a str> {
    /// Consume the set and yield its modifiers one by one.
    ///
    /// Unlike a borrowing iterator, the yielded slices live for the full
    /// lifetime `'a` of the underlying string instead of being tied to a
    /// borrow of the set itself. Empty segments between `.` separators are
    /// skipped.
    pub fn to_iter(self) -> impl Iterator<Item = &'a str> {
        let joined: &'a str = self.0;
        joined.split('.').filter(|name| !name.is_empty())
    }
}

impl Symbol {
    /// Look up the character this symbol has for the given set of modifiers.
    ///
    /// A symbol without variants only answers to the empty modifier set.
    /// A symbol with variants hands the lookup to
    /// [`ModifierSet::best_match_in`] over its variant list.
    pub fn get(&self, modifs: ModifierSet<&str>) -> Option<char> {
        match self {
            Self::Single(c) if modifs.is_empty() => Some(*c),
            Self::Single(_) => None,
            Self::Multi(list) => {
                let candidates = list.iter().copied();
                modifs.best_match_in(candidates)
            }
        }
    }

    /// All variants of this symbol, as pairs of modifier set and character.
    ///
    /// A symbol without variants yields exactly one pair whose modifier set
    /// is empty; otherwise the variant list is yielded in its stored order.
    pub fn variants(&self) -> impl Iterator<Item = (ModifierSet<&str>, char)> {
        // Exactly one of the two halves is populated, depending on the kind
        // of symbol, so chaining them yields the right sequence either way.
        let (lone, listed) = match self {
            Self::Single(c) => (Some(*c), None),
            Self::Multi(list) => (None, Some(list.iter())),
        };
        let unmodified = lone
            .into_iter()
            .map(|c| (ModifierSet::<&str>::empty(), c));
        unmodified.chain(listed.into_iter().flatten().copied())
    }

    /// Every distinct modifier that occurs in any variant of this symbol.
    ///
    /// The names are gathered into an ordered set first, so each modifier is
    /// yielded once, in sorted order.
    pub fn modifiers(&self) -> impl Iterator<Item = &str> + '_ {
        let mut names = std::collections::BTreeSet::new();
        for (set, _) in self.variants() {
            names.extend(set.to_iter());
        }
        names.into_iter()
    }
}

/// A module that contains the other top-level modules.
pub const ROOT: Module = Module(&[
("emoji", Binding::new(Def::Module(EMOJI))),
Expand Down
111 changes: 107 additions & 4 deletions src/shared.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
use std::ops::{AddAssign, Deref};

macro_rules! declare_types {
($(<$lt:lifetime>)?
$(derive($($Der:ident),*),)?
str = $s:ty,
List = $L:ident<_>
List = $List:ident<_>
) => {
/// A module of definitions.
$(#[derive($($Der),*)])?
pub struct Module<$($lt)?>($L<($s, Binding<$($lt)?>)>);
pub struct Module<$($lt)?>($List<($s, Binding<$($lt)?>)>);

/// A definition bound in a module, with metadata.
$(#[derive($($Der),*)])?
Expand Down Expand Up @@ -38,8 +40,109 @@ macro_rules! declare_types {
pub enum Symbol<$($lt)?> {
/// A symbol without modifiers.
Single(char),
/// A symbol with named modifiers. The symbol defaults to its first variant.
Multi($L<($s, char)>),
/// A symbol with named modifiers.
/// The symbol defaults to its first variant.
Multi($List<(ModifierSet<$s>, char)>),
}
};
}

/// A set of modifiers.
///
/// The set is stored as a single string in which the individual modifiers
/// are separated by the character `.`; the empty string is the empty set.
///
/// Beware: the `PartialEq`, `Eq` and `Hash` implementations compare the
/// underlying string, so they depend on the order in which the modifiers
/// were written (`a.b` is not equal to `b.a`). To compare two values as
/// sets, check [`is_subset`](Self::is_subset) in both directions.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct ModifierSet<S>(S);

impl<S: Deref<Target = str>> ModifierSet<S> {
    /// Convert the underlying string to a slice.
    pub fn as_deref(&self) -> ModifierSet<&str> {
        ModifierSet(&self.0)
    }

    /// Construct a modifier set from a string,
    /// where modifiers are separated by the character `.`.
    ///
    /// It is not unsafe to use this function wrongly, but it can produce
    /// unexpected results down the line. Correct usage should ensure that
    /// `s` does not contain any empty modifiers (i.e. the sequence `..`)
    /// and that no modifier occurs twice.
    pub fn new_unchecked(s: S) -> Self {
        Self(s)
    }

    /// Construct an empty modifier set.
    pub fn empty() -> Self
    where
        S: Default,
    {
        Self(S::default())
    }

    /// Whether `self` is empty, i.e. whether the underlying string is empty.
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    /// Add a modifier to the set, without checking that it is a valid modifier.
    ///
    /// A `.` separator is inserted first if the set already has content.
    ///
    /// It is not unsafe to use this method wrongly, but that can produce
    /// unexpected results down the line. Correct usage should ensure that
    /// `m` is not empty and doesn't contain the character `.`.
    pub fn add_unchecked(&mut self, m: &str)
    where
        S: for<'a> AddAssign<&'a str>,
    {
        if !self.0.is_empty() {
            self.0 += ".";
        }
        self.0 += m;
    }

    /// Iterate over the list of modifiers in an arbitrary order.
    ///
    /// Empty segments between separators are skipped.
    pub fn iter(&self) -> impl Iterator<Item = &str> {
        self.0.split('.').filter(|s| !s.is_empty())
    }

    /// Whether the set contains the modifier `m`.
    pub fn contains(&self, m: &str) -> bool {
        self.iter().any(|lhs| lhs == m)
    }

    /// Whether all modifiers in `self` are also present in `other`.
    pub fn is_subset(&self, other: ModifierSet<&str>) -> bool {
        self.iter().all(|m| other.contains(m))
    }

    /// Find the best match from the list.
    ///
    /// To be considered a match, the modifier set must be a superset of
    /// (or equal to) `self`. Among different matches, the best one is selected
    /// by the following two criteria (in order):
    /// 1. Number of modifiers in common with `self` (more is better).
    /// 2. Total number of modifiers (fewer is better).
    ///
    /// If several matches are equally good, the one that comes first in
    /// `variants` is returned. Returns `None` if nothing matches.
    pub fn best_match_in<'a, T>(
        &self,
        variants: impl Iterator<Item = (ModifierSet<&'a str>, T)>,
    ) -> Option<T> {
        let mut best = None;
        let mut best_score = None;

        // Only candidates that contain every modifier of `self` are eligible.
        for candidate in variants.filter(|(set, _)| self.is_subset(*set)) {
            let total = candidate.0.iter().count();
            let matching = candidate.0.iter().filter(|m| self.contains(m)).count();

            // Tuples compare lexicographically: more shared modifiers wins
            // first, then `Reverse` makes a smaller total rank higher.
            let score = (matching, core::cmp::Reverse(total));

            // Strict `>` keeps the earliest candidate on ties.
            if best_score.map_or(true, |b| score > b) {
                best = Some(candidate.1);
                best_score = Some(score);
            }
        }

        best
    }
}

0 comments on commit 536dc49

Please sign in to comment.