Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Codify and Resolve modifiers #46

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 10 additions & 23 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,13 @@ use std::path::Path;

type StrResult<T> = Result<T, String>;

/// A module of definitions.
struct Module<'a>(Vec<(&'a str, Binding<'a>)>);
include!("src/shared.rs");

declare_types!{
<'a>
str = &'a str,
List = Vec<_>
}

impl<'a> Module<'a> {
fn new(mut list: Vec<(&'a str, Binding<'a>)>) -> Self {
Expand All @@ -14,24 +19,6 @@ impl<'a> Module<'a> {
}
}

/// A definition bound in a module, with metadata.
struct Binding<'a> {
def: Def<'a>,
deprecation: Option<&'a str>,
}

/// A definition in a module.
enum Def<'a> {
Symbol(Symbol<'a>),
Module(Module<'a>),
}

/// A symbol, either a leaf or with modifiers.
enum Symbol<'a> {
Single(char),
Multi(Vec<(&'a str, char)>),
}

/// A single line during parsing.
#[derive(Debug, Copy, Clone)]
enum Line<'a> {
Expand All @@ -40,7 +27,7 @@ enum Line<'a> {
ModuleStart(&'a str),
ModuleEnd,
Symbol(&'a str, Option<char>),
Variant(&'a str, char),
Variant(ModifierSet<&'a str>, char),
}

fn main() {
Expand Down Expand Up @@ -110,7 +97,7 @@ fn tokenize(line: &str) -> StrResult<Line> {
validate_ident(part)?;
}
let c = decode_char(tail.ok_or("missing char")?)?;
Line::Variant(rest, c)
Line::Variant(ModifierSet(rest), c)
} else {
validate_ident(head)?;
let c = tail.map(decode_char).transpose()?;
Expand Down Expand Up @@ -167,7 +154,7 @@ fn parse<'a>(

let symbol = if variants.len() > 0 {
if let Some(c) = c {
variants.insert(0, ("", c));
variants.insert(0, (ModifierSet::empty(), c));
}
Symbol::Multi(variants)
} else {
Expand Down
80 changes: 50 additions & 30 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,23 @@
/*!
Human-friendly notation for Unicode symbols.

## Model
A [`Symbol`] is a collection of one or more _variants_.
Each variant is identified by a set of _modifiers_ (see [`ModifierSet`])
and has a single character as its value.
In principle, a modifier can be any non-empty string
that doesn't contain the character `.`, but codex only defines
modifiers that consist entirely of ASCII alphabetic characters.
*/

/// A module of definitions.
#[derive(Debug, Copy, Clone)]
pub struct Module(&'static [(&'static str, Binding)]);
include!("shared.rs");

type StaticSlice<T> = &'static [T];
declare_types! {
derive(Debug, Copy, Clone),
str = &'static str,
List = StaticSlice<_>
}

impl Module {
/// Try to get a bound definition in the module.
Expand All @@ -21,38 +34,45 @@ impl Module {
}
}

/// A definition bound in a module, with metadata.
#[derive(Debug, Copy, Clone)]
pub struct Binding {
/// The bound definition.
pub def: Def,
/// A deprecation message for the definition, if it is deprecated.
pub deprecation: Option<&'static str>,
impl<'a> ModifierSet<&'a str> {
/// Iterate over the list of modifiers with the original lifetime.
pub fn to_iter(self) -> impl Iterator<Item = &'a str> {
self.0.split('.').filter(|s| !s.is_empty())
}
}

impl Binding {
/// Create a new bound definition.
pub const fn new(definition: Def) -> Self {
Self { def: definition, deprecation: None }
impl Symbol {
/// Look up the character this symbol has for the modifier set `modifs`.
///
/// A `Single` symbol only answers to the empty modifier set. For a `Multi`
/// symbol the lookup is delegated to [`ModifierSet::best_match_in`] over
/// the symbol's variant list. Returns `None` when nothing matches.
pub fn get(&self, modifs: ModifierSet<&str>) -> Option<char> {
    match *self {
        Self::Single(ch) => {
            if modifs.is_empty() {
                Some(ch)
            } else {
                None
            }
        }
        Self::Multi(table) => modifs.best_match_in(table.iter().copied()),
    }
}
}

/// A definition in a module.
#[derive(Debug, Copy, Clone)]
pub enum Def {
/// A symbol, potentially with modifiers.
Symbol(Symbol),
/// A nested module.
Module(Module),
}
/// The characters that are covered by this symbol.
///
/// Yields `(modifiers, character)` pairs. A `Single` symbol produces exactly
/// one pair whose modifier set is empty; a `Multi` symbol produces a copy of
/// each entry of its variant list, in stored order.
pub fn variants(&self) -> impl Iterator<Item = (ModifierSet<&str>, char)> {
// Local helper so that both arms of the `match` below evaluate to the same
// concrete type.
enum Variants {
Single(std::iter::Once<char>),
Multi(std::slice::Iter<'static, (ModifierSet<&'static str>, char)>),
}
let mut iter = match self {
Self::Single(c) => Variants::Single(std::iter::once(*c)),
Self::Multi(sl) => Variants::Multi(sl.iter()),
};
// Pull from whichever inner iterator was selected; the `?` / `next()`
// propagate `None` once that inner iterator is exhausted.
std::iter::from_fn(move || match &mut iter {
Variants::Single(iter) => Some((ModifierSet::empty(), iter.next()?)),
Variants::Multi(iter) => iter.next().copied(),
})
}

/// A symbol, either a leaf or with modifiers.
#[derive(Debug, Copy, Clone)]
pub enum Symbol {
/// A symbol without modifiers.
Single(char),
/// A symbol with named modifiers. The symbol defaults to its first variant.
Multi(&'static [(&'static str, char)]),
/// All distinct modifiers that occur in any variant of this symbol.
///
/// The modifiers are gathered into a `BTreeSet`, so each one is yielded
/// once and in the set's sorted order.
pub fn modifiers(&self) -> impl Iterator<Item = &str> + '_ {
    let mut unique = std::collections::BTreeSet::new();
    for (set, _) in self.variants() {
        unique.extend(set.to_iter());
    }
    unique.into_iter()
}
}

/// A module that contains the other top-level modules.
Expand Down
148 changes: 148 additions & 0 deletions src/shared.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
use std::ops::{AddAssign, Deref};

// Generates the data model types `Module`, `Binding`, `Def` and `Symbol`.
//
// Invocation syntax, in this order:
// - an optional `<'lifetime>`, used as the generic parameter of every
//   generated type,
// - an optional `derive(Trait, ...),` list that is applied to every
//   generated type,
// - `str = <type>,` the string type used for names and deprecation messages,
// - `List = <Ident><_>` the one-parameter container type used for lists.
//
// The generated `Symbol` type refers to `ModifierSet`, which must be in scope
// at the invocation site.
macro_rules! declare_types {
    (
        $(<$life:lifetime>)?
        $(derive($($Trait:ident),*),)?
        str = $Str:ty,
        List = $Seq:ident<_>
    ) => {
        /// A symbol, either a leaf or with modifiers.
        $(#[derive($($Trait),*)])?
        pub enum Symbol<$($life)?> {
            /// A symbol without modifiers.
            Single(char),
            /// A symbol with named modifiers.
            /// The symbol defaults to its first variant.
            Multi($Seq<(ModifierSet<$Str>, char)>),
        }

        /// A definition in a module.
        $(#[derive($($Trait),*)])?
        pub enum Def<$($life)?> {
            /// A symbol, potentially with modifiers.
            Symbol(Symbol<$($life)?>),
            /// A nested module.
            Module(Module<$($life)?>),
        }

        /// A definition bound in a module, with metadata.
        $(#[derive($($Trait),*)])?
        pub struct Binding<$($life)?> {
            /// The bound definition.
            pub def: Def<$($life)?>,
            /// A deprecation message for the definition, if it is deprecated.
            pub deprecation: Option<$Str>,
        }

        impl<$($life)?> Binding<$($life)?> {
            /// Create a new bound definition.
            pub const fn new(definition: Def<$($life)?>) -> Self {
                Self { def: definition, deprecation: None }
            }
        }

        /// A module of definitions.
        $(#[derive($($Trait),*)])?
        pub struct Module<$($life)?>($Seq<($Str, Binding<$($life)?>)>);
    };
}

/// A set of modifiers, stored as one string in which the individual
/// modifiers are separated by the character `.`.
#[derive(Debug, Copy, Clone)]
pub struct ModifierSet<S>(S);

impl<S: Deref<Target = str>> ModifierSet<S> {
    /// Borrow the underlying string, yielding a set backed by a `&str`.
    pub fn as_deref(&self) -> ModifierSet<&str> {
        ModifierSet(self.0.deref())
    }

    /// Wrap a string of `.`-separated modifiers without validating it.
    ///
    /// Misuse is not unsafe, but may lead to surprising results later on.
    /// Callers should make sure that `s` contains no empty modifier
    /// (i.e. no `..` sequence) and that no modifier appears more than once.
    pub fn new_unchecked(s: S) -> Self {
        ModifierSet(s)
    }

    /// Create a set that contains no modifiers.
    pub fn empty() -> Self
    where
        S: Default,
    {
        Self::new_unchecked(S::default())
    }

    /// Whether the underlying string is empty.
    pub fn is_empty(&self) -> bool {
        self.0.deref().is_empty()
    }

    /// Append the modifier `m` to the set without validating it.
    ///
    /// A `.` separator is inserted first unless the set is still empty.
    ///
    /// Misuse is not unsafe, but may lead to surprising results later on.
    /// Callers should make sure that `m` is non-empty and does not contain
    /// the character `.`.
    pub fn add_unchecked(&mut self, m: &str)
    where
        S: for<'a> AddAssign<&'a str>,
    {
        let needs_separator = !self.is_empty();
        if needs_separator {
            self.0 += ".";
        }
        self.0 += m;
    }

    /// Iterate over the modifiers of the set; the order is unspecified.
    ///
    /// Empty segments between separators are skipped.
    pub fn iter(&self) -> impl Iterator<Item = &str> {
        let dotted: &str = self.0.deref();
        dotted.split('.').filter(|segment| !segment.is_empty())
    }

    /// Whether `m` is one of the modifiers of the set.
    pub fn contains(&self, m: &str) -> bool {
        for own in self.iter() {
            if own == m {
                return true;
            }
        }
        false
    }

    /// Whether every modifier of `self` also occurs in `other`.
    pub fn is_subset(&self, other: ModifierSet<&str>) -> bool {
        // Equivalent to "all of ours are in `other`": no modifier of ours
        // may be missing from `other`.
        !self.iter().any(|own| !other.contains(own))
    }

    /// Pick the best matching entry out of `variants`.
    ///
    /// An entry matches if its modifier set is a superset of (or equal to)
    /// `self`. Among all matches, the winner is chosen by these criteria,
    /// in order:
    /// 1. Number of modifiers shared with `self` (more is better).
    /// 2. Total number of modifiers (fewer is better).
    ///
    /// If several entries tie, the one that comes first wins. Returns
    /// `None` if no entry matches.
    pub fn best_match_in<'a, T>(
        &self,
        variants: impl Iterator<Item = (ModifierSet<&'a str>, T)>,
    ) -> Option<T> {
        let mut winner: Option<((usize, core::cmp::Reverse<usize>), T)> = None;

        for (set, value) in variants {
            // Only supersets of `self` are eligible.
            if !self.is_subset(set) {
                continue;
            }

            let total = set.iter().count();
            let matching = set.iter().filter(|m| self.contains(m)).count();
            let score = (matching, core::cmp::Reverse(total));

            // Strict comparison: an earlier entry keeps its place on a tie.
            let improves = match &winner {
                Some((top, _)) => score > *top,
                None => true,
            };
            if improves {
                winner = Some((score, value));
            }
        }

        winner.map(|(_, value)| value)
    }
}