Merge pull request #7 from life4/no-std

no_std support
life4 · Sep 11, 2024 · 813bb70
2 parents bc80088 + afdf1e8
commit 813bb70
Show file tree

Hide file tree

Showing 31 changed files with 479 additions and 535 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,30 +1,29 @@
 [package]
 name = "textdistance"
-version = "1.0.2"
+version = "1.1.0"
 edition = "2021"
 authors = ["Gram <[email protected]>"]
 description = "Lots of algorithms to compare how similar two sequences are"
 repository = "https://github.com/life4/textdistance.rs"
 license = "MIT"
-keywords = [
-    "jaro",
-    "hamming",
-    "levenshtein",
-    "similarity",
-    "distance",
-]
+keywords = ["jaro", "hamming", "levenshtein", "similarity", "distance"]
 categories = [
     "algorithms",
     "science",
+    "no-std",
     "text-processing",
     "command-line-interface",
 ]
 
+[features]
+default = ["std"]
+std = []
+
 [dev-dependencies]
-assert2 = "0.3.10"
-criterion = "0.4.0"
+assert2 = "0.3.15"
+criterion = "0.5.1"
 proptest = "1.1.0"
-rstest = "0.17.0"
+rstest = "0.22.0"
 unicode-segmentation = "1.10.1"
 
 [[bench]]

diff --git a/README.md b/README.md
@@ -12,6 +12,7 @@ Features:
 + 📚 Contains 20+ algorithms for all purposes.
 + 🔬 Includes state-of-the-art algorithms like `EntropyNCD` and `Sift4`.
 + 🪶 Zero-dependency.
++ 🐜 `#![no_std]` support (embedded systems).
 + 🔨 Works with any iterators, including bytes, code points, Unicode grapheme clusters, words, and numbers.
 + ❤️ Friendly and consistent API for all algorithms.
 + 📏 Optional normalization of the result on the 0.0-1.0 interval.
@@ -66,6 +67,12 @@ Normalization for other metrics:
 cargo add textdistance
 ```
 
+Or, if you're going to use it in a [no_std](https://docs.rust-embedded.org/book/intro/no-std.html) project, disable the default `std` feature:
+
+```shell
+cargo add --no-default-features textdistance
+```
+
 ## Usage
 
 The `textdistance::str` module provides shortcut functions for each algorithm for calculating the distance/similarity between two strings:

diff --git a/Taskfile.yaml b/Taskfile.yaml
@@ -57,6 +57,7 @@ tasks:
       CLICOLOR_FORCE: "yes"
     cmds:
       - cargo nextest run --no-fail-fast {{.CLI_ARGS}}
+      - cargo build --no-default-features
 
   doctest:
     cmds:

diff --git a/benches/str_benchmarks.rs b/benches/str_benchmarks.rs
@@ -1,7 +1,7 @@
+use core::time::Duration;
 use criterion::BenchmarkId;
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
 use std::fs;
-use std::time::Duration;
 use textdistance::{nstr, str};
 
 fn read_licenses() -> Vec<(String, String)> {

diff --git a/src/algorithm.rs b/src/algorithm.rs
@@ -1,5 +1,6 @@
 use super::Result;
-use std::hash::Hash;
+use alloc::vec::Vec;
+use core::hash::Hash;
 
 /// A base trait for all distance/similarity algorithms.
 ///

diff --git a/src/algorithms/bag.rs b/src/algorithms/bag.rs
@@ -1,5 +1,5 @@
 //! Bag distance
-
+#![cfg(feature = "std")]
 use crate::counter::Counter;
 use crate::{Algorithm, Result};
 
@@ -13,7 +13,7 @@ impl Algorithm<usize> for Bag {
     fn for_iter<C, E>(&self, s1: C, s2: C) -> Result<usize>
     where
         C: Iterator<Item = E>,
-        E: Eq + std::hash::Hash,
+        E: Eq + core::hash::Hash,
     {
         let c1 = Counter::from_iter(s1);
         let c2 = Counter::from_iter(s2);

diff --git a/src/algorithms/cosine.rs b/src/algorithms/cosine.rs
@@ -1,4 +1,5 @@
 //! Cosine similarity
+#![cfg(feature = "std")]
 use crate::counter::Counter;
 use crate::{Algorithm, Result};
 
@@ -15,7 +16,7 @@ impl Algorithm<f64> for Cosine {
     fn for_iter<C, E>(&self, s1: C, s2: C) -> Result<f64>
     where
         C: Iterator<Item = E>,
-        E: Eq + std::hash::Hash,
+        E: Eq + core::hash::Hash,
     {
         let c1 = Counter::from_iter(s1);
         let c2 = Counter::from_iter(s2);

diff --git a/src/algorithms/damerau_levenshtein.rs b/src/algorithms/damerau_levenshtein.rs
@@ -1,7 +1,10 @@
 //! Damerau-Levenshtein distance
+#![cfg(feature = "std")]
 use crate::{Algorithm, Result};
+use alloc::vec;
+use alloc::vec::Vec;
+use core::hash::Hash;
 use std::collections::HashMap;
-use std::hash::Hash;
 
 /// [Damerau-Levenshtein distance] is an edit distance between two sequences.
 ///

diff --git a/src/algorithms/entropy_ncd.rs b/src/algorithms/entropy_ncd.rs
@@ -1,7 +1,8 @@
 //! Entropy-based Normalized Compression Distance
+#![cfg(feature = "std")]
 use crate::counter::Counter;
 use crate::{Algorithm, Result};
-use std::hash::Hash;
+use core::hash::Hash;
 
 /// Entropy-based [Normalized Compression Distance].
 ///
@@ -46,7 +47,7 @@ impl Algorithm<f64> for EntropyNCD {
     fn for_iter<C, E>(&self, s1: C, s2: C) -> Result<f64>
     where
         C: Iterator<Item = E>,
-        E: Eq + std::hash::Hash,
+        E: Eq + core::hash::Hash,
     {
         let c1 = Counter::from_iter(s1);
         let c2 = Counter::from_iter(s2);

diff --git a/src/algorithms/jaccard.rs b/src/algorithms/jaccard.rs
@@ -1,4 +1,5 @@
 //! Jaccard index
+#![cfg(feature = "std")]
 use crate::counter::Counter;
 use crate::{Algorithm, Result};
 
@@ -17,7 +18,7 @@ impl Algorithm<f64> for Jaccard {
     fn for_iter<C, E>(&self, s1: C, s2: C) -> Result<f64>
     where
         C: Iterator<Item = E>,
-        E: Eq + std::hash::Hash,
+        E: Eq + core::hash::Hash,
     {
         let c1 = Counter::from_iter(s1);
         let c2 = Counter::from_iter(s2);

diff --git a/src/algorithms/jaro.rs b/src/algorithms/jaro.rs
@@ -1,5 +1,6 @@
 //! Jaro similarity
 use crate::{Algorithm, Result};
+use alloc::vec;
 
 /// [Jaro similarity] is calculated based on the number of transpositions to turn one string into the other.
 ///

diff --git a/src/algorithms/jaro_winkler.rs b/src/algorithms/jaro_winkler.rs
@@ -34,7 +34,7 @@ impl JaroWinkler {
     fn winklerize<C, E>(&self, jaro: f64, s1: C, s2: C) -> f64
     where
         C: Iterator<Item = E>,
-        E: Eq + std::hash::Hash,
+        E: Eq + core::hash::Hash,
     {
         debug_assert!(self.prefix_weight * self.max_prefix as f64 <= 1.0);
         let mut prefix_len = 0;
@@ -55,7 +55,7 @@ impl JaroWinkler {
 impl Algorithm<f64> for JaroWinkler {
     fn for_vec<E>(&self, s1: &[E], s2: &[E]) -> Result<f64>
     where
-        E: Eq + std::hash::Hash,
+        E: Eq + core::hash::Hash,
     {
         let jaro = self.jaro.for_vec(s1, s2).nval();
         Result {

diff --git a/src/algorithms/lcsseq.rs b/src/algorithms/lcsseq.rs
@@ -1,5 +1,7 @@
 //! Longest common subsequence
 use crate::{Algorithm, Result};
+use alloc::vec;
+use alloc::vec::Vec;
 
 /// The length of the [Longest common subsequence].
 ///

diff --git a/src/algorithms/lcsstr.rs b/src/algorithms/lcsstr.rs
@@ -1,5 +1,6 @@
 //! Longest common substring
 use crate::{Algorithm, Result};
+use alloc::vec;
 
 /// The length of the [Longest common substring].
 ///

diff --git a/src/algorithms/levenshtein.rs b/src/algorithms/levenshtein.rs
@@ -1,5 +1,6 @@
 //! Levenshtein distance
 use crate::{Algorithm, Result};
+use alloc::vec::Vec;
 
 /// [Levenshtein distance] is an edit distance between two sequences.
 ///

diff --git a/src/algorithms/lig3.rs b/src/algorithms/lig3.rs
@@ -2,7 +2,7 @@
 use super::hamming::Hamming;
 use super::levenshtein::Levenshtein;
 use crate::{Algorithm, Result};
-use std::hash::Hash;
+use core::hash::Hash;
 
 /// [LIG3 similarity] is a normalization of [`Hamming`] by [`Levenshtein`].
 ///

diff --git a/src/algorithms/mlipns.rs b/src/algorithms/mlipns.rs
@@ -1,7 +1,7 @@
 //! MLIPNS similarity
 use super::hamming::Hamming;
 use crate::{Algorithm, Result};
-use std::hash::Hash;
+use core::hash::Hash;
 
 /// [MLIPNS similarity] is a normalization for [`Hamming`] that returns either 0 or 1.
 ///

diff --git a/src/algorithms/overlap.rs b/src/algorithms/overlap.rs
@@ -1,4 +1,5 @@
 //! Overlap coefficient
+#![cfg(feature = "std")]
 use crate::counter::Counter;
 use crate::{Algorithm, Result};
 
@@ -12,7 +13,7 @@ impl Algorithm<f64> for Overlap {
     fn for_iter<C, E>(&self, s1: C, s2: C) -> Result<f64>
     where
         C: Iterator<Item = E>,
-        E: Eq + std::hash::Hash,
+        E: Eq + core::hash::Hash,
     {
         let c1 = Counter::from_iter(s1);
         let c2 = Counter::from_iter(s2);

diff --git a/src/algorithms/ratcliff_obershelp.rs b/src/algorithms/ratcliff_obershelp.rs
@@ -1,5 +1,7 @@
 //! Gestalt pattern matching
 use crate::{Algorithm, Result};
+use alloc::vec;
+use alloc::vec::Vec;
 
 /// [Ratcliff/Obershelp similarity] is [`LCSStr`] that recursively finds matches
 /// on both sides of the longest substring.
@@ -23,8 +25,7 @@ impl Algorithm<usize> for RatcliffObershelp {
         stack.push(((0, l1), (0, l2)));
         let mut result = 0;
 
-        while !stack.is_empty() {
-            let top = stack.pop().unwrap();
+        while let Some(top) = stack.pop() {
             let ((part1_start, part1_len), (part2_start, part2_len)) = top;
             let s1_part = s1[part1_start..(part1_start + part1_len)].iter();
             let s2_part: Vec<&E> = s2[part2_start..(part2_start + part2_len)].iter().collect();

diff --git a/src/algorithms/roberts.rs b/src/algorithms/roberts.rs
@@ -1,4 +1,5 @@
 //! Roberts similarity
+#![cfg(feature = "std")]
 use crate::counter::Counter;
 use crate::{Algorithm, Result};
 
@@ -14,7 +15,7 @@ impl Algorithm<f64> for Roberts {
     fn for_iter<C, E>(&self, s1: C, s2: C) -> Result<f64>
     where
         C: Iterator<Item = E>,
-        E: Eq + std::hash::Hash,
+        E: Eq + core::hash::Hash,
     {
         let c1 = Counter::from_iter(s1);
         let c2 = Counter::from_iter(s2);

diff --git a/src/algorithms/sift4_common.rs b/src/algorithms/sift4_common.rs
@@ -1,5 +1,6 @@
 //! Sift4 distance
 use crate::{Algorithm, Result};
+use alloc::vec::Vec;
 
 /// [Sift4 distance] is an edit algorithm designed to be "fast and relatively accurate".
 ///
@@ -44,7 +45,7 @@ impl Algorithm<usize> for Sift4Common {
         let mut lcss = 0; // largest common subsequence
         let mut local_cs = 0; // local common substring
         let mut trans = 0; // number of transpositions ('ab' vs 'ba')
-        let mut offset_arr: Vec<Offset> = vec![]; // offset pair array, for computing the transpositions
+        let mut offset_arr: Vec<Offset> = Vec::new(); // offset pair array, for computing the transpositions
         while (c1 < l1) && (c2 < l2) {
             if s1[c1] == s2[c2] {
                 local_cs += 1;

diff --git a/src/algorithms/smith_waterman.rs b/src/algorithms/smith_waterman.rs
@@ -1,5 +1,7 @@
 //! Smith-Waterman sequence alignment
 use crate::{Algorithm, Result};
+use alloc::vec;
+use alloc::vec::Vec;
 
 /// [Smith-Waterman similarity] is edit-based and designed for nucleic acid (and protein) sequences.
 ///

diff --git a/src/algorithms/sorensen_dice.rs b/src/algorithms/sorensen_dice.rs
@@ -1,4 +1,5 @@
 //! Sørensen-Dice coefficient
+#![cfg(feature = "std")]
 use crate::counter::Counter;
 use crate::{Algorithm, Result};
 
@@ -12,7 +13,7 @@ impl Algorithm<f64> for SorensenDice {
     fn for_iter<C, E>(&self, s1: C, s2: C) -> Result<f64>
     where
         C: Iterator<Item = E>,
-        E: Eq + std::hash::Hash,
+        E: Eq + core::hash::Hash,
     {
         let c1 = Counter::from_iter(s1);
         let c2 = Counter::from_iter(s2);

diff --git a/src/algorithms/tversky.rs b/src/algorithms/tversky.rs
@@ -1,4 +1,5 @@
 //! Tversky index
+#![cfg(feature = "std")]
 use crate::counter::Counter;
 use crate::{Algorithm, Result};
 
@@ -30,7 +31,7 @@ impl Algorithm<f64> for Tversky {
     fn for_iter<C, E>(&self, s1: C, s2: C) -> Result<f64>
     where
         C: Iterator<Item = E>,
-        E: Eq + std::hash::Hash,
+        E: Eq + core::hash::Hash,
     {
         let c1 = Counter::from_iter(s1);
         let c2 = Counter::from_iter(s2);

diff --git a/src/algorithms/yujian_bo.rs b/src/algorithms/yujian_bo.rs
@@ -15,7 +15,7 @@ impl Algorithm<f64> for YujianBo {
     fn for_iter<C, E>(&self, s1: C, s2: C) -> Result<f64>
     where
         C: Iterator<Item = E>,
-        E: Eq + std::hash::Hash,
+        E: Eq + core::hash::Hash,
     {
         let lev = self.levenshtein.for_iter(s1, s2);
         let dc: usize = self.levenshtein.del_cost;

diff --git a/src/counter.rs b/src/counter.rs
@@ -1,5 +1,6 @@
+#![cfg(feature = "std")]
+use core::hash::Hash;
 use std::collections::HashMap;
-use std::hash::Hash;
 
 /// Multiset container inspired by Python's `collections.Counter`.
 pub struct Counter<K> {
@@ -66,7 +67,7 @@ where
             result.insert(key, *lhs_count + rhs_count);
         }
         for (key, rhs_count) in &rhs.map {
-            if self.map.get(key).is_none() {
+            if !self.map.contains_key(key) {
                 result.insert(key, *rhs_count);
             }
         }
@@ -92,7 +93,7 @@ where
             result += lhs_count.max(rhs_count);
         }
         for (key, rhs_count) in &rhs.map {
-            if self.map.get(key).is_none() {
+            if !self.map.contains_key(key) {
                 result += rhs_count;
             }
         }