From 6a51a2e22a12e98f42775ce04a4cc30b601980cc Mon Sep 17 00:00:00 2001 From: Emily Schmidt Date: Wed, 9 Oct 2024 10:52:46 +0100 Subject: [PATCH 1/6] add first version of stable_set crate --- Cargo.lock | 10 +- Cargo.toml | 1 + stable_set/Cargo.toml | 15 + stable_set/src/index_table.rs | 271 +++++++++++++++ stable_set/src/lib.rs | 26 ++ stable_set/src/stable_map.rs | 618 ++++++++++++++++++++++++++++++++++ stable_set/src/stable_set.rs | 419 +++++++++++++++++++++++ stable_set/src/util.rs | 42 +++ 8 files changed, 1401 insertions(+), 1 deletion(-) create mode 100644 stable_set/Cargo.toml create mode 100644 stable_set/src/index_table.rs create mode 100644 stable_set/src/lib.rs create mode 100644 stable_set/src/stable_map.rs create mode 100644 stable_set/src/stable_set.rs create mode 100644 stable_set/src/util.rs diff --git a/Cargo.lock b/Cargo.lock index 4c3f009..9cdde98 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "addr2line" @@ -842,6 +842,14 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "stable_set" +version = "0.1.0" +dependencies = [ + "hashbrown", + "zwohash", +] + [[package]] name = "strsim" version = "0.11.1" diff --git a/Cargo.toml b/Cargo.toml index 1e47325..1fa671e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ members = [ "extract", "imctk", "lit", + "stable_set", # comment to force multi-line layout ] diff --git a/stable_set/Cargo.toml b/stable_set/Cargo.toml new file mode 100644 index 0000000..e2479ce --- /dev/null +++ b/stable_set/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "stable_set" +version = "0.1.0" +edition = "2021" + +publish = false + +[lints] +workspace = true + +[dependencies] +hashbrown = { version = "0.14.5", default-features = false, features = ["inline-more"] } + +[dev-dependencies] +zwohash = "0.1.2" diff --git a/stable_set/src/index_table.rs b/stable_set/src/index_table.rs new file mode 100644 index 0000000..88cadee --- /dev/null +++ b/stable_set/src/index_table.rs @@ -0,0 +1,271 @@ +use hashbrown::hash_table::HashTable; + +#[derive(Debug, Clone)] +pub enum IndexTable { + Small(HashTable), + Large(HashTable), +} + +#[derive(Debug)] +pub enum OccupiedEntry<'a> { + Small(hashbrown::hash_table::OccupiedEntry<'a, u32>), + Large(hashbrown::hash_table::OccupiedEntry<'a, usize>), +} + +#[derive(Debug)] +pub enum VacantEntry<'a> { + Small(hashbrown::hash_table::VacantEntry<'a, u32>), + Large(hashbrown::hash_table::VacantEntry<'a, usize>), +} + +#[derive(Debug)] +#[allow(dead_code)] +pub enum AbsentEntry<'a> { + Small(hashbrown::hash_table::AbsentEntry<'a, u32>), + Large(hashbrown::hash_table::AbsentEntry<'a, usize>), +} + +#[derive(Debug)] +#[allow(dead_code)] +pub enum Entry<'a> { + Occupied(OccupiedEntry<'a>), + Vacant(VacantEntry<'a>), +} + +impl Default for IndexTable { + fn default() -> Self { + IndexTable::Small(HashTable::new()) + } +} + +impl IndexTable { + pub fn with_capacity(capacity: usize) -> Self { + if Self::try_as_small(capacity).is_ok() { + IndexTable::Small(HashTable::with_capacity(capacity)) + } else { + IndexTable::Large(HashTable::with_capacity(capacity)) + } + } + #[inline(always)] + fn as_small(index: usize) -> u32 { + Self::try_as_small(index).unwrap() + } + #[inline(always)] + fn try_as_small(index: usize) -> Result { + u32::try_from(index) + } + #[inline(always)] + pub fn entry( + &mut self, + hash: u64, + mut eq: impl FnMut(usize) -> bool, + hasher: impl Fn(usize) -> u64, + ) -> Entry<'_> { + match self { + IndexTable::Small(table) => match table.entry( + hash, + |&index| eq(index as usize), + |&index| hasher(index as usize), + ) { + hashbrown::hash_table::Entry::Occupied(entry) => { + Entry::Occupied(OccupiedEntry::Small(entry)) + } + hashbrown::hash_table::Entry::Vacant(entry) => { + Entry::Vacant(VacantEntry::Small(entry)) + } + }, + IndexTable::Large(table) => { + match table.entry(hash, |&index| eq(index), |&index| hasher(index)) { + hashbrown::hash_table::Entry::Occupied(entry) => { + Entry::Occupied(OccupiedEntry::Large(entry)) + } + hashbrown::hash_table::Entry::Vacant(entry) => { + Entry::Vacant(VacantEntry::Large(entry)) + } + } + } + } + } + #[inline(always)] + pub fn find_entry( + &mut self, + hash: u64, + mut eq: impl FnMut(usize) -> bool, + ) -> Result, AbsentEntry<'_>> { + match self { + IndexTable::Small(table) => match table.find_entry(hash, |&index| eq(index as usize)) { + Ok(entry) => Ok(OccupiedEntry::Small(entry)), + Err(entry) => Err(AbsentEntry::Small(entry)), + }, + IndexTable::Large(table) => match table.find_entry(hash, |&index| eq(index)) { + Ok(entry) => Ok(OccupiedEntry::Large(entry)), + Err(entry) => Err(AbsentEntry::Large(entry)), + }, + } + } + #[inline(always)] + pub fn find(&self, hash: u64, mut eq: impl FnMut(usize) -> bool) -> Option { + match self { + IndexTable::Small(table) => table + .find(hash, |&index| eq(index as usize)) + .map(|&index| index as usize), + IndexTable::Large(table) => table.find(hash, |&index| eq(index)).copied(), + } + } + #[inline(always)] + #[must_use] + pub fn replace( + &mut self, + hash: u64, + mut eq: impl FnMut(usize) -> bool, + value: usize, + ) -> Option { + match self { + IndexTable::Small(table) => table + .find_mut(hash, |&index| eq(index as usize)) + .map(|index_ref| std::mem::replace(index_ref, Self::as_small(value)) as usize), + IndexTable::Large(table) => table + .find_mut(hash, |&index| eq(index)) + .map(|index_ref| std::mem::replace(index_ref, value)), + } + } + #[inline(always)] + pub fn is_small(&self) -> bool { + match self { + IndexTable::Small(_) => true, + IndexTable::Large(_) => false, + } + } + #[inline(always)] + pub fn grow_for(&mut self, index: usize, hasher: impl Fn(usize) -> u64) { + if Self::try_as_small(index).is_err() && self.is_small() { + self.grow_cold(hasher) + } + } + #[inline(never)] + #[cold] + fn grow_cold(&mut self, hasher: impl Fn(usize) -> u64) { + let IndexTable::Small(old_table) = + std::mem::replace(self, IndexTable::Large(HashTable::new())) + else { + unreachable!() + }; + let IndexTable::Large(new_table) = self else { + unreachable!() + }; + new_table.reserve(old_table.len(), |&j| hasher(j)); + for i in old_table { + new_table.insert_unique(hasher(i as usize), i as usize, |&j| hasher(j)); + } + } + #[inline(always)] + pub fn len(&self) -> usize { + match self { + IndexTable::Small(table) => table.len(), + IndexTable::Large(table) => table.len(), + } + } + #[inline(always)] + pub fn clear(&mut self) { + match self { + IndexTable::Small(table) => table.clear(), + IndexTable::Large(table) => table.clear(), + } + } + #[inline(always)] + pub fn retain(&mut self, mut f: impl FnMut(usize) -> Option) { + match self { + IndexTable::Small(table) => table.retain(|index| match f(*index as usize) { + Some(new_index) => { + *index = Self::as_small(new_index); + true + } + None => false, + }), + IndexTable::Large(table) => table.retain(|index| match f(*index) { + Some(new_index) => { + *index = new_index; + true + } + None => false, + }), + } + } + #[inline(always)] + pub fn reserve(&mut self, additional: usize, hasher: impl Fn(usize) -> u64) { + self.grow_for((self.len() + additional).saturating_sub(1), &hasher); + match self { + IndexTable::Small(table) => table.reserve(additional, |&index| hasher(index as usize)), + IndexTable::Large(table) => table.reserve(additional, |&index| hasher(index)), + } + } + pub fn clear_range(&mut self, start: usize, end: usize, len: usize) { + if end <= start { + } else if start == 0 && end >= len { + self.clear(); + } else { + self.retain(|index| { + if index < start { + Some(index) + } else if index < end { + None + } else { + Some(index - (end - start)) + } + }); + } + } +} + +impl<'a> VacantEntry<'a> { + #[inline(always)] + pub fn insert(self, index: usize) -> OccupiedEntry<'a> { + match self { + VacantEntry::Small(entry) => { + OccupiedEntry::Small(entry.insert(IndexTable::as_small(index))) + } + VacantEntry::Large(entry) => OccupiedEntry::Large(entry.insert(index)), + } + } + // We can't implement `into_table`, so as a workaround we have to reimplement methods we need here. + #[inline(always)] + #[must_use] + pub fn replace_other( + self, + hash: u64, + mut eq: impl FnMut(usize) -> bool, + value: usize, + ) -> Option { + match self { + VacantEntry::Small(entry) => entry.into_table() + .find_mut(hash, |&index| eq(index as usize)) + .map(|index_ref| std::mem::replace(index_ref, IndexTable::as_small(value)) as usize), + VacantEntry::Large(entry) => entry.into_table() + .find_mut(hash, |&index| eq(index)) + .map(|index_ref| std::mem::replace(index_ref, value)), + } + } +} + +impl<'a> OccupiedEntry<'a> { + #[inline(always)] + pub fn get(&self) -> usize { + match self { + OccupiedEntry::Small(entry) => *entry.get() as usize, + OccupiedEntry::Large(entry) => *entry.get(), + } + } + #[inline(always)] + pub fn remove(self) -> (usize, VacantEntry<'a>) { + match self { + OccupiedEntry::Small(entry) => { + let (index, entry) = entry.remove(); + (index as usize, VacantEntry::Small(entry)) + } + OccupiedEntry::Large(entry) => { + let (index, entry) = entry.remove(); + (index, VacantEntry::Large(entry)) + } + } + } +} diff --git a/stable_set/src/lib.rs b/stable_set/src/lib.rs new file mode 100644 index 0000000..30c90fa --- /dev/null +++ b/stable_set/src/lib.rs @@ -0,0 +1,26 @@ +//! [StableMap] and [StableSet] are a hash map and hash set that preserve the order of their entries, +//! i.e. the order in which entries are inserted is remembered and iterators yield elements in that order. +//! +//! Strictly speaking, this only holds if elements are not removed, as removal via the `swap_remove_*` series of methods perturbs the order +//! (use the more expensive `retain` operation to remove elements while preserving their order). +//! +//! Both structs are implemented as a `Vec` storing the actual entries, supplemented by a hashbrown `HashTable` to allow for fast lookups. +//! This table only stores indices into the `Vec`. +//! +//! This crate is very similar to the `index_map` crate, however there are two key differences: +//! +//! 1. Hashes are not stored with the entries, but instead recalculated when needed. +//! This saves memory and potentially improves performance when hashes are cheap to calculate. +//! 2. For small tables, the index table stores `u32` values, again saving memory. +//! Tables are automatically upgraded to `usize` values as needed. +//! +//! Memory usage is approximately `(k + key size + value size) * num entries` bytes where `k` is 5 for small tables (less than 4 billion entries) and 9 otherwise. + +mod index_table; +mod util; + +pub use stable_set::StableSet; +pub use stable_map::StableMap; + +pub mod stable_set; +pub mod stable_map; \ No newline at end of file diff --git a/stable_set/src/stable_map.rs b/stable_set/src/stable_map.rs new file mode 100644 index 0000000..a9a17d8 --- /dev/null +++ b/stable_set/src/stable_map.rs @@ -0,0 +1,618 @@ +//! [StableMap] is a a hash map that maintains the order of its elements. +use crate::{ + index_table, + util::{impl_iterator, simplify_range}, +}; +use core::hash::Hash; +use index_table::IndexTable; +use std::{borrow::Borrow, hash::BuildHasher, ops::RangeBounds}; + +/// A hash map that maintains the order of its elements. +#[derive(Clone)] +pub struct StableMap { + index_table: IndexTable, + items: Vec<(K, V)>, + build_hasher: S, +} + +impl Default for StableMap { + fn default() -> Self { + StableMap { + index_table: IndexTable::default(), + items: Vec::new(), + build_hasher: S::default(), + } + } +} +impl StableMap { + /// Returns an empty map. + pub fn new() -> Self { + Self::default() + } + /// Returns an empty map with the specified capacity. + pub fn with_capacity(capacity: usize) -> Self { + StableMap { + index_table: IndexTable::with_capacity(capacity), + items: Vec::with_capacity(capacity), + build_hasher: S::default(), + } + } +} + +impl StableMap { + /// Returns an empty map with the provided BuildHasher. + pub fn with_hasher(build_hasher: S) -> Self { + StableMap { + index_table: IndexTable::default(), + items: Vec::new(), + build_hasher, + } + } + /// Returns an empty map with the specified capacity and provided BuildHasher. + pub fn with_capacity_and_hasher(capacity: usize, build_hasher: S) -> Self { + StableMap { + index_table: IndexTable::with_capacity(capacity), + items: Vec::with_capacity(capacity), + build_hasher, + } + } +} + +impl std::fmt::Debug for StableMap { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_map().entries(self.iter()).finish() + } +} + +impl StableMap { + /// Returns the number of items in the map. + pub fn len(&self) -> usize { + self.items.len() + } + /// Returns `true` if the map is empty. + pub fn is_empty(&self) -> bool { + self.items.is_empty() + } + /// Returns a reference to a slice containing all key-value pairs in the set. + pub fn as_slice(&self) -> &[(K, V)] { + &self.items[..] + } + /// Removes all entries from the map, but keeps the allocated memory. + pub fn clear(&mut self) { + self.index_table.clear(); + self.items.clear(); + } + /// Removes all entries except for the first `len` entries from the map, but keeps the allocated memory. + pub fn truncate(&mut self, len: usize) { + self.index_table + .retain(|index| (index < len).then_some(index)); + self.items.truncate(len); + } + /// Returns an iterator over all key-values pairs. + pub fn iter(&self) -> Iter<'_, K, V> { + Iter { + inner: self.items.iter(), + } + } + /// Returns an iterator over all keys. + pub fn keys(&self) -> Keys<'_, K, V> { + Keys { + inner: self.items.iter(), + } + } + /// Returns an iterator over all values. + pub fn values(&self) -> Values<'_, K, V> { + Values { + inner: self.items.iter(), + } + } + /// Returns an iterator over all values, allowing mutation. + pub fn values_mut(&mut self) -> ValuesMut<'_, K, V> { + ValuesMut { + inner: self.items.iter_mut(), + } + } + /// Returns an iterator over all key-value pairs, allowing mutation of values. + pub fn iter_mut(&mut self) -> IterMut<'_, K, V> { + IterMut { + inner: self.items.iter_mut(), + } + } +} + +impl StableMap { + /// Inserts `value` at `key`, replacing any previous value. + /// Returns the index of the item and any previous value. + /// + /// If there was no previous value, the key-value pair is inserted at the end. + pub fn insert_full(&mut self, key: K, value: V) -> (usize, Option) { + let hash = self.build_hasher.hash_one(&key); + match self.index_table.entry( + hash, + |index| self.items[index].0 == key, + |index| self.build_hasher.hash_one(&self.items[index].0), + ) { + index_table::Entry::Occupied(entry) => { + let index = entry.get(); + let old_value = std::mem::replace(&mut self.items[index].1, value); + (index, Some(old_value)) + } + index_table::Entry::Vacant(entry) => { + let index = self.items.len(); + self.items.push((key, value)); + entry.insert(index); + (index, None) + } + } + } + /// Inserts `value` at `key`, replacing and returning any previous value. + /// + /// If there was no previous value, the key-value pair is inserted at the end. + pub fn insert(&mut self, key: K, value: V) -> Option { + self.insert_full(key, value).1 + } +} + +impl StableMap { + /// Returns the index of the entry with the specified key, if it exists. + pub fn get_index_of(&self, key: &Q) -> Option + where + Q: Hash + Eq + ?Sized, + K: Borrow, + { + let hash = self.build_hasher.hash_one(key); + self.index_table + .find(hash, |index| self.items[index].0.borrow() == key) + } + /// Returns the index and references to the key and value of the entry with the specified key, if it exists. + pub fn get_full(&self, key: &Q) -> Option<(usize, &K, &V)> + where + Q: Hash + Eq + ?Sized, + K: Borrow, + { + self.get_index_of(key).map(|index| { + let entry = &self.items[index]; + (index, &entry.0, &entry.1) + }) + } + /// Returns the index and a shared references to the key and a mutable reference to the value of the entry with the specified key, if it exists. + pub fn get_full_mut(&mut self, key: &Q) -> Option<(usize, &K, &mut V)> + where + Q: Hash + Eq + ?Sized, + K: Borrow, + { + self.get_index_of(key).map(|index| { + let entry = &mut self.items[index]; + (index, &entry.0, &mut entry.1) + }) + } + /// Returns a reference to the value corresponding to the specified key, if it exists. + pub fn get(&self, key: &Q) -> Option<&V> + where + Q: Hash + Eq + ?Sized, + K: Borrow, + { + self.get_full(key).map(|x| x.2) + } + /// Returns a mutable reference to the value corresponding to the specified key, if it exists. + pub fn get_mut(&mut self, key: &Q) -> Option<&mut V> + where + Q: Hash + Eq + ?Sized, + K: Borrow, + { + self.get_full_mut(key).map(|x| x.2) + } + /// Returns a reference to the key and value with the specified index, if it exists. + pub fn get_index(&self, index: usize) -> Option<(&K, &V)> { + self.items.get(index).map(|entry| (&entry.0, &entry.1)) + } + /// Returns a shared reference to the key and a mutable reference to the value with the specified index, if it exists. + pub fn get_index_mut(&mut self, index: usize) -> Option<&mut V> { + self.items.get_mut(index).map(|entry| &mut entry.1) + } + fn swap_remove_finish(&mut self, index: usize) -> (K, V) { + let entry = self.items.swap_remove(index); + if index < self.items.len() { + let swapped_hash = self.build_hasher.hash_one(&self.items[index].0); + self.index_table + .replace(swapped_hash, |i| i == self.items.len(), index) + .unwrap(); + } + entry + } + /// Removes the entry with the specified key from the set and returns its index and its key and value, if it exists. + /// + /// The last item is put in its place. + pub fn swap_remove_full(&mut self, key: &Q) -> Option<(usize, K, V)> + where + Q: Hash + Eq + ?Sized, + K: Borrow, + { + let hash = self.build_hasher.hash_one(key); + match self + .index_table + .find_entry(hash, |index| self.items[index].0.borrow() == key) + { + Ok(entry) => { + let (index, _) = entry.remove(); + let (key, value) = self.swap_remove_finish(index); + Some((index, key, value)) + } + Err(_) => None, + } + } + /// Removes the entry with the specified key from the set and returns its value, if it exists. + /// + /// The last item is put in its place. + pub fn swap_remove(&mut self, key: &Q) -> Option + where + Q: Hash + Eq + ?Sized, + K: Borrow, + { + self.swap_remove_full(key).map(|x| x.2) + } + /// Removes the entry with the specified index from the set and returns its key and value, if it exists. + /// + /// The last item is put in its place. + pub fn swap_remove_index_full(&mut self, index: usize) -> Option<(K, V)> { + let hash = self.build_hasher.hash_one(&self.items.get(index)?.0); + self.index_table + .find_entry(hash, |i| i == index) + .unwrap() + .remove(); + Some(self.swap_remove_finish(index)) + } + /// Removes the entry with the specified index from the set and returns its value, if it exists. + /// + /// The last item is put in its place. + pub fn swap_remove_index(&mut self, index: usize) -> Option { + self.swap_remove_index_full(index).map(|x| x.1) + } + /// Removes all items for which `f` evaluates to `false`. + /// + /// `f` is guaranteed to be called exactly once for each item and in order, and may mutate the values. + /// + /// The order of elements is preserved. + pub fn retain(&mut self, mut f: impl FnMut(&K, &mut V) -> bool) { + let mut in_index = 0; + let mut out_index = 0; + self.items.retain_mut(|entry| { + let hash = self.build_hasher.hash_one(&entry.0); + let keep = f(&entry.0, &mut entry.1); + if keep { + if in_index != out_index { + self.index_table + .replace(hash, |index| index == in_index, out_index) + .unwrap(); + } + out_index += 1; + } else { + self.index_table + .find_entry(hash, |index| index == in_index) + .unwrap() + .remove(); + } + in_index += 1; + keep + }) + } + /// Returns an iterator yielding all key-value pairs with indices in the specified range and removes those entries. + /// + /// Panics if the range is invalid or out of bounds. + /// + /// If the returned iterator is leaked, the map is left in an invalid state and can only be dropped. + /// Any other operations may panic or return incorrect results. + pub fn drain(&mut self, range: impl RangeBounds) -> Drain<'_, K, V> { + let range = simplify_range(range, self.items.len()); + self.index_table + .clear_range(range.start, range.end, self.items.len()); + Drain { + inner: self.items.drain(range), + } + } +} + +/// An iterator over the entries of a [`StableMap`]. +/// +/// This struct is created by the [`iter`](`StableMap::iter`) method on [`StableMap`]. +pub struct Iter<'a, K, V> { + inner: std::slice::Iter<'a, (K, V)>, +} +impl<'a, K, V> Iterator for Iter<'a, K, V> { + type Item = (&'a K, &'a V); + impl_iterator!(|entry| (&entry.0, &entry.1)); +} + +/// An iterator over the keys of a [`StableMap`]. +/// +/// This struct is created by the [`keys`](`StableMap::keys`) method on [`StableMap`]. +pub struct Keys<'a, K, V> { + inner: std::slice::Iter<'a, (K, V)>, +} +impl<'a, K, V> Iterator for Keys<'a, K, V> { + type Item = &'a K; + impl_iterator!(|entry| &entry.0); +} + +/// An iterator over the values of a [`StableMap`]. +/// +/// This struct is created by the [`values`](`StableMap::values`) method on [`StableMap`]. +pub struct Values<'a, K, V> { + inner: std::slice::Iter<'a, (K, V)>, +} +impl<'a, K, V> Iterator for Values<'a, K, V> { + type Item = &'a V; + impl_iterator!(|entry| &entry.1); +} + +/// An iterator over the values of a [`StableMap`], allowing mutation. +/// +/// This struct is created by the [`values_mut`](`StableMap::values_mut`) method on [`StableMap`]. +pub struct ValuesMut<'a, K, V> { + inner: std::slice::IterMut<'a, (K, V)>, +} +impl<'a, K, V> Iterator for ValuesMut<'a, K, V> { + type Item = &'a mut V; + impl_iterator!(|entry| &mut entry.1); +} + +/// An iterator over the entries of a [`StableMap`], allowing mutation of values. +/// +/// This struct is created by the [`iter_mut`](`StableMap::iter_mut`) method on [`StableMap`]. +pub struct IterMut<'a, K, V> { + inner: std::slice::IterMut<'a, (K, V)>, +} +impl<'a, K, V> Iterator for IterMut<'a, K, V> { + type Item = (&'a K, &'a mut V); + impl_iterator!(|entry| (&entry.0, &mut entry.1)); +} + +/// An iterator moving entries out of a [`StableMap`]. +/// +/// This struct is created by the `into_iter` method on [`StableMap`]. +pub struct IntoIter { + inner: std::vec::IntoIter<(K, V)>, +} + +impl Iterator for IntoIter { + type Item = (K, V); + impl_iterator!(); +} +impl IntoIterator for StableMap { + type Item = (K, V); + type IntoIter = IntoIter; + fn into_iter(self) -> Self::IntoIter { + IntoIter { + inner: self.items.into_iter(), + } + } +} + +/// An iterator that removes entries from a map, see [`drain`](StableMap::drain). +pub struct Drain<'a, K, V> { + inner: std::vec::Drain<'a, (K, V)>, +} + +impl Iterator for Drain<'_, K, V> { + type Item = (K, V); + impl_iterator!(); +} + +impl<'a, K, V, S> IntoIterator for &'a StableMap { + type Item = (&'a K, &'a V); + type IntoIter = Iter<'a, K, V>; + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} +impl<'a, K, V, S> IntoIterator for &'a mut StableMap { + type Item = (&'a K, &'a mut V); + type IntoIter = IterMut<'a, K, V>; + fn into_iter(self) -> Self::IntoIter { + self.iter_mut() + } +} + +/// A vacant entry in a [`StableMap`]. +pub struct VacantEntry<'a, K, V, S> { + entry: index_table::VacantEntry<'a>, + key: K, + items: &'a mut Vec<(K, V)>, + #[allow(dead_code)] // for consistency with OccupiedEntry + build_hasher: &'a S, +} + +/// An occupied entry in a [`StableMap`]. +pub struct OccupiedEntry<'a, K, V, S> { + entry: index_table::OccupiedEntry<'a>, + items: &'a mut Vec<(K, V)>, + build_hasher: &'a S, +} + +/// An entry in a [`StableMap`]. +pub enum Entry<'a, K, V, S> { + /// A vacant entry. + Vacant(VacantEntry<'a, K, V, S>), + /// An occupied entry. + Occupied(OccupiedEntry<'a, K, V, S>), +} + +impl StableMap { + /// Returns the entry corresponding to the given key, allowing for insertion and/or in-place mutation. + pub fn entry(&mut self, key: K) -> Entry<'_, K, V, S> { + let hash = self.build_hasher.hash_one(&key); + match self.index_table.entry( + hash, + |index| self.items[index].0 == key, + |index| self.build_hasher.hash_one(&self.items[index].0), + ) { + index_table::Entry::Occupied(entry) => Entry::Occupied(OccupiedEntry { + entry, + items: &mut self.items, + build_hasher: &self.build_hasher, + }), + index_table::Entry::Vacant(entry) => Entry::Vacant(VacantEntry { + entry, + key, + items: &mut self.items, + build_hasher: &self.build_hasher, + }), + } + } +} + +impl<'a, K, V, S> VacantEntry<'a, K, V, S> { + /// Returns a reference to the key that would be used for insertion. + pub fn key(&self) -> &K { + &self.key + } + /// Returns the key that would be used for insertion. + pub fn into_key(self) -> K { + self.key + } + /// Returns the index at which an entry would be inserted. + pub fn index(&self) -> usize { + self.items.len() + } + /// Inserts a value into the entry, returning a mutable reference to the value. + pub fn insert(self, value: V) -> &'a mut V { + let index = self.items.len(); + self.items.push((self.key, value)); + self.entry.insert(index); + &mut self.items[index].1 + } +} + +impl<'a, K, V, S> OccupiedEntry<'a, K, V, S> { + /// Returns a reference to the key of the entry. + pub fn key(&self) -> &K { + &self.items[self.index()].0 + } + /// Returns the index of the entry. + pub fn index(&self) -> usize { + self.entry.get() + } + /// Replaces the value in the entry with the provided value, returning the previous value. + pub fn insert(self, value: V) -> V { + std::mem::replace(self.into_mut(), value) + } + /// Returns a mutable reference to the value in the entry. + pub fn get_mut(&mut self) -> &mut V { + let index = self.index(); + &mut self.items[index].1 + } + /// Returns a mutable reference to the value in the entry, bound to the lifetime of the map. + pub fn into_mut(self) -> &'a mut V { + let index = self.index(); + &mut self.items[index].1 + } +} + +impl OccupiedEntry<'_, K, V, S> { + /// Removes the entry from the map and returns the key-value pair. + /// + /// The last entry in the map is put in its place. + pub fn swap_remove_entry(self) -> (K, V) { + let (index, entry) = self.entry.remove(); + let keyvalue = self.items.swap_remove(index); + if index < self.items.len() { + let hash = self.build_hasher.hash_one(&self.items[index].0); + entry + .replace_other(hash, |i| i == self.items.len(), index) + .unwrap(); + } + keyvalue + } + /// Removes the entry from the map and returns the value. + /// + /// The last entry in the map is put in its place. + pub fn swap_remove(self) -> V { + self.swap_remove_entry().1 + } +} + +impl<'a, K, V, S> Entry<'a, K, V, S> { + /// Returns a refernece to the key of the entry. + pub fn key(&self) -> &K { + match self { + Entry::Vacant(entry) => entry.key(), + Entry::Occupied(entry) => entry.key(), + } + } + /// Returns the index of the entry. + pub fn index(&self) -> usize { + match self { + Entry::Vacant(entry) => entry.index(), + Entry::Occupied(entry) => entry.index(), + } + } + /// Inserts the default value if the entry is vacant. Returns a mutable reference to the entry. + pub fn or_default(self) -> &'a mut V + where + V: Default, + { + self.or_insert(V::default()) + } + /// Inserts the provided value if the entry is vacant. Returns a mutable reference to the entry. + pub fn or_insert(self, value: V) -> &'a mut V { + match self { + Entry::Vacant(entry) => entry.insert(value), + Entry::Occupied(entry) => entry.into_mut(), + } + } + /// Inserts the value returned by `f()` if the entry is vacant. Returns a mutable reference to the entry. + pub fn or_insert_with(self, f: impl FnOnce() -> V) -> &'a mut V { + match self { + Entry::Vacant(entry) => entry.insert(f()), + Entry::Occupied(entry) => entry.into_mut(), + } + } + /// Inserts the value returned by `f(&key)` if the entry is vacant. Returns a mutable reference to the entry. + pub fn or_insert_with_key(self, f: impl FnOnce(&K) -> V) -> &'a mut V { + match self { + Entry::Vacant(entry) => { + let value = f(entry.key()); + entry.insert(value) + } + Entry::Occupied(entry) => entry.into_mut(), + } + } + /// Modifies the value by calling `f(&value)` if the entry is occupied. Returns the entry itself. + pub fn and_modify(self, f: impl FnOnce(&mut V)) -> Self { + match self { + Entry::Vacant(entry) => Entry::Vacant(entry), + Entry::Occupied(mut entry) => { + f(entry.get_mut()); + Entry::Occupied(entry) + } + } + } +} + +impl StableMap { + #[cfg(test)] + fn check(&self) { + assert!(self.index_table.len() == self.items.len()); + for (index, (key, _)) in self.items.iter().enumerate() { + let hash = self.build_hasher.hash_one(key); + assert_eq!(self.index_table.find(hash, |idx| idx == index), Some(index)); + } + } +} + +#[test] +fn test() { + use std::hash::BuildHasherDefault; + use zwohash::ZwoHasher; + let mut map: StableMap> = StableMap::default(); + map.insert("adam".into(), 10); + map.insert("eve".into(), 25); + map.insert("mallory".into(), 8); + map.insert("jim".into(), 14); + match map.entry("eve".to_string()) { + Entry::Vacant(_) => unreachable!(), + Entry::Occupied(entry) => entry.swap_remove(), + }; + dbg!(&map); + map.check(); +} diff --git a/stable_set/src/stable_set.rs b/stable_set/src/stable_set.rs new file mode 100644 index 0000000..4fa5916 --- /dev/null +++ b/stable_set/src/stable_set.rs @@ -0,0 +1,419 @@ +//! [StableSet] is a a hash set that maintains the order of its elements. +use crate::{ + index_table, + util::{impl_iterator, simplify_range}, +}; +use core::hash::Hash; +use index_table::IndexTable; +use std::{borrow::Borrow, hash::BuildHasher, ops::RangeBounds}; + +/// A hash set that maintains the order of its elements. +#[derive(Clone)] +pub struct StableSet { + index_table: IndexTable, + items: Vec, + build_hasher: S, +} + +impl std::fmt::Debug for StableSet { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_set().entries(self).finish() + } +} + +impl Default for StableSet { + fn default() -> Self { + StableSet { + index_table: IndexTable::default(), + items: Vec::new(), + build_hasher: S::default(), + } + } +} + +impl StableSet { + /// Returns an empty set. + pub fn new() -> Self { + Self::default() + } + /// Returns an empty set with the specified capacity. + pub fn with_capacity(capacity: usize) -> Self { + StableSet { + index_table: IndexTable::with_capacity(capacity), + items: Vec::with_capacity(capacity), + build_hasher: S::default(), + } + } +} + +impl StableSet { + /// Returns an empty set with the provided BuildHasher. + pub fn with_hasher(build_hasher: S) -> Self { + StableSet { + index_table: IndexTable::default(), + items: Vec::new(), + build_hasher, + } + } + /// Returns an empty set with the specified capacity and provided BuildHasher. + pub fn with_capacity_and_hasher(capacity: usize, build_hasher: S) -> Self { + StableSet { + index_table: IndexTable::with_capacity(capacity), + items: Vec::with_capacity(capacity), + build_hasher, + } + } + /// Removes all items, but keeps the allocated memory. + pub fn clear(&mut self) { + self.index_table.clear(); + self.items.clear(); + } + /// Removes all items except for the first `len` items, but keeps the allocated memory. + pub fn truncate(&mut self, len: usize) { + self.index_table + .retain(|index| (index < len).then_some(index)); + self.items.truncate(len); + } + /// Returns the number of items in the set. + pub fn len(&self) -> usize { + self.items.len() + } + /// Returns `true` if the set is empty. + pub fn is_empty(&self) -> bool { + self.items.is_empty() + } + /// Returns the first item in the set, if it exists. + pub fn first(&self) -> Option<&T> { + self.items.first() + } + /// Returns the last item in the set, if it exists. + pub fn last(&self) -> Option<&T> { + self.items.last() + } + /// Returns a reference to a slice containing all items in the set. + pub fn as_slice(&self) -> &[T] { + &self.items[..] + } + /// Converts the set into a `Vec`. The hashtables containing the indices is dropped. + pub fn into_vec(self) -> Vec { + self.items + } +} + +impl StableSet { + /// Reserve memory for an extra `additional` items. + pub fn reserve(&mut self, additional: usize) { + self.items.reserve(additional); + self.index_table.reserve(additional, |index| { + self.build_hasher.hash_one(&self.items[index]) + }); + } +} + +impl StableSet { + /// Removes the last item from the set and returns it, if it exists. + pub fn pop(&mut self) -> Option { + let opt_item = self.items.pop(); + if let Some(item) = &opt_item { + let hash = self.build_hasher.hash_one(item); + self.index_table + .find_entry(hash, |index| index == self.items.len()) + .unwrap() + .remove(); + } + opt_item + } + /// Inserts an item to the end of the set, unless the set contains an equivalent item already. + /// Returns `true` if the item was inserted. + pub fn insert(&mut self, value: T) -> bool { + self.index_table.grow_for(self.items.len(), |index| { + self.build_hasher.hash_one(&self.items[index]) + }); + let hash = self.build_hasher.hash_one(&value); + match self.index_table.entry( + hash, + |index| self.items[index] == value, + |index| self.build_hasher.hash_one(&self.items[index]), + ) { + index_table::Entry::Occupied(_) => false, + index_table::Entry::Vacant(entry) => { + let new_index = self.items.len(); + self.items.push(value); + entry.insert(new_index); + true + } + } + } + /// Returns a reference to the item in the set equivalent to `value`, if it exists. + pub fn get(&self, value: &Q) -> Option<&T> + where + T: Borrow, + Q: Hash + Eq + ?Sized, + { + self.get_full(value).map(|x| x.1) + } + /// Returns the index of and a reference to the item in the set equivalent to `value`, if it exists. + pub fn get_full(&self, value: &Q) -> Option<(usize, &T)> + where + T: Borrow, + Q: Hash + Eq + ?Sized, + { + let hash = self.build_hasher.hash_one(value); + self.index_table + .find(hash, |index| self.items[index].borrow() == value) + .map(|index| (index, &self.items[index])) + } + /// Returns `true` if the set contains a value equivalent to `value`. + pub fn contains(&self, value: &Q) -> bool + where + T: Borrow, + Q: Hash + Eq + ?Sized, + { + self.get_full(value).is_some() + } + /// Returns the item with the given index, if it exists. + pub fn get_index(&self, index: usize) -> Option<&T> { + self.items.get(index) + } + /// Returns the index of the item equivalent to `value`, if it exists. + pub fn get_index_of(&self, value: &Q) -> Option + where + Q: Hash + Eq + ?Sized, + T: Borrow, + { + let hash = self.build_hasher.hash_one(value); + self.index_table + .find(hash, |index| self.items[index].borrow() == value) + } + fn swap_remove_finish(&mut self, index: usize) -> T { + let item = self.items.swap_remove(index); + if index < self.items.len() { + let swapped_hash = self.build_hasher.hash_one(&self.items[index]); + self.index_table + .replace(swapped_hash, |i| i == self.items.len(), index) + .unwrap(); + } + item + } + /// Removes the specified value from the set, if it exists. Returns `true` if an item was removed. + /// + /// The last item is put in its place. + pub fn swap_remove(&mut self, value: &Q) -> bool + where + T: Borrow, + Q: Hash + Eq + ?Sized, + { + self.swap_remove_full(value).is_some() + } + /// Removes the specified value from the set and returns it, if it exists. + /// + /// The last item is put in its place. + pub fn swap_take(&mut self, value: &Q) -> Option + where + T: Borrow, + Q: Hash + Eq + ?Sized, + { + self.swap_remove_full(value).map(|x| x.1) + } + /// Removes the specified value from the set and returns its index and it, if it exists. + /// + /// The last item is put in its place. + pub fn swap_remove_full(&mut self, value: &Q) -> Option<(usize, T)> + where + T: Borrow, + Q: Hash + Eq + ?Sized, + { + let hash = self.build_hasher.hash_one(value); + match self + .index_table + .find_entry(hash, |index| self.items[index].borrow() == value) + { + Ok(entry) => { + let index = entry.remove().0; + let value = self.swap_remove_finish(index); + Some((index, value)) + } + Err(_) => None, + } + } + /// Removes the item at the given index and returns it, if it exists. + /// + /// The last item is put in its place. + pub fn swap_remove_index(&mut self, index: usize) -> Option { + let hash = self.build_hasher.hash_one(self.items.get(index)?); + self.index_table + .find_entry(hash, |i| i == index) + .unwrap() + .remove(); + Some(self.swap_remove_finish(index)) + } + /// Returns `true` if the set is a subset of `other`. + /// + /// The order of elements is ignored. + pub fn is_subset(&self, other: &StableSet) -> bool { + self.len() <= other.len() && self.iter().all(|t| other.contains(t)) + } + /// Removes all items from the set for which `f` evaluates to `false`. + /// + /// `f` is guaranteed to be called exactly once for each item and in order. + /// + /// The order of elements is preserved. + pub fn retain(&mut self, mut f: impl FnMut(&T) -> bool) { + let mut in_index = 0; + let mut out_index = 0; + self.items.retain(|item| { + let hash = self.build_hasher.hash_one(item); + let keep = f(item); + if keep { + if in_index != out_index { + self.index_table + .replace(hash, |index| index == in_index, out_index) + .unwrap(); + } + out_index += 1; + } else { + self.index_table + .find_entry(hash, |index| index == in_index) + .unwrap() + .remove(); + } + in_index += 1; + keep + }) + } +} + +impl PartialEq> + for StableSet +{ + fn eq(&self, other: &StableSet) -> bool { + self.len() == other.len() && self.is_subset(other) + } +} + +impl Eq for StableSet {} + +/// An iterator that moves out of a set. +/// +/// This struct is created by the `into_iter` method on [`StableSet`]. +pub struct IntoIter { + inner: std::vec::IntoIter, +} +impl Iterator for IntoIter { + type Item = T; + impl_iterator!(); +} +impl IntoIterator for StableSet { + type Item = T; + type IntoIter = IntoIter; + fn into_iter(self) -> Self::IntoIter { + IntoIter { + inner: self.items.into_iter(), + } + } +} + +impl<'a, T, S> IntoIterator for &'a StableSet { + type Item = &'a T; + type IntoIter = Iter<'a, T>; + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +/// An iterator that returns references into a set. +/// +/// This struct is created by the [`iter`](StableSet::iter) method on [`StableSet`]. +pub struct Iter<'a, T> { + inner: std::slice::Iter<'a, T>, +} +impl<'a, T> Iterator for Iter<'a, T> { + type Item = &'a T; + impl_iterator!(); +} + +impl StableSet { + /// Returns an iterator over the set. + /// + /// The iterator yields all items in order. + pub fn iter(&self) -> Iter<'_, T> { + Iter { + inner: self.items.iter(), + } + } +} + +impl Extend for StableSet { + fn extend>(&mut self, iter: IntoIter) { + let iter = iter.into_iter(); + let (lower_bound, _) = iter.size_hint(); + self.reserve(lower_bound); + for item in iter { + self.insert(item); + } + } +} + +impl FromIterator for StableSet { + fn from_iter>(iter: IntoIter) -> Self { + let iter = iter.into_iter(); + let (lower_bound, _) = iter.size_hint(); + let mut set = StableSet::with_capacity(lower_bound); + for item in iter { + set.insert(item); + } + set + } +} + +impl StableSet { + #[cfg(test)] + fn check(&self) { + assert!(self.index_table.len() == self.items.len()); + for (index, item) in self.items.iter().enumerate() { + let hash = self.build_hasher.hash_one(item); + assert_eq!(self.index_table.find(hash, |idx| idx == index), Some(index)); + } + } +} + +/// An iterator that removes items from a set, see [`drain`](StableSet::drain). +pub struct Drain<'a, T> { + inner: std::vec::Drain<'a, T>, +} +impl Iterator for Drain<'_, T> { + type Item = T; + impl_iterator!(); +} + +impl StableSet { + /// Returns an iterator yielding all elements with indices in the specified range and removes those elements from the set. + /// + /// Panics if the range is invalid or out of bounds. + /// + /// If the returned iterator is leaked, the set is left in an invalid state and can only be dropped. + /// Any other operations may panic or return incorrect results. + pub fn drain(&mut self, range: impl RangeBounds) -> Drain<'_, T> { + let range = simplify_range(range, self.items.len()); + self.index_table + .clear_range(range.start, range.end, self.items.len()); + Drain { + inner: self.items.drain(range), + } + } +} + +#[test] +fn test_foo() { + use std::hash::BuildHasherDefault; + use zwohash::ZwoHasher; + let mut set: StableSet> = (2..100).collect(); + let mut index = 0; + while let Some(&k) = set.get_index(index) { + set.retain(|&n| n % k != 0 || n == k); + index += 1; + } + dbg!(set.drain(5..9).collect::>()); + dbg!(&set); + set.check(); +} diff --git a/stable_set/src/util.rs b/stable_set/src/util.rs new file mode 100644 index 0000000..d3110a6 --- /dev/null +++ b/stable_set/src/util.rs @@ -0,0 +1,42 @@ +use std::ops::{Range, RangeBounds}; + +pub fn simplify_range(range: impl RangeBounds, len: usize) -> Range { + let lower = match range.start_bound() { + std::ops::Bound::Unbounded => 0, + std::ops::Bound::Included(&n) => n, + std::ops::Bound::Excluded(&n) => n.checked_add(1).expect("start point of range too large"), + }; + let upper = match range.end_bound() { + std::ops::Bound::Unbounded => len, + std::ops::Bound::Included(&n) => n.checked_add(1).expect("end point of range too large"), + std::ops::Bound::Excluded(&n) => n, + }; + assert!(lower < len, "start point {lower} of range is >= length {len}"); + assert!(upper <= len, "end point {upper} of range is > length {len}"); + assert!(lower <= upper, "start point {lower} is larger than end point {upper}"); + lower..upper +} + +macro_rules! impl_iterator { + () => { + impl_iterator!(|x| x); + }; + ($f: expr) => { + fn next(&mut self) -> Option { + self.inner.next().map($f) + } + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } + fn count(self) -> usize { + self.inner.count() + } + fn nth(&mut self, n: usize) -> Option { + self.inner.nth(n).map($f) + } + fn last(self) -> Option { + self.inner.last().map($f) + } + }; +} +pub(crate) use impl_iterator; From 26c2096b8b1ac986c723ba338790e50b1a1a372a Mon Sep 17 00:00:00 2001 From: Emily Schmidt Date: Thu, 10 Oct 2024 08:55:41 +0100 Subject: [PATCH 2/6] stable_set: small fixes, add tests --- Cargo.lock | 11 ++++++ stable_set/Cargo.toml | 2 ++ stable_set/src/stable_map.rs | 39 ++++++++++++---------- stable_set/src/stable_set.rs | 18 ++-------- stable_set/src/test_map.rs | 65 ++++++++++++++++++++++++++++++++++++ stable_set/src/test_set.rs | 24 +++++++++++++ 6 files changed, 127 insertions(+), 32 deletions(-) create mode 100644 stable_set/src/test_map.rs create mode 100644 stable_set/src/test_set.rs diff --git a/Cargo.lock b/Cargo.lock index 9cdde98..6f5443c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -736,6 +736,15 @@ dependencies = [ "getrandom", ] +[[package]] +name = "rand_pcg" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59cad018caf63deb318e5a4586d99a24424a364f40f1e5778c29aca23f4fc73e" +dependencies = [ + "rand_core", +] + [[package]] name = "raw-cpuid" version = "11.1.0" @@ -847,6 +856,8 @@ name = "stable_set" version = "0.1.0" dependencies = [ "hashbrown", + "rand", + "rand_pcg", "zwohash", ] diff --git a/stable_set/Cargo.toml b/stable_set/Cargo.toml index e2479ce..43a15ec 100644 --- a/stable_set/Cargo.toml +++ b/stable_set/Cargo.toml @@ -13,3 +13,5 @@ hashbrown = { version = "0.14.5", default-features = false, features = ["inline- [dev-dependencies] zwohash = "0.1.2" +rand = "0.8.5" +rand_pcg = "0.3.1" \ No newline at end of file diff --git a/stable_set/src/stable_map.rs b/stable_set/src/stable_map.rs index a9a17d8..f7155cf 100644 --- a/stable_set/src/stable_map.rs +++ b/stable_set/src/stable_map.rs @@ -7,6 +7,9 @@ use core::hash::Hash; use index_table::IndexTable; use std::{borrow::Borrow, hash::BuildHasher, ops::RangeBounds}; +#[path = "test_map.rs"] +mod test_map; + /// A hash map that maintains the order of its elements. #[derive(Clone)] pub struct StableMap { @@ -126,6 +129,9 @@ impl StableMap { /// /// If there was no previous value, the key-value pair is inserted at the end. pub fn insert_full(&mut self, key: K, value: V) -> (usize, Option) { + self.index_table.grow_for(self.items.len(), |index| { + self.build_hasher.hash_one(&self.items[index].0) + }); let hash = self.build_hasher.hash_one(&key); match self.index_table.entry( hash, @@ -413,6 +419,18 @@ impl<'a, K, V, S> IntoIterator for &'a mut StableMap { } } +impl FromIterator<(K, V)> for StableMap { + fn from_iter>(iter: Iter) -> Self { + let iter = iter.into_iter(); + let (lower_bound, _) = iter.size_hint(); + let mut map = StableMap::with_capacity(lower_bound); + for (k, v) in iter { + map.insert(k, v); + } + map + } +} + /// A vacant entry in a [`StableMap`]. pub struct VacantEntry<'a, K, V, S> { entry: index_table::VacantEntry<'a>, @@ -440,6 +458,10 @@ pub enum Entry<'a, K, V, S> { impl StableMap { /// Returns the entry corresponding to the given key, allowing for insertion and/or in-place mutation. pub fn entry(&mut self, key: K) -> Entry<'_, K, V, S> { + // make sure we can insert an entry. do this now because we don't have access to index_table later. + self.index_table.grow_for(self.items.len(), |index| { + self.build_hasher.hash_one(&self.items[index].0) + }); let hash = self.build_hasher.hash_one(&key); match self.index_table.entry( hash, @@ -599,20 +621,3 @@ impl StableMap { } } } - -#[test] -fn test() { - use std::hash::BuildHasherDefault; - use zwohash::ZwoHasher; - let mut map: StableMap> = StableMap::default(); - map.insert("adam".into(), 10); - map.insert("eve".into(), 25); - map.insert("mallory".into(), 8); - map.insert("jim".into(), 14); - match map.entry("eve".to_string()) { - Entry::Vacant(_) => unreachable!(), - Entry::Occupied(entry) => entry.swap_remove(), - }; - dbg!(&map); - map.check(); -} diff --git a/stable_set/src/stable_set.rs b/stable_set/src/stable_set.rs index 4fa5916..87fee92 100644 --- a/stable_set/src/stable_set.rs +++ b/stable_set/src/stable_set.rs @@ -7,6 +7,9 @@ use core::hash::Hash; use index_table::IndexTable; use std::{borrow::Borrow, hash::BuildHasher, ops::RangeBounds}; +#[path = "test_set.rs"] +mod test_set; + /// A hash set that maintains the order of its elements. #[derive(Clone)] pub struct StableSet { @@ -402,18 +405,3 @@ impl StableSet { } } } - -#[test] -fn test_foo() { - use std::hash::BuildHasherDefault; - use zwohash::ZwoHasher; - let mut set: StableSet> = (2..100).collect(); - let mut index = 0; - while let Some(&k) = set.get_index(index) { - set.retain(|&n| n % k != 0 || n == k); - index += 1; - } - dbg!(set.drain(5..9).collect::>()); - dbg!(&set); - set.check(); -} diff --git a/stable_set/src/test_map.rs b/stable_set/src/test_map.rs new file mode 100644 index 0000000..7534bad --- /dev/null +++ b/stable_set/src/test_map.rs @@ -0,0 +1,65 @@ +#![cfg(test)] +#![allow(missing_docs)] +use crate::StableMap; +use rand::prelude::*; +use std::hash::BuildHasherDefault; +use zwohash::ZwoHasher; +use std::hash::Hash; + +type ZwoMap = StableMap>; + +fn dedup_vec(vec: &mut Vec<(K, V)>) { + let mut seen = std::collections::HashSet::new(); + vec.retain(|item| seen.insert(item.0.clone())); +} + +#[test] +fn test_basic() { + let mut map: ZwoMap = Default::default(); + map.insert("adam".into(), 10); + map.insert("eve".into(), 23); + map.insert("mallory".into(), 40); + map.insert("jim".into(), 5); + assert_eq!(map.get("adam").copied(), Some(10)); + assert_eq!(map.get_index_of("eve"), Some(1)); + assert_eq!(map.get_full("jim"), Some((3, &"jim".into(), &5))); + assert_eq!(map.get_index(2), Some((&"mallory".into(), &40))); + assert_eq!(map.insert_full("jim".into(), 15), (3, Some(5))); + assert_eq!(map.swap_remove("eve"), Some(23)); + assert_eq!(map.get_index(1), Some((&"jim".into(), &15))); + assert_eq!(map.keys().collect::>(), ["adam", "jim", "mallory"]); + assert_eq!(map.values().collect::>(), [&10, &15, &40]); + assert_eq!(map.len(), 3); + assert!(!map.is_empty()); + map.check(); +} + +#[test] +fn test_retain() { + let mut rng = rand_pcg::Pcg64::seed_from_u64(10); + let mut in_data: Vec<(u32, u64)> = (0..1000).map(|_| (rng.gen(), rng.gen())).collect(); + dedup_vec(&mut in_data); + let mut map: ZwoMap = in_data.iter().copied().collect(); + map.retain(|key, value| (*key as u64).wrapping_add(*value) % 7 == 4); + map.check(); + let mut out_data = in_data.clone(); + out_data.retain(|&(key, value)| (key as u64).wrapping_add(value) % 7 == 4); + assert_eq!( + map.iter().map(|(&k, &v)| (k, v)).collect::>(), + out_data + ); +} + +#[test] +fn test_drain() { + let mut rng = rand_pcg::Pcg64::seed_from_u64(11); + let mut in_data: Vec<(u32, u64)> = (0..1000).map(|_| (rng.gen(), rng.gen())).collect(); + dedup_vec(&mut in_data); + let mut map: ZwoMap = in_data.iter().copied().collect(); + let map_drain = map.drain(100..150); + let mut out_data = in_data.clone(); + let out_data_drain = out_data.drain(100..150); + assert!(map_drain.eq(out_data_drain)); + map.check(); + assert_eq!(map.iter().map(|(&k, &v)| (k, v)).collect::>(), out_data); +} \ No newline at end of file diff --git a/stable_set/src/test_set.rs b/stable_set/src/test_set.rs new file mode 100644 index 0000000..d4f5925 --- /dev/null +++ b/stable_set/src/test_set.rs @@ -0,0 +1,24 @@ +#![cfg(test)] +#![allow(missing_docs)] +use crate::StableSet; +use std::hash::BuildHasherDefault; +use zwohash::ZwoHasher; + +type ZwoSet = StableSet>; + +#[test] +fn test_primes() { + let mut set: ZwoSet = (2..200).collect(); + let mut ref_primes = [ + 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, + 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, + 191, 193, 197, 199 + ].into_iter(); + while let Some(&k) = set.first() { + assert_eq!(ref_primes.next(), Some(k)); + set.retain(|&n| n % k != 0); + set.check(); + } + assert_eq!(ref_primes.next(), None); + assert!(set.is_empty()); +} \ No newline at end of file From e859e6576dee7f054af612b9728359edc6c84f84 Mon Sep 17 00:00:00 2001 From: Emily Schmidt Date: Wed, 16 Oct 2024 15:57:20 +0100 Subject: [PATCH 3/6] stable_set: make small index type generic for testing, write better tests for the set type --- Cargo.lock | 27 ++-- stable_set/Cargo.toml | 3 +- stable_set/src/index_table.rs | 134 +++++++++++-------- stable_set/src/stable_map.rs | 56 ++++---- stable_set/src/stable_set.rs | 56 ++++---- stable_set/src/test_set.rs | 234 +++++++++++++++++++++++++++++++++- 6 files changed, 392 insertions(+), 118 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6f5443c..4a8b6f7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -319,6 +319,12 @@ version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +[[package]] +name = "hashbrown" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb" + [[package]] name = "heck" version = "0.5.0" @@ -360,7 +366,7 @@ dependencies = [ "clap", "flussab", "flussab-aiger", - "hashbrown", + "hashbrown 0.14.5", "imctk-derive", "imctk-ids", "imctk-ir", @@ -392,7 +398,7 @@ dependencies = [ "color-eyre", "flussab", "flussab-aiger", - "hashbrown", + "hashbrown 0.14.5", "imctk-abc", "imctk-abc-sys", "imctk-aiger", @@ -427,7 +433,7 @@ dependencies = [ name = "imctk-ids" version = "0.1.0" dependencies = [ - "hashbrown", + "hashbrown 0.14.5", "imctk-derive", "imctk-transparent", "rand", @@ -449,7 +455,7 @@ name = "imctk-ir" version = "0.1.0" dependencies = [ "flussab-aiger", - "hashbrown", + "hashbrown 0.14.5", "imctk-derive", "imctk-ids", "imctk-lit", @@ -465,7 +471,7 @@ name = "imctk-lit" version = "0.1.0" dependencies = [ "flussab-aiger", - "hashbrown", + "hashbrown 0.14.5", "imctk-derive", "imctk-ids", "imctk-transparent", @@ -508,12 +514,12 @@ checksum = "ce23b50ad8242c51a442f3ff322d56b02f08852c77e4c0b4d3fd684abc89c683" [[package]] name = "indexmap" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68b900aa2f7301e21c36462b170ee99994de34dff39a4a6a528e80e7376d07e5" +checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" dependencies = [ "equivalent", - "hashbrown", + "hashbrown 0.15.0", ] [[package]] @@ -855,7 +861,8 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" name = "stable_set" version = "0.1.0" dependencies = [ - "hashbrown", + "hashbrown 0.14.5", + "indexmap", "rand", "rand_pcg", "zwohash", @@ -882,7 +889,7 @@ dependencies = [ name = "table_seq" version = "0.1.0" dependencies = [ - "hashbrown", + "hashbrown 0.14.5", "zwohash", ] diff --git a/stable_set/Cargo.toml b/stable_set/Cargo.toml index 43a15ec..3ba14ce 100644 --- a/stable_set/Cargo.toml +++ b/stable_set/Cargo.toml @@ -14,4 +14,5 @@ hashbrown = { version = "0.14.5", default-features = false, features = ["inline- [dev-dependencies] zwohash = "0.1.2" rand = "0.8.5" -rand_pcg = "0.3.1" \ No newline at end of file +rand_pcg = "0.3.1" +indexmap = "2.6.0" \ No newline at end of file diff --git a/stable_set/src/index_table.rs b/stable_set/src/index_table.rs index 88cadee..58f2244 100644 --- a/stable_set/src/index_table.rs +++ b/stable_set/src/index_table.rs @@ -1,71 +1,91 @@ use hashbrown::hash_table::HashTable; +/// Trait for a small index type. You can assume `T: SmallIndex` means `T = u32` most of the time. +/// +/// The only exception is for testing this crate, where we use `T = u8`. +/// +/// This is basically `TryInto + TryFrom` with some convenience methods +pub trait SmallIndex: Copy + std::fmt::Debug + TryInto + TryFrom { + fn into_usize(self) -> usize { + // the unreachable! should be straightforwardly impossible bc the conversion is infallible + // it would be nice if we could just rely on Into existing + // but std doesn't define it for u32 because you might be on a 16-bit platform... + self.try_into().unwrap_or_else(|_| unreachable!()) + } + fn from_usize_unwrap(index: usize) -> Self { + // unfortunately .try_into().unwrap() doesn't work in the generic case + // because TryFrom::Error might not be Debug + index + .try_into() + .unwrap_or_else(|_| panic!("shouldn't happen: small index overflow in IndexTable")) + } +} + +impl SmallIndex for u8 {} +impl SmallIndex for u16 {} +impl SmallIndex for u32 {} +impl SmallIndex for usize {} + +// We could just use u32 directly for the small index, +// but this is slightly less error-prone and allows us to use a smaller type for testing. #[derive(Debug, Clone)] -pub enum IndexTable { - Small(HashTable), +pub enum IndexTable { + Small(HashTable), Large(HashTable), } #[derive(Debug)] -pub enum OccupiedEntry<'a> { - Small(hashbrown::hash_table::OccupiedEntry<'a, u32>), +pub enum OccupiedEntry<'a, S> { + Small(hashbrown::hash_table::OccupiedEntry<'a, S>), Large(hashbrown::hash_table::OccupiedEntry<'a, usize>), } #[derive(Debug)] -pub enum VacantEntry<'a> { - Small(hashbrown::hash_table::VacantEntry<'a, u32>), +pub enum VacantEntry<'a, S> { + Small(hashbrown::hash_table::VacantEntry<'a, S>), Large(hashbrown::hash_table::VacantEntry<'a, usize>), } #[derive(Debug)] #[allow(dead_code)] -pub enum AbsentEntry<'a> { - Small(hashbrown::hash_table::AbsentEntry<'a, u32>), +pub enum AbsentEntry<'a, S> { + Small(hashbrown::hash_table::AbsentEntry<'a, S>), Large(hashbrown::hash_table::AbsentEntry<'a, usize>), } #[derive(Debug)] #[allow(dead_code)] -pub enum Entry<'a> { - Occupied(OccupiedEntry<'a>), - Vacant(VacantEntry<'a>), +pub enum Entry<'a, S> { + Occupied(OccupiedEntry<'a, S>), + Vacant(VacantEntry<'a, S>), } -impl Default for IndexTable { +impl Default for IndexTable { fn default() -> Self { IndexTable::Small(HashTable::new()) } } -impl IndexTable { +impl IndexTable { pub fn with_capacity(capacity: usize) -> Self { - if Self::try_as_small(capacity).is_ok() { + if S::try_from(capacity).is_ok() { IndexTable::Small(HashTable::with_capacity(capacity)) } else { IndexTable::Large(HashTable::with_capacity(capacity)) } } #[inline(always)] - fn as_small(index: usize) -> u32 { - Self::try_as_small(index).unwrap() - } - #[inline(always)] - fn try_as_small(index: usize) -> Result { - u32::try_from(index) - } - #[inline(always)] pub fn entry( &mut self, hash: u64, mut eq: impl FnMut(usize) -> bool, hasher: impl Fn(usize) -> u64, - ) -> Entry<'_> { + ) -> Entry<'_, S> { match self { IndexTable::Small(table) => match table.entry( hash, - |&index| eq(index as usize), - |&index| hasher(index as usize), + |&index| eq(index.into_usize()), + |&index| hasher(index.into_usize()), ) { hashbrown::hash_table::Entry::Occupied(entry) => { Entry::Occupied(OccupiedEntry::Small(entry)) @@ -91,12 +111,14 @@ impl IndexTable { &mut self, hash: u64, mut eq: impl FnMut(usize) -> bool, - ) -> Result, AbsentEntry<'_>> { + ) -> Result, AbsentEntry<'_, S>> { match self { - IndexTable::Small(table) => match table.find_entry(hash, |&index| eq(index as usize)) { - Ok(entry) => Ok(OccupiedEntry::Small(entry)), - Err(entry) => Err(AbsentEntry::Small(entry)), - }, + IndexTable::Small(table) => { + match table.find_entry(hash, |&index| eq(index.into_usize())) { + Ok(entry) => Ok(OccupiedEntry::Small(entry)), + Err(entry) => Err(AbsentEntry::Small(entry)), + } + } IndexTable::Large(table) => match table.find_entry(hash, |&index| eq(index)) { Ok(entry) => Ok(OccupiedEntry::Large(entry)), Err(entry) => Err(AbsentEntry::Large(entry)), @@ -107,8 +129,8 @@ impl IndexTable { pub fn find(&self, hash: u64, mut eq: impl FnMut(usize) -> bool) -> Option { match self { IndexTable::Small(table) => table - .find(hash, |&index| eq(index as usize)) - .map(|&index| index as usize), + .find(hash, |&index| eq(index.into_usize())) + .map(|&index| index.into_usize()), IndexTable::Large(table) => table.find(hash, |&index| eq(index)).copied(), } } @@ -121,9 +143,13 @@ impl IndexTable { value: usize, ) -> Option { match self { - IndexTable::Small(table) => table - .find_mut(hash, |&index| eq(index as usize)) - .map(|index_ref| std::mem::replace(index_ref, Self::as_small(value)) as usize), + IndexTable::Small(table) => { + table + .find_mut(hash, |&index| eq(index.into_usize())) + .map(|index_ref| { + std::mem::replace(index_ref, S::from_usize_unwrap(value)).into_usize() + }) + } IndexTable::Large(table) => table .find_mut(hash, |&index| eq(index)) .map(|index_ref| std::mem::replace(index_ref, value)), @@ -138,7 +164,7 @@ impl IndexTable { } #[inline(always)] pub fn grow_for(&mut self, index: usize, hasher: impl Fn(usize) -> u64) { - if Self::try_as_small(index).is_err() && self.is_small() { + if S::try_from(index).is_err() && self.is_small() { self.grow_cold(hasher) } } @@ -155,7 +181,7 @@ impl IndexTable { }; new_table.reserve(old_table.len(), |&j| hasher(j)); for i in old_table { - new_table.insert_unique(hasher(i as usize), i as usize, |&j| hasher(j)); + new_table.insert_unique(hasher(i.into_usize()), i.into_usize(), |&j| hasher(j)); } } #[inline(always)] @@ -175,9 +201,9 @@ impl IndexTable { #[inline(always)] pub fn retain(&mut self, mut f: impl FnMut(usize) -> Option) { match self { - IndexTable::Small(table) => table.retain(|index| match f(*index as usize) { + IndexTable::Small(table) => table.retain(|index| match f((*index).into_usize()) { Some(new_index) => { - *index = Self::as_small(new_index); + *index = S::from_usize_unwrap(new_index); true } None => false, @@ -195,7 +221,9 @@ impl IndexTable { pub fn reserve(&mut self, additional: usize, hasher: impl Fn(usize) -> u64) { self.grow_for((self.len() + additional).saturating_sub(1), &hasher); match self { - IndexTable::Small(table) => table.reserve(additional, |&index| hasher(index as usize)), + IndexTable::Small(table) => { + table.reserve(additional, |&index| hasher(index.into_usize())) + } IndexTable::Large(table) => table.reserve(additional, |&index| hasher(index)), } } @@ -217,12 +245,12 @@ impl IndexTable { } } -impl<'a> VacantEntry<'a> { +impl<'a, S: SmallIndex> VacantEntry<'a, S> { #[inline(always)] - pub fn insert(self, index: usize) -> OccupiedEntry<'a> { + pub fn insert(self, index: usize) -> OccupiedEntry<'a, S> { match self { VacantEntry::Small(entry) => { - OccupiedEntry::Small(entry.insert(IndexTable::as_small(index))) + OccupiedEntry::Small(entry.insert(S::from_usize_unwrap(index))) } VacantEntry::Large(entry) => OccupiedEntry::Large(entry.insert(index)), } @@ -237,30 +265,34 @@ impl<'a> VacantEntry<'a> { value: usize, ) -> Option { match self { - VacantEntry::Small(entry) => entry.into_table() - .find_mut(hash, |&index| eq(index as usize)) - .map(|index_ref| std::mem::replace(index_ref, IndexTable::as_small(value)) as usize), - VacantEntry::Large(entry) => entry.into_table() + VacantEntry::Small(entry) => entry + .into_table() + .find_mut(hash, |&index| eq(index.into_usize())) + .map(|index_ref| { + std::mem::replace(index_ref, S::from_usize_unwrap(value)).into_usize() + }), + VacantEntry::Large(entry) => entry + .into_table() .find_mut(hash, |&index| eq(index)) .map(|index_ref| std::mem::replace(index_ref, value)), } } } -impl<'a> OccupiedEntry<'a> { +impl<'a, S: SmallIndex> OccupiedEntry<'a, S> { #[inline(always)] pub fn get(&self) -> usize { match self { - OccupiedEntry::Small(entry) => *entry.get() as usize, + OccupiedEntry::Small(entry) => (*entry.get()).into_usize(), OccupiedEntry::Large(entry) => *entry.get(), } } #[inline(always)] - pub fn remove(self) -> (usize, VacantEntry<'a>) { + pub fn remove(self) -> (usize, VacantEntry<'a, S>) { match self { OccupiedEntry::Small(entry) => { let (index, entry) = entry.remove(); - (index as usize, VacantEntry::Small(entry)) + (index.into_usize(), VacantEntry::Small(entry)) } OccupiedEntry::Large(entry) => { let (index, entry) = entry.remove(); diff --git a/stable_set/src/stable_map.rs b/stable_set/src/stable_map.rs index f7155cf..749b462 100644 --- a/stable_set/src/stable_map.rs +++ b/stable_set/src/stable_map.rs @@ -4,7 +4,7 @@ use crate::{ util::{impl_iterator, simplify_range}, }; use core::hash::Hash; -use index_table::IndexTable; +use index_table::{IndexTable, SmallIndex}; use std::{borrow::Borrow, hash::BuildHasher, ops::RangeBounds}; #[path = "test_map.rs"] @@ -12,13 +12,13 @@ mod test_map; /// A hash map that maintains the order of its elements. #[derive(Clone)] -pub struct StableMap { - index_table: IndexTable, +pub struct StableMap { + index_table: IndexTable, items: Vec<(K, V)>, build_hasher: S, } -impl Default for StableMap { +impl Default for StableMap { fn default() -> Self { StableMap { index_table: IndexTable::default(), @@ -27,7 +27,7 @@ impl Default for StableMap { } } } -impl StableMap { +impl StableMap { /// Returns an empty map. pub fn new() -> Self { Self::default() @@ -42,7 +42,7 @@ impl StableMap { } } -impl StableMap { +impl StableMap { /// Returns an empty map with the provided BuildHasher. pub fn with_hasher(build_hasher: S) -> Self { StableMap { @@ -61,13 +61,13 @@ impl StableMap { } } -impl std::fmt::Debug for StableMap { +impl std::fmt::Debug for StableMap { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_map().entries(self.iter()).finish() } } -impl StableMap { +impl StableMap { /// Returns the number of items in the map. pub fn len(&self) -> usize { self.items.len() @@ -123,7 +123,7 @@ impl StableMap { } } -impl StableMap { +impl StableMap { /// Inserts `value` at `key`, replacing any previous value. /// Returns the index of the item and any previous value. /// @@ -159,7 +159,7 @@ impl StableMap { } } -impl StableMap { +impl StableMap { /// Returns the index of the entry with the specified key, if it exists. pub fn get_index_of(&self, key: &Q) -> Option where @@ -384,7 +384,7 @@ impl Iterator for IntoIter { type Item = (K, V); impl_iterator!(); } -impl IntoIterator for StableMap { +impl IntoIterator for StableMap { type Item = (K, V); type IntoIter = IntoIter; fn into_iter(self) -> Self::IntoIter { @@ -404,14 +404,14 @@ impl Iterator for Drain<'_, K, V> { impl_iterator!(); } -impl<'a, K, V, S> IntoIterator for &'a StableMap { +impl<'a, K, V, S, W: SmallIndex> IntoIterator for &'a StableMap { type Item = (&'a K, &'a V); type IntoIter = Iter<'a, K, V>; fn into_iter(self) -> Self::IntoIter { self.iter() } } -impl<'a, K, V, S> IntoIterator for &'a mut StableMap { +impl<'a, K, V, S, W: SmallIndex> IntoIterator for &'a mut StableMap { type Item = (&'a K, &'a mut V); type IntoIter = IterMut<'a, K, V>; fn into_iter(self) -> Self::IntoIter { @@ -419,7 +419,7 @@ impl<'a, K, V, S> IntoIterator for &'a mut StableMap { } } -impl FromIterator<(K, V)> for StableMap { +impl FromIterator<(K, V)> for StableMap { fn from_iter>(iter: Iter) -> Self { let iter = iter.into_iter(); let (lower_bound, _) = iter.size_hint(); @@ -432,8 +432,8 @@ impl FromIterator<(K, V)> for StableM } /// A vacant entry in a [`StableMap`]. -pub struct VacantEntry<'a, K, V, S> { - entry: index_table::VacantEntry<'a>, +pub struct VacantEntry<'a, K, V, S, W> { + entry: index_table::VacantEntry<'a, W>, key: K, items: &'a mut Vec<(K, V)>, #[allow(dead_code)] // for consistency with OccupiedEntry @@ -441,23 +441,23 @@ pub struct VacantEntry<'a, K, V, S> { } /// An occupied entry in a [`StableMap`]. -pub struct OccupiedEntry<'a, K, V, S> { - entry: index_table::OccupiedEntry<'a>, +pub struct OccupiedEntry<'a, K, V, S, W> { + entry: index_table::OccupiedEntry<'a, W>, items: &'a mut Vec<(K, V)>, build_hasher: &'a S, } /// An entry in a [`StableMap`]. -pub enum Entry<'a, K, V, S> { +pub enum Entry<'a, K, V, S, W> { /// A vacant entry. - Vacant(VacantEntry<'a, K, V, S>), + Vacant(VacantEntry<'a, K, V, S, W>), /// An occupied entry. - Occupied(OccupiedEntry<'a, K, V, S>), + Occupied(OccupiedEntry<'a, K, V, S, W>), } -impl StableMap { +impl StableMap { /// Returns the entry corresponding to the given key, allowing for insertion and/or in-place mutation. - pub fn entry(&mut self, key: K) -> Entry<'_, K, V, S> { + pub fn entry(&mut self, key: K) -> Entry<'_, K, V, S, W> { // make sure we can insert an entry. do this now because we don't have access to index_table later. self.index_table.grow_for(self.items.len(), |index| { self.build_hasher.hash_one(&self.items[index].0) @@ -483,7 +483,7 @@ impl StableMap { } } -impl<'a, K, V, S> VacantEntry<'a, K, V, S> { +impl<'a, K, V, S, W: SmallIndex> VacantEntry<'a, K, V, S, W> { /// Returns a reference to the key that would be used for insertion. pub fn key(&self) -> &K { &self.key @@ -505,7 +505,7 @@ impl<'a, K, V, S> VacantEntry<'a, K, V, S> { } } -impl<'a, K, V, S> OccupiedEntry<'a, K, V, S> { +impl<'a, K, V, S, W: SmallIndex> OccupiedEntry<'a, K, V, S, W> { /// Returns a reference to the key of the entry. pub fn key(&self) -> &K { &self.items[self.index()].0 @@ -530,7 +530,7 @@ impl<'a, K, V, S> OccupiedEntry<'a, K, V, S> { } } -impl OccupiedEntry<'_, K, V, S> { +impl OccupiedEntry<'_, K, V, S, W> { /// Removes the entry from the map and returns the key-value pair. /// /// The last entry in the map is put in its place. @@ -553,7 +553,7 @@ impl OccupiedEntry<'_, K, V, S> { } } -impl<'a, K, V, S> Entry<'a, K, V, S> { +impl<'a, K, V, S, W: SmallIndex> Entry<'a, K, V, S, W> { /// Returns a refernece to the key of the entry. pub fn key(&self) -> &K { match self { @@ -611,7 +611,7 @@ impl<'a, K, V, S> Entry<'a, K, V, S> { } } -impl StableMap { +impl StableMap { #[cfg(test)] fn check(&self) { assert!(self.index_table.len() == self.items.len()); diff --git a/stable_set/src/stable_set.rs b/stable_set/src/stable_set.rs index 87fee92..585e430 100644 --- a/stable_set/src/stable_set.rs +++ b/stable_set/src/stable_set.rs @@ -4,27 +4,32 @@ use crate::{ util::{impl_iterator, simplify_range}, }; use core::hash::Hash; -use index_table::IndexTable; +use index_table::{IndexTable, SmallIndex}; use std::{borrow::Borrow, hash::BuildHasher, ops::RangeBounds}; #[path = "test_set.rs"] mod test_set; /// A hash set that maintains the order of its elements. +/// +/// In `StableSet`, +/// `T: Hash + Eq` is the type of elements of the set, +/// `S: BuildHasher` is used for hashing elements +/// and `W: SmallIndex` is the type used for small indices internally (`W` should usually be omitted, it then defaults to `u32`). #[derive(Clone)] -pub struct StableSet { - index_table: IndexTable, +pub struct StableSet { + index_table: IndexTable, items: Vec, build_hasher: S, } -impl std::fmt::Debug for StableSet { +impl std::fmt::Debug for StableSet { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_set().entries(self).finish() } } -impl Default for StableSet { +impl Default for StableSet { fn default() -> Self { StableSet { index_table: IndexTable::default(), @@ -34,7 +39,7 @@ impl Default for StableSet { } } -impl StableSet { +impl StableSet { /// Returns an empty set. pub fn new() -> Self { Self::default() @@ -49,7 +54,7 @@ impl StableSet { } } -impl StableSet { +impl StableSet { /// Returns an empty set with the provided BuildHasher. pub fn with_hasher(build_hasher: S) -> Self { StableSet { @@ -103,7 +108,7 @@ impl StableSet { } } -impl StableSet { +impl StableSet { /// Reserve memory for an extra `additional` items. pub fn reserve(&mut self, additional: usize) { self.items.reserve(additional); @@ -113,7 +118,7 @@ impl StableSet { } } -impl StableSet { +impl StableSet { /// Removes the last item from the set and returns it, if it exists. pub fn pop(&mut self) -> Option { let opt_item = self.items.pop(); @@ -129,6 +134,11 @@ impl StableSet { /// Inserts an item to the end of the set, unless the set contains an equivalent item already. /// Returns `true` if the item was inserted. pub fn insert(&mut self, value: T) -> bool { + self.insert_full(value).1 + } + /// Inserts an item to the end of the set, unless the set contains an equivalent item already. + /// Returns the index of the existing or new item, and `true` if the given item was inserted. + pub fn insert_full(&mut self, value: T) -> (usize, bool) { self.index_table.grow_for(self.items.len(), |index| { self.build_hasher.hash_one(&self.items[index]) }); @@ -138,12 +148,12 @@ impl StableSet { |index| self.items[index] == value, |index| self.build_hasher.hash_one(&self.items[index]), ) { - index_table::Entry::Occupied(_) => false, + index_table::Entry::Occupied(entry) => (entry.get(), false), index_table::Entry::Vacant(entry) => { let new_index = self.items.len(); self.items.push(value); entry.insert(new_index); - true + (new_index, true) } } } @@ -253,7 +263,7 @@ impl StableSet { /// Returns `true` if the set is a subset of `other`. /// /// The order of elements is ignored. - pub fn is_subset(&self, other: &StableSet) -> bool { + pub fn is_subset(&self, other: &StableSet) -> bool { self.len() <= other.len() && self.iter().all(|t| other.contains(t)) } /// Removes all items from the set for which `f` evaluates to `false`. @@ -286,15 +296,15 @@ impl StableSet { } } -impl PartialEq> - for StableSet +impl PartialEq> + for StableSet { - fn eq(&self, other: &StableSet) -> bool { + fn eq(&self, other: &StableSet) -> bool { self.len() == other.len() && self.is_subset(other) } } -impl Eq for StableSet {} +impl Eq for StableSet {} /// An iterator that moves out of a set. /// @@ -306,7 +316,7 @@ impl Iterator for IntoIter { type Item = T; impl_iterator!(); } -impl IntoIterator for StableSet { +impl IntoIterator for StableSet { type Item = T; type IntoIter = IntoIter; fn into_iter(self) -> Self::IntoIter { @@ -316,7 +326,7 @@ impl IntoIterator for StableSet { } } -impl<'a, T, S> IntoIterator for &'a StableSet { +impl<'a, T, S, W> IntoIterator for &'a StableSet { type Item = &'a T; type IntoIter = Iter<'a, T>; fn into_iter(self) -> Self::IntoIter { @@ -335,7 +345,7 @@ impl<'a, T> Iterator for Iter<'a, T> { impl_iterator!(); } -impl StableSet { +impl StableSet { /// Returns an iterator over the set. /// /// The iterator yields all items in order. @@ -346,7 +356,7 @@ impl StableSet { } } -impl Extend for StableSet { +impl Extend for StableSet { fn extend>(&mut self, iter: IntoIter) { let iter = iter.into_iter(); let (lower_bound, _) = iter.size_hint(); @@ -357,7 +367,7 @@ impl Extend for StableSet { } } -impl FromIterator for StableSet { +impl FromIterator for StableSet { fn from_iter>(iter: IntoIter) -> Self { let iter = iter.into_iter(); let (lower_bound, _) = iter.size_hint(); @@ -369,7 +379,7 @@ impl FromIterator for StableSet } } -impl StableSet { +impl StableSet { #[cfg(test)] fn check(&self) { assert!(self.index_table.len() == self.items.len()); @@ -389,7 +399,7 @@ impl Iterator for Drain<'_, T> { impl_iterator!(); } -impl StableSet { +impl StableSet { /// Returns an iterator yielding all elements with indices in the specified range and removes those elements from the set. /// /// Panics if the range is invalid or out of bounds. diff --git a/stable_set/src/test_set.rs b/stable_set/src/test_set.rs index d4f5925..6f5afd4 100644 --- a/stable_set/src/test_set.rs +++ b/stable_set/src/test_set.rs @@ -1,10 +1,233 @@ #![cfg(test)] #![allow(missing_docs)] use crate::StableSet; -use std::hash::BuildHasherDefault; +use indexmap::IndexSet; +use rand::prelude::*; +use std::{ + borrow::Borrow, + hash::{BuildHasherDefault, Hash}, +}; use zwohash::ZwoHasher; -type ZwoSet = StableSet>; +// use u8 for the small indices to exercise the large index code +type ZwoSet = StableSet, u8>; + +struct CheckedSet { + dut: ZwoSet, + ref_set: IndexSet, +} + +impl CheckedSet { + fn new() -> Self { + CheckedSet { + dut: ZwoSet::new(), + ref_set: IndexSet::new(), + } + } + fn get_full(&mut self, value: &Q) -> Option<(usize, &T)> + where + T: Borrow, + Q: Hash + Eq, + { + let ref_result = self.ref_set.get_full(value); + let dut_result = self.dut.get_full(value); + assert_eq!(ref_result, dut_result); + ref_result + } + fn insert_full(&mut self, value: T) -> (usize, bool) { + let ref_result = self.ref_set.insert_full(value.clone()); + let dut_result = self.dut.insert_full(value); + assert_eq!(ref_result, dut_result); + ref_result + } + fn swap_remove_full(&mut self, value: &Q) -> Option<(usize, T)> + where + T: Borrow, + Q: Hash + Eq, + { + let ref_result = self.ref_set.swap_remove_full(value); + let dut_result = self.dut.swap_remove_full(value); + assert_eq!(ref_result, dut_result); + ref_result + } + fn swap_remove_index(&mut self, index: usize) -> Option { + let ref_result = self.ref_set.swap_remove_index(index); + let dut_result = self.dut.swap_remove_index(index); + assert_eq!(ref_result, dut_result); + ref_result + } + fn pop(&mut self) -> Option { + let ref_result = self.ref_set.pop(); + let dut_result = self.dut.pop(); + assert_eq!(ref_result, dut_result); + ref_result + } + fn retain(&mut self, fun: impl Fn(&T) -> bool) { + let mut ref_iter = self.ref_set.iter(); + self.dut.retain(|item| { + // make sure that retain visits in the correct order + assert_eq!(Some(item), ref_iter.next()); + fun(item) + }); + assert_eq!(None, ref_iter.next()); + self.ref_set.retain(&fun); + self.check(); + } + fn check(&mut self) { + self.dut.check(); + assert!(self.ref_set.iter().eq(&self.dut)); + } + fn reserve(&mut self, additional: usize) { + self.dut.reserve(additional); + self.ref_set.reserve(additional); + } +} + +macro_rules! weighted_choose { + ($rng:expr, $($name:ident: $weight:expr => $body:expr),+) => { + { + enum Branches { $( $name, )* } + let weights = [$((Branches::$name, $weight)),+]; + match weights.choose_weighted($rng, |x| x.1).unwrap().0 { + $(Branches::$name => $body),* + } + } + } +} + +fn test_suite( + mut rand_t: impl FnMut(&mut R) -> T, + retain_fn: impl Fn(&T) -> bool, +) { + let mut set: CheckedSet = CheckedSet::new(); + let mut rng = R::seed_from_u64(25); + let mut max_size = 0; + let verbosity = 1; + for _ in 0..5000 { + weighted_choose! {&mut rng, + Insert: 1.0 => { + let item = rand_t(&mut rng); + let result = set.insert_full(item.clone()); + if verbosity > 0 { + println!("inserting {item:?} -> {result:?}"); + } + }, + GetPresent: 0.5 => { + if let Some(item) = set.ref_set.iter().choose(&mut rng).cloned() { + let result = set.get_full(&item); + if verbosity > 0 { + println!("getting {item:?} -> {result:?}"); + } + } + }, + GetRandom: 0.5 => { + let item = rand_t(&mut rng); + let result = set.get_full(&item); + if verbosity > 0 { + println!("getting {item:?} -> {result:?}"); + } + }, + RemovePresent: 0.3 => { + if let Some(item) = set.ref_set.iter().choose(&mut rng).cloned() { + let result = set.swap_remove_full(&item); + if verbosity > 0 { + println!("removing {item:?} -> {result:?}"); + } + } + }, + RemoveRandom: 0.5 => { + let item = rand_t(&mut rng); + let result = set.swap_remove_full(&item); + if verbosity > 0 { + println!("removing {item:?} -> {result:?}"); + } + }, + RemoveIndex: 0.2 => { + let len = set.ref_set.len(); + // try to generate invalid indices sometimes + let index = rng.gen_range(0..=(len + len.div_ceil(10))); + let result = set.swap_remove_index(index); + if verbosity > 0 { + println!("removing index {index:?} -> {result:?}"); + } + }, + Pop: 0.2 => { + let result = set.pop(); + if verbosity > 0 { + println!("popping -> {result:?}"); + } + }, + Retain: 0.05 => { + let old_len = set.ref_set.len(); + set.retain(&retain_fn); + let new_len = set.ref_set.len(); + if verbosity > 0 { + println!("retaining, {old_len} -> {new_len}"); + } + }, + Check: 0.15 => { + set.check(); + if verbosity > 0 { + println!("check"); + } + } + }; + max_size = std::cmp::max(max_size, set.ref_set.len()); + } + set.check(); + println!("max size: {max_size}"); +} + +#[test] +fn test_suite_usize() { + test_suite::( + |rng| rng.gen::() >> rng.gen_range(0..usize::BITS), + |item| item % 17 < 15, + ); +} + +#[test] +fn test_suite_boxed_usize() { + test_suite::, rand_pcg::Pcg64>( + |rng| Box::new(rng.gen::() >> rng.gen_range(0..usize::BITS)), + |item| **item % 17 < 15, + ); +} + +#[test] +fn test_suite_string() { + test_suite::( + |rng| { + let len = rng.gen_range(4..32); + String::from_iter((0..len).map(|_| rng.gen_range('!'..'~'))) + }, + |item| !item.contains('!') + ); +} + +#[test] +fn test_reserve() { + let mut rng = rand_pcg::Pcg64::seed_from_u64(58); + for size in [10, 50, 100, 200, 500, 1000] { + // just check if this doesn't cause a crash + let mut set = CheckedSet::::new(); + set.reserve(size); + for _ in 0..size + 5 { + set.insert_full(rng.gen()); + } + } +} + +#[test] +fn test_from_iter_extend() { + let rng_start = rand_pcg::Pcg64::seed_from_u64(58); + let mut rng = rng_start.clone(); + let mut set = (0..1000).map(|_| rng.gen()).collect::>(); + set.extend((0..1000).map(|_| rng.gen())); + set.check(); + rng = rng_start.clone(); + assert!(set.iter().copied().eq((0..2000).map(|_| rng.gen::()))); +} #[test] fn test_primes() { @@ -12,8 +235,9 @@ fn test_primes() { let mut ref_primes = [ 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, - 191, 193, 197, 199 - ].into_iter(); + 191, 193, 197, 199, + ] + .into_iter(); while let Some(&k) = set.first() { assert_eq!(ref_primes.next(), Some(k)); set.retain(|&n| n % k != 0); @@ -21,4 +245,4 @@ fn test_primes() { } assert_eq!(ref_primes.next(), None); assert!(set.is_empty()); -} \ No newline at end of file +} From 8e147b21c618d52a8be9781319f9758d958b7e41 Mon Sep 17 00:00:00 2001 From: Emily Schmidt Date: Thu, 17 Oct 2024 09:35:19 +0100 Subject: [PATCH 4/6] stable_set: add tests for map, clean up tests --- stable_set/src/lib.rs | 7 +- stable_set/src/stable_map.rs | 15 +- stable_set/src/stable_set.rs | 5 +- stable_set/src/test_map.rs | 311 ++++++++++++++++++++++++++++++++++- stable_set/src/test_set.rs | 67 ++++---- 5 files changed, 351 insertions(+), 54 deletions(-) diff --git a/stable_set/src/lib.rs b/stable_set/src/lib.rs index 30c90fa..afee5af 100644 --- a/stable_set/src/lib.rs +++ b/stable_set/src/lib.rs @@ -23,4 +23,9 @@ pub use stable_set::StableSet; pub use stable_map::StableMap; pub mod stable_set; -pub mod stable_map; \ No newline at end of file +pub mod stable_map; + +#[cfg(test)] +mod test_set; +#[cfg(test)] +mod test_map; \ No newline at end of file diff --git a/stable_set/src/stable_map.rs b/stable_set/src/stable_map.rs index 749b462..29540bd 100644 --- a/stable_set/src/stable_map.rs +++ b/stable_set/src/stable_map.rs @@ -7,9 +7,6 @@ use core::hash::Hash; use index_table::{IndexTable, SmallIndex}; use std::{borrow::Borrow, hash::BuildHasher, ops::RangeBounds}; -#[path = "test_map.rs"] -mod test_map; - /// A hash map that maintains the order of its elements. #[derive(Clone)] pub struct StableMap { @@ -260,7 +257,7 @@ impl StableMap { /// Removes the entry with the specified index from the set and returns its key and value, if it exists. /// /// The last item is put in its place. - pub fn swap_remove_index_full(&mut self, index: usize) -> Option<(K, V)> { + pub fn swap_remove_index(&mut self, index: usize) -> Option<(K, V)> { let hash = self.build_hasher.hash_one(&self.items.get(index)?.0); self.index_table .find_entry(hash, |i| i == index) @@ -268,12 +265,6 @@ impl StableMap { .remove(); Some(self.swap_remove_finish(index)) } - /// Removes the entry with the specified index from the set and returns its value, if it exists. - /// - /// The last item is put in its place. - pub fn swap_remove_index(&mut self, index: usize) -> Option { - self.swap_remove_index_full(index).map(|x| x.1) - } /// Removes all items for which `f` evaluates to `false`. /// /// `f` is guaranteed to be called exactly once for each item and in order, and may mutate the values. @@ -554,7 +545,7 @@ impl OccupiedEntry<'_, K, V, S, W> { } impl<'a, K, V, S, W: SmallIndex> Entry<'a, K, V, S, W> { - /// Returns a refernece to the key of the entry. + /// Returns a reference to the key of the entry. pub fn key(&self) -> &K { match self { Entry::Vacant(entry) => entry.key(), @@ -613,7 +604,7 @@ impl<'a, K, V, S, W: SmallIndex> Entry<'a, K, V, S, W> { impl StableMap { #[cfg(test)] - fn check(&self) { + pub(crate) fn check(&self) { assert!(self.index_table.len() == self.items.len()); for (index, (key, _)) in self.items.iter().enumerate() { let hash = self.build_hasher.hash_one(key); diff --git a/stable_set/src/stable_set.rs b/stable_set/src/stable_set.rs index 585e430..ba67cb5 100644 --- a/stable_set/src/stable_set.rs +++ b/stable_set/src/stable_set.rs @@ -7,9 +7,6 @@ use core::hash::Hash; use index_table::{IndexTable, SmallIndex}; use std::{borrow::Borrow, hash::BuildHasher, ops::RangeBounds}; -#[path = "test_set.rs"] -mod test_set; - /// A hash set that maintains the order of its elements. /// /// In `StableSet`, @@ -381,7 +378,7 @@ impl FromIterator for impl StableSet { #[cfg(test)] - fn check(&self) { + pub(crate) fn check(&self) { assert!(self.index_table.len() == self.items.len()); for (index, item) in self.items.iter().enumerate() { let hash = self.build_hasher.hash_one(item); diff --git a/stable_set/src/test_map.rs b/stable_set/src/test_map.rs index 7534bad..4177ea1 100644 --- a/stable_set/src/test_map.rs +++ b/stable_set/src/test_map.rs @@ -1,12 +1,308 @@ -#![cfg(test)] #![allow(missing_docs)] use crate::StableMap; +use indexmap::IndexMap; use rand::prelude::*; -use std::hash::BuildHasherDefault; -use zwohash::ZwoHasher; +use std::fmt::Debug; use std::hash::Hash; +use std::{borrow::Borrow, hash::BuildHasherDefault}; +use zwohash::ZwoHasher; + +type ZwoMap = StableMap, u8>; + +struct CheckedMap { + dut: ZwoMap, + ref_set: IndexMap, +} + +impl CheckedMap { + fn new() -> Self { + CheckedMap { + dut: ZwoMap::new(), + ref_set: IndexMap::new(), + } + } + fn len(&self) -> usize { + self.ref_set.len() + } + fn get_full(&mut self, key: &Q) -> Option<(usize, &K, &V)> + where + Q: Hash + Eq, + K: Borrow, + { + let ref_result = self.ref_set.get_full(key); + let dut_result = self.dut.get_full(key); + assert_eq!(ref_result, dut_result); + ref_result + } + fn get_index(&mut self, index: usize) -> Option<(&K, &V)> { + let ref_result = self.ref_set.get_index(index); + let dut_result = self.dut.get_index(index); + assert_eq!(ref_result, dut_result); + ref_result + } + fn insert_full(&mut self, key: K, value: V) -> (usize, Option) { + let ref_result = self.ref_set.insert_full(key.clone(), value.clone()); + let dut_result = self.dut.insert_full(key, value); + assert_eq!(ref_result, dut_result); + ref_result + } + fn swap_remove_full(&mut self, key: &Q) -> Option<(usize, K, V)> + where + Q: Hash + Eq, + K: Borrow, + { + let ref_result = self.ref_set.swap_remove_full(key); + let dut_result = self.dut.swap_remove_full(key); + assert_eq!(ref_result, dut_result); + ref_result + } + fn swap_remove_index(&mut self, index: usize) -> Option<(K, V)> { + let ref_result = self.ref_set.swap_remove_index(index); + let dut_result = self.dut.swap_remove_index(index); + assert_eq!(ref_result, dut_result); + ref_result + } + fn retain(&mut self, f: impl Fn(&K, &mut V) -> bool) { + let mut ref_iter = self.ref_set.iter(); + self.dut.retain(|k, v| { + // make sure that retain visits in the correct order + assert_eq!(ref_iter.next(), Some((k, &*v))); + f(k, v) + }); + assert_eq!(ref_iter.next(), None); + self.ref_set.retain(f); + self.check(); + } + fn drain(&mut self, r: impl std::ops::RangeBounds + Clone) { + let ref_iter = self.ref_set.drain(r.clone()); + let dut_iter = self.dut.drain(r); + assert!(Iterator::eq(ref_iter, dut_iter)); + self.dut.check(); + } + fn check(&mut self) { + self.dut.check(); + assert!(Iterator::eq(self.ref_set.iter(), self.dut.iter())); + } + fn iterator_check(&mut self) { + assert!(Iterator::eq(self.ref_set.iter_mut(), self.dut.iter_mut())); + assert!(Iterator::eq(self.ref_set.keys(), self.dut.keys())); + assert!(Iterator::eq(self.ref_set.values(), self.dut.values())); + assert!(Iterator::eq( + self.ref_set.values_mut(), + self.dut.values_mut() + )); + } + fn entry_insert_or_remove(&mut self, key: K, value: V) -> impl Debug + '_ { + #[derive(Debug, PartialEq, Eq)] + enum OpResult<'a, K, V> { + Inserted(&'a mut V), + Removed((K, V)), + } + let ref_result = match self.ref_set.entry(key.clone()) { + indexmap::map::Entry::Occupied(entry) => OpResult::Removed(entry.swap_remove_entry()), + indexmap::map::Entry::Vacant(entry) => OpResult::Inserted(entry.insert(value.clone())), + }; + let dut_result = match self.dut.entry(key) { + crate::stable_map::Entry::Occupied(entry) => { + OpResult::Removed(entry.swap_remove_entry()) + } + crate::stable_map::Entry::Vacant(entry) => OpResult::Inserted(entry.insert(value)), + }; + assert_eq!(ref_result, dut_result); + ref_result + } + fn entry_or_insert(&mut self, key: K, value: V) -> &mut V { + let ref_result = self.ref_set.entry(key.clone()).or_insert(value.clone()); + let dut_result = self.dut.entry(key).or_insert(value); + assert_eq!(ref_result, dut_result); + ref_result + } + /// NB: `random_likelihood` is **not** a probability. `random_likelihood == 2.0` would be 2:1 odds random:present, i.e. 2/3 probability. + fn present_or_random_key( + &self, + random_likelihood: f64, + rng: &mut R, + mut rand_k: impl FnMut(&mut R) -> K, + ) -> K { + debug_assert!(random_likelihood >= 0.0); + if self.len() == 0 || rng.gen_range(0.0..1.0 + random_likelihood) >= 1.0 { + rand_k(rng) + } else { + self.ref_set.iter().choose(rng).unwrap().0.clone() + } + } + fn random_index(&self, error_likelihood: f64, rng: &mut R) -> usize { + let max = (self.len() as f64 * (1.0 + error_likelihood)).ceil() as usize; + rng.gen_range(0..=max) + } +} + +macro_rules! weighted_choose { + ($rng:expr, $($name:ident: $weight:expr => $body:expr),+) => { + { + enum Branches { $( $name, )* } + let weights = [$((Branches::$name, $weight)),+]; + match weights.choose_weighted($rng, |x| x.1).unwrap().0 { + $(Branches::$name => $body),* + } + } + } +} + +fn test_suite( + mut rand_k: impl FnMut(&mut R) -> K, + mut rand_v: impl FnMut(&mut R) -> V, + retain_fn: impl Fn(&K, &mut V) -> bool, +) where + K: Hash + Eq + Clone + Debug, + V: Eq + Clone + Debug, + R: Rng + SeedableRng, +{ + let mut map: CheckedMap = CheckedMap::new(); + let mut rng = R::seed_from_u64(39); + let mut max_size = 0; + let verbosity = 1; + for _ in 0..5000 { + weighted_choose! {&mut rng, + Insert: 2.0 => { + let k = map.present_or_random_key(6.0, &mut rng, &mut rand_k); + let v = rand_v(&mut rng); + let result = map.insert_full(k.clone(), v.clone()); + if verbosity > 0 { + println!("inserting {k:?}: {v:?} -> {result:?}"); + } + }, + Get: 0.5 => { + let k = map.present_or_random_key(1.0, &mut rng, &mut rand_k); + let result = map.get_full(&k); + if verbosity > 0 { + println!("getting {k:?} -> {result:?}"); + } + }, + GetIndex: 0.3 => { + let index = map.random_index(0.1, &mut rng); + let result = map.get_index(index); + if verbosity > 0 { + println!("getting index {index:?} -> {result:?}"); + } + }, + Remove: 0.5 => { + let k = map.present_or_random_key(1.0, &mut rng, &mut rand_k); + let result = map.swap_remove_full(&k); + if verbosity > 0 { + println!("removing {k:?} -> {result:?}"); + } + }, + RemoveIndex: 0.2 => { + let index = map.random_index(0.1, &mut rng); + let result = map.swap_remove_index(index); + if verbosity > 0 { + println!("removing index {index:?} -> {result:?}"); + } + }, + Retain: 0.05 => { + let old_len = map.len(); + map.retain(&retain_fn); + let new_len = map.len(); + if verbosity > 0 { + println!("retaining, {old_len} -> {new_len}"); + } + }, + Drain: 0.05 => { + if map.len() > 0 { + let start_index = rng.gen_range(0..map.len()); + let max_len = rng.gen_range(1..=map.len().div_ceil(10)); + let end_index = std::cmp::min(map.len(), start_index + max_len); + map.drain(start_index..end_index); + if verbosity > 0 { + println!("draining {start_index}..{end_index}"); + } + } + }, + EntryInsertOrRemove: 0.3 => { + let key = map.present_or_random_key(1.0, &mut rng, &mut rand_k); + let value = rand_v(&mut rng); + let result = map.entry_insert_or_remove(key.clone(), value.clone()); + if verbosity > 0 { + println!("entry insert/remove {key:?}: {value:?} -> {result:?}"); + } + }, + EntryOrInsert: 0.3 => { + let key = map.present_or_random_key(1.0, &mut rng, &mut rand_k); + let value = rand_v(&mut rng); + let result = map.entry_or_insert(key.clone(), value.clone()); + if verbosity > 0 { + println!("entry or_insert {key:?}: {value:?} -> {result:?}"); + } + }, + Check: 0.15 => { + map.check(); + } + }; + max_size = std::cmp::max(max_size, map.len()); + } + map.check(); + map.iterator_check(); + println!("max size {max_size}"); +} + +#[test] +fn test_suite_usize_usize() { + test_suite::( + |rng| rng.gen::() >> rng.gen_range(0..usize::BITS), + |rng| rng.gen(), + |k, v| { + *v = v.wrapping_add(3); + k % 7 < 6 + }, + ); +} -type ZwoMap = StableMap>; +#[test] +fn test_suite_boxed_usize_boxed_usize() { + test_suite::, Box, rand_pcg::Pcg64>( + |rng| Box::new(rng.gen::() >> rng.gen_range(0..usize::BITS)), + |rng| Box::new(rng.gen()), + |k, v| { + **v = v.wrapping_add(3); + **k % 7 < 6 + }, + ); +} + +#[test] +fn test_suite_string_u64() { + test_suite::( + |rng| { + let len = rng.gen_range(4..16); + String::from_iter((0..len).map(|_| rng.gen_range('!'..'~'))) + }, + |rng| rng.gen(), + |k: &String, v| { + *v = (*v).wrapping_add(3); + !k.contains('!') + }, + ); +} + +#[test] +fn test_suite_string_string() { + test_suite::( + |rng| { + let len = rng.gen_range(4..16); + String::from_iter((0..len).map(|_| rng.gen_range('!'..'~'))) + }, + |rng| { + let len = rng.gen_range(8..32); + String::from_iter((0..len).map(|_| rng.gen_range('!'..'~'))) + }, + |k: &String, v| { + let ch = v.remove(0); + v.push(ch); + !k.contains('!') + }, + ); +} fn dedup_vec(vec: &mut Vec<(K, V)>) { let mut seen = std::collections::HashSet::new(); @@ -61,5 +357,8 @@ fn test_drain() { let out_data_drain = out_data.drain(100..150); assert!(map_drain.eq(out_data_drain)); map.check(); - assert_eq!(map.iter().map(|(&k, &v)| (k, v)).collect::>(), out_data); -} \ No newline at end of file + assert_eq!( + map.iter().map(|(&k, &v)| (k, v)).collect::>(), + out_data + ); +} diff --git a/stable_set/src/test_set.rs b/stable_set/src/test_set.rs index 6f5afd4..1ac4ad1 100644 --- a/stable_set/src/test_set.rs +++ b/stable_set/src/test_set.rs @@ -1,4 +1,3 @@ -#![cfg(test)] #![allow(missing_docs)] use crate::StableSet; use indexmap::IndexSet; @@ -24,6 +23,9 @@ impl CheckedSet { ref_set: IndexSet::new(), } } + fn len(&self) -> usize { + self.ref_set.len() + } fn get_full(&mut self, value: &Q) -> Option<(usize, &T)> where T: Borrow, @@ -81,6 +83,24 @@ impl CheckedSet { self.dut.reserve(additional); self.ref_set.reserve(additional); } + /// NB: `random_likelihood` is **not** a probability. `random_likelihood == 2.0` would be 2:1 odds random:present, i.e. 2/3 probability. + fn present_or_random( + &self, + random_likelihood: f64, + rng: &mut R, + mut rand_t: impl FnMut(&mut R) -> T, + ) -> T { + debug_assert!(random_likelihood >= 0.0); + if self.len() == 0 || rng.gen_range(0.0..1.0 + random_likelihood) >= 1.0 { + rand_t(rng) + } else { + self.ref_set.iter().choose(rng).unwrap().clone() + } + } + fn random_index(&self, error_likelihood: f64, rng: &mut R) -> usize { + let max = (self.len() as f64 * (1.0 + error_likelihood)).ceil() as usize; + rng.gen_range(0..=max) + } } macro_rules! weighted_choose { @@ -105,47 +125,29 @@ fn test_suite( let verbosity = 1; for _ in 0..5000 { weighted_choose! {&mut rng, - Insert: 1.0 => { - let item = rand_t(&mut rng); + InsertRandom: 1.5 => { + let item = set.present_or_random(4.0, &mut rng, &mut rand_t); let result = set.insert_full(item.clone()); if verbosity > 0 { println!("inserting {item:?} -> {result:?}"); } }, - GetPresent: 0.5 => { - if let Some(item) = set.ref_set.iter().choose(&mut rng).cloned() { - let result = set.get_full(&item); - if verbosity > 0 { - println!("getting {item:?} -> {result:?}"); - } - } - }, - GetRandom: 0.5 => { - let item = rand_t(&mut rng); + Get: 1.0 => { + let item = set.present_or_random(1.0, &mut rng, &mut rand_t); let result = set.get_full(&item); if verbosity > 0 { println!("getting {item:?} -> {result:?}"); } }, - RemovePresent: 0.3 => { - if let Some(item) = set.ref_set.iter().choose(&mut rng).cloned() { - let result = set.swap_remove_full(&item); - if verbosity > 0 { - println!("removing {item:?} -> {result:?}"); - } - } - }, - RemoveRandom: 0.5 => { - let item = rand_t(&mut rng); + Remove: 0.5 => { + let item = set.present_or_random(0.5, &mut rng, &mut rand_t); let result = set.swap_remove_full(&item); if verbosity > 0 { println!("removing {item:?} -> {result:?}"); } }, RemoveIndex: 0.2 => { - let len = set.ref_set.len(); - // try to generate invalid indices sometimes - let index = rng.gen_range(0..=(len + len.div_ceil(10))); + let index = set.random_index(0.1, &mut rng); let result = set.swap_remove_index(index); if verbosity > 0 { println!("removing index {index:?} -> {result:?}"); @@ -158,9 +160,9 @@ fn test_suite( } }, Retain: 0.05 => { - let old_len = set.ref_set.len(); + let old_len = set.len(); set.retain(&retain_fn); - let new_len = set.ref_set.len(); + let new_len = set.len(); if verbosity > 0 { println!("retaining, {old_len} -> {new_len}"); } @@ -172,7 +174,7 @@ fn test_suite( } } }; - max_size = std::cmp::max(max_size, set.ref_set.len()); + max_size = std::cmp::max(max_size, set.len()); } set.check(); println!("max size: {max_size}"); @@ -201,7 +203,7 @@ fn test_suite_string() { let len = rng.gen_range(4..32); String::from_iter((0..len).map(|_| rng.gen_range('!'..'~'))) }, - |item| !item.contains('!') + |item| !item.contains('!'), ); } @@ -226,7 +228,10 @@ fn test_from_iter_extend() { set.extend((0..1000).map(|_| rng.gen())); set.check(); rng = rng_start.clone(); - assert!(set.iter().copied().eq((0..2000).map(|_| rng.gen::()))); + assert!(set + .iter() + .copied() + .eq((0..2000).map(|_| rng.gen::()))); } #[test] From 1bce70d3b55601f179327ae7dfe333ee4ccbd4bd Mon Sep 17 00:00:00 2001 From: Emily Schmidt Date: Thu, 17 Oct 2024 09:52:38 +0100 Subject: [PATCH 5/6] stable_set: add doc comment explaining StableMap generic arguments --- stable_set/src/stable_map.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/stable_set/src/stable_map.rs b/stable_set/src/stable_map.rs index 29540bd..7d6c946 100644 --- a/stable_set/src/stable_map.rs +++ b/stable_set/src/stable_map.rs @@ -8,6 +8,12 @@ use index_table::{IndexTable, SmallIndex}; use std::{borrow::Borrow, hash::BuildHasher, ops::RangeBounds}; /// A hash map that maintains the order of its elements. +/// +/// In `StableMap`, +/// `K: Hash + Eq` is the type of keys, +/// `V` is the type of values, +/// `S: BuildHasher` is used for hashing elements +/// and `W: SmallIndex` is the type used for small indices internally (`W` should usually be omitted, it then defaults to `u32`). #[derive(Clone)] pub struct StableMap { index_table: IndexTable, From ca7795a58f383b5f4b832a098428179489be6a55 Mon Sep 17 00:00:00 2001 From: Emily Schmidt Date: Tue, 29 Oct 2024 17:35:19 +0000 Subject: [PATCH 6/6] cargo fmt --- stable_set/src/lib.rs | 16 ++++++++-------- stable_set/src/stable_map.rs | 10 +++++++--- stable_set/src/stable_set.rs | 28 ++++++++++++++-------------- stable_set/src/util.rs | 10 ++++++++-- 4 files changed, 37 insertions(+), 27 deletions(-) diff --git a/stable_set/src/lib.rs b/stable_set/src/lib.rs index afee5af..eae9366 100644 --- a/stable_set/src/lib.rs +++ b/stable_set/src/lib.rs @@ -1,14 +1,14 @@ //! [StableMap] and [StableSet] are a hash map and hash set that preserve the order of their entries, //! i.e. the order in which entries are inserted is remembered and iterators yield elements in that order. -//! +//! //! Strictly speaking, this only holds if elements are not removed, as removal via the `swap_remove_*` series of methods perturbs the order //! (use the more expensive `retain` operation to remove elements while preserving their order). -//! +//! //! Both structs are implemented as a `Vec` storing the actual entries, supplemented by a hashbrown `HashTable` to allow for fast lookups. //! This table only stores indices into the `Vec`. -//! +//! //! This crate is very similar to the `index_map` crate, however there are two key differences: -//! +//! //! 1. Hashes are not stored with the entries, but instead recalculated when needed. //! This saves memory and potentially improves performance when hashes are cheap to calculate. //! 2. For small tables, the index table stores `u32` values, again saving memory. @@ -19,13 +19,13 @@ mod index_table; mod util; -pub use stable_set::StableSet; pub use stable_map::StableMap; +pub use stable_set::StableSet; -pub mod stable_set; pub mod stable_map; +pub mod stable_set; #[cfg(test)] -mod test_set; +mod test_map; #[cfg(test)] -mod test_map; \ No newline at end of file +mod test_set; diff --git a/stable_set/src/stable_map.rs b/stable_set/src/stable_map.rs index 7d6c946..fdc31b3 100644 --- a/stable_set/src/stable_map.rs +++ b/stable_set/src/stable_map.rs @@ -15,7 +15,7 @@ use std::{borrow::Borrow, hash::BuildHasher, ops::RangeBounds}; /// `S: BuildHasher` is used for hashing elements /// and `W: SmallIndex` is the type used for small indices internally (`W` should usually be omitted, it then defaults to `u32`). #[derive(Clone)] -pub struct StableMap { +pub struct StableMap { index_table: IndexTable, items: Vec<(K, V)>, build_hasher: S, @@ -64,7 +64,9 @@ impl StableMap { } } -impl std::fmt::Debug for StableMap { +impl std::fmt::Debug + for StableMap +{ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_map().entries(self.iter()).finish() } @@ -416,7 +418,9 @@ impl<'a, K, V, S, W: SmallIndex> IntoIterator for &'a mut StableMap } } -impl FromIterator<(K, V)> for StableMap { +impl FromIterator<(K, V)> + for StableMap +{ fn from_iter>(iter: Iter) -> Self { let iter = iter.into_iter(); let (lower_bound, _) = iter.size_hint(); diff --git a/stable_set/src/stable_set.rs b/stable_set/src/stable_set.rs index ba67cb5..09db103 100644 --- a/stable_set/src/stable_set.rs +++ b/stable_set/src/stable_set.rs @@ -8,13 +8,13 @@ use index_table::{IndexTable, SmallIndex}; use std::{borrow::Borrow, hash::BuildHasher, ops::RangeBounds}; /// A hash set that maintains the order of its elements. -/// +/// /// In `StableSet`, /// `T: Hash + Eq` is the type of elements of the set, /// `S: BuildHasher` is used for hashing elements /// and `W: SmallIndex` is the type used for small indices internally (`W` should usually be omitted, it then defaults to `u32`). #[derive(Clone)] -pub struct StableSet { +pub struct StableSet { index_table: IndexTable, items: Vec, build_hasher: S, @@ -206,7 +206,7 @@ impl StableSet { item } /// Removes the specified value from the set, if it exists. Returns `true` if an item was removed. - /// + /// /// The last item is put in its place. pub fn swap_remove(&mut self, value: &Q) -> bool where @@ -216,7 +216,7 @@ impl StableSet { self.swap_remove_full(value).is_some() } /// Removes the specified value from the set and returns it, if it exists. - /// + /// /// The last item is put in its place. pub fn swap_take(&mut self, value: &Q) -> Option where @@ -226,7 +226,7 @@ impl StableSet { self.swap_remove_full(value).map(|x| x.1) } /// Removes the specified value from the set and returns its index and it, if it exists. - /// + /// /// The last item is put in its place. pub fn swap_remove_full(&mut self, value: &Q) -> Option<(usize, T)> where @@ -247,7 +247,7 @@ impl StableSet { } } /// Removes the item at the given index and returns it, if it exists. - /// + /// /// The last item is put in its place. pub fn swap_remove_index(&mut self, index: usize) -> Option { let hash = self.build_hasher.hash_one(self.items.get(index)?); @@ -258,15 +258,15 @@ impl StableSet { Some(self.swap_remove_finish(index)) } /// Returns `true` if the set is a subset of `other`. - /// + /// /// The order of elements is ignored. pub fn is_subset(&self, other: &StableSet) -> bool { self.len() <= other.len() && self.iter().all(|t| other.contains(t)) } /// Removes all items from the set for which `f` evaluates to `false`. - /// + /// /// `f` is guaranteed to be called exactly once for each item and in order. - /// + /// /// The order of elements is preserved. pub fn retain(&mut self, mut f: impl FnMut(&T) -> bool) { let mut in_index = 0; @@ -304,7 +304,7 @@ impl PartialEq Eq for StableSet {} /// An iterator that moves out of a set. -/// +/// /// This struct is created by the `into_iter` method on [`StableSet`]. pub struct IntoIter { inner: std::vec::IntoIter, @@ -332,7 +332,7 @@ impl<'a, T, S, W> IntoIterator for &'a StableSet { } /// An iterator that returns references into a set. -/// +/// /// This struct is created by the [`iter`](StableSet::iter) method on [`StableSet`]. pub struct Iter<'a, T> { inner: std::slice::Iter<'a, T>, @@ -344,7 +344,7 @@ impl<'a, T> Iterator for Iter<'a, T> { impl StableSet { /// Returns an iterator over the set. - /// + /// /// The iterator yields all items in order. pub fn iter(&self) -> Iter<'_, T> { Iter { @@ -398,9 +398,9 @@ impl Iterator for Drain<'_, T> { impl StableSet { /// Returns an iterator yielding all elements with indices in the specified range and removes those elements from the set. - /// + /// /// Panics if the range is invalid or out of bounds. - /// + /// /// If the returned iterator is leaked, the set is left in an invalid state and can only be dropped. /// Any other operations may panic or return incorrect results. pub fn drain(&mut self, range: impl RangeBounds) -> Drain<'_, T> { diff --git a/stable_set/src/util.rs b/stable_set/src/util.rs index d3110a6..8774c18 100644 --- a/stable_set/src/util.rs +++ b/stable_set/src/util.rs @@ -11,9 +11,15 @@ pub fn simplify_range(range: impl RangeBounds, len: usize) -> Range n.checked_add(1).expect("end point of range too large"), std::ops::Bound::Excluded(&n) => n, }; - assert!(lower < len, "start point {lower} of range is >= length {len}"); + assert!( + lower < len, + "start point {lower} of range is >= length {len}" + ); assert!(upper <= len, "end point {upper} of range is > length {len}"); - assert!(lower <= upper, "start point {lower} is larger than end point {upper}"); + assert!( + lower <= upper, + "start point {lower} is larger than end point {upper}" + ); lower..upper }