diff --git a/table_seq/src/lib.rs b/table_seq/src/lib.rs
index 05ea676..0e91f24 100644
--- a/table_seq/src/lib.rs
+++ b/table_seq/src/lib.rs
@@ -108,12 +108,13 @@ use hashbrown::HashTable;
 
 mod node_allocator;
 
+pub mod map_seq;
 pub mod set_seq;
 pub mod table_seq;
 
+#[doc(inline)]
+pub use map_seq::MapSeq;
 #[doc(inline)]
 pub use set_seq::SetSeq;
 #[doc(inline)]
 pub use table_seq::TableSeq;
-
-// TODO Implement MapSeq on top of TableSeq
diff --git a/table_seq/src/map_seq.rs b/table_seq/src/map_seq.rs
new file mode 100644
index 0000000..6e8518e
--- /dev/null
+++ b/table_seq/src/map_seq.rs
@@ -0,0 +1,752 @@
+//! Indexed sequence of hash maps and associated helper types.
+use crate::table_seq::{self, SubtableIter, SubtableIterMut, TableSeq};
+use core::fmt;
+use std::{
+    borrow::Borrow,
+    hash::{BuildHasher, Hash},
+    mem,
+};
+
+struct MapEntry<K, V> {
+    key: K,
+    value: V,
+}
+
+/// Indexed sequence of hash maps.
+///
+/// This type serves as a memory and runtime efficient replacement for `Vec<HashMap<K, V, S>>`. In
+/// particular, it is optimized for the use-case where the vast majority of contained maps are tiny,
+/// each having 16 or fewer entries, while still allowing for a small but significant fraction of
+/// maps to be large.
+pub struct MapSeq<K, V, S> {
+    tables: TableSeq<MapEntry<K, V>>,
+    build_hasher: S,
+}
+
+impl<K, V, S: Default> Default for MapSeq<K, V, S> {
+    fn default() -> Self {
+        Self {
+            tables: Default::default(),
+            build_hasher: Default::default(),
+        }
+    }
+}
+
+impl<K: fmt::Debug, V: fmt::Debug> fmt::Debug for MapEntry<K, V> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{:?}: {:?}", self.key, self.value)
+    }
+}
+
+impl<K: fmt::Debug, V: fmt::Debug, S> fmt::Debug for MapSeq<K, V, S> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        fmt::Debug::fmt(&self.tables, f)
+    }
+}
+
+impl<K, V, S> MapSeq<K, V, S> {
+    /// Returns the number of maps in the sequence.
+    #[inline(always)]
+    pub fn len(&self) -> usize {
+        self.tables.len()
+    }
+
+    /// Returns `true` if the sequence of maps is empty.
+    #[inline(always)]
+    pub fn is_empty(&self) -> bool {
+        self.tables.is_empty()
+    }
+
+    /// Discards all maps in the sequence.
+    #[inline(always)]
+    pub fn clear(&mut self) {
+        self.tables.clear()
+    }
+
+    /// Resizes the sequence by appending empty maps or discarding trailing maps.
+    #[inline(always)]
+    pub fn resize(&mut self, maps: usize) {
+        self.tables.resize(maps)
+    }
+
+    /// Ensures that the sequence contains a map at the given index by appending empty maps if the
+    /// sequence was too short.
+    ///
+    /// Provides mutable access to the map at the given index.
+    #[inline(always)]
+    pub fn grow_for(&mut self, map: usize) -> MapSeqMapMut<'_, K, V, S> {
+        self.tables.grow_for_subtable(map);
+        MapSeqMapMut { seq: self, map }
+    }
+
+    /// Provides shared access to the map at a given index, panics if out-of-bounds.
+    ///
+    /// This is used instead of [`std::ops::Index`], as it returns a value of the custom
+    /// reference-like [`MapSeqMap`] type.
+    ///
+    /// Panics if `map >= self.len()`.
+    #[inline(always)]
+    pub fn at(&self, map: usize) -> MapSeqMap<'_, K, V, S> {
+        assert!(self.tables.len() > map);
+        MapSeqMap { seq: self, map }
+    }
+
+    /// Provides mutable access to the map at a given index, panics if out-of-bounds.
+    ///
+    /// This is used instead of [`std::ops::IndexMut`], as it returns a value of the custom
+    /// reference-like [`MapSeqMapMut`] type.
+    ///
+    /// Panics if `map >= self.len()`.
+    #[inline(always)]
+    pub fn at_mut(&mut self, map: usize) -> MapSeqMapMut<'_, K, V, S> {
+        assert!(self.tables.len() > map);
+        MapSeqMapMut { seq: self, map }
+    }
+
+    /// Provides shared access to the map at a given index.
+    ///
+    /// This returns `None` if `map >= self.len()`.
+    #[inline(always)]
+    pub fn get(&self, map: usize) -> Option<MapSeqMap<'_, K, V, S>> {
+        (self.tables.len() > map).then_some(MapSeqMap { seq: self, map })
+    }
+
+    /// Provides mutable access to the map at a given index.
+    ///
+    /// This returns `None` if `map >= self.len()`.
+    #[inline(always)]
+    pub fn get_mut(&mut self, map: usize) -> Option<MapSeqMapMut<'_, K, V, S>> {
+        (self.tables.len() > map).then_some(MapSeqMapMut { seq: self, map })
+    }
+}
+
+impl<K, V, S> MapSeq<K, V, S> {
+    #[inline(always)]
+    fn map_len(&self, map: usize) -> usize {
+        self.tables.subtable_len(map)
+    }
+
+    #[inline(always)]
+    fn map_is_empty(&self, map: usize) -> bool {
+        self.map_len(map) == 0
+    }
+
+    #[inline(always)]
+    fn clear_map(&mut self, map: usize) {
+        self.tables.clear_subtable(map)
+    }
+}
+
+impl<K: Eq + Hash, V, S: BuildHasher> MapSeq<K, V, S> {
+    fn map_contains_key<Q>(&self, map: usize, key: &Q) -> bool
+    where
+        K: Borrow<Q>,
+        Q: Hash + Eq + ?Sized,
+    {
+        let hash = self.build_hasher.hash_one(key);
+        self.tables
+            .find(map, hash, |found| found.key.borrow() == key)
+            .is_some()
+    }
+
+    fn map_get_key_value<Q>(&self, map: usize, key: &Q) -> Option<(&K, &V)>
+    where
+        K: Borrow<Q>,
+        Q: Hash + Eq + ?Sized,
+    {
+        let hash = self.build_hasher.hash_one(key);
+        self.tables
+            .find(map, hash, |found| found.key.borrow() == key)
+            .map(|entry| (&entry.key, &entry.value))
+    }
+
+    fn map_get_mut<Q>(&mut self, map: usize, key: &Q) -> Option<&mut V>
+    where
+        K: Borrow<Q>,
+        Q: Hash + Eq + ?Sized,
+    {
+        let hash = self.build_hasher.hash_one(key);
+        self.tables
+            .find_mut(map, hash, |found| found.key.borrow() == key)
+            .map(|entry| &mut entry.value)
+    }
+
+    fn map_insert(&mut self, map: usize, key: K, value: V) -> Option<V> {
+        let hash = self.build_hasher.hash_one(&key);
+        let (entry, returned_value) = self.tables.insert(
+            map,
+            hash,
+            MapEntry { key, value },
+            |found, inserting| found.key == inserting.key,
+            |found| self.build_hasher.hash_one(&found.key),
+        );
+        returned_value.map(|MapEntry { value, .. }| mem::replace(&mut entry.value, value))
+    }
+
+    fn map_remove_entry<Q>(&mut self, map: usize, key: &Q) -> Option<(K, V)>
+    where
+        K: Borrow<Q>,
+        Q: Hash + Eq + ?Sized,
+    {
+        let hash = self.build_hasher.hash_one(key);
+
+        self.tables
+            .remove(
+                map,
+                hash,
+                |found| found.key.borrow() == key,
+                |found| self.build_hasher.hash_one(&found.key),
+            )
+            .map(|entry| (entry.key, entry.value))
+    }
+
+    fn map_entry(&mut self, map: usize, key: K) -> Entry<'_, K, V> {
+        let hash = self.build_hasher.hash_one(&key);
+        match self.tables.entry(
+            map,
+            hash,
+            |found| found.key == key,
+            |found| self.build_hasher.hash_one(&found.key),
+        ) {
+            table_seq::Entry::Occupied(entry) => Entry::Occupied(OccupiedEntry(entry)),
+            table_seq::Entry::Vacant(entry) => Entry::Vacant(VacantEntry(entry, key)),
+        }
+    }
+}
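+
+// A minimal usage sketch (illustrative comment only, not compiled as part of this patch;
+// `RandomState` is just one possible choice for the hasher parameter `S`):
+//
+//     use std::collections::hash_map::RandomState;
+//
+//     let mut seq: MapSeq<u32, &str, RandomState> = MapSeq::default();
+//     seq.grow_for(3).insert(1, "one");
+//     assert_eq!(seq.len(), 4);
+//     assert_eq!(seq.at(3).get(&1), Some(&"one"));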
+
+/// Exclusive mutable access to a map of a [`MapSeq`].
+#[repr(C)] // SAFETY: layout must be compatible with MapSeqMap
+pub struct MapSeqMapMut<'a, K, V, S> {
+    seq: &'a mut MapSeq<K, V, S>,
+    map: usize,
+}
+
+/// Shared read-only access to a map of a [`MapSeq`].
+#[repr(C)] // SAFETY: layout must be compatible with MapSeqMapMut
+pub struct MapSeqMap<'a, K, V, S> {
+    seq: &'a MapSeq<K, V, S>,
+    map: usize,
+}
+
+impl<K, V, S> Clone for MapSeqMap<'_, K, V, S> {
+    fn clone(&self) -> Self {
+        *self
+    }
+}
+
+impl<K, V, S> Copy for MapSeqMap<'_, K, V, S> {}
+
+impl<'a, K, V, S> std::ops::Deref for MapSeqMapMut<'a, K, V, S> {
+    type Target = MapSeqMap<'a, K, V, S>;
+
+    #[inline(always)]
+    fn deref(&self) -> &Self::Target {
+        // SAFETY: we have compatible repr(C) layouts between MapSeqMap and MapSeqMapMut
+        unsafe { &*(self as *const Self).cast() }
+    }
+}
+
+impl<K: fmt::Debug, V: fmt::Debug, S> fmt::Debug for MapSeqMap<'_, K, V, S> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_set().entries(self.iter()).finish()
+    }
+}
+
+impl<K: fmt::Debug, V: fmt::Debug, S> fmt::Debug for MapSeqMapMut<'_, K, V, S> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_set().entries(self.iter()).finish()
+    }
+}
+
+impl<'a, K, V, S> MapSeqMap<'a, K, V, S> {
+    /// Returns the number of elements the map contains.
+    #[inline(always)]
+    pub fn len(&self) -> usize {
+        self.seq.map_len(self.map)
+    }
+
+    /// Returns `true` when the map is empty.
+    #[inline(always)]
+    pub fn is_empty(&self) -> bool {
+        self.seq.map_is_empty(self.map)
+    }
+
+    /// Returns an iterator over the key-value pairs of the map.
+    #[inline(always)]
+    pub fn iter(&self) -> MapIter<'a, K, V> {
+        MapIter {
+            inner: self.seq.tables.subtable_iter(self.map),
+        }
+    }
+
+    /// Returns an iterator over the keys of the map.
+    #[inline(always)]
+    pub fn keys(&self) -> MapKeys<'a, K, V> {
+        MapKeys {
+            inner: self.seq.tables.subtable_iter(self.map),
+        }
+    }
+
+    /// Returns an iterator over the values of the map.
+    #[inline(always)]
+    pub fn values(&self) -> MapValues<'a, K, V> {
+        MapValues {
+            inner: self.seq.tables.subtable_iter(self.map),
+        }
+    }
+}
+
+impl<K, V, S> MapSeqMapMut<'_, K, V, S> {
+    /// Discards all elements of the map.
+    #[inline(always)]
+    pub fn clear(&mut self) {
+        self.seq.clear_map(self.map)
+    }
+
+    /// Returns an iterator over the elements of the map, with mutable references to values.
+    #[inline(always)]
+    pub fn iter_mut(&mut self) -> MapIterMut<'_, K, V> {
+        self.reborrow().into_iter()
+    }
+
+    /// Reborrow the mutable reference to the map, creating a new mutable reference with a
+    /// potentially shorter lifetime.
+    #[inline(always)]
+    pub fn reborrow(&mut self) -> MapSeqMapMut<'_, K, V, S> {
+        MapSeqMapMut {
+            seq: self.seq,
+            map: self.map,
+        }
+    }
+}
+
+impl<K: Eq + Hash, V, S: BuildHasher> MapSeqMap<'_, K, V, S> {
+    /// Returns `true` if the map contains an element for the given key.
+    #[inline(always)]
+    pub fn contains_key<Q>(&self, key: &Q) -> bool
+    where
+        K: Borrow<Q>,
+        Q: Hash + Eq + ?Sized,
+    {
+        self.seq.map_contains_key(self.map, key)
+    }
+
+    /// Returns a reference to the value corresponding to the given key.
+    #[inline(always)]
+    pub fn get<Q>(&self, key: &Q) -> Option<&V>
+    where
+        K: Borrow<Q>,
+        Q: Hash + Eq + ?Sized,
+    {
+        self.seq
+            .map_get_key_value(self.map, key)
+            .map(|(_, value)| value)
+    }
+
+    /// Returns a reference to the key-value pair corresponding to the given key.
+    #[inline(always)]
+    pub fn get_key_value<Q>(&self, key: &Q) -> Option<(&K, &V)>
+    where
+        K: Borrow<Q>,
+        Q: Hash + Eq + ?Sized,
+    {
+        self.seq.map_get_key_value(self.map, key)
+    }
+}
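+
+// Note: `MapSeqMapMut` derefs to `MapSeqMap` via the `repr(C)` cast above, so the shared
+// accessors are also available through a mutable handle (illustrative comment only):
+//
+//     let mut map = seq.at_mut(3);
+//     map.insert(2, "two");
+//     assert!(map.contains_key(&2)); // resolves through `Deref` to `MapSeqMap::contains_key`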
+
+impl<K: Eq + Hash, V, S: BuildHasher> MapSeqMapMut<'_, K, V, S> {
+    /// Returns a mutable reference to the value corresponding to the given key.
+    #[inline(always)]
+    pub fn get_mut<Q>(&mut self, key: &Q) -> Option<&mut V>
+    where
+        K: Borrow<Q>,
+        Q: Hash + Eq + ?Sized,
+    {
+        self.seq.map_get_mut(self.map, key)
+    }
+
+    /// Inserts a key-value pair into the map.
+    ///
+    /// If the map did not have this key present, [`None`] is returned.
+    ///
+    /// If the map did have this key present, the value is updated, and the old
+    /// value is returned. The key is not updated, though; this matters for
+    /// types that can be `==` without being identical.
+    pub fn insert(&mut self, key: K, value: V) -> Option<V> {
+        self.seq.map_insert(self.map, key, value)
+    }
+
+    /// Removes a key from the map, returning the value at the key if the key was previously in
+    /// the map.
+    pub fn remove<Q>(&mut self, key: &Q) -> Option<V>
+    where
+        K: Borrow<Q>,
+        Q: Hash + Eq + ?Sized,
+    {
+        self.seq
+            .map_remove_entry(self.map, key)
+            .map(|(_, value)| value)
+    }
+
+    /// Removes a key from the map, returning the stored key and value if the key was previously
+    /// in the map.
+    pub fn remove_entry<Q>(&mut self, key: &Q) -> Option<(K, V)>
+    where
+        K: Borrow<Q>,
+        Q: Hash + Eq + ?Sized,
+    {
+        self.seq.map_remove_entry(self.map, key)
+    }
+
+    /// Gets the given key's corresponding entry in the map for in-place manipulation.
+    pub fn entry(&mut self, key: K) -> Entry<'_, K, V> {
+        self.seq.map_entry(self.map, key)
+    }
+}
+
+/// A view into a vacant entry in a [`MapSeq`].
+/// It is part of the [`Entry`] enum.
+pub struct VacantEntry<'a, K, V>(table_seq::VacantEntry<'a, MapEntry<K, V>>, K);
+
+/// A view into an occupied entry in a [`MapSeq`].
+/// It is part of the [`Entry`] enum.
+pub struct OccupiedEntry<'a, K, V>(table_seq::OccupiedEntry<'a, MapEntry<K, V>>);
+
+/// A view into a single entry in a map, which may either be vacant or occupied.
+///
+/// This `enum` is constructed from the [`entry`] method on [`MapSeqMapMut`].
+pub enum Entry<'a, K, V> {
+    /// A vacant entry.
+    Vacant(VacantEntry<'a, K, V>),
+    /// An occupied entry.
+    Occupied(OccupiedEntry<'a, K, V>),
+}
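+
+// The usual `HashMap`-style entry idiom works per contained map (illustrative comment only,
+// continuing the `RandomState` sketch from above):
+//
+//     let mut counts: MapSeq<&str, u32, RandomState> = MapSeq::default();
+//     counts.grow_for(0);
+//     for word in ["a", "b", "a"] {
+//         *counts.at_mut(0).entry(word).or_insert(0) += 1;
+//     }
+//     assert_eq!(counts.at(0).get(&"a"), Some(&2));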
+
+impl<'a, K, V> VacantEntry<'a, K, V> {
+    /// Gets a reference to the key that would be used when inserting a value
+    /// through the `VacantEntry`.
+    pub fn key(&self) -> &K {
+        &self.1
+    }
+    /// Take ownership of the key.
+    pub fn into_key(self) -> K {
+        self.1
+    }
+    /// Sets the value of the entry with the `VacantEntry`'s key,
+    /// and returns an `OccupiedEntry`.
+    pub fn insert_entry(self, value: V) -> OccupiedEntry<'a, K, V> {
+        let VacantEntry(entry, key) = self;
+        let new_entry = entry.insert(MapEntry { key, value });
+        OccupiedEntry(new_entry)
+    }
+    /// Sets the value of the entry with the `VacantEntry`'s key,
+    /// and returns a mutable reference to it.
+    pub fn insert(self, value: V) -> &'a mut V {
+        self.insert_entry(value).into_mut()
+    }
+}
+
+impl<'a, K, V> OccupiedEntry<'a, K, V> {
+    /// Gets a reference to the value in the entry.
+    pub fn get(&self) -> &V {
+        &self.0.get().value
+    }
+    /// Gets a mutable reference to the value in the entry.
+    pub fn get_mut(&mut self) -> &mut V {
+        &mut self.0.get_mut().value
+    }
+    /// Sets the value of the entry, and returns the entry’s old value.
+    pub fn insert(&mut self, value: V) -> V {
+        mem::replace(self.get_mut(), value)
+    }
+    /// Converts the `OccupiedEntry` into a mutable reference to the value in the entry
+    /// with a lifetime bound to the map itself.
+    pub fn into_mut(self) -> &'a mut V {
+        &mut self.0.into_mut().value
+    }
+    /// Gets a reference to the key in the entry.
+    pub fn key(&self) -> &K {
+        &self.0.get().key
+    }
+    /// Take the ownership of the key and value from the map.
+    pub fn remove_entry(self) -> (K, V) {
+        let (MapEntry { key, value }, _) = self.0.remove();
+        (key, value)
+    }
+    /// Takes the value out of the entry, and returns it.
+    pub fn remove(self) -> V {
+        self.remove_entry().1
+    }
+}
+
+impl<'a, K, V> Entry<'a, K, V> {
+    /// Provides in-place mutable access to an occupied entry before any potential inserts into
+    /// the map.
+    pub fn and_modify(self, f: impl FnOnce(&mut V)) -> Self {
+        match self {
+            Entry::Vacant(entry) => Entry::Vacant(entry),
+            Entry::Occupied(mut entry) => {
+                f(entry.get_mut());
+                Entry::Occupied(entry)
+            }
+        }
+    }
+    /// Ensures a value is in the entry by inserting the default if empty, and returns a mutable
+    /// reference to the value in the entry.
+    pub fn or_insert(self, default: V) -> &'a mut V {
+        match self {
+            Entry::Vacant(entry) => entry.insert(default),
+            Entry::Occupied(entry) => entry.into_mut(),
+        }
+    }
+    /// Sets the value of the entry, and returns an `OccupiedEntry`.
+    pub fn insert_entry(self, value: V) -> OccupiedEntry<'a, K, V> {
+        match self {
+            Entry::Vacant(entry) => entry.insert_entry(value),
+            Entry::Occupied(mut entry) => {
+                entry.insert(value);
+                entry
+            }
+        }
+    }
+    /// Ensures a value is in the entry by inserting the result of the default function if empty,
+    /// and returns a mutable reference to the value in the entry.
+    pub fn or_insert_with(self, default: impl FnOnce() -> V) -> &'a mut V {
+        match self {
+            Entry::Vacant(entry) => entry.insert(default()),
+            Entry::Occupied(entry) => entry.into_mut(),
+        }
+    }
+    /// Ensures a value is in the entry by inserting, if empty, the result of the default function.
+    /// This method allows for generating key-derived values for insertion by providing the default
+    /// function a reference to the key that was moved during the `.entry(key)` method call.
+    ///
+    /// The reference to the moved key is provided so that cloning or copying the key is
+    /// unnecessary, unlike with `.or_insert_with(|| ... )`.
+    pub fn or_insert_with_key(self, f: impl FnOnce(&K) -> V) -> &'a mut V {
+        match self {
+            Entry::Vacant(entry) => {
+                let value = f(entry.key());
+                entry.insert(value)
+            }
+            Entry::Occupied(entry) => entry.into_mut(),
+        }
+    }
+}
+
+impl<'a, K, V: Default> Entry<'a, K, V> {
+    /// Ensures a value is in the entry by inserting the default value if empty,
+    /// and returns a mutable reference to the value in the entry.
+    pub fn or_default(self) -> &'a mut V {
+        match self {
+            Entry::Vacant(entry) => entry.insert(Default::default()),
+            Entry::Occupied(entry) => entry.into_mut(),
+        }
+    }
+}
+
+/// Iterator yielding references to a map's keys and values.
+pub struct MapIter<'a, K, V> {
+    inner: SubtableIter<'a, MapEntry<K, V>>,
+}
+
+impl<K, V> Default for MapIter<'_, K, V> {
+    #[inline(always)]
+    fn default() -> Self {
+        Self {
+            inner: Default::default(),
+        }
+    }
+}
+
+impl<'a, K, V> Iterator for MapIter<'a, K, V> {
+    type Item = (&'a K, &'a V);
+
+    #[inline(always)]
+    fn next(&mut self) -> Option<Self::Item> {
+        self.inner.next().map(|entry| (&entry.key, &entry.value))
+    }
+
+    #[inline(always)]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.inner.size_hint()
+    }
+}
+
+impl<K, V> ExactSizeIterator for MapIter<'_, K, V> {
+    #[inline(always)]
+    fn len(&self) -> usize {
+        self.inner.len()
+    }
+}
+
+/// Iterator yielding references to a map's keys.
+pub struct MapKeys<'a, K, V> {
+    inner: SubtableIter<'a, MapEntry<K, V>>,
+}
+
+impl<K, V> Default for MapKeys<'_, K, V> {
+    #[inline(always)]
+    fn default() -> Self {
+        Self {
+            inner: Default::default(),
+        }
+    }
+}
+
+impl<'a, K, V> Iterator for MapKeys<'a, K, V> {
+    type Item = &'a K;
+
+    #[inline(always)]
+    fn next(&mut self) -> Option<Self::Item> {
+        self.inner.next().map(|entry| &entry.key)
+    }
+
+    #[inline(always)]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.inner.size_hint()
+    }
+}
+
+impl<K, V> ExactSizeIterator for MapKeys<'_, K, V> {
+    #[inline(always)]
+    fn len(&self) -> usize {
+        self.inner.len()
+    }
+}
+
+/// Iterator yielding references to a map's values.
+pub struct MapValues<'a, K, V> {
+    inner: SubtableIter<'a, MapEntry<K, V>>,
+}
+
+impl<K, V> Default for MapValues<'_, K, V> {
+    #[inline(always)]
+    fn default() -> Self {
+        Self {
+            inner: Default::default(),
+        }
+    }
+}
+
+impl<'a, K, V> Iterator for MapValues<'a, K, V> {
+    type Item = &'a V;
+
+    #[inline(always)]
+    fn next(&mut self) -> Option<Self::Item> {
+        self.inner.next().map(|entry| &entry.value)
+    }
+
+    #[inline(always)]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.inner.size_hint()
+    }
+}
+
+impl<K, V> ExactSizeIterator for MapValues<'_, K, V> {
+    #[inline(always)]
+    fn len(&self) -> usize {
+        self.inner.len()
+    }
+}
+
+/// Iterator yielding references to a map's keys and values, with mutable value references.
+pub struct MapIterMut<'a, K, V> {
+    inner: SubtableIterMut<'a, MapEntry<K, V>>,
+}
+
+impl<K, V> Default for MapIterMut<'_, K, V> {
+    #[inline(always)]
+    fn default() -> Self {
+        Self {
+            inner: Default::default(),
+        }
+    }
+}
+
+impl<'a, K, V> Iterator for MapIterMut<'a, K, V> {
+    type Item = (&'a K, &'a mut V);
+
+    #[inline(always)]
+    fn next(&mut self) -> Option<Self::Item> {
+        self.inner
+            .next()
+            .map(|entry| (&entry.key, &mut entry.value))
+    }
+
+    #[inline(always)]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.inner.size_hint()
+    }
+}
+
+impl<K, V> ExactSizeIterator for MapIterMut<'_, K, V> {
+    #[inline(always)]
+    fn len(&self) -> usize {
+        self.inner.len()
+    }
+}
+
+impl<'a, K, V, S> IntoIterator for MapSeqMap<'a, K, V, S> {
+    type Item = (&'a K, &'a V);
+    type IntoIter = MapIter<'a, K, V>;
+    fn into_iter(self) -> Self::IntoIter {
+        self.iter()
+    }
+}
+
+impl<'a, K, V, S> IntoIterator for MapSeqMapMut<'a, K, V, S> {
+    type Item = (&'a K, &'a mut V);
+    type IntoIter = MapIterMut<'a, K, V>;
+    fn into_iter(self) -> Self::IntoIter {
+        MapIterMut {
+            inner: self.seq.tables.subtable_iter_mut(self.map),
+        }
+    }
+}
+
+impl<K, V, S> Extend<(K, V)> for MapSeqMapMut<'_, K, V, S>
+where
+    K: Eq + Hash,
+    S: BuildHasher,
+{
+    fn extend<T: IntoIterator<Item = (K, V)>>(&mut self, iter: T) {
+        for (k, v) in iter {
+            self.insert(k, v);
+        }
+    }
+}
+
+impl<'b, K, V, S> Extend<(&'b K, &'b V)> for MapSeqMapMut<'_, K, V, S>
+where
+    K: Eq + Hash + Copy,
+    V: Copy,
+    S: BuildHasher,
+{
+    fn extend<T: IntoIterator<Item = (&'b K, &'b V)>>(&mut self, iter: T) {
+        for (k, v) in iter {
+            self.insert(*k, *v);
+        }
+    }
+}
+
+impl<K, V, Q, S> std::ops::Index<&Q> for MapSeqMap<'_, K, V, S>
+where
+    K: Eq + Hash + Borrow<Q>,
+    S: BuildHasher,
+    Q: Eq + Hash,
+{
+    type Output = V;
+
+    fn index(&self, index: &Q) -> &Self::Output {
+        self.get(index).expect("no entry found for key")
+    }
+}
+
+impl<K, V, Q, S> std::ops::Index<&Q> for MapSeqMapMut<'_, K, V, S>
+where
+    K: Eq + Hash + Borrow<Q>,
+    S: BuildHasher,
+    Q: Eq + Hash,
+{
+    type Output = V;
+
+    fn index(&self, index: &Q) -> &Self::Output {
+        self.get(index).expect("no entry found for key")
+    }
+}
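+
+// `Extend` and `Index` round out the `HashMap`-like surface (illustrative comment only):
+//
+//     let mut map = seq.at_mut(0);
+//     map.extend([(10, "ten"), (11, "eleven")]);
+//     assert_eq!(seq.at(0)[&10], "ten");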
diff --git a/table_seq/src/set_seq.rs b/table_seq/src/set_seq.rs
index 630400e..800da04 100644
--- a/table_seq/src/set_seq.rs
+++ b/table_seq/src/set_seq.rs
@@ -13,7 +13,7 @@ pub struct SetIter<'a, T> {
     inner: SubtableIter<'a, T>,
 }
 
-impl<'a, T> Default for SetIter<'a, T> {
+impl<T> Default for SetIter<'_, T> {
     #[inline(always)]
     fn default() -> Self {
         Self {
@@ -36,7 +36,7 @@ impl<'a, T> Iterator for SetIter<'a, T> {
     }
 }
 
-impl<'a, T> ExactSizeIterator for SetIter<'a, T> {
+impl<T> ExactSizeIterator for SetIter<'_, T> {
     #[inline(always)]
     fn len(&self) -> usize {
         self.inner.len()
@@ -273,13 +273,13 @@ impl<'a, T, S> std::ops::Deref for SetSeqSetMut<'a, T, S> {
     }
 }
 
-impl<'a, T: fmt::Debug, S> fmt::Debug for SetSeqSet<'a, T, S> {
+impl<T: fmt::Debug, S> fmt::Debug for SetSeqSet<'_, T, S> {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         f.debug_set().entries(self.iter()).finish()
     }
 }
 
-impl<'a, T: fmt::Debug, S> fmt::Debug for SetSeqSetMut<'a, T, S> {
+impl<T: fmt::Debug, S> fmt::Debug for SetSeqSetMut<'_, T, S> {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         f.debug_set().entries(self.iter()).finish()
     }
@@ -307,7 +307,7 @@ impl<'a, T, S> SetSeqSet<'a, T, S> {
     }
 }
 
-impl<'a, T, S> SetSeqSetMut<'a, T, S> {
+impl<T, S> SetSeqSetMut<'_, T, S> {
     /// Discards all elements of the set.
     #[inline(always)]
     pub fn clear(&mut self) {
@@ -315,7 +315,7 @@ impl<'a, T, S> SetSeqSetMut<'a, T, S> {
     }
 }
 
-impl<'a, T: Eq + Hash, S: BuildHasher> SetSeqSet<'a, T, S> {
+impl<T: Eq + Hash, S: BuildHasher> SetSeqSet<'_, T, S> {
     /// Checks whether a given value is an element of the set.
     #[inline(always)]
     pub fn contains<Q>(&self, value: &Q) -> bool
@@ -337,7 +337,7 @@ impl<'a, T: Eq + Hash, S: BuildHasher> SetSeqSet<'a, T, S> {
     }
 }
 
-impl<'a, T: Eq + Hash, S: BuildHasher> SetSeqSetMut<'a, T, S> {
+impl<T: Eq + Hash, S: BuildHasher> SetSeqSetMut<'_, T, S> {
     /// Inserts a value into the set.
     ///
     /// If the value is already present, the given value is discarded and the set is not modified.
diff --git a/table_seq/src/table_seq.rs b/table_seq/src/table_seq.rs
index bde4c98..82c1d45 100644
--- a/table_seq/src/table_seq.rs
+++ b/table_seq/src/table_seq.rs
@@ -10,6 +10,7 @@ use hashbrown::HashTable;
 use crate::node_allocator::AllocatorClass;
 
 mod chunk;
+mod entry;
 mod iter;
 mod node;
 mod owned;
@@ -18,8 +19,12 @@ use chunk::{Chunk, EntryType, CHUNK_MASK, CHUNK_SHIFT, CHUNK_SIZE};
 use node::{NodeAllocator, NodeRef, SizeClass};
 use owned::OwnedSubtableSmall;
-use table::{SmallSubtable, Subtable};
+use table::{
+    SmallSubtable, SmallSubtableEntry, SmallSubtableOccupiedEntry, SmallSubtableVacantEntry,
+    Subtable,
+};
 
+pub use entry::{Entry, OccupiedEntry, VacantEntry};
 pub use iter::{SubtableIter, SubtableIterMut};
 pub use owned::OwnedSubtable;
 
@@ -60,7 +65,7 @@ impl<T: fmt::Debug> fmt::Debug for TableSeq<T> {
     #[derive(Clone)]
     struct SubtableFmt<'a, T>(&'a TableSeq<T>, usize);
 
-    impl<'a, T: fmt::Debug> fmt::Debug for SubtableFmt<'a, T> {
+    impl<T: fmt::Debug> fmt::Debug for SubtableFmt<'_, T> {
         fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
             f.debug_set().entries(self.0.subtable_iter(self.1)).finish()
         }
@@ -88,10 +93,10 @@ impl<T> Default for TableSeq<T> {
 
 const ALLOCATOR_SHIFT: u32 = 20;
 
-/// Drop guard to preven exposing invalid entries on panics.
+/// Drop guard to prevent exposing invalid entries on panics.
 struct InvalidateChunkOnDrop<'a, T>(&'a mut Chunk<T>);
 
-impl<'a, T> Drop for InvalidateChunkOnDrop<'a, T> {
+impl<T> Drop for InvalidateChunkOnDrop<'_, T> {
     fn drop(&mut self) {
         // This is only called when the drop implementation of an entry panics. In that situation,
         // we do the minimum necessary to remain memory safe but spend no effort cleaning up. Since
@@ -106,7 +111,7 @@ impl<'a, T> Drop for InvalidateChunkOnDrop<'a, T> {
     }
 }
 
-impl<'a, T> InvalidateChunkOnDrop<'a, T> {
+impl<T> InvalidateChunkOnDrop<'_, T> {
     fn defuse(self) {
         let _ = ManuallyDrop::new(self);
     }
@@ -401,6 +406,7 @@ impl<T> TableSeq<T> {
                     self.entries += 1;
 
                     let found_pair = node.entry_ptr(entry_offset).cast::<[T; 2]>().read();
+                    let pair_hashes = [hasher(&found_pair[0]), hasher(&found_pair[1])];
                     let table_offset = chunk.meta.table_offset(chunk_slot);
 
                     node.close_entry_pair_gap_and_make_table_gap_resize(
@@ -414,7 +420,7 @@ impl<T> TableSeq<T> {
                     let table_alloc = &mut self.allocators[allocator_index ^ 1];
 
                     let (entry_ptr, table) =
-                        SmallSubtable::new(found_pair, value, hash, hasher, table_alloc);
+                        SmallSubtable::new(found_pair, pair_hashes, value, hash, table_alloc);
 
                     table_ptr.write(Subtable::Small(table));
                     chunk.meta.make_table(chunk_slot);
@@ -552,6 +558,7 @@ impl<T> TableSeq<T> {
                 let entry_offset = chunk.meta.entry_offset(chunk_slot);
 
                 let found_pair = node.entry_ptr(entry_offset).cast::<[T; 2]>().read();
+                let pair_hashes = [hasher(&found_pair[0]), hasher(&found_pair[1])];
                 let table_offset = chunk.meta.table_offset(chunk_slot);
 
                 node.close_entry_pair_gap_and_make_table_gap_resize(
@@ -565,7 +572,7 @@ impl<T> TableSeq<T> {
                 let table_alloc = &mut self.allocators[allocator_index ^ 1];
 
                 let (entry_ptr, table) =
-                    SmallSubtable::new(found_pair, value, hash, hasher, table_alloc);
+                    SmallSubtable::new(found_pair, pair_hashes, value, hash, table_alloc);
 
                 table_ptr.write(Subtable::Small(table));
                 chunk.meta.make_table(chunk_slot);
@@ -852,9 +859,6 @@ impl<T> TableSeq<T> {
                 node.close_entry_gap_resize(entry_offset, chunk, chunk_alloc);
                 chunk.meta.make_single(chunk_slot);
 
-                if chunk.meta.is_empty() {
-                    chunk_alloc.dealloc(chunk.node)
-                }
                 Some(value)
             }
             EntryType::Table => {
diff --git a/table_seq/src/table_seq/entry.rs b/table_seq/src/table_seq/entry.rs
new file mode 100644
index 0000000..5f9f736
--- /dev/null
+++ b/table_seq/src/table_seq/entry.rs
@@ -0,0 +1,588 @@
+use crate::table_seq::*;
+
+enum VacantEntryKind<'a, T> {
+    EmptyChunk,
+    EmptyTable,
+    SingletonTable,
+    // Since we can't store the hasher and we don't want to eagerly grow every pair into a small
+    // table, we remember the hashes of the future entry and the two existing entries here.
+    PairTable(u64, [u64; 2]),
+    SmallTable(SmallSubtableVacantEntry<'a, T>),
+    LargeTable(hashbrown::hash_table::VacantEntry<'a, T>),
+}
+
+/// A view into a vacant entry in a [`TableSeq`]'s subtable.
+/// It is part of the [`Entry`] enum.
+// SAFETY: The kind accurately describes the state of the subtable.
+pub struct VacantEntry<'a, T> {
+    tables: *mut TableSeq<T>,
+    subtable: usize,
+    kind: VacantEntryKind<'a, T>,
+}
+
+#[allow(clippy::enum_variant_names)] // more descriptive this way
+enum OccupiedEntryKind<'a, T> {
+    SingletonTable,
+    // The `usize` argument is the index into the subtable (guaranteed to be either 0 or 1).
+    PairTable(usize),
+    // On both SmallTable and LargeTable the `bool` is true iff the OccupiedEntry is the only
+    // entry in the table.
+    SmallTable(SmallSubtableOccupiedEntry<'a, T>, bool),
+    // We need MaybeUninit here because we may deallocate the HashTable, at which point the
+    // OccupiedEntry becomes invalid.
+    LargeTable(
+        MaybeUninit<hashbrown::hash_table::OccupiedEntry<'a, T>>,
+        bool,
+    ),
+}
+
+/// A view into an occupied entry in a [`TableSeq`]'s subtable.
+/// It is part of the [`Entry`] enum.
+// SAFETY: The kind accurately describes the state of the subtable.
+pub struct OccupiedEntry<'a, T> {
+    tables: *mut TableSeq<T>,
+    subtable: usize,
+    entry_ptr: *mut T,
+    kind: OccupiedEntryKind<'a, T>,
+}
+
+/// A view into a single entry in a [`TableSeq`]'s subtable, which may either be vacant or
+/// occupied.
+///
+/// This `enum` is constructed from the [`entry`] method on [`TableSeq`].
+pub enum Entry<'a, T> {
+    /// A vacant entry.
+    Vacant(VacantEntry<'a, T>),
+    /// An occupied entry.
+    Occupied(OccupiedEntry<'a, T>),
+}
+
+impl<T> TableSeq<T> {
+    /// Gets the entry with the given hash value in a subtable for in-place manipulation.
+    ///
+    /// If there are existing entries with the hash value and `eq` returns true for one of them,
+    /// an OccupiedEntry referring to that entry is returned. Otherwise, a VacantEntry is
+    /// returned.
+    ///
+    /// `eq` may be called on any entries in the given subtable, but is never called on entries
+    /// from other subtables.
+    ///
+    /// If a VacantEntry is returned, `entry` may resize the subtable to prepare for insertion.
+    /// In that case, `hasher` is called on all entries in the subtable to recompute their hash
+    /// values. It will never be called on entries from another subtable.
+    pub fn entry(
+        &mut self,
+        subtable: usize,
+        hash: u64,
+        mut eq: impl FnMut(&T) -> bool,
+        hasher: impl Fn(&T) -> u64,
+    ) -> Entry<'_, T> {
+        assert!(subtable < self.subtables);
+        // SAFETY: with the subtable checked to be in bounds, every unsafe call contained below
+        // either requires just the global data structure invariants to hold or has documented
+        // requirements that pair up with the immediately preceding or following operations.
+        // TODO reduce unsafe scope and go into more detail
+        unsafe {
+            let chunk_slot = (subtable & CHUNK_MASK) as u32;
+            let chunk_index = subtable >> CHUNK_SHIFT;
+            let allocator_index = subtable >> ALLOCATOR_SHIFT;
+
+            self.defragment_allocator(allocator_index);
+            let chunk_alloc = self.allocators.get_unchecked_mut(allocator_index);
+            let chunk = self.chunks.get_unchecked_mut(chunk_index);
+
+            if chunk.meta.is_empty() {
+                return Entry::Vacant(VacantEntry {
+                    tables: self,
+                    subtable,
+                    kind: VacantEntryKind::EmptyChunk,
+                });
+            }
+
+            match chunk.meta.entry_type(chunk_slot) {
+                EntryType::Empty => {
+                    Entry::Vacant(VacantEntry {
+                        tables: self,
+                        subtable,
+                        kind: VacantEntryKind::EmptyTable,
+                    })
+                }
+                EntryType::Single => {
+                    let node = chunk.node(chunk_alloc);
+                    let entry_offset = chunk.meta.entry_offset(chunk_slot);
+                    let found_entry_ptr = node.entry_ptr(entry_offset);
+
+                    if eq(&*found_entry_ptr) {
+                        Entry::Occupied(OccupiedEntry {
+                            tables: self,
+                            subtable,
+                            entry_ptr: found_entry_ptr,
+                            kind: OccupiedEntryKind::SingletonTable,
+                        })
+                    } else {
+                        Entry::Vacant(VacantEntry {
+                            tables: self,
+                            subtable,
+                            kind: VacantEntryKind::SingletonTable,
+                        })
+                    }
+                }
+                EntryType::Pair => {
+                    let node = chunk.node(chunk_alloc);
+                    let entry_offset = chunk.meta.entry_offset(chunk_slot);
+
+                    for i in 0..2 {
+                        let found_entry_ptr = node.entry_ptr(entry_offset + i);
+                        if eq(&*found_entry_ptr) {
+                            return Entry::Occupied(OccupiedEntry {
+                                tables: self,
+                                subtable,
+                                entry_ptr: found_entry_ptr,
+                                kind: OccupiedEntryKind::PairTable(i),
+                            });
+                        }
+                    }
+                    let hash0 = hasher(&*node.entry_ptr(entry_offset));
+                    let hash1 = hasher(&*node.entry_ptr(entry_offset + 1));
+                    Entry::Vacant(VacantEntry {
+                        tables: self,
+                        subtable,
+                        kind: VacantEntryKind::PairTable(hash, [hash0, hash1]),
+                    })
+                }
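+                // NOTE: an existing subtable is stored either inline (`Subtable::Small`) or as
+                // a boxed hashbrown table (`Subtable::Large`). When a small table is already
+                // full, the arm below first migrates it to a large table and only then hands
+                // out a vacant entry.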
+                EntryType::Table => {
+                    let table_offset = chunk.meta.table_offset(chunk_slot);
+                    let node = chunk.node(chunk_alloc);
+                    let table_ptr = node.table_ptr(table_offset);
+
+                    match &mut *table_ptr {
+                        Subtable::Large(table) => {
+                            let is_single = table.len() == 1;
+                            match table.entry(hash, eq, hasher) {
+                                hashbrown::hash_table::Entry::Occupied(mut entry) => {
+                                    Entry::Occupied(OccupiedEntry {
+                                        tables: self,
+                                        subtable,
+                                        entry_ptr: &mut *entry.get_mut(),
+                                        kind: OccupiedEntryKind::LargeTable(
+                                            MaybeUninit::new(entry),
+                                            is_single,
+                                        ),
+                                    })
+                                }
+                                hashbrown::hash_table::Entry::Vacant(entry) => {
+                                    Entry::Vacant(VacantEntry {
+                                        tables: self,
+                                        subtable,
+                                        kind: VacantEntryKind::LargeTable(entry),
+                                    })
+                                }
+                            }
+                        }
+                        Subtable::Small(int_table) => {
+                            let table_alloc = &mut self.allocators[allocator_index ^ 1];
+                            let is_single = int_table.len() == 1;
+
+                            match int_table.entry(hash, eq, table_alloc) {
+                                SmallSubtableEntry::Occupied(mut entry) => {
+                                    Entry::Occupied(OccupiedEntry {
+                                        tables: self,
+                                        subtable,
+                                        entry_ptr: &mut *entry.get_mut(),
+                                        kind: OccupiedEntryKind::SmallTable(entry, is_single),
+                                    })
+                                }
+                                SmallSubtableEntry::Vacant(entry) => Entry::Vacant(VacantEntry {
+                                    tables: self,
+                                    subtable,
+                                    kind: VacantEntryKind::SmallTable(entry),
+                                }),
+                                SmallSubtableEntry::FullTable(int_table) => {
+                                    let mut hash_table =
+                                        HashTable::with_capacity(CHUNK_SIZE * 2);
+                                    int_table.drain_and_dealloc_with(
+                                        |value, _byte_hash| {
+                                            hash_table.insert_unique(
+                                                hasher(&value),
+                                                value,
+                                                &hasher,
+                                            );
+                                        },
+                                        table_alloc,
+                                    );
+                                    let _ = int_table; // int_table has been invalidated, don't use it anymore
+
+                                    // SAFETY: int_table has been drained and invalidated, we can
+                                    // just overwrite it
+                                    table_ptr.write(Subtable::Large(hash_table));
+                                    let Some(Subtable::Large(hash_table)) = table_ptr.as_mut()
+                                    else {
+                                        unreachable!();
+                                    };
+
+                                    let hashbrown::hash_table::Entry::Vacant(new_entry) =
+                                        hash_table.entry(hash, |_| false, &hasher)
+                                    else {
+                                        unreachable!()
+                                    };
+                                    Entry::Vacant(VacantEntry {
+                                        tables: self,
+                                        subtable,
+                                        kind: VacantEntryKind::LargeTable(new_entry),
+                                    })
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
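+
+// Example of driving the raw entry API (illustrative comment only; `RandomState` stands in
+// for whatever `BuildHasher` the caller actually uses, mirroring the crate's tests):
+//
+//     use std::collections::hash_map::RandomState;
+//     use std::hash::BuildHasher;
+//
+//     let state = RandomState::new();
+//     let mut tables: TableSeq<u64> = TableSeq::default();
+//     tables.grow_for_subtable(0);
+//     tables
+//         .entry(0, state.hash_one(42u64), |&x| x == 42, |&x| state.hash_one(x))
+//         .or_insert(42);
+//     assert_eq!(tables.subtable_len(0), 1);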
+
+impl<'a, T> VacantEntry<'a, T> {
+    /// Inserts an entry into the subtable at the hash value corresponding to the `VacantEntry`.
+    ///
+    /// If `value` does not hash to that hash value, the table is left in an indeterminate, but
+    /// memory-safe state.
+    pub fn insert(self, value: T) -> OccupiedEntry<'a, T> {
+        // SAFETY: all relevant preconditions hold by construction
+        // TODO reduce unsafe scope and go into more detail
+        unsafe {
+            let VacantEntry {
+                tables,
+                subtable,
+                kind,
+            } = self;
+            let chunk_slot = (subtable & CHUNK_MASK) as u32;
+            let chunk_index = subtable >> CHUNK_SHIFT;
+            let allocator_index = subtable >> ALLOCATOR_SHIFT;
+
+            let chunk_alloc = (*tables).allocators.get_unchecked_mut(allocator_index);
+            let chunk = (*tables).chunks.get_unchecked_mut(chunk_index);
+            match kind {
+                VacantEntryKind::EmptyChunk => {
+                    (*tables).entries += 1;
+                    let size_class = SizeClass::class_for_index(0);
+
+                    chunk.node = chunk_alloc.alloc(size_class);
+                    let entry_ptr = chunk_alloc.ptr(chunk.node);
+                    entry_ptr.write(value);
+                    chunk.meta.make_single(chunk_slot);
+                    OccupiedEntry {
+                        tables,
+                        subtable,
+                        entry_ptr,
+                        kind: OccupiedEntryKind::SingletonTable,
+                    }
+                }
+                VacantEntryKind::EmptyTable => {
+                    (*tables).entries += 1;
+                    let mut node = chunk.node(chunk_alloc);
+
+                    let entry_offset = chunk.meta.entry_offset(chunk_slot);
+                    node.make_entry_gap_resize(entry_offset, chunk, chunk_alloc);
+
+                    let entry_ptr = node.entry_ptr(entry_offset);
+                    entry_ptr.write(value);
+
+                    chunk.meta.make_single(chunk_slot);
+                    OccupiedEntry {
+                        tables,
+                        subtable,
+                        entry_ptr,
+                        kind: OccupiedEntryKind::SingletonTable,
+                    }
+                }
+                VacantEntryKind::SingletonTable => {
+                    let mut node = chunk.node(chunk_alloc);
+                    let entry_offset = chunk.meta.entry_offset(chunk_slot);
+                    (*tables).entries += 1;
+
+                    node.make_entry_gap_resize(entry_offset, chunk, chunk_alloc);
+
+                    let entry_ptr = node.entry_ptr(entry_offset);
+                    entry_ptr.write(value);
+
+                    chunk.meta.make_pair(chunk_slot);
+                    OccupiedEntry {
+                        tables,
+                        subtable,
+                        entry_ptr,
+                        kind: OccupiedEntryKind::PairTable(0),
+                    }
+                }
+                VacantEntryKind::PairTable(hash, pair_hashes) => {
+                    let mut node = chunk.node(chunk_alloc);
+                    let entry_offset = chunk.meta.entry_offset(chunk_slot);
+                    (*tables).entries += 1;
+
+                    let found_pair = node.entry_ptr(entry_offset).cast::<[T; 2]>().read();
+                    let table_offset = chunk.meta.table_offset(chunk_slot);
+
+                    node.close_entry_pair_gap_and_make_table_gap_resize(
+                        entry_offset,
+                        table_offset,
+                        chunk,
+                        chunk_alloc,
+                    );
+
+                    let table_ptr = node.table_ptr(table_offset);
+
+                    let table_alloc = &mut (*tables).allocators[allocator_index ^ 1];
+                    let (entry_ptr, table) =
+                        SmallSubtable::new(found_pair, pair_hashes, value, hash, table_alloc);
+
+                    table_ptr.write(Subtable::Small(table));
+                    chunk.meta.make_table(chunk_slot);
+
+                    let Subtable::Small(table) = table_ptr.as_mut().unwrap() else {
+                        unreachable!()
+                    };
+
+                    OccupiedEntry {
+                        tables,
+                        subtable,
+                        entry_ptr,
+                        kind: OccupiedEntryKind::SmallTable(
+                            SmallSubtableOccupiedEntry::from_entry_ptr(table, entry_ptr),
+                            false,
+                        ),
+                    }
+                }
+                VacantEntryKind::SmallTable(vacant_entry) => {
+                    let table_alloc = &mut (*tables).allocators[allocator_index ^ 1];
+                    let mut new_entry = vacant_entry.insert(value, table_alloc);
+                    (*tables).entries += 1;
+                    OccupiedEntry {
+                        tables,
+                        subtable,
+                        entry_ptr: &mut *new_entry.get_mut(),
+                        kind: OccupiedEntryKind::SmallTable(new_entry, false),
+                    }
+                }
+                VacantEntryKind::LargeTable(vacant_entry) => {
+                    let mut new_entry = vacant_entry.insert(value);
+                    (*tables).entries += 1;
+                    OccupiedEntry {
+                        tables,
+                        subtable,
+                        entry_ptr: &mut *new_entry.get_mut(),
+                        kind: OccupiedEntryKind::LargeTable(MaybeUninit::new(new_entry), false),
+                    }
+                }
+            }
+        }
+    }
+
+    /// Converts the `VacantEntry` into a mutable reference to the underlying `TableSeq`.
+    pub fn into_tables(self) -> &'a mut TableSeq<T> {
+        // SAFETY: self.tables is valid by construction
+        unsafe { &mut *self.tables }
+    }
+    /// Returns the subtable index of the `VacantEntry`.
+    pub fn subtable(&self) -> usize {
+        self.subtable
+    }
+}
+
+impl<'a, T> OccupiedEntry<'a, T> {
+    /// Gets a reference to the value of the entry.
+    pub fn get(&self) -> &T {
+        // SAFETY: entry_ptr is valid by construction
+        unsafe { &*self.entry_ptr }
+    }
+    /// Gets a mutable reference to the value of the entry.
+    pub fn get_mut(&mut self) -> &mut T {
+        // SAFETY: entry_ptr is valid by construction
+        unsafe { &mut *self.entry_ptr }
+    }
+    /// Converts the `OccupiedEntry` into a mutable reference to the value of the entry.
+    pub fn into_mut(self) -> &'a mut T {
+        // SAFETY: entry_ptr is valid by construction
+        unsafe { &mut *self.entry_ptr }
+    }
+    /// Converts the `OccupiedEntry` into a mutable reference to the underlying `TableSeq`.
+    pub fn into_tables(self) -> &'a mut TableSeq<T> {
+        // SAFETY: self.tables is valid by construction
+        unsafe { &mut *self.tables }
+    }
+    /// Returns the subtable index of the `OccupiedEntry`.
+    pub fn subtable(&self) -> usize {
+        self.subtable
+    }
+    /// Removes the entry from the subtable, returning the value of the entry and a `VacantEntry`
+    /// referring to the same slot.
+    pub fn remove(self) -> (T, VacantEntry<'a, T>) {
+        // SAFETY: all relevant preconditions hold by construction
+        // TODO reduce unsafe scope and go into more detail
+        unsafe {
+            let OccupiedEntry {
+                tables,
+                subtable,
+                entry_ptr,
+                kind,
+            } = self;
+            let chunk_slot = (subtable & CHUNK_MASK) as u32;
+            let chunk_index = subtable >> CHUNK_SHIFT;
+            let allocator_index = subtable >> ALLOCATOR_SHIFT;
+
+            let chunk = (*tables).chunks.get_unchecked_mut(chunk_index);
+            let chunk_alloc = (*tables).allocators.get_unchecked_mut(allocator_index);
+
+            match kind {
+                OccupiedEntryKind::SingletonTable => {
+                    let mut node = chunk.node(chunk_alloc);
+                    let entry_offset = chunk.meta.entry_offset(chunk_slot);
+
+                    (*tables).entries -= 1;
+                    let value = entry_ptr.read();
+
+                    node.close_entry_gap_resize(entry_offset, chunk, chunk_alloc);
+
+                    chunk.meta.make_empty(chunk_slot);
+                    let kind = if chunk.meta.is_empty() {
+                        chunk_alloc.dealloc(chunk.node);
+                        VacantEntryKind::EmptyChunk
+                    } else {
+                        VacantEntryKind::EmptyTable
+                    };
+                    (
+                        value,
+                        VacantEntry {
+                            tables,
+                            subtable,
+                            kind,
+                        },
+                    )
+                }
+                OccupiedEntryKind::PairTable(index) => {
+                    let mut node = chunk.node(chunk_alloc);
+                    let entry_offset = chunk.meta.entry_offset(chunk_slot) + index;
+
+                    (*tables).entries -= 1;
+                    let value = entry_ptr.read();
+
+                    node.close_entry_gap_resize(entry_offset, chunk, chunk_alloc);
+
+                    chunk.meta.make_single(chunk_slot);
+                    (
+                        value,
+                        VacantEntry {
+                            tables,
+                            subtable,
+                            kind: VacantEntryKind::SingletonTable,
+                        },
+                    )
+                }
+                OccupiedEntryKind::SmallTable(entry, will_delete) => {
+                    let mut node = chunk.node(chunk_alloc);
+                    let table_alloc = &mut (*tables).allocators[allocator_index ^ 1];
+                    (*tables).entries -= 1;
+                    let (removed, entry) = entry.remove(table_alloc);
+
+                    // TODO earlier shrinking
+                    let kind = if will_delete {
+                        let table = entry.into_table();
+                        let table_offset = chunk.meta.table_offset(chunk_slot);
+                        table.drop_and_dealloc(table_alloc);
+                        let chunk_alloc = (*tables).allocators.get_unchecked_mut(allocator_index);
+                        node.close_table_gap_resize(table_offset, chunk, chunk_alloc);
+                        chunk.meta.make_empty(chunk_slot);
+                        if chunk.meta.is_empty() {
+                            chunk_alloc.dealloc(chunk.node);
+                            VacantEntryKind::EmptyChunk
+                        } else {
+                            VacantEntryKind::EmptyTable
+                        }
+                    } else {
+                        VacantEntryKind::SmallTable(entry)
+                    };
+                    (
+                        removed,
+                        VacantEntry {
+                            tables,
+                            subtable,
+                            kind,
+                        },
+                    )
+                }
+                OccupiedEntryKind::LargeTable(entry, will_delete) => {
+                    (*tables).entries -= 1;
+                    let (removed, entry) = entry.assume_init().remove();
+
+                    // TODO external -> internal shrinking
+                    let kind = if will_delete {
+                        let _ = entry; // we're deleting the table, thus entry becomes unusable
+                        let mut node = chunk.node(chunk_alloc);
+                        let table_offset = chunk.meta.table_offset(chunk_slot);
+                        let table_ptr = node.table_ptr(table_offset);
+                        table_ptr.drop_in_place();
+                        node.close_table_gap_resize(table_offset, chunk, chunk_alloc);
+                        chunk.meta.make_empty(chunk_slot);
+                        if chunk.meta.is_empty() {
+                            chunk_alloc.dealloc(chunk.node);
+                            VacantEntryKind::EmptyChunk
+                        } else {
+                            VacantEntryKind::EmptyTable
+                        }
+                    } else {
+                        VacantEntryKind::LargeTable(entry)
+                    };
+                    (
+                        removed,
+                        VacantEntry {
+                            tables,
+                            subtable,
+                            kind,
+                        },
+                    )
+                }
+            }
+        }
+    }
+}
+
+impl<'a, T> Entry<'a, T> {
+    /// Converts the `Entry` into a mutable reference to the underlying `TableSeq`.
+    pub fn into_tables(self) -> &'a mut TableSeq<T> {
+        match self {
+            Entry::Vacant(entry) => entry.into_tables(),
+            Entry::Occupied(entry) => entry.into_tables(),
+        }
+    }
+    /// Returns the subtable index of the `Entry`.
+    pub fn subtable(self) -> usize {
+        match self {
+            Entry::Vacant(entry) => entry.subtable(),
+            Entry::Occupied(entry) => entry.subtable(),
+        }
+    }
+    /// Inserts an entry into the subtable at the hash value corresponding to the `Entry`,
+    /// overwriting any existing value.
+    ///
+    /// If `value` does not hash to that hash value, the table is left in an indeterminate, but
+    /// memory-safe state.
+    pub fn insert(self, value: T) -> OccupiedEntry<'a, T> {
+        match self {
+            Entry::Vacant(entry) => entry.insert(value),
+            Entry::Occupied(mut entry) => {
+                *entry.get_mut() = value;
+                entry
+            }
+        }
+    }
+    /// Ensures a value is in the entry by inserting the default if empty, and returns an
+    /// `OccupiedEntry`.
+    pub fn or_insert(self, default: T) -> OccupiedEntry<'a, T> {
+        match self {
+            Entry::Vacant(entry) => entry.insert(default),
+            Entry::Occupied(entry) => entry,
+        }
+    }
+    /// Ensures a value is in the entry by inserting the result of the default function if empty,
+    /// and returns an `OccupiedEntry`.
+    pub fn or_insert_with(self, default: impl FnOnce() -> T) -> OccupiedEntry<'a, T> {
+        match self {
+            Entry::Vacant(entry) => entry.insert(default()),
+            Entry::Occupied(entry) => entry,
+        }
+    }
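+    // The combinators compose as on `std::collections::hash_map::Entry` (illustrative comment
+    // only; `state` is a `BuildHasher` as in the example above and `T = (u64, u32)` here):
+    //
+    //     tables
+    //         .entry(0, state.hash_one(key), |e| e.0 == key, |e| state.hash_one(e.0))
+    //         .and_modify(|e| e.1 += 1)
+    //         .or_insert((key, 1));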
+    /// Provides in-place mutable access to an occupied entry before any potential inserts into
+    /// the `TableSeq`.
+    pub fn and_modify(self, f: impl FnOnce(&mut T)) -> Self {
+        match self {
+            Entry::Vacant(entry) => Entry::Vacant(entry),
+            Entry::Occupied(mut entry) => {
+                f(entry.get_mut());
+                Entry::Occupied(entry)
+            }
+        }
+    }
+}
diff --git a/table_seq/src/table_seq/iter.rs b/table_seq/src/table_seq/iter.rs
index 8967b98..877fddf 100644
--- a/table_seq/src/table_seq/iter.rs
+++ b/table_seq/src/table_seq/iter.rs
@@ -5,7 +5,7 @@ pub struct SubtableIter<'a, T> {
     inner: SubtableIterInner<'a, T>,
 }
 
-impl<'a, T> Default for SubtableIter<'a, T> {
+impl<T> Default for SubtableIter<'_, T> {
     fn default() -> Self {
         Self {
             inner: SubtableIterInner::Small([].as_slice().iter()),
@@ -62,7 +62,7 @@ impl<'a, T> Iterator for SubtableIter<'a, T> {
     }
 }
 
-impl<'a, T> ExactSizeIterator for SubtableIter<'a, T> {
+impl<T> ExactSizeIterator for SubtableIter<'_, T> {
     fn len(&self) -> usize {
         match &self.inner {
             SubtableIterInner::Small(iter) => iter.len(),
@@ -76,7 +76,7 @@ pub struct SubtableIterMut<'a, T> {
     inner: SubtableIterMutInner<'a, T>,
 }
 
-impl<'a, T> Default for SubtableIterMut<'a, T> {
+impl<T> Default for SubtableIterMut<'_, T> {
     fn default() -> Self {
         Self {
             inner: SubtableIterMutInner::Small(Default::default()),
@@ -133,7 +133,7 @@ impl<'a, T> Iterator for SubtableIterMut<'a, T> {
     }
 }
 
-impl<'a, T> ExactSizeIterator for SubtableIterMut<'a, T> {
+impl<T> ExactSizeIterator for SubtableIterMut<'_, T> {
     fn len(&self) -> usize {
         match &self.inner {
             SubtableIterMutInner::Small(iter) => iter.len(),
diff --git a/table_seq/src/table_seq/table.rs b/table_seq/src/table_seq/table.rs
index f40b4df..43a5106 100644
--- a/table_seq/src/table_seq/table.rs
+++ b/table_seq/src/table_seq/table.rs
@@ -56,9 +56,9 @@ fn find_byte_among_16(needle: u8, haystack: &[u8; 16]) -> u16 {
 impl<T> SmallSubtable<T> {
     pub fn new(
         pair: [T; 2],
+        pair_hashes: [u64; 2],
         third: T,
         third_hash: u64,
-        hasher: impl Fn(&T) -> u64,
         allocator: &mut NodeAllocator<T>,
     ) -> (*mut T, Self) {
         let node = allocator.alloc(SizeClass::at_least_3());
@@ -68,7 +68,7 @@ impl<T> SmallSubtable<T> {
         let mut hashes = [0; SMALL_SUBTABLE_CAPACITY];
 
         for i in 0..2 {
-            hashes[i] = byte_hash_from_hash(hasher(&pair[i]));
+            hashes[i] = byte_hash_from_hash(pair_hashes[i]);
         }
         hashes[2] = byte_hash_from_hash(third_hash);
 
@@ -228,6 +228,50 @@ impl<T> SmallSubtable<T> {
         }
     }
 
+    /// # Safety
+    /// Callers need to ensure that the `SmallSubtable` is valid, that the correct allocator is
+    /// passed and that the nodes owned by this subtable are not modified except by calling
+    /// `SmallSubtable` methods.
+    pub unsafe fn entry(
+        &mut self,
+        hash: u64,
+        mut eq: impl FnMut(&T) -> bool,
+        allocator: &mut NodeAllocator<T>,
+    ) -> SmallSubtableEntry<'_, T> {
+        let byte_hash = byte_hash_from_hash(hash);
+        let mut matches = find_byte_among_16(byte_hash, &self.hashes);
+
+        // SAFETY: we require our node to be alive in the given allocator
+        let node_ptr = unsafe { allocator.ptr(self.node) };
+
+        while let Some(found_match) = NonZeroU16::new(matches) {
+            matches &= matches - 1;
+            let match_index = found_match.trailing_zeros() as usize;
+            if match_index >= self.len as usize {
+                break;
+            }
+
+            // SAFETY: we just checked that the match_index is still in bounds
+            let entry_ptr = unsafe { node_ptr.add(match_index) };
+            // SAFETY: so we can also safely dereference it
+            if eq(unsafe { &*entry_ptr }) {
+                return SmallSubtableEntry::Occupied(SmallSubtableOccupiedEntry {
+                    table: self,
+                    entry_ptr,
+                });
+            }
+        }
+
+        if self.len() == SMALL_SUBTABLE_CAPACITY {
+            SmallSubtableEntry::FullTable(self)
+        } else {
+            SmallSubtableEntry::Vacant(SmallSubtableVacantEntry {
+                table: self,
+                byte_hash,
+            })
+        }
+    }
+
     /// # Safety
     /// Callers need to ensure that the `SmallSubtable` is valid, that the correct allocator is
     /// passed and that the nodes owned by this subtable are not modified except by calling
@@ -462,3 +506,118 @@ impl<T> SmallSubtable<T> {
         self.node = new_node_ref;
     }
 }
+
+pub struct SmallSubtableOccupiedEntry<'a, T> {
+    table: &'a mut SmallSubtable<T>,
+    entry_ptr: *mut T,
+}
+
+pub struct SmallSubtableVacantEntry<'a, T> {
+    table: &'a mut SmallSubtable<T>,
+    byte_hash: u8,
+}
+
+pub enum SmallSubtableEntry<'a, T> {
+    Occupied(SmallSubtableOccupiedEntry<'a, T>),
+    Vacant(SmallSubtableVacantEntry<'a, T>),
+    FullTable(&'a mut SmallSubtable<T>),
+}
+
+impl<'a, T> SmallSubtableVacantEntry<'a, T> {
+    pub fn into_table(self) -> &'a mut SmallSubtable<T> {
+        self.table
+    }
+    // # Safety
+    // The referenced subtable must be alive in this allocator, i.e. allocated but not yet
+    // deallocated.
+    pub unsafe fn insert(
+        self,
+        value: T,
+        allocator: &mut NodeAllocator<T>,
+    ) -> SmallSubtableOccupiedEntry<'a, T> {
+        let SmallSubtableVacantEntry { table, byte_hash } = self;
+
+        // SAFETY: we require our node to be alive in the given allocator
+        let node_ptr = unsafe { allocator.ptr(table.node) };
+
+        let target_offset = table.len as usize;
+        debug_assert!(target_offset < SMALL_SUBTABLE_CAPACITY);
+
+        table.len += 1;
+
+        table.hashes[target_offset] = byte_hash;
+
+        let size_class = table.node.size_class();
+
+        if target_offset < size_class.len() {
+            // SAFETY: we still have capacity for an additional slot at target_offset
+            let entry_ptr = unsafe { node_ptr.add(target_offset) };
+            // SAFETY: and can thus safely write to it
+            unsafe { entry_ptr.write(value) };
+            SmallSubtableOccupiedEntry { table, entry_ptr }
+        } else {
+            // we only grow by one at a time so no need to loop
+            let required_size_class = size_class.next();
+
+            let new_node = allocator.alloc(required_size_class);
+
+            // SAFETY: just allocated above, so valid
+            let new_node_ptr = unsafe { allocator.ptr(new_node) };
+            // SAFETY: valid by our own requirements
+            let node_ptr = unsafe { allocator.ptr(table.node) };
+
+            // SAFETY: the new node has a larger size class and is a new allocation so it's a
+            // valid target and the copy is in bounds
+            unsafe { new_node_ptr.copy_from_nonoverlapping(node_ptr, target_offset) };
+
+            // SAFETY: valid up to this point by our own requirements
+            unsafe { allocator.dealloc(table.node) };
+            table.node = new_node;
+
+            // SAFETY: in bounds since this was one past the end for the previous node and the new
+            // node is of a larger size class.
+            let entry_ptr = unsafe { new_node_ptr.add(target_offset) };
+            // SAFETY: so we can also safely write to it
+            unsafe { entry_ptr.write(value) };
+            SmallSubtableOccupiedEntry { table, entry_ptr }
+        }
+    }
+}
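+
+// NOTE: `insert` above grows the backing node by exactly one size class, which is always
+// enough headroom: a small subtable gains at most one entry per call, and `TableSeq::entry`
+// migrates it to a large hashbrown table once it reaches `SMALL_SUBTABLE_CAPACITY` entries.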
+
+impl<'a, T> SmallSubtableOccupiedEntry<'a, T> {
+    pub fn get_mut(&mut self) -> &mut T {
+        // SAFETY: entry_ptr is valid by construction
+        unsafe { &mut *self.entry_ptr }
+    }
+    // SAFETY: entry_ptr has to be a valid pointer in the table
+    pub unsafe fn from_entry_ptr(table: &'a mut SmallSubtable<T>, entry_ptr: *mut T) -> Self {
+        SmallSubtableOccupiedEntry { table, entry_ptr }
+    }
+    // # Safety
+    // The referenced subtable must be alive in this allocator, i.e. allocated but not yet
+    // deallocated.
+    pub unsafe fn remove(
+        self,
+        allocator: &mut NodeAllocator<T>,
+    ) -> (T, SmallSubtableVacantEntry<'a, T>) {
+        let SmallSubtableOccupiedEntry { table, entry_ptr } = self;
+        // SAFETY: guaranteed by caller
+        let node_ptr = unsafe { allocator.ptr(table.node) };
+        // SAFETY: entry_ptr is known to point to a valid entry and we're about to shrink the
+        // table, preventing future use.
+        let value = unsafe { entry_ptr.read() };
+        // SAFETY: since we know we're non-empty this will be in bounds
+        let last_ptr = unsafe { node_ptr.add(table.len as usize - 1) };
+        // SAFETY: if the item we just read taking ownership from was the last item, we're
+        // moving the now uninitialized item in place, otherwise the source is initialized
+        // and the target is uninitialized, with both being in bounds
+        unsafe {
+            last_ptr
+                .cast::<MaybeUninit<T>>()
+                .copy_to(entry_ptr.cast::<MaybeUninit<T>>(), 1)
+        };
+        // SAFETY: entry_ptr pointed at a valid entry, thus we know it must be in bounds of the
+        // allocation
+        let match_index = unsafe { entry_ptr.offset_from(node_ptr) as usize };
+        let byte_hash = table.hashes[match_index];
+        table.hashes[match_index] = table.hashes[table.len as usize - 1];
+        table.len -= 1;
+        (value, SmallSubtableVacantEntry { table, byte_hash })
+    }
+}
diff --git a/table_seq/tests/test.rs b/table_seq/tests/test.rs
index e8e3adf..07cee41 100644
--- a/table_seq/tests/test.rs
+++ b/table_seq/tests/test.rs
@@ -195,6 +195,49 @@ impl<T> TestTableSeq<T> {
             _ => panic!(),
         }
     }
+
+    pub fn insert_with_entry(&mut self, subtable: usize, item: T)
+    where
+        T: Hash + Eq + Debug + Clone,
+    {
+        let mut inserted_spec = false;
+        let mut inserted_dut = false;
+        self.spec[subtable]
+            .entry(hash_ref(&item), |found| *found == item, hash_ref)
+            .or_insert_with(|| {
+                inserted_spec = true;
+                item.clone()
+            });
+        self.under_test
+            .entry(subtable, hash_ref(&item), |found| found == &item, hash_ref)
+            .or_insert_with(|| {
+                inserted_dut = true;
+                item
+            });
+        assert_eq!(inserted_spec, inserted_dut);
+    }
+
+    pub fn remove_with_entry(&mut self, subtable: usize, item: &T)
+    where
+        T: Hash + Eq + Debug,
+    {
+        let removed =
+            match self
+                .under_test
+                .entry(subtable, hash_ref(item), |found| found == item, hash_ref)
+            {
+                table_seq::table_seq::Entry::Occupied(entry) => Some(entry.remove().0),
+                table_seq::table_seq::Entry::Vacant(_) => None,
+            };
+        match self.spec[subtable].entry(hash_ref(item), |found| found == item, hash_ref) {
+            hashbrown::hash_table::Entry::Occupied(found) => {
+                assert_eq!(Some(found.remove().0), removed);
+            }
+            hashbrown::hash_table::Entry::Vacant(_) => {
+                assert!(removed.is_none());
+            }
+        }
+    }
 }
 
 #[derive(Clone, Copy, Debug)]
@@ -283,6 +326,74 @@ fn test_removal() {
     }
 }
 
+#[test]
+fn test_insertion_with_entry() {
+    let mut table = <TestTableSeq<usize>>::default();
+    let size = 10000;
+    table.resize(size / 2);
+    for i in 1..size {
+        for j in 1..size {
+            if i % j == 0 {
+                table.grow_for_subtable(j);
+                table.insert_with_entry(j, i);
+            }
+        }
+    }
+    table.test_iter_all();
+
+    let size = 1000;
+
+    for i in 1..size {
+        for j in 1..size {
+            if (i ^ j) % 7 == 0 {
+                table.grow_for_subtable(i);
+                table.insert_with_entry(i, j);
+            }
+        }
+    }
+
+    table.test_iter_all();
+}
+
+#[test]
+fn test_removal_with_entry() {
+    let mut flip_flop = false;
+    for size in [2, 3, 4, 5, 10, 100, 1000, 10000] {
+        let mut table = <TestTableSeq<usize>>::default();
+
+        for i in 1..size.min(1000000 / size) {
+            for j in 1..size {
+                if (i ^ j) % 7 == 0 {
+                    table.grow_for_subtable(i);
+                    table.insert(i, j);
+                }
+            }
+        }
+        table.resize(table.spec.len() - table.spec.len() / 10);
+        for p in [11, 5, 3, 2, 7] {
+            for i in 1..size.min(1000000 / size) {
+                table.grow_for_subtable(i);
+                for j in 1..size {
+                    if (i ^ j) % p == 0 {
+                        table.remove_with_entry(i, &j);
+                    }
+                    if (i ^ !j) % p == 0 && table.spec[i].len() <= p {
+                        flip_flop = !flip_flop;
+                        if flip_flop {
+                            table.clear_subtable(i);
+                        } else {
+                            table.test_drain(i);
+                        }
+                        continue;
+                    }
+                }
+            }
+            table.test_iter_all();
+        }
+        assert_eq!(table.under_test.flat_len(), 0);
+    }
+}
+
 #[test]
 fn test_mutation() {
     let mut table = <TestTableSeq<Vec<usize>>>::default();