Skip to content

Commit e859e65

Browse files
committed
stable_set: make small index type generic for testing, write better tests for the set type
1 parent 26c2096 commit e859e65

File tree

6 files changed

+392
-118
lines changed

6 files changed

+392
-118
lines changed

Cargo.lock

Lines changed: 17 additions & 10 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

stable_set/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,5 @@ hashbrown = { version = "0.14.5", default-features = false, features = ["inline-
1414
[dev-dependencies]
1515
zwohash = "0.1.2"
1616
rand = "0.8.5"
17-
rand_pcg = "0.3.1"
17+
rand_pcg = "0.3.1"
18+
indexmap = "2.6.0"

stable_set/src/index_table.rs

Lines changed: 83 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1,71 +1,91 @@
11
use hashbrown::hash_table::HashTable;
22

3+
/// Trait for a small index type. You can assume `T: SmallIndex` means `T = u32` most of the time.
4+
///
5+
/// The only exception is for testing this crate, where we use `T = u8`.
6+
///
7+
/// This is basically `TryInto<usize> + TryFrom<usize>` with some convenience methods
8+
pub trait SmallIndex: Copy + std::fmt::Debug + TryInto<usize> + TryFrom<usize> {
9+
fn into_usize(self) -> usize {
10+
// the unreachable! should be straightforwardly impossible bc the conversion is infallible
11+
// it would be nice if we could just rely on Into<usize> existing
12+
// but std doesn't define it for u32 because you might be on a 16-bit platform...
13+
self.try_into().unwrap_or_else(|_| unreachable!())
14+
}
15+
fn from_usize_unwrap(index: usize) -> Self {
16+
// unfortunately .try_into().unwrap() doesn't work in the generic case
17+
// because TryFrom::Error might not be Debug
18+
index
19+
.try_into()
20+
.unwrap_or_else(|_| panic!("shouldn't happen: small index overflow in IndexTable"))
21+
}
22+
}
23+
24+
impl SmallIndex for u8 {}
25+
impl SmallIndex for u16 {}
26+
impl SmallIndex for u32 {}
27+
impl SmallIndex for usize {}
28+
29+
// We could just use u32 directly for the small index,
30+
// but this is slightly less error-prone and allows us to use a smaller type for testing.
331
#[derive(Debug, Clone)]
4-
pub enum IndexTable {
5-
Small(HashTable<u32>),
32+
pub enum IndexTable<S> {
33+
Small(HashTable<S>),
634
Large(HashTable<usize>),
735
}
836

937
#[derive(Debug)]
10-
pub enum OccupiedEntry<'a> {
11-
Small(hashbrown::hash_table::OccupiedEntry<'a, u32>),
38+
pub enum OccupiedEntry<'a, S> {
39+
Small(hashbrown::hash_table::OccupiedEntry<'a, S>),
1240
Large(hashbrown::hash_table::OccupiedEntry<'a, usize>),
1341
}
1442

1543
#[derive(Debug)]
16-
pub enum VacantEntry<'a> {
17-
Small(hashbrown::hash_table::VacantEntry<'a, u32>),
44+
pub enum VacantEntry<'a, S> {
45+
Small(hashbrown::hash_table::VacantEntry<'a, S>),
1846
Large(hashbrown::hash_table::VacantEntry<'a, usize>),
1947
}
2048

2149
#[derive(Debug)]
2250
#[allow(dead_code)]
23-
pub enum AbsentEntry<'a> {
24-
Small(hashbrown::hash_table::AbsentEntry<'a, u32>),
51+
pub enum AbsentEntry<'a, S> {
52+
Small(hashbrown::hash_table::AbsentEntry<'a, S>),
2553
Large(hashbrown::hash_table::AbsentEntry<'a, usize>),
2654
}
2755

2856
#[derive(Debug)]
2957
#[allow(dead_code)]
30-
pub enum Entry<'a> {
31-
Occupied(OccupiedEntry<'a>),
32-
Vacant(VacantEntry<'a>),
58+
pub enum Entry<'a, S> {
59+
Occupied(OccupiedEntry<'a, S>),
60+
Vacant(VacantEntry<'a, S>),
3361
}
3462

35-
impl Default for IndexTable {
63+
impl<S> Default for IndexTable<S> {
3664
fn default() -> Self {
3765
IndexTable::Small(HashTable::new())
3866
}
3967
}
4068

41-
impl IndexTable {
69+
impl<S: SmallIndex> IndexTable<S> {
4270
pub fn with_capacity(capacity: usize) -> Self {
43-
if Self::try_as_small(capacity).is_ok() {
71+
if S::try_from(capacity).is_ok() {
4472
IndexTable::Small(HashTable::with_capacity(capacity))
4573
} else {
4674
IndexTable::Large(HashTable::with_capacity(capacity))
4775
}
4876
}
4977
#[inline(always)]
50-
fn as_small(index: usize) -> u32 {
51-
Self::try_as_small(index).unwrap()
52-
}
53-
#[inline(always)]
54-
fn try_as_small(index: usize) -> Result<u32, std::num::TryFromIntError> {
55-
u32::try_from(index)
56-
}
57-
#[inline(always)]
5878
pub fn entry(
5979
&mut self,
6080
hash: u64,
6181
mut eq: impl FnMut(usize) -> bool,
6282
hasher: impl Fn(usize) -> u64,
63-
) -> Entry<'_> {
83+
) -> Entry<'_, S> {
6484
match self {
6585
IndexTable::Small(table) => match table.entry(
6686
hash,
67-
|&index| eq(index as usize),
68-
|&index| hasher(index as usize),
87+
|&index| eq(index.into_usize()),
88+
|&index| hasher(index.into_usize()),
6989
) {
7090
hashbrown::hash_table::Entry::Occupied(entry) => {
7191
Entry::Occupied(OccupiedEntry::Small(entry))
@@ -91,12 +111,14 @@ impl IndexTable {
91111
&mut self,
92112
hash: u64,
93113
mut eq: impl FnMut(usize) -> bool,
94-
) -> Result<OccupiedEntry<'_>, AbsentEntry<'_>> {
114+
) -> Result<OccupiedEntry<'_, S>, AbsentEntry<'_, S>> {
95115
match self {
96-
IndexTable::Small(table) => match table.find_entry(hash, |&index| eq(index as usize)) {
97-
Ok(entry) => Ok(OccupiedEntry::Small(entry)),
98-
Err(entry) => Err(AbsentEntry::Small(entry)),
99-
},
116+
IndexTable::Small(table) => {
117+
match table.find_entry(hash, |&index| eq(index.into_usize())) {
118+
Ok(entry) => Ok(OccupiedEntry::Small(entry)),
119+
Err(entry) => Err(AbsentEntry::Small(entry)),
120+
}
121+
}
100122
IndexTable::Large(table) => match table.find_entry(hash, |&index| eq(index)) {
101123
Ok(entry) => Ok(OccupiedEntry::Large(entry)),
102124
Err(entry) => Err(AbsentEntry::Large(entry)),
@@ -107,8 +129,8 @@ impl IndexTable {
107129
pub fn find(&self, hash: u64, mut eq: impl FnMut(usize) -> bool) -> Option<usize> {
108130
match self {
109131
IndexTable::Small(table) => table
110-
.find(hash, |&index| eq(index as usize))
111-
.map(|&index| index as usize),
132+
.find(hash, |&index| eq(index.into_usize()))
133+
.map(|&index| index.into_usize()),
112134
IndexTable::Large(table) => table.find(hash, |&index| eq(index)).copied(),
113135
}
114136
}
@@ -121,9 +143,13 @@ impl IndexTable {
121143
value: usize,
122144
) -> Option<usize> {
123145
match self {
124-
IndexTable::Small(table) => table
125-
.find_mut(hash, |&index| eq(index as usize))
126-
.map(|index_ref| std::mem::replace(index_ref, Self::as_small(value)) as usize),
146+
IndexTable::Small(table) => {
147+
table
148+
.find_mut(hash, |&index| eq(index.into_usize()))
149+
.map(|index_ref| {
150+
std::mem::replace(index_ref, S::from_usize_unwrap(value)).into_usize()
151+
})
152+
}
127153
IndexTable::Large(table) => table
128154
.find_mut(hash, |&index| eq(index))
129155
.map(|index_ref| std::mem::replace(index_ref, value)),
@@ -138,7 +164,7 @@ impl IndexTable {
138164
}
139165
#[inline(always)]
140166
pub fn grow_for(&mut self, index: usize, hasher: impl Fn(usize) -> u64) {
141-
if Self::try_as_small(index).is_err() && self.is_small() {
167+
if S::try_from(index).is_err() && self.is_small() {
142168
self.grow_cold(hasher)
143169
}
144170
}
@@ -155,7 +181,7 @@ impl IndexTable {
155181
};
156182
new_table.reserve(old_table.len(), |&j| hasher(j));
157183
for i in old_table {
158-
new_table.insert_unique(hasher(i as usize), i as usize, |&j| hasher(j));
184+
new_table.insert_unique(hasher(i.into_usize()), i.into_usize(), |&j| hasher(j));
159185
}
160186
}
161187
#[inline(always)]
@@ -175,9 +201,9 @@ impl IndexTable {
175201
#[inline(always)]
176202
pub fn retain(&mut self, mut f: impl FnMut(usize) -> Option<usize>) {
177203
match self {
178-
IndexTable::Small(table) => table.retain(|index| match f(*index as usize) {
204+
IndexTable::Small(table) => table.retain(|index| match f((*index).into_usize()) {
179205
Some(new_index) => {
180-
*index = Self::as_small(new_index);
206+
*index = S::from_usize_unwrap(new_index);
181207
true
182208
}
183209
None => false,
@@ -195,7 +221,9 @@ impl IndexTable {
195221
pub fn reserve(&mut self, additional: usize, hasher: impl Fn(usize) -> u64) {
196222
self.grow_for((self.len() + additional).saturating_sub(1), &hasher);
197223
match self {
198-
IndexTable::Small(table) => table.reserve(additional, |&index| hasher(index as usize)),
224+
IndexTable::Small(table) => {
225+
table.reserve(additional, |&index| hasher(index.into_usize()))
226+
}
199227
IndexTable::Large(table) => table.reserve(additional, |&index| hasher(index)),
200228
}
201229
}
@@ -217,12 +245,12 @@ impl IndexTable {
217245
}
218246
}
219247

220-
impl<'a> VacantEntry<'a> {
248+
impl<'a, S: SmallIndex> VacantEntry<'a, S> {
221249
#[inline(always)]
222-
pub fn insert(self, index: usize) -> OccupiedEntry<'a> {
250+
pub fn insert(self, index: usize) -> OccupiedEntry<'a, S> {
223251
match self {
224252
VacantEntry::Small(entry) => {
225-
OccupiedEntry::Small(entry.insert(IndexTable::as_small(index)))
253+
OccupiedEntry::Small(entry.insert(S::from_usize_unwrap(index)))
226254
}
227255
VacantEntry::Large(entry) => OccupiedEntry::Large(entry.insert(index)),
228256
}
@@ -237,30 +265,34 @@ impl<'a> VacantEntry<'a> {
237265
value: usize,
238266
) -> Option<usize> {
239267
match self {
240-
VacantEntry::Small(entry) => entry.into_table()
241-
.find_mut(hash, |&index| eq(index as usize))
242-
.map(|index_ref| std::mem::replace(index_ref, IndexTable::as_small(value)) as usize),
243-
VacantEntry::Large(entry) => entry.into_table()
268+
VacantEntry::Small(entry) => entry
269+
.into_table()
270+
.find_mut(hash, |&index| eq(index.into_usize()))
271+
.map(|index_ref| {
272+
std::mem::replace(index_ref, S::from_usize_unwrap(value)).into_usize()
273+
}),
274+
VacantEntry::Large(entry) => entry
275+
.into_table()
244276
.find_mut(hash, |&index| eq(index))
245277
.map(|index_ref| std::mem::replace(index_ref, value)),
246278
}
247279
}
248280
}
249281

250-
impl<'a> OccupiedEntry<'a> {
282+
impl<'a, S: SmallIndex> OccupiedEntry<'a, S> {
251283
#[inline(always)]
252284
pub fn get(&self) -> usize {
253285
match self {
254-
OccupiedEntry::Small(entry) => *entry.get() as usize,
286+
OccupiedEntry::Small(entry) => (*entry.get()).into_usize(),
255287
OccupiedEntry::Large(entry) => *entry.get(),
256288
}
257289
}
258290
#[inline(always)]
259-
pub fn remove(self) -> (usize, VacantEntry<'a>) {
291+
pub fn remove(self) -> (usize, VacantEntry<'a, S>) {
260292
match self {
261293
OccupiedEntry::Small(entry) => {
262294
let (index, entry) = entry.remove();
263-
(index as usize, VacantEntry::Small(entry))
295+
(index.into_usize(), VacantEntry::Small(entry))
264296
}
265297
OccupiedEntry::Large(entry) => {
266298
let (index, entry) = entry.remove();

0 commit comments

Comments
 (0)