Skip to content

Commit bd7a367

Browse files
committed
Initial commit
0 parents  commit bd7a367

9 files changed

+416
-0
lines changed

.clang-format

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
BasedOnStyle: LLVM

.dir-locals.el

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
((c++-mode (eval add-hook 'before-save-hook #'clang-format-buffer nil t))
2+
(nil . ((compile-command . "cmake --build build")))
3+
)

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
build
2+
*~

.projectile

Whitespace-only changes.

CMakeLists.txt

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Build script for the HashMap benchmark executable.
cmake_minimum_required(VERSION 3.2)
project(HashMap)

# Request C++14 through CMake's standard-selection variables instead of
# hard-coding "-std=c++14"; extensions are disabled so the compiler is invoked
# with -std=c++14 rather than -std=gnu++14.
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Append warnings to whatever flags the user or toolchain already supplied
# instead of overwriting them.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
set(CMAKE_CXX_FLAGS_RELEASE "-O3")

add_executable(benchmark benchmark.cpp)

HashMap.h

+262
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,262 @@
1+
/*
2+
Copyright (c) 2015 Erik Rigtorp <[email protected]>
3+
4+
Permission is hereby granted, free of charge, to any person obtaining a copy
5+
of this software and associated documentation files (the "Software"), to deal
6+
in the Software without restriction, including without limitation the rights
7+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8+
copies of the Software, and to permit persons to whom the Software is
9+
furnished to do so, subject to the following conditions:
10+
11+
The above copyright notice and this permission notice shall be included in all
12+
copies or substantial portions of the Software.
13+
14+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20+
SOFTWARE.
21+
*/
22+
23+
/*
24+
HashMap
25+
26+
A high performance hash map. Uses open addressing with linear
27+
probing.
28+
29+
Advantages:
30+
- Predictable performance. Doesn't use the allocator unless load factor
31+
grows beyond 50%. Linear probing ensures cache efficiency.
32+
- Deletes items by rearranging items and marking slots as empty instead of
33+
marking items as deleted. This keeps performance high when there
34+
is a high rate of churn (many paired inserts and deletes) since otherwise
35+
most slots would be marked deleted and probing would end up scanning
36+
most of the table.
37+
38+
Disadvantages:
39+
- Significant performance degradation at high load factors.
40+
- Maximum load factor hard coded to 50%, memory inefficient.
41+
- Memory is not reclaimed on erase.
42+
*/
43+
44+
#pragma once
45+
46+
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <iterator>
#include <limits>
#include <stdexcept>
#include <utility>
#include <vector>
51+
52+
/// Open-addressing hash map using linear probing and backshift deletion.
///
/// One key value, supplied at construction, is reserved as the "empty" marker
/// and must never be inserted. The bucket array always has a power-of-two
/// length and grows whenever an insertion would push the load factor above
/// 50%. Memory is never released on erase().
///
/// @tparam Key   Key type; must be copyable and comparable with operator==.
/// @tparam T     Mapped type; must be default constructible and assignable.
/// @tparam Hash  Default-constructible hash functor for Key.
template <typename Key, typename T, typename Hash = std::hash<Key>>
class HashMap {
public:
  using key_type = Key;
  using mapped_type = T;
  using value_type = std::pair<Key, T>;
  using size_type = std::size_t;
  using hasher = Hash;
  using reference = value_type &;
  using const_reference = const value_type &;
  using buckets = std::vector<value_type>;

  /// Forward iterator over the occupied buckets of a (const) HashMap.
  ///
  /// @tparam ContT    HashMap or const HashMap.
  /// @tparam IterVal  value_type or const value_type.
  template <typename ContT, typename IterVal> struct hm_iterator {
    using difference_type = std::ptrdiff_t;
    using value_type = IterVal;
    using pointer = value_type *;
    using reference = value_type &;
    using iterator_category = std::forward_iterator_tag;

    /// Creates a singular iterator that is not attached to any map.
    hm_iterator() = default;

    bool operator==(const hm_iterator &other) const {
      return other.hm_ == hm_ && other.idx_ == idx_;
    }
    bool operator!=(const hm_iterator &other) const {
      return !(other == *this);
    }

    /// Moves to the next occupied bucket (or to end()).
    hm_iterator &operator++() {
      ++idx_;
      advance_past_empty();
      return *this;
    }

    hm_iterator operator++(int) {
      hm_iterator previous(*this);
      ++*this;
      return previous;
    }

    reference operator*() const { return hm_->buckets_[idx_]; }
    pointer operator->() const { return &hm_->buckets_[idx_]; }

  private:
    // Positions the iterator on the first occupied bucket.
    explicit hm_iterator(ContT *hm) : hm_(hm) { advance_past_empty(); }
    // Positions the iterator on exactly `idx`, without skipping empty buckets.
    explicit hm_iterator(ContT *hm, size_type idx) : hm_(hm), idx_(idx) {}

    void advance_past_empty() {
      while (idx_ < hm_->buckets_.size() &&
             hm_->buckets_[idx_].first == hm_->empty_key_) {
        ++idx_;
      }
    }

    ContT *hm_ = nullptr;
    typename ContT::size_type idx_ = 0;
    friend ContT;
  };

  using iterator = hm_iterator<HashMap, value_type>;
  using const_iterator = hm_iterator<const HashMap, const value_type>;

public:
  /// Creates an empty map.
  ///
  /// @param bucket_count  Minimum number of buckets; rounded up to the next
  ///                      power of two (at least 1).
  /// @param empty_key     Key value reserved to mark unused buckets.
  HashMap(size_type bucket_count, key_type empty_key) : empty_key_(empty_key) {
    size_type pow2 = 1;
    while (pow2 < bucket_count) {
      pow2 <<= 1;
    }
    buckets_.resize(pow2, value_type(empty_key_, T()));
  }

  /// Creates a map holding the entries of `other`, starting with at least
  /// `bucket_count` buckets (more are allocated if the entries require it).
  HashMap(const HashMap &other, size_type bucket_count)
      : HashMap(bucket_count, other.empty_key_) {
    for (const auto &entry : other) {
      insert(entry);
    }
  }

  // Iterators
  iterator begin() { return iterator(this); }

  const_iterator begin() const { return const_iterator(this); }

  iterator end() { return iterator(this, buckets_.size()); }

  const_iterator end() const { return const_iterator(this, buckets_.size()); }

  // Capacity
  bool empty() const { return size() == 0; }
  size_type size() const { return size_; }
  size_type max_size() const { return std::numeric_limits<size_type>::max(); }

  // Modifiers

  /// Removes every entry while keeping the bucket array allocated. Mapped
  /// values are left in place; only the keys are reset to the empty marker.
  void clear() {
    for (auto &bucket : buckets_) {
      bucket.first = empty_key_;
    }
    size_ = 0; // keep size() and the growth policy consistent with the table
  }

  /// Inserts a copy of `value` unless its key is already present.
  /// @return Iterator to the entry with that key and whether it was inserted.
  std::pair<iterator, bool> insert(const value_type &value) {
    return emplace(value.first, value.second);
  }

  /// Same as the copying overload but moves from `value`.
  std::pair<iterator, bool> insert(value_type &&value) {
    return emplace(std::move(value.first), std::move(value.second));
  }

  /// Inserts `key` mapped to `mapped_type(args...)` unless `key` is present.
  ///
  /// `key` must not equal the empty key given at construction. The table may
  /// be grown (invalidating iterators) even when the key turns out to exist.
  ///
  /// @return Iterator to the entry with that key and whether it was inserted.
  template <typename... Args>
  std::pair<iterator, bool> emplace(key_type key, Args &&... args) {
    reserve(size_ + 1);
    for (size_type idx = key_to_idx(key);; idx = probe_next(idx)) {
      value_type &slot = buckets_[idx];
      if (slot.first == empty_key_) {
        slot.second = mapped_type(std::forward<Args>(args)...);
        slot.first = key;
        ++size_;
        return std::make_pair(iterator(this, idx), true);
      }
      if (slot.first == key) {
        return std::make_pair(iterator(this, idx), false);
      }
    }
  }

  /// Removes the entry `it` points at. `it` must refer to an occupied bucket
  /// of this map (not end()).
  ///
  /// Later entries of the same probe run are shifted back into the hole so
  /// that no tombstone is needed.
  void erase(iterator it) {
    size_type bucket = it.idx_;
    for (size_type idx = probe_next(bucket);; idx = probe_next(idx)) {
      if (buckets_[idx].first == empty_key_) {
        // End of the probe run: the current hole becomes the empty slot.
        buckets_[bucket].first = empty_key_;
        --size_;
        return;
      }
      const size_type ideal = key_to_idx(buckets_[idx].first);
      if (diff(bucket, ideal) < diff(idx, ideal)) {
        // The hole is closer to this entry's ideal slot than its current
        // position, so move the entry back; its old slot is the new hole.
        buckets_[bucket] = buckets_[idx];
        bucket = idx;
      }
    }
  }

  /// Removes the entry with the given key, if any.
  /// @return Number of entries removed (0 or 1).
  size_type erase(const key_type key) {
    auto it = find(key);
    if (it != end()) {
      erase(it);
      return 1;
    }
    return 0;
  }

  /// Exchanges the contents (buckets, size and empty key) with `other`.
  void swap(HashMap &other) {
    std::swap(buckets_, other.buckets_);
    std::swap(size_, other.size_);
    std::swap(empty_key_, other.empty_key_);
  }

  // Lookup

  /// Returns the value mapped to `key`.
  /// @throws std::out_of_range if `key` is not present.
  mapped_type &at(key_type key) {
    const size_type idx = find_idx(key);
    if (idx == buckets_.size()) {
      throw std::out_of_range("HashMap::at: key not found");
    }
    return buckets_[idx].second;
  }

  /// Const overload of at().
  /// @throws std::out_of_range if `key` is not present.
  const mapped_type &at(key_type key) const {
    const size_type idx = find_idx(key);
    if (idx == buckets_.size()) {
      throw std::out_of_range("HashMap::at: key not found");
    }
    return buckets_[idx].second;
  }

  /// Returns 1 if `key` is present, 0 otherwise.
  size_type count(key_type key) const {
    return find_idx(key) == buckets_.size() ? 0 : 1;
  }

  /// Returns an iterator to the entry with `key`, or end() if absent.
  iterator find(key_type key) { return iterator(this, find_idx(key)); }

  /// Const overload of find().
  const_iterator find(key_type key) const {
    return const_iterator(this, find_idx(key));
  }

  // Bucket interface
  size_type bucket_count() const { return buckets_.size(); }

  // Hash policy

  /// Rebuilds the table with at least `count` buckets, and never fewer than
  /// twice the current number of entries.
  void rehash(size_type count) {
    count = std::max(count, size() * 2);
    HashMap other(*this, count);
    swap(other);
  }

  /// Ensures `count` entries fit without exceeding the 50% load factor.
  /// Never shrinks the table.
  void reserve(size_type count) {
    if (count * 2 > buckets_.size()) {
      // The constructor rounds up to a power of two, so the new table is at
      // least twice as large as the current one.
      HashMap other(*this, count * 2);
      swap(other);
    }
  }

  // Observers
  hasher hash_function() const { return hasher(); }

private:
  // Returns the bucket index holding `key`, or buckets_.size() if absent.
  // The empty check comes first so that looking up the empty key itself never
  // reports an unused bucket as a match.
  size_type find_idx(const key_type &key) const {
    for (size_type idx = key_to_idx(key);; idx = probe_next(idx)) {
      if (buckets_[idx].first == empty_key_) {
        return buckets_.size();
      }
      if (buckets_[idx].first == key) {
        return idx;
      }
    }
  }

  // Ideal bucket for `key`; relies on the bucket count being a power of two.
  size_type key_to_idx(const key_type &key) const {
    const size_type mask = buckets_.size() - 1;
    return hasher()(key) & mask;
  }

  // Next bucket in the probe sequence, wrapping around at the end.
  size_type probe_next(size_type idx) const {
    const size_type mask = buckets_.size() - 1;
    return (idx + 1) & mask;
  }

  // Cyclic distance from bucket `b` forward to bucket `a`.
  size_type diff(size_type a, size_type b) const {
    const size_type mask = buckets_.size() - 1;
    return (buckets_.size() + (a - b)) & mask;
  }

  key_type empty_key_;
  buckets buckets_;
  size_type size_ = 0;
};

LICENSE

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
The MIT License (MIT)
2+
3+
Copyright (c) 2015 Erik Rigtorp <[email protected]>
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.
22+

README.md

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# HashMap.h
2+
3+
A hash table mostly compatible with the C++11 *std::unordered_map*
4+
interface, but with much higher performance.
5+
6+
This hash table uses open addressing with linear probing and backshift
7+
deletion. Open addressing and linear probing minimize memory
8+
allocations and achieve high cache efficiency. Backshift deletion keeps
9+
performance high for delete heavy workloads by not clobbering the hash
10+
table with tombstones.
11+
12+
Please note that this hash table currently only works with POD-types,
13+
destructors are not called on *erase()*. It's not too hard to make it
14+
work with complex types.
15+
16+
## Benchmark
17+
18+
The benchmark first inserts 1M random entries in the table and then
19+
removes the last inserted item and inserts a new random entry 1
20+
billion times. This benchmark is designed to simulate a delete
21+
heavy workload.
22+
23+
```
24+
HashMap: 77 ns/iter
25+
google::dense_hash_map: 122 ns/iter
26+
std::unordered_map: 220 ns/iter
27+
```

0 commit comments

Comments
 (0)