Skip to content

Commit bf9eb42

Browse files
committed
Adds a test to calculate latency of different hashmaps
1 parent 73f3cbb commit bf9eb42

File tree

3 files changed

+140
-0
lines changed

3 files changed

+140
-0
lines changed

doc/latency.gnuplot

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#!/usr/bin/gnuplot
#
# Plots the cumulative distribution of per-operation cost for several hashmap
# implementations into 'latency_cdf.png'.
#
# Input: one .dat file per map with one sample per line, sorted ascending.
# The row index ($0) is used as the rank of a sample, and every file is
# normalized by the number of rows found in 'std.dat', so all files are
# expected to contain the same number of samples.

set terminal pngcairo enhanced font "Arial,12" size 1600,900
set output 'latency_cdf.png'

# '&' and '_' are control characters in enhanced text mode and must be escaped,
# otherwise they are interpreted instead of printed.
set title "Hashmap Operation insert \\& erase (n=1,000,000)"
set xlabel "Retired instructions (PERF\\_COUNT\\_HW\\_INSTRUCTIONS)"
set ylabel "Cumulative Probability (%)"

# Fine-grained grid
set grid xtics ytics mxtics mytics linewidth 0.5
set mxtics 10 # 10 minor divisions between major x-ticks
set mytics 10 # 10 minor divisions between major y-ticks
set xtics auto # Auto-adjust major x-ticks based on data range
# set ytics 0,10,100 format "%.0f%%"

# Only the left part of the distribution is shown; the maxima (see the key)
# lie outside of this range.
set xrange [0:500]

set key right bottom

# Provides TIMES_records, the sample count used to turn a rank into a percentage.
stats 'std.dat' using 1 name "TIMES" nooutput

# Plot CDF for all entries
plot \
    'std.dat' using 1:(100.0*$0/TIMES_records) with lines lw 1 title "std::unordered\\_map (max: 698)", \
    'unordered_dense.dat' using 1:(100.0*$0/TIMES_records) with lines lw 1 title "ankerl::unordered\\_dense::map (max: 418)", \
    'boost_unordered_flat_map.dat' using 1:(100.0*$0/TIMES_records) with lines lw 1 title "boost::unordered\\_flat\\_map (max: 251230)", \
    'boost_unordered_map.dat' using 1:(100.0*$0/TIMES_records) with lines lw 1 title "boost::unordered\\_map (max: 629)"

test/bench/latency_distribution.cpp

+111
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
#include <ankerl/unordered_dense.h>
2+
3+
#include <app/doctest.h>
4+
#include <app/print.h>
5+
#include <limits>
6+
7+
#include <boost/unordered/unordered_flat_map.hpp>
8+
#include <boost/unordered_map.hpp>
9+
10+
#include <fmt/ostream.h>
11+
#include <fmt/ranges.h>
12+
#include <third-party/nanobench.h>
13+
14+
#include <algorithm>
15+
#include <fstream>
16+
#include <unordered_map>
17+
18+
#include <x86intrin.h> // For RDTSC intrinsics
19+
20+
#if 0
21+
22+
/// No-op counterpart of the perf based setup_perf().
///
/// Reading the time-stamp counter needs no preparation, but the benchmark calls
/// setup_perf() unconditionally, so this backend has to provide it as well to
/// stay compilable when it is enabled.
inline void setup_perf() {}

/// Returns the current value of the CPU's time-stamp counter.
///
/// The read is bracketed by load fences so that it is not reordered with the
/// code that is being measured.
inline uint64_t measure() {
    _mm_lfence(); // Ensure no reordering
    uint64_t const tsc = __rdtsc();
    _mm_lfence(); // Ensure no reordering
    return tsc;
}
28+
29+
#else
30+
31+
# include <linux/perf_event.h>
32+
# include <sys/ioctl.h>
33+
# include <sys/syscall.h>
34+
# include <unistd.h>
35+
36+
/// File descriptor of the perf event opened by setup_perf().
/// Stays -1 (an invalid descriptor) until setup_perf() has been called.
static int perf_fd = -1;

/// Opens, resets and enables a hardware counter for retired instructions.
///
/// The resulting descriptor is stored in perf_fd. Any failure is reported via
/// perror() and terminates the process with exit code 1, because measurements
/// taken from a counter that was not set up correctly would be meaningless.
void setup_perf() {
    struct perf_event_attr attr = {};
    attr.type = PERF_TYPE_HARDWARE;
    attr.size = sizeof(attr);
    attr.config = PERF_COUNT_HW_INSTRUCTIONS;
    attr.disabled = 1;       // created stopped, enabled explicitly below
    attr.exclude_kernel = 1; // Exclude kernel time if needed

    // Arguments after attr are: pid = 0, cpu = -1, group_fd = -1, flags = 0.
    perf_fd = static_cast<int>(syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0));
    if (perf_fd == -1) {
        perror("perf_event_open failed");
        exit(1);
    }

    if (ioctl(perf_fd, PERF_EVENT_IOC_RESET, 0) == -1) {
        perror("PERF_EVENT_IOC_RESET failed");
        exit(1);
    }
    if (ioctl(perf_fd, PERF_EVENT_IOC_ENABLE, 0) == -1) {
        perror("PERF_EVENT_IOC_ENABLE failed");
        exit(1);
    }
}
55+
56+
/// Returns the current value of the counter behind perf_fd.
///
/// setup_perf() configures that counter for PERF_COUNT_HW_INSTRUCTIONS, so the
/// difference between two calls is a number of retired instructions, not a time.
/// A failed or short read is reported via perror() and terminates the process
/// with exit code 1 instead of returning an indeterminate value.
uint64_t measure() {
    uint64_t instructions = 0;
    auto const num_read = read(perf_fd, &instructions, sizeof(instructions));
    if (num_read != static_cast<ssize_t>(sizeof(instructions))) {
        perror("reading perf counter failed");
        exit(1);
    }
    return instructions;
}
61+
62+
/*
63+
inline uint64_t measure() {
64+
return static_cast<uint64_t>(std::chrono::steady_clock::now().time_since_epoch().count());
65+
}
66+
*/
67+
68+
#endif
69+
70+
// Measures the cost of one emplace + erase pair on a hashmap, as reported by
// measure(), and writes the sorted samples of the best run to "times.dat"
// (one sample per line).
//
// NOTE(review): doc/latency.gnuplot reads one file per map ('std.dat', ...);
// presumably "times.dat" is renamed by hand after each run - confirm.
TEST_CASE("bench_latency" * doctest::test_suite("bench") * doctest::skip()) {
    setup_perf();

    // Select the map under test by enabling exactly one of these aliases.
    // using map_t = ankerl::unordered_dense::map<uint64_t, uint64_t>;
    // using map_t = std::unordered_map<uint64_t, uint64_t>;
    // using map_t = boost::unordered_flat_map<uint64_t, uint64_t>;
    using map_t = boost::unordered_map<uint64_t, uint64_t>;

    // Keys are drawn from [0, num_elements), which bounds the size of the map.
    static constexpr auto num_elements = size_t(16383);
    static constexpr auto num_evaluations = size_t(1000000);

    auto measurements = std::vector<uint64_t>(num_evaluations);
    // Starts as a single maximal sentinel so that the first run replaces it.
    auto best_measurements = std::vector<uint64_t>(1, std::numeric_limits<uint64_t>::max());

    auto rng = ankerl::nanobench::Rng(123);

    auto map = map_t();

    // do it several times, so we have some warmup
    for (size_t retries = 0; retries < 2; ++retries) {
        for (size_t eval = 0; eval < num_evaluations; ++eval) {
            auto const before = measure();
            map.emplace(rng() % num_elements, 0);
            map.erase(rng() % num_elements);
            auto const after = measure();

            measurements[eval] = after - before;
        }

        // Sorted samples double as the CDF input; every slot is overwritten in
        // the next run, so sorting in place is fine.
        std::sort(measurements.begin(), measurements.end());

        // The run with the smallest maximum counts as the best one.
        if (measurements.back() < best_measurements.back()) {
            best_measurements = measurements;
        }
        test::print("min: {}, median: {}, max: {}\n",
                    best_measurements.front(),
                    best_measurements[best_measurements.size() / 2],
                    best_measurements.back());
    }

    auto fout = std::ofstream("times.dat");
    // Fail loudly instead of silently dropping all results.
    REQUIRE(fout.is_open());
    fmt::print(fout, "{}", fmt::join(best_measurements, "\n"));
}

test/meson.build

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ test_sources = [
77
'app/ui/progress_bar.cpp',
88
'app/unordered_dense.cpp',
99

10+
'bench/latency_distribution.cpp',
1011
'bench/swap.cpp',
1112
'bench/show_allocations.cpp',
1213
'bench/quick_overall_map.cpp',

0 commit comments

Comments
 (0)