Skip to content

Commit 6f98e29

Browse files
committed
Add benchmarks for runtime.memory_* comparison procedures
1 parent 850bc3f commit 6f98e29

File tree

1 file changed

+228
-0
lines changed

1 file changed

+228
-0
lines changed
Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
package benchmark_runtime
2+
3+
import "base:runtime"
4+
import "core:bytes"
5+
import "core:fmt"
6+
import "core:log"
7+
import "core:testing"
8+
import "core:strings"
9+
import "core:text/table"
10+
import "core:time"
11+
12+
// Number of timed calls made for every entry in `sizes`.
RUNS_PER_SIZE :: 2500

// Buffer lengths, in bytes, that every procedure is benchmarked with.
// The first five rows bracket 8, 16, 32, 64 and 96 with the value just below
// and just above each of them; the remaining entries go up to 1 MiB.
sizes := [?]int{
	 7,  8,  9,
	15, 16, 17,
	31, 32, 33,
	63, 64, 65,
	95, 96, 97,
	128, 256, 512, 1024, 4096,
	1 << 20,
}
27+
28+
// These are the normal, unoptimized algorithms.
29+
30+
// Scalar baseline for the equality benchmark: compares two memory regions one
// byte at a time.
//
// Inputs:
// - x, y: start addresses of the two regions
// - n:    number of bytes to compare
//
// Returns: `true` when `n` is zero, when both pointers are identical, or when
// the first `n` bytes of both regions match; `false` at the first mismatch.
plain_memory_equal :: proc "contextless" (x, y: rawptr, n: int) -> bool {
	// Nothing to compare, or both arguments refer to the very same memory.
	if n == 0 || x == y {
		return true
	}

	lhs := ([^]byte)(x)
	rhs := ([^]byte)(y)
	count := uint(n)

	for offset in 0..<count {
		if lhs[offset] != rhs[offset] {
			return false
		}
	}
	return true
}
45+
46+
// Scalar baseline for the ordering benchmark: compares two memory regions a
// machine word at a time and falls back to bytes to locate the first
// difference.
//
// Inputs:
// - a, b: start addresses of the two regions
// - n:    number of bytes to compare
//
// Returns:
// -  0 when both pointers are identical or the first `n` bytes match
// - -1 when `a` is nil (and `b` is not), or the first differing byte of `a` is smaller
// - +1 when `b` is nil (and `a` is not), or the first differing byte of `a` is larger
plain_memory_compare :: proc "contextless" (a, b: rawptr, n: int) -> int #no_bounds_check {
	switch {
	case a == b:   return 0
	case a == nil: return -1
	case b == nil: return +1
	}

	x := uintptr(a)
	y := uintptr(b)
	n := uintptr(n)

	SU :: size_of(uintptr)

	// Only words that lie completely inside the n-byte region are loaded.
	// Iterating over `n/SU + 1` words would load one word more than the
	// regions hold and read past the end of both buffers.
	whole_words := n / SU
	tail_start  := whole_words * SU

	for word := uintptr(0); word < whole_words; word += 1 {
		va := (^uintptr)(x + word*SU)^
		vb := (^uintptr)(y + word*SU)^
		if va ~ vb != 0 {
			// The words differ: walk their bytes in address order so the result
			// is decided by the first differing byte.
			for pos := word*SU; pos < n; pos += 1 {
				ba := (^byte)(x + pos)^
				bb := (^byte)(y + pos)^
				if ba != bb {
					return -1 if ba < bb else +1
				}
			}
		}
	}

	// Bytes left over after the last complete word.
	for pos := tail_start; pos < n; pos += 1 {
		ba := (^byte)(x + pos)^
		bb := (^byte)(y + pos)^
		if ba != bb {
			return -1 if ba < bb else +1
		}
	}

	return 0
}
89+
90+
// Scalar baseline for the zero-check benchmark: tests whether a memory region
// consists entirely of zero bytes, a machine word at a time.
//
// Inputs:
// - a: start address of the region
// - n: number of bytes to inspect
//
// Returns: 0 when all `n` bytes are zero, +1 as soon as a non-zero byte is found.
plain_memory_compare_zero :: proc "contextless" (a: rawptr, n: int) -> int #no_bounds_check {
	x := uintptr(a)
	n := uintptr(n)

	SU :: size_of(uintptr)

	// Only words that lie completely inside the n-byte region are loaded.
	// Iterating over `n/SU + 1` words would load one word more than the
	// region holds and read past the end of the buffer.
	whole_words := n / SU
	tail_start  := whole_words * SU

	for word := uintptr(0); word < whole_words; word += 1 {
		if (^uintptr)(x + word*SU)^ != 0 {
			// Kept as a byte scan so the work done per hit matches the
			// word-then-byte shape of the comparison baseline.
			for pos := word*SU; pos < n; pos += 1 {
				if (^byte)(x + pos)^ != 0 {
					// A byte is unsigned, so a non-zero byte always compares greater than zero.
					return +1
				}
			}
		}
	}

	// Bytes left over after the last complete word.
	for pos := tail_start; pos < n; pos += 1 {
		if (^byte)(x + pos)^ != 0 {
			return +1
		}
	}

	return 0
}
123+
124+
// Times `runs` calls of a two-buffer comparison procedure.
//
// Two zero-initialised buffers of `size` bytes are allocated and byte `idx` of
// the second one is set to 0x01, so the buffers differ at exactly that index.
//
// Inputs:
// - p:    procedure under test; its result type must be `bool` or `int` for the result to be checked
// - size: length of both buffers in bytes
// - idx:  index of the differing byte
// - runs: number of timed calls
// - loc:  source location reported by a failing assertion
//
// Returns: the time the stopwatch measured across all calls.
run_trial_size_cmp :: proc(p: proc "contextless" (rawptr, rawptr, int) -> $R, size: int, idx: int, runs: int, loc := #caller_location) -> (timing: time.Duration) {
	lhs := make([]u8, size)
	rhs := make([]u8, size)
	defer {
		delete(lhs)
		delete(rhs)
	}

	rhs[idx] = 0x01

	// Folds every result into a value that is logged afterwards.
	sink: int
	stopwatch: time.Stopwatch

	time.stopwatch_start(&stopwatch)
	for _ in 0..<runs {
		result := p(&lhs[0], &rhs[0], size)
		when R == bool {
			// The buffers differ, so an equality check must report false.
			assert(!result, loc = loc)
			sink += 1
		} else when R == int {
			// The left buffer holds the smaller byte at `idx`.
			assert(result == -1, loc = loc)
			sink += result
		}
	}
	time.stopwatch_stop(&stopwatch)

	elapsed := time.stopwatch_duration(stopwatch)
	log.debug(sink)
	return elapsed
}
155+
156+
// Times `runs` calls of a single-buffer zero-check procedure.
//
// A zero-initialised buffer of `size` bytes is allocated and byte `idx` is set
// to 0x01, so every call is expected to return 1.
//
// Inputs:
// - p:    procedure under test
// - size: length of the buffer in bytes
// - idx:  index of the non-zero byte
// - runs: number of timed calls
// - loc:  source location reported by a failing assertion
//
// Returns: the time the stopwatch measured across all calls.
run_trial_size_zero :: proc(p: proc "contextless" (rawptr, int) -> int, size: int, idx: int, runs: int, loc := #caller_location) -> (timing: time.Duration) {
	buffer := make([]u8, size)
	defer delete(buffer)

	buffer[idx] = 0x01

	// Folds every result into a value that is logged afterwards.
	sink: int
	stopwatch: time.Stopwatch

	time.stopwatch_start(&stopwatch)
	for _ in 0..<runs {
		result := p(&buffer[0], size)
		assert(result == 1, loc = loc)
		sink += result
	}
	time.stopwatch_stop(&stopwatch)

	elapsed := time.stopwatch_duration(stopwatch)
	log.debug(sink)
	return elapsed
}
178+
179+
// Overload set: resolves to the two-buffer or the single-buffer trial runner
// depending on the signature of the procedure passed in.
run_trial_size :: proc{run_trial_size_cmp, run_trial_size_zero}
183+
184+
185+
// Benchmarks a scalar procedure against its runtime counterpart for every
// entry in `sizes` and logs the timings as a right-aligned plain-text table.
//
// Inputs:
// - algo_name: label printed in the "Algorithm" column
// - plain:     procedure whose timing goes in the "Scalar" column
// - simd:      procedure whose timing goes in the "SIMD" column; same type as `plain`
bench_table :: proc(algo_name: string, plain, simd: $P) {
	output := strings.builder_make()
	defer strings.builder_destroy(&output)

	results: table.Table
	table.init(&results)
	defer table.destroy(&results)

	table.aligned_header_of_values(&results, .Right, "Algorithm", "Size", "Iterations", "Scalar", "SIMD", "SIMD Relative (%)", "SIMD Relative (x)")

	for size in sizes {
		// The differing / non-zero byte goes in the middle of the buffer.
		needle_index := size / 2

		scalar_time := run_trial_size(plain, size, needle_index, RUNS_PER_SIZE)
		vector_time := run_trial_size(simd, size, needle_index, RUNS_PER_SIZE)

		// SIMD time as a fraction of the scalar time.
		ratio := f64(vector_time) / f64(scalar_time)

		scalar_cell  := fmt.tprintf("%8M", scalar_time)
		vector_cell  := fmt.tprintf("%8M", vector_time)
		percent_cell := fmt.tprintf("%.3f %%", ratio * 100.0)
		speedup_cell := fmt.tprintf("%.3f x", 1 / ratio)

		table.aligned_row_of_values(&results, .Right, algo_name, size, RUNS_PER_SIZE, scalar_cell, vector_cell, percent_cell, speedup_cell)
	}

	// Start with a newline so the table begins on its own line in the log output.
	fmt.sbprintln(&output)
	table.write_plain_table(strings.to_writer(&output), &results)

	log.info(strings.to_string(output))
}
222+
223+
// Test entry point: logs one comparison table per runtime.memory_* procedure,
// pairing each with the scalar implementation defined in this file.
@(test)
benchmark_memory_procs :: proc(t: ^testing.T) {
	bench_table(
		"memory_equal",
		plain_memory_equal,
		runtime.memory_equal,
	)
	bench_table(
		"memory_compare",
		plain_memory_compare,
		runtime.memory_compare,
	)
	bench_table(
		"memory_compare_zero",
		plain_memory_compare_zero,
		runtime.memory_compare_zero,
	)
}

0 commit comments

Comments
 (0)