Skip to content

Commit 4e49fb4

Browse files
committed
Add benchmarks for runtime.memory_* comparison procedures
1 parent b15a665 commit 4e49fb4

File tree

1 file changed

+227
-0
lines changed

1 file changed

+227
-0
lines changed
Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,227 @@
1+
package benchmark_runtime
2+
3+
import "base:runtime"
4+
import "core:fmt"
5+
import "core:log"
6+
import "core:testing"
7+
import "core:strings"
8+
import "core:text/table"
9+
import "core:time"
10+
11+
// How many timed calls are made per buffer size; passed as `runs` to the
// trial procedures below.
RUNS_PER_SIZE :: 2500

// Buffer lengths, in bytes, that every benchmarked procedure is run against.
// The first rows are triples straddling 8, 16, 32, 64 and 96; the remainder
// are larger round sizes up to 1 MiB.
sizes := [?]int {
	 7,  8,  9,
	15, 16, 17,
	31, 32, 33,
	63, 64, 65,
	95, 96, 97,
	128, 256, 512,
	1024, 4096,
	1024 * 1024,
}
26+
27+
// These are the normal, unoptimized algorithms.
28+
29+
// Byte-by-byte equality check of two `n`-byte regions.
//
// Returns true immediately when `n` is zero or both pointers are identical;
// otherwise returns false at the first differing byte, and true if none is
// found.
plain_memory_equal :: proc "contextless" (x, y: rawptr, n: int) -> bool {
	if n == 0 || x == y {
		return true
	}

	lhs := ([^]byte)(x)
	rhs := ([^]byte)(y)

	for idx in 0..<uint(n) {
		if lhs[idx] != rhs[idx] {
			return false
		}
	}
	return true
}
44+
45+
// Scalar three-way comparison of two `n`-byte regions.
//
// Returns 0 when the regions are equal (or the pointers are identical),
// -1 when the first differing byte of `a` is smaller than that of `b`, and
// +1 when it is larger. A nil `a` compares less than a non-nil `b`, and a
// non-nil `a` compares greater than a nil `b`.
//
// Whole machine words are tested for equality first; the byte order of a
// mismatch is then resolved one byte at a time, so the result is the same
// as a plain byte-wise comparison.
plain_memory_compare :: proc "contextless" (a, b: rawptr, n: int) -> int #no_bounds_check {
	switch {
	case a == b:   return 0
	case a == nil: return -1
	case b == nil: return +1
	}

	x := uintptr(a)
	y := uintptr(b)
	n := uintptr(n)

	SU :: size_of(uintptr)

	// Only words that lie entirely inside the `n` bytes are loaded. Loading
	// `n/SU + 1` words would read up to SU bytes past the end of both
	// buffers whenever n >= SU.
	full_words := n / SU
	tail_start := full_words * SU

	for word := uintptr(0); word < full_words; word += 1 {
		va := (^uintptr)(x + word*SU)^
		vb := (^uintptr)(y + word*SU)^
		if va != vb {
			// The words differ, so a differing byte exists at or after the
			// start of this word; find it to decide the ordering.
			for pos := word*SU; pos < n; pos += 1 {
				lhs := (^byte)(x+pos)^
				rhs := (^byte)(y+pos)^
				if lhs != rhs {
					return -1 if lhs < rhs else +1
				}
			}
		}
	}

	// Remaining bytes that do not fill a whole word.
	for pos := tail_start; pos < n; pos += 1 {
		lhs := (^byte)(x+pos)^
		rhs := (^byte)(y+pos)^
		if lhs != rhs {
			return -1 if lhs < rhs else +1
		}
	}

	return 0
}
88+
89+
// Scalar check of whether an `n`-byte region is entirely zero.
//
// Returns 0 when every byte is zero and +1 as soon as a non-zero byte is
// found. Bytes are unsigned, so a non-zero byte can never compare below
// zero and -1 is never returned.
//
// Whole machine words are tested first; a non-zero word is then confirmed
// byte by byte.
plain_memory_compare_zero :: proc "contextless" (a: rawptr, n: int) -> int #no_bounds_check {
	x := uintptr(a)
	n := uintptr(n)

	SU :: size_of(uintptr)

	// Only words that lie entirely inside the `n` bytes are loaded. Loading
	// `n/SU + 1` words would read up to SU bytes past the end of the buffer
	// whenever n >= SU.
	full_words := n / SU
	tail_start := full_words * SU

	for word := uintptr(0); word < full_words; word += 1 {
		if (^uintptr)(x + word*SU)^ != 0 {
			for pos := word*SU; pos < n; pos += 1 {
				if (^byte)(x+pos)^ != 0 {
					return +1
				}
			}
		}
	}

	// Remaining bytes that do not fill a whole word.
	for pos := tail_start; pos < n; pos += 1 {
		if (^byte)(x+pos)^ != 0 {
			return +1
		}
	}

	return 0
}
122+
123+
// Times `runs` calls of a two-buffer comparison procedure `p`.
//
// Two zeroed buffers of `size` bytes are allocated and byte `idx` of the
// second one is set to 0x01, so the buffers differ at exactly that position.
// Each call's result is asserted: `false` when `p` returns bool, `-1` when it
// returns int. The stopwatch covers the whole loop, assertions included.
//
// Returns the elapsed time measured by the stopwatch.
run_trial_size_cmp :: proc(p: proc "contextless" (rawptr, rawptr, int) -> $R, size: int, idx: int, runs: int, loc := #caller_location) -> (timing: time.Duration) {
	lhs := make([]u8, size)
	rhs := make([]u8, size)
	defer {
		delete(lhs)
		delete(rhs)
	}

	rhs[idx] = 0x01

	// Running total derived from the results; logged after the timed loop.
	sink: int
	stopwatch: time.Stopwatch

	time.stopwatch_start(&stopwatch)
	for _ in 0..<runs {
		result := p(&lhs[0], &rhs[0], size)
		when R == bool {
			assert(result == false, loc = loc)
			sink += 1
		} else when R == int {
			assert(result == -1, loc = loc)
			sink += result
		}
	}
	time.stopwatch_stop(&stopwatch)

	timing = time.stopwatch_duration(stopwatch)
	log.debug(sink)
	return
}
154+
155+
// Times `runs` calls of a single-buffer zero-check procedure `p`.
//
// A zeroed buffer of `size` bytes is allocated and byte `idx` is set to
// 0x01. Every call is asserted to return 1. The stopwatch covers the whole
// loop, assertions included.
//
// Returns the elapsed time measured by the stopwatch.
run_trial_size_zero :: proc(p: proc "contextless" (rawptr, int) -> int, size: int, idx: int, runs: int, loc := #caller_location) -> (timing: time.Duration) {
	buffer := make([]u8, size)
	defer delete(buffer)

	buffer[idx] = 0x01

	// Running total of the results; logged after the timed loop.
	sink: int
	stopwatch: time.Stopwatch

	time.stopwatch_start(&stopwatch)
	for _ in 0..<runs {
		result := p(&buffer[0], size)
		assert(result == 1, loc = loc)
		sink += result
	}
	time.stopwatch_stop(&stopwatch)

	timing = time.stopwatch_duration(stopwatch)
	log.debug(sink)
	return
}
177+
178+
// Overload set: resolves to the two-buffer or the single-buffer trial
// depending on the signature of the procedure being timed.
run_trial_size :: proc { run_trial_size_cmp, run_trial_size_zero }
182+
183+
184+
// Benchmarks `plain` against `simd` for every entry in `sizes` and logs the
// results as one right-aligned text table.
//
// For each size both procedures are timed over RUNS_PER_SIZE calls with the
// non-zero byte placed at `size / 2`. Each row reports both timings and the
// SIMD time relative to the scalar one, as a percentage and as a factor.
bench_table :: proc(algo_name: string, plain, simd: $P) {
	output := strings.builder_make()
	defer strings.builder_destroy(&output)

	report: table.Table
	table.init(&report)
	defer table.destroy(&report)

	table.aligned_header_of_values(&report, .Right, "Algorithm", "Size", "Iterations", "Scalar", "SIMD", "SIMD Relative (%)", "SIMD Relative (x)")

	for size in sizes {
		// Place the non-zero byte somewhere in the middle.
		needle_index := size / 2

		scalar_time := run_trial_size(plain, size, needle_index, RUNS_PER_SIZE)
		vector_time := run_trial_size(simd, size, needle_index, RUNS_PER_SIZE)

		// SIMD time as a fraction of the scalar time.
		ratio := f64(vector_time) / f64(scalar_time)

		scalar_text  := fmt.tprintf("%8M", scalar_time)
		vector_text  := fmt.tprintf("%8M", vector_time)
		percent_text := fmt.tprintf("%.3f %%", ratio * 100.0)
		factor_text  := fmt.tprintf("%.3f x", 1 / ratio)

		table.aligned_row_of_values(&report, .Right, algo_name, size, RUNS_PER_SIZE, scalar_text, vector_text, percent_text, factor_text)
	}

	writer := strings.to_writer(&output)

	// Leading newline so the table starts on its own line in the log output.
	fmt.sbprintln(&output)
	table.write_plain_table(writer, &report)

	log.info(strings.to_string(output))
}
221+
222+
// Test entry point: emits one comparison table per runtime memory procedure,
// pairing each scalar reference above with its `base:runtime` counterpart.
@(test)
benchmark_memory_procs :: proc(t: ^testing.T) {
	bench_table("memory_equal",        plain_memory_equal,        runtime.memory_equal)
	bench_table("memory_compare",      plain_memory_compare,      runtime.memory_compare)
	bench_table("memory_compare_zero", plain_memory_compare_zero, runtime.memory_compare_zero)
}

0 commit comments

Comments
 (0)