Skip to content

Commit 0538c1f

Browse files
committed
Fix euclidean distance measures
1 parent a03fc05 commit 0538c1f

6 files changed

+9
-46
lines changed

cfavml/BENCHMARKS.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ power management:
6666
### Results
6767

6868
- [Run 2024-08-16](benchmark-runs/hetzner-cpx51-amd-cfavml-v0_2_0-2024-08-16.txt)
69+
- Note: Euclidean distance results are omitted from this run because the Euclidean benchmark mistakenly called `cfavml::cosine` instead of `cfavml::squared_euclidean`.
6970

7071

7172
## Benchmarks - Intel Xeon (Skylake)
@@ -111,6 +112,7 @@ power management:
111112
### Results
112113

113114
- [Run 2024-08-16](benchmark-runs/hetzner-cx52-intel-cfavml-v0_2_0-2024-08-16.txt)
115+
- Note: Euclidean distance results are omitted from this run because the Euclidean benchmark mistakenly called `cfavml::cosine` instead of `cfavml::squared_euclidean`.
114116

115117

116118
## Benchmarks - Ampere (ARM)
@@ -125,6 +127,7 @@ Ndarray compiled with openblas installed via `libopenblas-dev`.
125127
### Results
126128

127129
- [Run 2024-08-16](benchmark-runs/hetzner-cax41-ampere-cfavml-v0_2_0-2024-08-16.txt)
130+
- Note: Euclidean distance results are omitted from this run because the Euclidean benchmark mistakenly called `cfavml::cosine` instead of `cfavml::squared_euclidean`.
128131

129132

130133
## Benchmarks - AMD Ryzen™ 7
@@ -170,4 +173,5 @@ power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14] [15]
170173

171174
### Results
172175

173-
- [Run 2024-08-16](benchmark-runs/hetzner-ax42-amd-cfavml-v0_2_0-2024-08-16)
176+
- [Run 2024-08-16](benchmark-runs/hetzner-ax42-amd-cfavml-v0_2_0-2024-08-16)
177+
- Note: Euclidean distance results are omitted from this run because the Euclidean benchmark mistakenly called `cfavml::cosine` instead of `cfavml::squared_euclidean`.

cfavml/benches/bench_distance_ops.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ mod euclidean {
173173
let l1_view = black_box(l1.as_ref());
174174
let l2_view = black_box(l2.as_ref());
175175

176-
cfavml::cosine(l1_view, l2_view)
176+
cfavml::squared_euclidean(l1_view, l2_view)
177177
});
178178
}
179179
}

cfavml/benchmark-runs/hetzner-ax42-amd-cfavml-v0_2_0-2024-08-16

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -703,14 +703,4 @@ bench_distance_ops fastest │ slowest │ median │ mean
703703
│ │ 4.557 Gitem/s │ 4.31 Gitem/s │ 4.545 Gitem/s │ 4.537 Gitem/s │ │
704704
│ ╰─ u8 321.8 ns │ 348.7 ns │ 322.2 ns │ 322.6 ns │ 500 │ 2500000
705705
│ 4.772 Gitem/s │ 4.404 Gitem/s │ 4.766 Gitem/s │ 4.76 Gitem/s │ │
706-
╰─ euclidean │ │ │ │ │
707-
├─ cfavml │ │ │ │ │
708-
│ ├─ f32 119.4 ns │ 139 ns │ 121 ns │ 121.1 ns │ 500 │ 2500000
709-
│ │ 12.86 Gitem/s │ 11.04 Gitem/s │ 12.69 Gitem/s │ 12.67 Gitem/s │ │
710-
│ ╰─ f64 206.3 ns │ 225.2 ns │ 207.2 ns │ 207.2 ns │ 500 │ 2500000
711-
│ 7.444 Gitem/s │ 6.817 Gitem/s │ 7.412 Gitem/s │ 7.411 Gitem/s │ │
712-
╰─ ndarray │ │ │ │ │
713-
├─ f32 165.2 ns │ 195.4 ns │ 170.6 ns │ 170.7 ns │ 500 │ 2500000
714-
│ 9.294 Gitem/s │ 7.859 Gitem/s │ 9.002 Gitem/s │ 8.995 Gitem/s │ │
715-
╰─ f64 368.8 ns │ 399.6 ns │ 369.5 ns │ 369.9 ns │ 500 │ 2500000
716-
4.164 Gitem/s │ 3.843 Gitem/s │ 4.156 Gitem/s │ 4.151 Gitem/s │ │
706+

cfavml/benchmark-runs/hetzner-cax41-ampere-cfavml-v0_2_0-2024-08-16.txt

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -703,14 +703,4 @@ bench_distance_ops fastest │ slowest │ median │ mean
703703
│ │ 996.1 Mitem/s │ 970.6 Mitem/s │ 992.8 Mitem/s │ 991.9 Mitem/s │ │
704704
│ ╰─ u8 132.3 ns │ 141.8 ns │ 132.8 ns │ 133.2 ns │ 500 │ 2500000
705705
│ 11.6 Gitem/s │ 10.82 Gitem/s │ 11.56 Gitem/s │ 11.52 Gitem/s │ │
706-
╰─ euclidean │ │ │ │ │
707-
├─ cfavml │ │ │ │ │
708-
│ ├─ f32 298.7 ns │ 310.3 ns │ 299.9 ns │ 300.5 ns │ 500 │ 2500000
709-
│ │ 5.141 Gitem/s │ 4.949 Gitem/s │ 5.121 Gitem/s │ 5.111 Gitem/s │ │
710-
│ ╰─ f64 597.6 ns │ 664 ns │ 603.9 ns │ 606 ns │ 500 │ 2500000
711-
│ 2.57 Gitem/s │ 2.313 Gitem/s │ 2.543 Gitem/s │ 2.534 Gitem/s │ │
712-
╰─ ndarray │ │ │ │ │
713-
├─ f32 427.1 ns │ 445.5 ns │ 429.4 ns │ 429.9 ns │ 500 │ 2500000
714-
│ 3.595 Gitem/s │ 3.447 Gitem/s │ 3.576 Gitem/s │ 3.572 Gitem/s │ │
715-
╰─ f64 1.085 µs │ 1.175 µs │ 1.097 µs │ 1.098 µs │ 500 │ 2500000
716-
1.414 Gitem/s │ 1.306 Gitem/s │ 1.399 Gitem/s │ 1.398 Gitem/s │ │
706+

cfavml/benchmark-runs/hetzner-cpx51-amd-cfavml-v0_2_0-2024-08-16.txt

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -703,14 +703,3 @@ bench_distance_ops fastest │ slowest │ median │ mean
703703
│ │ 3.165 Gitem/s │ 119.9 Mitem/s │ 3.108 Gitem/s │ 2.886 Gitem/s │ │
704704
│ ╰─ u8 667.4 ns │ 12.95 µs │ 678.3 ns │ 734.6 ns │ 500 │ 2500000
705705
│ 2.301 Gitem/s │ 118.5 Mitem/s │ 2.264 Gitem/s │ 2.09 Gitem/s │ │
706-
╰─ euclidean │ │ │ │ │
707-
├─ cfavml │ │ │ │ │
708-
│ ├─ f32 197.7 ns │ 291 ns │ 202.1 ns │ 207.5 ns │ 500 │ 2500000
709-
│ │ 7.766 Gitem/s │ 5.276 Gitem/s │ 7.599 Gitem/s │ 7.402 Gitem/s │ │
710-
│ ╰─ f64 393.6 ns │ 12.78 µs │ 402.2 ns │ 461 ns │ 500 │ 2500000
711-
│ 3.902 Gitem/s │ 120.1 Mitem/s │ 3.818 Gitem/s │ 3.331 Gitem/s │ │
712-
╰─ ndarray │ │ │ │ │
713-
├─ f32 321.7 ns │ 472.6 ns │ 329.4 ns │ 340.7 ns │ 500 │ 2500000
714-
│ 4.774 Gitem/s │ 3.249 Gitem/s │ 4.662 Gitem/s │ 4.507 Gitem/s │ │
715-
╰─ f64 565.5 ns │ 13.04 µs │ 583.7 ns │ 669.1 ns │ 500 │ 2500000
716-
2.715 Gitem/s │ 117.7 Mitem/s │ 2.631 Gitem/s │ 2.295 Gitem/s │ │

cfavml/benchmark-runs/hetzner-cx52-intel-cfavml-v0_2_0-2024-08-16.txt

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -703,14 +703,4 @@ bench_distance_ops fastest │ slowest │ median │ mean
703703
│ │ 2.677 Gitem/s │ 1.443 Gitem/s │ 2.45 Gitem/s │ 2.354 Gitem/s │ │
704704
│ ╰─ u8 571.7 ns │ 1.262 µs │ 694 ns │ 768.8 ns │ 500 │ 2500000
705705
│ 2.686 Gitem/s │ 1.216 Gitem/s │ 2.213 Gitem/s │ 1.997 Gitem/s │ │
706-
╰─ euclidean │ │ │ │ │
707-
├─ cfavml │ │ │ │ │
708-
│ ├─ f32 316.4 ns │ 538.4 ns │ 340.1 ns │ 359.4 ns │ 500 │ 2500000
709-
│ │ 4.853 Gitem/s │ 2.852 Gitem/s │ 4.515 Gitem/s │ 4.272 Gitem/s │ │
710-
│ ╰─ f64 606.3 ns │ 1.181 µs │ 700.5 ns │ 755.8 ns │ 500 │ 2500000
711-
│ 2.533 Gitem/s │ 1.3 Gitem/s │ 2.192 Gitem/s │ 2.032 Gitem/s │ │
712-
╰─ ndarray │ │ │ │ │
713-
├─ f32 535 ns │ 1.609 µs │ 689.4 ns │ 716.7 ns │ 500 │ 2500000
714-
│ 2.87 Gitem/s │ 954 Mitem/s │ 2.227 Gitem/s │ 2.142 Gitem/s │ │
715-
╰─ f64 1.003 µs │ 1.976 µs │ 1.211 µs │ 1.247 µs │ 500 │ 2500000
716-
1.529 Gitem/s │ 777.2 Mitem/s │ 1.267 Gitem/s │ 1.231 Gitem/s │ │
706+

0 commit comments

Comments
 (0)