Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .idea/ndarray-accel.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 21 additions & 0 deletions .idea/runConfigurations/Coverage_x86_CFAVML_NDARRAY_w_nightly.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion .idea/runConfigurations/Test_CFAVML_w_nightly.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion .idea/runConfigurations/Test_CFAVML_wo_nightly.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@ resolver = "2"
members = [
"cfavml",
"cfavml-gemm",
"cfavml-ndarray",
"cfavml-utils",
# Testing and profiling
"cfavml/asm-view"
]

[profile.test]
overflow-checks = false
2 changes: 1 addition & 1 deletion cfavml-gemm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ description = "BLAS-like general matrix multiplication extension for `cfavml`."
[dependencies]
num_cpus = "1.16.0"

cfavml = { version = "0.3", path = "../cfavml" }
cfavml = { version = "0.4", path = "../cfavml" }
cfavml-utils = { version = "0.1", path = "../cfavml-utils" }

[dev-dependencies]
Expand Down
32 changes: 32 additions & 0 deletions cfavml-ndarray/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
[package]
name = "cfavml-ndarray"
version = "0.1.0"
edition = "2021"
rust-version = "1.75"
authors = ["Harrison Burt <[email protected]>"]
description = "Accelerate your ndarray operations with CFAVML's SIMD acceleration."
keywords = ["linear-algebra", "vector", "simd"]
categories = ["concurrency"]
readme = "README.md"
repository = "https://github.com/ChillFish8/cfavml"
license = "MIT OR Apache-2.0"

[dependencies]
ndarray = "0.16"

cfavml = { version = "0.4.0", path = "../cfavml" }

[features]
# Enables cfavml's nightly-only optimizations, which include AVX512 support.
nightly = ["cfavml/nightly"]

[dev-dependencies]
ndarray-rand = "0.15.0"
paste = "1.0.15"
divan = "0.1.14"

mimalloc = { version = "0.1.43", default-features = false }

[[bench]]
name = "bench_arithmetic_ops"
harness = false
11 changes: 11 additions & 0 deletions cfavml-ndarray/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# cfavml-ndarray

Accelerate your ndarray operations using CFAVML's SIMD routines.

This library acts as an extension system to your existing ndarray workloads, allowing
you to incrementally switch over to the CFAVML-optimized routines, bringing SIMD acceleration
to all primitive integer and float types across x86 and ARM hardware.

### NOTE

Currently, this library is a WIP and requires alloc and std.
174 changes: 174 additions & 0 deletions cfavml-ndarray/benches/bench_arithmetic_ops.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
use std::hint::black_box;

use divan::Bencher;
use divan::counter::ItemsCount;
use ndarray::Array3;
use ndarray_rand::rand_distr::Uniform;
use ndarray_rand::RandomExt;

// Register mimalloc as the global allocator for this benchmark binary, so every
// heap allocation made by the benchmarks below goes through it.
#[global_allocator]
static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;

/// Benchmark entry point: hands control to divan, which runs the functions
/// registered with `#[divan::bench]` in this file.
fn main() {
    divan::main();
}


/// Multiplies two equally shaped `(10, 256, 256)` arrays with
/// `cfavml_ndarray::ops::mul`.
///
/// The left-hand array is cloned through `with_inputs` and handed to the
/// benchmarked closure by value, so (as the name says) the copy is meant to
/// stay outside of the measured region.
#[divan::bench(
    sample_count = 32,
    sample_size = 25,
    counters = [ItemsCount::new(2usize * 10 * 256 * 256)],
)]
fn bench_cfavml_two_array_no_broadcast_no_time_alloc(bencher: Bencher) {
    let lhs = Array3::random((10, 256, 256), Uniform::new(1.0, 10.0));
    let rhs = Array3::random((10, 256, 256), Uniform::new(1.0, 10.0));

    bencher
        .with_inputs(|| lhs.clone())
        .bench_local_values(|input| cfavml_ndarray::ops::mul(input, black_box(&rhs)));
}

/// Multiplies two equally shaped `(10, 256, 256)` arrays with
/// `cfavml_ndarray::ops::mul`.
///
/// The left-hand array is cloned inside the benchmarked closure, so the copy
/// is part of the measured work.
#[divan::bench(
    sample_count = 32,
    sample_size = 25,
    counters = [ItemsCount::new(2usize * 10 * 256 * 256)],
)]
fn bench_cfavml_two_array_no_broadcast_time_alloc(bencher: Bencher) {
    let lhs = Array3::random((10, 256, 256), Uniform::new(1.0, 10.0));
    let rhs = Array3::random((10, 256, 256), Uniform::new(1.0, 10.0));

    bencher.bench_local(|| {
        cfavml_ndarray::ops::mul(black_box(&lhs).clone(), black_box(&rhs))
    });
}

/// Multiplies a `(10, 256, 256)` array by the scalar `3.0` with
/// `cfavml_ndarray::ops::mul`.
///
/// The array is cloned through `with_inputs` and handed to the benchmarked
/// closure by value, so (as the name says) the copy is meant to stay outside
/// of the measured region.
///
/// NOTE(review): the counter reports `2 * 10 * 256 * 256` items although only
/// one array of `10 * 256 * 256` elements is involved here; confirm that
/// this is the intended throughput figure.
#[divan::bench(
    sample_count = 32,
    sample_size = 25,
    counters = [ItemsCount::new(2usize * 10 * 256 * 256)],
)]
fn bench_cfavml_one_array_broadcast_value_no_time_alloc(bencher: Bencher) {
    let source = Array3::random((10, 256, 256), Uniform::new(1.0, 10.0));

    bencher
        .with_inputs(|| source.clone())
        .bench_local_values(|input| cfavml_ndarray::ops::mul(input, black_box(3.0)));
}

/// Multiplies a `(10, 256, 256)` array by the scalar `3.0` with
/// `cfavml_ndarray::ops::mul`.
///
/// The array is cloned inside the benchmarked closure, so the copy is part of
/// the measured work.
///
/// NOTE(review): the counter reports `2 * 10 * 256 * 256` items although only
/// one array of `10 * 256 * 256` elements is involved here; confirm that
/// this is the intended throughput figure.
#[divan::bench(
    sample_count = 32,
    sample_size = 25,
    counters = [ItemsCount::new(2usize * 10 * 256 * 256)],
)]
fn bench_cfavml_one_array_broadcast_value_time_alloc(bencher: Bencher) {
    let source = Array3::random((10, 256, 256), Uniform::new(1.0, 10.0));

    bencher.bench_local(|| {
        cfavml_ndarray::ops::mul(black_box(&source).clone(), black_box(3.0))
    });
}

/// Baseline: multiplies two equally shaped `(10, 256, 256)` arrays with
/// ndarray's own `*` operator (owned array times borrowed array).
///
/// The left-hand array is cloned through `with_inputs` and handed to the
/// benchmarked closure by value, so (as the name says) the copy is meant to
/// stay outside of the measured region.
#[divan::bench(
    sample_count = 32,
    sample_size = 25,
    counters = [ItemsCount::new(2usize * 10 * 256 * 256)],
)]
fn bench_default_two_array_no_broadcast_no_time_alloc(bencher: Bencher) {
    let lhs = Array3::random((10, 256, 256), Uniform::new(1.0, 10.0));
    let rhs = Array3::random((10, 256, 256), Uniform::new(1.0, 10.0));

    bencher
        .with_inputs(|| lhs.clone())
        .bench_local_values(|input| input * black_box(&rhs));
}

/// Baseline: multiplies two equally shaped `(10, 256, 256)` arrays with
/// ndarray's own `*` operator (owned array times borrowed array).
///
/// The left-hand array is cloned inside the benchmarked closure, so the copy
/// is part of the measured work.
#[divan::bench(
    sample_count = 32,
    sample_size = 25,
    counters = [ItemsCount::new(2usize * 10 * 256 * 256)],
)]
fn bench_default_two_array_no_broadcast_time_alloc(bencher: Bencher) {
    let lhs = Array3::random((10, 256, 256), Uniform::new(1.0, 10.0));
    let rhs = Array3::random((10, 256, 256), Uniform::new(1.0, 10.0));

    bencher.bench_local(|| black_box(&lhs).clone() * black_box(&rhs));
}

/// Baseline: multiplies two equally shaped `(10, 256, 256)` arrays with
/// ndarray's own `*` operator, with both operands passed by reference.
///
/// No explicit clone is made in this benchmark; the product of the two
/// borrowed arrays is returned from the benchmarked closure.
#[divan::bench(
    sample_count = 32,
    sample_size = 25,
    counters = [ItemsCount::new(2usize * 10 * 256 * 256)],
)]
fn bench_default_two_array_no_broadcast_two_views(bencher: Bencher) {
    let lhs = Array3::random((10, 256, 256), Uniform::new(1.0, 10.0));
    let rhs = Array3::random((10, 256, 256), Uniform::new(1.0, 10.0));

    bencher.bench_local(|| black_box(&lhs) * black_box(&rhs));
}

/// Baseline: multiplies a `(10, 256, 256)` array by the scalar `3.0` with
/// ndarray's own `*` operator (owned array times scalar).
///
/// The array is cloned through `with_inputs` and handed to the benchmarked
/// closure by value, so (as the name says) the copy is meant to stay outside
/// of the measured region.
///
/// NOTE(review): the counter reports `2 * 10 * 256 * 256` items although only
/// one array of `10 * 256 * 256` elements is involved here; confirm that
/// this is the intended throughput figure.
#[divan::bench(
    sample_count = 32,
    sample_size = 25,
    counters = [ItemsCount::new(2usize * 10 * 256 * 256)],
)]
fn bench_default_one_array_broadcast_value_no_time_alloc(bencher: Bencher) {
    let source = Array3::random((10, 256, 256), Uniform::new(1.0, 10.0));

    bencher
        .with_inputs(|| source.clone())
        .bench_local_values(|input| input * black_box(3.0));
}

/// Baseline: multiplies a `(10, 256, 256)` array by the scalar `3.0` with
/// ndarray's own `*` operator (owned array times scalar).
///
/// The array is cloned inside the benchmarked closure, so the copy is part of
/// the measured work.
///
/// NOTE(review): the counter reports `2 * 10 * 256 * 256` items although only
/// one array of `10 * 256 * 256` elements is involved here; confirm that
/// this is the intended throughput figure.
#[divan::bench(
    sample_count = 32,
    sample_size = 25,
    counters = [ItemsCount::new(2usize * 10 * 256 * 256)],
)]
fn bench_default_one_array_broadcast_value_time_alloc(bencher: Bencher) {
    let source = Array3::random((10, 256, 256), Uniform::new(1.0, 10.0));

    bencher.bench_local(|| black_box(&source).clone() * black_box(3.0));
}

/// Baseline: multiplies a borrowed `(10, 256, 256)` array by the scalar `3.0`
/// with ndarray's own `*` operator.
///
/// No explicit clone is made in this benchmark; the product of the borrowed
/// array and the scalar is returned from the benchmarked closure.
///
/// NOTE(review): the counter reports `2 * 10 * 256 * 256` items although only
/// one array of `10 * 256 * 256` elements is involved here; confirm that
/// this is the intended throughput figure.
#[divan::bench(
    sample_count = 32,
    sample_size = 25,
    counters = [ItemsCount::new(2usize * 10 * 256 * 256)],
)]
fn bench_default_one_array_broadcast_value_with_view(bencher: Bencher) {
    let source = Array3::random((10, 256, 256), Uniform::new(1.0, 10.0));

    bencher.bench_local(|| black_box(&source) * black_box(3.0));
}
Loading
Loading