ChillFish8
diff --git a/‎.github/workflows/miri.yaml‎
Lines changed: 1 addition & 7 deletions b/‎.github/workflows/miri.yaml‎
Lines changed: 1 addition & 7 deletions
diff --git a/‎.idea/runConfigurations/Test_CFAVML_w_nightly.xml‎
Lines changed: 1 addition & 1 deletion b/‎.idea/runConfigurations/Test_CFAVML_w_nightly.xml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.idea/runConfigurations/Test_CFAVML_wo_nightly.xml‎
Lines changed: 3 additions & 1 deletion b/‎.idea/runConfigurations/Test_CFAVML_wo_nightly.xml‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎cfavml-gemm/Cargo.toml‎
Lines changed: 1 addition & 1 deletion b/‎cfavml-gemm/Cargo.toml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎cfavml/Cargo.toml‎
Lines changed: 1 addition & 2 deletions b/‎cfavml/Cargo.toml‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎cfavml/README.md‎
Lines changed: 36 additions & 8 deletions b/‎cfavml/README.md‎
Lines changed: 36 additions & 8 deletions
@@ -1,11 +1,5 @@
 name: Run Miri
-on:
-  pull_request:
-    branches:
-      - main
-  push:
-    branches:
-      - main
+on: workflow_dispatch
 
 jobs:
   miri:
 
@@ -7,7 +7,7 @@ description = "BLAS-like general matrix multiplication extension for `cfavml`."
 [dependencies]
 num_cpus = "1.16.0"
 
-cfavml = { version = "0.2", path = "../cfavml" }
+cfavml = { version = "0.3", path = "../cfavml" }
 cfavml-utils = { version = "0.1", path = "../cfavml-utils" }
 
 [dev-dependencies]
 
@@ -1,6 +1,6 @@
 [package]
 name = "cfavml"
-version = "0.2.0"
+version = "0.3.0"
 edition = "2021"
 rust-version = "1.75"
 authors = ["Harrison Burt <[email protected]>"]
@@ -21,7 +21,6 @@ rand_chacha = "0.3.1"
 paste = "1.0.14"
 divan = "0.1.14"
 num-traits = "0.2.19"
-mimalloc = { version = "0.1.43", default-features = false }
 simsimd = "5.0.1"
 
 [target.'cfg(unix)'.dev-dependencies]
 
@@ -12,19 +12,31 @@ feature flag:
 
 ##### Default Setup
 ```toml
-cfavml = "0.2.0" 
+cfavml = "0.3.0" 
 ```
 
 ##### No-std Setup
 ```toml
-cfavml = { version = "0.2.0", default-features = false }
+cfavml = { version = "0.3.0", default-features = false }
 ```
 
+### Important Version Upgrade Notes
+
+If you are upgrading across a breaking release, e.g. `0.2.0` to `0.3.0`, there may be some important
+changes that affect your system, although I try my best to avoid breaking the public _safe_ APIs.
+
+- AVX512 required CPU features changed in `0.3.0+`
+  * In versions older than `0.3.0`, AVX512 was used when only the `avx512f` CPU feature was available,
+    since this is the base/foundation version of AVX512. However, in `0.3.0` we introduced more extensive
+    cmp operations (`eq/neq/lt/lte/gt/gte`), which changed our required CPU features to include `avx512bw`.
+  * **This means on _unsafe_ APIs you must update your feature checks to include `avx512bw`.**
+  * **Safe APIs do not require changes but may fall back to AVX2 on some of the first gen AVX512 CPUs, e.g. Skylake.**
+
 ### Available SIMD Architectures
 
 - AVX2
 - AVX2 + FMA
-- AVX512
+- AVX512 (`avx512f` + `avx512bw`) _nightly only_
 - NEON
 - Fallback (Typically optimized to SSE automatically by LLVM on x86)
 
@@ -50,8 +62,8 @@ with SIMD and adds a significant amount of cognitive overhead when reading the c
 Although to be honest I have some serious questions about your application if you're doing 
 heavy integer division...
 
-### Supported Operations & Distances
 
+## Supported Operations
 
 ### Spatial distances
 
@@ -80,6 +92,8 @@ These are routines that can be used for things like KNN classification or index
 - Vertical min element of two vectors
 - Vertical max element of a vector and broadcast value
 - Vertical min element of a vector and broadcast value
+- EQ/NEQ/LT/LTE/GT/GTE cmp of a vector and broadcast value
+- EQ/NEQ/LT/LTE/GT/GTE cmp of two vectors
 
 ### Aggregation
 
@@ -102,10 +116,24 @@ provided as generic functions (with no target features):
 - `generic_squared_euclidean`
 - `generic_cosine`
 - `generic_squared_norm`
-- `generic_max_horizontal`
-- `generic_max_vector`
-- `generic_min_horizontal`
-- `generic_min_vector`
+- `generic_cmp_max`
+- `generic_cmp_max_vector`
+- `generic_cmp_max_value`
+- `generic_cmp_min`
+- `generic_cmp_min_vector`
+- `generic_cmp_min_value`
+- `generic_cmp_eq_vector`
+- `generic_cmp_eq_value`
+- `generic_cmp_neq_vector`
+- `generic_cmp_neq_value`
+- `generic_cmp_lt_vector`
+- `generic_cmp_lt_value`
+- `generic_cmp_lte_vector`
+- `generic_cmp_lte_value`
+- `generic_cmp_gt_vector`
+- `generic_cmp_gt_value`
+- `generic_cmp_gte_vector`
+- `generic_cmp_gte_value`
 - `generic_sum`
 - `generic_add_value`
 - `generic_sub_value`