//! Common arithmetic operations
//!
//! E.g. Add, Sub, Mul, Div, Hypot...

use crate::buffer::WriteOnlyBuffer;
use crate::danger::core_simd_api::Hypot;
use crate::danger::{generic_hypot_vertical, SimdRegister};
use crate::math::{AutoMath, Math, Numeric};
use crate::mem_loader::{IntoMemLoader, MemLoader};
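
// `define_hypot_impls!` stamps out one `pub unsafe fn` per SIMD backend. Each
// generated function accepts anything implementing `IntoMemLoader` for its two
// operands (e.g. a buffer or a broadcast scalar, as the tests below exercise) and
// forwards to `generic_hypot_vertical`, which writes element-wise `hypot(a, b)`
// into `result`. The optional `target_features = ...` list becomes
// `#[target_feature(enable = ...)]` attributes plus a safety note in the docs.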
macro_rules! define_hypot_impls {
    (
        $hypot_name:ident,
        $imp:ident $(,)?
        $(target_features = $($feat:expr $(,)?)+)?
    ) => {
        #[inline]
        $(#[target_feature($(enable = $feat, )*)])*
        #[doc = include_str!("../export_docs/arithmetic_hypot_vertical.md")]
        $(
            #[doc = concat!("- ", $("**`+", $feat, "`** ", )*)]
            #[doc = "CPU features are available at runtime. Running on hardware _without_ this feature available will cause immediate UB."]
        )*
        pub unsafe fn $hypot_name<T, B1, B2, B3>(
            a: B1,
            b: B2,
            result: &mut [B3],
        )
        where
            T: Copy,
            B1: IntoMemLoader<T>,
            B1::Loader: MemLoader<Value = T>,
            B2: IntoMemLoader<T>,
            B2::Loader: MemLoader<Value = T>,
            crate::danger::$imp: SimdRegister<T> + Hypot<T>,
            AutoMath: Math<T> + Numeric<T>,
            for<'a> &'a mut [B3]: WriteOnlyBuffer<Item = T>,
        {
            generic_hypot_vertical::<T, crate::danger::$imp, AutoMath, B1, B2, B3>(
                a,
                b,
                result,
            )
        }
    };
}

define_hypot_impls!(generic_fallback_hypot_vertical, Fallback,);
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
define_hypot_impls!(generic_avx2_hypot_vertical, Avx2, target_features = "avx2");
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
define_hypot_impls!(
    generic_avx2fma_hypot_vertical,
    Avx2Fma,
    target_features = "avx2",
    "fma"
);
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "nightly"))]
define_hypot_impls!(
    generic_avx512_hypot_vertical,
    Avx512,
    target_features = "avx512f",
    "avx512bw"
);
#[cfg(target_arch = "aarch64")]
define_hypot_impls!(generic_neon_hypot_vertical, Neon, target_features = "neon");
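
// Usage sketch (illustrative only, not from the original source): the second
// operand may be another buffer of the same length as `a`, or a scalar that the
// `MemLoader` machinery broadcasts across every element.
//
//     let a = vec![3.0f32, 5.0, 8.0];
//     let mut out = vec![0.0f32; a.len()];
//     // SAFETY: the fallback kernel requires no special CPU features.
//     unsafe { generic_fallback_hypot_vertical(&a, 4.0f32, &mut out) };
//     // out[0] is now 3.0f32.hypot(4.0), i.e. 5.0 (up to rounding).
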
#[cfg(test)]
mod tests {
    use num_traits::{Float, FloatConst};

    use super::*;
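
    // `define_inner_test!` expands (via `paste`) into four tests per
    // (variant, op, type): broadcast-scalar and vector inputs, over both normal
    // and subnormal sample data, each checking the SIMD result against the
    // scalar `AutoMath` reference within 1 ULP.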
    macro_rules! define_inner_test {
        ($variant:ident, op = $op:ident, ty = $t:ident) => {
            paste::paste! {
                #[test]
                fn [< $variant _ $op _value_ $t >]() {
                    let (l1, _) = crate::test_utils::get_sample_vectors::<$t>(533);

                    let mut result = vec![$t::default(); 533];
                    unsafe { [< $variant _ $op _vertical >](&l1, 2 as $t, &mut result) };

                    let expected = l1.iter()
                        .copied()
                        .map(|v| AutoMath::$op(v, 2 as $t))
                        .collect::<Vec<_>>();

                    for ((initial, expected), actual) in l1.iter().zip(&expected).zip(&result) {
                        let ulps_diff = get_diff_ulps(*expected, *actual);
                        assert!(
                            ulps_diff.abs() <= 1,
                            "result differs by more than 1 ULP:\n initial inputs: {}, 2\n expected: {} actual: {}\nulps diff: {}",
                            initial, expected, actual, ulps_diff
                        );
                    }
                }

                #[test]
                fn [< $variant _ $op _vector_ $t >]() {
                    let (l1, l2) = crate::test_utils::get_sample_vectors::<$t>(533);

                    let mut result = vec![$t::default(); 533];
                    unsafe { [< $variant _ $op _vertical >](&l1, &l2, &mut result) };

                    let expected = l1.iter()
                        .copied()
                        .zip(l2.iter().copied())
                        .map(|(a, b)| AutoMath::$op(a, b))
                        .collect::<Vec<_>>();

                    for (((a, b), expected), actual) in l1.iter().zip(&l2).zip(&expected).zip(&result) {
                        let ulps_diff = get_diff_ulps(*expected, *actual);
                        assert!(
                            ulps_diff.abs() <= 1,
                            "result differs by more than 1 ULP:\n initial inputs: {}, {}\n expected: {} actual: {}\nulps diff: {}",
                            a, b, expected, actual, ulps_diff
                        );
                    }
                }

                #[test]
                fn [< $variant _ $op _subnormal_value_ $t >]() {
                    let (l1, _) = crate::test_utils::get_subnormal_sample_vectors::<$t>(533);

                    let mut result = vec![$t::default(); 533];
                    unsafe { [< $variant _ $op _vertical >](&l1, 2 as $t, &mut result) };

                    let expected = l1.iter()
                        .copied()
                        .map(|v| AutoMath::$op(v, 2 as $t))
                        .collect::<Vec<_>>();

                    for ((initial, expected), actual) in l1.iter().zip(&expected).zip(&result) {
                        let ulps_diff = get_diff_ulps(*expected, *actual);
                        assert!(
                            ulps_diff.abs() <= 1,
                            "result differs by more than 1 ULP:\n initial inputs: {}, 2\n expected: {} actual: {}\nulps diff: {}",
                            initial, expected, actual, ulps_diff
                        );
                    }
                }

                #[test]
                fn [< $variant _ $op _subnormal_vector_ $t >]() {
                    let (l1, l2) = crate::test_utils::get_subnormal_sample_vectors::<$t>(533);

                    let mut result = vec![$t::default(); 533];
                    unsafe { [< $variant _ $op _vertical >](&l1, &l2, &mut result) };

                    let expected = l1.iter()
                        .copied()
                        .zip(l2.iter().copied())
                        .map(|(a, b)| AutoMath::$op(a, b))
                        .collect::<Vec<_>>();

                    for (((a, b), expected), actual) in l1.iter().zip(&l2).zip(&expected).zip(&result) {
                        let ulps_diff = get_diff_ulps(*expected, *actual);
                        assert!(
                            ulps_diff.abs() <= 1,
                            "result differs by more than 1 ULP:\n initial inputs: {}, {}\n expected: {} actual: {}\nulps diff: {}",
                            a, b, expected, actual, ulps_diff
                        );
                    }
                }
            }
        };
    }

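    /// Returns the signed difference between `a` and `b` in ULPs (units in the
    /// last place), computed from the raw mantissas; asserts that both values
    /// already share the same sign and exponent.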
    fn get_diff_ulps<T>(a: T, b: T) -> i64
    where
        T: Float + FloatConst,
    {
        let (a_mant, a_exp, a_sign) = a.integer_decode();
        let (b_mant, b_exp, b_sign) = b.integer_decode();
        assert!(a_sign == b_sign);
        assert!(a_exp == b_exp);
        a_mant as i64 - b_mant as i64
    }

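    // Instantiates the full set of hypot tests for each listed float type.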
    macro_rules! define_numeric_test {
        ($variant:ident, types = $($t:ident $(,)?)+) => {
            $(
                define_inner_test!($variant, op = hypot, ty = $t);
            )*
        };
    }

    define_numeric_test!(generic_fallback, types = f32, f64,);
    #[cfg(all(
        any(target_arch = "x86", target_arch = "x86_64"),
        target_feature = "avx2"
    ))]
    define_numeric_test!(generic_avx2, types = f32, f64,);
    #[cfg(all(
        any(target_arch = "x86", target_arch = "x86_64"),
        target_feature = "avx2",
        target_feature = "fma"
    ))]
    define_numeric_test!(generic_avx2fma, types = f32, f64,);
    #[cfg(all(
        any(target_arch = "x86", target_arch = "x86_64"),
        feature = "nightly",
        target_feature = "avx512f"
    ))]
    define_numeric_test!(generic_avx512, types = f32, f64,);
    #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
    define_numeric_test!(generic_neon, types = f32, f64,);
}