From be63d40977134cc8d25895f1114afeb7e3c4d969 Mon Sep 17 00:00:00 2001 From: Magnus Lundmark Date: Thu, 28 Sep 2023 08:37:58 +0200 Subject: [PATCH] renamed for consistency Signed-off-by: Magnus Lundmark --- include/volk/volk_avx2_fma_intrinsics.h | 2 +- include/volk/volk_avx_intrinsics.h | 2 +- include/volk/volk_sse_intrinsics.h | 2 +- kernels/volk/volk_32f_atan_32f.h | 12 ++++++------ 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/volk/volk_avx2_fma_intrinsics.h b/include/volk/volk_avx2_fma_intrinsics.h index 1027200f..03b24e6c 100644 --- a/include/volk/volk_avx2_fma_intrinsics.h +++ b/include/volk/volk_avx2_fma_intrinsics.h @@ -23,7 +23,7 @@ * Maximum relative error ~6.5e-7 * Polynomial evaluated via Horner's method */ -static inline __m256 _m256_arctan_approximation_avx2_fma(const __m256 x) +static inline __m256 _m256_arctan_poly_avx2_fma(const __m256 x) { const __m256 a1 = _mm256_set1_ps(+0x1.ffffeap-1f); const __m256 a3 = _mm256_set1_ps(-0x1.55437p-2f); diff --git a/include/volk/volk_avx_intrinsics.h b/include/volk/volk_avx_intrinsics.h index 1277a053..2fc0f064 100644 --- a/include/volk/volk_avx_intrinsics.h +++ b/include/volk/volk_avx_intrinsics.h @@ -24,7 +24,7 @@ * Maximum relative error ~6.5e-7 * Polynomial evaluated via Horner's method */ -static inline __m256 _m256_arctan_approximation_avx(const __m256 x) +static inline __m256 _m256_arctan_poly_avx(const __m256 x) { const __m256 a1 = _mm256_set1_ps(+0x1.ffffeap-1f); const __m256 a3 = _mm256_set1_ps(-0x1.55437p-2f); diff --git a/include/volk/volk_sse_intrinsics.h b/include/volk/volk_sse_intrinsics.h index 2cd0dd01..0ede1784 100644 --- a/include/volk/volk_sse_intrinsics.h +++ b/include/volk/volk_sse_intrinsics.h @@ -24,7 +24,7 @@ * Maximum relative error ~6.5e-7 * Polynomial evaluated via Horner's method */ -static inline __m128 _mm_arctan_approximation_sse(const __m128 x) +static inline __m128 _mm_arctan_poly_sse(const __m128 x) { const __m128 a1 = _mm_set1_ps(+0x1.ffffeap-1f); const __m128 a3 = _mm_set1_ps(-0x1.55437p-2f); diff --git a/kernels/volk/volk_32f_atan_32f.h b/kernels/volk/volk_32f_atan_32f.h index b1d68ed9..dc5987cb 100644 --- a/kernels/volk/volk_32f_atan_32f.h +++ b/kernels/volk/volk_32f_atan_32f.h @@ -77,7 +77,7 @@ volk_32f_atan_32f_a_avx2_fma(float* out, const float* in, unsigned int num_point __m256 swap_mask = _mm256_cmp_ps(_mm256_and_ps(x, abs_mask), one, _CMP_GT_OS); __m256 x_star = _mm256_div_ps(_mm256_blendv_ps(x, one, swap_mask), _mm256_blendv_ps(one, x, swap_mask)); - __m256 result = _m256_arctan_approximation_avx2_fma(x_star); + __m256 result = _m256_arctan_poly_avx2_fma(x_star); __m256 term = _mm256_and_ps(x_star, sign_mask); term = _mm256_or_ps(pi_over_2, term); term = _mm256_sub_ps(term, result); @@ -112,7 +112,7 @@ volk_32f_atan_32f_a_avx2(float* out, const float* in, unsigned int num_points) __m256 swap_mask = _mm256_cmp_ps(_mm256_and_ps(x, abs_mask), one, _CMP_GT_OS); __m256 x_star = _mm256_div_ps(_mm256_blendv_ps(x, one, swap_mask), _mm256_blendv_ps(one, x, swap_mask)); - __m256 result = _m256_arctan_approximation_avx(x_star); + __m256 result = _m256_arctan_poly_avx(x_star); __m256 term = _mm256_and_ps(x_star, sign_mask); term = _mm256_or_ps(pi_over_2, term); term = _mm256_sub_ps(term, result); @@ -147,7 +147,7 @@ volk_32f_atan_32f_a_sse4_1(float* out, const float* in, unsigned int num_points) __m128 swap_mask = _mm_cmpgt_ps(_mm_and_ps(x, abs_mask), one); __m128 x_star = _mm_div_ps(_mm_blendv_ps(x, one, swap_mask), _mm_blendv_ps(one, x, swap_mask)); - __m128 result = _mm_arctan_approximation_sse(x_star); + __m128 result = _mm_arctan_poly_sse(x_star); __m128 term = _mm_and_ps(x_star, sign_mask); term = _mm_or_ps(pi_over_2, term); term = _mm_sub_ps(term, result); @@ -185,7 +185,7 @@ volk_32f_atan_32f_u_avx2_fma(float* out, const float* in, unsigned int num_point __m256 swap_mask = _mm256_cmp_ps(_mm256_and_ps(x, abs_mask), one, _CMP_GT_OS); __m256 x_star = _mm256_div_ps(_mm256_blendv_ps(x, one, swap_mask), _mm256_blendv_ps(one, x, swap_mask)); - __m256 result = _m256_arctan_approximation_avx2_fma(x_star); + __m256 result = _m256_arctan_poly_avx2_fma(x_star); __m256 term = _mm256_and_ps(x_star, sign_mask); term = _mm256_or_ps(pi_over_2, term); term = _mm256_sub_ps(term, result); @@ -219,7 +219,7 @@ volk_32f_atan_32f_u_avx2(float* out, const float* in, unsigned int num_points) __m256 swap_mask = _mm256_cmp_ps(_mm256_and_ps(x, abs_mask), one, _CMP_GT_OS); __m256 x_star = _mm256_div_ps(_mm256_blendv_ps(x, one, swap_mask), _mm256_blendv_ps(one, x, swap_mask)); - __m256 result = _m256_arctan_approximation_avx(x_star); + __m256 result = _m256_arctan_poly_avx(x_star); __m256 term = _mm256_and_ps(x_star, sign_mask); term = _mm256_or_ps(pi_over_2, term); term = _mm256_sub_ps(term, result); @@ -254,7 +254,7 @@ volk_32f_atan_32f_u_sse4_1(float* out, const float* in, unsigned int num_points) __m128 swap_mask = _mm_cmpgt_ps(_mm_and_ps(x, abs_mask), one); __m128 x_star = _mm_div_ps(_mm_blendv_ps(x, one, swap_mask), _mm_blendv_ps(one, x, swap_mask)); - __m128 result = _mm_arctan_approximation_sse(x_star); + __m128 result = _mm_arctan_poly_sse(x_star); __m128 term = _mm_and_ps(x_star, sign_mask); term = _mm_or_ps(pi_over_2, term); term = _mm_sub_ps(term, result);