Skip to content

Commit

Permalink
renamed for consistency
Browse files (browse the repository at this point in the history)
Signed-off-by: Magnus Lundmark <[email protected]>
  • Loading branch information
Ka-zam committed Sep 28, 2023
1 parent f762617 commit be63d40
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 9 deletions.
2 changes: 1 addition & 1 deletion include/volk/volk_avx2_fma_intrinsics.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
* Maximum relative error ~6.5e-7
* Polynomial evaluated via Horner's method
*/
-static inline __m256 _m256_arctan_approximation_avx2_fma(const __m256 x)
+static inline __m256 _m256_arctan_poly_avx2_fma(const __m256 x)
{
const __m256 a1 = _mm256_set1_ps(+0x1.ffffeap-1f);
const __m256 a3 = _mm256_set1_ps(-0x1.55437p-2f);
Expand Down
2 changes: 1 addition & 1 deletion include/volk/volk_avx_intrinsics.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
* Maximum relative error ~6.5e-7
* Polynomial evaluated via Horner's method
*/
-static inline __m256 _m256_arctan_approximation_avx(const __m256 x)
+static inline __m256 _m256_arctan_poly_avx(const __m256 x)
{
const __m256 a1 = _mm256_set1_ps(+0x1.ffffeap-1f);
const __m256 a3 = _mm256_set1_ps(-0x1.55437p-2f);
Expand Down
2 changes: 1 addition & 1 deletion include/volk/volk_sse_intrinsics.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
* Maximum relative error ~6.5e-7
* Polynomial evaluated via Horner's method
*/
-static inline __m128 _mm_arctan_approximation_sse(const __m128 x)
+static inline __m128 _mm_arctan_poly_sse(const __m128 x)
{
const __m128 a1 = _mm_set1_ps(+0x1.ffffeap-1f);
const __m128 a3 = _mm_set1_ps(-0x1.55437p-2f);
Expand Down
12 changes: 6 additions & 6 deletions kernels/volk/volk_32f_atan_32f.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ volk_32f_atan_32f_a_avx2_fma(float* out, const float* in, unsigned int num_point
__m256 swap_mask = _mm256_cmp_ps(_mm256_and_ps(x, abs_mask), one, _CMP_GT_OS);
__m256 x_star = _mm256_div_ps(_mm256_blendv_ps(x, one, swap_mask),
_mm256_blendv_ps(one, x, swap_mask));
-__m256 result = _m256_arctan_approximation_avx2_fma(x_star);
+__m256 result = _m256_arctan_poly_avx2_fma(x_star);
__m256 term = _mm256_and_ps(x_star, sign_mask);
term = _mm256_or_ps(pi_over_2, term);
term = _mm256_sub_ps(term, result);
Expand Down Expand Up @@ -112,7 +112,7 @@ volk_32f_atan_32f_a_avx2(float* out, const float* in, unsigned int num_points)
__m256 swap_mask = _mm256_cmp_ps(_mm256_and_ps(x, abs_mask), one, _CMP_GT_OS);
__m256 x_star = _mm256_div_ps(_mm256_blendv_ps(x, one, swap_mask),
_mm256_blendv_ps(one, x, swap_mask));
-__m256 result = _m256_arctan_approximation_avx(x_star);
+__m256 result = _m256_arctan_poly_avx(x_star);
__m256 term = _mm256_and_ps(x_star, sign_mask);
term = _mm256_or_ps(pi_over_2, term);
term = _mm256_sub_ps(term, result);
Expand Down Expand Up @@ -147,7 +147,7 @@ volk_32f_atan_32f_a_sse4_1(float* out, const float* in, unsigned int num_points)
__m128 swap_mask = _mm_cmpgt_ps(_mm_and_ps(x, abs_mask), one);
__m128 x_star = _mm_div_ps(_mm_blendv_ps(x, one, swap_mask),
_mm_blendv_ps(one, x, swap_mask));
-__m128 result = _mm_arctan_approximation_sse(x_star);
+__m128 result = _mm_arctan_poly_sse(x_star);
__m128 term = _mm_and_ps(x_star, sign_mask);
term = _mm_or_ps(pi_over_2, term);
term = _mm_sub_ps(term, result);
Expand Down Expand Up @@ -185,7 +185,7 @@ volk_32f_atan_32f_u_avx2_fma(float* out, const float* in, unsigned int num_point
__m256 swap_mask = _mm256_cmp_ps(_mm256_and_ps(x, abs_mask), one, _CMP_GT_OS);
__m256 x_star = _mm256_div_ps(_mm256_blendv_ps(x, one, swap_mask),
_mm256_blendv_ps(one, x, swap_mask));
-__m256 result = _m256_arctan_approximation_avx2_fma(x_star);
+__m256 result = _m256_arctan_poly_avx2_fma(x_star);
__m256 term = _mm256_and_ps(x_star, sign_mask);
term = _mm256_or_ps(pi_over_2, term);
term = _mm256_sub_ps(term, result);
Expand Down Expand Up @@ -219,7 +219,7 @@ volk_32f_atan_32f_u_avx2(float* out, const float* in, unsigned int num_points)
__m256 swap_mask = _mm256_cmp_ps(_mm256_and_ps(x, abs_mask), one, _CMP_GT_OS);
__m256 x_star = _mm256_div_ps(_mm256_blendv_ps(x, one, swap_mask),
_mm256_blendv_ps(one, x, swap_mask));
-__m256 result = _m256_arctan_approximation_avx(x_star);
+__m256 result = _m256_arctan_poly_avx(x_star);
__m256 term = _mm256_and_ps(x_star, sign_mask);
term = _mm256_or_ps(pi_over_2, term);
term = _mm256_sub_ps(term, result);
Expand Down Expand Up @@ -254,7 +254,7 @@ volk_32f_atan_32f_u_sse4_1(float* out, const float* in, unsigned int num_points)
__m128 swap_mask = _mm_cmpgt_ps(_mm_and_ps(x, abs_mask), one);
__m128 x_star = _mm_div_ps(_mm_blendv_ps(x, one, swap_mask),
_mm_blendv_ps(one, x, swap_mask));
-__m128 result = _mm_arctan_approximation_sse(x_star);
+__m128 result = _mm_arctan_poly_sse(x_star);
__m128 term = _mm_and_ps(x_star, sign_mask);
term = _mm_or_ps(pi_over_2, term);
term = _mm_sub_ps(term, result);
Expand Down

0 comments on commit be63d40

Please sign in to comment.