|
1 | 1 | /* -*- c++ -*- */ |
2 | 2 | /* |
3 | 3 | * Copyright 2010, 2011, 2015-2017, 2019, 2020 Free Software Foundation, Inc. |
4 | | - * Copyright 2023 Magnus Lundmark <[email protected]> |
| 4 | + * Copyright 2023 - 2025 Magnus Lundmark <[email protected]> |
5 | 5 | * |
6 | 6 | * This file is part of VOLK |
7 | 7 | * |
@@ -200,6 +200,101 @@ static inline float volk_arctan_poly(const float x) |
200 | 200 | return arctan; |
201 | 201 | } |
202 | 202 | //////////////////////////////////////////////////////////////////////// |
| 203 | +// sin(x) polynomial expansion |
| 204 | +//////////////////////////////////////////////////////////////////////// |
| 205 | +static inline float volk_sin_poly(const float x) |
| 206 | +{ |
| 207 | + /* |
| 208 | + * Minimax polynomial for sin(x) on [-pi/4, pi/4] |
| 209 | + * Coefficients via Remez algorithm (Sollya) |
| 210 | + * Max |error| < 7.3e-9 |
| 211 | + * sin(x) = x + x^3 * (s1 + x^2 * (s2 + x^2 * s3)) |
| 212 | + */ |
| 213 | + const float s1 = -0x1.555552p-3f; |
| 214 | + const float s2 = +0x1.110be2p-7f; |
| 215 | + const float s3 = -0x1.9ab22ap-13f; |
| 216 | + |
| 217 | + const float x2 = x * x; |
| 218 | + const float x3 = x2 * x; |
| 219 | + |
| 220 | + float poly = fmaf(x2, s3, s2); |
| 221 | + poly = fmaf(x2, poly, s1); |
| 222 | + return fmaf(x3, poly, x); |
| 223 | +} |
| 224 | +//////////////////////////////////////////////////////////////////////// |
| 225 | +// cos(x) polynomial expansion |
| 226 | +//////////////////////////////////////////////////////////////////////// |
| 227 | +static inline float volk_cos_poly(const float x) |
| 228 | +{ |
| 229 | + /* |
| 230 | + * Minimax polynomial for cos(x) on [-pi/4, pi/4] |
| 231 | + * Coefficients via Remez algorithm (Sollya) |
| 232 | + * Max |error| < 1.1e-7 |
| 233 | + * cos(x) = 1 + x^2 * (c1 + x^2 * (c2 + x^2 * c3)) |
| 234 | + */ |
| 235 | + const float c1 = -0x1.fffff4p-2f; |
| 236 | + const float c2 = +0x1.554a46p-5f; |
| 237 | + const float c3 = -0x1.661be2p-10f; |
| 238 | + |
| 239 | + const float x2 = x * x; |
| 240 | + |
| 241 | + float poly = fmaf(x2, c3, c2); |
| 242 | + poly = fmaf(x2, poly, c1); |
| 243 | + return fmaf(x2, poly, 1.0f); |
| 244 | +} |
| 245 | +//////////////////////////////////////////////////////////////////////// |
| 246 | +// sin(x) with Cody-Waite argument reduction |
| 247 | +//////////////////////////////////////////////////////////////////////// |
static inline float volk_sin(const float x)
{
    /*
     * Single-precision sin(x).
     *
     * Cody-Waite argument reduction: n = round(x * 2/pi), r = x - n * pi/2,
     * with pi/2 split into a high and a low part that are subtracted in two
     * fused multiply-add steps. sin(x) is then +/-sin(r) or +/-cos(r)
     * depending on the quadrant n mod 4.
     *
     * NaN or infinite x produces NaN: r becomes NaN and propagates through
     * both polynomials.
     * NOTE(review): only a two-term split of pi/2 is used, so accuracy is
     * expected to degrade as |x| grows -- confirm the supported input range.
     */
    const float two_over_pi = 0x1.45f306p-1f;
    const float pi_over_2_hi = 0x1.921fb6p+0f;
    const float pi_over_2_lo = -0x1.777a5cp-25f;

    const float n_f = rintf(x * two_over_pi);

    /*
     * Converting a float to int is undefined behavior when the value is NaN,
     * infinite, or does not fit in int, so only cast when n_f is in range
     * (assumes int is at least 32 bits wide). The comparison is false for
     * NaN. Any finite float with magnitude >= 2^31 is a multiple of 4, so
     * quadrant 0 is the exact answer for those inputs.
     */
    const int n = (fabsf(n_f) < 0x1p31f) ? (int)n_f : 0;

    /* Unsigned conversion is modular, so this is n mod 4 for negative n too. */
    const unsigned int quadrant = (unsigned int)n & 3u;

    float r = fmaf(-n_f, pi_over_2_hi, x);
    r = fmaf(-n_f, pi_over_2_lo, r);

    const float sin_r = volk_sin_poly(r);
    const float cos_r = volk_cos_poly(r);

    /* Quadrants 0..3 map to sin(r), cos(r), -sin(r), -cos(r). */
    const float result = (quadrant & 1u) ? cos_r : sin_r;
    return (quadrant & 2u) ? -result : result;
}
| 271 | +//////////////////////////////////////////////////////////////////////// |
| 272 | +// cos(x) with Cody-Waite argument reduction |
| 273 | +//////////////////////////////////////////////////////////////////////// |
static inline float volk_cos(const float x)
{
    /*
     * Single-precision cos(x).
     *
     * Cody-Waite argument reduction: n = round(x * 2/pi), r = x - n * pi/2,
     * with pi/2 split into a high and a low part that are subtracted in two
     * fused multiply-add steps. cos(x) is then +/-cos(r) or +/-sin(r)
     * depending on the quadrant n mod 4.
     *
     * NaN or infinite x produces NaN: r becomes NaN and propagates through
     * both polynomials.
     * NOTE(review): only a two-term split of pi/2 is used, so accuracy is
     * expected to degrade as |x| grows -- confirm the supported input range.
     */
    const float two_over_pi = 0x1.45f306p-1f;
    const float pi_over_2_hi = 0x1.921fb6p+0f;
    const float pi_over_2_lo = -0x1.777a5cp-25f;

    const float n_f = rintf(x * two_over_pi);

    /*
     * Converting a float to int is undefined behavior when the value is NaN,
     * infinite, or does not fit in int, so only cast when n_f is in range
     * (assumes int is at least 32 bits wide). The comparison is false for
     * NaN. Any finite float with magnitude >= 2^31 is a multiple of 4, so
     * quadrant 0 is the exact answer for those inputs.
     */
    const int n = (fabsf(n_f) < 0x1p31f) ? (int)n_f : 0;

    /*
     * Unsigned conversion is modular, so this is n mod 4 for negative n too.
     * Working on the 0..3 value also keeps the "+ 1" below from overflowing.
     */
    const unsigned int quadrant = (unsigned int)n & 3u;

    float r = fmaf(-n_f, pi_over_2_hi, x);
    r = fmaf(-n_f, pi_over_2_lo, r);

    const float sin_r = volk_sin_poly(r);
    const float cos_r = volk_cos_poly(r);

    /* Quadrants 0..3 map to cos(r), -sin(r), -cos(r), sin(r). */
    const float result = (quadrant & 1u) ? sin_r : cos_r;
    return ((quadrant + 1u) & 2u) ? -result : result;
}
| 297 | +//////////////////////////////////////////////////////////////////////// |
203 | 298 | // arctan(x) |
204 | 299 | //////////////////////////////////////////////////////////////////////// |
205 | 300 | static inline float volk_arctan(const float x) |
|
0 commit comments