Skip to content

Commit b70a397

Browse files
committed
Change generation of 2^n values for fixed-point conversions.
As demonstrated by test code in #1260, the behavior of pow() in non-round-to-nearest rounding modes is not exact. This causes behavior divergence from ARMv8 hardware when not using round-to-nearest. The updated forms match hardware properly across a range of values. The tests are not updated to handle rounding modes, as doing this in a cross-platform way is not trivial. However, all existing test vectors pass properly, and in more detailed testing, these changes are closer to hardware behavior.
1 parent cf1db25 commit b70a397

File tree

1 file changed

+37
-33
lines changed

1 file changed

+37
-33
lines changed

Diff for: simde/arm/neon/cvt_n.h

+37-33
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@
2222
*
2323
* Copyright:
2424
* 2023 Yi-Yen Chung <[email protected]> (Copyright owned by Andes Technology)
25+
*
26+
* Note: pow(2, n) does not generate proper (exact) results with rounding
27+
* modes other than round-to-nearest.
28+
* See https://github.com/simd-everywhere/simde/issues/1260
2529
*/
2630

2731
#if !defined(SIMDE_ARM_NEON_CVT_N_H)
@@ -40,7 +44,7 @@ simde_vcvth_n_u16_f16(simde_float16_t a, const int n)
4044
SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) {
4145
return simde_vcvth_u16_f16(
4246
simde_float16_from_float32(
43-
simde_float16_to_float32(a) * HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n))));
47+
simde_float16_to_float32(a) * HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n))));
4448
}
4549
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
4650
#define simde_vcvth_n_u16_f16(a, n) vcvth_n_u16_f16(a, n)
@@ -56,7 +60,7 @@ simde_vcvth_n_f16_s16(int16_t a, const int n)
5660
SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) {
5761
return simde_float16_from_float32(
5862
HEDLEY_STATIC_CAST(simde_float32_t,
59-
HEDLEY_STATIC_CAST(simde_float64_t, a) / simde_math_pow(2, n)));
63+
HEDLEY_STATIC_CAST(simde_float64_t, a) / (UINT64_C(1) << n)));
6064
}
6165
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
6266
#define simde_vcvth_n_f16_s16(a, n) vcvth_n_f16_s16(a, n)
@@ -72,7 +76,7 @@ simde_vcvth_n_f16_u16(uint16_t a, const int n)
7276
SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) {
7377
return simde_float16_from_float32(
7478
HEDLEY_STATIC_CAST(simde_float32_t,
75-
HEDLEY_STATIC_CAST(simde_float64_t, a) / simde_math_pow(2, n)));
79+
HEDLEY_STATIC_CAST(simde_float64_t, a) / (UINT64_C(1) << n)));
7680
}
7781
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
7882
#define simde_vcvth_n_f16_u16(a, n) vcvth_n_f16_u16(a, n)
@@ -86,7 +90,7 @@ SIMDE_FUNCTION_ATTRIBUTES
8690
int32_t
8791
simde_vcvts_n_s32_f32(simde_float32_t a, const int n)
8892
SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) {
89-
return simde_vcvts_s32_f32(a * HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n)));
93+
return simde_vcvts_s32_f32(a * HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n)));
9094
}
9195
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
9296
#define simde_vcvts_n_s32_f32(a, n) vcvts_n_s32_f32(a, n)
@@ -100,7 +104,7 @@ SIMDE_FUNCTION_ATTRIBUTES
100104
uint32_t
101105
simde_vcvts_n_u32_f32(simde_float32_t a, const int n)
102106
SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) {
103-
return simde_vcvts_u32_f32(a * HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n)));
107+
return simde_vcvts_u32_f32(a * HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n)));
104108
}
105109
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
106110
#define simde_vcvts_n_u32_f32(a, n) vcvts_n_u32_f32(a, n)
@@ -115,7 +119,7 @@ simde_float32_t
115119
simde_vcvts_n_f32_s32(int32_t a, const int n)
116120
SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) {
117121
return HEDLEY_STATIC_CAST(simde_float32_t,
118-
HEDLEY_STATIC_CAST(simde_float64_t, a) / simde_math_pow(2, n));
122+
HEDLEY_STATIC_CAST(simde_float64_t, a) / (UINT64_C(1) << n));
119123
}
120124
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
121125
#define simde_vcvts_n_f32_s32(a, n) vcvts_n_f32_s32(a, n)
@@ -130,7 +134,7 @@ simde_float32_t
130134
simde_vcvts_n_f32_u32(uint32_t a, const int n)
131135
SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) {
132136
return HEDLEY_STATIC_CAST(simde_float32_t,
133-
HEDLEY_STATIC_CAST(simde_float64_t, a) / simde_math_pow(2, n));
137+
HEDLEY_STATIC_CAST(simde_float64_t, a) / (UINT64_C(1) << n));
134138
}
135139
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
136140
#define simde_vcvts_n_f32_u32(a, n) vcvts_n_f32_u32(a, n)
@@ -144,7 +148,7 @@ SIMDE_FUNCTION_ATTRIBUTES
144148
int64_t
145149
simde_vcvtd_n_s64_f64(simde_float64_t a, const int n)
146150
SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) {
147-
return simde_vcvtd_s64_f64(a * simde_math_pow(2, n));
151+
return simde_vcvtd_s64_f64(a * ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n));
148152
}
149153
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
150154
#define simde_vcvtd_n_s64_f64(a, n) vcvtd_n_s64_f64(a, n)
@@ -158,7 +162,7 @@ SIMDE_FUNCTION_ATTRIBUTES
158162
uint64_t
159163
simde_vcvtd_n_u64_f64(simde_float64_t a, const int n)
160164
SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) {
161-
return simde_vcvtd_u64_f64(a * simde_math_pow(2, n));
165+
return simde_vcvtd_u64_f64(a * ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n));
162166
}
163167
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
164168
#define simde_vcvtd_n_u64_f64(a, n) vcvtd_n_u64_f64(a, n)
@@ -172,7 +176,7 @@ SIMDE_FUNCTION_ATTRIBUTES
172176
simde_float64_t
173177
simde_vcvtd_n_f64_s64(int64_t a, const int n)
174178
SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) {
175-
return HEDLEY_STATIC_CAST(simde_float64_t, a) / simde_math_pow(2, n);
179+
return HEDLEY_STATIC_CAST(simde_float64_t, a) / ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n);
176180
}
177181
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
178182
#define simde_vcvtd_n_f64_s64(a, n) vcvtd_n_f64_s64(a, n)
@@ -186,7 +190,7 @@ SIMDE_FUNCTION_ATTRIBUTES
186190
simde_float64_t
187191
simde_vcvtd_n_f64_u64(uint64_t a, const int n)
188192
SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) {
189-
return HEDLEY_STATIC_CAST(simde_float64_t, a) / simde_math_pow(2, n);
193+
return HEDLEY_STATIC_CAST(simde_float64_t, a) / ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n);
190194
}
191195
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
192196
#define simde_vcvtd_n_f64_u64(a, n) vcvtd_n_f64_u64(a, n)
@@ -205,7 +209,7 @@ simde_vcvt_n_s32_f32(simde_float32x2_t a, const int n)
205209

206210
SIMDE_VECTORIZE
207211
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
208-
r_.values[i] = simde_vcvts_s32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n)));
212+
r_.values[i] = simde_vcvts_s32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n)));
209213
}
210214

211215
return simde_int32x2_from_private(r_);
@@ -227,7 +231,7 @@ simde_vcvt_n_s64_f64(simde_float64x1_t a, const int n)
227231

228232
SIMDE_VECTORIZE
229233
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
230-
r_.values[i] = simde_vcvtd_s64_f64(a_.values[i] * simde_math_pow(2, n));
234+
r_.values[i] = simde_vcvtd_s64_f64(a_.values[i] * ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n));
231235
}
232236

233237
return simde_int64x1_from_private(r_);
@@ -251,7 +255,7 @@ simde_vcvt_n_u16_f16(simde_float16x4_t a, const int n)
251255
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
252256
r_.values[i] = simde_vcvth_u16_f16(simde_float16_from_float32(
253257
simde_float16_to_float32(a_.values[i]) *
254-
HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n))));
258+
HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n))));
255259
}
256260

257261
return simde_uint16x4_from_private(r_);
@@ -273,7 +277,7 @@ simde_vcvt_n_u32_f32(simde_float32x2_t a, const int n)
273277

274278
SIMDE_VECTORIZE
275279
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
276-
r_.values[i] = simde_vcvts_u32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n)));
280+
r_.values[i] = simde_vcvts_u32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n)));
277281
}
278282

279283
return simde_uint32x2_from_private(r_);
@@ -295,7 +299,7 @@ simde_vcvt_n_u64_f64(simde_float64x1_t a, const int n)
295299

296300
SIMDE_VECTORIZE
297301
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
298-
r_.values[i] = simde_vcvtd_u64_f64(a_.values[i] * simde_math_pow(2, n));
302+
r_.values[i] = simde_vcvtd_u64_f64(a_.values[i] * ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n));
299303
}
300304

301305
return simde_uint64x1_from_private(r_);
@@ -317,7 +321,7 @@ simde_vcvtq_n_s32_f32(simde_float32x4_t a, const int n)
317321

318322
SIMDE_VECTORIZE
319323
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
320-
r_.values[i] = simde_vcvts_s32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n)));
324+
r_.values[i] = simde_vcvts_s32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n)));
321325
}
322326

323327
return simde_int32x4_from_private(r_);
@@ -339,7 +343,7 @@ simde_vcvtq_n_s64_f64(simde_float64x2_t a, const int n)
339343

340344
SIMDE_VECTORIZE
341345
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
342-
r_.values[i] = simde_vcvtd_s64_f64(a_.values[i] * simde_math_pow(2, n));
346+
r_.values[i] = simde_vcvtd_s64_f64(a_.values[i] * ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n));
343347
}
344348

345349
return simde_int64x2_from_private(r_);
@@ -363,7 +367,7 @@ simde_vcvtq_n_u16_f16(simde_float16x8_t a, const int n)
363367
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
364368
r_.values[i] = simde_vcvth_u16_f16(simde_float16_from_float32(
365369
simde_float16_to_float32(a_.values[i]) *
366-
HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n))));
370+
HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n))));
367371
}
368372

369373
return simde_uint16x8_from_private(r_);
@@ -385,7 +389,7 @@ simde_vcvtq_n_u32_f32(simde_float32x4_t a, const int n)
385389

386390
SIMDE_VECTORIZE
387391
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
388-
r_.values[i] = simde_vcvts_u32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n)));
392+
r_.values[i] = simde_vcvts_u32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n)));
389393
}
390394

391395
return simde_uint32x4_from_private(r_);
@@ -407,7 +411,7 @@ simde_vcvtq_n_u64_f64(simde_float64x2_t a, const int n)
407411

408412
SIMDE_VECTORIZE
409413
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
410-
r_.values[i] = simde_vcvtd_u64_f64(a_.values[i] * simde_math_pow(2, n));
414+
r_.values[i] = simde_vcvtd_u64_f64(a_.values[i] * ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n));
411415
}
412416

413417
return simde_uint64x2_from_private(r_);
@@ -429,7 +433,7 @@ simde_vcvt_n_f16_u16(simde_uint16x4_t a, const int n)
429433

430434
SIMDE_VECTORIZE
431435
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
432-
r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n)));
436+
r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / (UINT64_C(1) << n)));
433437
}
434438

435439
return simde_float16x4_from_private(r_);
@@ -451,7 +455,7 @@ simde_vcvt_n_f16_s16(simde_int16x4_t a, const int n)
451455

452456
SIMDE_VECTORIZE
453457
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
454-
r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n)));
458+
r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / (UINT64_C(1) << n)));
455459
}
456460

457461
return simde_float16x4_from_private(r_);
@@ -473,7 +477,7 @@ simde_vcvtq_n_f16_u16(simde_uint16x8_t a, const int n)
473477

474478
SIMDE_VECTORIZE
475479
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
476-
r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n)));
480+
r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / (UINT64_C(1) << n)));
477481
}
478482

479483
return simde_float16x8_from_private(r_);
@@ -495,7 +499,7 @@ simde_vcvtq_n_f16_s16(simde_int16x8_t a, const int n)
495499

496500
SIMDE_VECTORIZE
497501
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
498-
r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, (a_.values[i] / simde_math_pow(2, n))));
502+
r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / (UINT64_C(1) << n)));
499503
}
500504

501505
return simde_float16x8_from_private(r_);
@@ -517,7 +521,7 @@ simde_vcvt_n_f32_u32(simde_uint32x2_t a, const int n)
517521

518522
SIMDE_VECTORIZE
519523
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
520-
r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n));
524+
r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / (UINT64_C(1) << n));
521525
}
522526

523527
return simde_float32x2_from_private(r_);
@@ -539,7 +543,7 @@ simde_vcvt_n_f32_s32(simde_int32x2_t a, const int n)
539543

540544
SIMDE_VECTORIZE
541545
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
542-
r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n));
546+
r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / (UINT64_C(1) << n));
543547
}
544548

545549
return simde_float32x2_from_private(r_);
@@ -561,7 +565,7 @@ simde_vcvt_n_f64_u64(simde_uint64x1_t a, const int n)
561565

562566
SIMDE_VECTORIZE
563567
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
564-
r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n));
568+
r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n));
565569
}
566570

567571
return simde_float64x1_from_private(r_);
@@ -583,7 +587,7 @@ simde_vcvtq_n_f64_u64(simde_uint64x2_t a, const int n)
583587

584588
SIMDE_VECTORIZE
585589
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
586-
r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n));
590+
r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n));
587591
}
588592

589593
return simde_float64x2_from_private(r_);
@@ -605,7 +609,7 @@ simde_vcvt_n_f64_s64(simde_int64x1_t a, const int n)
605609

606610
SIMDE_VECTORIZE
607611
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
608-
r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n));
612+
r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n));
609613
}
610614

611615
return simde_float64x1_from_private(r_);
@@ -627,7 +631,7 @@ simde_vcvtq_n_f64_s64(simde_int64x2_t a, const int n)
627631

628632
SIMDE_VECTORIZE
629633
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
630-
r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n));
634+
r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n));
631635
}
632636

633637
return simde_float64x2_from_private(r_);
@@ -649,7 +653,7 @@ simde_vcvtq_n_f32_s32(simde_int32x4_t a, const int n)
649653

650654
SIMDE_VECTORIZE
651655
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
652-
r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n));
656+
r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / (UINT64_C(1) << n));
653657
}
654658

655659
return simde_float32x4_from_private(r_);
@@ -671,7 +675,7 @@ simde_vcvtq_n_f32_u32(simde_uint32x4_t a, const int n)
671675

672676
SIMDE_VECTORIZE
673677
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
674-
r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n));
678+
r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / (UINT64_C(1) << n));
675679
}
676680

677681
return simde_float32x4_from_private(r_);

0 commit comments

Comments
 (0)