22
22
*
23
23
* Copyright:
24
24
* 2023 Yi-Yen Chung <[email protected]> (Copyright owned by Andes Technology)
25
+ *
26
+ * Note: pow(2, n) does not generate proper (exact) results with rounding
27
+ * modes other than round-to-nearest.
28
+ * See https://github.com/simd-everywhere/simde/issues/1260
25
29
*/
26
30
27
31
#if !defined(SIMDE_ARM_NEON_CVT_N_H )
@@ -40,7 +44,7 @@ simde_vcvth_n_u16_f16(simde_float16_t a, const int n)
40
44
SIMDE_REQUIRE_CONSTANT_RANGE (n , 1 , 16 ) {
41
45
return simde_vcvth_u16_f16 (
42
46
simde_float16_from_float32 (
43
- simde_float16_to_float32 (a ) * HEDLEY_STATIC_CAST (simde_float32_t , simde_math_pow ( 2 , n ))));
47
+ simde_float16_to_float32 (a ) * HEDLEY_STATIC_CAST (simde_float32_t , ( UINT64_C ( 1 ) << n ))));
44
48
}
45
49
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE ) && defined(SIMDE_ARM_NEON_FP16 )
46
50
#define simde_vcvth_n_u16_f16 (a , n ) vcvth_n_u16_f16(a, n)
@@ -56,7 +60,7 @@ simde_vcvth_n_f16_s16(int16_t a, const int n)
56
60
SIMDE_REQUIRE_CONSTANT_RANGE (n , 1 , 16 ) {
57
61
return simde_float16_from_float32 (
58
62
HEDLEY_STATIC_CAST (simde_float32_t ,
59
- HEDLEY_STATIC_CAST (simde_float64_t , a ) / simde_math_pow ( 2 , n )));
63
+ HEDLEY_STATIC_CAST (simde_float64_t , a ) / ( UINT64_C ( 1 ) << n )));
60
64
}
61
65
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE ) && defined(SIMDE_ARM_NEON_FP16 )
62
66
#define simde_vcvth_n_f16_s16 (a , n ) vcvth_n_f16_s16(a, n)
@@ -72,7 +76,7 @@ simde_vcvth_n_f16_u16(uint16_t a, const int n)
72
76
SIMDE_REQUIRE_CONSTANT_RANGE (n , 1 , 16 ) {
73
77
return simde_float16_from_float32 (
74
78
HEDLEY_STATIC_CAST (simde_float32_t ,
75
- HEDLEY_STATIC_CAST (simde_float64_t , a ) / simde_math_pow ( 2 , n )));
79
+ HEDLEY_STATIC_CAST (simde_float64_t , a ) / ( UINT64_C ( 1 ) << n )));
76
80
}
77
81
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE ) && defined(SIMDE_ARM_NEON_FP16 )
78
82
#define simde_vcvth_n_f16_u16 (a , n ) vcvth_n_f16_u16(a, n)
@@ -86,7 +90,7 @@ SIMDE_FUNCTION_ATTRIBUTES
86
90
int32_t
87
91
simde_vcvts_n_s32_f32 (simde_float32_t a , const int n )
88
92
SIMDE_REQUIRE_CONSTANT_RANGE (n , 1 , 32 ) {
89
- return simde_vcvts_s32_f32 (a * HEDLEY_STATIC_CAST (simde_float32_t , simde_math_pow ( 2 , n )));
93
+ return simde_vcvts_s32_f32 (a * HEDLEY_STATIC_CAST (simde_float32_t , ( UINT64_C ( 1 ) << n )));
90
94
}
91
95
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE )
92
96
#define simde_vcvts_n_s32_f32 (a , n ) vcvts_n_s32_f32(a, n)
@@ -100,7 +104,7 @@ SIMDE_FUNCTION_ATTRIBUTES
100
104
uint32_t
101
105
simde_vcvts_n_u32_f32 (simde_float32_t a , const int n )
102
106
SIMDE_REQUIRE_CONSTANT_RANGE (n , 1 , 32 ) {
103
- return simde_vcvts_u32_f32 (a * HEDLEY_STATIC_CAST (simde_float32_t , simde_math_pow ( 2 , n )));
107
+ return simde_vcvts_u32_f32 (a * HEDLEY_STATIC_CAST (simde_float32_t , ( UINT64_C ( 1 ) << n )));
104
108
}
105
109
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE )
106
110
#define simde_vcvts_n_u32_f32 (a , n ) vcvts_n_u32_f32(a, n)
@@ -115,7 +119,7 @@ simde_float32_t
115
119
simde_vcvts_n_f32_s32 (int32_t a , const int n )
116
120
SIMDE_REQUIRE_CONSTANT_RANGE (n , 1 , 32 ) {
117
121
return HEDLEY_STATIC_CAST (simde_float32_t ,
118
- HEDLEY_STATIC_CAST (simde_float64_t , a ) / simde_math_pow ( 2 , n ));
122
+ HEDLEY_STATIC_CAST (simde_float64_t , a ) / ( UINT64_C ( 1 ) << n ));
119
123
}
120
124
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE )
121
125
#define simde_vcvts_n_f32_s32 (a , n ) vcvts_n_f32_s32(a, n)
@@ -130,7 +134,7 @@ simde_float32_t
130
134
simde_vcvts_n_f32_u32 (uint32_t a , const int n )
131
135
SIMDE_REQUIRE_CONSTANT_RANGE (n , 1 , 32 ) {
132
136
return HEDLEY_STATIC_CAST (simde_float32_t ,
133
- HEDLEY_STATIC_CAST (simde_float64_t , a ) / simde_math_pow ( 2 , n ));
137
+ HEDLEY_STATIC_CAST (simde_float64_t , a ) / ( UINT64_C ( 1 ) << n ));
134
138
}
135
139
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE )
136
140
#define simde_vcvts_n_f32_u32 (a , n ) vcvts_n_f32_u32(a, n)
@@ -144,7 +148,7 @@ SIMDE_FUNCTION_ATTRIBUTES
144
148
int64_t
145
149
simde_vcvtd_n_s64_f64 (simde_float64_t a , const int n )
146
150
SIMDE_REQUIRE_CONSTANT_RANGE (n , 1 , 64 ) {
147
- return simde_vcvtd_s64_f64 (a * simde_math_pow (2 , n ));
151
+ return simde_vcvtd_s64_f64 (a * (( n == 64 ) ? simde_math_pow (2 , n ) : UINT64_C ( 1 ) << n ));
148
152
}
149
153
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE )
150
154
#define simde_vcvtd_n_s64_f64 (a , n ) vcvtd_n_s64_f64(a, n)
@@ -158,7 +162,7 @@ SIMDE_FUNCTION_ATTRIBUTES
158
162
uint64_t
159
163
simde_vcvtd_n_u64_f64 (simde_float64_t a , const int n )
160
164
SIMDE_REQUIRE_CONSTANT_RANGE (n , 1 , 64 ) {
161
- return simde_vcvtd_u64_f64 (a * simde_math_pow (2 , n ));
165
+ return simde_vcvtd_u64_f64 (a * (( n == 64 ) ? simde_math_pow (2 , n ) : UINT64_C ( 1 ) << n ));
162
166
}
163
167
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE )
164
168
#define simde_vcvtd_n_u64_f64 (a , n ) vcvtd_n_u64_f64(a, n)
@@ -172,7 +176,7 @@ SIMDE_FUNCTION_ATTRIBUTES
172
176
simde_float64_t
173
177
simde_vcvtd_n_f64_s64 (int64_t a , const int n )
174
178
SIMDE_REQUIRE_CONSTANT_RANGE (n , 1 , 64 ) {
175
- return HEDLEY_STATIC_CAST (simde_float64_t , a ) / simde_math_pow (2 , n );
179
+ return HEDLEY_STATIC_CAST (simde_float64_t , a ) / (( n == 64 ) ? simde_math_pow (2 , n ) : UINT64_C ( 1 ) << n );
176
180
}
177
181
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE )
178
182
#define simde_vcvtd_n_f64_s64 (a , n ) vcvtd_n_f64_s64(a, n)
@@ -186,7 +190,7 @@ SIMDE_FUNCTION_ATTRIBUTES
186
190
simde_float64_t
187
191
simde_vcvtd_n_f64_u64 (uint64_t a , const int n )
188
192
SIMDE_REQUIRE_CONSTANT_RANGE (n , 1 , 64 ) {
189
- return HEDLEY_STATIC_CAST (simde_float64_t , a ) / simde_math_pow (2 , n );
193
+ return HEDLEY_STATIC_CAST (simde_float64_t , a ) / (( n == 64 ) ? simde_math_pow (2 , n ) : UINT64_C ( 1 ) << n );
190
194
}
191
195
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE )
192
196
#define simde_vcvtd_n_f64_u64 (a , n ) vcvtd_n_f64_u64(a, n)
@@ -205,7 +209,7 @@ simde_vcvt_n_s32_f32(simde_float32x2_t a, const int n)
205
209
206
210
SIMDE_VECTORIZE
207
211
for (size_t i = 0 ; i < (sizeof (r_ .values ) / sizeof (r_ .values [0 ])) ; i ++ ) {
208
- r_ .values [i ] = simde_vcvts_s32_f32 (a_ .values [i ] * HEDLEY_STATIC_CAST (simde_float32_t , simde_math_pow ( 2 , n )));
212
+ r_ .values [i ] = simde_vcvts_s32_f32 (a_ .values [i ] * HEDLEY_STATIC_CAST (simde_float32_t , ( UINT64_C ( 1 ) << n )));
209
213
}
210
214
211
215
return simde_int32x2_from_private (r_ );
@@ -227,7 +231,7 @@ simde_vcvt_n_s64_f64(simde_float64x1_t a, const int n)
227
231
228
232
SIMDE_VECTORIZE
229
233
for (size_t i = 0 ; i < (sizeof (r_ .values ) / sizeof (r_ .values [0 ])) ; i ++ ) {
230
- r_ .values [i ] = simde_vcvtd_s64_f64 (a_ .values [i ] * simde_math_pow (2 , n ));
234
+ r_ .values [i ] = simde_vcvtd_s64_f64 (a_ .values [i ] * (( n == 64 ) ? simde_math_pow (2 , n ) : UINT64_C ( 1 ) << n ));
231
235
}
232
236
233
237
return simde_int64x1_from_private (r_ );
@@ -251,7 +255,7 @@ simde_vcvt_n_u16_f16(simde_float16x4_t a, const int n)
251
255
for (size_t i = 0 ; i < (sizeof (r_ .values ) / sizeof (r_ .values [0 ])) ; i ++ ) {
252
256
r_ .values [i ] = simde_vcvth_u16_f16 (simde_float16_from_float32 (
253
257
simde_float16_to_float32 (a_ .values [i ]) *
254
- HEDLEY_STATIC_CAST (simde_float32_t , simde_math_pow ( 2 , n ))));
258
+ HEDLEY_STATIC_CAST (simde_float32_t , ( UINT64_C ( 1 ) << n ))));
255
259
}
256
260
257
261
return simde_uint16x4_from_private (r_ );
@@ -273,7 +277,7 @@ simde_vcvt_n_u32_f32(simde_float32x2_t a, const int n)
273
277
274
278
SIMDE_VECTORIZE
275
279
for (size_t i = 0 ; i < (sizeof (r_ .values ) / sizeof (r_ .values [0 ])) ; i ++ ) {
276
- r_ .values [i ] = simde_vcvts_u32_f32 (a_ .values [i ] * HEDLEY_STATIC_CAST (simde_float32_t , simde_math_pow ( 2 , n )));
280
+ r_ .values [i ] = simde_vcvts_u32_f32 (a_ .values [i ] * HEDLEY_STATIC_CAST (simde_float32_t , ( UINT64_C ( 1 ) << n )));
277
281
}
278
282
279
283
return simde_uint32x2_from_private (r_ );
@@ -295,7 +299,7 @@ simde_vcvt_n_u64_f64(simde_float64x1_t a, const int n)
295
299
296
300
SIMDE_VECTORIZE
297
301
for (size_t i = 0 ; i < (sizeof (r_ .values ) / sizeof (r_ .values [0 ])) ; i ++ ) {
298
- r_ .values [i ] = simde_vcvtd_u64_f64 (a_ .values [i ] * simde_math_pow (2 , n ));
302
+ r_ .values [i ] = simde_vcvtd_u64_f64 (a_ .values [i ] * (( n == 64 ) ? simde_math_pow (2 , n ) : UINT64_C ( 1 ) << n ));
299
303
}
300
304
301
305
return simde_uint64x1_from_private (r_ );
@@ -317,7 +321,7 @@ simde_vcvtq_n_s32_f32(simde_float32x4_t a, const int n)
317
321
318
322
SIMDE_VECTORIZE
319
323
for (size_t i = 0 ; i < (sizeof (r_ .values ) / sizeof (r_ .values [0 ])) ; i ++ ) {
320
- r_ .values [i ] = simde_vcvts_s32_f32 (a_ .values [i ] * HEDLEY_STATIC_CAST (simde_float32_t , simde_math_pow ( 2 , n )));
324
+ r_ .values [i ] = simde_vcvts_s32_f32 (a_ .values [i ] * HEDLEY_STATIC_CAST (simde_float32_t , ( UINT64_C ( 1 ) << n )));
321
325
}
322
326
323
327
return simde_int32x4_from_private (r_ );
@@ -339,7 +343,7 @@ simde_vcvtq_n_s64_f64(simde_float64x2_t a, const int n)
339
343
340
344
SIMDE_VECTORIZE
341
345
for (size_t i = 0 ; i < (sizeof (r_ .values ) / sizeof (r_ .values [0 ])) ; i ++ ) {
342
- r_ .values [i ] = simde_vcvtd_s64_f64 (a_ .values [i ] * simde_math_pow (2 , n ));
346
+ r_ .values [i ] = simde_vcvtd_s64_f64 (a_ .values [i ] * (( n == 64 ) ? simde_math_pow (2 , n ) : UINT64_C ( 1 ) << n ));
343
347
}
344
348
345
349
return simde_int64x2_from_private (r_ );
@@ -363,7 +367,7 @@ simde_vcvtq_n_u16_f16(simde_float16x8_t a, const int n)
363
367
for (size_t i = 0 ; i < (sizeof (r_ .values ) / sizeof (r_ .values [0 ])) ; i ++ ) {
364
368
r_ .values [i ] = simde_vcvth_u16_f16 (simde_float16_from_float32 (
365
369
simde_float16_to_float32 (a_ .values [i ]) *
366
- HEDLEY_STATIC_CAST (simde_float32_t , simde_math_pow ( 2 , n ))));
370
+ HEDLEY_STATIC_CAST (simde_float32_t , ( UINT64_C ( 1 ) << n ))));
367
371
}
368
372
369
373
return simde_uint16x8_from_private (r_ );
@@ -385,7 +389,7 @@ simde_vcvtq_n_u32_f32(simde_float32x4_t a, const int n)
385
389
386
390
SIMDE_VECTORIZE
387
391
for (size_t i = 0 ; i < (sizeof (r_ .values ) / sizeof (r_ .values [0 ])) ; i ++ ) {
388
- r_ .values [i ] = simde_vcvts_u32_f32 (a_ .values [i ] * HEDLEY_STATIC_CAST (simde_float32_t , simde_math_pow ( 2 , n )));
392
+ r_ .values [i ] = simde_vcvts_u32_f32 (a_ .values [i ] * HEDLEY_STATIC_CAST (simde_float32_t , ( UINT64_C ( 1 ) << n )));
389
393
}
390
394
391
395
return simde_uint32x4_from_private (r_ );
@@ -407,7 +411,7 @@ simde_vcvtq_n_u64_f64(simde_float64x2_t a, const int n)
407
411
408
412
SIMDE_VECTORIZE
409
413
for (size_t i = 0 ; i < (sizeof (r_ .values ) / sizeof (r_ .values [0 ])) ; i ++ ) {
410
- r_ .values [i ] = simde_vcvtd_u64_f64 (a_ .values [i ] * simde_math_pow (2 , n ));
414
+ r_ .values [i ] = simde_vcvtd_u64_f64 (a_ .values [i ] * (( n == 64 ) ? simde_math_pow (2 , n ) : UINT64_C ( 1 ) << n ));
411
415
}
412
416
413
417
return simde_uint64x2_from_private (r_ );
@@ -429,7 +433,7 @@ simde_vcvt_n_f16_u16(simde_uint16x4_t a, const int n)
429
433
430
434
SIMDE_VECTORIZE
431
435
for (size_t i = 0 ; i < (sizeof (r_ .values ) / sizeof (r_ .values [0 ])) ; i ++ ) {
432
- r_ .values [i ] = simde_float16_from_float32 (HEDLEY_STATIC_CAST (simde_float32 , HEDLEY_STATIC_CAST (simde_float64_t , a_ .values [i ]) / simde_math_pow ( 2 , n )));
436
+ r_ .values [i ] = simde_float16_from_float32 (HEDLEY_STATIC_CAST (simde_float32 , HEDLEY_STATIC_CAST (simde_float64_t , a_ .values [i ]) / ( UINT64_C ( 1 ) << n )));
433
437
}
434
438
435
439
return simde_float16x4_from_private (r_ );
@@ -451,7 +455,7 @@ simde_vcvt_n_f16_s16(simde_int16x4_t a, const int n)
451
455
452
456
SIMDE_VECTORIZE
453
457
for (size_t i = 0 ; i < (sizeof (r_ .values ) / sizeof (r_ .values [0 ])) ; i ++ ) {
454
- r_ .values [i ] = simde_float16_from_float32 (HEDLEY_STATIC_CAST (simde_float32_t , HEDLEY_STATIC_CAST (simde_float64_t , a_ .values [i ]) / simde_math_pow ( 2 , n )));
458
+ r_ .values [i ] = simde_float16_from_float32 (HEDLEY_STATIC_CAST (simde_float32_t , HEDLEY_STATIC_CAST (simde_float64_t , a_ .values [i ]) / ( UINT64_C ( 1 ) << n )));
455
459
}
456
460
457
461
return simde_float16x4_from_private (r_ );
@@ -473,7 +477,7 @@ simde_vcvtq_n_f16_u16(simde_uint16x8_t a, const int n)
473
477
474
478
SIMDE_VECTORIZE
475
479
for (size_t i = 0 ; i < (sizeof (r_ .values ) / sizeof (r_ .values [0 ])) ; i ++ ) {
476
- r_ .values [i ] = simde_float16_from_float32 (HEDLEY_STATIC_CAST (simde_float32_t , HEDLEY_STATIC_CAST (simde_float64_t , a_ .values [i ]) / simde_math_pow ( 2 , n )));
480
+ r_ .values [i ] = simde_float16_from_float32 (HEDLEY_STATIC_CAST (simde_float32_t , HEDLEY_STATIC_CAST (simde_float64_t , a_ .values [i ]) / ( UINT64_C ( 1 ) << n )));
477
481
}
478
482
479
483
return simde_float16x8_from_private (r_ );
@@ -495,7 +499,7 @@ simde_vcvtq_n_f16_s16(simde_int16x8_t a, const int n)
495
499
496
500
SIMDE_VECTORIZE
497
501
for (size_t i = 0 ; i < (sizeof (r_ .values ) / sizeof (r_ .values [0 ])) ; i ++ ) {
498
- r_ .values [i ] = simde_float16_from_float32 (HEDLEY_STATIC_CAST (simde_float32_t , ( a_ .values [i ] / simde_math_pow ( 2 , n ) )));
502
+ r_ .values [i ] = simde_float16_from_float32 (HEDLEY_STATIC_CAST (simde_float32_t , HEDLEY_STATIC_CAST ( simde_float64_t , a_ .values [i ]) / ( UINT64_C ( 1 ) << n )));
499
503
}
500
504
501
505
return simde_float16x8_from_private (r_ );
@@ -517,7 +521,7 @@ simde_vcvt_n_f32_u32(simde_uint32x2_t a, const int n)
517
521
518
522
SIMDE_VECTORIZE
519
523
for (size_t i = 0 ; i < (sizeof (r_ .values ) / sizeof (r_ .values [0 ])) ; i ++ ) {
520
- r_ .values [i ] = HEDLEY_STATIC_CAST (simde_float32_t , HEDLEY_STATIC_CAST (simde_float64_t , a_ .values [i ]) / simde_math_pow ( 2 , n ));
524
+ r_ .values [i ] = HEDLEY_STATIC_CAST (simde_float32_t , HEDLEY_STATIC_CAST (simde_float64_t , a_ .values [i ]) / ( UINT64_C ( 1 ) << n ));
521
525
}
522
526
523
527
return simde_float32x2_from_private (r_ );
@@ -539,7 +543,7 @@ simde_vcvt_n_f32_s32(simde_int32x2_t a, const int n)
539
543
540
544
SIMDE_VECTORIZE
541
545
for (size_t i = 0 ; i < (sizeof (r_ .values ) / sizeof (r_ .values [0 ])) ; i ++ ) {
542
- r_ .values [i ] = HEDLEY_STATIC_CAST (simde_float32_t , HEDLEY_STATIC_CAST (simde_float64_t , a_ .values [i ]) / simde_math_pow ( 2 , n ));
546
+ r_ .values [i ] = HEDLEY_STATIC_CAST (simde_float32_t , HEDLEY_STATIC_CAST (simde_float64_t , a_ .values [i ]) / ( UINT64_C ( 1 ) << n ));
543
547
}
544
548
545
549
return simde_float32x2_from_private (r_ );
@@ -561,7 +565,7 @@ simde_vcvt_n_f64_u64(simde_uint64x1_t a, const int n)
561
565
562
566
SIMDE_VECTORIZE
563
567
for (size_t i = 0 ; i < (sizeof (r_ .values ) / sizeof (r_ .values [0 ])) ; i ++ ) {
564
- r_ .values [i ] = HEDLEY_STATIC_CAST (simde_float64_t , HEDLEY_STATIC_CAST (simde_float64_t , a_ .values [i ]) / simde_math_pow (2 , n ));
568
+ r_ .values [i ] = HEDLEY_STATIC_CAST (simde_float64_t , HEDLEY_STATIC_CAST (simde_float64_t , a_ .values [i ]) / (( n == 64 ) ? simde_math_pow (2 , n ) : UINT64_C ( 1 ) << n ));
565
569
}
566
570
567
571
return simde_float64x1_from_private (r_ );
@@ -583,7 +587,7 @@ simde_vcvtq_n_f64_u64(simde_uint64x2_t a, const int n)
583
587
584
588
SIMDE_VECTORIZE
585
589
for (size_t i = 0 ; i < (sizeof (r_ .values ) / sizeof (r_ .values [0 ])) ; i ++ ) {
586
- r_ .values [i ] = HEDLEY_STATIC_CAST (simde_float64_t , HEDLEY_STATIC_CAST (simde_float64_t , a_ .values [i ]) / simde_math_pow (2 , n ));
590
+ r_ .values [i ] = HEDLEY_STATIC_CAST (simde_float64_t , HEDLEY_STATIC_CAST (simde_float64_t , a_ .values [i ]) / (( n == 64 ) ? simde_math_pow (2 , n ) : UINT64_C ( 1 ) << n ));
587
591
}
588
592
589
593
return simde_float64x2_from_private (r_ );
@@ -605,7 +609,7 @@ simde_vcvt_n_f64_s64(simde_int64x1_t a, const int n)
605
609
606
610
SIMDE_VECTORIZE
607
611
for (size_t i = 0 ; i < (sizeof (r_ .values ) / sizeof (r_ .values [0 ])) ; i ++ ) {
608
- r_ .values [i ] = HEDLEY_STATIC_CAST (simde_float64_t , HEDLEY_STATIC_CAST (simde_float64_t , a_ .values [i ]) / simde_math_pow (2 , n ));
612
+ r_ .values [i ] = HEDLEY_STATIC_CAST (simde_float64_t , HEDLEY_STATIC_CAST (simde_float64_t , a_ .values [i ]) / (( n == 64 ) ? simde_math_pow (2 , n ) : UINT64_C ( 1 ) << n ));
609
613
}
610
614
611
615
return simde_float64x1_from_private (r_ );
@@ -627,7 +631,7 @@ simde_vcvtq_n_f64_s64(simde_int64x2_t a, const int n)
627
631
628
632
SIMDE_VECTORIZE
629
633
for (size_t i = 0 ; i < (sizeof (r_ .values ) / sizeof (r_ .values [0 ])) ; i ++ ) {
630
- r_ .values [i ] = HEDLEY_STATIC_CAST (simde_float64_t , HEDLEY_STATIC_CAST (simde_float64_t , a_ .values [i ]) / simde_math_pow (2 , n ));
634
+ r_ .values [i ] = HEDLEY_STATIC_CAST (simde_float64_t , HEDLEY_STATIC_CAST (simde_float64_t , a_ .values [i ]) / (( n == 64 ) ? simde_math_pow (2 , n ) : UINT64_C ( 1 ) << n ));
631
635
}
632
636
633
637
return simde_float64x2_from_private (r_ );
@@ -649,7 +653,7 @@ simde_vcvtq_n_f32_s32(simde_int32x4_t a, const int n)
649
653
650
654
SIMDE_VECTORIZE
651
655
for (size_t i = 0 ; i < (sizeof (r_ .values ) / sizeof (r_ .values [0 ])) ; i ++ ) {
652
- r_ .values [i ] = HEDLEY_STATIC_CAST (simde_float32_t , HEDLEY_STATIC_CAST (simde_float64_t , a_ .values [i ]) / simde_math_pow ( 2 , n ));
656
+ r_ .values [i ] = HEDLEY_STATIC_CAST (simde_float32_t , HEDLEY_STATIC_CAST (simde_float64_t , a_ .values [i ]) / ( UINT64_C ( 1 ) << n ));
653
657
}
654
658
655
659
return simde_float32x4_from_private (r_ );
@@ -671,7 +675,7 @@ simde_vcvtq_n_f32_u32(simde_uint32x4_t a, const int n)
671
675
672
676
SIMDE_VECTORIZE
673
677
for (size_t i = 0 ; i < (sizeof (r_ .values ) / sizeof (r_ .values [0 ])) ; i ++ ) {
674
- r_ .values [i ] = HEDLEY_STATIC_CAST (simde_float32_t , HEDLEY_STATIC_CAST (simde_float64_t , a_ .values [i ]) / simde_math_pow ( 2 , n ));
678
+ r_ .values [i ] = HEDLEY_STATIC_CAST (simde_float32_t , HEDLEY_STATIC_CAST (simde_float64_t , a_ .values [i ]) / ( UINT64_C ( 1 ) << n ));
675
679
}
676
680
677
681
return simde_float32x4_from_private (r_ );
0 commit comments