Skip to content

Commit fc2763a

Browse files
committed
Improve support for /Zc:arm64-aliased-neon-types-
1 parent f639e2d commit fc2763a

File tree

3 files changed

+26
-26
lines changed

3 files changed

+26
-26
lines changed

Inc/DirectXMath.h

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -411,7 +411,7 @@ namespace DirectX
411411
#elif defined(_XM_SSE_INTRINSICS_)
412412
inline operator __m128i() const noexcept { return _mm_castps_si128(v); }
413413
inline operator __m128d() const noexcept { return _mm_castps_pd(v); }
414-
#elif defined(_XM_ARM_NEON_INTRINSICS_) && defined(__GNUC__)
414+
#elif defined(_XM_ARM_NEON_INTRINSICS_) && (defined(__GNUC__) || defined(_ARM64_DISTINCT_NEON_TYPES))
415415
inline operator int32x4_t() const noexcept { return vreinterpretq_s32_f32(v); }
416416
inline operator uint32x4_t() const noexcept { return vreinterpretq_u32_f32(v); }
417417
#endif
@@ -430,7 +430,7 @@ namespace DirectX
430430
#elif defined(_XM_SSE_INTRINSICS_)
431431
inline operator __m128i() const noexcept { return _mm_castps_si128(v); }
432432
inline operator __m128d() const noexcept { return _mm_castps_pd(v); }
433-
#elif defined(_XM_ARM_NEON_INTRINSICS_) && defined(__GNUC__)
433+
#elif defined(_XM_ARM_NEON_INTRINSICS_) && (defined(__GNUC__) || defined(_ARM64_DISTINCT_NEON_TYPES))
434434
inline operator int32x4_t() const noexcept { return vreinterpretq_s32_f32(v); }
435435
inline operator uint32x4_t() const noexcept { return vreinterpretq_u32_f32(v); }
436436
#endif
@@ -449,7 +449,7 @@ namespace DirectX
449449
#elif defined(_XM_SSE_INTRINSICS_)
450450
inline operator __m128i() const noexcept { return _mm_castps_si128(v); }
451451
inline operator __m128d() const noexcept { return _mm_castps_pd(v); }
452-
#elif defined(_XM_ARM_NEON_INTRINSICS_) && defined(__GNUC__)
452+
#elif defined(_XM_ARM_NEON_INTRINSICS_) && (defined(__GNUC__) || defined(_ARM64_DISTINCT_NEON_TYPES))
453453
inline operator int32x4_t() const noexcept { return vreinterpretq_s32_f32(v); }
454454
inline operator uint32x4_t() const noexcept { return vreinterpretq_u32_f32(v); }
455455
#endif
@@ -468,7 +468,7 @@ namespace DirectX
468468
#elif defined(_XM_SSE_INTRINSICS_)
469469
inline operator __m128i() const noexcept { return _mm_castps_si128(v); }
470470
inline operator __m128d() const noexcept { return _mm_castps_pd(v); }
471-
#elif defined(_XM_ARM_NEON_INTRINSICS_) && defined(__GNUC__)
471+
#elif defined(_XM_ARM_NEON_INTRINSICS_) && (defined(__GNUC__) || defined(_ARM64_DISTINCT_NEON_TYPES))
472472
inline operator int32x4_t() const noexcept { return vreinterpretq_s32_f32(v); }
473473
inline operator uint32x4_t() const noexcept { return vreinterpretq_u32_f32(v); }
474474
#endif

Inc/DirectXMathConvert.inl

Lines changed: 18 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -305,7 +305,7 @@ inline XMVECTOR XM_CALLCONV XMLoadInt2A(const uint32_t* pSource) noexcept
305305
V.vector4_u32[3] = 0;
306306
return V;
307307
#elif defined(_XM_ARM_NEON_INTRINSICS_)
308-
#if defined(_MSC_VER) && !defined(__clang__)
308+
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
309309
uint32x2_t x = vld1_u32_ex(pSource, 64);
310310
#else
311311
uint32x2_t x = vld1_u32(pSource);
@@ -352,7 +352,7 @@ inline XMVECTOR XM_CALLCONV XMLoadFloat2A(const XMFLOAT2A* pSource) noexcept
352352
V.vector4_f32[3] = 0.f;
353353
return V;
354354
#elif defined(_XM_ARM_NEON_INTRINSICS_)
355-
#if defined(_MSC_VER) && !defined(__clang__)
355+
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
356356
float32x2_t x = vld1_f32_ex(reinterpret_cast<const float*>(pSource), 64);
357357
#else
358358
float32x2_t x = vld1_f32(reinterpret_cast<const float*>(pSource));
@@ -465,7 +465,7 @@ inline XMVECTOR XM_CALLCONV XMLoadInt3A(const uint32_t* pSource) noexcept
465465
return V;
466466
#elif defined(_XM_ARM_NEON_INTRINSICS_)
467467
// Reads an extra integer which is zero'd
468-
#if defined(_MSC_VER) && !defined(__clang__)
468+
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
469469
uint32x4_t V = vld1q_u32_ex(pSource, 128);
470470
#else
471471
uint32x4_t V = vld1q_u32(pSource);
@@ -525,7 +525,7 @@ inline XMVECTOR XM_CALLCONV XMLoadFloat3A(const XMFLOAT3A* pSource) noexcept
525525
return V;
526526
#elif defined(_XM_ARM_NEON_INTRINSICS_)
527527
// Reads an extra float which is zero'd
528-
#if defined(_MSC_VER) && !defined(__clang__)
528+
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
529529
float32x4_t V = vld1q_f32_ex(reinterpret_cast<const float*>(pSource), 128);
530530
#else
531531
float32x4_t V = vld1q_f32(reinterpret_cast<const float*>(pSource));
@@ -639,7 +639,7 @@ inline XMVECTOR XM_CALLCONV XMLoadInt4A(const uint32_t* pSource) noexcept
639639
V.vector4_u32[3] = pSource[3];
640640
return V;
641641
#elif defined(_XM_ARM_NEON_INTRINSICS_)
642-
#if defined(_MSC_VER) && !defined(__clang__)
642+
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
643643
return vld1q_u32_ex(pSource, 128);
644644
#else
645645
return vreinterpretq_f32_u32(vld1q_u32(pSource));
@@ -683,7 +683,7 @@ inline XMVECTOR XM_CALLCONV XMLoadFloat4A(const XMFLOAT4A* pSource) noexcept
683683
V.vector4_f32[3] = pSource->w;
684684
return V;
685685
#elif defined(_XM_ARM_NEON_INTRINSICS_)
686-
#if defined(_MSC_VER) && !defined(__clang__)
686+
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
687687
return vld1q_f32_ex(reinterpret_cast<const float*>(pSource), 128);
688688
#else
689689
return vld1q_f32(reinterpret_cast<const float*>(pSource));
@@ -919,7 +919,7 @@ inline XMMATRIX XM_CALLCONV XMLoadFloat4x3A(const XMFLOAT4X3A* pSource) noexcept
919919
return M;
920920

921921
#elif defined(_XM_ARM_NEON_INTRINSICS_)
922-
#if defined(_MSC_VER) && !defined(__clang__)
922+
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
923923
float32x4_t v0 = vld1q_f32_ex(&pSource->m[0][0], 128);
924924
float32x4_t v1 = vld1q_f32_ex(&pSource->m[1][1], 128);
925925
float32x4_t v2 = vld1q_f32_ex(&pSource->m[2][2], 128);
@@ -1081,7 +1081,7 @@ inline XMMATRIX XM_CALLCONV XMLoadFloat3x4A(const XMFLOAT3X4A* pSource) noexcept
10811081
return M;
10821082

10831083
#elif defined(_XM_ARM_NEON_INTRINSICS_)
1084-
#if defined(_MSC_VER) && !defined(__clang__)
1084+
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
10851085
float32x2x4_t vTemp0 = vld4_f32_ex(&pSource->_11, 128);
10861086
float32x4_t vTemp1 = vld1q_f32_ex(&pSource->_31, 128);
10871087
#else
@@ -1212,7 +1212,7 @@ inline XMMATRIX XM_CALLCONV XMLoadFloat4x4A(const XMFLOAT4X4A* pSource) noexcept
12121212

12131213
#elif defined(_XM_ARM_NEON_INTRINSICS_)
12141214
XMMATRIX M;
1215-
#if defined(_MSC_VER) && !defined(__clang__)
1215+
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
12161216
M.r[0] = vld1q_f32_ex(reinterpret_cast<const float*>(&pSource->_11), 128);
12171217
M.r[1] = vld1q_f32_ex(reinterpret_cast<const float*>(&pSource->_21), 128);
12181218
M.r[2] = vld1q_f32_ex(reinterpret_cast<const float*>(&pSource->_31), 128);
@@ -1309,7 +1309,7 @@ inline void XM_CALLCONV XMStoreInt2A
13091309
pDestination[1] = V.vector4_u32[1];
13101310
#elif defined(_XM_ARM_NEON_INTRINSICS_)
13111311
uint32x2_t VL = vget_low_u32(vreinterpretq_u32_f32(V));
1312-
#if defined(_MSC_VER) && !defined(__clang__)
1312+
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
13131313
vst1_u32_ex(pDestination, VL, 64);
13141314
#else
13151315
vst1_u32(pDestination, VL);
@@ -1354,7 +1354,7 @@ inline void XM_CALLCONV XMStoreFloat2A
13541354
pDestination->y = V.vector4_f32[1];
13551355
#elif defined(_XM_ARM_NEON_INTRINSICS_)
13561356
float32x2_t VL = vget_low_f32(V);
1357-
#if defined(_MSC_VER) && !defined(__clang__)
1357+
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
13581358
vst1_f32_ex(reinterpret_cast<float*>(pDestination), VL, 64);
13591359
#else
13601360
vst1_f32(reinterpret_cast<float*>(pDestination), VL);
@@ -1473,7 +1473,7 @@ inline void XM_CALLCONV XMStoreInt3A
14731473
pDestination[2] = V.vector4_u32[2];
14741474
#elif defined(_XM_ARM_NEON_INTRINSICS_)
14751475
uint32x2_t VL = vget_low_u32(vreinterpretq_u32_f32(V));
1476-
#if defined(_MSC_VER) && !defined(__clang__)
1476+
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
14771477
vst1_u32_ex(pDestination, VL, 64);
14781478
#else
14791479
vst1_u32(pDestination, VL);
@@ -1530,7 +1530,7 @@ inline void XM_CALLCONV XMStoreFloat3A
15301530
pDestination->z = V.vector4_f32[2];
15311531
#elif defined(_XM_ARM_NEON_INTRINSICS_)
15321532
float32x2_t VL = vget_low_f32(V);
1533-
#if defined(_MSC_VER) && !defined(__clang__)
1533+
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
15341534
vst1_f32_ex(reinterpret_cast<float*>(pDestination), VL, 64);
15351535
#else
15361536
vst1_f32(reinterpret_cast<float*>(pDestination), VL);
@@ -1660,7 +1660,7 @@ inline void XM_CALLCONV XMStoreInt4A
16601660
pDestination[2] = V.vector4_u32[2];
16611661
pDestination[3] = V.vector4_u32[3];
16621662
#elif defined(_XM_ARM_NEON_INTRINSICS_)
1663-
#if defined(_MSC_VER) && !defined(__clang__)
1663+
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
16641664
vst1q_u32_ex(pDestination, V, 128);
16651665
#else
16661666
vst1q_u32(pDestination, vreinterpretq_u32_f32(V));
@@ -1707,7 +1707,7 @@ inline void XM_CALLCONV XMStoreFloat4A
17071707
pDestination->z = V.vector4_f32[2];
17081708
pDestination->w = V.vector4_f32[3];
17091709
#elif defined(_XM_ARM_NEON_INTRINSICS_)
1710-
#if defined(_MSC_VER) && !defined(__clang__)
1710+
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
17111711
vst1q_f32_ex(reinterpret_cast<float*>(pDestination), V, 128);
17121712
#else
17131713
vst1q_f32(reinterpret_cast<float*>(pDestination), V);
@@ -1917,7 +1917,7 @@ inline void XM_CALLCONV XMStoreFloat4x3A
19171917
pDestination->m[3][2] = M.r[3].vector4_f32[2];
19181918

19191919
#elif defined(_XM_ARM_NEON_INTRINSICS_)
1920-
#if defined(_MSC_VER) && !defined(__clang__)
1920+
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
19211921
float32x4_t T1 = vextq_f32(M.r[0], M.r[1], 1);
19221922
float32x4_t T2 = vbslq_f32(g_XMMask3, M.r[0], T1);
19231923
vst1q_f32_ex(&pDestination->m[0][0], T2, 128);
@@ -2061,7 +2061,7 @@ inline void XM_CALLCONV XMStoreFloat3x4A
20612061
float32x4x2_t T0 = vzipq_f32(P0.val[0], P1.val[0]);
20622062
float32x4x2_t T1 = vzipq_f32(P0.val[1], P1.val[1]);
20632063

2064-
#if defined(_MSC_VER) && !defined(__clang__)
2064+
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
20652065
vst1q_f32_ex(&pDestination->m[0][0], T0.val[0], 128);
20662066
vst1q_f32_ex(&pDestination->m[1][0], T0.val[1], 128);
20672067
vst1q_f32_ex(&pDestination->m[2][0], T1.val[0], 128);
@@ -2170,7 +2170,7 @@ inline void XM_CALLCONV XMStoreFloat4x4A
21702170
pDestination->m[3][3] = M.r[3].vector4_f32[3];
21712171

21722172
#elif defined(_XM_ARM_NEON_INTRINSICS_)
2173-
#if defined(_MSC_VER) && !defined(__clang__)
2173+
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
21742174
vst1q_f32_ex(reinterpret_cast<float*>(&pDestination->_11), M.r[0], 128);
21752175
vst1q_f32_ex(reinterpret_cast<float*>(&pDestination->_21), M.r[1], 128);
21762176
vst1q_f32_ex(reinterpret_cast<float*>(&pDestination->_31), M.r[2], 128);

Inc/DirectXMathVector.inl

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1734,7 +1734,7 @@ inline XMVECTOR XM_CALLCONV XMVectorNearEqual
17341734

17351735
#elif defined(_XM_ARM_NEON_INTRINSICS_)
17361736
float32x4_t vDelta = vsubq_f32(V1, V2);
1737-
#if defined(_MSC_VER) && !defined(__clang__)
1737+
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
17381738
return vacleq_f32(vDelta, Epsilon);
17391739
#else
17401740
return vreinterpretq_f32_u32(vcleq_f32(vabsq_f32(vDelta), Epsilon));
@@ -6328,7 +6328,7 @@ inline bool XM_CALLCONV XMVector2NearEqual
63286328
(dy <= Epsilon.vector4_f32[1]));
63296329
#elif defined(_XM_ARM_NEON_INTRINSICS_)
63306330
float32x2_t vDelta = vsub_f32(vget_low_f32(V1), vget_low_f32(V2));
6331-
#if defined(_MSC_VER) && !defined(__clang__)
6331+
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
63326332
uint32x2_t vTemp = vacle_f32(vDelta, vget_low_u32(Epsilon));
63336333
#else
63346334
uint32x2_t vTemp = vcle_f32(vabs_f32(vDelta), vget_low_f32(Epsilon));
@@ -9057,7 +9057,7 @@ inline bool XM_CALLCONV XMVector3NearEqual
90579057
(dz <= Epsilon.vector4_f32[2])) != 0);
90589058
#elif defined(_XM_ARM_NEON_INTRINSICS_)
90599059
float32x4_t vDelta = vsubq_f32(V1, V2);
9060-
#if defined(_MSC_VER) && !defined(__clang__)
9060+
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
90619061
uint32x4_t vResult = vacleq_f32(vDelta, Epsilon);
90629062
#else
90639063
uint32x4_t vResult = vcleq_f32(vabsq_f32(vDelta), Epsilon);
@@ -12924,7 +12924,7 @@ inline bool XM_CALLCONV XMVector4NearEqual
1292412924
(dw <= Epsilon.vector4_f32[3])) != 0);
1292512925
#elif defined(_XM_ARM_NEON_INTRINSICS_)
1292612926
float32x4_t vDelta = vsubq_f32(V1, V2);
12927-
#if defined(_MSC_VER) && !defined(__clang__)
12927+
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_ARM64_DISTINCT_NEON_TYPES)
1292812928
uint32x4_t vResult = vacleq_f32(vDelta, Epsilon);
1292912929
#else
1293012930
uint32x4_t vResult = vcleq_f32(vabsq_f32(vDelta), Epsilon);

0 commit comments

Comments
 (0)