From 24aecf57115ff728c433a5ee50f7e99e8c12ccbe Mon Sep 17 00:00:00 2001 From: Antonio Maiorano Date: Mon, 23 Sep 2024 20:26:05 -0400 Subject: [PATCH 1/2] Fix assertion on splat of groupshared scalar When splatting a groupshared scalar, we would trip an "Invalid constantexpr cast!" assertion. This would happen while evaluating the ImplicitCastExpr to turn the groupshared scalar into a vector because the scalar expression was in a different address space (groupshared) vs the target vector (local). The fix is to ensure that when looking up the vector member expression, insert an lvalue-to-rvalue cast if necessary; i.e. when a swizzle contains duplicate elements. --- tools/clang/lib/Sema/SemaHLSL.cpp | 15 +++++++++++++-- .../test/CodeGenHLSL/SplatGroupSharedScalar.hlsl | 8 ++++++++ .../test/CodeGenSPIRV/op.vector.swizzle.hlsl | 12 ++++++------ .../CodeGenSPIRV/op.vector.swizzle.size1.hlsl | 8 ++++---- 4 files changed, 31 insertions(+), 12 deletions(-) create mode 100644 tools/clang/test/CodeGenHLSL/SplatGroupSharedScalar.hlsl diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index ac526b6d87..4b4f26983b 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -8444,8 +8444,19 @@ ExprResult HLSLExternalSource::LookupVectorMemberExprForHLSL( ExprValueKind VK = positions.ContainsDuplicateElements() ? VK_RValue : (IsArrow ? VK_LValue : BaseExpr.getValueKind()); - HLSLVectorElementExpr *vectorExpr = new (m_context) HLSLVectorElementExpr( - resultType, VK, &BaseExpr, *member, MemberLoc, positions); + + Expr *E = &BaseExpr; + // Insert an lvalue-to-rvalue cast if necessary + if (BaseExpr.getValueKind() == VK_LValue && VK == VK_RValue) { + // Remove qualifiers from result type and cast target type + resultType = resultType.getUnqualifiedType(); + auto targetType = E->getType().getUnqualifiedType(); + E = ImplicitCastExpr::Create(*m_context, targetType, + CastKind::CK_LValueToRValue, E, nullptr, + VK_RValue); + } + HLSLVectorElementExpr *vectorExpr = new (m_context) + HLSLVectorElementExpr(resultType, VK, E, *member, MemberLoc, positions); return vectorExpr; } diff --git a/tools/clang/test/CodeGenHLSL/SplatGroupSharedScalar.hlsl b/tools/clang/test/CodeGenHLSL/SplatGroupSharedScalar.hlsl new file mode 100644 index 0000000000..22b8625f19 --- /dev/null +++ b/tools/clang/test/CodeGenHLSL/SplatGroupSharedScalar.hlsl @@ -0,0 +1,8 @@ +// RUN: %dxc -E main -T cs_6_0 %s + +groupshared int a; +[numthreads(64, 1, 1)] +void main() { + a = 123; + int4 x = (a).xxxx; +} diff --git a/tools/clang/test/CodeGenSPIRV/op.vector.swizzle.hlsl b/tools/clang/test/CodeGenSPIRV/op.vector.swizzle.hlsl index 7b49cb2e57..8b08aed84c 100644 --- a/tools/clang/test/CodeGenSPIRV/op.vector.swizzle.hlsl +++ b/tools/clang/test/CodeGenSPIRV/op.vector.swizzle.hlsl @@ -118,18 +118,18 @@ void main() { // Keep lhs.1 // So final selectors to write to lhs.(0, 1, 2, 3): 6, 1, 4, 5 // CHECK-NEXT: [[v22:%[0-9]+]] = OpLoad %v2float %v2f -// CHECK-NEXT: [[vs15:%[0-9]+]] = OpVectorShuffle %v3float [[v22]] [[v22]] 0 1 0 +// CHECK-NEXT: [[vs15:%[0-9]+]] = OpVectorShuffle %v2float [[v22]] [[v22]] 1 0 +// CHECK-NEXT: [[vs16:%[0-9]+]] = OpVectorShuffle %v3float [[vs15]] [[vs15]] 1 0 1 // CHECK-NEXT: [[v23:%[0-9]+]] = OpLoad %v4float %v4f2 -// CHECK-NEXT: [[vs16:%[0-9]+]] = OpVectorShuffle %v4float [[v23]] [[vs15]] 6 1 4 5 -// CHECK-NEXT: OpStore %v4f2 [[vs16]] +// CHECK-NEXT: [[vs17:%[0-9]+]] = OpVectorShuffle %v4float [[v23]] [[vs16]] 6 1 4 5 +// CHECK-NEXT: OpStore %v4f2 [[vs17]] v4f2.wzx.grb = v2f.gr.yxy; // select more than original, write to a part // CHECK-NEXT: [[v24:%[0-9]+]] = OpLoad %v4float %v4f1 // CHECK-NEXT: OpStore %v4f2 [[v24]] v4f2.wzyx.abgr.xywz.rgab = v4f1.xyzw.xyzw.rgab.rgab; // from original vector to original vector - -// CHECK-NEXT: [[v24_0:%[0-9]+]] = OpAccessChain %_ptr_Function_float %v4f1 %int_2 -// CHECK-NEXT: [[ce1:%[0-9]+]] = OpLoad %float [[v24_0]] +// CHECK-NEXT: [[v24_0:%[0-9]+]] = OpLoad %v4float %v4f1 +// CHECK-NEXT: [[ce1:%[0-9]+]] = OpCompositeExtract %float [[v24_0]] 2 // CHECK-NEXT: [[ac4:%[0-9]+]] = OpAccessChain %_ptr_Function_float %v4f2 %int_1 // CHECK-NEXT: OpStore [[ac4]] [[ce1]] v4f2.wzyx.zy.x = v4f1.xzyx.y.x; // from one element (rvalue) to one element (lvalue) diff --git a/tools/clang/test/CodeGenSPIRV/op.vector.swizzle.size1.hlsl b/tools/clang/test/CodeGenSPIRV/op.vector.swizzle.size1.hlsl index b439fd38b4..7421e41c40 100644 --- a/tools/clang/test/CodeGenSPIRV/op.vector.swizzle.size1.hlsl +++ b/tools/clang/test/CodeGenSPIRV/op.vector.swizzle.size1.hlsl @@ -43,11 +43,11 @@ void main(float4 input: INPUT) { // Selecting from resources // CHECK: [[fptr:%[0-9]+]] = OpAccessChain %_ptr_Uniform_v4float %PerFrame %int_0 %uint_5 %int_0 -// CHECK-NEXT: [[elem:%[0-9]+]] = OpAccessChain %_ptr_Uniform_float [[fptr]] %int_3 -// CHECK-NEXT: {{%[0-9]+}} = OpLoad %float [[elem]] +// CHECK-NEXT: [[val:%[0-9]+]] = OpLoad %v4float [[fptr]] +// CHECK-NEXT: {{%[0-9]+}} = OpCompositeExtract %float [[val]] 3 v4f = input * PerFrame[5].f.www.r; // CHECK: [[fptr_0:%[0-9]+]] = OpAccessChain %_ptr_Uniform_v4float %PerFrame %int_0 %uint_6 %int_0 -// CHECK-NEXT: [[elem_0:%[0-9]+]] = OpAccessChain %_ptr_Uniform_float [[fptr_0]] %int_2 -// CHECK-NEXT: {{%[0-9]+}} = OpLoad %float [[elem_0]] +// CHECK-NEXT: [[val_0:%[0-9]+]] = OpLoad %v4float [[fptr_0]] +// CHECK-NEXT: {{%[0-9]+}} = OpCompositeExtract %float [[val_0]] 2 sf = PerFrame[6].f.zzz.r * input.y; } From 4a80f6bbb0882d0406436d5071b55cfb392194c2 Mon Sep 17 00:00:00 2001 From: Antonio Maiorano Date: Tue, 24 Sep 2024 15:11:45 -0400 Subject: [PATCH 2/2] Add verification to the HLSL test --- tools/clang/test/CodeGenHLSL/SplatGroupSharedScalar.hlsl | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/clang/test/CodeGenHLSL/SplatGroupSharedScalar.hlsl b/tools/clang/test/CodeGenHLSL/SplatGroupSharedScalar.hlsl index 22b8625f19..e588caea70 100644 --- a/tools/clang/test/CodeGenHLSL/SplatGroupSharedScalar.hlsl +++ b/tools/clang/test/CodeGenHLSL/SplatGroupSharedScalar.hlsl @@ -1,4 +1,11 @@ -// RUN: %dxc -E main -T cs_6_0 %s +// RUN: %dxc -E main -T cs_6_0 -fcgl %s | FileCheck %s + +// Validate that when swizzling requires an r-value (i.e. duplicate elements), that the result is stored to +// and loaded from a temporary. +// CHECK: store <1 x i32> %splat.splat, <1 x i32>* %tmp +// CHECK-NEXT: %1 = load <1 x i32>, <1 x i32>* %tmp +// CHECK-NEXT: %2 = shufflevector <1 x i32> %1, <1 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT: store <4 x i32> %2, <4 x i32>* %x groupshared int a; [numthreads(64, 1, 1)]