Skip to content

Commit 6bce7f6

Browse files
authored
[SYCL] [AMDGPU] Ignore incorrect sub-group size (#11687)
CDNA supports only 64 wave front size, for those GPUs allow subgroup size of 64. Some GPUs support both 32 and 64, for those (and the rest) only allow 32.
1 parent ff48612 commit 6bce7f6

File tree

5 files changed

+72
-8
lines changed

5 files changed

+72
-8
lines changed

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3438,10 +3438,6 @@ def err_attribute_argument_is_zero : Error<
34383438
def warn_attribute_argument_n_negative : Warning<
34393439
"%0 attribute parameter %1 is negative and will be ignored">,
34403440
InGroup<CudaCompat>;
3441-
def warn_reqd_sub_group_attribute_cuda_n_32
3442-
: Warning<"attribute argument %0 is invalid and will be ignored; CUDA "
3443-
"requires sub_group size 32">,
3444-
InGroup<CudaCompat>;
34453441
def err_property_function_in_objc_container : Error<
34463442
"use of Objective-C property in function nested in Objective-C "
34473443
"container not supported, move function outside its container">;
@@ -3540,6 +3536,10 @@ def warn_dllimport_dropped_from_inline_function : Warning<
35403536
def warn_attribute_on_direct_kernel_callee_only : Warning<"%0 attribute allowed"
35413537
" only on a function directly called from a SYCL kernel function; attribute ignored">,
35423538
InGroup<IgnoredAttributes>;
3539+
def warn_reqd_sub_group_attribute_n
3540+
: Warning<"attribute argument %0 is invalid and will be ignored; %1 "
3541+
"requires sub_group size %2">,
3542+
InGroup<IgnoredAttributes>;
35433543
def warn_nothrow_attribute_ignored : Warning<"'nothrow' attribute conflicts with"
35443544
" exception specification; attribute ignored">,
35453545
InGroup<IgnoredAttributes>;

clang/lib/Sema/SemaDeclAttr.cpp

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4020,9 +4020,25 @@ void Sema::AddIntelReqdSubGroupSize(Decl *D, const AttributeCommonInfo &CI,
40204020
<< CI << /*positive*/ 0;
40214021
return;
40224022
}
4023-
if (Context.getTargetInfo().getTriple().isNVPTX() && ArgVal != 32) {
4024-
Diag(E->getExprLoc(), diag::warn_reqd_sub_group_attribute_cuda_n_32)
4025-
<< ArgVal.getSExtValue();
4023+
auto &TI = Context.getTargetInfo();
4024+
if (TI.getTriple().isNVPTX() && ArgVal != 32)
4025+
Diag(E->getExprLoc(), diag::warn_reqd_sub_group_attribute_n)
4026+
<< ArgVal.getSExtValue() << TI.getTriple().getArchName() << 32;
4027+
if (TI.getTriple().isAMDGPU()) {
4028+
const auto HasWaveFrontSize64 =
4029+
TI.getTargetOpts().FeatureMap["wavefrontsize64"];
4030+
const auto HasWaveFrontSize32 =
4031+
TI.getTargetOpts().FeatureMap["wavefrontsize32"];
4032+
4033+
// CDNA supports only 64 wave front size, for those GPUs allow subgroup
4034+
// size of 64. Some GPUs support both 32 and 64, for those (and the rest)
4035+
// only allow 32. Warn on incompatible sizes.
4036+
const auto SupportedWaveFrontSize =
4037+
HasWaveFrontSize64 && !HasWaveFrontSize32 ? 64 : 32;
4038+
if (ArgVal != SupportedWaveFrontSize)
4039+
Diag(E->getExprLoc(), diag::warn_reqd_sub_group_attribute_n)
4040+
<< ArgVal.getSExtValue() << TI.getTriple().getArchName()
4041+
<< SupportedWaveFrontSize;
40264042
}
40274043

40284044
// Check to see if there's a duplicate attribute with different values
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// RUN: %clang_cc1 -fsycl-is-device -triple amdgcn-amd-amdhsa -target-cpu gfx1010 -internal-isystem %S/Inputs -std=c++2b -verify %s
2+
3+
// Sub-group size is optimized for 32, warn (and ignore the attribute) if the
4+
// size is not 32.
5+
#include "sycl.hpp"
6+
7+
int main() {
8+
9+
sycl::queue Q;
10+
11+
Q.submit([&](sycl::handler &h) {
12+
h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(64)]] {}); // expected-warning {{attribute argument 64 is invalid and will be ignored; amdgcn requires sub_group size 32}}
13+
});
14+
15+
Q.submit([&](sycl::handler &h) {
16+
h.single_task<class valid_kernel>([=] [[sycl::reqd_sub_group_size(32)]] {});
17+
});
18+
19+
Q.submit([&](sycl::handler &h) {
20+
h.single_task<class invalid_kernel_2>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; amdgcn requires sub_group size 32}}
21+
});
22+
23+
return 0;
24+
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// RUN: %clang_cc1 -fsycl-is-device -triple amdgcn-amd-amdhsa -target-cpu gfx90a -internal-isystem %S/Inputs -std=c++2b -verify %s
2+
3+
// Sub-group size is optimized for 64, warn (and ignore the attribute) if the
4+
// size is not 64.
5+
#include "sycl.hpp"
6+
7+
int main() {
8+
9+
sycl::queue Q;
10+
11+
Q.submit([&](sycl::handler &h) {
12+
h.single_task<class valid_kernel>([=] [[sycl::reqd_sub_group_size(64)]] {});
13+
});
14+
15+
Q.submit([&](sycl::handler &h) {
16+
h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(32)]] {}); // expected-warning {{attribute argument 32 is invalid and will be ignored; amdgcn requires sub_group size 64}}
17+
});
18+
19+
Q.submit([&](sycl::handler &h) {
20+
h.single_task<class invalid_kernel_2>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; amdgcn requires sub_group size 64}}
21+
});
22+
23+
return 0;
24+
}

clang/test/SemaSYCL/reqd-sub-group-size-cuda.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ int main() {
99
sycl::queue Q;
1010

1111
Q.submit([&](sycl::handler &h) {
12-
h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; CUDA requires sub_group size 32}}
12+
h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; nvptx requires sub_group size 32}}
1313
});
1414

1515
Q.submit([&](sycl::handler &h) {

0 commit comments

Comments
 (0)