Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -3526,6 +3526,10 @@ def warn_dllimport_dropped_from_inline_function : Warning<
def warn_attribute_on_direct_kernel_callee_only : Warning<"%0 attribute allowed"
" only on a function directly called from a SYCL kernel function; attribute ignored">,
InGroup<IgnoredAttributes>;
def warn_amd_reqd_sub_group_attribute_n
: Warning<"attribute argument %0 is invalid and will be ignored; AMD "
"requires sub_group size %1">,
InGroup<IgnoredAttributes>;
def warn_nothrow_attribute_ignored : Warning<"'nothrow' attribute conflicts with"
" exception specification; attribute ignored">,
InGroup<IgnoredAttributes>;
Expand Down
19 changes: 18 additions & 1 deletion clang/lib/Sema/SemaDeclAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4007,10 +4007,27 @@ void Sema::AddIntelReqdSubGroupSize(Decl *D, const AttributeCommonInfo &CI,
<< CI << /*positive*/ 0;
return;
}
if (Context.getTargetInfo().getTriple().isNVPTX() && ArgVal != 32) {
auto &TI = Context.getTargetInfo();
if (TI.getTriple().isNVPTX() && ArgVal != 32) {
Diag(E->getExprLoc(), diag::warn_reqd_sub_group_attribute_cuda_n_32)
<< ArgVal.getSExtValue();
}
if (TI.getTriple().isAMDGPU()) {
const auto HasWaveFrontSize64 =
TI.getTargetOpts().FeatureMap["wavefrontsize64"];
const auto HasWaveFrontSize32 =
TI.getTargetOpts().FeatureMap["wavefrontsize32"];

// CDNA supports only 64 wave front size, for those GPUs allow subgroup
// size of 64. Some GPUs support both 32 and 64, for those (and the rest)
// only allow 32.
const auto SupportedWaveFrontSize =
HasWaveFrontSize64 && !HasWaveFrontSize32 ? 64 : 32;
if (ArgVal != SupportedWaveFrontSize) {
Diag(E->getExprLoc(), diag::warn_amd_reqd_sub_group_attribute_n)
<< ArgVal.getSExtValue() << SupportedWaveFrontSize;
}
}

// Check to see if there's a duplicate attribute with different values
// already applied to the declaration.
Expand Down
24 changes: 24 additions & 0 deletions clang/test/SemaSYCL/reqd-sub-group-size-amd_32.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// RUN: %clang_cc1 -fsycl-is-device -triple amdgcn-amd-amdhsa -target-cpu gfx1010 -internal-isystem %S/Inputs -std=c++2b -verify %s

// Sub-group size is optimized for 32, warn (and ignore the attribute) if the
// size is not 32.
#include "sycl.hpp"

int main() {

sycl::queue Q;

Q.submit([&](sycl::handler &h) {
h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(64)]] {}); // expected-warning {{attribute argument 64 is invalid and will be ignored; AMD requires sub_group size 32}}
});

Q.submit([&](sycl::handler &h) {
h.single_task<class valid_kernel>([=] [[sycl::reqd_sub_group_size(32)]] {});
});

Q.submit([&](sycl::handler &h) {
h.single_task<class valid_kernel>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; AMD requires sub_group size 32}}
});

return 0;
}
24 changes: 24 additions & 0 deletions clang/test/SemaSYCL/reqd-sub-group-size-amd_64.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// RUN: %clang_cc1 -fsycl-is-device -triple amdgcn-amd-amdhsa -target-cpu gfx90a -internal-isystem %S/Inputs -std=c++2b -verify %s

// Sub-group size is optimized for 64, warn (and ignore the attribute) if the
// size is not 64.
#include "sycl.hpp"

int main() {

sycl::queue Q;

Q.submit([&](sycl::handler &h) {
h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(64)]] {});
});

Q.submit([&](sycl::handler &h) {
h.single_task<class valid_kernel>([=] [[sycl::reqd_sub_group_size(32)]] {}); // expected-warning {{attribute argument 32 is invalid and will be ignored; AMD requires sub_group size 64}}
});

Q.submit([&](sycl::handler &h) {
h.single_task<class valid_kernel>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; AMD requires sub_group size 64}}
});

return 0;
}