diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 9f52b277a4529..1c315d59b5564 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -3438,10 +3438,6 @@ def err_attribute_argument_is_zero : Error< def warn_attribute_argument_n_negative : Warning< "%0 attribute parameter %1 is negative and will be ignored">, InGroup; -def warn_reqd_sub_group_attribute_cuda_n_32 - : Warning<"attribute argument %0 is invalid and will be ignored; CUDA " - "requires sub_group size 32">, - InGroup; def err_property_function_in_objc_container : Error< "use of Objective-C property in function nested in Objective-C " "container not supported, move function outside its container">; @@ -3540,6 +3536,10 @@ def warn_dllimport_dropped_from_inline_function : Warning< def warn_attribute_on_direct_kernel_callee_only : Warning<"%0 attribute allowed" " only on a function directly called from a SYCL kernel function; attribute ignored">, InGroup; +def warn_reqd_sub_group_attribute_n + : Warning<"attribute argument %0 is invalid and will be ignored; %1 " + "requires sub_group size %2">, + InGroup; def warn_nothrow_attribute_ignored : Warning<"'nothrow' attribute conflicts with" " exception specification; attribute ignored">, InGroup; diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index bc5f749b1eb93..0d587445f729d 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -4020,9 +4020,25 @@ void Sema::AddIntelReqdSubGroupSize(Decl *D, const AttributeCommonInfo &CI, << CI << /*positive*/ 0; return; } - if (Context.getTargetInfo().getTriple().isNVPTX() && ArgVal != 32) { - Diag(E->getExprLoc(), diag::warn_reqd_sub_group_attribute_cuda_n_32) - << ArgVal.getSExtValue(); + auto &TI = Context.getTargetInfo(); + if (TI.getTriple().isNVPTX() && ArgVal != 32) + Diag(E->getExprLoc(), diag::warn_reqd_sub_group_attribute_n) + << ArgVal.getSExtValue() << TI.getTriple().getArchName() << 32; + if (TI.getTriple().isAMDGPU()) { + const auto HasWaveFrontSize64 = + TI.getTargetOpts().FeatureMap["wavefrontsize64"]; + const auto HasWaveFrontSize32 = + TI.getTargetOpts().FeatureMap["wavefrontsize32"]; + + // CDNA supports only 64 wave front size, for those GPUs allow subgroup + // size of 64. Some GPUs support both 32 and 64, for those (and the rest) + // only allow 32. Warn on incompatible sizes. + const auto SupportedWaveFrontSize = + HasWaveFrontSize64 && !HasWaveFrontSize32 ? 64 : 32; + if (ArgVal != SupportedWaveFrontSize) + Diag(E->getExprLoc(), diag::warn_reqd_sub_group_attribute_n) + << ArgVal.getSExtValue() << TI.getTriple().getArchName() + << SupportedWaveFrontSize; } // Check to see if there's a duplicate attribute with different values diff --git a/clang/test/SemaSYCL/reqd-sub-group-size-amd_32.cpp b/clang/test/SemaSYCL/reqd-sub-group-size-amd_32.cpp new file mode 100644 index 0000000000000..c97d428730dc8 --- /dev/null +++ b/clang/test/SemaSYCL/reqd-sub-group-size-amd_32.cpp @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -fsycl-is-device -triple amdgcn-amd-amdhsa -target-cpu gfx1010 -internal-isystem %S/Inputs -std=c++2b -verify %s + +// Sub-group size is optimized for 32, warn (and ignore the attribute) if the +// size is not 32. +#include "sycl.hpp" + +int main() { + + sycl::queue Q; + + Q.submit([&](sycl::handler &h) { + h.single_task([=] [[sycl::reqd_sub_group_size(64)]] {}); // expected-warning {{attribute argument 64 is invalid and will be ignored; amdgcn requires sub_group size 32}} + }); + + Q.submit([&](sycl::handler &h) { + h.single_task([=] [[sycl::reqd_sub_group_size(32)]] {}); + }); + + Q.submit([&](sycl::handler &h) { + h.single_task([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; amdgcn requires sub_group size 32}} + }); + + return 0; +} diff --git a/clang/test/SemaSYCL/reqd-sub-group-size-amd_64.cpp b/clang/test/SemaSYCL/reqd-sub-group-size-amd_64.cpp new file mode 100644 index 0000000000000..e5694c543a77d --- /dev/null +++ b/clang/test/SemaSYCL/reqd-sub-group-size-amd_64.cpp @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -fsycl-is-device -triple amdgcn-amd-amdhsa -target-cpu gfx90a -internal-isystem %S/Inputs -std=c++2b -verify %s + +// Sub-group size is optimized for 64, warn (and ignore the attribute) if the +// size is not 64. +#include "sycl.hpp" + +int main() { + + sycl::queue Q; + + Q.submit([&](sycl::handler &h) { + h.single_task([=] [[sycl::reqd_sub_group_size(64)]] {}); + }); + + Q.submit([&](sycl::handler &h) { + h.single_task([=] [[sycl::reqd_sub_group_size(32)]] {}); // expected-warning {{attribute argument 32 is invalid and will be ignored; amdgcn requires sub_group size 64}} + }); + + Q.submit([&](sycl::handler &h) { + h.single_task([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; amdgcn requires sub_group size 64}} + }); + + return 0; +} diff --git a/clang/test/SemaSYCL/reqd-sub-group-size-cuda.cpp b/clang/test/SemaSYCL/reqd-sub-group-size-cuda.cpp index 2d9087ddee31d..c7800186a6786 100644 --- a/clang/test/SemaSYCL/reqd-sub-group-size-cuda.cpp +++ b/clang/test/SemaSYCL/reqd-sub-group-size-cuda.cpp @@ -9,7 +9,7 @@ int main() { sycl::queue Q; Q.submit([&](sycl::handler &h) { - h.single_task([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; CUDA requires sub_group size 32}} + h.single_task([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; nvptx requires sub_group size 32}} }); Q.submit([&](sycl::handler &h) {