[SYCL] [AMDGPU] Ignore incorrect sub-group size (#11687)

Jakub Chlanda · web-flow · commit 6bce7f64f51a · 2023-11-21T12:09:46.000-08:00
CDNA supports only 64 wave front size, for those GPUs allow subgroup
size of 64. Some GPUs support both 32 and 64, for those (and the rest)
only allow 32.
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3438,10 +3438,6 @@ def err_attribute_argument_is_zero : Error<
 def warn_attribute_argument_n_negative : Warning<
   "%0 attribute parameter %1 is negative and will be ignored">,
   InGroup<CudaCompat>;
-def warn_reqd_sub_group_attribute_cuda_n_32
-    : Warning<"attribute argument %0 is invalid and will be ignored; CUDA "
-              "requires sub_group size 32">,
-      InGroup<CudaCompat>;
 def err_property_function_in_objc_container : Error<
   "use of Objective-C property in function nested in Objective-C "
   "container not supported, move function outside its container">;
@@ -3540,6 +3536,10 @@ def warn_dllimport_dropped_from_inline_function : Warning<
 def warn_attribute_on_direct_kernel_callee_only : Warning<"%0 attribute allowed"
   " only on a function directly called from a SYCL kernel function; attribute ignored">,
   InGroup<IgnoredAttributes>;
+def warn_reqd_sub_group_attribute_n
+    : Warning<"attribute argument %0 is invalid and will be ignored; %1 "
+              "requires sub_group size %2">,
+  InGroup<IgnoredAttributes>;
 def warn_nothrow_attribute_ignored : Warning<"'nothrow' attribute conflicts with"
   " exception specification; attribute ignored">,
   InGroup<IgnoredAttributes>;
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -4020,9 +4020,25 @@ void Sema::AddIntelReqdSubGroupSize(Decl *D, const AttributeCommonInfo &CI,
           << CI << /*positive*/ 0;
       return;
     }
-    if (Context.getTargetInfo().getTriple().isNVPTX() && ArgVal != 32) {
-      Diag(E->getExprLoc(), diag::warn_reqd_sub_group_attribute_cuda_n_32)
-          << ArgVal.getSExtValue();
+    auto &TI = Context.getTargetInfo();
+    if (TI.getTriple().isNVPTX() && ArgVal != 32)
+      Diag(E->getExprLoc(), diag::warn_reqd_sub_group_attribute_n)
+          << ArgVal.getSExtValue() << TI.getTriple().getArchName() << 32;
+    if (TI.getTriple().isAMDGPU()) {
+      const auto HasWaveFrontSize64 =
+          TI.getTargetOpts().FeatureMap["wavefrontsize64"];
+      const auto HasWaveFrontSize32 =
+          TI.getTargetOpts().FeatureMap["wavefrontsize32"];
+
+      // CDNA supports only 64 wave front size, for those GPUs allow subgroup
+      // size of 64. Some GPUs support both 32 and 64, for those (and the rest)
+      // only allow 32. Warn on incompatible sizes.
+      const auto SupportedWaveFrontSize =
+          HasWaveFrontSize64 && !HasWaveFrontSize32 ? 64 : 32;
+      if (ArgVal != SupportedWaveFrontSize)
+        Diag(E->getExprLoc(), diag::warn_reqd_sub_group_attribute_n)
+            << ArgVal.getSExtValue() << TI.getTriple().getArchName()
+            << SupportedWaveFrontSize;
     }
 
     // Check to see if there's a duplicate attribute with different values
diff --git a/clang/test/SemaSYCL/reqd-sub-group-size-amd_32.cpp b/clang/test/SemaSYCL/reqd-sub-group-size-amd_32.cpp
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -fsycl-is-device -triple amdgcn-amd-amdhsa -target-cpu gfx1010 -internal-isystem %S/Inputs -std=c++2b -verify %s
+
+// Sub-group size is optimized for 32, warn (and ignore the attribute) if the
+// size is not 32.
+#include "sycl.hpp"
+
+int main() {
+
+  sycl::queue Q;
+
+  Q.submit([&](sycl::handler &h) {
+    h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(64)]] {}); // expected-warning {{attribute argument 64 is invalid and will be ignored; amdgcn requires sub_group size 32}}
+  });
+
+  Q.submit([&](sycl::handler &h) {
+    h.single_task<class valid_kernel>([=] [[sycl::reqd_sub_group_size(32)]] {});
+  });
+
+  Q.submit([&](sycl::handler &h) {
+    h.single_task<class invalid_kernel_2>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; amdgcn requires sub_group size 32}}
+  });
+
+  return 0;
+}
diff --git a/clang/test/SemaSYCL/reqd-sub-group-size-amd_64.cpp b/clang/test/SemaSYCL/reqd-sub-group-size-amd_64.cpp
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -fsycl-is-device -triple amdgcn-amd-amdhsa -target-cpu gfx90a -internal-isystem %S/Inputs -std=c++2b -verify %s
+
+// Sub-group size is optimized for 64, warn (and ignore the attribute) if the
+// size is not 64.
+#include "sycl.hpp"
+
+int main() {
+
+  sycl::queue Q;
+
+  Q.submit([&](sycl::handler &h) {
+    h.single_task<class valid_kernel>([=] [[sycl::reqd_sub_group_size(64)]] {});
+  });
+
+  Q.submit([&](sycl::handler &h) {
+    h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(32)]] {}); // expected-warning {{attribute argument 32 is invalid and will be ignored; amdgcn requires sub_group size 64}}
+  });
+
+  Q.submit([&](sycl::handler &h) {
+    h.single_task<class invalid_kernel_2>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; amdgcn requires sub_group size 64}}
+  });
+
+  return 0;
+}
diff --git a/clang/test/SemaSYCL/reqd-sub-group-size-cuda.cpp b/clang/test/SemaSYCL/reqd-sub-group-size-cuda.cpp
@@ -9,7 +9,7 @@ int main() {
   sycl::queue Q;
 
   Q.submit([&](sycl::handler &h) {
-    h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; CUDA requires sub_group size 32}}
+    h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; nvptx requires sub_group size 32}}
   });
 
   Q.submit([&](sycl::handler &h) {