intel · againull · Nov 21, 2023 · Oct 27, 2023 · Nov 1, 2023 · Nov 6, 2023
@@ -3438,10 +3438,6 @@ def err_attribute_argument_is_zero : Error<
 def warn_attribute_argument_n_negative : Warning<
   "%0 attribute parameter %1 is negative and will be ignored">,
   InGroup<CudaCompat>;
-def warn_reqd_sub_group_attribute_cuda_n_32
-    : Warning<"attribute argument %0 is invalid and will be ignored; CUDA "
-              "requires sub_group size 32">,
-      InGroup<CudaCompat>;
 def err_property_function_in_objc_container : Error<
   "use of Objective-C property in function nested in Objective-C "
   "container not supported, move function outside its container">;
@@ -3540,6 +3536,10 @@ def warn_dllimport_dropped_from_inline_function : Warning<
 def warn_attribute_on_direct_kernel_callee_only : Warning<"%0 attribute allowed"
   " only on a function directly called from a SYCL kernel function; attribute ignored">,
   InGroup<IgnoredAttributes>;
+def warn_reqd_sub_group_attribute_n
+    : Warning<"attribute argument %0 is invalid and will be ignored; %1 "
+              "requires sub_group size %2">,
+  InGroup<IgnoredAttributes>;
 def warn_nothrow_attribute_ignored : Warning<"'nothrow' attribute conflicts with"
   " exception specification; attribute ignored">,
   InGroup<IgnoredAttributes>;

@@ -4020,9 +4020,25 @@ void Sema::AddIntelReqdSubGroupSize(Decl *D, const AttributeCommonInfo &CI,
           << CI << /*positive*/ 0;
       return;
     }
-    if (Context.getTargetInfo().getTriple().isNVPTX() && ArgVal != 32) {
-      Diag(E->getExprLoc(), diag::warn_reqd_sub_group_attribute_cuda_n_32)
-          << ArgVal.getSExtValue();
+    auto &TI = Context.getTargetInfo();
+    if (TI.getTriple().isNVPTX() && ArgVal != 32)
+      Diag(E->getExprLoc(), diag::warn_reqd_sub_group_attribute_n)
+          << ArgVal.getSExtValue() << TI.getTriple().getArchName() << 32;
+    if (TI.getTriple().isAMDGPU()) {
+      const auto HasWaveFrontSize64 =
+          TI.getTargetOpts().FeatureMap["wavefrontsize64"];
+      const auto HasWaveFrontSize32 =
+          TI.getTargetOpts().FeatureMap["wavefrontsize32"];
+
+      // CDNA supports only 64 wave front size, for those GPUs allow subgroup
+      // size of 64. Some GPUs support both 32 and 64, for those (and the rest)
+      // only allow 32. Warn on incompatible sizes.
+      const auto SupportedWaveFrontSize =
+          HasWaveFrontSize64 && !HasWaveFrontSize32 ? 64 : 32;
+      if (ArgVal != SupportedWaveFrontSize)
+        Diag(E->getExprLoc(), diag::warn_reqd_sub_group_attribute_n)
+            << ArgVal.getSExtValue() << TI.getTriple().getArchName()
+            << SupportedWaveFrontSize;
     }
 
     // Check to see if there's a duplicate attribute with different values

@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -fsycl-is-device -triple amdgcn-amd-amdhsa -target-cpu gfx1010 -internal-isystem %S/Inputs -std=c++2b -verify %s
+
+// Sub-group size is optimized for 32, warn (and ignore the attribute) if the
+// size is not 32.
+#include "sycl.hpp"
+
+int main() {
+
+  sycl::queue Q;
+
+  Q.submit([&](sycl::handler &h) {
+    h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(64)]] {}); // expected-warning {{attribute argument 64 is invalid and will be ignored; amdgcn requires sub_group size 32}}
+  });
+
+  Q.submit([&](sycl::handler &h) {
+    h.single_task<class valid_kernel>([=] [[sycl::reqd_sub_group_size(32)]] {});
+  });
+
+  Q.submit([&](sycl::handler &h) {
+    h.single_task<class invalid_kernel_2>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; amdgcn requires sub_group size 32}}
+  });
+
+  return 0;
+}
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -fsycl-is-device -triple amdgcn-amd-amdhsa -target-cpu gfx90a -internal-isystem %S/Inputs -std=c++2b -verify %s
+
+// Sub-group size is optimized for 64, warn (and ignore the attribute) if the
+// size is not 64.
+#include "sycl.hpp"
+
+int main() {
+
+  sycl::queue Q;
+
+  Q.submit([&](sycl::handler &h) {
+    h.single_task<class valid_kernel>([=] [[sycl::reqd_sub_group_size(64)]] {});
+  });
+
+  Q.submit([&](sycl::handler &h) {
+    h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(32)]] {}); // expected-warning {{attribute argument 32 is invalid and will be ignored; amdgcn requires sub_group size 64}}
+  });
+
+  Q.submit([&](sycl::handler &h) {
+    h.single_task<class invalid_kernel_2>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; amdgcn requires sub_group size 64}}
+  });
+
+  return 0;
+}
@@ -9,7 +9,7 @@ int main() {
   sycl::queue Q;
 
   Q.submit([&](sycl::handler &h) {
-    h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; CUDA requires sub_group size 32}}
+    h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; nvptx requires sub_group size 32}}
   });
 
   Q.submit([&](sycl::handler &h) {