Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -3438,10 +3438,6 @@ def err_attribute_argument_is_zero : Error<
def warn_attribute_argument_n_negative : Warning<
"%0 attribute parameter %1 is negative and will be ignored">,
InGroup<CudaCompat>;
def warn_reqd_sub_group_attribute_cuda_n_32
: Warning<"attribute argument %0 is invalid and will be ignored; CUDA "
"requires sub_group size 32">,
InGroup<CudaCompat>;
def err_property_function_in_objc_container : Error<
"use of Objective-C property in function nested in Objective-C "
"container not supported, move function outside its container">;
Expand Down Expand Up @@ -3540,6 +3536,10 @@ def warn_dllimport_dropped_from_inline_function : Warning<
def warn_attribute_on_direct_kernel_callee_only : Warning<"%0 attribute allowed"
" only on a function directly called from a SYCL kernel function; attribute ignored">,
InGroup<IgnoredAttributes>;
def warn_reqd_sub_group_attribute_n
: Warning<"attribute argument %0 is invalid and will be ignored; %1 "
"requires sub_group size %2">,
InGroup<IgnoredAttributes>;
def warn_nothrow_attribute_ignored : Warning<"'nothrow' attribute conflicts with"
" exception specification; attribute ignored">,
InGroup<IgnoredAttributes>;
Expand Down
26 changes: 23 additions & 3 deletions clang/lib/Sema/SemaDeclAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4020,9 +4020,29 @@ void Sema::AddIntelReqdSubGroupSize(Decl *D, const AttributeCommonInfo &CI,
<< CI << /*positive*/ 0;
return;
}
if (Context.getTargetInfo().getTriple().isNVPTX() && ArgVal != 32) {
Diag(E->getExprLoc(), diag::warn_reqd_sub_group_attribute_cuda_n_32)
<< ArgVal.getSExtValue();
auto &TI = Context.getTargetInfo();
if (TI.getTriple().isNVPTX() && ArgVal != 32) {
Diag(E->getExprLoc(), diag::warn_reqd_sub_group_attribute_n)
<< ArgVal.getSExtValue() << TI.getTriple().getArchName() << 32;
return;
}
if (TI.getTriple().isAMDGPU()) {
const auto HasWaveFrontSize64 =
TI.getTargetOpts().FeatureMap["wavefrontsize64"];
const auto HasWaveFrontSize32 =
TI.getTargetOpts().FeatureMap["wavefrontsize32"];

// CDNA supports only 64 wave front size, for those GPUs allow subgroup
// size of 64. Some GPUs support both 32 and 64, for those (and the rest)
// only allow 32.
const auto SupportedWaveFrontSize =
HasWaveFrontSize64 && !HasWaveFrontSize32 ? 64 : 32;
if (ArgVal != SupportedWaveFrontSize) {
Diag(E->getExprLoc(), diag::warn_reqd_sub_group_attribute_n)
<< ArgVal.getSExtValue() << TI.getTriple().getArchName()
<< SupportedWaveFrontSize;
return;
}
}

// Check to see if there's a duplicate attribute with different values
Expand Down
52 changes: 52 additions & 0 deletions clang/test/CodeGenSYCL/reqd-sub-group-size_ignored_values.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple amdgcn-amd-amdhsa -target-cpu gfx1010 -S -emit-llvm -o - %s | FileCheck -check-prefix=CHECK_AMD_32 %s

// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple amdgcn-amd-amdhsa -target-cpu gfx90a -S -emit-llvm -o - %s | FileCheck -check-prefix=CHECK_AMD_64 %s

// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple nvptx-unknown-unknown -target-cpu sm_90 -S -emit-llvm -o - %s | FileCheck -check-prefix=CHECK_CUDA_32 %s

// Check that incorrect values specified for reqd_sub_group_size are ignored.
// CDNA supports only 64 wave front size, for those GPUs allow subgroup size of
// 64. Some GPUs support both 32 and 64, for those (and the rest) only allow
// 32. For CUDA only allow 32.

#include "sycl.hpp"

int main() {

sycl::queue Q;

Q.submit([&](sycl::handler &h) {
h.single_task<class Kernel_1>([=] [[sycl::reqd_sub_group_size(64)]] {});
});

Q.submit([&](sycl::handler &h) {
h.single_task<class Kernel_2>([=] [[sycl::reqd_sub_group_size(32)]] {});
});

Q.submit([&](sycl::handler &h) {
h.single_task<class Kernel_3>([=] [[sycl::reqd_sub_group_size(8)]] {});
});

return 0;
}

// CHECK_AMD_32: define {{.*}}amdgpu_kernel void @{{.*}}Kernel_1() #0 {{.*}}
// CHECK_AMD_32-NOT: intel_reqd_sub_group_size
// CHECK_AMD_32: define {{.*}}amdgpu_kernel void @{{.*}}Kernel_2() #0 {{.*}} !intel_reqd_sub_group_size ![[IRSGS_32:[0-9]+]]
// CHECK_AMD_32: define {{.*}}amdgpu_kernel void @{{.*}}Kernel_3() #0 {{.*}}
// CHECK_AMD_32-NOT: intel_reqd_sub_group_size
// CHECK_AMD_32: ![[IRSGS_32]] = !{i32 32}

// CHECK_AMD_64: define {{.*}}amdgpu_kernel void @{{.*}}Kernel_1() #0 {{.*}} !intel_reqd_sub_group_size ![[IRSGS_64:[0-9]+]]
// CHECK_AMD_64: define {{.*}}amdgpu_kernel void @{{.*}}Kernel_2() #0 {{.*}}
// CHECK_AMD_64-NOT: intel_reqd_sub_group_size
// CHECK_AMD_64: define {{.*}}amdgpu_kernel void @{{.*}}Kernel_3() #0 {{.*}}
// CHECK_AMD_64-NOT: intel_reqd_sub_group_size
// CHECK_AMD_64: ![[IRSGS_64]] = !{i32 64}

// CHECK_CUDA_32: define {{.*}} void @{{.*}}Kernel_1() #0 {{.*}}
// CHECK_CUDA_32-NOT: intel_reqd_sub_group_size
// CHECK_CUDA_32: define {{.*}} void @{{.*}}Kernel_2() #0 {{.*}} !intel_reqd_sub_group_size ![[IRSGS_32:[0-9]+]]
// CHECK_CUDA_32: define {{.*}} void @{{.*}}Kernel_3() #0 {{.*}}
// CHECK_CUDA_32-NOT: intel_reqd_sub_group_size
// CHECK_CUDA_32: ![[IRSGS_32]] = !{i32 32}
24 changes: 24 additions & 0 deletions clang/test/SemaSYCL/reqd-sub-group-size-amd_32.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// RUN: %clang_cc1 -fsycl-is-device -triple amdgcn-amd-amdhsa -target-cpu gfx1010 -internal-isystem %S/Inputs -std=c++2b -verify %s

// Sub-group size is optimized for 32, warn (and ignore the attribute) if the
// size is not 32.
#include "sycl.hpp"

int main() {

sycl::queue Q;

Q.submit([&](sycl::handler &h) {
h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(64)]] {}); // expected-warning {{attribute argument 64 is invalid and will be ignored; amdgcn requires sub_group size 32}}
});

Q.submit([&](sycl::handler &h) {
h.single_task<class valid_kernel>([=] [[sycl::reqd_sub_group_size(32)]] {});
});

Q.submit([&](sycl::handler &h) {
h.single_task<class invalid_kernel_2>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; amdgcn requires sub_group size 32}}
});

return 0;
}
24 changes: 24 additions & 0 deletions clang/test/SemaSYCL/reqd-sub-group-size-amd_64.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// RUN: %clang_cc1 -fsycl-is-device -triple amdgcn-amd-amdhsa -target-cpu gfx90a -internal-isystem %S/Inputs -std=c++2b -verify %s

// Sub-group size is optimized for 64, warn (and ignore the attribute) if the
// size is not 64.
#include "sycl.hpp"

int main() {

sycl::queue Q;

Q.submit([&](sycl::handler &h) {
h.single_task<class valid_kernel>([=] [[sycl::reqd_sub_group_size(64)]] {});
});

Q.submit([&](sycl::handler &h) {
h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(32)]] {}); // expected-warning {{attribute argument 32 is invalid and will be ignored; amdgcn requires sub_group size 64}}
});

Q.submit([&](sycl::handler &h) {
h.single_task<class invalid_kernel_2>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; amdgcn requires sub_group size 64}}
});

return 0;
}
2 changes: 1 addition & 1 deletion clang/test/SemaSYCL/reqd-sub-group-size-cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ int main() {
sycl::queue Q;

Q.submit([&](sycl::handler &h) {
h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; CUDA requires sub_group size 32}}
h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; nvptx requires sub_group size 32}}
});

Q.submit([&](sycl::handler &h) {
Expand Down