Skip to content

[SYCL] [AMDGPU] Ignore incorrect sub-group size #11687

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Nov 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -3438,10 +3438,6 @@ def err_attribute_argument_is_zero : Error<
def warn_attribute_argument_n_negative : Warning<
"%0 attribute parameter %1 is negative and will be ignored">,
InGroup<CudaCompat>;
def warn_reqd_sub_group_attribute_cuda_n_32
: Warning<"attribute argument %0 is invalid and will be ignored; CUDA "
"requires sub_group size 32">,
InGroup<CudaCompat>;
def err_property_function_in_objc_container : Error<
"use of Objective-C property in function nested in Objective-C "
"container not supported, move function outside its container">;
Expand Down Expand Up @@ -3540,6 +3536,10 @@ def warn_dllimport_dropped_from_inline_function : Warning<
def warn_attribute_on_direct_kernel_callee_only : Warning<"%0 attribute allowed"
" only on a function directly called from a SYCL kernel function; attribute ignored">,
InGroup<IgnoredAttributes>;
def warn_reqd_sub_group_attribute_n
: Warning<"attribute argument %0 is invalid and will be ignored; %1 "
"requires sub_group size %2">,
InGroup<IgnoredAttributes>;
def warn_nothrow_attribute_ignored : Warning<"'nothrow' attribute conflicts with"
" exception specification; attribute ignored">,
InGroup<IgnoredAttributes>;
Expand Down
22 changes: 19 additions & 3 deletions clang/lib/Sema/SemaDeclAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4020,9 +4020,25 @@ void Sema::AddIntelReqdSubGroupSize(Decl *D, const AttributeCommonInfo &CI,
<< CI << /*positive*/ 0;
return;
}
if (Context.getTargetInfo().getTriple().isNVPTX() && ArgVal != 32) {
Diag(E->getExprLoc(), diag::warn_reqd_sub_group_attribute_cuda_n_32)
<< ArgVal.getSExtValue();
auto &TI = Context.getTargetInfo();
if (TI.getTriple().isNVPTX() && ArgVal != 32)
Diag(E->getExprLoc(), diag::warn_reqd_sub_group_attribute_n)
<< ArgVal.getSExtValue() << TI.getTriple().getArchName() << 32;
if (TI.getTriple().isAMDGPU()) {
const auto HasWaveFrontSize64 =
TI.getTargetOpts().FeatureMap["wavefrontsize64"];
const auto HasWaveFrontSize32 =
TI.getTargetOpts().FeatureMap["wavefrontsize32"];

// CDNA supports only 64 wave front size, for those GPUs allow subgroup
// size of 64. Some GPUs support both 32 and 64, for those (and the rest)
// only allow 32. Warn on incompatible sizes.
const auto SupportedWaveFrontSize =
HasWaveFrontSize64 && !HasWaveFrontSize32 ? 64 : 32;
if (ArgVal != SupportedWaveFrontSize)
Diag(E->getExprLoc(), diag::warn_reqd_sub_group_attribute_n)
<< ArgVal.getSExtValue() << TI.getTriple().getArchName()
<< SupportedWaveFrontSize;
}

// Check to see if there's a duplicate attribute with different values
Expand Down
24 changes: 24 additions & 0 deletions clang/test/SemaSYCL/reqd-sub-group-size-amd_32.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// RUN: %clang_cc1 -fsycl-is-device -triple amdgcn-amd-amdhsa -target-cpu gfx1010 -internal-isystem %S/Inputs -std=c++2b -verify %s

// Sub-group size is optimized for 32, warn (and ignore the attribute) if the
// size is not 32.
#include "sycl.hpp"

int main() {

sycl::queue Q;

Q.submit([&](sycl::handler &h) {
h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(64)]] {}); // expected-warning {{attribute argument 64 is invalid and will be ignored; amdgcn requires sub_group size 32}}
});

Q.submit([&](sycl::handler &h) {
h.single_task<class valid_kernel>([=] [[sycl::reqd_sub_group_size(32)]] {});
});

Q.submit([&](sycl::handler &h) {
h.single_task<class invalid_kernel_2>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; amdgcn requires sub_group size 32}}
});

return 0;
}
24 changes: 24 additions & 0 deletions clang/test/SemaSYCL/reqd-sub-group-size-amd_64.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// RUN: %clang_cc1 -fsycl-is-device -triple amdgcn-amd-amdhsa -target-cpu gfx90a -internal-isystem %S/Inputs -std=c++2b -verify %s

// Sub-group size is optimized for 64, warn (and ignore the attribute) if the
// size is not 64.
#include "sycl.hpp"

int main() {

sycl::queue Q;

Q.submit([&](sycl::handler &h) {
h.single_task<class valid_kernel>([=] [[sycl::reqd_sub_group_size(64)]] {});
});

Q.submit([&](sycl::handler &h) {
h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(32)]] {}); // expected-warning {{attribute argument 32 is invalid and will be ignored; amdgcn requires sub_group size 64}}
});

Q.submit([&](sycl::handler &h) {
h.single_task<class invalid_kernel_2>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; amdgcn requires sub_group size 64}}
});

return 0;
}
2 changes: 1 addition & 1 deletion clang/test/SemaSYCL/reqd-sub-group-size-cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ int main() {
sycl::queue Q;

Q.submit([&](sycl::handler &h) {
h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; CUDA requires sub_group size 32}}
h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; nvptx requires sub_group size 32}}
});

Q.submit([&](sycl::handler &h) {
Expand Down