Skip to content

[SYCL] [AMDGPU] Ignore incorrect sub-group size #11687

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Nov 21, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -3526,6 +3526,10 @@ def warn_dllimport_dropped_from_inline_function : Warning<
def warn_attribute_on_direct_kernel_callee_only : Warning<"%0 attribute allowed"
" only on a function directly called from a SYCL kernel function; attribute ignored">,
InGroup<IgnoredAttributes>;
def warn_amd_reqd_sub_group_attribute_n
: Warning<"attribute argument %0 is invalid and will be ignored; AMD "
"requires sub_group size %1">,
InGroup<IgnoredAttributes>;
def warn_nothrow_attribute_ignored : Warning<"'nothrow' attribute conflicts with"
" exception specification; attribute ignored">,
InGroup<IgnoredAttributes>;
Expand Down
19 changes: 18 additions & 1 deletion clang/lib/Sema/SemaDeclAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4007,10 +4007,27 @@ void Sema::AddIntelReqdSubGroupSize(Decl *D, const AttributeCommonInfo &CI,
<< CI << /*positive*/ 0;
return;
}
if (Context.getTargetInfo().getTriple().isNVPTX() && ArgVal != 32) {
auto &TI = Context.getTargetInfo();
if (TI.getTriple().isNVPTX() && ArgVal != 32) {
Diag(E->getExprLoc(), diag::warn_reqd_sub_group_attribute_cuda_n_32)
<< ArgVal.getSExtValue();
}
if (TI.getTriple().isAMDGPU()) {
const auto HasWaveFrontSize64 =
TI.getTargetOpts().FeatureMap["wavefrontsize64"];
const auto HasWaveFrontSize32 =
TI.getTargetOpts().FeatureMap["wavefrontsize32"];

// CDNA supports only 64 wave front size, for those GPUs allow subgroup
// size of 64. Some GPUs support both 32 and 64, for those (and the rest)
// only allow 32.
const auto SupportedWaveFrontSize =
HasWaveFrontSize64 && !HasWaveFrontSize32 ? 64 : 32;
if (ArgVal != SupportedWaveFrontSize) {
Diag(E->getExprLoc(), diag::warn_amd_reqd_sub_group_attribute_n)
<< ArgVal.getSExtValue() << SupportedWaveFrontSize;
}
}

// Check to see if there's a duplicate attribute with different values
// already applied to the declaration.
Expand Down
24 changes: 24 additions & 0 deletions clang/test/SemaSYCL/reqd-sub-group-size-amd_32.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// RUN: %clang_cc1 -fsycl-is-device -triple amdgcn-amd-amdhsa -target-cpu gfx1010 -internal-isystem %S/Inputs -std=c++2b -verify %s

// Sub-group size is optimized for 32, warn (and ignore the attribute) if the
// size is not 32.
#include "sycl.hpp"

int main() {

sycl::queue Q;

Q.submit([&](sycl::handler &h) {
h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(64)]] {}); // expected-warning {{attribute argument 64 is invalid and will be ignored; AMD requires sub_group size 32}}
});

Q.submit([&](sycl::handler &h) {
h.single_task<class valid_kernel>([=] [[sycl::reqd_sub_group_size(32)]] {});
});

Q.submit([&](sycl::handler &h) {
h.single_task<class valid_kernel>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; AMD requires sub_group size 32}}
});

return 0;
}
24 changes: 24 additions & 0 deletions clang/test/SemaSYCL/reqd-sub-group-size-amd_64.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// RUN: %clang_cc1 -fsycl-is-device -triple amdgcn-amd-amdhsa -target-cpu gfx90a -internal-isystem %S/Inputs -std=c++2b -verify %s

// Sub-group size is optimized for 64, warn (and ignore the attribute) if the
// size is not 64.
#include "sycl.hpp"

int main() {

sycl::queue Q;

Q.submit([&](sycl::handler &h) {
h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(64)]] {});
});

Q.submit([&](sycl::handler &h) {
h.single_task<class valid_kernel>([=] [[sycl::reqd_sub_group_size(32)]] {}); // expected-warning {{attribute argument 32 is invalid and will be ignored; AMD requires sub_group size 64}}
});

Q.submit([&](sycl::handler &h) {
h.single_task<class valid_kernel>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; AMD requires sub_group size 64}}
});

return 0;
}