Skip to content

Commit ac489a5

Browse files
committed
[SYCL] [AMDGPU] Ignore incorrect sub-group size
CDNA supports only 64 wave front size, for those GPUs set subgroup size to 64. Some GPUS support both 32 and 64, for those (and the rest) only allow 32.
1 parent 73bba79 commit ac489a5

File tree

4 files changed

+70
-1
lines changed

4 files changed

+70
-1
lines changed

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3526,6 +3526,10 @@ def warn_dllimport_dropped_from_inline_function : Warning<
35263526
def warn_attribute_on_direct_kernel_callee_only : Warning<"%0 attribute allowed"
35273527
" only on a function directly called from a SYCL kernel function; attribute ignored">,
35283528
InGroup<IgnoredAttributes>;
3529+
def warn_amd_reqd_sub_group_attribute_n
3530+
: Warning<"attribute argument %0 is invalid and will be ignored; AMD "
3531+
"requires sub_group size %1">,
3532+
InGroup<IgnoredAttributes>;
35293533
def warn_nothrow_attribute_ignored : Warning<"'nothrow' attribute conflicts with"
35303534
" exception specification; attribute ignored">,
35313535
InGroup<IgnoredAttributes>;

clang/lib/Sema/SemaDeclAttr.cpp

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4007,10 +4007,27 @@ void Sema::AddIntelReqdSubGroupSize(Decl *D, const AttributeCommonInfo &CI,
40074007
<< CI << /*positive*/ 0;
40084008
return;
40094009
}
4010-
if (Context.getTargetInfo().getTriple().isNVPTX() && ArgVal != 32) {
4010+
auto &TI = Context.getTargetInfo();
4011+
if (TI.getTriple().isNVPTX() && ArgVal != 32) {
40114012
Diag(E->getExprLoc(), diag::warn_reqd_sub_group_attribute_cuda_n_32)
40124013
<< ArgVal.getSExtValue();
40134014
}
4015+
if (TI.getTriple().isAMDGPU()) {
4016+
const auto HasWaveFrontSize64 =
4017+
TI.getTargetOpts().FeatureMap["wavefrontsize64"];
4018+
const auto HasWaveFrontSize32 =
4019+
TI.getTargetOpts().FeatureMap["wavefrontsize32"];
4020+
4021+
// CDNA supports only 64 wave front size, for those GPUs allow subgroup
4022+
// size of 64. Some GPUs support both 32 and 64, for those (and the rest)
4023+
// only allow 32.
4024+
const auto SupportedWaveFrontSize =
4025+
HasWaveFrontSize64 && !HasWaveFrontSize32 ? 64 : 32;
4026+
if (ArgVal != SupportedWaveFrontSize) {
4027+
Diag(E->getExprLoc(), diag::warn_amd_reqd_sub_group_attribute_n)
4028+
<< ArgVal.getSExtValue() << SupportedWaveFrontSize;
4029+
}
4030+
}
40144031

40154032
// Check to see if there's a duplicate attribute with different values
40164033
// already applied to the declaration.
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// RUN: %clang_cc1 -fsycl-is-device -triple amdgcn-amd-amdhsa -target-cpu gfx1010 -internal-isystem %S/Inputs -std=c++2b -verify %s
2+
3+
// Sub-group size is optimized for 32, warn (and ignore the attribute) if the
4+
// size exceeds 32.
5+
#include "sycl.hpp"
6+
7+
int main() {
8+
9+
sycl::queue Q;
10+
11+
Q.submit([&](sycl::handler &h) {
12+
h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(64)]] {}); // expected-warning {{attribute argument 64 is invalid and will be ignored; AMD requires sub_group size 32}}
13+
});
14+
15+
Q.submit([&](sycl::handler &h) {
16+
h.single_task<class valid_kernel>([=] [[sycl::reqd_sub_group_size(32)]] {});
17+
});
18+
19+
Q.submit([&](sycl::handler &h) {
20+
h.single_task<class valid_kernel>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; AMD requires sub_group size 32}}
21+
});
22+
23+
return 0;
24+
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// RUN: %clang_cc1 -fsycl-is-device -triple amdgcn-amd-amdhsa -target-cpu gfx90a -internal-isystem %S/Inputs -std=c++2b -verify %s
2+
3+
// Sub-group size is optimized for 32, warn (and ignore the attribute) if the
4+
// size exceeds 32.
5+
#include "sycl.hpp"
6+
7+
int main() {
8+
9+
sycl::queue Q;
10+
11+
Q.submit([&](sycl::handler &h) {
12+
h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(64)]] {});
13+
});
14+
15+
Q.submit([&](sycl::handler &h) {
16+
h.single_task<class valid_kernel>([=] [[sycl::reqd_sub_group_size(32)]] {}); // expected-warning {{attribute argument 32 is invalid and will be ignored; AMD requires sub_group size 64}}
17+
});
18+
19+
Q.submit([&](sycl::handler &h) {
20+
h.single_task<class valid_kernel>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; AMD requires sub_group size 64}}
21+
});
22+
23+
return 0;
24+
}

0 commit comments

Comments
 (0)