Skip to content

Commit ee5299d

Browse files
authored
[SYCL] Warn when number of kernel args exceeds maximum available on GPU (#2361)
Emit a warning when number of resulting kernel arguments exceeds 2k - maximum available number of kernel arguments on GPU device. Emit a warning only in GPU AOT mode since other devices don't have such limitation.
1 parent c7f915e commit ee5299d

File tree

3 files changed

+131
-2
lines changed

3 files changed

+131
-2
lines changed

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10997,6 +10997,11 @@ def err_sycl_restrict : Error<
1099710997
"|use a const static or global variable that is neither zero-initialized "
1099810998
"nor constant-initialized"
1099910999
"}0">;
11000+
def warn_sycl_kernel_too_many_args : Warning<
11001+
"kernel argument count (%0) exceeds supported maximum of %1 on GPU">,
11002+
InGroup<SyclStrict>;
11003+
def note_sycl_kernel_args_count : Note<"array elements and fields of a "
11004+
"class/struct may be counted separately">;
1100011005
def err_sycl_virtual_types : Error<
1100111006
"No class with a vtable can be used in a SYCL kernel or any code included in the kernel">;
1100211007
def note_sycl_recursive_function_declared_here: Note<"function implemented using recursion declared here">;

clang/lib/Sema/SemaSYCL.cpp

Lines changed: 83 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ enum KernelInvocationKind {
5656

5757
const static std::string InitMethodName = "__init";
5858
const static std::string FinalizeMethodName = "__finalize";
59+
constexpr unsigned GPUMaxKernelArgsNum = 2000;
5960

6061
namespace {
6162

@@ -1657,6 +1658,83 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler {
16571658
using SyclKernelFieldHandler::leaveStruct;
16581659
};
16591660

1661+
class SyclKernelNumArgsChecker : public SyclKernelFieldHandler {
1662+
SourceLocation KernelLoc;
1663+
unsigned NumOfParams = 0;
1664+
1665+
bool handleSpecialType(QualType FieldTy) {
1666+
const CXXRecordDecl *RecordDecl = FieldTy->getAsCXXRecordDecl();
1667+
assert(RecordDecl && "The accessor/sampler must be a RecordDecl");
1668+
CXXMethodDecl *InitMethod = getMethodByName(RecordDecl, InitMethodName);
1669+
assert(InitMethod && "The accessor/sampler must have the __init method");
1670+
NumOfParams += InitMethod->getNumParams();
1671+
return true;
1672+
}
1673+
1674+
public:
1675+
SyclKernelNumArgsChecker(Sema &S, SourceLocation Loc)
1676+
: SyclKernelFieldHandler(S), KernelLoc(Loc) {}
1677+
1678+
~SyclKernelNumArgsChecker() {
1679+
if (SemaRef.Context.getTargetInfo().getTriple().getSubArch() ==
1680+
llvm::Triple::SPIRSubArch_gen) {
1681+
if (NumOfParams > GPUMaxKernelArgsNum) {
1682+
SemaRef.Diag(KernelLoc, diag::warn_sycl_kernel_too_many_args)
1683+
<< NumOfParams << GPUMaxKernelArgsNum;
1684+
SemaRef.Diag(KernelLoc, diag::note_sycl_kernel_args_count);
1685+
}
1686+
}
1687+
}
1688+
1689+
bool handleSyclAccessorType(FieldDecl *FD, QualType FieldTy) final {
1690+
return handleSpecialType(FieldTy);
1691+
}
1692+
1693+
bool handleSyclAccessorType(const CXXRecordDecl *, const CXXBaseSpecifier &,
1694+
QualType FieldTy) final {
1695+
return handleSpecialType(FieldTy);
1696+
}
1697+
1698+
bool handleSyclSamplerType(FieldDecl *FD, QualType FieldTy) final {
1699+
return handleSpecialType(FieldTy);
1700+
}
1701+
1702+
bool handleSyclSamplerType(const CXXRecordDecl *, const CXXBaseSpecifier &BS,
1703+
QualType FieldTy) final {
1704+
return handleSpecialType(FieldTy);
1705+
}
1706+
1707+
bool handlePointerType(FieldDecl *FD, QualType FieldTy) final {
1708+
NumOfParams++;
1709+
return true;
1710+
}
1711+
1712+
bool handleScalarType(FieldDecl *FD, QualType FieldTy) final {
1713+
NumOfParams++;
1714+
return true;
1715+
}
1716+
1717+
bool handleUnionType(FieldDecl *FD, QualType FieldTy) final {
1718+
return handleScalarType(FD, FieldTy);
1719+
}
1720+
1721+
bool handleSyclHalfType(FieldDecl *FD, QualType FieldTy) final {
1722+
NumOfParams++;
1723+
return true;
1724+
}
1725+
1726+
bool handleSyclStreamType(FieldDecl *FD, QualType FieldTy) final {
1727+
NumOfParams++;
1728+
return true;
1729+
}
1730+
bool handleSyclStreamType(const CXXRecordDecl *, const CXXBaseSpecifier &,
1731+
QualType FieldTy) final {
1732+
NumOfParams++;
1733+
return true;
1734+
}
1735+
using SyclKernelFieldHandler::handleSyclHalfType;
1736+
};
1737+
16601738
class SyclKernelBodyCreator : public SyclKernelFieldHandler {
16611739
SyclKernelDeclCreator &DeclCreator;
16621740
llvm::SmallVector<Stmt *, 16> BodyStmts;
@@ -2351,6 +2429,7 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, SourceRange CallLoc,
23512429

23522430
SyclKernelFieldChecker FieldChecker(*this);
23532431
SyclKernelUnionChecker UnionChecker(*this);
2432+
SyclKernelNumArgsChecker NumArgsChecker(*this, Args[0]->getExprLoc());
23542433
// check that calling kernel conforms to spec
23552434
QualType KernelParamTy = KernelFunc->getParamDecl(0)->getType();
23562435
if (KernelParamTy->isReferenceType()) {
@@ -2365,8 +2444,10 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, SourceRange CallLoc,
23652444

23662445
KernelObjVisitor Visitor{*this};
23672446
DiagnosingSYCLKernel = true;
2368-
Visitor.VisitRecordBases(KernelObj, FieldChecker, UnionChecker);
2369-
Visitor.VisitRecordFields(KernelObj, FieldChecker, UnionChecker);
2447+
Visitor.VisitRecordBases(KernelObj, FieldChecker, UnionChecker,
2448+
NumArgsChecker);
2449+
Visitor.VisitRecordFields(KernelObj, FieldChecker, UnionChecker,
2450+
NumArgsChecker);
23702451
DiagnosingSYCLKernel = false;
23712452
if (!FieldChecker.isValid() || !UnionChecker.isValid())
23722453
KernelFunc->setInvalidDecl();
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
// RUN: %clang_cc1 -I %S/Inputs -fsycl -triple spir64_gen -DGPU -fsycl-is-device -fsyntax-only -verify %s
2+
// RUN: %clang_cc1 -I %S/Inputs -fsycl -triple spir64 -fsycl-is-device -fsyntax-only -verify %s
3+
// RUN: %clang_cc1 -I %S/Inputs -fsycl -triple spir64_gen -Wno-sycl-strict -fsycl-is-device -fsyntax-only -verify %s
4+
// RUN: %clang_cc1 -I %S/Inputs -fsycl -triple spir64_gen -Werror=sycl-strict -DERROR -fsycl-is-device -fsyntax-only -verify %s
5+
6+
#include <sycl.hpp>
7+
8+
template <typename Name, typename F>
9+
__attribute__((sycl_kernel)) void kernel(F KernelFunc) {
10+
KernelFunc();
11+
}
12+
13+
template <typename Name, typename F>
14+
void parallel_for(F KernelFunc) {
15+
#ifdef GPU
16+
// expected-warning@+8 {{kernel argument count (2001) exceeds supported maximum of 2000 on GPU}}
17+
// expected-note@+7 {{array elements and fields of a class/struct may be counted separately}}
18+
#elif ERROR
19+
// expected-error@+5 {{kernel argument count (2001) exceeds supported maximum of 2000 on GPU}}
20+
// expected-note@+4 {{array elements and fields of a class/struct may be counted separately}}
21+
#else
22+
// expected-no-diagnostics
23+
#endif
24+
kernel<Name>(KernelFunc);
25+
}
26+
27+
using Accessor =
28+
cl::sycl::accessor<int, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::global_buffer>;
29+
30+
void use() {
31+
struct S {
32+
int A;
33+
int B;
34+
Accessor AAcc;
35+
Accessor BAcc;
36+
int Array[1991];
37+
} Args;
38+
auto L = [=]() { (void)Args; };
39+
#if defined(GPU) || defined(ERROR)
40+
// expected-note@+2 {{in instantiation of function template specialization 'parallel_for<Foo}}
41+
#endif
42+
parallel_for<class Foo>(L);
43+
}

0 commit comments

Comments
 (0)