Skip to content

Commit b83a1a8

Browse files
[SYCL] Enable host optimization of work-item free functions (#2967)
The SYCL free functions ( this_item, this_id, etc) are expensive to support on host devices. They cause performance delays because every iteration through one of the parallel_for routines the various indexing values have to be updated in case the users code might call this_item or this_id (or the others). But with the new callsThisItem method added to the Kernel Information, the host device can avoid paying the performance penalty if the users code doesn't actually call this_item. We can detect at compile time whether or not any of the this_xxx free functions are used by the users code, and if not, don't bother storing the indexing data in each loop iteration. In this PR we add further expand the Kernel Information to support a callsAnyThisFreeFunction method, and we use it to avoid the sundry store_item etc. calls on the host. Signed-off-by: Chris Perkins <[email protected]>
1 parent db62fe7 commit b83a1a8

File tree

10 files changed

+155
-41
lines changed

10 files changed

+155
-41
lines changed

clang/include/clang/Sema/Sema.h

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -346,8 +346,12 @@ class SYCLIntegrationHeader {
346346
/// Registers a specialization constant to emit info for it into the header.
347347
void addSpecConstant(StringRef IDName, QualType IDType);
348348

349-
/// Notes that this_item is called within the kernel.
349+
/// Note which free functions (this_id, this_item, etc) are called within the
350+
/// kernel
351+
void setCallsThisId(bool B);
350352
void setCallsThisItem(bool B);
353+
void setCallsThisNDItem(bool B);
354+
void setCallsThisGroup(bool B);
351355

352356
private:
353357
// Kernel actual parameter descriptor.
@@ -366,6 +370,15 @@ class SYCLIntegrationHeader {
366370
KernelParamDesc() = default;
367371
};
368372

373+
// there are four free functions the kernel may call (this_id, this_item,
374+
// this_nd_item, this_group)
375+
struct KernelCallsSYCLFreeFunction {
376+
bool CallsThisId;
377+
bool CallsThisItem;
378+
bool CallsThisNDItem;
379+
bool CallsThisGroup;
380+
};
381+
369382
// Kernel invocation descriptor
370383
struct KernelDesc {
371384
/// Kernel name.
@@ -385,8 +398,9 @@ class SYCLIntegrationHeader {
385398
/// Descriptor of kernel actual parameters.
386399
SmallVector<KernelParamDesc, 8> Params;
387400

388-
// Whether kernel calls this_item()
389-
bool CallsThisItem;
401+
// Whether kernel calls any of the SYCL free functions (this_item(),
402+
// this_id(), etc)
403+
KernelCallsSYCLFreeFunction FreeFunctionCalls;
390404

391405
KernelDesc() = default;
392406
};

clang/lib/Sema/SemaSYCL.cpp

Lines changed: 42 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2729,11 +2729,24 @@ class SyclKernelIntHeaderCreator : public SyclKernelFieldHandler {
27292729
if (!Visited.insert(FD).second)
27302730
continue; // We've already seen this Decl
27312731

2732-
// Check whether this call is to sycl::this_item().
2732+
// Check whether this call is to free functions (sycl::this_item(),
2733+
// this_id, etc.).
2734+
if (Util::isSyclFunction(FD, "this_id")) {
2735+
Header.setCallsThisId(true);
2736+
return;
2737+
}
27332738
if (Util::isSyclFunction(FD, "this_item")) {
27342739
Header.setCallsThisItem(true);
27352740
return;
27362741
}
2742+
if (Util::isSyclFunction(FD, "this_nd_item")) {
2743+
Header.setCallsThisNDItem(true);
2744+
return;
2745+
}
2746+
if (Util::isSyclFunction(FD, "this_group")) {
2747+
Header.setCallsThisGroup(true);
2748+
return;
2749+
}
27372750

27382751
CallGraphNode *N = SYCLCG.getNode(FD);
27392752
if (!N)
@@ -3938,7 +3951,14 @@ void SYCLIntegrationHeader::emit(raw_ostream &O) {
39383951
<< "; }\n";
39393952
O << " __SYCL_DLL_LOCAL\n";
39403953
O << " static constexpr bool callsThisItem() { return ";
3941-
O << K.CallsThisItem << "; }\n";
3954+
O << K.FreeFunctionCalls.CallsThisItem << "; }\n";
3955+
O << " __SYCL_DLL_LOCAL\n";
3956+
O << " static constexpr bool callsAnyThisFreeFunction() { return ";
3957+
O << (K.FreeFunctionCalls.CallsThisId ||
3958+
K.FreeFunctionCalls.CallsThisItem ||
3959+
K.FreeFunctionCalls.CallsThisNDItem ||
3960+
K.FreeFunctionCalls.CallsThisGroup)
3961+
<< "; }\n";
39423962
O << "};\n";
39433963
CurStart += N;
39443964
}
@@ -3997,10 +4017,28 @@ void SYCLIntegrationHeader::addSpecConstant(StringRef IDName, QualType IDType) {
39974017
SpecConsts.emplace_back(std::make_pair(IDType, IDName.str()));
39984018
}
39994019

4020+
void SYCLIntegrationHeader::setCallsThisId(bool B) {
4021+
KernelDesc *K = getCurKernelDesc();
4022+
assert(K && "no kernel");
4023+
K->FreeFunctionCalls.CallsThisId = B;
4024+
}
4025+
40004026
void SYCLIntegrationHeader::setCallsThisItem(bool B) {
40014027
KernelDesc *K = getCurKernelDesc();
4002-
assert(K && "no kernels");
4003-
K->CallsThisItem = B;
4028+
assert(K && "no kernel");
4029+
K->FreeFunctionCalls.CallsThisItem = B;
4030+
}
4031+
4032+
void SYCLIntegrationHeader::setCallsThisNDItem(bool B) {
4033+
KernelDesc *K = getCurKernelDesc();
4034+
assert(K && "no kernel");
4035+
K->FreeFunctionCalls.CallsThisNDItem = B;
4036+
}
4037+
4038+
void SYCLIntegrationHeader::setCallsThisGroup(bool B) {
4039+
KernelDesc *K = getCurKernelDesc();
4040+
assert(K && "no kernel");
4041+
K->FreeFunctionCalls.CallsThisGroup = B;
40044042
}
40054043

40064044
SYCLIntegrationHeader::SYCLIntegrationHeader(DiagnosticsEngine &_Diag,

clang/test/CodeGenSYCL/Inputs/sycl.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,9 @@ template <int dim> struct item {
130130
template <int Dims> item<Dims>
131131
this_item() { return item<Dims>{}; }
132132

133+
template <int Dims> id<Dims>
134+
this_id() { return id<Dims>{}; }
135+
133136
template <int dim>
134137
struct range {
135138
template <typename... T>

clang/test/CodeGenSYCL/kernel-by-reference.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,15 @@ int simple_add(int i) {
1515
int main() {
1616
queue q;
1717
#if defined(SYCL2020)
18-
// expected-warning@Inputs/sycl.hpp:298 {{Passing kernel functions by value is deprecated in SYCL 2020}}
18+
// expected-warning@Inputs/sycl.hpp:301 {{Passing kernel functions by value is deprecated in SYCL 2020}}
1919
// expected-note@+3 {{in instantiation of function template specialization}}
2020
#endif
2121
q.submit([&](handler &h) {
2222
h.single_task_2017<class sycl2017>([]() { simple_add(10); });
2323
});
2424

2525
#if defined(SYCL2017)
26-
// expected-warning@Inputs/sycl.hpp:293 {{Passing of kernel functions by reference is a SYCL 2020 extension}}
26+
// expected-warning@Inputs/sycl.hpp:296 {{Passing of kernel functions by reference is a SYCL 2020 extension}}
2727
// expected-note@+3 {{in instantiation of function template specialization}}
2828
#endif
2929
q.submit([&](handler &h) {

clang/test/CodeGenSYCL/parallel_for_this_item.cpp

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@
1313
// CHECK-NEXT: "_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE3GNU",
1414
// CHECK-NEXT: "_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE3EMU",
1515
// CHECK-NEXT: "_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE3OWL",
16-
// CHECK-NEXT: "_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE3RAT"
16+
// CHECK-NEXT: "_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE3RAT",
17+
// CHECK-NEXT: "_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE3FOX"
1718
// CHECK-NEXT: };
1819

1920
// CHECK:template <> struct KernelInfo<class GNU> {
@@ -29,6 +30,8 @@
2930
// CHECK-NEXT: static constexpr bool isESIMD() { return 0; }
3031
// CHECK-NEXT: __SYCL_DLL_LOCAL
3132
// CHECK-NEXT: static constexpr bool callsThisItem() { return 0; }
33+
// CHECK-NEXT: __SYCL_DLL_LOCAL
34+
// CHECK-NEXT: static constexpr bool callsAnyThisFreeFunction() { return 0; }
3235
// CHECK-NEXT:};
3336
// CHECK-NEXT:template <> struct KernelInfo<class EMU> {
3437
// CHECK-NEXT: __SYCL_DLL_LOCAL
@@ -43,6 +46,8 @@
4346
// CHECK-NEXT: static constexpr bool isESIMD() { return 0; }
4447
// CHECK-NEXT: __SYCL_DLL_LOCAL
4548
// CHECK-NEXT: static constexpr bool callsThisItem() { return 1; }
49+
// CHECK-NEXT: __SYCL_DLL_LOCAL
50+
// CHECK-NEXT: static constexpr bool callsAnyThisFreeFunction() { return 1; }
4651
// CHECK-NEXT:};
4752
// CHECK-NEXT:template <> struct KernelInfo<class OWL> {
4853
// CHECK-NEXT: __SYCL_DLL_LOCAL
@@ -57,6 +62,8 @@
5762
// CHECK-NEXT: static constexpr bool isESIMD() { return 0; }
5863
// CHECK-NEXT: __SYCL_DLL_LOCAL
5964
// CHECK-NEXT: static constexpr bool callsThisItem() { return 0; }
65+
// CHECK-NEXT: __SYCL_DLL_LOCAL
66+
// CHECK-NEXT: static constexpr bool callsAnyThisFreeFunction() { return 0; }
6067
// CHECK-NEXT:};
6168
// CHECK-NEXT:template <> struct KernelInfo<class RAT> {
6269
// CHECK-NEXT: __SYCL_DLL_LOCAL
@@ -71,6 +78,24 @@
7178
// CHECK-NEXT: static constexpr bool isESIMD() { return 0; }
7279
// CHECK-NEXT: __SYCL_DLL_LOCAL
7380
// CHECK-NEXT: static constexpr bool callsThisItem() { return 1; }
81+
// CHECK-NEXT: __SYCL_DLL_LOCAL
82+
// CHECK-NEXT: static constexpr bool callsAnyThisFreeFunction() { return 1; }
83+
// CHECK-NEXT:};
84+
// CHECK-NEXT:template <> struct KernelInfo<class FOX> {
85+
// CHECK-NEXT: __SYCL_DLL_LOCAL
86+
// CHECK-NEXT: static constexpr const char* getName() { return "_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE3FOX"; }
87+
// CHECK-NEXT: __SYCL_DLL_LOCAL
88+
// CHECK-NEXT: static constexpr unsigned getNumParams() { return 0; }
89+
// CHECK-NEXT: __SYCL_DLL_LOCAL
90+
// CHECK-NEXT: static constexpr const kernel_param_desc_t& getParamDesc(unsigned i) {
91+
// CHECK-NEXT: return kernel_signatures[i+0];
92+
// CHECK-NEXT: }
93+
// CHECK-NEXT: __SYCL_DLL_LOCAL
94+
// CHECK-NEXT: static constexpr bool isESIMD() { return 0; }
95+
// CHECK-NEXT: __SYCL_DLL_LOCAL
96+
// CHECK-NEXT: static constexpr bool callsThisItem() { return 0; }
97+
// CHECK-NEXT: __SYCL_DLL_LOCAL
98+
// CHECK-NEXT: static constexpr bool callsAnyThisFreeFunction() { return 1; }
7499
// CHECK-NEXT:};
75100

76101
#include "sycl.hpp"
@@ -108,6 +133,10 @@ int main() {
108133

109134
// This kernel calls sycl::this_item
110135
cgh.parallel_for<class RAT>(range<1>(1), [=](id<1> I) { f(); });
136+
137+
// This kernel does not call sycl::this_item, but does call this_id
138+
cgh.parallel_for<class FOX>(range<1>(1),
139+
[=](id<1> I) { this_id<1>(); });
111140
});
112141

113142
return 0;

sycl/include/CL/sycl/ONEAPI/reduction.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <CL/sycl/ONEAPI/group_algorithm.hpp>
1313
#include <CL/sycl/accessor.hpp>
1414
#include <CL/sycl/handler.hpp>
15+
#include <CL/sycl/kernel.hpp>
1516

1617
__SYCL_INLINE_NAMESPACE(cl) {
1718
namespace sycl {

sycl/include/CL/sycl/detail/cg_types.hpp

Lines changed: 36 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ class HostTask {
158158
};
159159

160160
// Class which stores specific lambda object.
161-
template <class KernelType, class KernelArgType, int Dims>
161+
template <class KernelType, class KernelArgType, int Dims, typename KernelName>
162162
class HostKernel : public HostKernelBase {
163163
using IDBuilder = sycl::detail::Builder;
164164
KernelType MKernel;
@@ -203,6 +203,9 @@ class HostKernel : public HostKernelBase {
203203
template <class ArgT = KernelArgType>
204204
typename detail::enable_if_t<std::is_same<ArgT, sycl::id<Dims>>::value>
205205
runOnHost(const NDRDescT &NDRDesc) {
206+
using KI = detail::KernelInfo<KernelName>;
207+
constexpr bool StoreLocation = KI::callsAnyThisFreeFunction();
208+
206209
sycl::range<Dims> Range(InitializedVal<Dims, range>::template get<0>());
207210
sycl::id<Dims> Offset;
208211
for (int I = 0; I < Dims; ++I) {
@@ -213,8 +216,11 @@ class HostKernel : public HostKernelBase {
213216
detail::NDLoop<Dims>::iterate(Range, [&](const sycl::id<Dims> &ID) {
214217
sycl::item<Dims, /*Offset=*/true> Item =
215218
IDBuilder::createItem<Dims, true>(Range, ID, Offset);
216-
store_id(&ID);
217-
store_item(&Item);
219+
220+
if (StoreLocation) {
221+
store_id(&ID);
222+
store_item(&Item);
223+
}
218224
MKernel(ID);
219225
});
220226
}
@@ -223,6 +229,9 @@ class HostKernel : public HostKernelBase {
223229
typename detail::enable_if_t<
224230
std::is_same<ArgT, item<Dims, /*Offset=*/false>>::value>
225231
runOnHost(const NDRDescT &NDRDesc) {
232+
using KI = detail::KernelInfo<KernelName>;
233+
constexpr bool StoreLocation = KI::callsAnyThisFreeFunction();
234+
226235
sycl::id<Dims> ID;
227236
sycl::range<Dims> Range(InitializedVal<Dims, range>::template get<0>());
228237
for (int I = 0; I < Dims; ++I)
@@ -232,8 +241,11 @@ class HostKernel : public HostKernelBase {
232241
sycl::item<Dims, /*Offset=*/false> Item =
233242
IDBuilder::createItem<Dims, false>(Range, ID);
234243
sycl::item<Dims, /*Offset=*/true> ItemWithOffset = Item;
235-
store_id(&ID);
236-
store_item(&ItemWithOffset);
244+
245+
if (StoreLocation) {
246+
store_id(&ID);
247+
store_item(&ItemWithOffset);
248+
}
237249
MKernel(Item);
238250
});
239251
}
@@ -242,6 +254,9 @@ class HostKernel : public HostKernelBase {
242254
typename detail::enable_if_t<
243255
std::is_same<ArgT, item<Dims, /*Offset=*/true>>::value>
244256
runOnHost(const NDRDescT &NDRDesc) {
257+
using KI = detail::KernelInfo<KernelName>;
258+
constexpr bool StoreLocation = KI::callsAnyThisFreeFunction();
259+
245260
sycl::range<Dims> Range(InitializedVal<Dims, range>::template get<0>());
246261
sycl::id<Dims> Offset;
247262
for (int I = 0; I < Dims; ++I) {
@@ -253,15 +268,21 @@ class HostKernel : public HostKernelBase {
253268
sycl::id<Dims> OffsetID = ID + Offset;
254269
sycl::item<Dims, /*Offset=*/true> Item =
255270
IDBuilder::createItem<Dims, true>(Range, OffsetID, Offset);
256-
store_id(&OffsetID);
257-
store_item(&Item);
271+
272+
if (StoreLocation) {
273+
store_id(&OffsetID);
274+
store_item(&Item);
275+
}
258276
MKernel(Item);
259277
});
260278
}
261279

262280
template <class ArgT = KernelArgType>
263281
typename detail::enable_if_t<std::is_same<ArgT, nd_item<Dims>>::value>
264282
runOnHost(const NDRDescT &NDRDesc) {
283+
using KI = detail::KernelInfo<KernelName>;
284+
constexpr bool StoreLocation = KI::callsAnyThisFreeFunction();
285+
265286
sycl::range<Dims> GroupSize(InitializedVal<Dims, range>::template get<0>());
266287
for (int I = 0; I < Dims; ++I) {
267288
if (NDRDesc.LocalSize[I] == 0 ||
@@ -294,11 +315,14 @@ class HostKernel : public HostKernelBase {
294315
IDBuilder::createItem<Dims, false>(LocalSize, LocalID);
295316
const sycl::nd_item<Dims> NDItem =
296317
IDBuilder::createNDItem<Dims>(GlobalItem, LocalItem, Group);
297-
store_id(&GlobalID);
298-
store_item(&GlobalItem);
299-
store_nd_item(&NDItem);
300-
auto g = NDItem.get_group();
301-
store_group(&g);
318+
319+
if (StoreLocation) {
320+
store_id(&GlobalID);
321+
store_item(&GlobalItem);
322+
store_nd_item(&NDItem);
323+
auto g = NDItem.get_group();
324+
store_group(&g);
325+
}
302326
MKernel(NDItem);
303327
});
304328
});

sycl/include/CL/sycl/detail/kernel_desc.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ template <class KernelNameType> struct KernelInfo {
5858
static constexpr const char *getName() { return ""; }
5959
static constexpr bool isESIMD() { return 0; }
6060
static constexpr bool callsThisItem() { return false; }
61+
static constexpr bool callsAnyThisFreeFunction() { return false; }
6162
};
6263
#else
6364
template <char...> struct KernelInfoData {
@@ -69,6 +70,7 @@ template <char...> struct KernelInfoData {
6970
static constexpr const char *getName() { return ""; }
7071
static constexpr bool isESIMD() { return 0; }
7172
static constexpr bool callsThisItem() { return false; }
73+
static constexpr bool callsAnyThisFreeFunction() { return false; }
7274
};
7375

7476
// C++14 like index_sequence and make_index_sequence

0 commit comments

Comments
 (0)