Skip to content

Commit a08674e

Browse files
authored
[SYCL] Fix not found kernel due to empty kernel name when using set_arg(s) (#2181)
Using set_arg(s) function in your code with the cacheable kernel leads to the runtime throws an exception "No kernel named was found -46 (CL_INVALID_KERNEL_NAME)" when the kernel is in process of submitting because the kernel cannot be found in the cache. This patch fixes the issue. Signed-off-by: Alexander Flegontov <[email protected]>
1 parent 1070d6c commit a08674e

File tree

2 files changed

+247
-9
lines changed

2 files changed

+247
-9
lines changed

sycl/include/CL/sycl/handler.hpp

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -726,6 +726,7 @@ class __SYCL_EXPORT handler {
726726
MNDRDesc.set(std::move(NumWorkItems));
727727
MCGType = detail::CG::KERNEL;
728728
extractArgsAndReqs();
729+
MKernelName = getKernelName();
729730
}
730731

731732
#ifdef __SYCL_DEVICE_ONLY__
@@ -1214,6 +1215,7 @@ class __SYCL_EXPORT handler {
12141215
MKernel = detail::getSyclObjImpl(std::move(Kernel));
12151216
MCGType = detail::CG::KERNEL;
12161217
extractArgsAndReqs();
1218+
MKernelName = getKernelName();
12171219
}
12181220

12191221
void parallel_for(range<1> NumWorkItems, kernel Kernel) {
@@ -1247,6 +1249,7 @@ class __SYCL_EXPORT handler {
12471249
MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset));
12481250
MCGType = detail::CG::KERNEL;
12491251
extractArgsAndReqs();
1252+
MKernelName = getKernelName();
12501253
}
12511254

12521255
/// Defines and invokes a SYCL kernel function for the specified range and
@@ -1267,6 +1270,7 @@ class __SYCL_EXPORT handler {
12671270
MNDRDesc.set(std::move(NDRange));
12681271
MCGType = detail::CG::KERNEL;
12691272
extractArgsAndReqs();
1273+
MKernelName = getKernelName();
12701274
}
12711275

12721276
/// Defines and invokes a SYCL kernel function.
@@ -1289,9 +1293,10 @@ class __SYCL_EXPORT handler {
12891293
MNDRDesc.set(range<1>{1});
12901294
MKernel = detail::getSyclObjImpl(std::move(Kernel));
12911295
MCGType = detail::CG::KERNEL;
1292-
if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>())
1296+
if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>()) {
12931297
extractArgsAndReqs();
1294-
else
1298+
MKernelName = getKernelName();
1299+
} else
12951300
StoreLambda<NameT, KernelType, /*Dims*/ 0, void>(std::move(KernelFunc));
12961301
#endif
12971302
}
@@ -1329,9 +1334,10 @@ class __SYCL_EXPORT handler {
13291334
MNDRDesc.set(std::move(NumWorkItems));
13301335
MKernel = detail::getSyclObjImpl(std::move(Kernel));
13311336
MCGType = detail::CG::KERNEL;
1332-
if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>())
1337+
if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>()) {
13331338
extractArgsAndReqs();
1334-
else
1339+
MKernelName = getKernelName();
1340+
} else
13351341
StoreLambda<NameT, KernelType, Dims, LambdaArgType>(
13361342
std::move(KernelFunc));
13371343
#endif
@@ -1365,9 +1371,10 @@ class __SYCL_EXPORT handler {
13651371
MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset));
13661372
MKernel = detail::getSyclObjImpl(std::move(Kernel));
13671373
MCGType = detail::CG::KERNEL;
1368-
if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>())
1374+
if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>()) {
13691375
extractArgsAndReqs();
1370-
else
1376+
MKernelName = getKernelName();
1377+
} else
13711378
StoreLambda<NameT, KernelType, Dims, LambdaArgType>(
13721379
std::move(KernelFunc));
13731380
#endif
@@ -1402,9 +1409,10 @@ class __SYCL_EXPORT handler {
14021409
MNDRDesc.set(std::move(NDRange));
14031410
MKernel = detail::getSyclObjImpl(std::move(Kernel));
14041411
MCGType = detail::CG::KERNEL;
1405-
if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>())
1412+
if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>()) {
14061413
extractArgsAndReqs();
1407-
else
1414+
MKernelName = getKernelName();
1415+
} else
14081416
StoreLambda<NameT, KernelType, Dims, LambdaArgType>(
14091417
std::move(KernelFunc));
14101418
#endif
@@ -1820,7 +1828,7 @@ class __SYCL_EXPORT handler {
18201828
unique_ptr_class<detail::HostKernelBase> MHostKernel;
18211829
/// Storage for lambda/function when using HostTask
18221830
unique_ptr_class<detail::HostTask> MHostTask;
1823-
detail::OSModuleHandle MOSModuleHandle;
1831+
detail::OSModuleHandle MOSModuleHandle = detail::OSUtil::ExeModuleHandle;
18241832
// Storage for a lambda or function when using InteropTasks
18251833
std::unique_ptr<detail::InteropTask> MInteropTask;
18261834
/// The list of events that order this operation.
Lines changed: 230 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,230 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
3+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
5+
//==--------------- handler_set_args.cpp -------------------==//
6+
//
7+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8+
// See https://llvm.org/LICENSE.txt for license information.
9+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10+
//
11+
//===----------------------------------------------------------------------===//
12+
#include <CL/sycl.hpp>
13+
#include <cassert>
14+
15+
constexpr bool UseOffset = true;
16+
constexpr bool NoOffset = false;
17+
const cl::sycl::range<1> Range = 1;
18+
19+
using AccessorT = cl::sycl::accessor<int, 1, cl::sycl::access::mode::read_write,
20+
cl::sycl::access::target::global_buffer>;
21+
22+
struct SingleTaskFunctor {
23+
SingleTaskFunctor(AccessorT acc) : MAcc(acc) {}
24+
25+
void operator()() { MAcc[0] = 10; }
26+
27+
AccessorT MAcc;
28+
};
29+
30+
template <bool useOffset> struct ParallelForRangeIdFunctor {
31+
ParallelForRangeIdFunctor(AccessorT acc) : MAcc(acc) {}
32+
33+
void operator()(cl::sycl::id<1> id) { MAcc[0] = 10; }
34+
35+
AccessorT MAcc;
36+
};
37+
38+
template <bool useOffset> struct ParallelForRangeItemFunctor {
39+
ParallelForRangeItemFunctor(AccessorT acc) : MAcc(acc) {}
40+
41+
void operator()(cl::sycl::item<1> item) { MAcc[0] = 10; }
42+
43+
AccessorT MAcc;
44+
};
45+
46+
struct ParallelForNdRangeFunctor {
47+
ParallelForNdRangeFunctor(AccessorT acc) : MAcc(acc) {}
48+
49+
void operator()(cl::sycl::nd_item<1> ndItem) { MAcc[0] = 10; }
50+
51+
AccessorT MAcc;
52+
};
53+
54+
template <class kernel_name>
55+
cl::sycl::kernel getPrebuiltKernel(cl::sycl::queue &queue) {
56+
cl::sycl::program program(queue.get_context());
57+
program.build_with_kernel_type<kernel_name>();
58+
return program.get_kernel<kernel_name>();
59+
}
60+
61+
template <class kernel_wrapper>
62+
void checkApiCall(cl::sycl::queue &queue, kernel_wrapper &&kernelWrapper) {
63+
int result = 0;
64+
{
65+
auto buf = cl::sycl::buffer<int, 1>(&result, Range);
66+
queue.submit([&](cl::sycl::handler &cgh) {
67+
auto acc = buf.get_access<cl::sycl::access::mode::read_write>(cgh);
68+
kernelWrapper(cgh, acc);
69+
});
70+
}
71+
assert(result == 10);
72+
}
73+
74+
int main() {
75+
cl::sycl::queue Queue;
76+
const cl::sycl::id<1> Offset(0);
77+
const cl::sycl::nd_range<1> NdRange(Range, Range);
78+
79+
checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) {
80+
cgh.single_task(SingleTaskFunctor(acc));
81+
});
82+
83+
checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) {
84+
cgh.parallel_for(Range, ParallelForRangeIdFunctor<NoOffset>(acc));
85+
});
86+
87+
checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) {
88+
cgh.parallel_for(Range, Offset, ParallelForRangeIdFunctor<UseOffset>(acc));
89+
});
90+
91+
checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) {
92+
cgh.parallel_for(Range, ParallelForRangeItemFunctor<NoOffset>(acc));
93+
});
94+
95+
checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) {
96+
cgh.parallel_for(Range, Offset,
97+
ParallelForRangeItemFunctor<UseOffset>(acc));
98+
});
99+
100+
checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) {
101+
cgh.parallel_for(NdRange, ParallelForNdRangeFunctor(acc));
102+
});
103+
104+
{
105+
auto preBuiltKernel = getPrebuiltKernel<SingleTaskFunctor>(Queue);
106+
107+
checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) {
108+
cgh.set_args(acc);
109+
cgh.single_task(preBuiltKernel);
110+
});
111+
}
112+
113+
{
114+
auto preBuiltKernel =
115+
getPrebuiltKernel<ParallelForRangeIdFunctor<NoOffset>>(Queue);
116+
117+
checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) {
118+
cgh.set_args(acc);
119+
cgh.parallel_for(Range, preBuiltKernel);
120+
});
121+
}
122+
123+
{
124+
auto preBuiltKernel =
125+
getPrebuiltKernel<ParallelForRangeIdFunctor<UseOffset>>(Queue);
126+
127+
checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) {
128+
cgh.set_args(acc);
129+
cgh.parallel_for(Range, Offset, preBuiltKernel);
130+
});
131+
}
132+
133+
{
134+
auto preBuiltKernel =
135+
getPrebuiltKernel<ParallelForRangeItemFunctor<NoOffset>>(Queue);
136+
137+
checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) {
138+
cgh.set_args(acc);
139+
cgh.parallel_for(Range, preBuiltKernel);
140+
});
141+
}
142+
143+
{
144+
auto preBuiltKernel =
145+
getPrebuiltKernel<ParallelForRangeItemFunctor<UseOffset>>(Queue);
146+
147+
checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) {
148+
cgh.set_args(acc);
149+
cgh.parallel_for(Range, Offset, preBuiltKernel);
150+
});
151+
}
152+
153+
{
154+
auto preBuiltKernel = getPrebuiltKernel<ParallelForNdRangeFunctor>(Queue);
155+
156+
checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) {
157+
cgh.set_args(acc);
158+
cgh.parallel_for(NdRange, preBuiltKernel);
159+
});
160+
}
161+
162+
{
163+
auto preBuiltKernel = getPrebuiltKernel<SingleTaskFunctor>(Queue);
164+
165+
checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) {
166+
cgh.set_args(acc);
167+
cgh.single_task<class OtherKernelName1>(preBuiltKernel,
168+
[=]() { acc[0] = 10; });
169+
});
170+
}
171+
172+
{
173+
auto preBuiltKernel =
174+
getPrebuiltKernel<ParallelForRangeIdFunctor<NoOffset>>(Queue);
175+
176+
checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) {
177+
cgh.set_args(acc);
178+
cgh.parallel_for<class OtherKernelName2>(
179+
preBuiltKernel, Range, [=](cl::sycl::id<1> id) { acc[0] = 10; });
180+
});
181+
}
182+
183+
{
184+
auto preBuiltKernel =
185+
getPrebuiltKernel<ParallelForRangeIdFunctor<UseOffset>>(Queue);
186+
187+
checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) {
188+
cgh.set_args(acc);
189+
cgh.parallel_for<class OtherKernelName3>(
190+
preBuiltKernel, Range, Offset,
191+
[=](cl::sycl::id<1> id) { acc[0] = 10; });
192+
});
193+
}
194+
195+
{
196+
auto preBuiltKernel =
197+
getPrebuiltKernel<ParallelForRangeItemFunctor<NoOffset>>(Queue);
198+
199+
checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) {
200+
cgh.set_args(acc);
201+
cgh.parallel_for<class OtherKernelName4>(
202+
preBuiltKernel, Range, [=](cl::sycl::item<1> item) { acc[0] = 10; });
203+
});
204+
}
205+
206+
{
207+
auto preBuiltKernel =
208+
getPrebuiltKernel<ParallelForRangeItemFunctor<UseOffset>>(Queue);
209+
210+
checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) {
211+
cgh.set_args(acc);
212+
cgh.parallel_for<class OtherKernelName5>(
213+
preBuiltKernel, Range, Offset,
214+
[=](cl::sycl::item<1> item) { acc[0] = 10; });
215+
});
216+
}
217+
218+
{
219+
auto preBuiltKernel = getPrebuiltKernel<ParallelForNdRangeFunctor>(Queue);
220+
221+
checkApiCall(Queue, [&](cl::sycl::handler &cgh, AccessorT acc) {
222+
cgh.set_args(acc);
223+
cgh.parallel_for<class OtherKernelName6>(
224+
preBuiltKernel, NdRange,
225+
[=](cl::sycl::nd_item<1> ndItem) { acc[0] = 10; });
226+
});
227+
}
228+
229+
return 0;
230+
}

0 commit comments

Comments
 (0)