Skip to content

Commit 341e989

Browse files
[SYCL][Graph] Support for kernel-bundle (#11505)
- Adds support for using kernel-bundle in Graph. - Adds tests checking that kernel bundles can be used with the Record&Replay API and the Explicit API.
1 parent bb9244f commit 341e989

File tree

10 files changed

+303
-28
lines changed

10 files changed

+303
-28
lines changed

sycl/source/detail/graph_impl.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -681,9 +681,8 @@ exec_graph_impl::enqueue(const std::shared_ptr<sycl::detail::queue_impl> &Queue,
681681
NodeImpl->MCommandGroup.get());
682682
auto OutEvent = CreateNewEvent();
683683
pi_int32 Res = sycl::detail::enqueueImpKernel(
684-
Queue, CG->MNDRDesc, CG->MArgs,
685-
// TODO: Handler KernelBundles
686-
nullptr, CG->MSyclKernel, CG->MKernelName, RawEvents, OutEvent,
684+
Queue, CG->MNDRDesc, CG->MArgs, CG->MKernelBundle, CG->MSyclKernel,
685+
CG->MKernelName, RawEvents, OutEvent,
687686
// TODO: Pass accessor mem allocations
688687
nullptr,
689688
// TODO: Extract from handler

sycl/source/detail/scheduler/commands.cpp

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2388,8 +2388,31 @@ pi_int32 enqueueImpCommandBufferKernel(
23882388
pi_program PiProgram = nullptr;
23892389

23902390
auto Kernel = CommandGroup.MSyclKernel;
2391+
auto KernelBundleImplPtr = CommandGroup.MKernelBundle;
23912392
const KernelArgMask *EliminatedArgMask = nullptr;
2392-
if (Kernel != nullptr) {
2393+
2394+
// Use kernel_bundle if available unless it is interop.
2395+
// Interop bundles can't be used in the first branch, because the kernels
2396+
// in interop kernel bundles (if any) do not have kernel_id
2397+
// and can therefore not be looked up, but since they are self-contained
2398+
// they can simply be launched directly.
2399+
if (KernelBundleImplPtr && !KernelBundleImplPtr->isInterop()) {
2400+
std::shared_ptr<kernel_impl> SyclKernelImpl;
2401+
std::shared_ptr<device_image_impl> DeviceImageImpl;
2402+
auto KernelName = CommandGroup.MKernelName;
2403+
kernel_id KernelID =
2404+
detail::ProgramManager::getInstance().getSYCLKernelID(KernelName);
2405+
kernel SyclKernel =
2406+
KernelBundleImplPtr->get_kernel(KernelID, KernelBundleImplPtr);
2407+
SyclKernelImpl = detail::getSyclObjImpl(SyclKernel);
2408+
PiKernel = SyclKernelImpl->getHandleRef();
2409+
DeviceImageImpl = SyclKernelImpl->getDeviceImage();
2410+
PiProgram = DeviceImageImpl->get_program_ref();
2411+
std::tie(PiKernel, KernelMutex, EliminatedArgMask) =
2412+
detail::ProgramManager::getInstance().getOrCreateKernel(
2413+
KernelBundleImplPtr->get_context(), KernelName,
2414+
/*PropList=*/{}, PiProgram);
2415+
} else if (Kernel != nullptr) {
23932416
PiKernel = Kernel->getHandleRef();
23942417
} else {
23952418
std::tie(PiKernel, KernelMutex, EliminatedArgMask, PiProgram) =

sycl/source/handler.cpp

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -820,7 +820,8 @@ void handler::verifyUsedKernelBundle(const std::string &KernelName) {
820820
return;
821821

822822
kernel_id KernelID = detail::get_kernel_id_impl(KernelName);
823-
device Dev = detail::getDeviceFromHandler(*this);
823+
device Dev =
824+
(MGraph) ? MGraph->getDevice() : detail::getDeviceFromHandler(*this);
824825
if (!UsedKernelBundleImplPtr->has_kernel(KernelID, Dev))
825826
throw sycl::exception(
826827
make_error_code(errc::kernel_not_supported),
@@ -1158,13 +1159,10 @@ void handler::ext_oneapi_signal_external_semaphore(
11581159

11591160
void handler::use_kernel_bundle(
11601161
const kernel_bundle<bundle_state::executable> &ExecBundle) {
1161-
1162-
throwIfGraphAssociated<ext::oneapi::experimental::detail::
1163-
UnsupportedGraphFeatures::sycl_kernel_bundle>();
1164-
11651162
std::shared_ptr<detail::queue_impl> PrimaryQueue =
11661163
MImpl->MSubmissionPrimaryQueue;
1167-
if (PrimaryQueue->get_context() != ExecBundle.get_context())
1164+
if ((!MGraph && (PrimaryQueue->get_context() != ExecBundle.get_context())) ||
1165+
(MGraph && (MGraph->getContext() != ExecBundle.get_context())))
11681166
throw sycl::exception(
11691167
make_error_code(errc::invalid),
11701168
"Context associated with the primary queue is different from the "
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// REQUIRES: level_zero, gpu
2+
// RUN: %{build} -o %t.out
3+
// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s
4+
5+
// Checks the PI call trace to ensure that the bundle kernel of the single task
6+
// is used.
7+
8+
// CHECK:---> piProgramCreate
9+
// CHECK-NEXT: <unknown> : {{.*}}
10+
// CHECK-NEXT: <unknown> : {{.*}}
11+
// CHECK-NEXT: <unknown> : {{.*}}
12+
// CHECK-NEXT: <unknown> : {{.*}}
13+
// CHECK-NEXT: ) ---> pi_result : PI_SUCCESS
14+
// CHECK-NEXT: [out]<unknown> ** : {{.*}}[ [[PROGRAM_HANDLE1:[0-9a-fA-Fx]]]
15+
//
16+
// CHECK:---> piProgramBuild(
17+
// CHECK-NEXT: <unknown> : [[PROGRAM_HANDLE1]]
18+
//
19+
// CHECK:---> piProgramRetain(
20+
// CHECK-NEXT: <unknown> : [[PROGRAM_HANDLE1]]
21+
// CHECK-NEXT:---> pi_result : PI_SUCCESS
22+
23+
// CHECK:---> piKernelCreate(
24+
// CHECK-NEXT: <unknown> : [[PROGRAM_HANDLE1]]
25+
// CHECK-NEXT:<const char *>: _ZTS11Kernel1Name
26+
// CHECK-NEXT: <unknown> : {{.*}}
27+
// CHECK-NEXT: ---> pi_result : PI_SUCCESS
28+
// CHECK-NEXT: [out]<unknown> ** : {{.*}}[ [[KERNEL_HANDLE:[0-9a-fA-Fx]]]
29+
//
30+
// CHECK:---> piKernelRetain(
31+
// CHECK-NEXT: <unknown> : [[KERNEL_HANDLE]]
32+
// CHECK-NEXT:---> pi_result : PI_SUCCESS
33+
//
34+
// CHECK:---> piextCommandBufferNDRangeKernel(
35+
// CHECK-NEXT:<unknown> : {{.*}}
36+
// CHECK-NEXT:<unknown> : [[KERNEL_HANDLE]]
37+
//
38+
// CHECK:---> piKernelRelease(
39+
// CHECK-NEXT: <unknown> : [[KERNEL_HANDLE]]
40+
// CHECK-NEXT:---> pi_result : PI_SUCCESS
41+
42+
#define GRAPH_E2E_EXPLICIT
43+
44+
#include "../Inputs/kernel_bundle.cpp"
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
// REQUIRES: level_zero, gpu
2+
// RUN: %{build} -o %t.out
3+
// RUN: %{run} %t.out
4+
// Extra run to check for leaks in Level Zero using ZE_DEBUG
5+
// RUN: %if ext_oneapi_level_zero %{env ZE_DEBUG=4 %{run} %t.out 2>&1 | FileCheck %s %}
6+
//
7+
// CHECK-NOT: LEAK
8+
9+
#define GRAPH_E2E_EXPLICIT
10+
11+
#include "../Inputs/multiple_kernel_bundles.cpp"
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
// Tests using a bundle in a graph.
2+
3+
#include "../graph_common.hpp"
4+
5+
class Kernel1Name;
6+
7+
int main() {
8+
using T = int;
9+
10+
const sycl::device Dev{sycl::default_selector_v};
11+
const sycl::context Ctx{Dev};
12+
13+
queue Queue{Ctx,
14+
Dev,
15+
{sycl::ext::intel::property::queue::no_immediate_command_list{}}};
16+
17+
sycl::kernel_id KernelID = sycl::get_kernel_id<Kernel1Name>();
18+
19+
sycl::kernel_bundle KernelBundleInput =
20+
sycl::get_kernel_bundle<sycl::bundle_state::input>(Ctx, {Dev});
21+
assert(KernelBundleInput.has_kernel(KernelID));
22+
assert(sycl::has_kernel_bundle<sycl::bundle_state::input>(Ctx, {Dev}));
23+
24+
sycl::kernel_bundle<sycl::bundle_state::executable> KernelBundleExecutable =
25+
sycl::build(KernelBundleInput, KernelBundleInput.get_devices());
26+
27+
sycl::buffer<T, 1> Buf(sycl::range<1>{1});
28+
Buf.set_write_back(false);
29+
{
30+
exp_ext::command_graph Graph{
31+
Queue.get_context(),
32+
Queue.get_device(),
33+
{exp_ext::property::graph::assume_buffer_outlives_graph{}}};
34+
35+
add_node(Graph, Queue, ([&](sycl::handler &CGH) {
36+
auto Acc = Buf.get_access<sycl::access::mode::write>(CGH);
37+
CGH.use_kernel_bundle(KernelBundleExecutable);
38+
CGH.single_task<Kernel1Name>([=]() { Acc[0] = 42; });
39+
}));
40+
41+
auto GraphExec = Graph.finalize();
42+
43+
Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); });
44+
Queue.wait_and_throw();
45+
}
46+
host_accessor HostAcc(Buf);
47+
assert(HostAcc[0] == 42);
48+
49+
return 0;
50+
}
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
// Tests using multiple kernel bundles in a graph.
2+
3+
#include "../graph_common.hpp"
4+
5+
class Kernel1Name;
6+
class Kernel2Name;
7+
8+
int main() {
9+
using T = int;
10+
11+
const sycl::device Dev{sycl::default_selector_v};
12+
const sycl::device Dev2{sycl::default_selector_v};
13+
14+
const sycl::context Ctx{Dev};
15+
const sycl::context Ctx2{Dev2};
16+
17+
queue Queue{Ctx,
18+
Dev,
19+
{sycl::ext::intel::property::queue::no_immediate_command_list{}}};
20+
21+
sycl::kernel_id Kernel1ID = sycl::get_kernel_id<Kernel1Name>();
22+
sycl::kernel_id Kernel2ID = sycl::get_kernel_id<Kernel2Name>();
23+
24+
sycl::kernel_bundle KernelBundleInput1 =
25+
sycl::get_kernel_bundle<sycl::bundle_state::input>(Ctx, {Dev},
26+
{Kernel1ID});
27+
sycl::kernel_bundle KernelBundleInput2 =
28+
sycl::get_kernel_bundle<sycl::bundle_state::input>(Ctx2, {Dev2},
29+
{Kernel2ID});
30+
assert(KernelBundleInput1.has_kernel(Kernel1ID));
31+
assert(sycl::has_kernel_bundle<sycl::bundle_state::input>(Ctx, {Dev}));
32+
assert(KernelBundleInput2.has_kernel(Kernel2ID));
33+
assert(sycl::has_kernel_bundle<sycl::bundle_state::input>(Ctx2, {Dev2}));
34+
35+
sycl::kernel_bundle<sycl::bundle_state::executable> KernelBundleExecutable1 =
36+
sycl::build(KernelBundleInput1, KernelBundleInput1.get_devices());
37+
38+
sycl::kernel_bundle<sycl::bundle_state::executable> KernelBundleExecutable2 =
39+
sycl::build(KernelBundleInput2, KernelBundleInput2.get_devices());
40+
41+
std::vector<T> DataA(Size);
42+
std::iota(DataA.begin(), DataA.end(), 1);
43+
std::vector<T> ReferenceA;
44+
for (size_t i = 0; i < Size; i++) {
45+
ReferenceA.push_back(DataA[i] + 1);
46+
}
47+
48+
buffer<T> BufferA{DataA.data(), range<1>{DataA.size()}};
49+
BufferA.set_write_back(false);
50+
sycl::buffer<T, 1> Buf1(sycl::range<1>{1});
51+
Buf1.set_write_back(false);
52+
sycl::buffer<T, 1> Buf2(sycl::range<1>{1});
53+
Buf2.set_write_back(false);
54+
{
55+
exp_ext::command_graph Graph{
56+
Queue.get_context(),
57+
Queue.get_device(),
58+
{exp_ext::property::graph::assume_buffer_outlives_graph{}}};
59+
60+
add_node(Graph, Queue, ([&](sycl::handler &CGH) {
61+
auto Acc = Buf1.get_access<sycl::access::mode::write>(CGH);
62+
CGH.use_kernel_bundle(KernelBundleExecutable1);
63+
CGH.single_task<Kernel1Name>([=]() { Acc[0] = 42; });
64+
}));
65+
66+
add_node(Graph, Queue, ([&](handler &CGH) {
67+
auto DataA =
68+
BufferA.template get_access<access::mode::read_write>(CGH);
69+
CGH.use_kernel_bundle(KernelBundleExecutable1);
70+
CGH.parallel_for(range<1>{Size},
71+
[=](item<1> Id) { DataA[Id]++; });
72+
}));
73+
74+
#ifdef GRAPH_E2E_EXPLICIT
75+
// KernelBundleExecutable2 and the Graph don't share the same context
76+
// We should therefore get a exception
77+
// Note we can't do the same test for Record&Replay interface since two
78+
// queues with different contexts cannot be recorded by the same Graph.
79+
std::error_code ExceptionCode = make_error_code(sycl::errc::success);
80+
try {
81+
Graph.add([&](sycl::handler &CGH) {
82+
auto Acc = Buf2.get_access<sycl::access::mode::write>(CGH);
83+
CGH.use_kernel_bundle(KernelBundleExecutable2);
84+
CGH.single_task<Kernel2Name>([=]() { Acc[0] = 24; });
85+
});
86+
} catch (exception &Exception) {
87+
ExceptionCode = Exception.code();
88+
}
89+
assert(ExceptionCode == sycl::errc::invalid);
90+
#else
91+
// If Explicit API is not used, we still need to add kernel2Name to the
92+
// bundle since this test expected to find it in the bundle whatever the
93+
// API used.
94+
if (0) {
95+
Queue.submit(
96+
[](sycl::handler &CGH) { CGH.single_task<Kernel2Name>([]() {}); });
97+
}
98+
#endif
99+
100+
auto GraphExec = Graph.finalize();
101+
102+
Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); });
103+
Queue.wait_and_throw();
104+
}
105+
host_accessor HostAcc1(Buf1);
106+
assert(HostAcc1[0] == 42);
107+
108+
host_accessor HostAccA(BufferA);
109+
for (size_t i = 0; i < Size; i++)
110+
assert(ReferenceA[i] == HostAccA[i]);
111+
112+
return 0;
113+
}
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// REQUIRES: level_zero, gpu
2+
// RUN: %{build} -o %t.out
3+
// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s
4+
5+
// Checks the PI call trace to ensure that the bundle kernel of the single task
6+
// is used.
7+
8+
// CHECK:---> piProgramCreate
9+
// CHECK-NEXT: <unknown> : {{.*}}
10+
// CHECK-NEXT: <unknown> : {{.*}}
11+
// CHECK-NEXT: <unknown> : {{.*}}
12+
// CHECK-NEXT: <unknown> : {{.*}}
13+
// CHECK-NEXT: ) ---> pi_result : PI_SUCCESS
14+
// CHECK-NEXT: [out]<unknown> ** : {{.*}}[ [[PROGRAM_HANDLE1:[0-9a-fA-Fx]]]
15+
//
16+
// CHECK:---> piProgramBuild(
17+
// CHECK-NEXT: <unknown> : [[PROGRAM_HANDLE1]]
18+
//
19+
// CHECK:---> piProgramRetain(
20+
// CHECK-NEXT: <unknown> : [[PROGRAM_HANDLE1]]
21+
// CHECK-NEXT:---> pi_result : PI_SUCCESS
22+
23+
// CHECK:---> piKernelCreate(
24+
// CHECK-NEXT: <unknown> : [[PROGRAM_HANDLE1]]
25+
// CHECK-NEXT:<const char *>: _ZTS11Kernel1Name
26+
// CHECK-NEXT: <unknown> : {{.*}}
27+
// CHECK-NEXT: ---> pi_result : PI_SUCCESS
28+
// CHECK-NEXT: [out]<unknown> ** : {{.*}}[ [[KERNEL_HANDLE:[0-9a-fA-Fx]]]
29+
//
30+
// CHECK:---> piKernelRetain(
31+
// CHECK-NEXT: <unknown> : [[KERNEL_HANDLE]]
32+
// CHECK-NEXT:---> pi_result : PI_SUCCESS
33+
//
34+
// CHECK:---> piextCommandBufferNDRangeKernel(
35+
// CHECK-NEXT:<unknown> : {{.*}}
36+
// CHECK-NEXT:<unknown> : [[KERNEL_HANDLE]]
37+
//
38+
// CHECK:---> piKernelRelease(
39+
// CHECK-NEXT: <unknown> : [[KERNEL_HANDLE]]
40+
// CHECK-NEXT:---> pi_result : PI_SUCCESS
41+
42+
#define GRAPH_E2E_RECORD_REPLAY
43+
44+
#include "../Inputs/kernel_bundle.cpp"
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
// REQUIRES: level_zero, gpu
2+
// RUN: %{build} -o %t.out
3+
// RUN: %{run} %t.out
4+
// Extra run to check for leaks in Level Zero using ZE_DEBUG
5+
// RUN: %if ext_oneapi_level_zero %{env ZE_DEBUG=4 %{run} %t.out 2>&1 | FileCheck %s %}
6+
//
7+
// CHECK-NOT: LEAK
8+
9+
#define GRAPH_E2E_RECORD_REPLAY
10+
11+
#include "../Inputs/multiple_kernel_bundles.cpp"

sycl/unittests/Extensions/CommandGraph.cpp

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1614,24 +1614,6 @@ TEST_F(CommandGraphTest, SpecializationConstant) {
16141614
sycl::exception);
16151615
}
16161616

1617-
// Tests that using kernel bundles in a graph will throw.
1618-
TEST_F(CommandGraphTest, KernelBundle) {
1619-
sycl::kernel_bundle KernelBundle =
1620-
sycl::get_kernel_bundle<sycl::bundle_state::executable>(
1621-
Queue.get_context(), {Dev});
1622-
1623-
ASSERT_THROW(
1624-
{
1625-
try {
1626-
Graph.add([&](handler &CGH) { CGH.use_kernel_bundle(KernelBundle); });
1627-
} catch (const sycl::exception &e) {
1628-
ASSERT_EQ(e.code(), make_error_code(sycl::errc::invalid));
1629-
throw;
1630-
}
1631-
},
1632-
sycl::exception);
1633-
}
1634-
16351617
// Tests that using reductions in a graph will throw.
16361618
TEST_F(CommandGraphTest, Reductions) {
16371619
int ReduVar = 0;

0 commit comments

Comments
 (0)