Skip to content

[SYCL][Graph] Support for kernel-bundle #11505

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Oct 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions sycl/source/detail/graph_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -681,9 +681,8 @@ exec_graph_impl::enqueue(const std::shared_ptr<sycl::detail::queue_impl> &Queue,
NodeImpl->MCommandGroup.get());
auto OutEvent = CreateNewEvent();
pi_int32 Res = sycl::detail::enqueueImpKernel(
Queue, CG->MNDRDesc, CG->MArgs,
// TODO: Handler KernelBundles
nullptr, CG->MSyclKernel, CG->MKernelName, RawEvents, OutEvent,
Queue, CG->MNDRDesc, CG->MArgs, CG->MKernelBundle, CG->MSyclKernel,
CG->MKernelName, RawEvents, OutEvent,
// TODO: Pass accessor mem allocations
nullptr,
// TODO: Extract from handler
Expand Down
25 changes: 24 additions & 1 deletion sycl/source/detail/scheduler/commands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2388,8 +2388,31 @@ pi_int32 enqueueImpCommandBufferKernel(
pi_program PiProgram = nullptr;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sergey-semenov , would you be able to look at the changes in this file? I'm not familiar enough to provide comprehensive review.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The changes in this file look good to me.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@aelovikov-intel @sergey-semenov Could one of you approve this PR if you are happy with the content, thanks.


auto Kernel = CommandGroup.MSyclKernel;
auto KernelBundleImplPtr = CommandGroup.MKernelBundle;
const KernelArgMask *EliminatedArgMask = nullptr;
if (Kernel != nullptr) {

// Use kernel_bundle if available unless it is interop.
// Interop bundles can't be used in the first branch, because the kernels
// in interop kernel bundles (if any) do not have kernel_id
// and can therefore not be looked up, but since they are self-contained
// they can simply be launched directly.
if (KernelBundleImplPtr && !KernelBundleImplPtr->isInterop()) {
std::shared_ptr<kernel_impl> SyclKernelImpl;
std::shared_ptr<device_image_impl> DeviceImageImpl;
auto KernelName = CommandGroup.MKernelName;
kernel_id KernelID =
detail::ProgramManager::getInstance().getSYCLKernelID(KernelName);
kernel SyclKernel =
KernelBundleImplPtr->get_kernel(KernelID, KernelBundleImplPtr);
SyclKernelImpl = detail::getSyclObjImpl(SyclKernel);
PiKernel = SyclKernelImpl->getHandleRef();
DeviceImageImpl = SyclKernelImpl->getDeviceImage();
PiProgram = DeviceImageImpl->get_program_ref();
std::tie(PiKernel, KernelMutex, EliminatedArgMask) =
detail::ProgramManager::getInstance().getOrCreateKernel(
KernelBundleImplPtr->get_context(), KernelName,
/*PropList=*/{}, PiProgram);
} else if (Kernel != nullptr) {
PiKernel = Kernel->getHandleRef();
} else {
std::tie(PiKernel, KernelMutex, EliminatedArgMask, PiProgram) =
Expand Down
10 changes: 4 additions & 6 deletions sycl/source/handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -816,7 +816,8 @@ void handler::verifyUsedKernelBundle(const std::string &KernelName) {
return;

kernel_id KernelID = detail::get_kernel_id_impl(KernelName);
device Dev = detail::getDeviceFromHandler(*this);
device Dev =
(MGraph) ? MGraph->getDevice() : detail::getDeviceFromHandler(*this);
if (!UsedKernelBundleImplPtr->has_kernel(KernelID, Dev))
throw sycl::exception(
make_error_code(errc::kernel_not_supported),
Expand Down Expand Up @@ -1154,13 +1155,10 @@ void handler::ext_oneapi_signal_external_semaphore(

void handler::use_kernel_bundle(
const kernel_bundle<bundle_state::executable> &ExecBundle) {

throwIfGraphAssociated<ext::oneapi::experimental::detail::
UnsupportedGraphFeatures::sycl_kernel_bundle>();

std::shared_ptr<detail::queue_impl> PrimaryQueue =
MImpl->MSubmissionPrimaryQueue;
if (PrimaryQueue->get_context() != ExecBundle.get_context())
if ((!MGraph && (PrimaryQueue->get_context() != ExecBundle.get_context())) ||
(MGraph && (MGraph->getContext() != ExecBundle.get_context())))
throw sycl::exception(
make_error_code(errc::invalid),
"Context associated with the primary queue is different from the "
Expand Down
44 changes: 44 additions & 0 deletions sycl/test-e2e/Graph/Explicit/kernel_bundle.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// REQUIRES: level_zero, gpu
// RUN: %{build} -o %t.out
// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s

// Checks the PI call trace to ensure that the bundle kernel of the single task
// is used.

// CHECK:---> piProgramCreate
// CHECK-NEXT: <unknown> : {{.*}}
// CHECK-NEXT: <unknown> : {{.*}}
// CHECK-NEXT: <unknown> : {{.*}}
// CHECK-NEXT: <unknown> : {{.*}}
// CHECK-NEXT: ) ---> pi_result : PI_SUCCESS
// CHECK-NEXT: [out]<unknown> ** : {{.*}}[ [[PROGRAM_HANDLE1:[0-9a-fA-Fx]]]
//
// CHECK:---> piProgramBuild(
// CHECK-NEXT: <unknown> : [[PROGRAM_HANDLE1]]
//
// CHECK:---> piProgramRetain(
// CHECK-NEXT: <unknown> : [[PROGRAM_HANDLE1]]
// CHECK-NEXT:---> pi_result : PI_SUCCESS

// CHECK:---> piKernelCreate(
// CHECK-NEXT: <unknown> : [[PROGRAM_HANDLE1]]
// CHECK-NEXT:<const char *>: _ZTS11Kernel1Name
// CHECK-NEXT: <unknown> : {{.*}}
// CHECK-NEXT: ---> pi_result : PI_SUCCESS
// CHECK-NEXT: [out]<unknown> ** : {{.*}}[ [[KERNEL_HANDLE:[0-9a-fA-Fx]]]
//
// CHECK:---> piKernelRetain(
// CHECK-NEXT: <unknown> : [[KERNEL_HANDLE]]
// CHECK-NEXT:---> pi_result : PI_SUCCESS
//
// CHECK:---> piextCommandBufferNDRangeKernel(
// CHECK-NEXT:<unknown> : {{.*}}
// CHECK-NEXT:<unknown> : [[KERNEL_HANDLE]]
//
// CHECK:---> piKernelRelease(
// CHECK-NEXT: <unknown> : [[KERNEL_HANDLE]]
// CHECK-NEXT:---> pi_result : PI_SUCCESS

#define GRAPH_E2E_EXPLICIT

#include "../Inputs/kernel_bundle.cpp"
11 changes: 11 additions & 0 deletions sycl/test-e2e/Graph/Explicit/multiple_kernel_bundles.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// REQUIRES: level_zero, gpu
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
// Extra run to check for leaks in Level Zero using ZE_DEBUG
// RUN: %if ext_oneapi_level_zero %{env ZE_DEBUG=4 %{run} %t.out 2>&1 | FileCheck %s %}
//
// CHECK-NOT: LEAK

#define GRAPH_E2E_EXPLICIT

#include "../Inputs/multiple_kernel_bundles.cpp"
50 changes: 50 additions & 0 deletions sycl/test-e2e/Graph/Inputs/kernel_bundle.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Tests using a bundle in a graph.

#include "../graph_common.hpp"

class Kernel1Name;

int main() {
using T = int;

const sycl::device Dev{sycl::default_selector_v};
const sycl::context Ctx{Dev};

queue Queue{Ctx,
Dev,
{sycl::ext::intel::property::queue::no_immediate_command_list{}}};

sycl::kernel_id KernelID = sycl::get_kernel_id<Kernel1Name>();

sycl::kernel_bundle KernelBundleInput =
sycl::get_kernel_bundle<sycl::bundle_state::input>(Ctx, {Dev});
assert(KernelBundleInput.has_kernel(KernelID));
assert(sycl::has_kernel_bundle<sycl::bundle_state::input>(Ctx, {Dev}));

sycl::kernel_bundle<sycl::bundle_state::executable> KernelBundleExecutable =
sycl::build(KernelBundleInput, KernelBundleInput.get_devices());

sycl::buffer<T, 1> Buf(sycl::range<1>{1});
Buf.set_write_back(false);
{
exp_ext::command_graph Graph{
Queue.get_context(),
Queue.get_device(),
{exp_ext::property::graph::assume_buffer_outlives_graph{}}};

add_node(Graph, Queue, ([&](sycl::handler &CGH) {
auto Acc = Buf.get_access<sycl::access::mode::write>(CGH);
CGH.use_kernel_bundle(KernelBundleExecutable);
CGH.single_task<Kernel1Name>([=]() { Acc[0] = 42; });
}));

auto GraphExec = Graph.finalize();

Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); });
Queue.wait_and_throw();
}
host_accessor HostAcc(Buf);
assert(HostAcc[0] == 42);

return 0;
}
113 changes: 113 additions & 0 deletions sycl/test-e2e/Graph/Inputs/multiple_kernel_bundles.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
// Tests using multiple kernel bundles in a graph.

#include "../graph_common.hpp"

class Kernel1Name;
class Kernel2Name;

int main() {
using T = int;

const sycl::device Dev{sycl::default_selector_v};
const sycl::device Dev2{sycl::default_selector_v};

const sycl::context Ctx{Dev};
const sycl::context Ctx2{Dev2};

queue Queue{Ctx,
Dev,
{sycl::ext::intel::property::queue::no_immediate_command_list{}}};

sycl::kernel_id Kernel1ID = sycl::get_kernel_id<Kernel1Name>();
sycl::kernel_id Kernel2ID = sycl::get_kernel_id<Kernel2Name>();

sycl::kernel_bundle KernelBundleInput1 =
sycl::get_kernel_bundle<sycl::bundle_state::input>(Ctx, {Dev},
{Kernel1ID});
sycl::kernel_bundle KernelBundleInput2 =
sycl::get_kernel_bundle<sycl::bundle_state::input>(Ctx2, {Dev2},
{Kernel2ID});
assert(KernelBundleInput1.has_kernel(Kernel1ID));
assert(sycl::has_kernel_bundle<sycl::bundle_state::input>(Ctx, {Dev}));
assert(KernelBundleInput2.has_kernel(Kernel2ID));
assert(sycl::has_kernel_bundle<sycl::bundle_state::input>(Ctx2, {Dev2}));

sycl::kernel_bundle<sycl::bundle_state::executable> KernelBundleExecutable1 =
sycl::build(KernelBundleInput1, KernelBundleInput1.get_devices());

sycl::kernel_bundle<sycl::bundle_state::executable> KernelBundleExecutable2 =
sycl::build(KernelBundleInput2, KernelBundleInput2.get_devices());

std::vector<T> DataA(Size);
std::iota(DataA.begin(), DataA.end(), 1);
std::vector<T> ReferenceA;
for (size_t i = 0; i < Size; i++) {
ReferenceA.push_back(DataA[i] + 1);
}

buffer<T> BufferA{DataA.data(), range<1>{DataA.size()}};
BufferA.set_write_back(false);
sycl::buffer<T, 1> Buf1(sycl::range<1>{1});
Buf1.set_write_back(false);
sycl::buffer<T, 1> Buf2(sycl::range<1>{1});
Buf2.set_write_back(false);
{
exp_ext::command_graph Graph{
Queue.get_context(),
Queue.get_device(),
{exp_ext::property::graph::assume_buffer_outlives_graph{}}};

add_node(Graph, Queue, ([&](sycl::handler &CGH) {
auto Acc = Buf1.get_access<sycl::access::mode::write>(CGH);
CGH.use_kernel_bundle(KernelBundleExecutable1);
CGH.single_task<Kernel1Name>([=]() { Acc[0] = 42; });
}));

add_node(Graph, Queue, ([&](handler &CGH) {
auto DataA =
BufferA.template get_access<access::mode::read_write>(CGH);
CGH.use_kernel_bundle(KernelBundleExecutable1);
CGH.parallel_for(range<1>{Size},
[=](item<1> Id) { DataA[Id]++; });
}));

#ifdef GRAPH_E2E_EXPLICIT
// KernelBundleExecutable2 and the Graph don't share the same context
// We should therefore get a exception
// Note we can't do the same test for Record&Replay interface since two
// queues with different contexts cannot be recorded by the same Graph.
std::error_code ExceptionCode = make_error_code(sycl::errc::success);
try {
Graph.add([&](sycl::handler &CGH) {
auto Acc = Buf2.get_access<sycl::access::mode::write>(CGH);
CGH.use_kernel_bundle(KernelBundleExecutable2);
CGH.single_task<Kernel2Name>([=]() { Acc[0] = 24; });
});
} catch (exception &Exception) {
ExceptionCode = Exception.code();
}
assert(ExceptionCode == sycl::errc::invalid);
#else
// If Explicit API is not used, we still need to add kernel2Name to the
// bundle since this test expected to find it in the bundle whatever the
// API used.
if (0) {
Queue.submit(
[](sycl::handler &CGH) { CGH.single_task<Kernel2Name>([]() {}); });
}
#endif

auto GraphExec = Graph.finalize();

Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); });
Queue.wait_and_throw();
}
host_accessor HostAcc1(Buf1);
assert(HostAcc1[0] == 42);

host_accessor HostAccA(BufferA);
for (size_t i = 0; i < Size; i++)
assert(ReferenceA[i] == HostAccA[i]);

return 0;
}
44 changes: 44 additions & 0 deletions sycl/test-e2e/Graph/RecordReplay/kernel_bundle.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// REQUIRES: level_zero, gpu
// RUN: %{build} -o %t.out
// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s

// Checks the PI call trace to ensure that the bundle kernel of the single task
// is used.

// CHECK:---> piProgramCreate
// CHECK-NEXT: <unknown> : {{.*}}
// CHECK-NEXT: <unknown> : {{.*}}
// CHECK-NEXT: <unknown> : {{.*}}
// CHECK-NEXT: <unknown> : {{.*}}
// CHECK-NEXT: ) ---> pi_result : PI_SUCCESS
// CHECK-NEXT: [out]<unknown> ** : {{.*}}[ [[PROGRAM_HANDLE1:[0-9a-fA-Fx]]]
//
// CHECK:---> piProgramBuild(
// CHECK-NEXT: <unknown> : [[PROGRAM_HANDLE1]]
//
// CHECK:---> piProgramRetain(
// CHECK-NEXT: <unknown> : [[PROGRAM_HANDLE1]]
// CHECK-NEXT:---> pi_result : PI_SUCCESS

// CHECK:---> piKernelCreate(
// CHECK-NEXT: <unknown> : [[PROGRAM_HANDLE1]]
// CHECK-NEXT:<const char *>: _ZTS11Kernel1Name
// CHECK-NEXT: <unknown> : {{.*}}
// CHECK-NEXT: ---> pi_result : PI_SUCCESS
// CHECK-NEXT: [out]<unknown> ** : {{.*}}[ [[KERNEL_HANDLE:[0-9a-fA-Fx]]]
//
// CHECK:---> piKernelRetain(
// CHECK-NEXT: <unknown> : [[KERNEL_HANDLE]]
// CHECK-NEXT:---> pi_result : PI_SUCCESS
//
// CHECK:---> piextCommandBufferNDRangeKernel(
// CHECK-NEXT:<unknown> : {{.*}}
// CHECK-NEXT:<unknown> : [[KERNEL_HANDLE]]
//
// CHECK:---> piKernelRelease(
// CHECK-NEXT: <unknown> : [[KERNEL_HANDLE]]
// CHECK-NEXT:---> pi_result : PI_SUCCESS

#define GRAPH_E2E_RECORD_REPLAY

#include "../Inputs/kernel_bundle.cpp"
11 changes: 11 additions & 0 deletions sycl/test-e2e/Graph/RecordReplay/multiple_kernel_bundles.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// REQUIRES: level_zero, gpu
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
// Extra run to check for leaks in Level Zero using ZE_DEBUG
// RUN: %if ext_oneapi_level_zero %{env ZE_DEBUG=4 %{run} %t.out 2>&1 | FileCheck %s %}
//
// CHECK-NOT: LEAK

#define GRAPH_E2E_RECORD_REPLAY

#include "../Inputs/multiple_kernel_bundles.cpp"
18 changes: 0 additions & 18 deletions sycl/unittests/Extensions/CommandGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1614,24 +1614,6 @@ TEST_F(CommandGraphTest, SpecializationConstant) {
sycl::exception);
}

// Tests that using kernel bundles in a graph will throw.
TEST_F(CommandGraphTest, KernelBundle) {
sycl::kernel_bundle KernelBundle =
sycl::get_kernel_bundle<sycl::bundle_state::executable>(
Queue.get_context(), {Dev});

ASSERT_THROW(
{
try {
Graph.add([&](handler &CGH) { CGH.use_kernel_bundle(KernelBundle); });
} catch (const sycl::exception &e) {
ASSERT_EQ(e.code(), make_error_code(sycl::errc::invalid));
throw;
}
},
sycl::exception);
}

// Tests that using reductions in a graph will throw.
TEST_F(CommandGraphTest, Reductions) {
int ReduVar = 0;
Expand Down