Skip to content

Commit fd132cd

Browse files
committed
Iterate on spec and tests
1 parent b340233 commit fd132cd

17 files changed

+416
-191
lines changed

sycl/doc/extensions/experimental/sycl_ext_codeplay_enqueue_native_command.asciidoc

Lines changed: 106 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -258,12 +258,11 @@ interop_handle::ext_codeplay_get_native_graph() const;
258258

259259
Exceptions:
260260

261-
* Throws with error code `invalid` if there is no native graph object
262-
associated with the interop handle.
261+
* Throws with error code `invalid` if `ext_codeplay_has_graph()` returns
262+
`false`.
263263

264264
|===
265265

266-
267266
== Examples
268267

269268
=== HIP Native Task
@@ -284,27 +283,122 @@ q.submit([&](sycl::handler &cgh) {
284283

285284
// Can enqueue arbitrary backend operations. This could also be a kernel
286285
// launch or call to a library that enqueues operations on the stream etc
287-
//
288-
// Important: Enqueuing a *synchronous* backend operation results in
289-
// undefined behavior.
290286
hipMemcpyAsync(target_ptr, native_mem, test_size * sizeof(int),
291287
hipMemcpyDeviceToHost, stream);
292288
});
293289
});
294290
q.wait();
295291
```
296292

297-
=== CUDA Stream Record Native Task
293+
=== Level-Zero Add Native Graph Node
294+
295+
This example demonstrates how to use this extension to add a native command
296+
to a SYCL-Graph object on the Level-Zero backend. The command is doing a memory
297+
copy between two USM pointers.
298+
299+
```c++
300+
Graph.add([&](sycl::handler &CGH) {
301+
CGH.ext_codeplay_enqueue_native_command([=](sycl::interop_handle IH) {
302+
ze_command_list_handle_t NativeGraph =
303+
IH.ext_codeplay_get_native_graph<sycl::backend::ext_oneapi_level_zero>();
304+
305+
zeCommandListAppendMemoryCopy(
306+
NativeGraph, PtrY, PtrX, Size * sizeof(int), nullptr, 0, nullptr);
307+
});
308+
});
309+
```
310+
311+
=== OpenCL Add Native Graph Node
312+
313+
This example demonstrates how to use this extension to add a native command to
314+
a SYCL-Graph object on the OpenCL backend. The command is doing a copy between
315+
two buffer objects.
316+
317+
```c++
318+
sycl::queue Queue;
319+
auto Platform = get_native<sycl::backend::opencl>(Queue.get_context().get_platform());
320+
clCommandCopyBufferKHR_fn clCommandCopyBufferKHR =
321+
reinterpret_cast<clCommandCopyBufferKHR_fn>(
322+
clGetExtensionFunctionAddressForPlatform(Platform, "clCommandCopyBufferKHR"));
323+
324+
Graph.add([&](sycl::handler &CGH) {
325+
auto AccX = BufX.get_access(CGH);
326+
auto AccY = BufY.get_access(CGH);
327+
CGH.ext_codeplay_enqueue_native_command([=](sycl::interop_handle IH) {
328+
cl_command_buffer_khr NativeGraph =
329+
IH.ext_codeplay_get_native_graph<sycl::backend::opencl>();
330+
auto SrcBuffer = IH.get_native_mem<sycl::backend::opencl>(AccX);
331+
auto DstBuffer = IH.get_native_mem<sycl::backend::opencl>(AccY);
332+
333+
clCommandCopyBufferKHR(
334+
NativeGraph, nullptr, nullptr, SrcBuffer[0], DstBuffer[0], 0, 0,
335+
Size * sizeof(int), 0, nullptr, nullptr, nullptr);
336+
});
337+
});
338+
```
339+
340+
=== CUDA Add Native Graph Node
341+
342+
This example demonstrates how to use this extension to add a native command to
343+
a SYCL-Graph object on the CUDA backend. The command is doing a memory copy
344+
between two device USM pointers.
345+
346+
```c++
347+
Graph.add([&](sycl::handler &CGH) {
348+
CGH.ext_codeplay_enqueue_native_command([=](sycl::interop_handle IH) {
349+
CUgraph NativeGraph =
350+
IH.ext_codeplay_get_native_graph<sycl::backend::ext_oneapi_cuda>();
351+
352+
CUDA_MEMCPY3D Params;
353+
std::memset(&Params, 0, sizeof(CUDA_MEMCPY3D));
354+
Params.srcMemoryType = CU_MEMORYTYPE_DEVICE;
355+
Params.srcDevice = (CUdeviceptr)PtrX;
356+
Params.srcHost = nullptr;
357+
Params.dstMemoryType = CU_MEMORYTYPE_DEVICE;
358+
Params.dstDevice = (CUdeviceptr)PtrY;
359+
Params.dstHost = nullptr;
360+
Params.WidthInBytes = Size * sizeof(int);
361+
Params.Height = 1;
362+
Params.Depth = 1;
363+
364+
CUgraphNode Node;
365+
CUcontext Context = IH.get_native_context<sycl::backend::ext_oneapi_cuda>();
366+
cuGraphAddMemcpyNode(&Node, NativeGraph, nullptr, 0, &Params, Context);
367+
});
368+
});
369+
```
370+
371+
=== HIP Add Native Graph Node
372+
373+
This example demonstrates how to use this extension to add a native command to
374+
a SYCL-Graph object on the HIP backend. The command is doing a memory copy
375+
between two device USM pointers.
376+
377+
```c++
378+
Graph.add([&](sycl::handler &CGH) {
379+
CGH.ext_codeplay_enqueue_native_command([=](sycl::interop_handle IH) {
380+
HIPGraph NativeGraph =
381+
IH.ext_codeplay_get_native_graph<sycl::backend::ext_oneapi_hip>();
382+
383+
HIPGraphNode Node;
384+
hipGraphAddMemcpyNode1D(&Node, NativeGraph, nullptr, 0, PtrY, PtrX,
385+
Size * sizeof(int), hipMemcpyDefault);
386+
});
387+
});
388+
```
389+
390+
=== CUDA Stream Record Native Graph Nodes
298391

299392
This example demonstrates how to use this extension to add stream recorded
300393
native nodes to a SYCL-Graph object on the CUDA backend.
301394

302-
```
303-
q.submit([&](sycl::handler &cgh) {
304-
cgh.ext_codeplay_enqueue_native_command([=](sycl::interop_handle h) {
395+
```c++
396+
q.submit([&](sycl::handler &CGH) {
397+
CGH.ext_codeplay_enqueue_native_command([=](sycl::interop_handle IH) {
305398
auto NativeStream = h.get_native_queue<cuda>();
306-
if (h.ext_codeplay_has_graph()) {
307-
auto NativeGraph = h.ext_codeplay_get_native_graph<cuda>();
399+
if (IH.ext_codeplay_has_graph()) {
400+
auto NativeGraph =
401+
IH.ext_codeplay_get_native_graph<sycl::backend::ext_oneapi_cuda>();
308402

309403
// Start capture stream calls into graph
310404
cuStreamBeginCaptureToGraph(NativeStream, NativeGraph, nullptr,

sycl/include/sycl/interop_handle.hpp

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -208,25 +208,22 @@ class interop_handle {
208208
friend class detail::DispatchHostTask;
209209
using ReqToMem = std::pair<detail::AccessorImplHost *, ur_mem_handle_t>;
210210

211-
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
212-
// MGraph should become a member of this class on the next ABI breaking
213-
// window
214-
// TODO create and link GitHub Issue
215211
interop_handle(std::vector<ReqToMem> MemObjs,
216212
const std::shared_ptr<detail::queue_impl> &Queue,
217213
const std::shared_ptr<detail::device_impl> &Device,
218214
const std::shared_ptr<detail::context_impl> &Context,
215+
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
216+
[[maybe_unused]]
217+
#endif
219218
ur_exp_command_buffer_handle_t Graph = nullptr)
220-
: MQueue(Queue), MDevice(Device), MContext(Context), MGraph(Graph),
221-
MMemObjs(std::move(MemObjs)) {}
222-
#else
223-
interop_handle(std::vector<ReqToMem> MemObjs,
224-
const std::shared_ptr<detail::queue_impl> &Queue,
225-
const std::shared_ptr<detail::device_impl> &Device,
226-
const std::shared_ptr<detail::context_impl> &Context)
227219
: MQueue(Queue), MDevice(Device), MContext(Context),
228-
MMemObjs(std::move(MemObjs)) {}
220+
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
221+
// CMPLRLLVM-66082 - MGraph should become a member of this class on the
222+
// next ABI breaking window.
223+
MGraph(Graph),
229224
#endif
225+
MMemObjs(std::move(MemObjs)) {
226+
}
230227

231228
template <backend Backend, typename DataT, int Dims>
232229
backend_return_t<Backend, buffer<DataT, Dims>>
@@ -255,9 +252,8 @@ class interop_handle {
255252
std::shared_ptr<detail::device_impl> MDevice;
256253
std::shared_ptr<detail::context_impl> MContext;
257254
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
258-
// MGraph should become a member of this class on the next ABI breaking
259-
// window
260-
// TODO link github issue
255+
// CMPLRLLVM-66082 - MGraph should become a member of this class on the
256+
// next ABI breaking window.
261257
ur_exp_command_buffer_handle_t MGraph;
262258
#endif
263259

sycl/source/detail/queue_impl.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -754,9 +754,9 @@ class queue_impl {
754754
}
755755

756756
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
757+
// CMPLRLLVM-66082
757758
// These methods are for accessing a member that should live in the
758759
// sycl::interop_handle class and will be moved on next ABI breaking window.
759-
// TODO create and link github issue
760760
ur_exp_command_buffer_handle_t getInteropGraph() const {
761761
return MInteropGraph;
762762
}
@@ -1014,10 +1014,10 @@ class queue_impl {
10141014
std::weak_ptr<ext::oneapi::experimental::detail::graph_impl> MGraph{};
10151015

10161016
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
1017+
// CMPLRLLVM-66082
10171018
// This member should be part of the sycl::interop_handle class, but it
10181019
// in an API breaking change. So member lives here temporarily where it can
10191020
// be accessed through the queue member of the interop_handle
1020-
// TODO create and link github issue
10211021
ur_exp_command_buffer_handle_t MInteropGraph{};
10221022
#endif
10231023

sycl/source/detail/scheduler/commands.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3066,12 +3066,12 @@ ur_result_t ExecCGCommand::enqueueImpCommandBuffer() {
30663066
// available if a user asks for them inside the interop task scope
30673067
std::vector<interop_handle::ReqToMem> ReqToMem;
30683068
const std::vector<Requirement *> &HandlerReq = HostTask->getRequirements();
3069-
auto ReqToMemConv = [&ReqToMem, HostTask](Requirement *Req) {
3069+
auto ReqToMemConv = [&ReqToMem, ContextImpl](Requirement *Req) {
30703070
const std::vector<AllocaCommandBase *> &AllocaCmds =
30713071
Req->MSYCLMemObj->MRecord->MAllocaCommands;
30723072

30733073
for (AllocaCommandBase *AllocaCmd : AllocaCmds)
3074-
if (getContext(HostTask->MQueue) == getContext(AllocaCmd->getQueue())) {
3074+
if (ContextImpl == getContext(AllocaCmd->getQueue())) {
30753075
auto MemArg =
30763076
reinterpret_cast<ur_mem_handle_t>(AllocaCmd->getMemAllocation());
30773077
ReqToMem.emplace_back(std::make_pair(Req, MemArg));
@@ -3090,10 +3090,10 @@ ur_result_t ExecCGCommand::enqueueImpCommandBuffer() {
30903090
ur_exp_command_buffer_handle_t InteropCommandBuffer =
30913091
ChildCommandBuffer ? ChildCommandBuffer : MCommandBuffer;
30923092
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
3093+
// CMPLRLLVM-66082
30933094
// The native command-buffer should be a member of the sycl::interop_handle
3094-
// class, but it is in an API breaking change to add it. So member lives in
3095+
// class, but it is in an ABI breaking change to add it. So member lives in
30953096
// the queue as a intermediate workaround.
3096-
// TODO create and link github issue
30973097
interop_handle IH{ReqToMem, MQueue, DeviceImpl, ContextImpl,
30983098
InteropCommandBuffer};
30993099
#else
@@ -3109,6 +3109,11 @@ ur_result_t ExecCGCommand::enqueueImpCommandBuffer() {
31093109
MSyncPointDeps.empty() ? nullptr : MSyncPointDeps.data(),
31103110
&OutSyncPoint);
31113111

3112+
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
3113+
// See CMPLRLLVM-66082
3114+
MQueue->setInteropGraph(nullptr);
3115+
#endif
3116+
31123117
if (ChildCommandBuffer) {
31133118
ur_result_t Res = Adapter->call_nocheck<
31143119
sycl::detail::UrApiKind::urCommandBufferReleaseExp>(

sycl/source/interop_handle.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ backend interop_handle::get_backend() const noexcept {
2525

2626
bool interop_handle::ext_codeplay_has_graph() const noexcept {
2727
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
28-
// TODO create and link GitHub
28+
// CMPLRLLVM-66082 - MGraph should become a member of this class on the
29+
// next ABI breaking window.
2930
return MGraph != nullptr;
3031
#else
3132
return MQueue->getInteropGraph() != nullptr;
@@ -64,7 +65,8 @@ interop_handle::getNativeQueue(int32_t &NativeHandleDesc) const {
6465

6566
ur_native_handle_t interop_handle::getNativeGraph() const {
6667
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
67-
// TODO create and link github issue
68+
// CMPLRLLVM-66082 - MGraph should become a member of this class on the
69+
// next ABI breaking window.
6870
auto Graph = MGraph;
6971
#else
7072
auto Graph = MQueue->getInteropGraph();
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
// RUN: %{build} -o %t.out %cuda_options
2+
// RUN: %{run} %t.out
3+
// RUN: %if preview-breaking-changes-supported %{ %{build} -fpreview-breaking-changes -o %t2.out %cuda_options %}
4+
// RUN: %if preview-breaking-changes-supported %{ %{run} %t2.out %}
5+
// REQUIRES: cuda, cuda_dev_kit
6+
7+
#include <cuda.h>
8+
#include <sycl/backend.hpp>
9+
#include <sycl/ext/oneapi/experimental/graph.hpp>
10+
#include <sycl/interop_handle.hpp>
11+
#include <sycl/usm.hpp>
12+
13+
namespace exp_ext = sycl::ext::oneapi::experimental;
14+
using namespace sycl;
15+
16+
int main() {
17+
queue Queue;
18+
19+
const size_t Size = 128;
20+
int *PtrX = malloc_device<int>(Size, Queue);
21+
int *PtrY = malloc_device<int>(Size, Queue);
22+
23+
exp_ext::command_graph Graph{Queue};
24+
25+
auto NodeA = Graph.add([&](handler &CGH) {
26+
CGH.single_task([=]() {
27+
for (size_t i = 0; i < Size; i++) {
28+
PtrX[i] = i;
29+
PtrY[i] = 0;
30+
}
31+
});
32+
});
33+
34+
auto NodeB = Graph.add(
35+
[&](handler &CGH) {
36+
CGH.ext_codeplay_enqueue_native_command([=](interop_handle IH) {
37+
if (!IH.ext_codeplay_has_graph()) {
38+
assert(false && "Native Handle should have a graph");
39+
}
40+
CUgraph NativeGraph =
41+
IH.ext_codeplay_get_native_graph<backend::ext_oneapi_cuda>();
42+
43+
CUDA_MEMCPY3D Params;
44+
std::memset(&Params, 0, sizeof(CUDA_MEMCPY3D));
45+
Params.srcMemoryType = CU_MEMORYTYPE_DEVICE;
46+
Params.srcDevice = (CUdeviceptr)PtrX;
47+
Params.srcHost = nullptr;
48+
Params.dstMemoryType = CU_MEMORYTYPE_DEVICE;
49+
Params.dstDevice = (CUdeviceptr)PtrY;
50+
Params.dstHost = nullptr;
51+
Params.WidthInBytes = Size * sizeof(int);
52+
Params.Height = 1;
53+
Params.Depth = 1;
54+
55+
CUgraphNode Node;
56+
CUcontext Context = IH.get_native_context<backend::ext_oneapi_cuda>();
57+
auto Res = cuGraphAddMemcpyNode(&Node, NativeGraph, nullptr, 0,
58+
&Params, Context);
59+
assert(Res == CUDA_SUCCESS);
60+
});
61+
},
62+
exp_ext::property::node::depends_on(NodeA));
63+
64+
Graph.add(
65+
[&](handler &CGH) {
66+
CGH.parallel_for(range<1>{Size}, [=](id<1> it) { PtrY[it] *= 2; });
67+
},
68+
exp_ext::property::node::depends_on(NodeB));
69+
70+
auto ExecGraph = Graph.finalize();
71+
Queue.ext_oneapi_graph(ExecGraph).wait();
72+
73+
std::vector<int> HostData(Size);
74+
75+
Queue.copy(PtrY, HostData.data(), Size).wait();
76+
for (size_t i = 0; i < Size; i++) {
77+
const int Ref = i * 2;
78+
assert(Ref == HostData[i]);
79+
}
80+
81+
free(PtrX, Queue);
82+
free(PtrY, Queue);
83+
84+
return 0;
85+
}

0 commit comments

Comments
 (0)