Skip to content

Commit e367d35

Browse files
[SYCL][Graph] Implement graph creation and finalization for CUDA backend (#279)
* [SYCL][Graph] Implement graph creation and finalization for CUDA backend Implements CommandBuffer creation, retain, release and finalize functions for the cuda backend. These functions rely on the cuda graph feature which is part of cuda runtime. Consequently, this PR adds the required linking dependencies. Fixes a commandbuffer bug in the cuda pluging initialization. Reports cuda backend as supported for the graph extension. Adds a tests that creates and finalizes an empty graph.
1 parent 3395b33 commit e367d35

File tree

6 files changed

+64
-2
lines changed

6 files changed

+64
-2
lines changed

sycl/plugins/cuda/pi_cuda.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1204,8 +1204,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
12041204
#define _PI_API(api) \
12051205
(PluginInit->PiFunctionTable).api = (decltype(&::api))(&api);
12061206
#include <sycl/detail/pi.def>
1207-
#undef _PI_API
1208-
1207+
#undef _PI_API
12091208
return PI_SUCCESS;
12101209
}
12111210

sycl/plugins/unified_runtime/ur/adapters/cuda/command_buffer.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,30 @@ static void setCopyParams(const void *SrcPtr, const CUmemorytype_enum SrcType,
106106
Params.Depth = 1;
107107
}
108108

109+
ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_(
110+
ur_context_handle_t hContext, ur_device_handle_t hDevice)
111+
: Context(hContext),
112+
Device(hDevice), cudaGraph{nullptr}, cudaGraphExec{nullptr}, RefCount{1} {
113+
urContextRetain(hContext);
114+
urDeviceRetain(hDevice);
115+
}
116+
117+
// The ur_exp_command_buffer_handle_t_ destructor release all the memory objects
118+
// allocated for command_buffer managment
119+
ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() {
120+
// Release the memory allocated to the Context stored in the command_buffer
121+
urContextRelease(Context);
122+
123+
// Release the device
124+
urDeviceRelease(Device);
125+
126+
// Release the memory allocated to the CudaGraph
127+
cuGraphDestroy(cudaGraph);
128+
129+
// Release the memory allocated to the CudaGraphExec
130+
cuGraphExecDestroy(cudaGraphExec);
131+
}
132+
109133
UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
110134
ur_context_handle_t hContext, ur_device_handle_t hDevice,
111135
const ur_exp_command_buffer_desc_t *hCommandBufferDesc,

sycl/plugins/unified_runtime/ur/adapters/cuda/command_buffer.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
//===----------------------------------------------------------------------===//
88

99
#include <ur/ur.hpp>
10+
#include <ur_api.h>
1011

1112
#include "context.hpp"
1213
#include <cuda.h>

sycl/plugins/unified_runtime/ur/adapters/cuda/device.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -610,6 +610,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
610610

611611
std::string SupportedExtensions = "cl_khr_fp64 cl_khr_subgroups ";
612612
SupportedExtensions += "pi_ext_intel_devicelib_assert ";
613+
// Return supported for the UR command-buffer experimental feature
614+
SupportedExtensions += ("ur_exp_command_buffer ");
613615
SupportedExtensions += " ";
614616

615617
int Major = 0;
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
// REQUIRES: cuda, gpu
2+
// RUN: %{build} -o %t.out
3+
// RUN: %{run} %t.out
4+
5+
// Tests the ability to finalize a empty command graph
6+
// without submitting the graph.
7+
8+
#include "graph_common.hpp"
9+
10+
int GetCudaBackend(const sycl::device &Dev) {
11+
// Return 1 if the device backend is "cuda" or 0 else.
12+
// 0 does not prevent another device to be picked as a second choice
13+
return Dev.get_backend() == backend::ext_oneapi_cuda;
14+
}
15+
16+
int main() {
17+
sycl::device CudaDev{GetCudaBackend};
18+
queue Queue{CudaDev};
19+
20+
// Skip the test if no cuda backend found
21+
if (CudaDev.get_backend() != backend::ext_oneapi_cuda)
22+
return 0;
23+
24+
std::error_code ErrorCode = make_error_code(sycl::errc::success);
25+
// This should not throw an exception
26+
try {
27+
exp_ext::command_graph Graph{Queue.get_context(), CudaDev};
28+
auto GraphExec = Graph.finalize();
29+
} catch (const sycl::exception &e) {
30+
ErrorCode = e.code();
31+
}
32+
assert(ErrorCode == sycl::errc::success);
33+
34+
return 0;
35+
}

sycl/test-e2e/Graph/device_query.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
// REQUIRES: level_zero, gpu
12
// RUN: %{build} -o %t.out
23
// RUN: %{run} %t.out
34

0 commit comments

Comments
 (0)