Skip to content

Commit caed4b7

Browse files
BensuoEwanCmfrancepillois
authored andcommitted
[SYCL][Graph] L0 Backend support for SYCL Graphs (2/4) (#9992)
# Level Zero Backend Support for SYCL Graphs This is the second patch of a series that adds support for an [experimental command graph extension](intel/llvm#5626) A snapshot of the complete work can be seen in draft PR #9375 which has support all the specification defined ways of adding nodes and edges to the graph, including both Explicit and Record & Replay graph construction. The two types of nodes currently implemented are kernel execution and memcpy commands. See https://github.com/reble/llvm#implementation-status for the status of our total work. ## Scope This second patch focuses on the required PI/UR support for the experimental command-buffer feature in the Level Zero adapter: * PI stubs for all adapters to enable compilation, no functionality. * Command-buffer implementation for the Level Zero UR adapter. * Stubs for the CUDA UR adapter to enable compilation, no functionality. ## Following Split PRs Future follow-up PRs with the remainder of our work on the extension will include: * Hooking up backend to graphs runtime, bugfixes and other feature additions, will add symbols but not break the ABI. (3/4) * Add end-to-end tests for SYCL Graph extension. (4/4) * NFC changes - Design doc and codeowner update. ## Authors Co-authored-by: Pablo Reble <[email protected]> Co-authored-by: Julian Miller <[email protected]> Co-authored-by: Ben Tracy <[email protected]> Co-authored-by: Ewan Crawford <[email protected]> Co-authored-by: Maxime France-Pillois <[email protected]> --------- Co-authored-by: Ewan Crawford <[email protected]> Co-authored-by: Maxime France-Pillois <[email protected]>
1 parent 9d1ea51 commit caed4b7

File tree

3 files changed

+293
-0
lines changed

3 files changed

+293
-0
lines changed

command_buffer.cpp

Lines changed: 250 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,250 @@
1+
//===--------- command_buffer.cpp - CUDA Adapter ---------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===-----------------------------------------------------------------===//
8+
9+
#include "command_buffer.hpp"
10+
#include "common.hpp"
11+
12+
/// Stub implementations of UR experimental feature command-buffers
13+
14+
UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
15+
ur_context_handle_t hContext, ur_device_handle_t hDevice,
16+
const ur_exp_command_buffer_desc_t *pCommandBufferDesc,
17+
ur_exp_command_buffer_handle_t *phCommandBuffer) {
18+
(void)hContext;
19+
(void)hDevice;
20+
(void)pCommandBufferDesc;
21+
(void)phCommandBuffer;
22+
sycl::detail::ur::die("Experimental Command-buffer feature is not "
23+
"implemented for CUDA adapter.");
24+
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
25+
}
26+
27+
UR_APIEXPORT ur_result_t UR_APICALL
28+
urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
29+
(void)hCommandBuffer;
30+
31+
sycl::detail::ur::die("Experimental Command-buffer feature is not "
32+
"implemented for CUDA adapter.");
33+
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
34+
}
35+
36+
UR_APIEXPORT ur_result_t UR_APICALL
37+
urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
38+
(void)hCommandBuffer;
39+
40+
sycl::detail::ur::die("Experimental Command-buffer feature is not "
41+
"implemented for CUDA adapter.");
42+
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
43+
}
44+
45+
UR_APIEXPORT ur_result_t UR_APICALL
46+
urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
47+
(void)hCommandBuffer;
48+
49+
sycl::detail::ur::die("Experimental Command-buffer feature is not "
50+
"implemented for CUDA adapter.");
51+
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
52+
}
53+
54+
UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
55+
ur_exp_command_buffer_handle_t hCommandBuffer, ur_kernel_handle_t hKernel,
56+
uint32_t workDim, const size_t *pGlobalWorkOffset,
57+
const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize,
58+
uint32_t numSyncPointsInWaitList,
59+
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
60+
ur_exp_command_buffer_sync_point_t *pSyncPoint) {
61+
(void)hCommandBuffer;
62+
(void)hKernel;
63+
(void)workDim;
64+
(void)pGlobalWorkOffset;
65+
(void)pGlobalWorkSize;
66+
(void)pLocalWorkSize;
67+
(void)numSyncPointsInWaitList;
68+
(void)pSyncPointWaitList;
69+
(void)pSyncPoint;
70+
71+
sycl::detail::ur::die("Experimental Command-buffer feature is not "
72+
"implemented for CUDA adapter.");
73+
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
74+
}
75+
76+
UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp(
77+
ur_exp_command_buffer_handle_t hCommandBuffer, void *pDst, const void *pSrc,
78+
size_t size, uint32_t numSyncPointsInWaitList,
79+
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
80+
ur_exp_command_buffer_sync_point_t *pSyncPoint) {
81+
(void)hCommandBuffer;
82+
(void)pDst;
83+
(void)pSrc;
84+
(void)size;
85+
(void)numSyncPointsInWaitList;
86+
(void)pSyncPointWaitList;
87+
(void)pSyncPoint;
88+
89+
sycl::detail::ur::die("Experimental Command-buffer feature is not "
90+
"implemented for CUDA adapter.");
91+
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
92+
}
93+
94+
UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp(
95+
ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem,
96+
ur_mem_handle_t hDstMem, size_t srcOffset, size_t dstOffset, size_t size,
97+
uint32_t numSyncPointsInWaitList,
98+
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
99+
ur_exp_command_buffer_sync_point_t *pSyncPoint) {
100+
(void)hCommandBuffer;
101+
(void)hSrcMem;
102+
(void)hDstMem;
103+
(void)srcOffset;
104+
(void)dstOffset;
105+
(void)size;
106+
(void)numSyncPointsInWaitList;
107+
(void)pSyncPointWaitList;
108+
(void)pSyncPoint;
109+
110+
sycl::detail::ur::die("Experimental Command-buffer feature is not "
111+
"implemented for CUDA adapter.");
112+
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
113+
}
114+
115+
UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp(
116+
ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem,
117+
ur_mem_handle_t hDstMem, ur_rect_offset_t srcOrigin,
118+
ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch,
119+
size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch,
120+
uint32_t numSyncPointsInWaitList,
121+
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
122+
ur_exp_command_buffer_sync_point_t *pSyncPoint) {
123+
(void)hCommandBuffer;
124+
(void)hSrcMem;
125+
(void)hDstMem;
126+
(void)srcOrigin;
127+
(void)dstOrigin;
128+
(void)region;
129+
(void)srcRowPitch;
130+
(void)numSyncPointsInWaitList;
131+
(void)pSyncPointWaitList;
132+
(void)pSyncPoint;
133+
134+
sycl::detail::ur::die("Experimental Command-buffer feature is not "
135+
"implemented for CUDA adapter.");
136+
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
137+
}
138+
139+
UR_APIEXPORT
140+
ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp(
141+
ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer,
142+
size_t offset, size_t size, const void *pSrc,
143+
uint32_t numSyncPointsInWaitList,
144+
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
145+
ur_exp_command_buffer_sync_point_t *pSyncPoint) {
146+
(void)hCommandBuffer;
147+
(void)hBuffer;
148+
(void)offset;
149+
(void)size;
150+
(void)pSrc;
151+
(void)numSyncPointsInWaitList;
152+
(void)pSyncPointWaitList;
153+
(void)pSyncPoint;
154+
155+
sycl::detail::ur::die("Experimental Command-buffer feature is not "
156+
"implemented for CUDA adapter.");
157+
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
158+
}
159+
160+
UR_APIEXPORT
161+
ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp(
162+
ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer,
163+
size_t offset, size_t size, void *pDst, uint32_t numSyncPointsInWaitList,
164+
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
165+
ur_exp_command_buffer_sync_point_t *pSyncPoint) {
166+
(void)hCommandBuffer;
167+
(void)hBuffer;
168+
(void)offset;
169+
(void)size;
170+
(void)pDst;
171+
(void)numSyncPointsInWaitList;
172+
(void)pSyncPointWaitList;
173+
(void)pSyncPoint;
174+
175+
sycl::detail::ur::die("Experimental Command-buffer feature is not "
176+
"implemented for CUDA adapter.");
177+
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
178+
}
179+
180+
UR_APIEXPORT
181+
ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp(
182+
ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer,
183+
ur_rect_offset_t bufferOffset, ur_rect_offset_t hostOffset,
184+
ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch,
185+
size_t hostRowPitch, size_t hostSlicePitch, void *pSrc,
186+
uint32_t numSyncPointsInWaitList,
187+
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
188+
ur_exp_command_buffer_sync_point_t *pSyncPoint) {
189+
(void)hCommandBuffer;
190+
(void)hBuffer;
191+
(void)bufferOffset;
192+
(void)hostOffset;
193+
(void)region;
194+
(void)bufferRowPitch;
195+
(void)bufferSlicePitch;
196+
(void)hostRowPitch;
197+
(void)hostSlicePitch;
198+
(void)pSrc;
199+
(void)numSyncPointsInWaitList;
200+
(void)pSyncPointWaitList;
201+
(void)pSyncPoint;
202+
203+
sycl::detail::ur::die("Experimental Command-buffer feature is not "
204+
"implemented for CUDA adapter.");
205+
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
206+
}
207+
208+
UR_APIEXPORT
209+
ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp(
210+
ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer,
211+
ur_rect_offset_t bufferOffset, ur_rect_offset_t hostOffset,
212+
ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch,
213+
size_t hostRowPitch, size_t hostSlicePitch, void *pDst,
214+
uint32_t numSyncPointsInWaitList,
215+
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
216+
ur_exp_command_buffer_sync_point_t *pSyncPoint) {
217+
(void)hCommandBuffer;
218+
(void)hBuffer;
219+
(void)bufferOffset;
220+
(void)hostOffset;
221+
(void)region;
222+
(void)bufferRowPitch;
223+
(void)bufferSlicePitch;
224+
(void)hostRowPitch;
225+
(void)hostSlicePitch;
226+
(void)pDst;
227+
228+
(void)numSyncPointsInWaitList;
229+
(void)pSyncPointWaitList;
230+
(void)pSyncPoint;
231+
232+
sycl::detail::ur::die("Experimental Command-buffer feature is not "
233+
"implemented for CUDA adapter.");
234+
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
235+
}
236+
237+
UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
238+
ur_exp_command_buffer_handle_t hCommandBuffer, ur_queue_handle_t hQueue,
239+
uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
240+
ur_event_handle_t *phEvent) {
241+
(void)hCommandBuffer;
242+
(void)hQueue;
243+
(void)numEventsInWaitList;
244+
(void)phEventWaitList;
245+
(void)phEvent;
246+
247+
sycl::detail::ur::die("Experimental Command-buffer feature is not "
248+
"implemented for CUDA adapter.");
249+
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
250+
}

command_buffer.hpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
//===--------- command_buffer.hpp - CUDA Adapter ---------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===-----------------------------------------------------------------===//
8+
9+
#include <ur/ur.hpp>
10+
11+
/// Stub implementation of command-buffers for CUDA
12+
13+
struct ur_exp_command_buffer_handle_t_ {};

ur_interface_loader.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,36 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable(
258258
return UR_RESULT_SUCCESS;
259259
}
260260

261+
UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable(
262+
ur_api_version_t version, ///< [in] API version requested
263+
ur_command_buffer_exp_dditable_t
264+
*pDdiTable ///< [in,out] pointer to table of DDI function pointers
265+
) {
266+
auto retVal = validateProcInputs(version, pDdiTable);
267+
if (UR_RESULT_SUCCESS != retVal) {
268+
return retVal;
269+
}
270+
pDdiTable->pfnCreateExp = urCommandBufferCreateExp;
271+
pDdiTable->pfnRetainExp = urCommandBufferRetainExp;
272+
pDdiTable->pfnReleaseExp = urCommandBufferReleaseExp;
273+
pDdiTable->pfnFinalizeExp = urCommandBufferFinalizeExp;
274+
pDdiTable->pfnAppendKernelLaunchExp = urCommandBufferAppendKernelLaunchExp;
275+
pDdiTable->pfnAppendMemcpyUSMExp = urCommandBufferAppendMemcpyUSMExp;
276+
pDdiTable->pfnAppendMembufferCopyExp = urCommandBufferAppendMembufferCopyExp;
277+
pDdiTable->pfnAppendMembufferCopyRectExp =
278+
urCommandBufferAppendMembufferCopyRectExp;
279+
pDdiTable->pfnAppendMembufferReadExp = urCommandBufferAppendMembufferReadExp;
280+
pDdiTable->pfnAppendMembufferReadRectExp =
281+
urCommandBufferAppendMembufferReadRectExp;
282+
pDdiTable->pfnAppendMembufferWriteExp =
283+
urCommandBufferAppendMembufferWriteExp;
284+
pDdiTable->pfnAppendMembufferWriteRectExp =
285+
urCommandBufferAppendMembufferWriteRectExp;
286+
pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp;
287+
288+
return retVal;
289+
}
290+
261291
#if defined(__cplusplus)
262292
} // extern "C"
263293
#endif

0 commit comments

Comments
 (0)