Skip to content

Commit 028657a

Browse files
committed
[SYCL][Graphs] Squashed Graphs UR changes
- Squashed commit of command-buffer UR changes - Level Zero implementation - Stubs for all other adapters
1 parent 83f8779 commit 028657a

28 files changed

+1757
-4
lines changed

sycl/include/sycl/detail/pi.def

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,16 @@ _PI_API(piextKernelSetArgSampler)
141141

142142
_PI_API(piextPluginGetOpaqueData)
143143

144+
/// command-buffer Extension
145+
_PI_API(piextCommandBufferCreate)
146+
_PI_API(piextCommandBufferRetain)
147+
_PI_API(piextCommandBufferRelease)
148+
_PI_API(piextCommandBufferFinalize)
149+
_PI_API(piextCommandBufferNDRangeKernel)
150+
_PI_API(piextCommandBufferMemcpyUSM)
151+
_PI_API(piextCommandBufferMemBufferCopy)
152+
_PI_API(piextCommandBufferMemBufferCopyRect)
153+
_PI_API(piextEnqueueCommandBuffer)
144154
_PI_API(piPluginGetLastError)
145155

146156
_PI_API(piTearDown)

sycl/include/sycl/detail/pi.h

Lines changed: 141 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,9 +94,9 @@
9494
// info query.
9595
// 12.32 Removed backwards compatibility of piextQueueCreateWithNativeHandle and
9696
// piextQueueGetNativeHandle
97-
97+
// 12.33 Added command-buffer extension methods
9898
#define _PI_H_VERSION_MAJOR 13
99-
#define _PI_H_VERSION_MINOR 32
99+
#define _PI_H_VERSION_MINOR 33
100100

101101
#define _PI_STRING_HELPER(a) #a
102102
#define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b)
@@ -474,6 +474,7 @@ typedef enum {
474474
PI_COMMAND_TYPE_SVM_MEMFILL = 0x120B,
475475
PI_COMMAND_TYPE_SVM_MAP = 0x120C,
476476
PI_COMMAND_TYPE_SVM_UNMAP = 0x120D,
477+
PI_COMMAND_TYPE_EXT_COMMAND_BUFFER = 0x12A8,
477478
PI_COMMAND_TYPE_DEVICE_GLOBAL_VARIABLE_READ = 0x418E,
478479
PI_COMMAND_TYPE_DEVICE_GLOBAL_VARIABLE_WRITE = 0x418F
479480
} _pi_command_type;
@@ -2100,6 +2101,144 @@ __SYCL_EXPORT pi_result piGetDeviceAndHostTimer(pi_device Device,
21002101
uint64_t *DeviceTime,
21012102
uint64_t *HostTime);
21022103

2104+
/// Command buffer extension
2105+
struct _pi_ext_command_buffer;
2106+
struct _pi_ext_sync_point;
2107+
using pi_ext_command_buffer = _pi_ext_command_buffer *;
2108+
using pi_ext_sync_point = pi_uint32;
2109+
2110+
typedef enum {
2111+
PI_EXT_STRUCTURE_TYPE_COMMAND_BUFFER_DESC = 0
2112+
} pi_ext_structure_type;
2113+
2114+
struct pi_ext_command_buffer_desc final {
2115+
pi_ext_structure_type stype;
2116+
const void *pNext;
2117+
pi_queue_properties *properties;
2118+
};
2119+
2120+
/// API to create a command-buffer.
2121+
/// \param context The context to associate the command-buffer with.
2122+
/// \param device The device to associate the command-buffer with.
2123+
/// \param desc Descriptor for the new command-buffer.
2124+
/// \param ret_command_buffer Pointer to fill with the address of the new
2125+
/// command-buffer.
2126+
__SYCL_EXPORT pi_result
2127+
piextCommandBufferCreate(pi_context context, pi_device device,
2128+
const pi_ext_command_buffer_desc *desc,
2129+
pi_ext_command_buffer *ret_command_buffer);
2130+
2131+
/// API to increment the reference count of the command-buffer
2132+
/// \param command_buffer The command_buffer to retain.
2133+
__SYCL_EXPORT pi_result
2134+
piextCommandBufferRetain(pi_ext_command_buffer command_buffer);
2135+
2136+
/// API to decrement the reference count of the command-buffer. After the
2137+
/// command_buffer reference count becomes zero and has finished execution, the
2138+
/// command-buffer is deleted. \param command_buffer The command_buffer to
2139+
/// release.
2140+
__SYCL_EXPORT pi_result
2141+
piextCommandBufferRelease(pi_ext_command_buffer command_buffer);
2142+
2143+
/// API to stop command-buffer recording such that no more commands can be
2144+
/// appended, and makes the command-buffer ready to enqueue on a command-queue.
2145+
/// \param command_buffer The command_buffer to finalize.
2146+
__SYCL_EXPORT pi_result
2147+
piextCommandBufferFinalize(pi_ext_command_buffer command_buffer);
2148+
2149+
/// API to append a kernel execution command to the command-buffer.
2150+
/// \param command_buffer The command-buffer to append onto.
2151+
/// \param kernel The kernel to append.
2152+
/// \param work_dim Dimension of the kernel execution.
2153+
/// \param global_work_offset Offset to use when executing kernel.
2154+
/// \param global_work_size Global work size to use when executing kernel.
2155+
/// \param local_work_size Local work size to use when executing kernel.
2156+
/// \param num_sync_points_in_wait_list The number of sync points in the
2157+
/// provided wait list.
2158+
/// \param sync_point_wait_list A list of sync points that this command must
2159+
/// wait on.
2160+
/// \param sync_point The sync_point associated with this kernel execution.
2161+
__SYCL_EXPORT pi_result piextCommandBufferNDRangeKernel(
2162+
pi_ext_command_buffer command_buffer, pi_kernel kernel, pi_uint32 work_dim,
2163+
const size_t *global_work_offset, const size_t *global_work_size,
2164+
const size_t *local_work_size, pi_uint32 num_sync_points_in_wait_list,
2165+
const pi_ext_sync_point *sync_point_wait_list,
2166+
pi_ext_sync_point *sync_point);
2167+
2168+
/// API to append a USM memcpy command to the command-buffer.
2169+
/// \param command_buffer The command-buffer to append onto.
2170+
/// \param dst_ptr is the location the data will be copied
2171+
/// \param src_ptr is the data to be copied
2172+
/// \param size is number of bytes to copy
2173+
/// \param num_sync_points_in_wait_list The number of sync points in the
2174+
/// provided wait list.
2175+
/// \param sync_point_wait_list A list of sync points that this command must
2176+
/// wait on.
2177+
/// \param sync_point The sync_point associated with this memory operation.
2178+
__SYCL_EXPORT pi_result piextCommandBufferMemcpyUSM(
2179+
pi_ext_command_buffer command_buffer, void *dst_ptr, const void *src_ptr,
2180+
size_t size, pi_uint32 num_sync_points_in_wait_list,
2181+
const pi_ext_sync_point *sync_point_wait_list,
2182+
pi_ext_sync_point *sync_point);
2183+
2184+
/// API to append a mem buffer copy command to the command-buffer.
2185+
/// \param command_buffer The command-buffer to append onto.
2186+
/// \param src_buffer is the data to be copied
2187+
/// \param dst_buffer is the location the data will be copied
2188+
/// \param src_offset offset into \p src_buffer
2189+
/// \param dst_offset offset into \p dst_buffer
2190+
/// \param size is number of bytes to copy
2191+
/// \param num_sync_points_in_wait_list The number of sync points in the
2192+
/// provided wait list.
2193+
/// \param sync_point_wait_list A list of sync points that this command must
2194+
/// wait on.
2195+
/// \param sync_point The sync_point associated with this memory operation.
2196+
__SYCL_EXPORT pi_result piextCommandBufferMemBufferCopy(
2197+
pi_ext_command_buffer command_buffer, pi_mem src_buffer, pi_mem dst_buffer,
2198+
size_t src_offset, size_t dst_offset, size_t size,
2199+
pi_uint32 num_sync_points_in_wait_list,
2200+
const pi_ext_sync_point *sync_point_wait_list,
2201+
pi_ext_sync_point *sync_point);
2202+
2203+
/// API to append a rectangular mem buffer copy command to the command-buffer.
2204+
/// \param command_buffer The command-buffer to append onto.
2205+
/// \param src_buffer is the data to be copied
2206+
/// \param dst_buffer is the location the data will be copied
2207+
/// \param src_origin offset for the start of the region to copy in src_buffer
2208+
/// \param dst_origin offset for the start of the region to copy in dst_buffer
2209+
/// \param region The size of the region to be copied
2210+
/// \param src_row_pitch Row pitch for the src data
2211+
/// \param src_slice_pitch Slice pitch for the src data
2212+
/// \param dst_row_pitch Row pitch for the dst data
2213+
/// \param dst_slice_pitch Slice pitch for the dst data
2214+
/// \param num_sync_points_in_wait_list The number of sync points in the
2215+
/// provided wait list.
2216+
/// \param sync_point_wait_list A list of sync points that this command must
2217+
/// wait on.
2218+
/// \param sync_point The sync_point associated with this memory operation.
2219+
__SYCL_EXPORT pi_result piextCommandBufferMemBufferCopyRect(
2220+
pi_ext_command_buffer command_buffer, pi_mem src_buffer, pi_mem dst_buffer,
2221+
pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin,
2222+
pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch,
2223+
size_t dst_row_pitch, size_t dst_slice_pitch,
2224+
pi_uint32 num_sync_points_in_wait_list,
2225+
const pi_ext_sync_point *sync_point_wait_list,
2226+
pi_ext_sync_point *sync_point);
2227+
2228+
/// API to submit the command-buffer to queue for execution, returns an error if
2229+
/// command-buffer not finalized or another instance of same command-buffer
2230+
/// currently executing.
2231+
/// \param command_buffer The command-buffer to be submitted.
2232+
/// \param queue The PI queue to submit on.
2233+
/// \param num_events_in_wait_list The number of events that this execution
2234+
/// depends on.
2235+
/// \param event_wait_list List of pi_events to wait on.
2236+
/// \param event The pi_event associated with this enqueue.
2237+
__SYCL_EXPORT pi_result
2238+
piextEnqueueCommandBuffer(pi_ext_command_buffer command_buffer, pi_queue queue,
2239+
pi_uint32 num_events_in_wait_list,
2240+
const pi_event *event_wait_list, pi_event *event);
2241+
21032242
struct _pi_plugin {
21042243
// PI version supported by host passed to the plugin. The Plugin
21052244
// checks and writes the appropriate Function Pointers in

sycl/include/sycl/detail/pi.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,9 @@ using PiMemObjectType = ::pi_mem_type;
148148
using PiMemImageChannelOrder = ::pi_image_channel_order;
149149
using PiMemImageChannelType = ::pi_image_channel_type;
150150
using PiKernelCacheConfig = ::pi_kernel_cache_config;
151+
using PiExtSyncPoint = ::pi_ext_sync_point;
152+
using PiExtCommandBuffer = ::pi_ext_command_buffer;
153+
using PiExtCommandBufferDesc = ::pi_ext_command_buffer_desc;
151154

152155
__SYCL_EXPORT void contextSetExtendedDeleter(const sycl::context &constext,
153156
pi_context_extended_deleter func,

sycl/plugins/cuda/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,8 @@ add_sycl_plugin(cuda
7979
"../unified_runtime/ur/adapters/cuda/tracing.cpp"
8080
"../unified_runtime/ur/adapters/cuda/ur_interface_loader.cpp"
8181
"../unified_runtime/ur/adapters/cuda/usm.cpp"
82+
"../unified_runtime/ur/adapters/cuda/command_buffer.hpp"
83+
"../unified_runtime/ur/adapters/cuda/command_buffer.cpp"
8284
# ---
8385
"${sycl_inc_dir}/sycl/detail/pi.h"
8486
"${sycl_inc_dir}/sycl/detail/pi.hpp"

sycl/plugins/cuda/pi_cuda.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,19 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
186186
_PI_CL(piGetDeviceAndHostTimer, pi2ur::piGetDeviceAndHostTimer)
187187
_PI_CL(piPluginGetBackendOption, pi2ur::piPluginGetBackendOption)
188188

189+
// command-buffer
190+
_PI_CL(piextCommandBufferCreate, pi2ur::piextCommandBufferCreate)
191+
_PI_CL(piextCommandBufferRetain, pi2ur::piextCommandBufferRetain)
192+
_PI_CL(piextCommandBufferRelease, pi2ur::piextCommandBufferRelease)
193+
_PI_CL(piextCommandBufferNDRangeKernel,
194+
pi2ur::piextCommandBufferNDRangeKernel)
195+
_PI_CL(piextCommandBufferMemcpyUSM, pi2ur::piextCommandBufferMemcpyUSM)
196+
_PI_CL(piextCommandBufferMemBufferCopy,
197+
pi2ur::piextCommandBufferMemBufferCopy)
198+
_PI_CL(piextCommandBufferMemBufferCopyRect,
199+
pi2ur::piextCommandBufferMemBufferCopyRect)
200+
_PI_CL(piextEnqueueCommandBuffer, pi2ur::piextEnqueueCommandBuffer)
201+
189202
#undef _PI_CL
190203

191204
return PI_SUCCESS;

sycl/plugins/cuda/pi_cuda.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#define _PI_CUDA_PLUGIN_VERSION_STRING \
2626
_PI_PLUGIN_VERSION_STRING(_PI_CUDA_PLUGIN_VERSION)
2727

28+
#include <ur/adapters/cuda/command_buffer.hpp>
2829
#include <ur/adapters/cuda/context.hpp>
2930
#include <ur/adapters/cuda/device.hpp>
3031
#include <ur/adapters/cuda/event.hpp>
@@ -76,4 +77,8 @@ struct _pi_sampler : ur_sampler_handle_t_ {
7677
using ur_sampler_handle_t_::ur_sampler_handle_t_;
7778
};
7879

80+
struct _pi_ext_command_buffer : ur_exp_command_buffer_handle_t_ {
81+
using ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_;
82+
};
83+
7984
#endif // PI_CUDA_HPP

sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2098,6 +2098,70 @@ pi_result piextEnqueueDeviceGlobalVariableRead(pi_queue, pi_program,
20982098
DIE_NO_IMPLEMENTATION;
20992099
}
21002100

2101+
pi_result piextCommandBufferCreate(pi_context context, pi_device device,
2102+
const pi_ext_command_buffer_desc *desc,
2103+
pi_ext_command_buffer *ret_command_buffer) {
2104+
DIE_NO_IMPLEMENTATION;
2105+
}
2106+
2107+
pi_result piextCommandBufferRetain(pi_ext_command_buffer command_buffer) {
2108+
DIE_NO_IMPLEMENTATION;
2109+
}
2110+
2111+
pi_result piextCommandBufferRelease(pi_ext_command_buffer command_buffer) {
2112+
DIE_NO_IMPLEMENTATION;
2113+
}
2114+
2115+
pi_result piextCommandBufferFinalize(pi_ext_command_buffer command_buffer) {
2116+
DIE_NO_IMPLEMENTATION;
2117+
}
2118+
2119+
pi_result piextCommandBufferNDRangeKernel(
2120+
pi_ext_command_buffer command_buffer, pi_kernel kernel, pi_uint32 work_dim,
2121+
const size_t *global_work_offset, const size_t *global_work_size,
2122+
const size_t *local_work_size, pi_uint32 num_sync_points_in_wait_list,
2123+
const pi_ext_sync_point *sync_point_wait_list,
2124+
pi_ext_sync_point *sync_point) {
2125+
DIE_NO_IMPLEMENTATION;
2126+
}
2127+
2128+
pi_result
2129+
piextCommandBufferMemcpyUSM(pi_ext_command_buffer command_buffer, void *dst_ptr,
2130+
const void *src_ptr, size_t size,
2131+
pi_uint32 num_sync_points_in_wait_list,
2132+
const pi_ext_sync_point *sync_point_wait_list,
2133+
pi_ext_sync_point *sync_point) {
2134+
DIE_NO_IMPLEMENTATION;
2135+
}
2136+
2137+
pi_result piextCommandBufferMemBufferCopy(
2138+
pi_ext_command_buffer command_buffer, pi_mem src_buffer, pi_mem dst_buffer,
2139+
size_t src_offset, size_t dst_offset, size_t size,
2140+
pi_uint32 num_sync_points_in_wait_list,
2141+
const pi_ext_sync_point *sync_point_wait_list,
2142+
pi_ext_sync_point *sync_point) {
2143+
DIE_NO_IMPLEMENTATION;
2144+
}
2145+
2146+
pi_result piextCommandBufferMemBufferCopyRect(
2147+
pi_ext_command_buffer command_buffer, pi_mem src_buffer, pi_mem dst_buffer,
2148+
pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin,
2149+
pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch,
2150+
size_t dst_row_pitch, size_t dst_slice_pitch,
2151+
pi_uint32 num_sync_points_in_wait_list,
2152+
const pi_ext_sync_point *sync_point_wait_list,
2153+
pi_ext_sync_point *sync_point) {
2154+
DIE_NO_IMPLEMENTATION;
2155+
}
2156+
2157+
pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer command_buffer,
2158+
pi_queue queue,
2159+
pi_uint32 num_events_in_wait_list,
2160+
const pi_event *event_wait_list,
2161+
pi_event *event) {
2162+
DIE_NO_IMPLEMENTATION;
2163+
}
2164+
21012165
pi_result piextPluginGetOpaqueData(void *, void **OpaqueDataReturn) {
21022166
*OpaqueDataReturn = reinterpret_cast<void *>(PiESimdDeviceAccess);
21032167
return PI_SUCCESS;

sycl/plugins/esimd_emulator/pi_esimd_emulator.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,4 +218,6 @@ struct _pi_kernel : _pi_object {
218218
_pi_kernel() {}
219219
};
220220

221+
struct _pi_ext_command_buffer {};
222+
221223
#include <sycl/ext/intel/esimd/emu/detail/esimd_emulator_device_interface.hpp>

0 commit comments

Comments
 (0)