Skip to content

[SYCL][Graph] L0 Backend support for SYCL Graphs (2/4) #9992

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Jul 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions sycl/include/sycl/detail/pi.def
Original file line number Diff line number Diff line change
Expand Up @@ -158,4 +158,19 @@ _PI_API(piextEnqueueDeviceGlobalVariableRead)

_PI_API(piPluginGetBackendOption)

// command-buffer Extension
_PI_API(piextCommandBufferCreate)
_PI_API(piextCommandBufferRetain)
_PI_API(piextCommandBufferRelease)
_PI_API(piextCommandBufferFinalize)
_PI_API(piextCommandBufferNDRangeKernel)
_PI_API(piextCommandBufferMemcpyUSM)
_PI_API(piextCommandBufferMemBufferCopy)
_PI_API(piextCommandBufferMemBufferCopyRect)
_PI_API(piextCommandBufferMemBufferWrite)
_PI_API(piextCommandBufferMemBufferWriteRect)
_PI_API(piextCommandBufferMemBufferRead)
_PI_API(piextCommandBufferMemBufferReadRect)
_PI_API(piextEnqueueCommandBuffer)

#undef _PI_API
226 changes: 225 additions & 1 deletion sycl/include/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,10 @@
// piextQueueGetNativeHandle
// 14.33 Added new parameter (memory object properties) to
// piextKernelSetArgMemObj
// 14.34 Added command-buffer extension methods

#define _PI_H_VERSION_MAJOR 14
#define _PI_H_VERSION_MINOR 33
#define _PI_H_VERSION_MINOR 34

#define _PI_STRING_HELPER(a) #a
#define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b)
Expand Down Expand Up @@ -477,6 +478,7 @@ typedef enum {
PI_COMMAND_TYPE_SVM_MEMFILL = 0x120B,
PI_COMMAND_TYPE_SVM_MAP = 0x120C,
PI_COMMAND_TYPE_SVM_UNMAP = 0x120D,
PI_COMMAND_TYPE_EXT_COMMAND_BUFFER = 0x12A8,
PI_COMMAND_TYPE_DEVICE_GLOBAL_VARIABLE_READ = 0x418E,
PI_COMMAND_TYPE_DEVICE_GLOBAL_VARIABLE_WRITE = 0x418F
} _pi_command_type;
Expand Down Expand Up @@ -2128,6 +2130,228 @@ __SYCL_EXPORT pi_result piGetDeviceAndHostTimer(pi_device Device,
uint64_t *DeviceTime,
uint64_t *HostTime);

/// Command buffer extension
struct _pi_ext_command_buffer;
struct _pi_ext_sync_point;
using pi_ext_command_buffer = _pi_ext_command_buffer *;
using pi_ext_sync_point = pi_uint32;

typedef enum {
PI_EXT_STRUCTURE_TYPE_COMMAND_BUFFER_DESC = 0
} pi_ext_structure_type;

struct pi_ext_command_buffer_desc final {
pi_ext_structure_type stype;
const void *pNext;
pi_queue_properties *properties;
};

/// API to create a command-buffer.
/// \param context The context to associate the command-buffer with.
/// \param device The device to associate the command-buffer with.
/// \param desc Descriptor for the new command-buffer.
/// \param ret_command_buffer Pointer to fill with the address of the new
/// command-buffer.
__SYCL_EXPORT pi_result
piextCommandBufferCreate(pi_context context, pi_device device,
const pi_ext_command_buffer_desc *desc,
pi_ext_command_buffer *ret_command_buffer);

/// API to increment the reference count of the command-buffer
/// \param command_buffer The command_buffer to retain.
__SYCL_EXPORT pi_result
piextCommandBufferRetain(pi_ext_command_buffer command_buffer);

/// API to decrement the reference count of the command-buffer. After the
/// command_buffer reference count becomes zero and has finished execution, the
/// command-buffer is deleted.
/// \param command_buffer The command_buffer to release.
__SYCL_EXPORT pi_result
piextCommandBufferRelease(pi_ext_command_buffer command_buffer);

/// API to stop command-buffer recording such that no more commands can be
/// appended, and makes the command-buffer ready to enqueue on a command-queue.
/// \param command_buffer The command_buffer to finalize.
__SYCL_EXPORT pi_result
piextCommandBufferFinalize(pi_ext_command_buffer command_buffer);

/// API to append a kernel execution command to the command-buffer.
/// \param command_buffer The command-buffer to append onto.
/// \param kernel The kernel to append.
/// \param work_dim Dimension of the kernel execution.
/// \param global_work_offset Offset to use when executing kernel.
/// \param global_work_size Global work size to use when executing kernel.
/// \param local_work_size Local work size to use when executing kernel.
/// \param num_sync_points_in_wait_list The number of sync points in the
/// provided wait list.
/// \param sync_point_wait_list A list of sync points that this command must
/// wait on.
/// \param sync_point The sync_point associated with this kernel execution.
__SYCL_EXPORT pi_result piextCommandBufferNDRangeKernel(
pi_ext_command_buffer command_buffer, pi_kernel kernel, pi_uint32 work_dim,
const size_t *global_work_offset, const size_t *global_work_size,
const size_t *local_work_size, pi_uint32 num_sync_points_in_wait_list,
const pi_ext_sync_point *sync_point_wait_list,
pi_ext_sync_point *sync_point);

/// API to append a USM memcpy command to the command-buffer.
/// \param command_buffer The command-buffer to append onto.
/// \param dst_ptr is the location the data will be copied
/// \param src_ptr is the data to be copied
/// \param size is number of bytes to copy
/// \param num_sync_points_in_wait_list The number of sync points in the
/// provided wait list.
/// \param sync_point_wait_list A list of sync points that this command must
/// wait on.
/// \param sync_point The sync_point associated with this memory operation.
__SYCL_EXPORT pi_result piextCommandBufferMemcpyUSM(
pi_ext_command_buffer command_buffer, void *dst_ptr, const void *src_ptr,
size_t size, pi_uint32 num_sync_points_in_wait_list,
const pi_ext_sync_point *sync_point_wait_list,
pi_ext_sync_point *sync_point);

/// API to append a mem buffer copy command to the command-buffer.
/// \param command_buffer The command-buffer to append onto.
/// \param src_buffer is the data to be copied
/// \param dst_buffer is the location the data will be copied
/// \param src_offset offset into \p src_buffer
/// \param dst_offset offset into \p dst_buffer
/// \param size is number of bytes to copy
/// \param num_sync_points_in_wait_list The number of sync points in the
/// provided wait list.
/// \param sync_point_wait_list A list of sync points that this command must
/// wait on.
/// \param sync_point The sync_point associated with this memory operation.
__SYCL_EXPORT pi_result piextCommandBufferMemBufferCopy(
pi_ext_command_buffer command_buffer, pi_mem src_buffer, pi_mem dst_buffer,
size_t src_offset, size_t dst_offset, size_t size,
pi_uint32 num_sync_points_in_wait_list,
const pi_ext_sync_point *sync_point_wait_list,
pi_ext_sync_point *sync_point);

/// API to append a rectangular mem buffer copy command to the command-buffer.
/// \param command_buffer The command-buffer to append onto.
/// \param src_buffer is the data to be copied
/// \param dst_buffer is the location the data will be copied
/// \param src_origin offset for the start of the region to copy in src_buffer
/// \param dst_origin offset for the start of the region to copy in dst_buffer
/// \param region The size of the region to be copied
/// \param src_row_pitch Row pitch for the src data
/// \param src_slice_pitch Slice pitch for the src data
/// \param dst_row_pitch Row pitch for the dst data
/// \param dst_slice_pitch Slice pitch for the dst data
/// \param num_sync_points_in_wait_list The number of sync points in the
/// provided wait list.
/// \param sync_point_wait_list A list of sync points that this command must
/// wait on.
/// \param sync_point The sync_point associated with this memory operation.
__SYCL_EXPORT pi_result piextCommandBufferMemBufferCopyRect(
pi_ext_command_buffer command_buffer, pi_mem src_buffer, pi_mem dst_buffer,
pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin,
pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch,
size_t dst_row_pitch, size_t dst_slice_pitch,
pi_uint32 num_sync_points_in_wait_list,
const pi_ext_sync_point *sync_point_wait_list,
pi_ext_sync_point *sync_point);

/// API to append a mem buffer read command to the command-buffer.
/// \param command_buffer The command-buffer to append onto.
/// \param buffer is the data to be read
/// \param offset offset into \p buffer
/// \param size is number of bytes to read
/// \param dst is the pointer to the destination
/// \param num_sync_points_in_wait_list The number of sync points in the
/// provided wait list.
/// \param sync_point_wait_list A list of sync points that this command must
/// wait on.
/// \param sync_point The sync_point associated with this memory operation.
__SYCL_EXPORT pi_result piextCommandBufferMemBufferRead(
pi_ext_command_buffer command_buffer, pi_mem buffer, size_t offset,
size_t size, void *dst, pi_uint32 num_events_in_wait_list,
const pi_ext_sync_point *sync_point_wait_list,
pi_ext_sync_point *sync_point);

/// API to append a rectangular mem buffer read command to the command-buffer.
/// \param command_buffer The command-buffer to append onto.
/// \param buffer is the data to be read
/// \param buffer_offset offset for the start of the region to read in buffer
/// \param host_offset offset for the start of the region to be written from ptr
/// \param region The size of the region to read
/// \param buffer_row_pitch Row pitch for the source buffer data
/// \param buffer_slice_pitch Slice pitch for the source buffer data
/// \param host_row_pitch Row pitch for the destination data ptr
/// \param host_slice_pitch Slice pitch for the destination data ptr
/// \param ptr is the location the data will be written
/// \param num_sync_points_in_wait_list The number of sync points in the
/// provided wait list.
/// \param sync_point_wait_list A list of sync points that this command must
/// wait on.
/// \param sync_point The sync_point associated with this memory operation.
__SYCL_EXPORT pi_result piextCommandBufferMemBufferReadRect(
pi_ext_command_buffer command_buffer, pi_mem buffer,
pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset,
pi_buff_rect_region region, size_t buffer_row_pitch,
size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch,
void *ptr, pi_uint32 num_events_in_wait_list,
const pi_ext_sync_point *sync_point_wait_list,
pi_ext_sync_point *sync_point);

/// API to append a mem buffer write command to the command-buffer.
/// \param command_buffer The command-buffer to append onto.
/// \param buffer is the location to write the data
/// \param offset offset into \p buffer
/// \param size is number of bytes to write
/// \param ptr is the pointer to the source
/// \param num_sync_points_in_wait_list The number of sync points in the
/// provided wait list.
/// \param sync_point_wait_list A list of sync points that this command must
/// wait on.
/// \param sync_point The sync_point associated with this memory operation.
__SYCL_EXPORT pi_result piextCommandBufferMemBufferWrite(
pi_ext_command_buffer command_buffer, pi_mem buffer, size_t offset,
size_t size, const void *ptr, pi_uint32 num_events_in_wait_list,
const pi_ext_sync_point *sync_point_wait_list,
pi_ext_sync_point *sync_point);

/// API to append a rectangular mem buffer write command to the command-buffer.
/// \param command_buffer The command-buffer to append onto.
/// \param buffer is the location to write the data
/// \param buffer_offset offset for the start of the region to write in buffer
/// \param host_offset offset for the start of the region to be read from ptr
/// \param region The size of the region to write
/// \param buffer_row_pitch Row pitch for the buffer data
/// \param buffer_slice_pitch Slice pitch for the buffer data
/// \param host_row_pitch Row pitch for the source data ptr
/// \param host_slice_pitch Slice pitch for the source data ptr
/// \param ptr is the pointer to the source
/// \param num_sync_points_in_wait_list The number of sync points in the
/// provided wait list.
/// \param sync_point_wait_list A list of sync points that this command must
/// wait on.
/// \param sync_point The sync_point associated with this memory operation.
__SYCL_EXPORT pi_result piextCommandBufferMemBufferWriteRect(
pi_ext_command_buffer command_buffer, pi_mem buffer,
pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset,
pi_buff_rect_region region, size_t buffer_row_pitch,
size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch,
const void *ptr, pi_uint32 num_events_in_wait_list,
const pi_ext_sync_point *sync_point_wait_list,
pi_ext_sync_point *sync_point);

/// API to submit the command-buffer to queue for execution, returns an error if
/// the command-buffer is not finalized or another instance of the same
/// command-buffer is currently executing.
/// \param command_buffer The command-buffer to be submitted.
/// \param queue The PI queue to submit on.
/// \param num_events_in_wait_list The number of events that this execution
/// depends on.
/// \param event_wait_list List of pi_events to wait on.
/// \param event The pi_event associated with this enqueue.
__SYCL_EXPORT pi_result
piextEnqueueCommandBuffer(pi_ext_command_buffer command_buffer, pi_queue queue,
pi_uint32 num_events_in_wait_list,
const pi_event *event_wait_list, pi_event *event);

struct _pi_plugin {
// PI version supported by host passed to the plugin. The Plugin
// checks and writes the appropriate Function Pointers in
Expand Down
3 changes: 3 additions & 0 deletions sycl/include/sycl/detail/pi.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,9 @@ using PiMemObjectType = ::pi_mem_type;
using PiMemImageChannelOrder = ::pi_image_channel_order;
using PiMemImageChannelType = ::pi_image_channel_type;
using PiKernelCacheConfig = ::pi_kernel_cache_config;
using PiExtSyncPoint = ::pi_ext_sync_point;
using PiExtCommandBuffer = ::pi_ext_command_buffer;
using PiExtCommandBufferDesc = ::pi_ext_command_buffer_desc;

__SYCL_EXPORT void contextSetExtendedDeleter(const sycl::context &constext,
pi_context_extended_deleter func,
Expand Down
2 changes: 2 additions & 0 deletions sycl/plugins/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ add_sycl_plugin(cuda
"../unified_runtime/ur/adapters/cuda/tracing.cpp"
"../unified_runtime/ur/adapters/cuda/ur_interface_loader.cpp"
"../unified_runtime/ur/adapters/cuda/usm.cpp"
"../unified_runtime/ur/adapters/cuda/command_buffer.hpp"
"../unified_runtime/ur/adapters/cuda/command_buffer.cpp"
# ---
"${sycl_inc_dir}/sycl/detail/pi.h"
"${sycl_inc_dir}/sycl/detail/pi.hpp"
Expand Down
13 changes: 13 additions & 0 deletions sycl/plugins/cuda/pi_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,19 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
_PI_CL(piGetDeviceAndHostTimer, pi2ur::piGetDeviceAndHostTimer)
_PI_CL(piPluginGetBackendOption, pi2ur::piPluginGetBackendOption)

// command-buffer
_PI_CL(piextCommandBufferCreate, pi2ur::piextCommandBufferCreate)
_PI_CL(piextCommandBufferRetain, pi2ur::piextCommandBufferRetain)
_PI_CL(piextCommandBufferRelease, pi2ur::piextCommandBufferRelease)
_PI_CL(piextCommandBufferNDRangeKernel,
pi2ur::piextCommandBufferNDRangeKernel)
_PI_CL(piextCommandBufferMemcpyUSM, pi2ur::piextCommandBufferMemcpyUSM)
_PI_CL(piextCommandBufferMemBufferCopy,
pi2ur::piextCommandBufferMemBufferCopy)
_PI_CL(piextCommandBufferMemBufferCopyRect,
pi2ur::piextCommandBufferMemBufferCopyRect)
_PI_CL(piextEnqueueCommandBuffer, pi2ur::piextEnqueueCommandBuffer)

#undef _PI_CL

return PI_SUCCESS;
Expand Down
5 changes: 5 additions & 0 deletions sycl/plugins/cuda/pi_cuda.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#define _PI_CUDA_PLUGIN_VERSION_STRING \
_PI_PLUGIN_VERSION_STRING(_PI_CUDA_PLUGIN_VERSION)

#include <ur/adapters/cuda/command_buffer.hpp>
#include <ur/adapters/cuda/context.hpp>
#include <ur/adapters/cuda/device.hpp>
#include <ur/adapters/cuda/event.hpp>
Expand Down Expand Up @@ -76,4 +77,8 @@ struct _pi_sampler : ur_sampler_handle_t_ {
using ur_sampler_handle_t_::ur_sampler_handle_t_;
};

struct _pi_ext_command_buffer : ur_exp_command_buffer_handle_t_ {
using ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_;
};

#endif // PI_CUDA_HPP
Loading