Skip to content

Commit 807bc9b

Browse files
jbrodman authored and bader committed
[SYCL][USM][PI] Initial Commit to move all USM implementation detail inside the PI in… (#937)
* Removes need for USM Dispatcher. CLUSM is also now unused as CL USM is supported on CPU
* Cache Extension Function Pointers per context using thread local storage
* Enable capability queries for various USM APIs
* Gate all USM tests by the appropriate capability check

Signed-off-by: James Brodman <[email protected]>
1 parent 756deb8 commit 807bc9b

35 files changed

+1226
-526
lines changed

sycl/include/CL/sycl/detail/context_impl.hpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -111,11 +111,6 @@ class context_impl {
111111
return MDevices;
112112
}
113113

114-
/// Gets USM dispatcher.
115-
///
116-
/// @return a pointer to USM dispatcher.
117-
std::shared_ptr<usm::USMDispatcher> getUSMDispatch() const;
118-
119114
/// In contrast to user programs, which are compiled from user code, library
120115
/// programs come from the SYCL runtime. They are identified by the
121116
/// corresponding extension:
@@ -141,7 +136,6 @@ class context_impl {
141136
PlatformImplPtr MPlatform;
142137
bool MPluginInterop;
143138
bool MHostContext;
144-
std::shared_ptr<usm::USMDispatcher> MUSMDispatch;
145139
std::map<DeviceLibExt, RT::PiProgram> MCachedLibPrograms;
146140
mutable KernelProgramCache MKernelProgramCache;
147141
};

sycl/include/CL/sycl/detail/device_info.hpp

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,75 @@ get_device_info_host() = delete;
326326

327327
cl_uint get_native_vector_width(size_t idx);
328328

329+
// USM
330+
331+
// Specialization for device usm query.
332+
template <>
333+
struct get_device_info<bool, info::device::usm_device_allocations> {
334+
static bool get(RT::PiDevice dev) {
335+
pi_usm_capabilities caps;
336+
pi_result Err = PI_CALL_NOCHECK(piDeviceGetInfo)(
337+
dev, pi::cast<RT::PiDeviceInfo>(info::device::usm_device_allocations),
338+
sizeof(pi_usm_capabilities), &caps, nullptr);
339+
340+
return (Err != PI_SUCCESS) ? false : (caps & PI_USM_ACCESS);
341+
}
342+
};
343+
344+
// Specialization for host usm query.
345+
template <>
346+
struct get_device_info<bool, info::device::usm_host_allocations> {
347+
static bool get(RT::PiDevice dev) {
348+
pi_usm_capabilities caps;
349+
pi_result Err = PI_CALL_NOCHECK(piDeviceGetInfo)(
350+
dev, pi::cast<RT::PiDeviceInfo>(info::device::usm_host_allocations),
351+
sizeof(pi_usm_capabilities), &caps, nullptr);
352+
353+
return (Err != PI_SUCCESS) ? false : (caps & PI_USM_ACCESS);
354+
}
355+
};
356+
357+
// Specialization for shared usm query.
358+
template <>
359+
struct get_device_info<bool, info::device::usm_shared_allocations> {
360+
static bool get(RT::PiDevice dev) {
361+
pi_usm_capabilities caps;
362+
pi_result Err = PI_CALL_NOCHECK(piDeviceGetInfo)(
363+
dev, pi::cast<RT::PiDeviceInfo>(info::device::usm_shared_allocations),
364+
sizeof(pi_usm_capabilities), &caps, nullptr);
365+
return (Err != PI_SUCCESS) ? false : (caps & PI_USM_ACCESS);
366+
}
367+
};
368+
369+
// Specialization for restricted usm query
370+
template <>
371+
struct get_device_info<bool, info::device::usm_restricted_shared_allocations> {
372+
static bool get(RT::PiDevice dev) {
373+
pi_usm_capabilities caps;
374+
pi_result Err = PI_CALL_NOCHECK(piDeviceGetInfo)(
375+
dev,
376+
pi::cast<RT::PiDeviceInfo>(
377+
info::device::usm_restricted_shared_allocations),
378+
sizeof(pi_usm_capabilities), &caps, nullptr);
379+
// Check that we don't support any cross device sharing
380+
return (Err != PI_SUCCESS)
381+
? false
382+
: !(caps & (PI_USM_ACCESS | PI_USM_CONCURRENT_ACCESS));
383+
}
384+
};
385+
386+
// Specialization for system usm query
387+
template <>
388+
struct get_device_info<bool, info::device::usm_system_allocator> {
389+
static bool get(RT::PiDevice dev) {
390+
pi_usm_capabilities caps;
391+
pi_result Err = PI_CALL_NOCHECK(piDeviceGetInfo)(
392+
dev, pi::cast<RT::PiDeviceInfo>(info::device::usm_system_allocator),
393+
sizeof(pi_usm_capabilities), &caps, nullptr);
394+
return (Err != PI_SUCCESS) ? false : (caps & PI_USM_ACCESS);
395+
}
396+
};
397+
329398
} // namespace detail
330399
} // namespace sycl
331400
} // namespace cl

sycl/include/CL/sycl/detail/pi.def

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ _PI_API(piKernelGetGroupInfo)
6363
_PI_API(piKernelGetSubGroupInfo)
6464
_PI_API(piKernelRetain)
6565
_PI_API(piKernelRelease)
66+
_PI_API(piextKernelSetArgPointer)
67+
_PI_API(piKernelSetExecInfo)
6668
// Event
6769
_PI_API(piEventCreate)
6870
_PI_API(piEventGetInfo)
@@ -94,5 +96,15 @@ _PI_API(piEnqueueMemImageCopy)
9496
_PI_API(piEnqueueMemImageFill)
9597
_PI_API(piEnqueueMemBufferMap)
9698
_PI_API(piEnqueueMemUnmap)
99+
// USM
100+
_PI_API(piextUSMHostAlloc)
101+
_PI_API(piextUSMDeviceAlloc)
102+
_PI_API(piextUSMSharedAlloc)
103+
_PI_API(piextUSMFree)
104+
_PI_API(piextUSMEnqueueMemset)
105+
_PI_API(piextUSMEnqueueMemcpy)
106+
_PI_API(piextUSMEnqueuePrefetch)
107+
_PI_API(piextUSMEnqueueMemAdvise)
108+
_PI_API(piextUSMGetMemAllocInfo)
97109

98110
#undef _PI_API

sycl/include/CL/sycl/detail/pi.h

Lines changed: 236 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
// done here, for efficiency and simplicity.
3333
//
3434
#include <CL/opencl.h>
35+
#include <CL/cl_usm_ext.h>
3536
#include <cstdint>
3637

3738
#ifdef __cplusplus
@@ -639,6 +640,16 @@ pi_result piProgramRelease(pi_program program);
639640
//
640641
// Kernel
641642
//
643+
644+
typedef enum {
645+
/// indicates that the kernel might access data through USM ptrs
646+
PI_USM_INDIRECT_ACCESS,
647+
/// provides an explicit list of pointers that the kernel will access
648+
PI_USM_PTRS = CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL
649+
} _pi_kernel_exec_info;
650+
651+
typedef _pi_kernel_exec_info pi_kernel_exec_info;
652+
642653
pi_result piKernelCreate(
643654
pi_program program,
644655
const char * kernel_name,
@@ -679,6 +690,33 @@ pi_result piKernelRetain(pi_kernel kernel);
679690

680691
pi_result piKernelRelease(pi_kernel kernel);
681692

693+
/// Sets up pointer arguments for CL kernels. An extra indirection
694+
/// is required due to CL argument conventions.
695+
///
696+
/// @param kernel is the kernel to be launched
697+
/// @param arg_index is the index of the kernel argument
698+
/// @param arg_size is the size in bytes of the argument (ignored in CL)
699+
/// @param arg_value is the pointer argument
700+
pi_result piextKernelSetArgPointer(
701+
pi_kernel kernel,
702+
pi_uint32 arg_index,
703+
size_t arg_size,
704+
const void * arg_value);
705+
706+
/// API to set attributes controlling kernel execution
707+
///
708+
/// @param kernel is the pi kernel to execute
709+
/// @param value_name is a pi_kernel_exec_info value that specifies the info
710+
/// passed to the kernel
711+
/// @param param_value_size is the size of the value in bytes
712+
/// @param param_value is a pointer to the value to set for the kernel
713+
///
714+
/// If value_name is PI_USM_INDIRECT_ACCESS, the value will be a ptr to
715+
/// the pi_bool value PI_TRUE
716+
/// If value_name is PI_USM_PTRS, the value will be an array of ptrs
717+
pi_result piKernelSetExecInfo(pi_kernel kernel, pi_kernel_exec_info value_name,
718+
size_t param_value_size, const void *param_value);
719+
682720
//
683721
// Events
684722
//
@@ -929,6 +967,204 @@ pi_result piEnqueueMemUnmap(
929967
const pi_event * event_wait_list,
930968
pi_event * event);
931969

970+
///
971+
// USM
972+
///
973+
typedef enum {
974+
PI_USM_HOST_SUPPORT = CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL,
975+
PI_USM_DEVICE_SUPPORT = CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL,
976+
PI_USM_SINGLE_SHARED_SUPPORT = CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL,
977+
PI_USM_CROSS_SHARED_SUPPORT = CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL,
978+
PI_USM_SYSTEM_SHARED_SUPPORT = CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL
979+
} _pi_usm_capability_query;
980+
981+
typedef enum : pi_bitfield {
982+
PI_USM_ACCESS = CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL,
983+
PI_USM_ATOMIC_ACCESS = CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL,
984+
PI_USM_CONCURRENT_ACCESS = CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL,
985+
PI_USM_CONCURRENT_ATOMIC_ACCESS = CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL
986+
} _pi_usm_capabilities;
987+
988+
typedef enum {
989+
PI_MEM_ALLOC_TYPE = CL_MEM_ALLOC_TYPE_INTEL,
990+
PI_MEM_ALLOC_BASE_PTR = CL_MEM_ALLOC_BASE_PTR_INTEL,
991+
PI_MEM_ALLOC_SIZE = CL_MEM_ALLOC_SIZE_INTEL,
992+
PI_MEM_ALLOC_DEVICE = CL_MEM_ALLOC_DEVICE_INTEL,
993+
PI_MEM_ALLOC_INFO_TBD0 = CL_MEM_ALLOC_INFO_TBD0_INTEL,
994+
PI_MEM_ALLOC_INFO_TBD1 = CL_MEM_ALLOC_INFO_TBD1_INTEL,
995+
} _pi_mem_info;
996+
997+
typedef enum {
998+
PI_MEM_TYPE_UNKNOWN = CL_MEM_TYPE_UNKNOWN_INTEL,
999+
PI_MEM_TYPE_HOST = CL_MEM_TYPE_HOST_INTEL,
1000+
PI_MEM_TYPE_DEVICE = CL_MEM_TYPE_DEVICE_INTEL,
1001+
PI_MEM_TYPE_SHARED = CL_MEM_TYPE_SHARED_INTEL
1002+
} _pi_usm_type;
1003+
1004+
typedef enum : pi_bitfield {
1005+
PI_MEM_ALLOC_FLAGS = CL_MEM_ALLOC_FLAGS_INTEL
1006+
} _pi_usm_mem_properties;
1007+
1008+
typedef enum : pi_bitfield {
1009+
PI_USM_MIGRATION_TBD0 = (1 << 0)
1010+
} _pi_usm_migration_flags;
1011+
1012+
typedef _pi_usm_capability_query pi_usm_capability_query;
1013+
typedef _pi_usm_capabilities pi_usm_capabilities;
1014+
typedef _pi_mem_info pi_mem_info;
1015+
typedef _pi_usm_type pi_usm_type;
1016+
typedef _pi_usm_mem_properties pi_usm_mem_properties;
1017+
typedef _pi_usm_migration_flags pi_usm_migration_flags;
1018+
1019+
/// Allocates host memory accessible by the device.
1020+
///
1021+
/// @param result_ptr contains the allocated memory
1022+
/// @param context is the pi_context
1023+
/// @param properties are optional allocation properties
1024+
/// @param size is the size of the allocation in bytes
1025+
/// @param alignment is the desired alignment of the allocation
1026+
pi_result piextUSMHostAlloc(
1027+
void ** result_ptr,
1028+
pi_context context,
1029+
pi_usm_mem_properties * properties,
1030+
size_t size,
1031+
pi_uint32 alignment);
1032+
1033+
/// Allocates device memory
1034+
///
1035+
/// @param result_ptr contains the allocated memory
1036+
/// @param context is the pi_context
1037+
/// @param device is the device the memory will be allocated on
1038+
/// @param properties are optional allocation properties
1039+
/// @param size is the size of the allocation in bytes
1040+
/// @param alignment is the desired alignment of the allocation
1041+
pi_result piextUSMDeviceAlloc(
1042+
void ** result_ptr,
1043+
pi_context context,
1044+
pi_device device,
1045+
pi_usm_mem_properties * properties,
1046+
size_t size,
1047+
pi_uint32 alignment);
1048+
1049+
/// Allocates memory accessible on both host and device
1050+
///
1051+
/// @param result_ptr contains the allocated memory
1052+
/// @param context is the pi_context
1053+
/// @param device is the device the memory will be allocated on
1054+
/// @param properties are optional allocation properties
1055+
/// @param size is the size of the allocation in bytes
1056+
/// @param alignment is the desired alignment of the allocation
1057+
pi_result piextUSMSharedAlloc(
1058+
void ** result_ptr,
1059+
pi_context context,
1060+
pi_device device,
1061+
pi_usm_mem_properties * properties,
1062+
size_t size,
1063+
pi_uint32 alignment);
1064+
1065+
/// Frees allocated USM memory
1066+
///
1067+
/// @param context is the pi_context of the allocation
1068+
/// @param ptr is the memory to be freed
1069+
pi_result piextUSMFree(
1070+
pi_context context,
1071+
void * ptr);
1072+
1073+
/// USM Memset API
1074+
///
1075+
/// @param queue is the queue to submit to
1076+
/// @param ptr is the ptr to memset
1077+
/// @param value is value to set. It is interpreted as an 8-bit value and the upper
1078+
/// 24 bits are ignored
1079+
/// @param count is the size in bytes to memset
1080+
/// @param num_events_in_waitlist is the number of events to wait on
1081+
/// @param events_waitlist is an array of events to wait on
1082+
/// @param event is the event that represents this operation
1083+
pi_result piextUSMEnqueueMemset(
1084+
pi_queue queue,
1085+
void * ptr,
1086+
pi_int32 value,
1087+
size_t count,
1088+
pi_uint32 num_events_in_waitlist,
1089+
const pi_event * events_waitlist,
1090+
pi_event * event);
1091+
1092+
/// USM Memcpy API
1093+
///
1094+
/// @param queue is the queue to submit to
1095+
/// @param blocking is whether this operation should block the host
1096+
/// @param src_ptr is the data to be copied
1097+
/// @param dst_ptr is the location the data will be copied to
1098+
/// @param size is number of bytes to copy
1099+
/// @param num_events_in_waitlist is the number of events to wait on
1100+
/// @param events_waitlist is an array of events to wait on
1101+
/// @param event is the event that represents this operation
1102+
pi_result piextUSMEnqueueMemcpy(
1103+
pi_queue queue,
1104+
pi_bool blocking,
1105+
void * dst_ptr,
1106+
const void * src_ptr,
1107+
size_t size,
1108+
pi_uint32 num_events_in_waitlist,
1109+
const pi_event * events_waitlist,
1110+
pi_event * event);
1111+
1112+
/// Hint to migrate memory to the device
1113+
///
1114+
/// @param queue is the queue to submit to
1115+
/// @param ptr points to the memory to migrate
1116+
/// @param size is the number of bytes to migrate
1117+
/// @param flags is a bitfield used to specify memory migration options
1118+
/// @param num_events_in_waitlist is the number of events to wait on
1119+
/// @param events_waitlist is an array of events to wait on
1120+
/// @param event is the event that represents this operation
1121+
pi_result piextUSMEnqueuePrefetch(
1122+
pi_queue queue,
1123+
const void * ptr,
1124+
size_t size,
1125+
pi_usm_migration_flags flags,
1126+
pi_uint32 num_events_in_waitlist,
1127+
const pi_event * events_waitlist,
1128+
pi_event * event);
1129+
1130+
/// USM Memadvise API
1131+
///
1132+
/// @param queue is the queue to submit to
1133+
/// @param ptr is the data to be advised
1134+
/// @param length is the size in bytes of the memory to advise
1135+
/// @param advice is device specific advice
1136+
/// @param event is the event that represents this operation
1137+
// USM memadvise API to govern behavior of automatic migration mechanisms
1138+
pi_result piextUSMEnqueueMemAdvise(
1139+
pi_queue queue,
1140+
const void * ptr,
1141+
size_t length,
1142+
int advice,
1143+
pi_event * event);
1144+
1145+
/// API to query information about USM allocated pointers
1146+
/// Valid Queries:
1147+
/// PI_MEM_ALLOC_TYPE returns host/device/shared pi_usm_type value
1148+
/// PI_MEM_ALLOC_BASE_PTR returns the base ptr of an allocation if
1149+
/// the queried pointer fell inside an allocation.
1150+
/// Result must fit in void *
1151+
/// PI_MEM_ALLOC_SIZE returns how big the queried pointer's
1152+
/// allocation is in bytes. Result is a size_t.
1153+
/// PI_MEM_ALLOC_DEVICE returns the pi_device this was allocated against
1154+
///
1155+
/// @param context is the pi_context
1156+
/// @param ptr is the pointer to query
1157+
/// @param param_name is the type of query to perform
1158+
/// @param param_value_size is the size of the result in bytes
1159+
/// @param param_value is the result
1160+
/// @param param_value_size_ret is how many bytes were written
1161+
pi_result piextUSMGetMemAllocInfo(
1162+
pi_context context,
1163+
const void * ptr,
1164+
pi_mem_info param_name,
1165+
size_t param_value_size,
1166+
void * param_value,
1167+
size_t * param_value_size_ret);
9321168

9331169
struct _pi_plugin {
9341170
// PI version supported by host passed to the plugin. The Plugin

sycl/include/CL/sycl/info/device_traits.def

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,3 +80,8 @@ PARAM_TRAITS_SPEC(device, max_num_sub_groups, pi_uint32)
8080
PARAM_TRAITS_SPEC(device, sub_group_independent_forward_progress, bool)
8181
PARAM_TRAITS_SPEC(device, sub_group_sizes, vector_class<size_t>)
8282
PARAM_TRAITS_SPEC(device, kernel_kernel_pipe_support, bool)
83+
PARAM_TRAITS_SPEC(device, usm_device_allocations, bool)
84+
PARAM_TRAITS_SPEC(device, usm_host_allocations, bool)
85+
PARAM_TRAITS_SPEC(device, usm_shared_allocations, bool)
86+
PARAM_TRAITS_SPEC(device, usm_restricted_shared_allocations, bool)
87+
PARAM_TRAITS_SPEC(device, usm_system_allocator, bool)

0 commit comments

Comments
 (0)