Skip to content

[SYCL] Untie PI functions from OpenCL #1717

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 29 additions & 5 deletions sycl/include/CL/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ typedef enum {
PI_DEVICE_INFO_BUILT_IN_KERNELS = CL_DEVICE_BUILT_IN_KERNELS,
PI_DEVICE_INFO_PLATFORM = CL_DEVICE_PLATFORM,
PI_DEVICE_INFO_REFERENCE_COUNT = CL_DEVICE_REFERENCE_COUNT,
PI_DEVICE_INFO_IL_VERSION = CL_DEVICE_IL_VERSION_KHR,
PI_DEVICE_INFO_NAME = CL_DEVICE_NAME,
PI_DEVICE_INFO_VENDOR = CL_DEVICE_VENDOR,
PI_DEVICE_INFO_DRIVER_VERSION = CL_DRIVER_VERSION,
Expand All @@ -241,6 +242,10 @@ typedef enum {
PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN =
CL_DEVICE_PARTITION_AFFINITY_DOMAIN,
PI_DEVICE_INFO_PARTITION_TYPE = CL_DEVICE_PARTITION_TYPE,
PI_DEVICE_INFO_MAX_NUM_SUB_GROUPS = CL_DEVICE_MAX_NUM_SUB_GROUPS,
PI_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS =
CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS,
PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL = CL_DEVICE_SUB_GROUP_SIZES_INTEL,
PI_DEVICE_INFO_USM_HOST_SUPPORT = CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL,
PI_DEVICE_INFO_USM_DEVICE_SUPPORT = CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL,
PI_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT =
Expand Down Expand Up @@ -299,6 +304,16 @@ typedef enum {
PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE = CL_KERNEL_PRIVATE_MEM_SIZE
} _pi_kernel_group_info;

// Floating-point capability flags exposed through the PI layer.
// Each enumerator is defined as the OpenCL CL_FP_* constant of the same name,
// so the numeric values are identical to the OpenCL ones.
// NOTE(review): these appear to be bit flags that are OR-ed together when a
// plugin reports PI_DEVICE_INFO_SINGLE_FP_CONFIG / PI_DEVICE_INFO_DOUBLE_FP_CONFIG
// -- confirm against the plugin implementations.
typedef enum {
PI_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT = CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT,
PI_FP_ROUND_TO_NEAREST = CL_FP_ROUND_TO_NEAREST,
PI_FP_ROUND_TO_ZERO = CL_FP_ROUND_TO_ZERO,
PI_FP_ROUND_TO_INF = CL_FP_ROUND_TO_INF,
PI_FP_INF_NAN = CL_FP_INF_NAN,
PI_FP_DENORM = CL_FP_DENORM,
PI_FP_FMA = CL_FP_FMA
} _pi_fp_capabilities;

typedef enum {
PI_IMAGE_INFO_FORMAT = CL_IMAGE_FORMAT,
PI_IMAGE_INFO_ELEMENT_SIZE = CL_IMAGE_ELEMENT_SIZE,
Expand Down Expand Up @@ -512,6 +527,7 @@ using pi_image_info = _pi_image_info;
using pi_kernel_info = _pi_kernel_info;
using pi_kernel_group_info = _pi_kernel_group_info;
using pi_kernel_sub_group_info = _pi_kernel_sub_group_info;
using pi_fp_capabilities = _pi_fp_capabilities;
using pi_event_info = _pi_event_info;
using pi_command_type = _pi_command_type;
using pi_mem_type = _pi_mem_type;
Expand Down Expand Up @@ -678,6 +694,13 @@ struct pi_device_binary_struct {
};
using pi_device_binary = pi_device_binary_struct *;

// pi_buffer_region structure repeats cl_buffer_region: two size_t fields,
// origin followed by size, describing a sub-region of a parent buffer.
// NOTE(review): the field-for-field layout match with cl_buffer_region is
// assumed from the OpenCL headers and is not checked here -- confirm before
// reordering or adding members.
struct pi_buffer_region_struct {
// Start of the sub-region within the parent buffer.
// NOTE(review): presumably a byte offset -- confirm against callers.
size_t origin;
// Extent of the sub-region.
// NOTE(review): presumably in bytes, like origin -- confirm against callers.
size_t size;
};
// Pointer-to-region alias (note: the alias is a pointer type, not the struct).
using pi_buffer_region = pi_buffer_region_struct *;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BTW, all of pi.h is meant to be a C-only interface.


// Offload binaries descriptor version supported by this library.
static const uint16_t PI_DEVICE_BINARIES_VERSION = 1;

Expand Down Expand Up @@ -1118,10 +1141,10 @@ __SYCL_EXPORT pi_result piKernelSetExecInfo(pi_kernel kernel,
//
__SYCL_EXPORT pi_result piEventCreate(pi_context context, pi_event *ret_event);

__SYCL_EXPORT pi_result piEventGetInfo(
pi_event event,
cl_event_info param_name, // TODO: untie from OpenCL
size_t param_value_size, void *param_value, size_t *param_value_size_ret);
// Queries a property of `event`, selected by the PI-level `param_name`
// (pi_event_info).
// NOTE(review): the remaining parameters look like the usual
// size / value / size_ret query triple (caller-provided buffer of
// `param_value_size` bytes at `param_value`, required size reported through
// `param_value_size_ret`) -- confirm against the plugin implementations.
__SYCL_EXPORT pi_result piEventGetInfo(pi_event event, pi_event_info param_name,
size_t param_value_size,
void *param_value,
size_t *param_value_size_ret);

__SYCL_EXPORT pi_result piEventGetProfilingInfo(pi_event event,
pi_profiling_info param_name,
Expand Down Expand Up @@ -1439,7 +1462,8 @@ __SYCL_EXPORT pi_result piextUSMEnqueuePrefetch(
// USM memadvise API to govern behavior of automatic migration mechanisms
__SYCL_EXPORT pi_result piextUSMEnqueueMemAdvise(pi_queue queue,
const void *ptr, size_t length,
int advice, pi_event *event);
pi_mem_advice advice,
pi_event *event);

/// API to query information about USM allocated pointers
/// Valid Queries:
Expand Down
10 changes: 6 additions & 4 deletions sycl/include/CL/sycl/info/info_desc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,18 +117,20 @@ enum class device : cl_device_info {
partition_affinity_domains = CL_DEVICE_PARTITION_AFFINITY_DOMAIN,
partition_type_affinity_domain = CL_DEVICE_PARTITION_TYPE,
reference_count = CL_DEVICE_REFERENCE_COUNT,
il_version =
CL_DEVICE_IL_VERSION_KHR, // Same as CL_DEVICE_IL_VERSION for >=OpenCL 2.1
max_num_sub_groups = CL_DEVICE_MAX_NUM_SUB_GROUPS,
sub_group_independent_forward_progress =
CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS,
sub_group_sizes = CL_DEVICE_SUB_GROUP_SIZES_INTEL,
partition_type_property,
kernel_kernel_pipe_support,
// USM
usm_device_allocations = PI_USM_DEVICE_SUPPORT,
usm_host_allocations = PI_USM_HOST_SUPPORT,
usm_shared_allocations = PI_USM_SINGLE_SHARED_SUPPORT,
usm_device_allocations = PI_USM_DEVICE_SUPPORT,
usm_host_allocations = PI_USM_HOST_SUPPORT,
usm_shared_allocations = PI_USM_SINGLE_SHARED_SUPPORT,
usm_restricted_shared_allocations = PI_USM_CROSS_SHARED_SUPPORT,
usm_system_allocator = PI_USM_SYSTEM_SHARED_SUPPORT
usm_system_allocator = PI_USM_SYSTEM_SHARED_SUPPORT
};

enum class device_type : pi_uint64 {
Expand Down
14 changes: 7 additions & 7 deletions sycl/plugins/cuda/pi_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1029,15 +1029,15 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name,
}
case PI_DEVICE_INFO_SINGLE_FP_CONFIG: {
// TODO: is this config consistent across all NVIDIA GPUs?
auto config = CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST |
CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_FMA |
CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT;
auto config = PI_FP_DENORM | PI_FP_INF_NAN | PI_FP_ROUND_TO_NEAREST |
PI_FP_ROUND_TO_ZERO | PI_FP_ROUND_TO_INF | PI_FP_FMA |
PI_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT;
return getInfo(param_value_size, param_value, param_value_size_ret, config);
}
case PI_DEVICE_INFO_DOUBLE_FP_CONFIG: {
// TODO: is this config consistent across all NVIDIA GPUs?
auto config = CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST |
CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_FMA;
auto config = PI_FP_DENORM | PI_FP_INF_NAN | PI_FP_ROUND_TO_NEAREST |
PI_FP_ROUND_TO_ZERO | PI_FP_ROUND_TO_INF | PI_FP_FMA;
return getInfo(param_value_size, param_value, param_value_size_ret, config);
}
case PI_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: {
Expand Down Expand Up @@ -1674,7 +1674,7 @@ pi_result cuda_piMemBufferPartition(pi_mem parent_buffer, pi_mem_flags flags,
assert(memObj != nullptr);

const auto bufferRegion =
*reinterpret_cast<const cl_buffer_region *>(buffer_create_info);
*reinterpret_cast<const pi_buffer_region>(buffer_create_info);
assert((bufferRegion.size != 0u) && "PI_INVALID_BUFFER_SIZE");

assert((bufferRegion.origin <= (bufferRegion.origin + bufferRegion.size)) &&
Expand Down Expand Up @@ -3596,7 +3596,7 @@ pi_result cuda_piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr,

/// USM: memadvise API to govern behavior of automatic migration mechanisms
pi_result cuda_piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr,
size_t length, int advice,
size_t length, pi_mem_advice advice,
pi_event *event) {
assert(queue != nullptr);
assert(ptr != nullptr);
Expand Down
3 changes: 2 additions & 1 deletion sycl/plugins/opencl/pi_opencl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -939,7 +939,8 @@ pi_result piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr, size_t size,
/// \param event is the event that represents this operation
// USM memadvise API to govern behavior of automatic migration mechanisms
pi_result piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr,
size_t length, int advice, pi_event *event) {
size_t length, pi_mem_advice advice,
pi_event *event) {

return cast<pi_result>(
clEnqueueMarkerWithWaitList(cast<cl_command_queue>(queue), 0, nullptr,
Expand Down
3 changes: 1 addition & 2 deletions sycl/source/detail/memory_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,7 @@ void *MemoryManager::allocateMemSubBuffer(ContextImplPtr TargetContext,
SizeInBytes *= Range[I];

RT::PiResult Error = PI_SUCCESS;
// TODO replace with pi_buffer_region
cl_buffer_region Region{Offset, SizeInBytes};
pi_buffer_region_struct Region{Offset, SizeInBytes};
RT::PiMem NewMem;
const detail::plugin &Plugin = TargetContext->getPlugin();
Error = Plugin.call_nocheck<PiApiKind::piMemBufferPartition>(
Expand Down