Skip to content

[SYCL][CUDA] PI API Sampler Support for CUDA #1993

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions sycl/include/CL/sycl/detail/pi.def
Original file line number Diff line number Diff line change
Expand Up @@ -125,5 +125,6 @@ _PI_API(piextUSMEnqueueMemAdvise)
_PI_API(piextUSMGetMemAllocInfo)

_PI_API(piextKernelSetArgMemObj)
_PI_API(piextKernelSetArgSampler)

#undef _PI_API
12 changes: 12 additions & 0 deletions sycl/include/CL/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -1332,10 +1332,22 @@ __SYCL_EXPORT pi_result piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj,
const pi_event *event_wait_list,
pi_event *event);

// Extension to allow backends to process a PI memory object before adding it
// as an argument for a kernel.
// Note: This is needed by the CUDA backend to extract the device pointer to
// the memory as the kernels uses it rather than the PI object itself.
__SYCL_EXPORT pi_result piextKernelSetArgMemObj(pi_kernel kernel,
pi_uint32 arg_index,
const pi_mem *arg_value);

// Extension to allow backends to process a PI sampler object before adding it
// as an argument for a kernel.
// Note: This is needed by the CUDA backend to extract the properties of the
// sampler as the kernels uses it rather than the PI object itself.
__SYCL_EXPORT pi_result piextKernelSetArgSampler(pi_kernel kernel,
pi_uint32 arg_index,
const pi_sampler *arg_value);

///
// USM
///
Expand Down
148 changes: 136 additions & 12 deletions sycl/plugins/cuda/pi_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2210,6 +2210,22 @@ pi_result cuda_piextKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index,
return retErr;
}

pi_result cuda_piextKernelSetArgSampler(pi_kernel kernel, pi_uint32 arg_index,
const pi_sampler *arg_value) {

assert(kernel != nullptr);
assert(arg_value != nullptr);

pi_result retErr = PI_SUCCESS;
try {
pi_uint32 samplerProps = (*arg_value)->props_;
kernel->set_kernel_arg(arg_index, sizeof(pi_uint32), (void *)&samplerProps);
} catch (pi_result err) {
retErr = err;
}
return retErr;
}

pi_result cuda_piEnqueueKernelLaunch(
pi_queue command_queue, pi_kernel kernel, pi_uint32 work_dim,
const size_t *global_work_offset, const size_t *global_work_size,
Expand Down Expand Up @@ -2989,32 +3005,139 @@ pi_result cuda_piextEventCreateWithNativeHandle(pi_native_handle nativeHandle,
return {};
}

/// \TODO Not implemented in CUDA, need untie from OpenCL
/// Creates a PI sampler object
///
/// \param[in] context The context the sampler is created for.
/// \param[in] sampler_properties The properties for the sampler.
/// \param[out] result_sampler Set to the resulting sampler object.
///
/// \return PI_SUCCESS on success. PI_INVALID_VALUE if given an invalid property
/// or if there is multiple of properties from the same category.
pi_result cuda_piSamplerCreate(pi_context context,
const cl_sampler_properties *sampler_properties,
const pi_sampler_properties *sampler_properties,
pi_sampler *result_sampler) {
cl::sycl::detail::pi::die("cuda_piSamplerCreate not implemented");
return {};
std::unique_ptr<_pi_sampler> retImplSampl{new _pi_sampler(context)};

bool propSeen[3] = {false, false, false};
for (size_t i = 0; sampler_properties[i] != 0; i += 2) {
switch (sampler_properties[i]) {
case PI_SAMPLER_PROPERTIES_NORMALIZED_COORDS:
if (propSeen[0]) {
return PI_INVALID_VALUE;
}
propSeen[0] = true;
retImplSampl->props_ |= sampler_properties[i + 1];
break;
case PI_SAMPLER_PROPERTIES_FILTER_MODE:
if (propSeen[1]) {
return PI_INVALID_VALUE;
}
propSeen[1] = true;
retImplSampl->props_ |=
(sampler_properties[i + 1] - PI_SAMPLER_FILTER_MODE_NEAREST) << 1;
break;
case PI_SAMPLER_PROPERTIES_ADDRESSING_MODE:
if (propSeen[2]) {
return PI_INVALID_VALUE;
}
propSeen[2] = true;
retImplSampl->props_ |=
(sampler_properties[i + 1] - PI_SAMPLER_ADDRESSING_MODE_NONE) << 2;
break;
default:
return PI_INVALID_VALUE;
}
}

if (!propSeen[0]) {
retImplSampl->props_ |= CL_TRUE;
}
// Default filter mode to CL_FILTER_NEAREST
if (!propSeen[2]) {
retImplSampl->props_ |= (CL_ADDRESS_CLAMP % CL_ADDRESS_NONE) << 2;
}

*result_sampler = retImplSampl.release();
return PI_SUCCESS;
}

/// \TODO Not implemented in CUDA, need untie from OpenCL
/// Gets information from a PI sampler object
///
/// \param[in] sampler The sampler to get the information from.
/// \param[in] param_name The name of the information to get.
/// \param[in] param_value_size The size of the param_value.
/// \param[out] param_value Set to information value.
/// \param[out] param_value_size_ret Set to the size of the information value.
///
/// \return PI_SUCCESS on success.
pi_result cuda_piSamplerGetInfo(pi_sampler sampler, cl_sampler_info param_name,
size_t param_value_size, void *param_value,
size_t *param_value_size_ret) {
cl::sycl::detail::pi::die("cuda_piSamplerGetInfo not implemented");
assert(sampler != nullptr);

switch (param_name) {
case PI_SAMPLER_INFO_REFERENCE_COUNT:
return getInfo(param_value_size, param_value, param_value_size_ret,
sampler->get_reference_count());
case PI_SAMPLER_INFO_CONTEXT:
return getInfo(param_value_size, param_value, param_value_size_ret,
sampler->context_);
case PI_SAMPLER_INFO_NORMALIZED_COORDS: {
pi_bool norm_coords_prop = static_cast<pi_bool>(sampler->props_ & 0x1);
return getInfo(param_value_size, param_value, param_value_size_ret,
norm_coords_prop);
}
case PI_SAMPLER_INFO_FILTER_MODE: {
pi_sampler_filter_mode filter_prop = static_cast<pi_sampler_filter_mode>(
((sampler->props_ >> 1) & 0x1) + PI_SAMPLER_FILTER_MODE_NEAREST);
return getInfo(param_value_size, param_value, param_value_size_ret,
filter_prop);
}
case PI_SAMPLER_INFO_ADDRESSING_MODE: {
pi_sampler_addressing_mode addressing_prop =
static_cast<pi_sampler_addressing_mode>(
(sampler->props_ >> 2) + PI_SAMPLER_ADDRESSING_MODE_NONE);
return getInfo(param_value_size, param_value, param_value_size_ret,
addressing_prop);
}
default:
PI_HANDLE_UNKNOWN_PARAM_NAME(param_name);
}
return {};
}

/// \TODO Not implemented in CUDA, need untie from OpenCL
/// Retains a PI sampler object, incrementing its reference count.
///
/// \param[in] sampler The sampler to increment the reference count of.
///
/// \return PI_SUCCESS.
pi_result cuda_piSamplerRetain(pi_sampler sampler) {
cl::sycl::detail::pi::die("cuda_piSamplerRetain not implemented");
return {};
assert(sampler != nullptr);
sampler->increment_reference_count();
return PI_SUCCESS;
}

/// \TODO Not implemented in CUDA, need untie from OpenCL
/// Releases a PI sampler object, decrementing its reference count. If the
/// reference count reaches zero, the sampler object is destroyed.
///
/// \param[in] sampler The sampler to decrement the reference count of.
///
/// \return PI_SUCCESS.
pi_result cuda_piSamplerRelease(pi_sampler sampler) {
cl::sycl::detail::pi::die("cuda_piSamplerRelease not implemented");
return {};
assert(sampler != nullptr);

// double delete or someone is messing with the ref count.
// either way, cannot safely proceed.
cl::sycl::detail::pi::assertion(
sampler->get_reference_count() != 0,
"Reference count overflow detected in cuda_piSamplerRelease.");

// decrement ref count. If it is 0, delete the sampler.
if (sampler->decrement_reference_count() == 0) {
delete sampler;
}

return PI_SUCCESS;
}

/// General 3D memory copy operation.
Expand Down Expand Up @@ -3934,6 +4057,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
_PI_CL(piextUSMGetMemAllocInfo, cuda_piextUSMGetMemAllocInfo)

_PI_CL(piextKernelSetArgMemObj, cuda_piextKernelSetArgMemObj)
_PI_CL(piextKernelSetArgSampler, cuda_piextKernelSetArgSampler)

#undef _PI_CL

Expand Down
20 changes: 20 additions & 0 deletions sycl/plugins/cuda/pi_cuda.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -617,6 +617,26 @@ struct _pi_kernel {
void clear_local_size() { args_.clear_local_size(); }
};

/// Implementation of samplers for CUDA
///
/// Sampler property layout:
/// | 31 30 ... 6 5 | 4 3 2 | 1 | 0 |
/// | N/A | addressing mode | fiter mode | normalize coords |
struct _pi_sampler {
std::atomic_uint32_t refCount_;
pi_uint32 props_;
pi_context context_;

_pi_sampler(pi_context context)
: refCount_(1), props_(0), context_(context) {}

pi_uint32 increment_reference_count() noexcept { return ++refCount_; }

pi_uint32 decrement_reference_count() noexcept { return --refCount_; }

pi_uint32 get_reference_count() const noexcept { return refCount_; }
};

// -------------------------------------------------------------
// Helper types and functions
//
Expand Down
9 changes: 8 additions & 1 deletion sycl/plugins/level_zero/pi_level0.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2003,7 +2003,7 @@ pi_result piKernelSetArg(pi_kernel Kernel, pi_uint32 ArgIndex, size_t ArgSize,
return PI_SUCCESS;
}

// Special version of piKernelSetArg to accept pi_mem and pi_sampler.
// Special version of piKernelSetArg to accept pi_mem.
pi_result piextKernelSetArgMemObj(pi_kernel Kernel, pi_uint32 ArgIndex,
const pi_mem *ArgValue) {
// TODO: the better way would probably be to add a new PI API for
Expand All @@ -2020,6 +2020,13 @@ pi_result piextKernelSetArgMemObj(pi_kernel Kernel, pi_uint32 ArgIndex,
return PI_SUCCESS;
}

// Special version of piKernelSetArg to accept pi_sampler.
pi_result piextKernelSetArgSampler(pi_kernel Kernel, pi_uint32 ArgIndex,
const pi_sampler *ArgValue) {
die("piextKernelSetArgSampler: not implemented");
return {};
}

pi_result piKernelGetInfo(pi_kernel Kernel, pi_kernel_info ParamName,
size_t ParamValueSize, void *ParamValue,
size_t *ParamValueSizeRet) {
Expand Down
8 changes: 8 additions & 0 deletions sycl/plugins/opencl/pi_opencl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,13 @@ pi_result piextKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index,
sizeof(arg_value), cast<const cl_mem *>(arg_value)));
}

pi_result piextKernelSetArgSampler(pi_kernel kernel, pi_uint32 arg_index,
const pi_sampler *arg_value) {
return cast<pi_result>(
clSetKernelArg(cast<cl_kernel>(kernel), cast<cl_uint>(arg_index),
sizeof(cl_sampler), cast<const cl_sampler *>(arg_value)));
}

pi_result piextGetDeviceFunctionPointer(pi_device device, pi_program program,
const char *func_name,
pi_uint64 *function_pointer_ret) {
Expand Down Expand Up @@ -1234,6 +1241,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
_PI_CL(piextUSMGetMemAllocInfo, piextUSMGetMemAllocInfo)

_PI_CL(piextKernelSetArgMemObj, piextKernelSetArgMemObj)
_PI_CL(piextKernelSetArgSampler, piextKernelSetArgSampler)

#undef _PI_CL

Expand Down
4 changes: 2 additions & 2 deletions sycl/source/detail/scheduler/commands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1657,8 +1657,8 @@ pi_result ExecCGCommand::SetKernelParamsAndLaunch(
sampler *SamplerPtr = (sampler *)Arg.MPtr;
RT::PiSampler Sampler = detail::getSyclObjImpl(*SamplerPtr)
->getOrCreateSampler(MQueue->get_context());
Plugin.call<PiApiKind::piKernelSetArg>(Kernel, Arg.MIndex,
sizeof(cl_sampler), &Sampler);
Plugin.call<PiApiKind::piextKernelSetArgSampler>(Kernel, Arg.MIndex,
&Sampler);
break;
}
case kernel_param_kind_t::kind_pointer: {
Expand Down
1 change: 1 addition & 0 deletions sycl/test/abi/pi_level0_symbol_check.dump
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ piProgramGetInfo
piextGetDeviceFunctionPointer
piEnqueueMemUnmap
piextKernelSetArgMemObj
piextKernelSetArgSampler
piQueueCreate
piEventCreate
piKernelGetInfo
Expand Down
1 change: 1 addition & 0 deletions sycl/test/abi/pi_opencl_symbol_check.dump
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ piextEventCreateWithNativeHandle
piextGetDeviceFunctionPointer
piextProgramGetNativeHandle
piextKernelSetArgMemObj
piextKernelSetArgSampler
piextKernelSetArgPointer
piextMemCreateWithNativeHandle
piextMemGetNativeHandle
Expand Down