Skip to content

Commit 81ec4f7

Browse files
committed
Implemented CUDA pi_sampler
Added piextKernelSetArgSampler PI API function
1 parent 768f74f commit 81ec4f7

File tree

9 files changed

+183
-17
lines changed

9 files changed

+183
-17
lines changed

sycl/include/CL/sycl/detail/pi.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,5 +125,6 @@ _PI_API(piextUSMEnqueueMemAdvise)
125125
_PI_API(piextUSMGetMemAllocInfo)
126126

127127
_PI_API(piextKernelSetArgMemObj)
128+
_PI_API(piextKernelSetArgSampler)
128129

129130
#undef _PI_API

sycl/include/CL/sycl/detail/pi.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1336,6 +1336,10 @@ __SYCL_EXPORT pi_result piextKernelSetArgMemObj(pi_kernel kernel,
13361336
pi_uint32 arg_index,
13371337
const pi_mem *arg_value);
13381338

1339+
__SYCL_EXPORT pi_result piextKernelSetArgSampler(pi_kernel kernel,
1340+
pi_uint32 arg_index,
1341+
const pi_sampler *arg_value);
1342+
13391343
///
13401344
// USM
13411345
///

sycl/plugins/cuda/pi_cuda.cpp

Lines changed: 136 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2210,6 +2210,22 @@ pi_result cuda_piextKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index,
22102210
return retErr;
22112211
}
22122212

2213+
pi_result cuda_piextKernelSetArgSampler(pi_kernel kernel, pi_uint32 arg_index,
2214+
const pi_sampler *arg_value) {
2215+
2216+
assert(kernel != nullptr);
2217+
assert(arg_value != nullptr);
2218+
2219+
pi_result retErr = PI_SUCCESS;
2220+
try {
2221+
pi_uint32 samplerProps = (*arg_value)->props_;
2222+
kernel->set_kernel_arg(arg_index, sizeof(pi_uint32), (void *)&samplerProps);
2223+
} catch (pi_result err) {
2224+
retErr = err;
2225+
}
2226+
return retErr;
2227+
}
2228+
22132229
pi_result cuda_piEnqueueKernelLaunch(
22142230
pi_queue command_queue, pi_kernel kernel, pi_uint32 work_dim,
22152231
const size_t *global_work_offset, const size_t *global_work_size,
@@ -2989,32 +3005,139 @@ pi_result cuda_piextEventCreateWithNativeHandle(pi_native_handle nativeHandle,
29893005
return {};
29903006
}
29913007

2992-
/// \TODO Not implemented in CUDA, need untie from OpenCL
3008+
/// Creates a PI sampler object
3009+
///
3010+
/// \param[in] context The context the sampler is created for.
3011+
/// \param[in] sampler_properties The properties for the sampler.
3012+
/// \param[out] result_sampler Set to the resulting sampler object.
3013+
///
3014+
/// \return PI_SUCCESS on success. PI_INVALID_VALUE if given an invalid property
3015+
/// or if there is multiple of properties from the same category.
29933016
pi_result cuda_piSamplerCreate(pi_context context,
2994-
const cl_sampler_properties *sampler_properties,
3017+
const pi_sampler_properties *sampler_properties,
29953018
pi_sampler *result_sampler) {
2996-
cl::sycl::detail::pi::die("cuda_piSamplerCreate not implemented");
2997-
return {};
3019+
std::unique_ptr<_pi_sampler> retImplSampl{new _pi_sampler(context)};
3020+
3021+
bool propSeen[3] = {false, false, false};
3022+
for (size_t i = 0; sampler_properties[i] != 0; i += 2) {
3023+
switch (sampler_properties[i]) {
3024+
case PI_SAMPLER_PROPERTIES_NORMALIZED_COORDS:
3025+
if (propSeen[0]) {
3026+
return PI_INVALID_VALUE;
3027+
}
3028+
propSeen[0] = true;
3029+
retImplSampl->props_ |= sampler_properties[i + 1];
3030+
break;
3031+
case PI_SAMPLER_PROPERTIES_FILTER_MODE:
3032+
if (propSeen[1]) {
3033+
return PI_INVALID_VALUE;
3034+
}
3035+
propSeen[1] = true;
3036+
retImplSampl->props_ |=
3037+
(sampler_properties[i + 1] - PI_SAMPLER_FILTER_MODE_NEAREST) << 1;
3038+
break;
3039+
case PI_SAMPLER_PROPERTIES_ADDRESSING_MODE:
3040+
if (propSeen[2]) {
3041+
return PI_INVALID_VALUE;
3042+
}
3043+
propSeen[2] = true;
3044+
retImplSampl->props_ |=
3045+
(sampler_properties[i + 1] - PI_SAMPLER_ADDRESSING_MODE_NONE) << 2;
3046+
break;
3047+
default:
3048+
return PI_INVALID_VALUE;
3049+
}
3050+
}
3051+
3052+
if (!propSeen[0]) {
3053+
retImplSampl->props_ |= CL_TRUE;
3054+
}
3055+
// Default filter mode to CL_FILTER_NEAREST
3056+
if (!propSeen[2]) {
3057+
retImplSampl->props_ |= (CL_ADDRESS_CLAMP % CL_ADDRESS_NONE) << 2;
3058+
}
3059+
3060+
*result_sampler = retImplSampl.release();
3061+
return PI_SUCCESS;
29983062
}
29993063

3000-
/// \TODO Not implemented in CUDA, need untie from OpenCL
3064+
/// Gets information from a PI sampler object
3065+
///
3066+
/// \param[in] sampler The sampler to get the information from.
3067+
/// \param[in] param_name The name of the information to get.
3068+
/// \param[in] param_value_size The size of the param_value.
3069+
/// \param[out] param_value Set to information value.
3070+
/// \param[out] param_value_size_ret Set to the size of the information value.
3071+
///
3072+
/// \return PI_SUCCESS on success.
30013073
pi_result cuda_piSamplerGetInfo(pi_sampler sampler, cl_sampler_info param_name,
30023074
size_t param_value_size, void *param_value,
30033075
size_t *param_value_size_ret) {
3004-
cl::sycl::detail::pi::die("cuda_piSamplerGetInfo not implemented");
3076+
assert(sampler != nullptr);
3077+
3078+
switch (param_name) {
3079+
case PI_SAMPLER_INFO_REFERENCE_COUNT:
3080+
return getInfo(param_value_size, param_value, param_value_size_ret,
3081+
sampler->get_reference_count());
3082+
case PI_SAMPLER_INFO_CONTEXT:
3083+
return getInfo(param_value_size, param_value, param_value_size_ret,
3084+
sampler->context_);
3085+
case PI_SAMPLER_INFO_NORMALIZED_COORDS: {
3086+
pi_bool norm_coords_prop = static_cast<pi_bool>(sampler->props_ & 0x1);
3087+
return getInfo(param_value_size, param_value, param_value_size_ret,
3088+
norm_coords_prop);
3089+
}
3090+
case PI_SAMPLER_INFO_FILTER_MODE: {
3091+
pi_sampler_filter_mode filter_prop = static_cast<pi_sampler_filter_mode>(
3092+
((sampler->props_ >> 1) & 0x1) + PI_SAMPLER_FILTER_MODE_NEAREST);
3093+
return getInfo(param_value_size, param_value, param_value_size_ret,
3094+
filter_prop);
3095+
}
3096+
case PI_SAMPLER_INFO_ADDRESSING_MODE: {
3097+
pi_sampler_addressing_mode addressing_prop =
3098+
static_cast<pi_sampler_addressing_mode>(
3099+
(sampler->props_ >> 2) + PI_SAMPLER_ADDRESSING_MODE_NONE);
3100+
return getInfo(param_value_size, param_value, param_value_size_ret,
3101+
addressing_prop);
3102+
}
3103+
default:
3104+
PI_HANDLE_UNKNOWN_PARAM_NAME(param_name);
3105+
}
30053106
return {};
30063107
}
30073108

3008-
/// \TODO Not implemented in CUDA, need untie from OpenCL
3109+
/// Retains a PI sampler object, incrementing its reference count.
3110+
///
3111+
/// \param[in] sampler The sampler to increment the reference count of.
3112+
///
3113+
/// \return PI_SUCCESS.
30093114
pi_result cuda_piSamplerRetain(pi_sampler sampler) {
3010-
cl::sycl::detail::pi::die("cuda_piSamplerRetain not implemented");
3011-
return {};
3115+
assert(sampler != nullptr);
3116+
sampler->increment_reference_count();
3117+
return PI_SUCCESS;
30123118
}
30133119

3014-
/// \TODO Not implemented in CUDA, need untie from OpenCL
3120+
/// Releases a PI sampler object, decrementing its reference count. If the
3121+
/// reference count reaches zero, the sampler object is destroyed.
3122+
///
3123+
/// \param[in] sampler The sampler to decrement the reference count of.
3124+
///
3125+
/// \return PI_SUCCESS.
30153126
pi_result cuda_piSamplerRelease(pi_sampler sampler) {
3016-
cl::sycl::detail::pi::die("cuda_piSamplerRelease not implemented");
3017-
return {};
3127+
assert(sampler != nullptr);
3128+
3129+
// double delete or someone is messing with the ref count.
3130+
// either way, cannot safely proceed.
3131+
cl::sycl::detail::pi::assertion(
3132+
sampler->get_reference_count() != 0,
3133+
"Reference count overflow detected in cuda_piSamplerRelease.");
3134+
3135+
// decrement ref count. If it is 0, delete the sampler.
3136+
if (sampler->decrement_reference_count() == 0) {
3137+
delete sampler;
3138+
}
3139+
3140+
return PI_SUCCESS;
30183141
}
30193142

30203143
/// General 3D memory copy operation.
@@ -3934,6 +4057,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
39344057
_PI_CL(piextUSMGetMemAllocInfo, cuda_piextUSMGetMemAllocInfo)
39354058

39364059
_PI_CL(piextKernelSetArgMemObj, cuda_piextKernelSetArgMemObj)
4060+
_PI_CL(piextKernelSetArgSampler, cuda_piextKernelSetArgSampler)
39374061

39384062
#undef _PI_CL
39394063

sycl/plugins/cuda/pi_cuda.hpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -617,6 +617,26 @@ struct _pi_kernel {
617617
void clear_local_size() { args_.clear_local_size(); }
618618
};
619619

620+
/// Implementation of samplers for CUDA
621+
///
622+
/// Sampler property layout:
623+
/// | 31 30 ... 6 5 | 4 3 2 | 1 | 0 |
624+
/// | N/A | addressing mode | fiter mode | normalize coords |
625+
struct _pi_sampler {
626+
std::atomic_uint32_t refCount_;
627+
pi_uint32 props_;
628+
pi_context context_;
629+
630+
_pi_sampler(pi_context context)
631+
: refCount_(1), props_(0), context_(context) {}
632+
633+
pi_uint32 increment_reference_count() noexcept { return ++refCount_; }
634+
635+
pi_uint32 decrement_reference_count() noexcept { return --refCount_; }
636+
637+
pi_uint32 get_reference_count() const noexcept { return refCount_; }
638+
};
639+
620640
// -------------------------------------------------------------
621641
// Helper types and functions
622642
//

sycl/plugins/level_zero/pi_level0.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2003,7 +2003,7 @@ pi_result piKernelSetArg(pi_kernel Kernel, pi_uint32 ArgIndex, size_t ArgSize,
20032003
return PI_SUCCESS;
20042004
}
20052005

2006-
// Special version of piKernelSetArg to accept pi_mem and pi_sampler.
2006+
// Special version of piKernelSetArg to accept pi_mem.
20072007
pi_result piextKernelSetArgMemObj(pi_kernel Kernel, pi_uint32 ArgIndex,
20082008
const pi_mem *ArgValue) {
20092009
// TODO: the better way would probably be to add a new PI API for
@@ -2020,6 +2020,13 @@ pi_result piextKernelSetArgMemObj(pi_kernel Kernel, pi_uint32 ArgIndex,
20202020
return PI_SUCCESS;
20212021
}
20222022

2023+
// Special version of piKernelSetArg to accept pi_sampler.
2024+
pi_result piextKernelSetArgSampler(pi_kernel Kernel, pi_uint32 ArgIndex,
2025+
const pi_sampler *ArgValue) {
2026+
die("piextKernelSetArgSampler: not implemented");
2027+
return {};
2028+
}
2029+
20232030
pi_result piKernelGetInfo(pi_kernel Kernel, pi_kernel_info ParamName,
20242031
size_t ParamValueSize, void *ParamValue,
20252032
size_t *ParamValueSizeRet) {

sycl/plugins/opencl/pi_opencl.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -449,6 +449,13 @@ pi_result piextKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index,
449449
sizeof(arg_value), cast<const cl_mem *>(arg_value)));
450450
}
451451

452+
pi_result piextKernelSetArgSampler(pi_kernel kernel, pi_uint32 arg_index,
453+
const pi_sampler *arg_value) {
454+
return cast<pi_result>(
455+
clSetKernelArg(cast<cl_kernel>(kernel), cast<cl_uint>(arg_index),
456+
sizeof(cl_sampler), cast<const cl_sampler *>(arg_value)));
457+
}
458+
452459
pi_result piextGetDeviceFunctionPointer(pi_device device, pi_program program,
453460
const char *func_name,
454461
pi_uint64 *function_pointer_ret) {
@@ -1234,6 +1241,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
12341241
_PI_CL(piextUSMGetMemAllocInfo, piextUSMGetMemAllocInfo)
12351242

12361243
_PI_CL(piextKernelSetArgMemObj, piextKernelSetArgMemObj)
1244+
_PI_CL(piextKernelSetArgSampler, piextKernelSetArgSampler)
12371245

12381246
#undef _PI_CL
12391247

sycl/source/detail/scheduler/commands.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1655,10 +1655,10 @@ pi_result ExecCGCommand::SetKernelParamsAndLaunch(
16551655
}
16561656
case kernel_param_kind_t::kind_sampler: {
16571657
sampler *SamplerPtr = (sampler *)Arg.MPtr;
1658-
RT::PiSampler Sampler = detail::getSyclObjImpl(*SamplerPtr)
1659-
->getOrCreateSampler(MQueue->get_context());
1660-
Plugin.call<PiApiKind::piKernelSetArg>(Kernel, Arg.MIndex,
1661-
sizeof(cl_sampler), &Sampler);
1658+
RT::PiSampler Sampler =
1659+
detail::getSyclObjImpl(*SamplerPtr)->getOrCreateSampler(MQueue->get_context());
1660+
Plugin.call<PiApiKind::piextKernelSetArgSampler>(Kernel, Arg.MIndex,
1661+
&Sampler);
16621662
break;
16631663
}
16641664
case kernel_param_kind_t::kind_pointer: {

sycl/test/abi/pi_level0_symbol_check.dump

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ piProgramGetInfo
7777
piextGetDeviceFunctionPointer
7878
piEnqueueMemUnmap
7979
piextKernelSetArgMemObj
80+
piextKernelSetArgSampler
8081
piQueueCreate
8182
piEventCreate
8283
piKernelGetInfo

sycl/test/abi/pi_opencl_symbol_check.dump

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ piextEventCreateWithNativeHandle
2828
piextGetDeviceFunctionPointer
2929
piextProgramGetNativeHandle
3030
piextKernelSetArgMemObj
31+
piextKernelSetArgSampler
3132
piextKernelSetArgPointer
3233
piextMemCreateWithNativeHandle
3334
piextMemGetNativeHandle

0 commit comments

Comments
 (0)