@@ -2210,6 +2210,22 @@ pi_result cuda_piextKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index,
2210
2210
return retErr;
2211
2211
}
2212
2212
2213
+ pi_result cuda_piextKernelSetArgSampler (pi_kernel kernel, pi_uint32 arg_index,
2214
+ const pi_sampler *arg_value) {
2215
+
2216
+ assert (kernel != nullptr );
2217
+ assert (arg_value != nullptr );
2218
+
2219
+ pi_result retErr = PI_SUCCESS;
2220
+ try {
2221
+ pi_uint32 samplerProps = (*arg_value)->props_ ;
2222
+ kernel->set_kernel_arg (arg_index, sizeof (pi_uint32), (void *)&samplerProps);
2223
+ } catch (pi_result err) {
2224
+ retErr = err;
2225
+ }
2226
+ return retErr;
2227
+ }
2228
+
2213
2229
pi_result cuda_piEnqueueKernelLaunch (
2214
2230
pi_queue command_queue, pi_kernel kernel, pi_uint32 work_dim,
2215
2231
const size_t *global_work_offset, const size_t *global_work_size,
@@ -2989,32 +3005,139 @@ pi_result cuda_piextEventCreateWithNativeHandle(pi_native_handle nativeHandle,
2989
3005
return {};
2990
3006
}
2991
3007
2992
- // / \TODO Not implemented in CUDA, need untie from OpenCL
3008
+ // / Creates a PI sampler object
3009
+ // /
3010
+ // / \param[in] context The context the sampler is created for.
3011
+ // / \param[in] sampler_properties The properties for the sampler.
3012
+ // / \param[out] result_sampler Set to the resulting sampler object.
3013
+ // /
3014
+ // / \return PI_SUCCESS on success. PI_INVALID_VALUE if given an invalid property
3015
+ // / or if there is multiple of properties from the same category.
2993
3016
pi_result cuda_piSamplerCreate (pi_context context,
2994
- const cl_sampler_properties *sampler_properties,
3017
+ const pi_sampler_properties *sampler_properties,
2995
3018
pi_sampler *result_sampler) {
2996
- cl::sycl::detail::pi::die (" cuda_piSamplerCreate not implemented" );
2997
- return {};
3019
+ std::unique_ptr<_pi_sampler> retImplSampl{new _pi_sampler (context)};
3020
+
3021
+ bool propSeen[3 ] = {false , false , false };
3022
+ for (size_t i = 0 ; sampler_properties[i] != 0 ; i += 2 ) {
3023
+ switch (sampler_properties[i]) {
3024
+ case PI_SAMPLER_PROPERTIES_NORMALIZED_COORDS:
3025
+ if (propSeen[0 ]) {
3026
+ return PI_INVALID_VALUE;
3027
+ }
3028
+ propSeen[0 ] = true ;
3029
+ retImplSampl->props_ |= sampler_properties[i + 1 ];
3030
+ break ;
3031
+ case PI_SAMPLER_PROPERTIES_FILTER_MODE:
3032
+ if (propSeen[1 ]) {
3033
+ return PI_INVALID_VALUE;
3034
+ }
3035
+ propSeen[1 ] = true ;
3036
+ retImplSampl->props_ |=
3037
+ (sampler_properties[i + 1 ] - PI_SAMPLER_FILTER_MODE_NEAREST) << 1 ;
3038
+ break ;
3039
+ case PI_SAMPLER_PROPERTIES_ADDRESSING_MODE:
3040
+ if (propSeen[2 ]) {
3041
+ return PI_INVALID_VALUE;
3042
+ }
3043
+ propSeen[2 ] = true ;
3044
+ retImplSampl->props_ |=
3045
+ (sampler_properties[i + 1 ] - PI_SAMPLER_ADDRESSING_MODE_NONE) << 2 ;
3046
+ break ;
3047
+ default :
3048
+ return PI_INVALID_VALUE;
3049
+ }
3050
+ }
3051
+
3052
+ if (!propSeen[0 ]) {
3053
+ retImplSampl->props_ |= CL_TRUE;
3054
+ }
3055
+ // Default filter mode to CL_FILTER_NEAREST
3056
+ if (!propSeen[2 ]) {
3057
+ retImplSampl->props_ |= (CL_ADDRESS_CLAMP % CL_ADDRESS_NONE) << 2 ;
3058
+ }
3059
+
3060
+ *result_sampler = retImplSampl.release ();
3061
+ return PI_SUCCESS;
2998
3062
}
2999
3063
3000
- // / \TODO Not implemented in CUDA, need untie from OpenCL
3064
+ // / Gets information from a PI sampler object
3065
+ // /
3066
+ // / \param[in] sampler The sampler to get the information from.
3067
+ // / \param[in] param_name The name of the information to get.
3068
+ // / \param[in] param_value_size The size of the param_value.
3069
+ // / \param[out] param_value Set to information value.
3070
+ // / \param[out] param_value_size_ret Set to the size of the information value.
3071
+ // /
3072
+ // / \return PI_SUCCESS on success.
3001
3073
pi_result cuda_piSamplerGetInfo (pi_sampler sampler, cl_sampler_info param_name,
3002
3074
size_t param_value_size, void *param_value,
3003
3075
size_t *param_value_size_ret) {
3004
- cl::sycl::detail::pi::die (" cuda_piSamplerGetInfo not implemented" );
3076
+ assert (sampler != nullptr );
3077
+
3078
+ switch (param_name) {
3079
+ case PI_SAMPLER_INFO_REFERENCE_COUNT:
3080
+ return getInfo (param_value_size, param_value, param_value_size_ret,
3081
+ sampler->get_reference_count ());
3082
+ case PI_SAMPLER_INFO_CONTEXT:
3083
+ return getInfo (param_value_size, param_value, param_value_size_ret,
3084
+ sampler->context_ );
3085
+ case PI_SAMPLER_INFO_NORMALIZED_COORDS: {
3086
+ pi_bool norm_coords_prop = static_cast <pi_bool>(sampler->props_ & 0x1 );
3087
+ return getInfo (param_value_size, param_value, param_value_size_ret,
3088
+ norm_coords_prop);
3089
+ }
3090
+ case PI_SAMPLER_INFO_FILTER_MODE: {
3091
+ pi_sampler_filter_mode filter_prop = static_cast <pi_sampler_filter_mode>(
3092
+ ((sampler->props_ >> 1 ) & 0x1 ) + PI_SAMPLER_FILTER_MODE_NEAREST);
3093
+ return getInfo (param_value_size, param_value, param_value_size_ret,
3094
+ filter_prop);
3095
+ }
3096
+ case PI_SAMPLER_INFO_ADDRESSING_MODE: {
3097
+ pi_sampler_addressing_mode addressing_prop =
3098
+ static_cast <pi_sampler_addressing_mode>(
3099
+ (sampler->props_ >> 2 ) + PI_SAMPLER_ADDRESSING_MODE_NONE);
3100
+ return getInfo (param_value_size, param_value, param_value_size_ret,
3101
+ addressing_prop);
3102
+ }
3103
+ default :
3104
+ PI_HANDLE_UNKNOWN_PARAM_NAME (param_name);
3105
+ }
3005
3106
return {};
3006
3107
}
3007
3108
3008
- // / \TODO Not implemented in CUDA, need untie from OpenCL
3109
+ // / Retains a PI sampler object, incrementing its reference count.
3110
+ // /
3111
+ // / \param[in] sampler The sampler to increment the reference count of.
3112
+ // /
3113
+ // / \return PI_SUCCESS.
3009
3114
pi_result cuda_piSamplerRetain (pi_sampler sampler) {
3010
- cl::sycl::detail::pi::die (" cuda_piSamplerRetain not implemented" );
3011
- return {};
3115
+ assert (sampler != nullptr );
3116
+ sampler->increment_reference_count ();
3117
+ return PI_SUCCESS;
3012
3118
}
3013
3119
3014
- // / \TODO Not implemented in CUDA, need untie from OpenCL
3120
+ // / Releases a PI sampler object, decrementing its reference count. If the
3121
+ // / reference count reaches zero, the sampler object is destroyed.
3122
+ // /
3123
+ // / \param[in] sampler The sampler to decrement the reference count of.
3124
+ // /
3125
+ // / \return PI_SUCCESS.
3015
3126
pi_result cuda_piSamplerRelease (pi_sampler sampler) {
3016
- cl::sycl::detail::pi::die (" cuda_piSamplerRelease not implemented" );
3017
- return {};
3127
+ assert (sampler != nullptr );
3128
+
3129
+ // double delete or someone is messing with the ref count.
3130
+ // either way, cannot safely proceed.
3131
+ cl::sycl::detail::pi::assertion (
3132
+ sampler->get_reference_count () != 0 ,
3133
+ " Reference count overflow detected in cuda_piSamplerRelease." );
3134
+
3135
+ // decrement ref count. If it is 0, delete the sampler.
3136
+ if (sampler->decrement_reference_count () == 0 ) {
3137
+ delete sampler;
3138
+ }
3139
+
3140
+ return PI_SUCCESS;
3018
3141
}
3019
3142
3020
3143
// / General 3D memory copy operation.
@@ -3934,6 +4057,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
3934
4057
_PI_CL (piextUSMGetMemAllocInfo, cuda_piextUSMGetMemAllocInfo)
3935
4058
3936
4059
_PI_CL (piextKernelSetArgMemObj, cuda_piextKernelSetArgMemObj)
4060
+ _PI_CL (piextKernelSetArgSampler, cuda_piextKernelSetArgSampler)
3937
4061
3938
4062
#undef _PI_CL
3939
4063
0 commit comments