Skip to content

Commit 4849b71

Browse files
[SYCL] Implement remaining bits of sycl_ext_oneapi_launch_queries (#16709)
See the spec [here](https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/proposed/sycl_ext_oneapi_launch_queries.asciidoc) --------- Co-authored-by: Steffen Larsen <[email protected]>
1 parent f022906 commit 4849b71

16 files changed

+924
-0
lines changed

sycl/include/sycl/detail/info_desc_helpers.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,19 @@ struct IsKernelInfo<info::kernel_device_specific::ext_codeplay_num_regs>
118118
: std::true_type { \
119119
using return_type = Namespace::info::DescType::Desc::return_type; \
120120
};
121+
122+
#define __SYCL_PARAM_TRAITS_TEMPLATE_PARTIAL_SPEC(Namespace, Desctype, Desc, \
123+
ReturnT, UrCode) \
124+
template <int Dimensions> \
125+
struct is_##Desctype##_info_desc< \
126+
Namespace::info::Desctype::Desc<Dimensions>> : std::true_type { \
127+
using return_type = \
128+
typename Namespace::info::Desctype::Desc<Dimensions>::return_type; \
129+
};
130+
121131
#include <sycl/info/ext_oneapi_kernel_queue_specific_traits.def>
122132
#undef __SYCL_PARAM_TRAITS_SPEC
133+
#undef __SYCL_PARAM_TRAITS_TEMPLATE_PARTIAL_SPEC
123134

124135
#define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, PiCode) \
125136
template <> \
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,5 @@
11
__SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, kernel_queue_specific, max_num_work_groups, size_t,)
2+
__SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, kernel_queue_specific, max_work_group_size, size_t,)
3+
__SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, kernel_queue_specific, max_sub_group_size, uint32_t,)
4+
__SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, kernel_queue_specific, num_sub_groups, uint32_t,)
5+
__SYCL_PARAM_TRAITS_TEMPLATE_PARTIAL_SPEC(ext::oneapi::experimental, kernel_queue_specific, max_work_item_sizes, sycl::id,)

sycl/include/sycl/info/info_desc.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,16 @@ template <typename T, T param> struct compatibility_param_traits {};
239239
} /*namespace info */ \
240240
} /*namespace Namespace */
241241

242+
#define __SYCL_PARAM_TRAITS_TEMPLATE_PARTIAL_SPEC(Namespace, Desctype, Desc, \
243+
ReturnT, UrCode) \
244+
namespace Namespace::info { \
245+
namespace Desctype { \
246+
template <int Dimensions> struct Desc { \
247+
using return_type = ReturnT<Dimensions>; \
248+
}; \
249+
} \
250+
}
251+
242252
namespace ext::oneapi::experimental::info::device {
243253
template <int Dimensions> struct max_work_groups;
244254
template <ext::oneapi::experimental::execution_scope CoordinationScope>
@@ -256,5 +266,6 @@ struct work_item_progress_capabilities;
256266

257267
#undef __SYCL_PARAM_TRAITS_SPEC
258268
#undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC
269+
#undef __SYCL_PARAM_TRAITS_TEMPLATE_PARTIAL_SPEC
259270
} // namespace _V1
260271
} // namespace sycl

sycl/include/sycl/kernel.hpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,36 @@ class __SYCL_EXPORT kernel : public detail::OwnerLessBase<kernel> {
219219
ext_oneapi_get_info(queue Queue, const range<3> &WorkGroupSize,
220220
size_t DynamicLocalMemorySize) const;
221221

222+
/// Query queue/launch-specific information from a kernel using the
223+
/// info::kernel_queue_specific descriptor for a specific Queue and values.
224+
///
225+
/// \param Queue is a valid SYCL queue.
226+
/// \param WG workgroup
227+
/// \return depends on information being queried.
228+
template <typename Param>
229+
typename detail::is_kernel_queue_specific_info_desc<Param>::return_type
230+
ext_oneapi_get_info(queue Queue, const range<3> &WG) const;
231+
232+
/// Query queue/launch-specific information from a kernel using the
233+
/// info::kernel_queue_specific descriptor for a specific Queue and values.
234+
///
235+
/// \param Queue is a valid SYCL queue.
236+
/// \param WG workgroup
237+
/// \return depends on information being queried.
238+
template <typename Param>
239+
typename detail::is_kernel_queue_specific_info_desc<Param>::return_type
240+
ext_oneapi_get_info(queue Queue, const range<2> &WG) const;
241+
242+
/// Query queue/launch-specific information from a kernel using the
243+
/// info::kernel_queue_specific descriptor for a specific Queue and values.
244+
///
245+
/// \param Queue is a valid SYCL queue.
246+
/// \param WG workgroup
247+
/// \return depends on information being queried.
248+
template <typename Param>
249+
typename detail::is_kernel_queue_specific_info_desc<Param>::return_type
250+
ext_oneapi_get_info(queue Queue, const range<1> &WG) const;
251+
222252
private:
223253
/// Constructs a SYCL kernel object from a valid kernel_impl instance.
224254
kernel(std::shared_ptr<detail::kernel_impl> Impl);

sycl/source/detail/kernel_impl.hpp

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,39 @@ class kernel_impl {
162162
ext_oneapi_get_info(queue Queue, const range<3> &MaxWorkGroupSize,
163163
size_t DynamicLocalMemorySize) const;
164164

165+
/// Query queue/launch-specific information from a kernel using the
166+
/// info::kernel_queue_specific descriptor for a specific Queue and values.
167+
/// max_num_work_groups is the only valid descriptor for this function.
168+
///
169+
/// \param Queue is a valid SYCL queue.
170+
/// \param WG is a work group size
171+
/// \return depends on information being queried.
172+
template <typename Param>
173+
typename Param::return_type ext_oneapi_get_info(queue Queue,
174+
const range<3> &WG) const;
175+
176+
/// Query queue/launch-specific information from a kernel using the
177+
/// info::kernel_queue_specific descriptor for a specific Queue and values.
178+
/// max_num_work_groups is the only valid descriptor for this function.
179+
///
180+
/// \param Queue is a valid SYCL queue.
181+
/// \param WG is a work group size
182+
/// \return depends on information being queried.
183+
template <typename Param>
184+
typename Param::return_type ext_oneapi_get_info(queue Queue,
185+
const range<2> &WG) const;
186+
187+
/// Query queue/launch-specific information from a kernel using the
188+
/// info::kernel_queue_specific descriptor for a specific Queue and values.
189+
/// max_num_work_groups is the only valid descriptor for this function.
190+
///
191+
/// \param Queue is a valid SYCL queue.
192+
/// \param WG is a work group size
193+
/// \return depends on information being queried.
194+
template <typename Param>
195+
typename Param::return_type ext_oneapi_get_info(queue Queue,
196+
const range<1> &WG) const;
197+
165198
/// Get a constant reference to a raw kernel object.
166199
///
167200
/// \return a constant reference to a valid UrKernel instance with raw
@@ -383,6 +416,91 @@ inline typename syclex::info::kernel_queue_specific::max_num_work_groups::
383416
DynamicLocalMemorySize);
384417
}
385418

419+
template <>
420+
inline typename syclex::info::kernel_queue_specific::max_work_group_size::
421+
return_type
422+
kernel_impl::ext_oneapi_get_info<
423+
syclex::info::kernel_queue_specific::max_work_group_size>(
424+
queue Queue) const {
425+
const auto &Adapter = getAdapter();
426+
const auto DeviceNativeHandle =
427+
getSyclObjImpl(Queue.get_device())->getHandleRef();
428+
429+
size_t KernelWGSize = 0;
430+
Adapter->call<UrApiKind::urKernelGetGroupInfo>(
431+
MKernel, DeviceNativeHandle, UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE,
432+
sizeof(size_t), &KernelWGSize, nullptr);
433+
return KernelWGSize;
434+
}
435+
436+
template <int Dimensions>
437+
inline sycl::id<Dimensions>
438+
generate_id(const sycl::range<Dimensions> &DevMaxWorkItemSizes,
439+
const size_t DevWgSize) {
440+
sycl::id<Dimensions> Ret;
441+
for (int i = 0; i < Dimensions; i++) {
442+
// DevMaxWorkItemSizes values are inverted, see
443+
// sycl/source/detail/device_info.hpp:582
444+
Ret[i] = std::min(DevMaxWorkItemSizes[i], DevWgSize);
445+
}
446+
return Ret;
447+
}
448+
449+
#define ADD_TEMPLATE_METHOD_SPEC(Num) \
450+
template <> \
451+
inline typename syclex::info::kernel_queue_specific::max_work_item_sizes< \
452+
Num>::return_type \
453+
kernel_impl::ext_oneapi_get_info< \
454+
syclex::info::kernel_queue_specific::max_work_item_sizes<Num>>( \
455+
queue Queue) const { \
456+
const auto Dev = Queue.get_device(); \
457+
const auto DeviceWgSize = \
458+
get_info<info::kernel_device_specific::work_group_size>(Dev); \
459+
const auto DeviceMaxWorkItemSizes = \
460+
Dev.get_info<info::device::max_work_item_sizes<Num>>(); \
461+
return generate_id<Num>(DeviceMaxWorkItemSizes, DeviceWgSize); \
462+
} // namespace detail
463+
464+
ADD_TEMPLATE_METHOD_SPEC(1)
465+
ADD_TEMPLATE_METHOD_SPEC(2)
466+
ADD_TEMPLATE_METHOD_SPEC(3)
467+
468+
#undef ADD_TEMPLATE_METHOD_SPEC
469+
470+
#define ADD_TEMPLATE_METHOD_SPEC(QueueSpec, Num, Kind, Reg) \
471+
template <> \
472+
inline typename syclex::info::kernel_queue_specific::QueueSpec::return_type \
473+
kernel_impl::ext_oneapi_get_info< \
474+
syclex::info::kernel_queue_specific::QueueSpec>( \
475+
queue Queue, const range<Num> &WG) const { \
476+
if (WG.size() == 0) \
477+
throw exception(sycl::make_error_code(errc::invalid), \
478+
"The work-group size cannot be zero."); \
479+
const auto &Adapter = getAdapter(); \
480+
const auto DeviceNativeHandle = \
481+
getSyclObjImpl(Queue.get_device())->getHandleRef(); \
482+
uint32_t KernelSubWGSize = 0; \
483+
Adapter->call<UrApiKind::Kind>(MKernel, DeviceNativeHandle, Reg, \
484+
sizeof(uint32_t), &KernelSubWGSize, \
485+
nullptr); \
486+
return KernelSubWGSize; \
487+
}
488+
489+
ADD_TEMPLATE_METHOD_SPEC(max_sub_group_size, 3, urKernelGetSubGroupInfo,
490+
UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE)
491+
ADD_TEMPLATE_METHOD_SPEC(max_sub_group_size, 2, urKernelGetSubGroupInfo,
492+
UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE)
493+
ADD_TEMPLATE_METHOD_SPEC(max_sub_group_size, 1, urKernelGetSubGroupInfo,
494+
UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE)
495+
496+
ADD_TEMPLATE_METHOD_SPEC(num_sub_groups, 3, urKernelGetSubGroupInfo,
497+
UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS)
498+
ADD_TEMPLATE_METHOD_SPEC(num_sub_groups, 2, urKernelGetSubGroupInfo,
499+
UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS)
500+
ADD_TEMPLATE_METHOD_SPEC(num_sub_groups, 1, urKernelGetSubGroupInfo,
501+
UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS)
502+
503+
#undef ADD_TEMPLATE_METHOD_SPEC
386504
} // namespace detail
387505
} // namespace _V1
388506
} // namespace sycl

sycl/source/kernel.cpp

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,12 @@ kernel::ext_oneapi_get_info(queue Queue, const range<1> &WorkGroupSize,
119119
DynamicLocalMemorySize);
120120
}
121121

122+
template <typename Param>
123+
typename detail::is_kernel_queue_specific_info_desc<Param>::return_type
124+
kernel::ext_oneapi_get_info(queue Queue, const range<3> &WG) const {
125+
return impl->ext_oneapi_get_info<Param>(Queue, WG);
126+
}
127+
122128
template <typename Param>
123129
typename detail::is_kernel_queue_specific_info_desc<Param>::return_type
124130
kernel::ext_oneapi_get_info(queue Queue, const range<2> &WorkGroupSize,
@@ -127,6 +133,18 @@ kernel::ext_oneapi_get_info(queue Queue, const range<2> &WorkGroupSize,
127133
DynamicLocalMemorySize);
128134
}
129135

136+
template <typename Param>
137+
typename detail::is_kernel_queue_specific_info_desc<Param>::return_type
138+
kernel::ext_oneapi_get_info(queue Queue, const range<2> &WG) const {
139+
return impl->ext_oneapi_get_info<Param>(Queue, WG);
140+
}
141+
142+
template <typename Param>
143+
typename detail::is_kernel_queue_specific_info_desc<Param>::return_type
144+
kernel::ext_oneapi_get_info(queue Queue, const range<1> &WG) const {
145+
return impl->ext_oneapi_get_info<Param>(Queue, WG);
146+
}
147+
130148
template <typename Param>
131149
typename detail::is_kernel_queue_specific_info_desc<Param>::return_type
132150
kernel::ext_oneapi_get_info(queue Queue, const range<3> &WorkGroupSize,
@@ -135,6 +153,66 @@ kernel::ext_oneapi_get_info(queue Queue, const range<3> &WorkGroupSize,
135153
DynamicLocalMemorySize);
136154
}
137155

156+
template __SYCL_EXPORT typename ext::oneapi::experimental::info::
157+
kernel_queue_specific::max_work_group_size::return_type
158+
kernel::ext_oneapi_get_info<ext::oneapi::experimental::info::
159+
kernel_queue_specific::max_work_group_size>(
160+
queue Queue) const;
161+
162+
template __SYCL_EXPORT typename ext::oneapi::experimental::info::
163+
kernel_queue_specific::max_work_item_sizes<1>::return_type
164+
kernel::ext_oneapi_get_info<
165+
ext::oneapi::experimental::info::kernel_queue_specific::
166+
max_work_item_sizes<1>>(queue Queue) const;
167+
168+
template __SYCL_EXPORT typename ext::oneapi::experimental::info::
169+
kernel_queue_specific::max_work_item_sizes<2>::return_type
170+
kernel::ext_oneapi_get_info<
171+
ext::oneapi::experimental::info::kernel_queue_specific::
172+
max_work_item_sizes<2>>(queue Queue) const;
173+
174+
template __SYCL_EXPORT typename ext::oneapi::experimental::info::
175+
kernel_queue_specific::max_work_item_sizes<3>::return_type
176+
kernel::ext_oneapi_get_info<
177+
ext::oneapi::experimental::info::kernel_queue_specific::
178+
max_work_item_sizes<3>>(queue Queue) const;
179+
180+
template __SYCL_EXPORT typename ext::oneapi::experimental::info::
181+
kernel_queue_specific::max_sub_group_size::return_type
182+
kernel::ext_oneapi_get_info<ext::oneapi::experimental::info::
183+
kernel_queue_specific::max_sub_group_size>(
184+
queue Queue, const range<3> &) const;
185+
186+
template __SYCL_EXPORT typename ext::oneapi::experimental::info::
187+
kernel_queue_specific::max_sub_group_size::return_type
188+
kernel::ext_oneapi_get_info<ext::oneapi::experimental::info::
189+
kernel_queue_specific::max_sub_group_size>(
190+
queue Queue, const range<2> &) const;
191+
192+
template __SYCL_EXPORT typename ext::oneapi::experimental::info::
193+
kernel_queue_specific::max_sub_group_size::return_type
194+
kernel::ext_oneapi_get_info<ext::oneapi::experimental::info::
195+
kernel_queue_specific::max_sub_group_size>(
196+
queue Queue, const range<1> &) const;
197+
198+
template __SYCL_EXPORT typename ext::oneapi::experimental::info::
199+
kernel_queue_specific::num_sub_groups::return_type
200+
kernel::ext_oneapi_get_info<
201+
ext::oneapi::experimental::info::kernel_queue_specific::num_sub_groups>(
202+
queue Queue, const range<3> &) const;
203+
204+
template __SYCL_EXPORT typename ext::oneapi::experimental::info::
205+
kernel_queue_specific::num_sub_groups::return_type
206+
kernel::ext_oneapi_get_info<
207+
ext::oneapi::experimental::info::kernel_queue_specific::num_sub_groups>(
208+
queue Queue, const range<2> &) const;
209+
210+
template __SYCL_EXPORT typename ext::oneapi::experimental::info::
211+
kernel_queue_specific::num_sub_groups::return_type
212+
kernel::ext_oneapi_get_info<
213+
ext::oneapi::experimental::info::kernel_queue_specific::num_sub_groups>(
214+
queue Queue, const range<1> &) const;
215+
138216
#define __SYCL_PARAM_TRAITS_SPEC(Namespace, DescType, Desc, ReturnT) \
139217
template __SYCL_EXPORT ReturnT \
140218
kernel::ext_oneapi_get_info<Namespace::info::DescType::Desc>( \

0 commit comments

Comments
 (0)