-
Notifications
You must be signed in to change notification settings - Fork 787
SYCL ext one api launch queries implementation #16709
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
1ee3633
61dce52
76e7b8f
c746138
ae047a2
6da7073
8da4056
8ba6d06
78c54cb
38351d2
613aeac
f98a316
f09be72
f4ffa52
40cb651
bf3320b
ab51cf9
607f7a1
5900ea6
d3e7478
ae7ce87
d340ebd
1455516
bb127aa
e449b09
ea434e3
b7c07b6
1a5837b
d25062b
5df8cd3
703cf26
d2d29fa
85a88ee
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,5 @@ | ||
__SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, kernel_queue_specific, max_num_work_groups, size_t,) | ||
__SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, kernel_queue_specific, max_work_group_size, size_t,) | ||
__SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, kernel_queue_specific, max_sub_group_size, uint32_t,) | ||
__SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, kernel_queue_specific, num_sub_groups, uint32_t,) | ||
__SYCL_PARAM_TRAITS_TEMPLATE_PARTIAL_SPEC(ext::oneapi::experimental, kernel_queue_specific, max_work_item_sizes, sycl::id,) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -162,6 +162,39 @@ class kernel_impl { | |
ext_oneapi_get_info(queue Queue, const range<3> &MaxWorkGroupSize, | ||
size_t DynamicLocalMemorySize) const; | ||
|
||
/// Query queue/launch-specific information from a kernel using the | ||
/// info::kernel_queue_specific descriptor for a specific Queue and values. | ||
/// max_num_work_groups is the only valid descriptor for this function. | ||
/// | ||
/// \param Queue is a valid SYCL queue. | ||
/// \param WG is a work group size | ||
/// \return depends on information being queried. | ||
template <typename Param> | ||
typename Param::return_type ext_oneapi_get_info(queue Queue, | ||
const range<3> &WG) const; | ||
|
||
/// Query queue/launch-specific information from a kernel using the | ||
/// info::kernel_queue_specific descriptor for a specific Queue and values. | ||
/// max_num_work_groups is the only valid descriptor for this function. | ||
/// | ||
/// \param Queue is a valid SYCL queue. | ||
/// \param WG is a work group size | ||
/// \return depends on information being queried. | ||
template <typename Param> | ||
typename Param::return_type ext_oneapi_get_info(queue Queue, | ||
const range<2> &WG) const; | ||
|
||
/// Query queue/launch-specific information from a kernel using the | ||
/// info::kernel_queue_specific descriptor for a specific Queue and values. | ||
/// max_num_work_groups is the only valid descriptor for this function. | ||
/// | ||
/// \param Queue is a valid SYCL queue. | ||
/// \param WG is a work group size | ||
/// \return depends on information being queried. | ||
template <typename Param> | ||
typename Param::return_type ext_oneapi_get_info(queue Queue, | ||
const range<1> &WG) const; | ||
|
||
/// Get a constant reference to a raw kernel object. | ||
/// | ||
/// \return a constant reference to a valid UrKernel instance with raw | ||
|
@@ -383,6 +416,91 @@ inline typename syclex::info::kernel_queue_specific::max_num_work_groups:: | |
DynamicLocalMemorySize); | ||
} | ||
|
||
template <> | ||
inline typename syclex::info::kernel_queue_specific::max_work_group_size:: | ||
return_type | ||
kernel_impl::ext_oneapi_get_info< | ||
syclex::info::kernel_queue_specific::max_work_group_size>( | ||
queue Queue) const { | ||
const auto &Adapter = getAdapter(); | ||
const auto DeviceNativeHandle = | ||
getSyclObjImpl(Queue.get_device())->getHandleRef(); | ||
|
||
size_t KernelWGSize = 0; | ||
Adapter->call<UrApiKind::urKernelGetGroupInfo>( | ||
MKernel, DeviceNativeHandle, UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE, | ||
sizeof(size_t), &KernelWGSize, nullptr); | ||
return KernelWGSize; | ||
} | ||
|
||
template <int Dimensions> | ||
inline sycl::id<Dimensions> | ||
generate_id(const sycl::range<Dimensions> &DevMaxWorkItemSizes, | ||
const size_t DevWgSize) { | ||
sycl::id<Dimensions> Ret; | ||
for (int i = 0; i < Dimensions; i++) { | ||
// DevMaxWorkItemSizes values are inverted, see | ||
// sycl/source/detail/device_info.hpp:582 | ||
Ret[i] = std::min(DevMaxWorkItemSizes[i], DevWgSize); | ||
} | ||
return Ret; | ||
} | ||
|
||
#define ADD_TEMPLATE_METHOD_SPEC(Num) \ | ||
template <> \ | ||
inline typename syclex::info::kernel_queue_specific::max_work_item_sizes< \ | ||
Num>::return_type \ | ||
kernel_impl::ext_oneapi_get_info< \ | ||
syclex::info::kernel_queue_specific::max_work_item_sizes<Num>>( \ | ||
queue Queue) const { \ | ||
const auto Dev = Queue.get_device(); \ | ||
const auto DeviceWgSize = \ | ||
get_info<info::kernel_device_specific::work_group_size>(Dev); \ | ||
const auto DeviceMaxWorkItemSizes = \ | ||
Dev.get_info<info::device::max_work_item_sizes<Num>>(); \ | ||
return generate_id<Num>(DeviceMaxWorkItemSizes, DeviceWgSize); \ | ||
} // namespace detail | ||
|
||
ADD_TEMPLATE_METHOD_SPEC(1) | ||
ADD_TEMPLATE_METHOD_SPEC(2) | ||
ADD_TEMPLATE_METHOD_SPEC(3) | ||
|
||
#undef ADD_TEMPLATE_METHOD_SPEC | ||
|
||
#define ADD_TEMPLATE_METHOD_SPEC(QueueSpec, Num, Kind, Reg) \ | ||
template <> \ | ||
inline typename syclex::info::kernel_queue_specific::QueueSpec::return_type \ | ||
kernel_impl::ext_oneapi_get_info< \ | ||
syclex::info::kernel_queue_specific::QueueSpec>( \ | ||
queue Queue, const range<Num> &WG) const { \ | ||
if (WG.size() == 0) \ | ||
throw exception(sycl::make_error_code(errc::invalid), \ | ||
"The work-group size cannot be zero."); \ | ||
const auto &Adapter = getAdapter(); \ | ||
const auto DeviceNativeHandle = \ | ||
getSyclObjImpl(Queue.get_device())->getHandleRef(); \ | ||
uint32_t KernelSubWGSize = 0; \ | ||
Adapter->call<UrApiKind::Kind>(MKernel, DeviceNativeHandle, Reg, \ | ||
sizeof(uint32_t), &KernelSubWGSize, \ | ||
nullptr); \ | ||
return KernelSubWGSize; \ | ||
} | ||
|
||
ADD_TEMPLATE_METHOD_SPEC(max_sub_group_size, 3, urKernelGetSubGroupInfo, | ||
UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE) | ||
ADD_TEMPLATE_METHOD_SPEC(max_sub_group_size, 2, urKernelGetSubGroupInfo, | ||
UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE) | ||
ADD_TEMPLATE_METHOD_SPEC(max_sub_group_size, 1, urKernelGetSubGroupInfo, | ||
UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE) | ||
|
||
ADD_TEMPLATE_METHOD_SPEC(num_sub_groups, 3, urKernelGetSubGroupInfo, | ||
UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS) | ||
ADD_TEMPLATE_METHOD_SPEC(num_sub_groups, 2, urKernelGetSubGroupInfo, | ||
UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS) | ||
ADD_TEMPLATE_METHOD_SPEC(num_sub_groups, 1, urKernelGetSubGroupInfo, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit; Looks like they all use |
||
UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS) | ||
|
||
#undef ADD_TEMPLATE_METHOD_SPEC | ||
} // namespace detail | ||
} // namespace _V1 | ||
} // namespace sycl |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -119,6 +119,12 @@ kernel::ext_oneapi_get_info(queue Queue, const range<1> &WorkGroupSize, | |
DynamicLocalMemorySize); | ||
} | ||
|
||
template <typename Param> | ||
typename detail::is_kernel_queue_specific_info_desc<Param>::return_type | ||
kernel::ext_oneapi_get_info(queue Queue, const range<3> &WG) const { | ||
return impl->ext_oneapi_get_info<Param>(Queue, WG); | ||
} | ||
|
||
template <typename Param> | ||
typename detail::is_kernel_queue_specific_info_desc<Param>::return_type | ||
kernel::ext_oneapi_get_info(queue Queue, const range<2> &WorkGroupSize, | ||
|
@@ -127,6 +133,18 @@ kernel::ext_oneapi_get_info(queue Queue, const range<2> &WorkGroupSize, | |
DynamicLocalMemorySize); | ||
} | ||
|
||
template <typename Param> | ||
typename detail::is_kernel_queue_specific_info_desc<Param>::return_type | ||
kernel::ext_oneapi_get_info(queue Queue, const range<2> &WG) const { | ||
return impl->ext_oneapi_get_info<Param>(Queue, WG); | ||
} | ||
|
||
template <typename Param> | ||
typename detail::is_kernel_queue_specific_info_desc<Param>::return_type | ||
kernel::ext_oneapi_get_info(queue Queue, const range<1> &WG) const { | ||
return impl->ext_oneapi_get_info<Param>(Queue, WG); | ||
} | ||
|
||
template <typename Param> | ||
typename detail::is_kernel_queue_specific_info_desc<Param>::return_type | ||
kernel::ext_oneapi_get_info(queue Queue, const range<3> &WorkGroupSize, | ||
|
@@ -135,6 +153,66 @@ kernel::ext_oneapi_get_info(queue Queue, const range<3> &WorkGroupSize, | |
DynamicLocalMemorySize); | ||
} | ||
|
||
template __SYCL_EXPORT typename ext::oneapi::experimental::info:: | ||
kernel_queue_specific::max_work_group_size::return_type | ||
kernel::ext_oneapi_get_info<ext::oneapi::experimental::info:: | ||
kernel_queue_specific::max_work_group_size>( | ||
queue Queue) const; | ||
|
||
template __SYCL_EXPORT typename ext::oneapi::experimental::info:: | ||
kernel_queue_specific::max_work_item_sizes<1>::return_type | ||
kernel::ext_oneapi_get_info< | ||
ext::oneapi::experimental::info::kernel_queue_specific:: | ||
max_work_item_sizes<1>>(queue Queue) const; | ||
|
||
template __SYCL_EXPORT typename ext::oneapi::experimental::info:: | ||
kernel_queue_specific::max_work_item_sizes<2>::return_type | ||
kernel::ext_oneapi_get_info< | ||
ext::oneapi::experimental::info::kernel_queue_specific:: | ||
max_work_item_sizes<2>>(queue Queue) const; | ||
|
||
template __SYCL_EXPORT typename ext::oneapi::experimental::info:: | ||
kernel_queue_specific::max_work_item_sizes<3>::return_type | ||
kernel::ext_oneapi_get_info< | ||
ext::oneapi::experimental::info::kernel_queue_specific:: | ||
max_work_item_sizes<3>>(queue Queue) const; | ||
|
||
template __SYCL_EXPORT typename ext::oneapi::experimental::info:: | ||
kernel_queue_specific::max_sub_group_size::return_type | ||
kernel::ext_oneapi_get_info<ext::oneapi::experimental::info:: | ||
kernel_queue_specific::max_sub_group_size>( | ||
queue Queue, const range<3> &) const; | ||
|
||
template __SYCL_EXPORT typename ext::oneapi::experimental::info:: | ||
kernel_queue_specific::max_sub_group_size::return_type | ||
kernel::ext_oneapi_get_info<ext::oneapi::experimental::info:: | ||
kernel_queue_specific::max_sub_group_size>( | ||
queue Queue, const range<2> &) const; | ||
|
||
template __SYCL_EXPORT typename ext::oneapi::experimental::info:: | ||
kernel_queue_specific::max_sub_group_size::return_type | ||
kernel::ext_oneapi_get_info<ext::oneapi::experimental::info:: | ||
kernel_queue_specific::max_sub_group_size>( | ||
queue Queue, const range<1> &) const; | ||
|
||
template __SYCL_EXPORT typename ext::oneapi::experimental::info:: | ||
kernel_queue_specific::num_sub_groups::return_type | ||
kernel::ext_oneapi_get_info< | ||
ext::oneapi::experimental::info::kernel_queue_specific::num_sub_groups>( | ||
queue Queue, const range<3> &) const; | ||
|
||
template __SYCL_EXPORT typename ext::oneapi::experimental::info:: | ||
kernel_queue_specific::num_sub_groups::return_type | ||
kernel::ext_oneapi_get_info< | ||
ext::oneapi::experimental::info::kernel_queue_specific::num_sub_groups>( | ||
queue Queue, const range<2> &) const; | ||
|
||
template __SYCL_EXPORT typename ext::oneapi::experimental::info:: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Lots of repeated code here, I would suggest that we hide it under a macro like it is done for |
||
kernel_queue_specific::num_sub_groups::return_type | ||
kernel::ext_oneapi_get_info< | ||
ext::oneapi::experimental::info::kernel_queue_specific::num_sub_groups>( | ||
queue Queue, const range<1> &) const; | ||
|
||
#define __SYCL_PARAM_TRAITS_SPEC(Namespace, DescType, Desc, ReturnT) \ | ||
template __SYCL_EXPORT ReturnT \ | ||
kernel::ext_oneapi_get_info<Namespace::info::DescType::Desc>( \ | ||
|
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we perhaps use some kind of SFINAE here to explicitly check that
Param
has the value defined in the spec?It seems like at the moment we are doing SFINAE only on the return type by saying
Param::return_type
but there could be other descriptors that will have this type defined and we will end up instantiating declarations for them if the user supplies an invalidParam
by mistake. This could lead to some weird linker errors or some runtime bug.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This isn't user-visible API. Don't we have proper constraints there?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The user-visible API in
kernel.hpp
which calls this one also does something similar withtypename detail::is_kernel_queue_specific_info_desc<Param>::return_type
which again, could potentially allow otherParam
types to slip through the cracks. Ideally, we'd like to follow the spec to the letter and mandateParam
to be as required by the spec through SFINAE, in this case for example, to bemax_num_work_groups
.