Skip to content

SYCL ext one api launch queries implementation #16709

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
1ee3633
[SYCL] implement max work group size query
dklochkov-emb Dec 9, 2024
61dce52
[SYCL] adding trats and dumps for new query
dklochkov-emb Dec 9, 2024
76e7b8f
[SYCL] rework max work group size
dklochkov-emb Dec 23, 2024
c746138
[SYCL] add get kernel sub group size query
dklochkov-emb Dec 30, 2024
ae047a2
[SYCL] add new queries
dklochkov-emb Jan 9, 2025
6da7073
[SYCL] make max_work_item_sizes dependent on dimensions
dklochkov-emb Jan 17, 2025
8da4056
[SYCL] make num_sub_groups and max_sub_group_size dependent from dime…
dklochkov-emb Jan 20, 2025
8ba6d06
[SYCL] update tests of launch queries
dklochkov-emb Jan 20, 2025
78c54cb
Merge branch 'sycl' into sycl-ext-one-api-launch-queries
dklochkov-emb Jan 21, 2025
38351d2
[SYCL] fix merge issues
dklochkov-emb Jan 21, 2025
613aeac
[SYCL] fix formatting
dklochkov-emb Jan 21, 2025
f98a316
[SYCL] fix formatting in tests
dklochkov-emb Jan 21, 2025
f09be72
[SYCL] use sub group struct in mock
dklochkov-emb Jan 22, 2025
f4ffa52
[SYCL] fix mock in unit tests
dklochkov-emb Jan 23, 2025
40cb651
[SYCL] do not include heavy headers into e2e test files
dklochkov-emb Jan 24, 2025
bf3320b
[SYCL] update includes in tests
dklochkov-emb Jan 24, 2025
ab51cf9
[SYCL] fix tests
dklochkov-emb Jan 24, 2025
607f7a1
[SYCL] rework e2e tests for sub group launch queries
dklochkov-emb Jan 24, 2025
5900ea6
[SYCL] fix assert
dklochkov-emb Jan 24, 2025
d3e7478
[SYCL] kernel value less or equal to dev value
dklochkov-emb Jan 24, 2025
ae7ce87
[SYCL] rework max work items sized tue to UR implementation
dklochkov-emb Jan 30, 2025
d340ebd
[SYCL] remove help info
dklochkov-emb Jan 30, 2025
1455516
[SYCL] do not revert values again
dklochkov-emb Jan 31, 2025
bb127aa
[SYCL] move e2e launch queries tests from Basic into separate folder
dklochkov-emb Feb 3, 2025
e449b09
[SYCL] rename template parameter
dklochkov-emb Feb 4, 2025
ea434e3
[SYCL] fix formatting
dklochkov-emb Feb 10, 2025
b7c07b6
Merge remote-tracking branch 'upstream/sycl' into sycl-ext-one-api-la…
dklochkov-emb Feb 11, 2025
1a5837b
Merge remote-tracking branch 'upstream/sycl' into sycl-ext-one-api-la…
dklochkov-emb Feb 14, 2025
d25062b
[SYCL] Update param description of new queries
dklochkov-emb Feb 21, 2025
5df8cd3
fix variables name
dklochkov-emb Feb 24, 2025
703cf26
[SYCL] fix typos and add lines to the end of files
dklochkov-emb Feb 24, 2025
d2d29fa
[SYCL] fix formatting
dklochkov-emb Feb 24, 2025
85a88ee
Merge remote-tracking branch 'origin/sycl' into sycl-ext-one-api-laun…
dklochkov-emb Feb 24, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions sycl/include/sycl/detail/info_desc_helpers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,19 @@ struct IsKernelInfo<info::kernel_device_specific::ext_codeplay_num_regs>
: std::true_type { \
using return_type = Namespace::info::DescType::Desc::return_type; \
};

#define __SYCL_PARAM_TRAITS_TEMPLATE_PARTIAL_SPEC(Namespace, Desctype, Desc, \
ReturnT, UrCode) \
template <int Dimensions> \
struct is_##Desctype##_info_desc< \
Namespace::info::Desctype::Desc<Dimensions>> : std::true_type { \
using return_type = \
typename Namespace::info::Desctype::Desc<Dimensions>::return_type; \
};

#include <sycl/info/ext_oneapi_kernel_queue_specific_traits.def>
#undef __SYCL_PARAM_TRAITS_SPEC
#undef __SYCL_PARAM_TRAITS_TEMPLATE_PARTIAL_SPEC

#define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, PiCode) \
template <> \
Expand Down
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
__SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, kernel_queue_specific, max_num_work_groups, size_t,)
__SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, kernel_queue_specific, max_work_group_size, size_t,)
__SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, kernel_queue_specific, max_sub_group_size, uint32_t,)
__SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, kernel_queue_specific, num_sub_groups, uint32_t,)
__SYCL_PARAM_TRAITS_TEMPLATE_PARTIAL_SPEC(ext::oneapi::experimental, kernel_queue_specific, max_work_item_sizes, sycl::id,)
11 changes: 11 additions & 0 deletions sycl/include/sycl/info/info_desc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,16 @@ template <typename T, T param> struct compatibility_param_traits {};
} /*namespace info */ \
} /*namespace Namespace */

#define __SYCL_PARAM_TRAITS_TEMPLATE_PARTIAL_SPEC(Namespace, Desctype, Desc, \
ReturnT, UrCode) \
namespace Namespace::info { \
namespace Desctype { \
template <int Dimensions> struct Desc { \
using return_type = ReturnT<Dimensions>; \
}; \
} \
}

namespace ext::oneapi::experimental::info::device {
template <int Dimensions> struct max_work_groups;
template <ext::oneapi::experimental::execution_scope CoordinationScope>
Expand All @@ -256,5 +266,6 @@ struct work_item_progress_capabilities;

#undef __SYCL_PARAM_TRAITS_SPEC
#undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC
#undef __SYCL_PARAM_TRAITS_TEMPLATE_PARTIAL_SPEC
} // namespace _V1
} // namespace sycl
30 changes: 30 additions & 0 deletions sycl/include/sycl/kernel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,36 @@ class __SYCL_EXPORT kernel : public detail::OwnerLessBase<kernel> {
ext_oneapi_get_info(queue Queue, const range<3> &WorkGroupSize,
size_t DynamicLocalMemorySize) const;

/// Query queue/launch-specific information from a kernel using the
/// info::kernel_queue_specific descriptor for a specific Queue and values.
///
/// \param Queue is a valid SYCL queue.
/// \param WG workgroup
/// \return depends on information being queried.
template <typename Param>
typename detail::is_kernel_queue_specific_info_desc<Param>::return_type
ext_oneapi_get_info(queue Queue, const range<3> &WG) const;

/// Query queue/launch-specific information from a kernel using the
/// info::kernel_queue_specific descriptor for a specific Queue and values.
///
/// \param Queue is a valid SYCL queue.
/// \param WG workgroup
/// \return depends on information being queried.
template <typename Param>
typename detail::is_kernel_queue_specific_info_desc<Param>::return_type
ext_oneapi_get_info(queue Queue, const range<2> &WG) const;

/// Query queue/launch-specific information from a kernel using the
/// info::kernel_queue_specific descriptor for a specific Queue and values.
///
/// \param Queue is a valid SYCL queue.
/// \param WG workgroup
/// \return depends on information being queried.
template <typename Param>
typename detail::is_kernel_queue_specific_info_desc<Param>::return_type
ext_oneapi_get_info(queue Queue, const range<1> &WG) const;

private:
/// Constructs a SYCL kernel object from a valid kernel_impl instance.
kernel(std::shared_ptr<detail::kernel_impl> Impl);
Expand Down
118 changes: 118 additions & 0 deletions sycl/source/detail/kernel_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,39 @@ class kernel_impl {
ext_oneapi_get_info(queue Queue, const range<3> &MaxWorkGroupSize,
size_t DynamicLocalMemorySize) const;

/// Query queue/launch-specific information from a kernel using the
/// info::kernel_queue_specific descriptor for a specific Queue and values.
/// max_num_work_groups is the only valid descriptor for this function.
///
/// \param Queue is a valid SYCL queue.
/// \param WG is a work group size
/// \return depends on information being queried.
template <typename Param>
Copy link
Contributor

@lbushi25 lbushi25 Feb 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we perhaps use some kind of SFINAE here to explicitly check that Param has the value defined in the spec?

It seems like at the moment we are doing SFINAE only on the return type by saying Param::return_type but there could be other descriptors that will have this type defined and we will end up instantiating declarations for them if the user supplies an invalid Param by mistake. This could lead to some weird linker errors or some runtime bug.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This isn't user-visible API. Don't we have proper constraints there?

Copy link
Contributor

@lbushi25 lbushi25 Feb 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The user-visible API in kernel.hpp which calls this one also does something similar with typename detail::is_kernel_queue_specific_info_desc<Param>::return_type which again, could potentially allow other Param types to slip through the cracks. Ideally, we'd like to follow the spec to the letter and mandate Param to be as required by the spec through SFINAE, in this case for example, to be max_num_work_groups.

typename Param::return_type ext_oneapi_get_info(queue Queue,
const range<3> &WG) const;

/// Query queue/launch-specific information from a kernel using the
/// info::kernel_queue_specific descriptor for a specific Queue and values.
/// max_num_work_groups is the only valid descriptor for this function.
///
/// \param Queue is a valid SYCL queue.
/// \param WG is a work group size
/// \return depends on information being queried.
template <typename Param>
typename Param::return_type ext_oneapi_get_info(queue Queue,
const range<2> &WG) const;

/// Query queue/launch-specific information from a kernel using the
/// info::kernel_queue_specific descriptor for a specific Queue and values.
/// max_num_work_groups is the only valid descriptor for this function.
///
/// \param Queue is a valid SYCL queue.
/// \param WG is a work group size
/// \return depends on information being queried.
template <typename Param>
typename Param::return_type ext_oneapi_get_info(queue Queue,
const range<1> &WG) const;

/// Get a constant reference to a raw kernel object.
///
/// \return a constant reference to a valid UrKernel instance with raw
Expand Down Expand Up @@ -383,6 +416,91 @@ inline typename syclex::info::kernel_queue_specific::max_num_work_groups::
DynamicLocalMemorySize);
}

template <>
inline typename syclex::info::kernel_queue_specific::max_work_group_size::
return_type
kernel_impl::ext_oneapi_get_info<
syclex::info::kernel_queue_specific::max_work_group_size>(
queue Queue) const {
const auto &Adapter = getAdapter();
const auto DeviceNativeHandle =
getSyclObjImpl(Queue.get_device())->getHandleRef();

size_t KernelWGSize = 0;
Adapter->call<UrApiKind::urKernelGetGroupInfo>(
MKernel, DeviceNativeHandle, UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE,
sizeof(size_t), &KernelWGSize, nullptr);
return KernelWGSize;
}

template <int Dimensions>
inline sycl::id<Dimensions>
generate_id(const sycl::range<Dimensions> &DevMaxWorkItemSizes,
const size_t DevWgSize) {
sycl::id<Dimensions> Ret;
for (int i = 0; i < Dimensions; i++) {
// DevMaxWorkItemSizes values are inverted, see
// sycl/source/detail/device_info.hpp:582
Ret[i] = std::min(DevMaxWorkItemSizes[i], DevWgSize);
}
return Ret;
}

#define ADD_TEMPLATE_METHOD_SPEC(Num) \
template <> \
inline typename syclex::info::kernel_queue_specific::max_work_item_sizes< \
Num>::return_type \
kernel_impl::ext_oneapi_get_info< \
syclex::info::kernel_queue_specific::max_work_item_sizes<Num>>( \
queue Queue) const { \
const auto Dev = Queue.get_device(); \
const auto DeviceWgSize = \
get_info<info::kernel_device_specific::work_group_size>(Dev); \
const auto DeviceMaxWorkItemSizes = \
Dev.get_info<info::device::max_work_item_sizes<Num>>(); \
return generate_id<Num>(DeviceMaxWorkItemSizes, DeviceWgSize); \
} // namespace detail

ADD_TEMPLATE_METHOD_SPEC(1)
ADD_TEMPLATE_METHOD_SPEC(2)
ADD_TEMPLATE_METHOD_SPEC(3)

#undef ADD_TEMPLATE_METHOD_SPEC

#define ADD_TEMPLATE_METHOD_SPEC(QueueSpec, Num, Kind, Reg) \
template <> \
inline typename syclex::info::kernel_queue_specific::QueueSpec::return_type \
kernel_impl::ext_oneapi_get_info< \
syclex::info::kernel_queue_specific::QueueSpec>( \
queue Queue, const range<Num> &WG) const { \
if (WG.size() == 0) \
throw exception(sycl::make_error_code(errc::invalid), \
"The work-group size cannot be zero."); \
const auto &Adapter = getAdapter(); \
const auto DeviceNativeHandle = \
getSyclObjImpl(Queue.get_device())->getHandleRef(); \
uint32_t KernelSubWGSize = 0; \
Adapter->call<UrApiKind::Kind>(MKernel, DeviceNativeHandle, Reg, \
sizeof(uint32_t), &KernelSubWGSize, \
nullptr); \
return KernelSubWGSize; \
}

ADD_TEMPLATE_METHOD_SPEC(max_sub_group_size, 3, urKernelGetSubGroupInfo,
UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE)
ADD_TEMPLATE_METHOD_SPEC(max_sub_group_size, 2, urKernelGetSubGroupInfo,
UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE)
ADD_TEMPLATE_METHOD_SPEC(max_sub_group_size, 1, urKernelGetSubGroupInfo,
UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE)

ADD_TEMPLATE_METHOD_SPEC(num_sub_groups, 3, urKernelGetSubGroupInfo,
UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS)
ADD_TEMPLATE_METHOD_SPEC(num_sub_groups, 2, urKernelGetSubGroupInfo,
UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS)
ADD_TEMPLATE_METHOD_SPEC(num_sub_groups, 1, urKernelGetSubGroupInfo,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit; Looks like they all use urKernelGetSubGroupInfo as their Kind argument. I think it would be easier to read if you just inline it instead.

UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS)

#undef ADD_TEMPLATE_METHOD_SPEC
} // namespace detail
} // namespace _V1
} // namespace sycl
78 changes: 78 additions & 0 deletions sycl/source/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,12 @@ kernel::ext_oneapi_get_info(queue Queue, const range<1> &WorkGroupSize,
DynamicLocalMemorySize);
}

template <typename Param>
typename detail::is_kernel_queue_specific_info_desc<Param>::return_type
kernel::ext_oneapi_get_info(queue Queue, const range<3> &WG) const {
return impl->ext_oneapi_get_info<Param>(Queue, WG);
}

template <typename Param>
typename detail::is_kernel_queue_specific_info_desc<Param>::return_type
kernel::ext_oneapi_get_info(queue Queue, const range<2> &WorkGroupSize,
Expand All @@ -127,6 +133,18 @@ kernel::ext_oneapi_get_info(queue Queue, const range<2> &WorkGroupSize,
DynamicLocalMemorySize);
}

template <typename Param>
typename detail::is_kernel_queue_specific_info_desc<Param>::return_type
kernel::ext_oneapi_get_info(queue Queue, const range<2> &WG) const {
return impl->ext_oneapi_get_info<Param>(Queue, WG);
}

template <typename Param>
typename detail::is_kernel_queue_specific_info_desc<Param>::return_type
kernel::ext_oneapi_get_info(queue Queue, const range<1> &WG) const {
return impl->ext_oneapi_get_info<Param>(Queue, WG);
}

template <typename Param>
typename detail::is_kernel_queue_specific_info_desc<Param>::return_type
kernel::ext_oneapi_get_info(queue Queue, const range<3> &WorkGroupSize,
Expand All @@ -135,6 +153,66 @@ kernel::ext_oneapi_get_info(queue Queue, const range<3> &WorkGroupSize,
DynamicLocalMemorySize);
}

template __SYCL_EXPORT typename ext::oneapi::experimental::info::
kernel_queue_specific::max_work_group_size::return_type
kernel::ext_oneapi_get_info<ext::oneapi::experimental::info::
kernel_queue_specific::max_work_group_size>(
queue Queue) const;

template __SYCL_EXPORT typename ext::oneapi::experimental::info::
kernel_queue_specific::max_work_item_sizes<1>::return_type
kernel::ext_oneapi_get_info<
ext::oneapi::experimental::info::kernel_queue_specific::
max_work_item_sizes<1>>(queue Queue) const;

template __SYCL_EXPORT typename ext::oneapi::experimental::info::
kernel_queue_specific::max_work_item_sizes<2>::return_type
kernel::ext_oneapi_get_info<
ext::oneapi::experimental::info::kernel_queue_specific::
max_work_item_sizes<2>>(queue Queue) const;

template __SYCL_EXPORT typename ext::oneapi::experimental::info::
kernel_queue_specific::max_work_item_sizes<3>::return_type
kernel::ext_oneapi_get_info<
ext::oneapi::experimental::info::kernel_queue_specific::
max_work_item_sizes<3>>(queue Queue) const;

template __SYCL_EXPORT typename ext::oneapi::experimental::info::
kernel_queue_specific::max_sub_group_size::return_type
kernel::ext_oneapi_get_info<ext::oneapi::experimental::info::
kernel_queue_specific::max_sub_group_size>(
queue Queue, const range<3> &) const;

template __SYCL_EXPORT typename ext::oneapi::experimental::info::
kernel_queue_specific::max_sub_group_size::return_type
kernel::ext_oneapi_get_info<ext::oneapi::experimental::info::
kernel_queue_specific::max_sub_group_size>(
queue Queue, const range<2> &) const;

template __SYCL_EXPORT typename ext::oneapi::experimental::info::
kernel_queue_specific::max_sub_group_size::return_type
kernel::ext_oneapi_get_info<ext::oneapi::experimental::info::
kernel_queue_specific::max_sub_group_size>(
queue Queue, const range<1> &) const;

template __SYCL_EXPORT typename ext::oneapi::experimental::info::
kernel_queue_specific::num_sub_groups::return_type
kernel::ext_oneapi_get_info<
ext::oneapi::experimental::info::kernel_queue_specific::num_sub_groups>(
queue Queue, const range<3> &) const;

template __SYCL_EXPORT typename ext::oneapi::experimental::info::
kernel_queue_specific::num_sub_groups::return_type
kernel::ext_oneapi_get_info<
ext::oneapi::experimental::info::kernel_queue_specific::num_sub_groups>(
queue Queue, const range<2> &) const;

template __SYCL_EXPORT typename ext::oneapi::experimental::info::
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lots of repeated code here, I would suggest that we hide it under a macro like it is done for max_num_work_groups below

kernel_queue_specific::num_sub_groups::return_type
kernel::ext_oneapi_get_info<
ext::oneapi::experimental::info::kernel_queue_specific::num_sub_groups>(
queue Queue, const range<1> &) const;

#define __SYCL_PARAM_TRAITS_SPEC(Namespace, DescType, Desc, ReturnT) \
template __SYCL_EXPORT ReturnT \
kernel::ext_oneapi_get_info<Namespace::info::DescType::Desc>( \
Expand Down
Loading