@@ -1208,8 +1208,8 @@ class __SYCL_EXPORT handler {
1208
1208
using KName = std::conditional_t <std::is_same<KernelType, NameT>::value,
1209
1209
decltype (Wrapper), NameWT>;
1210
1210
1211
- kernel_parallel_for_wrapper<KName, TransformedArgType , decltype (Wrapper),
1212
- PropertiesT>( Wrapper);
1211
+ KernelWrapper<WrapAs::parallel_for, KName , decltype (Wrapper),
1212
+ TransformedArgType, PropertiesT>:: wrap ( this , Wrapper);
1213
1213
#ifndef __SYCL_DEVICE_ONLY__
1214
1214
verifyUsedKernelBundleInternal (
1215
1215
detail::string_view{detail::getKernelName<NameT>()});
@@ -1234,8 +1234,8 @@ class __SYCL_EXPORT handler {
1234
1234
#ifndef __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__
1235
1235
// If parallel_for range rounding is forced then only range rounded
1236
1236
// kernel is generated
1237
- kernel_parallel_for_wrapper<NameT, TransformedArgType , KernelType,
1238
- PropertiesT>( KernelFunc);
1237
+ KernelWrapper<WrapAs::parallel_for, NameT , KernelType, TransformedArgType ,
1238
+ PropertiesT>:: wrap ( this , KernelFunc);
1239
1239
#ifndef __SYCL_DEVICE_ONLY__
1240
1240
verifyUsedKernelBundleInternal (
1241
1241
detail::string_view{detail::getKernelName<NameT>()});
@@ -1283,8 +1283,8 @@ class __SYCL_EXPORT handler {
1283
1283
1284
1284
(void )ExecutionRange;
1285
1285
(void )Props;
1286
- kernel_parallel_for_wrapper<NameT, TransformedArgType , KernelType,
1287
- PropertiesT>( KernelFunc);
1286
+ KernelWrapper<WrapAs::parallel_for, NameT , KernelType, TransformedArgType ,
1287
+ PropertiesT>:: wrap ( this , KernelFunc);
1288
1288
#ifndef __SYCL_DEVICE_ONLY__
1289
1289
throwIfActionIsCreated ();
1290
1290
verifyUsedKernelBundleInternal (
@@ -1371,8 +1371,8 @@ class __SYCL_EXPORT handler {
1371
1371
sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
1372
1372
(void )NumWorkGroups;
1373
1373
(void )Props;
1374
- kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType , KernelType,
1375
- PropertiesT>( KernelFunc);
1374
+ KernelWrapper<WrapAs::parallel_for_work_group, NameT , KernelType,
1375
+ LambdaArgType, PropertiesT>:: wrap ( this , KernelFunc);
1376
1376
#ifndef __SYCL_DEVICE_ONLY__
1377
1377
throwIfActionIsCreated ();
1378
1378
verifyUsedKernelBundleInternal (
@@ -1413,8 +1413,8 @@ class __SYCL_EXPORT handler {
1413
1413
(void )NumWorkGroups;
1414
1414
(void )WorkGroupSize;
1415
1415
(void )Props;
1416
- kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType , KernelType,
1417
- PropertiesT>( KernelFunc);
1416
+ KernelWrapper<WrapAs::parallel_for_work_group, NameT , KernelType,
1417
+ LambdaArgType, PropertiesT>:: wrap ( this , KernelFunc);
1418
1418
#ifndef __SYCL_DEVICE_ONLY__
1419
1419
throwIfActionIsCreated ();
1420
1420
verifyUsedKernelBundleInternal (
@@ -1554,127 +1554,79 @@ class __SYCL_EXPORT handler {
1554
1554
#endif
1555
1555
}
1556
1556
1557
- template <typename ... Props> struct KernelPropertiesUnpackerImpl {
1558
- // Just pass extra Props... as template parameters to the underlying
1559
- // Caller->* member functions. Don't have reflection so try to use
1560
- // templates as much as possible to reduce the amount of boilerplate code
1561
- // needed. All the type checks are expected to be done at the Caller's
1562
- // methods side.
1563
-
1564
- template <typename ... TypesToForward, typename ... ArgsTy>
1565
- static void kernel_single_task_unpack (handler *h, ArgsTy &&...Args) {
1566
- h->kernel_single_task <TypesToForward..., Props...>(
1567
- std::forward<ArgsTy>(Args)...);
1568
- }
1569
-
1570
- template <typename ... TypesToForward, typename ... ArgsTy>
1571
- static void kernel_parallel_for_unpack (handler *h, ArgsTy &&...Args) {
1572
- h->kernel_parallel_for <TypesToForward..., Props...>(
1573
- std::forward<ArgsTy>(Args)...);
1574
- }
1575
-
1576
- template <typename ... TypesToForward, typename ... ArgsTy>
1577
- static void kernel_parallel_for_work_group_unpack (handler *h,
1578
- ArgsTy &&...Args) {
1579
- h->kernel_parallel_for_work_group <TypesToForward..., Props...>(
1580
- std::forward<ArgsTy>(Args)...);
1581
- }
1582
- };
1583
-
1584
- template <typename PropertiesT>
1585
- struct KernelPropertiesUnpacker : public KernelPropertiesUnpackerImpl <> {
1586
- // This should always fail outside the specialization below but must be
1587
- // dependent to avoid failing even if not instantiated.
1588
- static_assert (
1589
- ext::oneapi::experimental::is_property_list<PropertiesT>::value,
1590
- " Template type is not a property list." );
1591
- };
1592
-
1593
- template <typename ... Props>
1594
- struct KernelPropertiesUnpacker <
1595
- ext::oneapi::experimental::detail::properties_t <Props...>>
1596
- : public KernelPropertiesUnpackerImpl<Props...> {};
1597
-
1598
- // Helper function to
1599
- //
1600
- // * Make use of the KernelPropertiesUnpacker above
1601
- // * Decide if we need an extra kernel_handler parameter
1557
+ // The KernelWrapper below has two purposes.
1602
1558
//
1603
- // The interface uses a \p Lambda callback to propagate that information back
1604
- // to the caller as we need the caller to communicate:
1559
+ // First, from SYCL 2020, Table 129 (Member functions of the `handler` class)
1560
+ // > The callable ... can optionally take a `kernel_handler` ... in which
1561
+ // > case the SYCL runtime will construct an instance of `kernel_handler`
1562
+ // > and pass it to the callable.
1605
1563
//
1606
- // * Name of the method to call
1607
- // * Provide explicit template type parameters for the call
1564
+ // Note: the "..." elisions reflect slight wording differences between
1565
+ // single_task/parallel_for (e.g. only parameter vs last). This helper class
1566
+ // calls `kernel_*` entry points (both hardcoded names known to FE and special
1567
+ // device-specific entry point attributes) with proper arguments (with/without
1568
+ // `kernel_handler` argument, depending on the signature of the SYCL kernel
1569
+ // function).
1608
1570
//
1609
- // Couldn't think of a better way to achieve both.
1610
- template <typename KernelName, typename KernelType, typename PropertiesT,
1611
- bool HasKernelHandlerArg, typename FuncTy>
1612
- void unpack (const KernelType &KernelFunc, FuncTy Lambda) {
1613
- #ifdef __SYCL_DEVICE_ONLY__
1614
- detail::CheckDeviceCopyable<KernelType>();
1615
- #endif // __SYCL_DEVICE_ONLY__
1616
- using MergedPropertiesT =
1617
- typename detail::GetMergedKernelProperties<KernelType,
1618
- PropertiesT>::type;
1619
- using Unpacker = KernelPropertiesUnpacker<MergedPropertiesT>;
1620
- #ifndef __SYCL_DEVICE_ONLY__
1621
- // If there are properties provided by get method then process them.
1622
- if constexpr (ext::oneapi::experimental::detail::
1623
- HasKernelPropertiesGetMethod<const KernelType &>::value) {
1624
- processProperties<detail::isKernelESIMD<KernelName>()>(
1625
- KernelFunc.get (ext::oneapi::experimental::properties_tag{}));
1626
- }
1627
- #endif
1628
- if constexpr (HasKernelHandlerArg) {
1629
- kernel_handler KH;
1630
- Lambda (Unpacker{}, this , KernelFunc, KH);
1631
- } else {
1632
- Lambda (Unpacker{}, this , KernelFunc);
1633
- }
1634
- }
1571
+ // Second, it performs a few checks and some properties processing (including
1572
+ // those provided via the `sycl_ext_oneapi_kernel_properties` extension by
1573
+ // embedding them into the kernel's type).
1635
1574
1636
- // NOTE: to support kernel_handler argument in kernel lambdas, only
1637
- // kernel_***_wrapper functions must be called in this code
1575
+ enum class WrapAs { single_task, parallel_for, parallel_for_work_group };
1638
1576
1639
1577
template <
1640
- typename KernelName, typename KernelType,
1641
- typename PropertiesT = ext::oneapi::experimental::empty_properties_t >
1642
- void kernel_single_task_wrapper (const KernelType &KernelFunc) {
1643
- unpack<KernelName, KernelType, PropertiesT,
1644
- detail::KernelLambdaHasKernelHandlerArgT<KernelType>::value>(
1645
- KernelFunc, [&](auto Unpacker, auto &&...args ) {
1646
- Unpacker.template kernel_single_task_unpack <KernelName, KernelType>(
1578
+ WrapAs WrapAsVal, typename KernelName, typename KernelType,
1579
+ typename ElementType,
1580
+ typename PropertiesT = ext::oneapi::experimental::empty_properties_t ,
1581
+ typename MergedPropertiesT = typename detail::GetMergedKernelProperties<
1582
+ KernelType, PropertiesT>::type>
1583
+ struct KernelWrapper ;
1584
+ template <WrapAs WrapAsVal, typename KernelName, typename KernelType,
1585
+ typename ElementType, typename PropertiesT, typename ... MergedProps>
1586
+ struct KernelWrapper <
1587
+ WrapAsVal, KernelName, KernelType, ElementType, PropertiesT,
1588
+ ext::oneapi::experimental::detail::properties_t <MergedProps...>> {
1589
+ static void wrap (handler *h, const KernelType &KernelFunc) {
1590
+ #ifdef __SYCL_DEVICE_ONLY__
1591
+ detail::CheckDeviceCopyable<KernelType>();
1592
+ #else
1593
+ // If there are properties provided by get method then process them.
1594
+ if constexpr (ext::oneapi::experimental::detail::
1595
+ HasKernelPropertiesGetMethod<
1596
+ const KernelType &>::value) {
1597
+ h->processProperties <detail::isKernelESIMD<KernelName>()>(
1598
+ KernelFunc.get (ext::oneapi::experimental::properties_tag{}));
1599
+ }
1600
+ #endif
1601
+ auto L = [&](auto &&...args ) {
1602
+ if constexpr (WrapAsVal == WrapAs::single_task) {
1603
+ h->kernel_single_task <KernelName, KernelType, MergedProps...>(
1647
1604
std::forward<decltype (args)>(args)...);
1648
- });
1649
- }
1650
-
1651
- template <
1652
- typename KernelName, typename ElementType, typename KernelType,
1653
- typename PropertiesT = ext::oneapi::experimental::empty_properties_t >
1654
- void kernel_parallel_for_wrapper (const KernelType &KernelFunc) {
1655
- unpack<KernelName, KernelType, PropertiesT,
1656
- detail::KernelLambdaHasKernelHandlerArgT<KernelType,
1657
- ElementType>::value>(
1658
- KernelFunc, [&](auto Unpacker, auto &&...args ) {
1659
- Unpacker.template kernel_parallel_for_unpack <KernelName, ElementType,
1660
- KernelType>(
1605
+ } else if constexpr (WrapAsVal == WrapAs::parallel_for) {
1606
+ h->kernel_parallel_for <KernelName, ElementType, KernelType,
1607
+ MergedProps...>(
1661
1608
std::forward<decltype (args)>(args)...);
1662
- });
1663
- }
1664
-
1665
- template <
1666
- typename KernelName, typename ElementType, typename KernelType,
1667
- typename PropertiesT = ext::oneapi::experimental::empty_properties_t >
1668
- void kernel_parallel_for_work_group_wrapper (const KernelType &KernelFunc) {
1669
- unpack<KernelName, KernelType, PropertiesT,
1670
- detail::KernelLambdaHasKernelHandlerArgT<KernelType,
1671
- ElementType>::value>(
1672
- KernelFunc, [&](auto Unpacker, auto &&...args ) {
1673
- Unpacker.template kernel_parallel_for_work_group_unpack <
1674
- KernelName, ElementType, KernelType>(
1609
+ } else if constexpr (WrapAsVal == WrapAs::parallel_for_work_group) {
1610
+ h->kernel_parallel_for_work_group <KernelName, ElementType, KernelType,
1611
+ MergedProps...>(
1675
1612
std::forward<decltype (args)>(args)...);
1676
- });
1677
- }
1613
+ } else {
1614
+ // Always false, but template-dependent.
1615
+ static_assert (WrapAsVal != WrapAsVal, " Unexpected WrapAsVal" );
1616
+ }
1617
+ };
1618
+ if constexpr (detail::KernelLambdaHasKernelHandlerArgT<
1619
+ KernelType, ElementType>::value) {
1620
+ kernel_handler KH;
1621
+ L (KernelFunc, KH);
1622
+ } else {
1623
+ L (KernelFunc);
1624
+ }
1625
+ }
1626
+ };
1627
+
1628
+ // NOTE: to support kernel_handler argument in kernel lambdas, only
1629
+ // KernelWrapper<...>::wrap() must be called in this code.
1678
1630
1679
1631
/// Defines and invokes a SYCL kernel function as a function object type.
1680
1632
///
@@ -1694,7 +1646,8 @@ class __SYCL_EXPORT handler {
1694
1646
using NameT =
1695
1647
typename detail::get_kernel_name_t <KernelName, KernelType>::name;
1696
1648
1697
- kernel_single_task_wrapper<NameT, KernelType, PropertiesT>(KernelFunc);
1649
+ KernelWrapper<WrapAs::single_task, NameT, KernelType, void ,
1650
+ PropertiesT>::wrap (this , KernelFunc);
1698
1651
#ifndef __SYCL_DEVICE_ONLY__
1699
1652
throwIfActionIsCreated ();
1700
1653
throwOnKernelParameterMisuse<KernelName, KernelType>();
@@ -1997,7 +1950,8 @@ class __SYCL_EXPORT handler {
1997
1950
typename TransformUserItemType<Dims, LambdaArgType>::type>;
1998
1951
(void )NumWorkItems;
1999
1952
(void )WorkItemOffset;
2000
- kernel_parallel_for_wrapper<NameT, TransformedArgType>(KernelFunc);
1953
+ KernelWrapper<WrapAs::parallel_for, NameT, KernelType,
1954
+ TransformedArgType>::wrap (this , KernelFunc);
2001
1955
#ifndef __SYCL_DEVICE_ONLY__
2002
1956
throwIfActionIsCreated ();
2003
1957
verifyUsedKernelBundleInternal (
@@ -2173,7 +2127,8 @@ class __SYCL_EXPORT handler {
2173
2127
using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
2174
2128
(void )Kernel;
2175
2129
(void )NumWorkItems;
2176
- kernel_parallel_for_wrapper<NameT, LambdaArgType>(KernelFunc);
2130
+ KernelWrapper<WrapAs::parallel_for, NameT, KernelType, LambdaArgType>::wrap (
2131
+ this , KernelFunc);
2177
2132
#ifndef __SYCL_DEVICE_ONLY__
2178
2133
throwIfActionIsCreated ();
2179
2134
verifyUsedKernelBundleInternal (
@@ -2211,7 +2166,8 @@ class __SYCL_EXPORT handler {
2211
2166
(void )Kernel;
2212
2167
(void )NumWorkItems;
2213
2168
(void )WorkItemOffset;
2214
- kernel_parallel_for_wrapper<NameT, LambdaArgType>(KernelFunc);
2169
+ KernelWrapper<WrapAs::parallel_for, NameT, KernelType, LambdaArgType>::wrap (
2170
+ this , KernelFunc);
2215
2171
#ifndef __SYCL_DEVICE_ONLY__
2216
2172
throwIfActionIsCreated ();
2217
2173
// Ignore any set kernel bundles and use the one associated with the kernel
@@ -2250,7 +2206,8 @@ class __SYCL_EXPORT handler {
2250
2206
sycl::detail::lambda_arg_type<KernelType, nd_item<Dims>>;
2251
2207
(void )Kernel;
2252
2208
(void )NDRange;
2253
- kernel_parallel_for_wrapper<NameT, LambdaArgType>(KernelFunc);
2209
+ KernelWrapper<WrapAs::parallel_for, NameT, KernelType, LambdaArgType>::wrap (
2210
+ this , KernelFunc);
2254
2211
#ifndef __SYCL_DEVICE_ONLY__
2255
2212
throwIfActionIsCreated ();
2256
2213
// Ignore any set kernel bundles and use the one associated with the kernel
@@ -2293,7 +2250,8 @@ class __SYCL_EXPORT handler {
2293
2250
sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
2294
2251
(void )Kernel;
2295
2252
(void )NumWorkGroups;
2296
- kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType>(KernelFunc);
2253
+ KernelWrapper<WrapAs::parallel_for_work_group, NameT, KernelType,
2254
+ LambdaArgType>::wrap (this , KernelFunc);
2297
2255
#ifndef __SYCL_DEVICE_ONLY__
2298
2256
throwIfActionIsCreated ();
2299
2257
// Ignore any set kernel bundles and use the one associated with the kernel
@@ -2335,7 +2293,8 @@ class __SYCL_EXPORT handler {
2335
2293
(void )Kernel;
2336
2294
(void )NumWorkGroups;
2337
2295
(void )WorkGroupSize;
2338
- kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType>(KernelFunc);
2296
+ KernelWrapper<WrapAs::parallel_for_work_group, NameT, KernelType,
2297
+ LambdaArgType>::wrap (this , KernelFunc);
2339
2298
#ifndef __SYCL_DEVICE_ONLY__
2340
2299
throwIfActionIsCreated ();
2341
2300
// Ignore any set kernel bundles and use the one associated with the kernel
0 commit comments