Skip to content

Commit f126cf5

Browse files
[SYCL][NFC] Optimize verifyUsedKernelBundle usage (#15496)
This PR is intended to reduce the number of template instantiations performed during both the host and device compilation passes, as well as the amount of LLVM IR emitted during the host compilation pass. `handler::verifyUsedKernelBundle` is a one-line helper; it has been inlined into its callers to avoid one instantiation per kernel and the emission of the associated LLVM IR. The inlined calls were also moved under `#ifndef __SYCL_DEVICE_ONLY__` to reduce the amount of code the device compiler has to parse.
1 parent 4040a7e commit f126cf5

File tree

1 file changed

+26
-16
lines changed

1 file changed

+26
-16
lines changed

sycl/include/sycl/handler.hpp

Lines changed: 26 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -774,10 +774,6 @@ class __SYCL_EXPORT handler {
774774
///
775775
/// \param KernelName is the name of the SYCL kernel to check that the used
776776
/// kernel bundle contains.
777-
template <typename KernelNameT> void verifyUsedKernelBundle() {
778-
verifyUsedKernelBundleInternal(
779-
detail::string_view{detail::getKernelName<KernelNameT>()});
780-
}
781777
void verifyUsedKernelBundleInternal(detail::string_view KernelName);
782778

783779
/// Stores lambda to the template-free object
@@ -1233,7 +1229,6 @@ class __SYCL_EXPORT handler {
12331229
// conflicts they should be included in the name.
12341230
using NameT =
12351231
typename detail::get_kernel_name_t<KernelName, KernelType>::name;
1236-
verifyUsedKernelBundle<NameT>();
12371232

12381233
// Range rounding can be disabled by the user.
12391234
// Range rounding is not done on the host device.
@@ -1254,6 +1249,8 @@ class __SYCL_EXPORT handler {
12541249
kernel_parallel_for_wrapper<KName, TransformedArgType, decltype(Wrapper),
12551250
PropertiesT>(Wrapper);
12561251
#ifndef __SYCL_DEVICE_ONLY__
1252+
verifyUsedKernelBundleInternal(
1253+
detail::string_view{detail::getKernelName<NameT>()});
12571254
// We are executing over the rounded range, but there are still
12581255
// items/ids that are constructed in the range rounded
12591256
// kernel use items/ids in the user range, which means that
@@ -1279,6 +1276,8 @@ class __SYCL_EXPORT handler {
12791276
kernel_parallel_for_wrapper<NameT, TransformedArgType, KernelType,
12801277
PropertiesT>(KernelFunc);
12811278
#ifndef __SYCL_DEVICE_ONLY__
1279+
verifyUsedKernelBundleInternal(
1280+
detail::string_view{detail::getKernelName<NameT>()});
12821281
processProperties<detail::isKernelESIMD<NameT>(), PropertiesT>(Props);
12831282
detail::checkValueRange<Dims>(UserRange);
12841283
setNDRangeDescriptor(std::move(UserRange));
@@ -1315,7 +1314,6 @@ class __SYCL_EXPORT handler {
13151314
// conflicts they should be included in the name.
13161315
using NameT =
13171316
typename detail::get_kernel_name_t<KernelName, KernelType>::name;
1318-
verifyUsedKernelBundle<NameT>();
13191317
using LambdaArgType =
13201318
sycl::detail::lambda_arg_type<KernelType, nd_item<Dims>>;
13211319
static_assert(
@@ -1329,6 +1327,8 @@ class __SYCL_EXPORT handler {
13291327
kernel_parallel_for_wrapper<NameT, TransformedArgType, KernelType,
13301328
PropertiesT>(KernelFunc);
13311329
#ifndef __SYCL_DEVICE_ONLY__
1330+
verifyUsedKernelBundleInternal(
1331+
detail::string_view{detail::getKernelName<NameT>()});
13321332
detail::checkValueRange<Dims>(ExecutionRange);
13331333
setNDRangeDescriptor(std::move(ExecutionRange));
13341334
processProperties<detail::isKernelESIMD<NameT>(), PropertiesT>(Props);
@@ -1405,14 +1405,15 @@ class __SYCL_EXPORT handler {
14051405
// conflicts they should be included in the name.
14061406
using NameT =
14071407
typename detail::get_kernel_name_t<KernelName, KernelType>::name;
1408-
verifyUsedKernelBundle<NameT>();
14091408
using LambdaArgType =
14101409
sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
14111410
(void)NumWorkGroups;
14121411
(void)Props;
14131412
kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType, KernelType,
14141413
PropertiesT>(KernelFunc);
14151414
#ifndef __SYCL_DEVICE_ONLY__
1415+
verifyUsedKernelBundleInternal(
1416+
detail::string_view{detail::getKernelName<NameT>()});
14161417
processProperties<detail::isKernelESIMD<NameT>(), PropertiesT>(Props);
14171418
detail::checkValueRange<Dims>(NumWorkGroups);
14181419
setNDRangeDescriptor(NumWorkGroups, /*SetNumWorkGroups=*/true);
@@ -1446,7 +1447,6 @@ class __SYCL_EXPORT handler {
14461447
// conflicts they should be included in the name.
14471448
using NameT =
14481449
typename detail::get_kernel_name_t<KernelName, KernelType>::name;
1449-
verifyUsedKernelBundle<NameT>();
14501450
using LambdaArgType =
14511451
sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
14521452
(void)NumWorkGroups;
@@ -1455,6 +1455,8 @@ class __SYCL_EXPORT handler {
14551455
kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType, KernelType,
14561456
PropertiesT>(KernelFunc);
14571457
#ifndef __SYCL_DEVICE_ONLY__
1458+
verifyUsedKernelBundleInternal(
1459+
detail::string_view{detail::getKernelName<NameT>()});
14581460
processProperties<detail::isKernelESIMD<NameT>(), PropertiesT>(Props);
14591461
nd_range<Dims> ExecRange =
14601462
nd_range<Dims>(NumWorkGroups * WorkGroupSize, WorkGroupSize);
@@ -1728,9 +1730,10 @@ class __SYCL_EXPORT handler {
17281730
using NameT =
17291731
typename detail::get_kernel_name_t<KernelName, KernelType>::name;
17301732

1731-
verifyUsedKernelBundle<NameT>();
17321733
kernel_single_task_wrapper<NameT, KernelType, PropertiesT>(KernelFunc);
17331734
#ifndef __SYCL_DEVICE_ONLY__
1735+
verifyUsedKernelBundleInternal(
1736+
detail::string_view{detail::getKernelName<NameT>()});
17341737
// No need to check if range is out of INT_MAX limits as it's compile-time
17351738
// known constant.
17361739
setNDRangeDescriptor(range<1>{1});
@@ -2024,7 +2027,6 @@ class __SYCL_EXPORT handler {
20242027
throwIfActionIsCreated();
20252028
using NameT =
20262029
typename detail::get_kernel_name_t<KernelName, KernelType>::name;
2027-
verifyUsedKernelBundle<NameT>();
20282030
using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
20292031
using TransformedArgType = std::conditional_t<
20302032
std::is_integral<LambdaArgType>::value && Dims == 1, item<Dims>,
@@ -2033,6 +2035,8 @@ class __SYCL_EXPORT handler {
20332035
(void)WorkItemOffset;
20342036
kernel_parallel_for_wrapper<NameT, TransformedArgType>(KernelFunc);
20352037
#ifndef __SYCL_DEVICE_ONLY__
2038+
verifyUsedKernelBundleInternal(
2039+
detail::string_view{detail::getKernelName<NameT>()});
20362040
detail::checkValueRange<Dims>(NumWorkItems, WorkItemOffset);
20372041
setNDRangeDescriptor(std::move(NumWorkItems), std::move(WorkItemOffset));
20382042
StoreLambda<NameT, KernelType, Dims, TransformedArgType>(
@@ -2165,10 +2169,11 @@ class __SYCL_EXPORT handler {
21652169
setHandlerKernelBundle(Kernel);
21662170
using NameT =
21672171
typename detail::get_kernel_name_t<KernelName, KernelType>::name;
2168-
verifyUsedKernelBundle<NameT>();
21692172
(void)Kernel;
21702173
kernel_single_task<NameT>(KernelFunc);
21712174
#ifndef __SYCL_DEVICE_ONLY__
2175+
verifyUsedKernelBundleInternal(
2176+
detail::string_view{detail::getKernelName<NameT>()});
21722177
// No need to check if range is out of INT_MAX limits as it's compile-time
21732178
// known constant
21742179
setNDRangeDescriptor(range<1>{1});
@@ -2200,12 +2205,13 @@ class __SYCL_EXPORT handler {
22002205
setHandlerKernelBundle(Kernel);
22012206
using NameT =
22022207
typename detail::get_kernel_name_t<KernelName, KernelType>::name;
2203-
verifyUsedKernelBundle<NameT>();
22042208
using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
22052209
(void)Kernel;
22062210
(void)NumWorkItems;
22072211
kernel_parallel_for_wrapper<NameT, LambdaArgType>(KernelFunc);
22082212
#ifndef __SYCL_DEVICE_ONLY__
2213+
verifyUsedKernelBundleInternal(
2214+
detail::string_view{detail::getKernelName<NameT>()});
22092215
detail::checkValueRange<Dims>(NumWorkItems);
22102216
setNDRangeDescriptor(std::move(NumWorkItems));
22112217
MKernel = detail::getSyclObjImpl(std::move(Kernel));
@@ -2239,13 +2245,14 @@ class __SYCL_EXPORT handler {
22392245
setHandlerKernelBundle(Kernel);
22402246
using NameT =
22412247
typename detail::get_kernel_name_t<KernelName, KernelType>::name;
2242-
verifyUsedKernelBundle<NameT>();
22432248
using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
22442249
(void)Kernel;
22452250
(void)NumWorkItems;
22462251
(void)WorkItemOffset;
22472252
kernel_parallel_for_wrapper<NameT, LambdaArgType>(KernelFunc);
22482253
#ifndef __SYCL_DEVICE_ONLY__
2254+
verifyUsedKernelBundleInternal(
2255+
detail::string_view{detail::getKernelName<NameT>()});
22492256
detail::checkValueRange<Dims>(NumWorkItems, WorkItemOffset);
22502257
setNDRangeDescriptor(std::move(NumWorkItems), std::move(WorkItemOffset));
22512258
MKernel = detail::getSyclObjImpl(std::move(Kernel));
@@ -2278,13 +2285,14 @@ class __SYCL_EXPORT handler {
22782285
setHandlerKernelBundle(Kernel);
22792286
using NameT =
22802287
typename detail::get_kernel_name_t<KernelName, KernelType>::name;
2281-
verifyUsedKernelBundle<NameT>();
22822288
using LambdaArgType =
22832289
sycl::detail::lambda_arg_type<KernelType, nd_item<Dims>>;
22842290
(void)Kernel;
22852291
(void)NDRange;
22862292
kernel_parallel_for_wrapper<NameT, LambdaArgType>(KernelFunc);
22872293
#ifndef __SYCL_DEVICE_ONLY__
2294+
verifyUsedKernelBundleInternal(
2295+
detail::string_view{detail::getKernelName<NameT>()});
22882296
detail::checkValueRange<Dims>(NDRange);
22892297
setNDRangeDescriptor(std::move(NDRange));
22902298
MKernel = detail::getSyclObjImpl(std::move(Kernel));
@@ -2321,13 +2329,14 @@ class __SYCL_EXPORT handler {
23212329
setHandlerKernelBundle(Kernel);
23222330
using NameT =
23232331
typename detail::get_kernel_name_t<KernelName, KernelType>::name;
2324-
verifyUsedKernelBundle<NameT>();
23252332
using LambdaArgType =
23262333
sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
23272334
(void)Kernel;
23282335
(void)NumWorkGroups;
23292336
kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType>(KernelFunc);
23302337
#ifndef __SYCL_DEVICE_ONLY__
2338+
verifyUsedKernelBundleInternal(
2339+
detail::string_view{detail::getKernelName<NameT>()});
23312340
detail::checkValueRange<Dims>(NumWorkGroups);
23322341
setNDRangeDescriptor(NumWorkGroups, /*SetNumWorkGroups=*/true);
23332342
MKernel = detail::getSyclObjImpl(std::move(Kernel));
@@ -2361,14 +2370,15 @@ class __SYCL_EXPORT handler {
23612370
setHandlerKernelBundle(Kernel);
23622371
using NameT =
23632372
typename detail::get_kernel_name_t<KernelName, KernelType>::name;
2364-
verifyUsedKernelBundle<NameT>();
23652373
using LambdaArgType =
23662374
sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
23672375
(void)Kernel;
23682376
(void)NumWorkGroups;
23692377
(void)WorkGroupSize;
23702378
kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType>(KernelFunc);
23712379
#ifndef __SYCL_DEVICE_ONLY__
2380+
verifyUsedKernelBundleInternal(
2381+
detail::string_view{detail::getKernelName<NameT>()});
23722382
nd_range<Dims> ExecRange =
23732383
nd_range<Dims>(NumWorkGroups * WorkGroupSize, WorkGroupSize);
23742384
detail::checkValueRange<Dims>(ExecRange);

0 commit comments

Comments
 (0)