Skip to content

Commit c8986cd

Browse files
[SYCL] Update khr_free_function_commands extension interfaces to accept const queue& (#18564)
The purpose of this PR is to update the `khr_free_function_commands` extension to accept `const queue&` instead of creating a copy of the queue, which may improve performance. Extension spec: KhronosGroup/SYCL-Docs#644 --------- Co-authored-by: Marcos Maronas <[email protected]>
1 parent 8294d9e commit c8986cd

File tree

6 files changed

+109
-69
lines changed

6 files changed

+109
-69
lines changed

sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -97,14 +97,14 @@ template <typename LCRangeT, typename LCPropertiesT> struct LaunchConfigAccess {
9797
};
9898

9999
template <typename CommandGroupFunc, typename PropertiesT>
100-
void submit_impl(queue &Q, PropertiesT Props, CommandGroupFunc &&CGF,
100+
void submit_impl(const queue &Q, PropertiesT Props, CommandGroupFunc &&CGF,
101101
const sycl::detail::code_location &CodeLoc) {
102102
Q.submit_without_event<__SYCL_USE_FALLBACK_ASSERT>(
103103
Props, detail::type_erased_cgfo_ty{CGF}, CodeLoc);
104104
}
105105

106106
template <typename CommandGroupFunc, typename PropertiesT>
107-
event submit_with_event_impl(queue &Q, PropertiesT Props,
107+
event submit_with_event_impl(const queue &Q, PropertiesT Props,
108108
CommandGroupFunc &&CGF,
109109
const sycl::detail::code_location &CodeLoc) {
110110
return Q.submit_with_event<__SYCL_USE_FALLBACK_ASSERT>(
@@ -113,34 +113,34 @@ event submit_with_event_impl(queue &Q, PropertiesT Props,
113113
} // namespace detail
114114

115115
template <typename CommandGroupFunc, typename PropertiesT>
116-
void submit(queue Q, PropertiesT Props, CommandGroupFunc &&CGF,
116+
void submit(const queue &Q, PropertiesT Props, CommandGroupFunc &&CGF,
117117
const sycl::detail::code_location &CodeLoc =
118118
sycl::detail::code_location::current()) {
119119
sycl::ext::oneapi::experimental::detail::submit_impl(
120120
Q, Props, std::forward<CommandGroupFunc>(CGF), CodeLoc);
121121
}
122122

123123
template <typename CommandGroupFunc>
124-
void submit(queue Q, CommandGroupFunc &&CGF,
124+
void submit(const queue &Q, CommandGroupFunc &&CGF,
125125
const sycl::detail::code_location &CodeLoc =
126126
sycl::detail::code_location::current()) {
127-
submit(std::move(Q), empty_properties_t{},
128-
std::forward<CommandGroupFunc>(CGF), CodeLoc);
127+
submit(Q, empty_properties_t{}, std::forward<CommandGroupFunc>(CGF), CodeLoc);
129128
}
130129

131130
template <typename CommandGroupFunc, typename PropertiesT>
132-
event submit_with_event(queue Q, PropertiesT Props, CommandGroupFunc &&CGF,
131+
event submit_with_event(const queue &Q, PropertiesT Props,
132+
CommandGroupFunc &&CGF,
133133
const sycl::detail::code_location &CodeLoc =
134134
sycl::detail::code_location::current()) {
135135
return sycl::ext::oneapi::experimental::detail::submit_with_event_impl(
136136
Q, Props, std::forward<CommandGroupFunc>(CGF), CodeLoc);
137137
}
138138

139139
template <typename CommandGroupFunc>
140-
event submit_with_event(queue Q, CommandGroupFunc &&CGF,
140+
event submit_with_event(const queue &Q, CommandGroupFunc &&CGF,
141141
const sycl::detail::code_location &CodeLoc =
142142
sycl::detail::code_location::current()) {
143-
return submit_with_event(std::move(Q), empty_properties_t{},
143+
return submit_with_event(Q, empty_properties_t{},
144144
std::forward<CommandGroupFunc>(CGF), CodeLoc);
145145
}
146146

sycl/include/sycl/khr/free_function_commands.hpp

Lines changed: 47 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,15 @@ inline namespace _V1 {
99
namespace khr {
1010

1111
template <typename CommandGroupFunc>
12-
void submit(queue q, CommandGroupFunc &&cgf,
12+
void submit(const queue &q, CommandGroupFunc &&cgf,
1313
const sycl::detail::code_location &codeLoc =
1414
sycl::detail::code_location::current()) {
1515
sycl::ext::oneapi::experimental::submit(
1616
q, std::forward<CommandGroupFunc>(cgf), codeLoc);
1717
}
1818

1919
template <typename CommandGroupFunc>
20-
event submit_tracked(queue q, CommandGroupFunc &&cgf,
20+
event submit_tracked(const queue &q, CommandGroupFunc &&cgf,
2121
const sycl::detail::code_location &codeLoc =
2222
sycl::detail::code_location::current()) {
2323
return sycl::ext::oneapi::experimental::submit_with_event(
@@ -38,19 +38,19 @@ void launch(handler &h, range<3> r, const KernelType &k) {
3838
}
3939

4040
template <typename KernelType>
41-
void launch(queue q, range<1> r, const KernelType &k,
41+
void launch(const queue &q, range<1> r, const KernelType &k,
4242
const sycl::detail::code_location &codeLoc =
4343
sycl::detail::code_location::current()) {
4444
submit(q, [&](handler &h) { launch<KernelType>(h, r, k); }, codeLoc);
4545
}
4646
template <typename KernelType>
47-
void launch(queue q, range<2> r, const KernelType &k,
47+
void launch(const queue &q, range<2> r, const KernelType &k,
4848
const sycl::detail::code_location &codeLoc =
4949
sycl::detail::code_location::current()) {
5050
submit(q, [&](handler &h) { launch<KernelType>(h, r, k); }, codeLoc);
5151
}
5252
template <typename KernelType>
53-
void launch(queue q, range<3> r, const KernelType &k,
53+
void launch(const queue &q, range<3> r, const KernelType &k,
5454
const sycl::detail::code_location &codeLoc =
5555
sycl::detail::code_location::current()) {
5656
submit(q, [&](handler &h) { launch<KernelType>(h, r, k); }, codeLoc);
@@ -75,17 +75,17 @@ void launch(handler &h, range<3> r, const kernel &k, ArgsT &&...args) {
7575
}
7676

7777
template <typename... ArgsT>
78-
void launch(queue q, range<1> r, const kernel &k, ArgsT &&...args) {
78+
void launch(const queue &q, range<1> r, const kernel &k, ArgsT &&...args) {
7979
submit(q, [&](handler &h) { launch(h, r, k, std::forward<ArgsT>(args)...); });
8080
}
8181

8282
template <typename... ArgsT>
83-
void launch(queue q, range<2> r, const kernel &k, ArgsT &&...args) {
83+
void launch(const queue &q, range<2> r, const kernel &k, ArgsT &&...args) {
8484
submit(q, [&](handler &h) { launch(h, r, k, std::forward<ArgsT>(args)...); });
8585
}
8686

8787
template <typename... ArgsT>
88-
void launch(queue q, range<3> r, const kernel &k, ArgsT &&...args) {
88+
void launch(const queue &q, range<3> r, const kernel &k, ArgsT &&...args) {
8989
submit(q, [&](handler &h) { launch(h, r, k, std::forward<ArgsT>(args)...); });
9090
}
9191

@@ -107,23 +107,23 @@ void launch_reduce(handler &h, range<3> r, const KernelType &k,
107107
}
108108

109109
template <typename KernelType, typename... Reductions>
110-
void launch_reduce(queue q, range<1> r, const KernelType &k,
110+
void launch_reduce(const queue &q, range<1> r, const KernelType &k,
111111
Reductions &&...reductions) {
112112
submit(q, [&](handler &h) {
113113
launch_reduce<KernelType>(h, r, k, std::forward<Reductions>(reductions)...);
114114
});
115115
}
116116

117117
template <typename KernelType, typename... Reductions>
118-
void launch_reduce(queue q, range<2> r, const KernelType &k,
118+
void launch_reduce(const queue &q, range<2> r, const KernelType &k,
119119
Reductions &&...reductions) {
120120
submit(q, [&](handler &h) {
121121
launch_reduce<KernelType>(h, r, k, std::forward<Reductions>(reductions)...);
122122
});
123123
}
124124

125125
template <typename KernelType, typename... Reductions>
126-
void launch_reduce(queue q, range<3> r, const KernelType &k,
126+
void launch_reduce(const queue &q, range<3> r, const KernelType &k,
127127
Reductions &&...reductions) {
128128
submit(q, [&](handler &h) {
129129
launch_reduce<KernelType>(h, r, k, std::forward<Reductions>(reductions)...);
@@ -149,23 +149,26 @@ void launch_grouped(handler &h, range<3> r, range<3> size,
149149
}
150150

151151
template <typename KernelType>
152-
void launch_grouped(queue q, range<1> r, range<1> size, const KernelType &k,
152+
void launch_grouped(const queue &q, range<1> r, range<1> size,
153+
const KernelType &k,
153154
const sycl::detail::code_location &codeLoc =
154155
sycl::detail::code_location::current()) {
155156
submit(
156157
q, [&](handler &h) { launch_grouped<KernelType>(h, r, size, k); },
157158
codeLoc);
158159
}
159160
template <typename KernelType>
160-
void launch_grouped(queue q, range<2> r, range<2> size, const KernelType &k,
161+
void launch_grouped(const queue &q, range<2> r, range<2> size,
162+
const KernelType &k,
161163
const sycl::detail::code_location &codeLoc =
162164
sycl::detail::code_location::current()) {
163165
submit(
164166
q, [&](handler &h) { launch_grouped<KernelType>(h, r, size, k); },
165167
codeLoc);
166168
}
167169
template <typename KernelType>
168-
void launch_grouped(queue q, range<3> r, range<3> size, const KernelType &k,
170+
void launch_grouped(const queue &q, range<3> r, range<3> size,
171+
const KernelType &k,
169172
const sycl::detail::code_location &codeLoc =
170173
sycl::detail::code_location::current()) {
171174
submit(
@@ -195,23 +198,23 @@ void launch_grouped(sycl::handler &h, sycl::range<3> r, sycl::range<3> size,
195198
}
196199

197200
template <typename... Args>
198-
void launch_grouped(sycl::queue q, sycl::range<1> r, sycl::range<1> size,
201+
void launch_grouped(const sycl::queue &q, sycl::range<1> r, sycl::range<1> size,
199202
const sycl::kernel &k, Args &&...args) {
200203
submit(q, [&](handler &h) {
201204
launch_grouped(h, r, size, k, std::forward<Args>(args)...);
202205
});
203206
}
204207

205208
template <typename... Args>
206-
void launch_grouped(sycl::queue q, sycl::range<2> r, sycl::range<2> size,
209+
void launch_grouped(const sycl::queue &q, sycl::range<2> r, sycl::range<2> size,
207210
const sycl::kernel &k, Args &&...args) {
208211
submit(q, [&](handler &h) {
209212
launch_grouped(h, r, size, k, std::forward<Args>(args)...);
210213
});
211214
}
212215

213216
template <typename... Args>
214-
void launch_grouped(sycl::queue q, sycl::range<3> r, sycl::range<3> size,
217+
void launch_grouped(const sycl::queue &q, sycl::range<3> r, sycl::range<3> size,
215218
const sycl::kernel &k, Args &&...args) {
216219
submit(q, [&](handler &h) {
217220
launch_grouped(h, r, size, k, std::forward<Args>(args)...);
@@ -242,26 +245,29 @@ void launch_grouped_reduce(sycl::handler &h, sycl::range<3> r,
242245
}
243246

244247
template <typename KernelType, typename... Reductions>
245-
void launch_grouped_reduce(sycl::queue q, sycl::range<1> r, sycl::range<1> size,
246-
const KernelType &k, Reductions &&...reductions) {
248+
void launch_grouped_reduce(const sycl::queue &q, sycl::range<1> r,
249+
sycl::range<1> size, const KernelType &k,
250+
Reductions &&...reductions) {
247251
submit(q, [&](handler &h) {
248252
launch_grouped_reduce<KernelType>(h, r, size, k,
249253
std::forward<Reductions>(reductions)...);
250254
});
251255
}
252256

253257
template <typename KernelType, typename... Reductions>
254-
void launch_grouped_reduce(sycl::queue q, sycl::range<2> r, sycl::range<2> size,
255-
const KernelType &k, Reductions &&...reductions) {
258+
void launch_grouped_reduce(const sycl::queue &q, sycl::range<2> r,
259+
sycl::range<2> size, const KernelType &k,
260+
Reductions &&...reductions) {
256261
submit(q, [&](handler &h) {
257262
launch_grouped_reduce<KernelType>(h, r, size, k,
258263
std::forward<Reductions>(reductions)...);
259264
});
260265
}
261266

262267
template <typename KernelType, typename... Reductions>
263-
void launch_grouped_reduce(sycl::queue q, sycl::range<3> r, sycl::range<3> size,
264-
const KernelType &k, Reductions &&...reductions) {
268+
void launch_grouped_reduce(const sycl::queue &q, sycl::range<3> r,
269+
sycl::range<3> size, const KernelType &k,
270+
Reductions &&...reductions) {
265271
submit(q, [&](handler &h) {
266272
launch_grouped_reduce<KernelType>(h, r, size, k,
267273
std::forward<Reductions>(reductions)...);
@@ -274,7 +280,7 @@ void launch_task(handler &h, const KernelType &k) {
274280
}
275281

276282
template <typename KernelType>
277-
void launch_task(sycl::queue q, const KernelType &k,
283+
void launch_task(const sycl::queue &q, const KernelType &k,
278284
const sycl::detail::code_location &codeLoc =
279285
sycl::detail::code_location::current()) {
280286
submit(q, [&](handler &h) { launch_task<KernelType>(h, k); }, codeLoc);
@@ -287,15 +293,15 @@ void launch_task(sycl::handler &h, const sycl::kernel &k, Args &&...args) {
287293
}
288294

289295
template <typename... Args>
290-
void launch_task(queue q, const kernel &k, Args &&...args) {
296+
void launch_task(const queue &q, const kernel &k, Args &&...args) {
291297
submit(q,
292298
[&](handler &h) { launch_task(h, k, std::forward<Args>(args)...); });
293299
}
294300

295301
inline void memcpy(handler &h, void *dest, const void *src, size_t numBytes) {
296302
h.memcpy(dest, src, numBytes);
297303
}
298-
inline void memcpy(queue q, void *dest, const void *src, size_t numBytes,
304+
inline void memcpy(const queue &q, void *dest, const void *src, size_t numBytes,
299305
const sycl::detail::code_location &codeLoc =
300306
sycl::detail::code_location::current()) {
301307
sycl::ext::oneapi::experimental::memcpy(q, dest, src, numBytes, codeLoc);
@@ -307,7 +313,7 @@ void copy(handler &h, const T *src, T *dest, size_t count) {
307313
}
308314

309315
template <typename T>
310-
void copy(queue q, const T *src, T *dest, size_t count,
316+
void copy(const queue &q, const T *src, T *dest, size_t count,
311317
const sycl::detail::code_location &codeLoc =
312318
sycl::detail::code_location::current()) {
313319
submit(q, [&](handler &h) { copy(h, src, dest, count); }, codeLoc);
@@ -326,7 +332,7 @@ void copy(handler &h, std::shared_ptr<SrcT> src,
326332
}
327333

328334
template <typename SrcT, typename DestT, int DestDims, access_mode DestMode>
329-
void copy(queue q, const SrcT *src,
335+
void copy(const queue &q, const SrcT *src,
330336
accessor<DestT, DestDims, DestMode, target::device> dest,
331337
const sycl::detail::code_location &codeLoc =
332338
sycl::detail::code_location::current()) {
@@ -340,7 +346,7 @@ void copy(queue q, const SrcT *src,
340346
}
341347

342348
template <typename SrcT, typename DestT, int DestDims, access_mode DestMode>
343-
void copy(queue q, std::shared_ptr<SrcT> src,
349+
void copy(const queue &q, std::shared_ptr<SrcT> src,
344350
accessor<DestT, DestDims, DestMode, target::device> dest,
345351
const sycl::detail::code_location &codeLoc =
346352
sycl::detail::code_location::current()) {
@@ -366,7 +372,7 @@ void copy(handler &h, accessor<SrcT, SrcDims, SrcMode, target::device> src,
366372
}
367373

368374
template <typename SrcT, int SrcDims, access_mode SrcMode, typename DestT>
369-
void copy(queue q, accessor<SrcT, SrcDims, SrcMode, target::device> src,
375+
void copy(const queue &q, accessor<SrcT, SrcDims, SrcMode, target::device> src,
370376
DestT *dest,
371377
const sycl::detail::code_location &codeLoc =
372378
sycl::detail::code_location::current()) {
@@ -380,7 +386,7 @@ void copy(queue q, accessor<SrcT, SrcDims, SrcMode, target::device> src,
380386
}
381387

382388
template <typename SrcT, int SrcDims, access_mode SrcMode, typename DestT>
383-
void copy(queue q, accessor<SrcT, SrcDims, SrcMode, target::device> src,
389+
void copy(const queue &q, accessor<SrcT, SrcDims, SrcMode, target::device> src,
384390
std::shared_ptr<DestT> dest,
385391
const sycl::detail::code_location &codeLoc =
386392
sycl::detail::code_location::current()) {
@@ -402,7 +408,7 @@ void copy(handler &h, accessor<SrcT, SrcDims, SrcMode, target::device> src,
402408

403409
template <typename SrcT, int SrcDims, access_mode SrcMode, typename DestT,
404410
int DestDims, access_mode DestMode>
405-
void copy(queue q, accessor<SrcT, SrcDims, SrcMode, target::device> src,
411+
void copy(const queue &q, accessor<SrcT, SrcDims, SrcMode, target::device> src,
406412
accessor<DestT, DestDims, DestMode, target::device> dest,
407413
const sycl::detail::code_location &codeLoc =
408414
sycl::detail::code_location::current()) {
@@ -419,7 +425,7 @@ inline void memset(handler &h, void *ptr, int value, size_t numBytes) {
419425
h.memset(ptr, value, numBytes);
420426
}
421427

422-
inline void memset(queue q, void *ptr, int value, size_t numBytes,
428+
inline void memset(const queue &q, void *ptr, int value, size_t numBytes,
423429
const sycl::detail::code_location &codeLoc =
424430
sycl::detail::code_location::current()) {
425431
sycl::ext::oneapi::experimental::memset(q, ptr, value, numBytes, codeLoc);
@@ -437,14 +443,15 @@ void fill(handler &h, accessor<T, Dims, Mode, target::device> dest,
437443
}
438444

439445
template <typename T>
440-
void fill(queue q, T *ptr, const T &pattern, size_t count,
446+
void fill(const queue &q, T *ptr, const T &pattern, size_t count,
441447
const sycl::detail::code_location &codeLoc =
442448
sycl::detail::code_location::current()) {
443449
submit(q, [&](handler &h) { fill(h, ptr, pattern, count); }, codeLoc);
444450
}
445451

446452
template <typename T, int Dims, access_mode Mode>
447-
void fill(queue q, accessor<T, Dims, Mode, target::device> dest, const T &src,
453+
void fill(const queue &q, accessor<T, Dims, Mode, target::device> dest,
454+
const T &src,
448455
const sycl::detail::code_location &codeLoc =
449456
sycl::detail::code_location::current()) {
450457
submit(
@@ -462,7 +469,7 @@ void update_host(handler &h, accessor<T, Dims, Mode, target::device> acc) {
462469
}
463470

464471
template <typename T, int Dims, access_mode Mode>
465-
void update_host(queue q, accessor<T, Dims, Mode, target::device> acc,
472+
void update_host(const queue &q, accessor<T, Dims, Mode, target::device> acc,
466473
const sycl::detail::code_location &codeLoc =
467474
sycl::detail::code_location::current()) {
468475
submit(
@@ -477,7 +484,7 @@ inline void prefetch(handler &h, void *ptr, size_t numBytes) {
477484
h.prefetch(ptr, numBytes);
478485
}
479486

480-
inline void prefetch(queue q, void *ptr, size_t numBytes,
487+
inline void prefetch(const queue &q, void *ptr, size_t numBytes,
481488
const sycl::detail::code_location &codeLoc =
482489
sycl::detail::code_location::current()) {
483490
submit(q, [&](handler &h) { prefetch(h, ptr, numBytes); }, codeLoc);
@@ -487,7 +494,7 @@ inline void mem_advise(handler &h, void *ptr, size_t numBytes, int advice) {
487494
h.mem_advise(ptr, numBytes, advice);
488495
}
489496

490-
inline void mem_advise(queue q, void *ptr, size_t numBytes, int advice,
497+
inline void mem_advise(const queue &q, void *ptr, size_t numBytes, int advice,
491498
const sycl::detail::code_location &codeLoc =
492499
sycl::detail::code_location::current()) {
493500
sycl::ext::oneapi::experimental::mem_advise(q, ptr, numBytes, advice,
@@ -496,7 +503,7 @@ inline void mem_advise(queue q, void *ptr, size_t numBytes, int advice,
496503

497504
inline void command_barrier(handler &h) { h.ext_oneapi_barrier(); }
498505

499-
inline void command_barrier(queue q,
506+
inline void command_barrier(const queue &q,
500507
const sycl::detail::code_location &codeLoc =
501508
sycl::detail::code_location::current()) {
502509
submit(q, [&](handler &h) { command_barrier(h); }, codeLoc);
@@ -506,7 +513,7 @@ inline void event_barrier(handler &h, const std::vector<event> &events) {
506513
h.ext_oneapi_barrier(events);
507514
}
508515

509-
inline void event_barrier(queue q, const std::vector<event> &events,
516+
inline void event_barrier(const queue &q, const std::vector<event> &events,
510517
const sycl::detail::code_location &codeLoc =
511518
sycl::detail::code_location::current()) {
512519
submit(q, [&](handler &h) { event_barrier(h, events); }, codeLoc);

0 commit comments

Comments
 (0)