Skip to content

Commit f4f83d9

Browse files
[SYCL] Fix an issue with queue shortcuts with offsets (#6440)
Fix an issue where it wasn't possible to pass an init-list for the dependency events vector due to an error during template parameter pack resolution.
1 parent c145959 commit f4f83d9

File tree

2 files changed

+74
-0
lines changed

2 files changed

+74
-0
lines changed

sycl/include/sycl/queue.hpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -862,6 +862,54 @@ class __SYCL_EXPORT queue {
862862
return parallel_for_impl<KernelName>(Range, DepEvents, Rest...);
863863
}
864864

865+
// While other shortcuts with offsets are able to go through parallel_for(...,
866+
// RestT &&...Rest), those that accept a dependency events vector have to be
867+
// overloaded to allow implicit construction from an init-list.
868+
/// parallel_for version with a kernel represented as a lambda + range and
869+
/// offset that specify global size and global offset correspondingly.
870+
///
/// This overload names the dependency events vector explicitly (1-dimensional
/// range and offset) so that the argument can be written as an init-list.
871+
/// \param Range specifies the global work space of the kernel
872+
/// \param WorkItemOffset specifies the offset for each work item id
/// \param DepEvents is the vector of dependency events; it is taken as
/// const std::vector<event> & to allow implicit construction from an init-list
873+
/// \param KernelFunc is the Kernel functor or lambda
874+
/// \param CodeLoc contains the code location of user code
/// \return the event returned by parallel_for_impl
875+
template <typename KernelName = detail::auto_name, typename KernelType>
876+
event parallel_for(range<1> Range, id<1> WorkItemOffset,
877+
const std::vector<event> &DepEvents,
878+
_KERNELFUNCPARAM(KernelFunc)) {
879+
return parallel_for_impl<KernelName>(Range, WorkItemOffset, DepEvents,
880+
KernelFunc);
881+
}
882+
883+
/// parallel_for version with a kernel represented as a lambda + range and
884+
/// offset that specify global size and global offset correspondingly.
885+
///
/// This overload names the dependency events vector explicitly (2-dimensional
/// range and offset) so that the argument can be written as an init-list.
886+
/// \param Range specifies the global work space of the kernel
887+
/// \param WorkItemOffset specifies the offset for each work item id
/// \param DepEvents is the vector of dependency events; it is taken as
/// const std::vector<event> & to allow implicit construction from an init-list
888+
/// \param KernelFunc is the Kernel functor or lambda
889+
/// \param CodeLoc contains the code location of user code
/// \return the event returned by parallel_for_impl
890+
template <typename KernelName = detail::auto_name, typename KernelType>
891+
event parallel_for(range<2> Range, id<2> WorkItemOffset,
892+
const std::vector<event> &DepEvents,
893+
_KERNELFUNCPARAM(KernelFunc)) {
894+
return parallel_for_impl<KernelName>(Range, WorkItemOffset, DepEvents,
895+
KernelFunc);
896+
}
897+
898+
/// parallel_for version with a kernel represented as a lambda + range and
899+
/// offset that specify global size and global offset correspondingly.
900+
///
/// This overload names the dependency events vector explicitly (3-dimensional
/// range and offset) so that the argument can be written as an init-list.
901+
/// \param Range specifies the global work space of the kernel
902+
/// \param WorkItemOffset specifies the offset for each work item id
/// \param DepEvents is the vector of dependency events; it is taken as
/// const std::vector<event> & to allow implicit construction from an init-list
903+
/// \param KernelFunc is the Kernel functor or lambda
904+
/// \param CodeLoc contains the code location of user code
/// \return the event returned by parallel_for_impl
905+
template <typename KernelName = detail::auto_name, typename KernelType>
906+
event parallel_for(range<3> Range, id<3> WorkItemOffset,
907+
const std::vector<event> &DepEvents,
908+
_KERNELFUNCPARAM(KernelFunc)) {
909+
return parallel_for_impl<KernelName>(Range, WorkItemOffset, DepEvents,
910+
KernelFunc);
911+
}
912+
865913
/// parallel_for version with a kernel represented as a lambda + range and
866914
/// offset that specify global size and global offset correspondingly.
867915
///
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
// RUN: %clangxx -fsycl -fsyntax-only %s -o %t.out
2+
//=---queue_offset_shortcut_initlist.cpp - SYCL queue offset shortcuts test--=//
3+
//
4+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5+
// See https://llvm.org/LICENSE.txt for license information.
6+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
//
8+
//===----------------------------------------------------------------------===//
9+
10+
#include <sycl/sycl.hpp>
11+
12+
class KernelNameA;
13+
class KernelNameB;
14+
class KernelNameC;
15+
16+
int main() {
17+
sycl::queue q;
18+
sycl::event e;
19+
// Check that init-list works here: each call below passes the dependency
// events as the braced list {e} together with a range and an offset, once
// per dimensionality (1, 2 and 3). The kernel bodies are intentionally empty.
20+
q.parallel_for<KernelNameA>(sycl::range<1>{1}, sycl::id<1>{0}, {e},
21+
[=](sycl::item<1> i) {});
22+
q.parallel_for<KernelNameB>(sycl::range<2>{1, 1}, sycl::id<2>{0, 0}, {e},
23+
[=](sycl::item<2> i) {});
24+
q.parallel_for<KernelNameC>(sycl::range<3>{1, 1, 1}, sycl::id<3>{0, 0, 0},
25+
{e}, [=](sycl::item<3> i) {});
26+
}

0 commit comments

Comments
 (0)