Skip to content

Commit 65c378e

Browse files
committed
[SYCL] Fix types and transparent functors recognition in reduction
1. Enable operator*, operator+, operator|, operator&, operator^= for corresponding transparent functors used in reduction. 2. Fix the case when the reduction object is passed to parallel_for as an R-value. 3. Allow identity-less constructors for reductions with transparent functors. 4. Replace some 'auto' declarations with Reduction::result_type and add intermediate assignments/casts to avoid type ambiguities caused by using the sycl::half type, which may also be caused by custom/user types. Signed-off-by: Vyacheslav N Klochkov <[email protected]>
1 parent 4e6cf6f commit 65c378e

File tree

3 files changed

+204
-82
lines changed

3 files changed

+204
-82
lines changed

sycl/include/CL/sycl/handler.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -853,7 +853,7 @@ class __SYCL_EXPORT handler {
853853
int Dims, typename Reduction>
854854
detail::enable_if_t<Reduction::accessor_mode == access::mode::read_write &&
855855
Reduction::has_fast_atomics>
856-
parallel_for(nd_range<Dims> Range, Reduction &Redu, KernelType KernelFunc) {
856+
parallel_for(nd_range<Dims> Range, Reduction Redu, KernelType KernelFunc) {
857857
if (Reduction::is_usm)
858858
Redu.associateWithHandler(*this);
859859
shared_ptr_class<detail::queue_impl> QueueCopy = MQueue;
@@ -886,7 +886,7 @@ class __SYCL_EXPORT handler {
886886
int Dims, typename Reduction>
887887
detail::enable_if_t<Reduction::accessor_mode == access::mode::discard_write &&
888888
Reduction::has_fast_atomics>
889-
parallel_for(nd_range<Dims> Range, Reduction &Redu, KernelType KernelFunc) {
889+
parallel_for(nd_range<Dims> Range, Reduction Redu, KernelType KernelFunc) {
890890
shared_ptr_class<detail::queue_impl> QueueCopy = MQueue;
891891
auto RWAcc = Redu.getReadWriteScalarAcc(*this);
892892
intel::detail::reduCGFunc<KernelName>(*this, KernelFunc, Range, Redu,
@@ -920,7 +920,7 @@ class __SYCL_EXPORT handler {
920920
template <typename KernelName = detail::auto_name, typename KernelType,
921921
int Dims, typename Reduction>
922922
detail::enable_if_t<!Reduction::has_fast_atomics>
923-
parallel_for(nd_range<Dims> Range, Reduction &Redu, KernelType KernelFunc) {
923+
parallel_for(nd_range<Dims> Range, Reduction Redu, KernelType KernelFunc) {
924924
size_t NWorkGroups = Range.get_group_range().size();
925925

926926
// This parallel_for() is lowered to the following sequence:

0 commit comments

Comments
 (0)