Skip to content

[SYCL] Support kernels accepting item in range reduction parallel_for #7478

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions sycl/include/sycl/item.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,12 @@ template <typename TransformedArgType, int Dims, typename KernelType>
class RoundedRangeKernel;
template <typename TransformedArgType, int Dims, typename KernelType>
class RoundedRangeKernelWithKH;

// Forward declaration of the reduction helper that produces an
// item<Dims, false> (i.e. with_offset == false) from a range and an id.
// It is declared here so that the item class can name it in a friend
// declaration; the definition is in sycl/reduction_forward.hpp.
namespace reduction {
template <int Dims>
item<Dims, false> getDelinearizedItem(range<Dims> Range, id<Dims> Id);
} // namespace reduction
} // namespace detail
template <int dimensions> class id;
template <int dimensions> class range;

/// Identifies an instance of the function object executing at each point
/// in a range.
Expand Down Expand Up @@ -130,6 +133,10 @@ template <int dimensions = 1, bool with_offset = true> class item {
friend class detail::RoundedRangeKernelWithKH;
void set_allowed_range(const range<dimensions> rnwi) { MImpl.MExtent = rnwi; }

// Befriend the reduction helper so it can reach this class's non-public
// members. NOTE(review): presumably this is needed for the constructor the
// helper uses to build an item from a range and an id -- confirm against the
// constructor's access level.
template <int Dims>
friend item<Dims, false>
detail::reduction::getDelinearizedItem(range<Dims> Range, id<Dims> Id);

detail::ItemBase<dimensions, with_offset> MImpl;
};

Expand Down
12 changes: 11 additions & 1 deletion sycl/include/sycl/reduction.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2369,8 +2369,18 @@ void reduction_parallel_for(handler &CGH, range<Dims> Range,
size_t Start = GroupStart + NDId.get_local_id(0);
size_t End = GroupEnd;
size_t Stride = NDId.get_local_range(0);
// Converts the linear index I into the first argument passed to KernelFunc.
// Exactly one branch is selected at compile time by `if constexpr`:
//  - if KernelFunc is invocable with (id<Dims>, Reducers...), the
//    delinearized id is returned as is;
//  - otherwise the id is wrapped, together with Range, into an
//    item<Dims, false> via reduction::getDelinearizedItem.
auto GetDelinearized = [&](size_t I) {
auto Id = getDelinearizedId(Range, I);
if constexpr (std::is_invocable_v<decltype(KernelFunc), id<Dims>,
decltype(Reducers)...>)
return Id;
else
// SYCL doesn't provide parallel_for accepting offset in presence of
// reductions, so use with_offset==false.
return reduction::getDelinearizedItem(Range, Id);
};
for (size_t I = Start; I < End; I += Stride)
KernelFunc(getDelinearizedId(Range, I), Reducers...);
KernelFunc(GetDelinearized(I), Reducers...);
};
if constexpr (NumArgs == 2) {
using Reduction = std::tuple_element_t<0, decltype(ReduTuple)>;
Expand Down
5 changes: 5 additions & 0 deletions sycl/include/sycl/reduction_forward.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ enum class strategy : int {
// are limited to those below.
inline void finalizeHandler(handler &CGH);
template <class FunctorTy> void withAuxHandler(handler &CGH, FunctorTy Func);

/// Builds an item without offset (with_offset == false) from the given
/// range and id.
///
/// \param Range the range the resulting item is constructed with.
/// \param Id the id the resulting item is constructed with.
/// \return an item<Dims, false> list-initialized from \p Range and \p Id.
template <int Dims>
item<Dims, false> getDelinearizedItem(range<Dims> Range, id<Dims> Id) {
  using ItemTy = item<Dims, false>;
  return ItemTy{Range, Id};
}
} // namespace reduction

template <typename KernelName,
Expand Down