Skip to content

[SYCL][Reduction] Use core group_algorithm.hpp instead of an extension #6419

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 8, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
155 changes: 76 additions & 79 deletions sycl/include/sycl/ext/oneapi/reduction.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,13 @@

#include <CL/sycl/accessor.hpp>
#include <CL/sycl/atomic.hpp>
#include <CL/sycl/atomic_ref.hpp>
#include <CL/sycl/detail/tuple.hpp>
#include <CL/sycl/group_algorithm.hpp>
#include <CL/sycl/handler.hpp>
#include <CL/sycl/kernel.hpp>
#include <CL/sycl/known_identity.hpp>
#include <sycl/ext/oneapi/accessor_property_list.hpp>
#include <sycl/ext/oneapi/group_algorithm.hpp>

#include <tuple>

Expand Down Expand Up @@ -222,8 +223,8 @@ template <class Reducer> class combiner {
auto reducer = static_cast<const Reducer *>(this);
for (size_t E = 0; E < Extent; ++E) {
auto AtomicRef =
atomic_ref<T, memory_order::relaxed, getMemoryScope<Space>(), Space>(
multi_ptr<T, Space>(ReduVarPtr)[E]);
sycl::atomic_ref<T, memory_order::relaxed, getMemoryScope<Space>(),
Space>(multi_ptr<T, Space>(ReduVarPtr)[E]);
Functor(AtomicRef, reducer->getElement(E));
}
}
Expand Down Expand Up @@ -312,13 +313,15 @@ template <class Reducer> class combiner {
/// reduction.
template <typename T, class BinaryOperation, int Dims, size_t Extent,
class Algorithm, bool View>
class reducer<T, BinaryOperation, Dims, Extent, Algorithm, View,
enable_if_t<Dims == 0 && Extent == 1 && View == false &&
!IsKnownIdentityOp<T, BinaryOperation>::value>>
class reducer<
T, BinaryOperation, Dims, Extent, Algorithm, View,
enable_if_t<Dims == 0 && Extent == 1 && View == false &&
!sycl::detail::IsKnownIdentityOp<T, BinaryOperation>::value>>
: public combiner<
reducer<T, BinaryOperation, Dims, Extent, Algorithm, View,
enable_if_t<Dims == 0 && Extent == 1 && View == false &&
!IsKnownIdentityOp<T, BinaryOperation>::value>>> {
!sycl::detail::IsKnownIdentityOp<
T, BinaryOperation>::value>>> {
public:
reducer(const T &Identity, BinaryOperation BOp)
: MValue(Identity), MIdentity(Identity), MBinaryOp(BOp) {}
Expand All @@ -343,13 +346,15 @@ class reducer<T, BinaryOperation, Dims, Extent, Algorithm, View,
/// the identity field inside it and allows to add a default constructor.
template <typename T, class BinaryOperation, int Dims, size_t Extent,
class Algorithm, bool View>
class reducer<T, BinaryOperation, Dims, Extent, Algorithm, View,
enable_if_t<Dims == 0 && Extent == 1 && View == false &&
IsKnownIdentityOp<T, BinaryOperation>::value>>
class reducer<
T, BinaryOperation, Dims, Extent, Algorithm, View,
enable_if_t<Dims == 0 && Extent == 1 && View == false &&
sycl::detail::IsKnownIdentityOp<T, BinaryOperation>::value>>
: public combiner<
reducer<T, BinaryOperation, Dims, Extent, Algorithm, View,
enable_if_t<Dims == 0 && Extent == 1 && View == false &&
IsKnownIdentityOp<T, BinaryOperation>::value>>> {
sycl::detail::IsKnownIdentityOp<
T, BinaryOperation>::value>>> {
public:
reducer() : MValue(getIdentity()) {}
reducer(const T & /* Identity */, BinaryOperation) : MValue(getIdentity()) {}
Expand All @@ -360,7 +365,7 @@ class reducer<T, BinaryOperation, Dims, Extent, Algorithm, View,
}

static T getIdentity() {
return known_identity_impl<BinaryOperation, T>::value;
return sycl::detail::known_identity_impl<BinaryOperation, T>::value;
}

T &getElement(size_t) { return MValue; }
Expand Down Expand Up @@ -390,13 +395,14 @@ class reducer<T, BinaryOperation, Dims, Extent, Algorithm, View,
/// subscript operator.
template <typename T, class BinaryOperation, int Dims, size_t Extent,
class Algorithm, bool View>
class reducer<T, BinaryOperation, Dims, Extent, Algorithm, View,
enable_if_t<Dims == 1 && View == false &&
!IsKnownIdentityOp<T, BinaryOperation>::value>>
: public combiner<
reducer<T, BinaryOperation, Dims, Extent, Algorithm, View,
enable_if_t<Dims == 1 && View == false &&
!IsKnownIdentityOp<T, BinaryOperation>::value>>> {
class reducer<
T, BinaryOperation, Dims, Extent, Algorithm, View,
enable_if_t<Dims == 1 && View == false &&
!sycl::detail::IsKnownIdentityOp<T, BinaryOperation>::value>>
: public combiner<reducer<T, BinaryOperation, Dims, Extent, Algorithm, View,
enable_if_t<Dims == 1 && View == false &&
!sycl::detail::IsKnownIdentityOp<
T, BinaryOperation>::value>>> {
public:
reducer(const T &Identity, BinaryOperation BOp)
: MValue(Identity), MIdentity(Identity), MBinaryOp(BOp) {}
Expand All @@ -420,13 +426,14 @@ class reducer<T, BinaryOperation, Dims, Extent, Algorithm, View,
/// in cases where the identity value is known.
template <typename T, class BinaryOperation, int Dims, size_t Extent,
class Algorithm, bool View>
class reducer<T, BinaryOperation, Dims, Extent, Algorithm, View,
enable_if_t<Dims == 1 && View == false &&
IsKnownIdentityOp<T, BinaryOperation>::value>>
: public combiner<
reducer<T, BinaryOperation, Dims, Extent, Algorithm, View,
enable_if_t<Dims == 1 && View == false &&
IsKnownIdentityOp<T, BinaryOperation>::value>>> {
class reducer<
T, BinaryOperation, Dims, Extent, Algorithm, View,
enable_if_t<Dims == 1 && View == false &&
sycl::detail::IsKnownIdentityOp<T, BinaryOperation>::value>>
: public combiner<reducer<T, BinaryOperation, Dims, Extent, Algorithm, View,
enable_if_t<Dims == 1 && View == false &&
sycl::detail::IsKnownIdentityOp<
T, BinaryOperation>::value>>> {
public:
reducer() : MValue(getIdentity()) {}
reducer(const T & /* Identity */, BinaryOperation) : MValue(getIdentity()) {}
Expand All @@ -439,7 +446,7 @@ class reducer<T, BinaryOperation, Dims, Extent, Algorithm, View,
}

static T getIdentity() {
return known_identity_impl<BinaryOperation, T>::value;
return sycl::detail::known_identity_impl<BinaryOperation, T>::value;
}

T &getElement(size_t E) { return MValue[E]; }
Expand All @@ -464,14 +471,14 @@ template <typename T, class BinaryOperation> class reduction_impl_common {
public:
/// Returns the statically known identity value.
template <typename _T = T, class _BinaryOperation = BinaryOperation>
enable_if_t<IsKnownIdentityOp<_T, _BinaryOperation>::value,
enable_if_t<sycl::detail::IsKnownIdentityOp<_T, _BinaryOperation>::value,
_T> constexpr getIdentity() {
return known_identity_impl<_BinaryOperation, _T>::value;
return sycl::detail::known_identity_impl<_BinaryOperation, _T>::value;
}

/// Returns the identity value given by user.
template <typename _T = T, class _BinaryOperation = BinaryOperation>
enable_if_t<!IsKnownIdentityOp<_T, _BinaryOperation>::value, _T>
enable_if_t<!sycl::detail::IsKnownIdentityOp<_T, _BinaryOperation>::value, _T>
getIdentity() {
return MIdentity;
}
Expand Down Expand Up @@ -752,8 +759,8 @@ class reduction_impl
/// SYCL-2020.
/// Constructs reduction_impl when the identity value is statically known.
template <typename _T, typename AllocatorT,
std::enable_if_t<IsKnownIdentityOp<_T, BinaryOperation>::value> * =
nullptr>
std::enable_if_t<sycl::detail::IsKnownIdentityOp<
_T, BinaryOperation>::value> * = nullptr>
reduction_impl(buffer<_T, 1, AllocatorT> Buffer, handler &CGH,
bool InitializeToIdentity)
: algo(reducer_type::getIdentity(), BinaryOperation(),
Expand All @@ -766,9 +773,8 @@ class reduction_impl
}

/// Constructs reduction_impl when the identity value is statically known.
template <
typename _T = T,
enable_if_t<IsKnownIdentityOp<_T, BinaryOperation>::value> * = nullptr>
template <typename _T = T, enable_if_t<sycl::detail::IsKnownIdentityOp<
_T, BinaryOperation>::value> * = nullptr>
reduction_impl(rw_accessor_type &Acc)
: algo(reducer_type::getIdentity(), BinaryOperation(), false,
std::make_shared<rw_accessor_type>(Acc)) {
Expand All @@ -779,9 +785,8 @@ class reduction_impl
}

/// Constructs reduction_impl when the identity value is statically known.
template <
typename _T = T,
enable_if_t<IsKnownIdentityOp<_T, BinaryOperation>::value> * = nullptr>
template <typename _T = T, enable_if_t<sycl::detail::IsKnownIdentityOp<
_T, BinaryOperation>::value> * = nullptr>
reduction_impl(dw_accessor_type &Acc)
: algo(reducer_type::getIdentity(), BinaryOperation(), true,
std::make_shared<dw_accessor_type>(Acc)) {
Expand All @@ -796,7 +801,8 @@ class reduction_impl
/// and user still passed the identity value.
template <
typename _T, typename AllocatorT,
enable_if_t<IsKnownIdentityOp<_T, BinaryOperation>::value> * = nullptr>
enable_if_t<sycl::detail::IsKnownIdentityOp<_T, BinaryOperation>::value>
* = nullptr>
reduction_impl(buffer<_T, 1, AllocatorT> Buffer, handler &CGH,
const T & /*Identity*/, BinaryOperation,
bool InitializeToIdentity)
Expand All @@ -822,9 +828,8 @@ class reduction_impl

/// Constructs reduction_impl when the identity value is statically known,
/// and user still passed the identity value.
template <
typename _T = T,
enable_if_t<IsKnownIdentityOp<_T, BinaryOperation>::value> * = nullptr>
template <typename _T = T, enable_if_t<sycl::detail::IsKnownIdentityOp<
_T, BinaryOperation>::value> * = nullptr>
reduction_impl(rw_accessor_type &Acc, const T & /*Identity*/, BinaryOperation)
: algo(reducer_type::getIdentity(), BinaryOperation(), false,
std::make_shared<rw_accessor_type>(Acc)) {
Expand All @@ -847,9 +852,8 @@ class reduction_impl

/// Constructs reduction_impl when the identity value is statically known,
/// and user still passed the identity value.
template <
typename _T = T,
enable_if_t<IsKnownIdentityOp<_T, BinaryOperation>::value> * = nullptr>
template <typename _T = T, enable_if_t<sycl::detail::IsKnownIdentityOp<
_T, BinaryOperation>::value> * = nullptr>
reduction_impl(dw_accessor_type &Acc, const T & /*Identity*/, BinaryOperation)
: algo(reducer_type::getIdentity(), BinaryOperation(), true,
std::make_shared<dw_accessor_type>(Acc)) {
Expand All @@ -874,7 +878,8 @@ class reduction_impl
/// Constructs reduction_impl when the identity value is NOT known statically.
template <
typename _T, typename AllocatorT,
enable_if_t<!IsKnownIdentityOp<_T, BinaryOperation>::value> * = nullptr>
enable_if_t<!sycl::detail::IsKnownIdentityOp<_T, BinaryOperation>::value>
* = nullptr>
reduction_impl(buffer<_T, 1, AllocatorT> Buffer, handler &CGH,
const T &Identity, BinaryOperation BOp,
bool InitializeToIdentity)
Expand All @@ -888,9 +893,8 @@ class reduction_impl
}

/// Constructs reduction_impl when the identity value is unknown.
template <
typename _T = T,
enable_if_t<!IsKnownIdentityOp<_T, BinaryOperation>::value> * = nullptr>
template <typename _T = T, enable_if_t<!sycl::detail::IsKnownIdentityOp<
_T, BinaryOperation>::value> * = nullptr>
reduction_impl(rw_accessor_type &Acc, const T &Identity, BinaryOperation BOp)
: algo(Identity, BOp, false, std::make_shared<rw_accessor_type>(Acc)) {
if (Acc.size() != 1)
Expand All @@ -900,9 +904,8 @@ class reduction_impl
}

/// Constructs reduction_impl when the identity value is unknown.
template <
typename _T = T,
enable_if_t<!IsKnownIdentityOp<_T, BinaryOperation>::value> * = nullptr>
template <typename _T = T, enable_if_t<!sycl::detail::IsKnownIdentityOp<
_T, BinaryOperation>::value> * = nullptr>
reduction_impl(dw_accessor_type &Acc, const T &Identity, BinaryOperation BOp)
: algo(Identity, BOp, true, std::make_shared<dw_accessor_type>(Acc)) {
if (Acc.size() != 1)
Expand All @@ -915,9 +918,8 @@ class reduction_impl
/// The \param VarPtr is a USM pointer to memory, to where the computed
/// reduction value is added using BinaryOperation, i.e. it is expected that
/// the memory is pre-initialized with some meaningful value.
template <
typename _T = T,
enable_if_t<IsKnownIdentityOp<_T, BinaryOperation>::value> * = nullptr>
template <typename _T = T, enable_if_t<sycl::detail::IsKnownIdentityOp<
_T, BinaryOperation>::value> * = nullptr>
reduction_impl(T *VarPtr, bool InitializeToIdentity = false)
: algo(reducer_type::getIdentity(), BinaryOperation(),
InitializeToIdentity, VarPtr) {}
Expand All @@ -927,9 +929,8 @@ class reduction_impl
/// The \param VarPtr is a USM pointer to memory, to where the computed
/// reduction value is added using BinaryOperation, i.e. it is expected that
/// the memory is pre-initialized with some meaningful value.
template <
typename _T = T,
enable_if_t<IsKnownIdentityOp<_T, BinaryOperation>::value> * = nullptr>
template <typename _T = T, enable_if_t<sycl::detail::IsKnownIdentityOp<
_T, BinaryOperation>::value> * = nullptr>
reduction_impl(T *VarPtr, const T &Identity, BinaryOperation,
bool InitializeToIdentity = false)
: algo(Identity, BinaryOperation(), InitializeToIdentity, VarPtr) {
Expand All @@ -950,35 +951,31 @@ class reduction_impl
/// The \param VarPtr is a USM pointer to memory, to where the computed
/// reduction value is added using BinaryOperation, i.e. it is expected that
/// the memory is pre-initialized with some meaningful value.
template <
typename _T = T,
enable_if_t<!IsKnownIdentityOp<_T, BinaryOperation>::value> * = nullptr>
template <typename _T = T, enable_if_t<!sycl::detail::IsKnownIdentityOp<
_T, BinaryOperation>::value> * = nullptr>
reduction_impl(T *VarPtr, const T &Identity, BinaryOperation BOp,
bool InitializeToIdentity = false)
: algo(Identity, BOp, InitializeToIdentity, VarPtr) {}

/// Constructs reduction_impl when the identity value is statically known
template <
typename _T = T,
enable_if_t<IsKnownIdentityOp<_T, BinaryOperation>::value> * = nullptr>
template <typename _T = T, enable_if_t<sycl::detail::IsKnownIdentityOp<
_T, BinaryOperation>::value> * = nullptr>
reduction_impl(span<_T, Extent> Span, bool InitializeToIdentity = false)
: algo(reducer_type::getIdentity(), BinaryOperation(),
InitializeToIdentity, Span.data()) {}

/// Constructs reduction_impl when the identity value is statically known
/// and user passed an identity value anyway
template <
typename _T = T,
enable_if_t<IsKnownIdentityOp<_T, BinaryOperation>::value> * = nullptr>
template <typename _T = T, enable_if_t<sycl::detail::IsKnownIdentityOp<
_T, BinaryOperation>::value> * = nullptr>
reduction_impl(span<_T, Extent> Span, const T & /* Identity */,
BinaryOperation BOp, bool InitializeToIdentity = false)
: algo(reducer_type::getIdentity(), BOp, InitializeToIdentity,
Span.data()) {}

/// Constructs reduction_impl when the identity value is not statically known
template <
typename _T = T,
enable_if_t<!IsKnownIdentityOp<_T, BinaryOperation>::value> * = nullptr>
template <typename _T = T, enable_if_t<!sycl::detail::IsKnownIdentityOp<
_T, BinaryOperation>::value> * = nullptr>
reduction_impl(span<T, Extent> Span, const T &Identity, BinaryOperation BOp,
bool InitializeToIdentity = false)
: algo(Identity, BOp, InitializeToIdentity, Span.data()) {}
Expand Down Expand Up @@ -1105,8 +1102,8 @@ void reduCGFuncForRangeFastReduce(handler &CGH, KernelType KernelFunc,
// Signal this work-group has finished after all values are reduced
if (LID == 0) {
auto NFinished =
atomic_ref<int, memory_order::relaxed, memory_scope::device,
access::address_space::global_space>(
sycl::atomic_ref<int, memory_order::relaxed, memory_scope::device,
access::address_space::global_space>(
NWorkGroupsFinished[0]);
DoReducePartialSumsInLastWG[0] =
++NFinished == NWorkGroups && NWorkGroups > 1;
Expand Down Expand Up @@ -1204,8 +1201,8 @@ void reduCGFuncForRangeBasic(handler &CGH, KernelType KernelFunc,
// Signal this work-group has finished after all values are reduced
if (LID == 0) {
auto NFinished =
atomic_ref<int, memory_order::relaxed, memory_scope::device,
access::address_space::global_space>(
sycl::atomic_ref<int, memory_order::relaxed, memory_scope::device,
access::address_space::global_space>(
NWorkGroupsFinished[0]);
DoReducePartialSumsInLastWG[0] =
++NFinished == NWorkGroups && NWorkGroups > 1;
Expand Down Expand Up @@ -1302,7 +1299,7 @@ void reduCGFuncForNDRangeBothFastReduceAndAtomics(
typename Reduction::binary_operation BOp;
for (int E = 0; E < NElements; ++E) {
Reducer.getElement(E) =
ext::oneapi::reduce(NDIt.get_group(), Reducer.getElement(E), BOp);
reduce_over_group(NDIt.get_group(), Reducer.getElement(E), BOp);
}
if (NDIt.get_local_linear_id() == 0)
Reducer.atomic_combine(Reduction::getOutPointer(Out));
Expand Down Expand Up @@ -1426,7 +1423,7 @@ void reduCGFuncForNDRangeFastReduceOnly(
for (int E = 0; E < NElements; ++E) {
typename Reduction::result_type PSum;
PSum = Reducer.getElement(E);
PSum = ext::oneapi::reduce(NDIt.get_group(), PSum, BOp);
PSum = reduce_over_group(NDIt.get_group(), PSum, BOp);
if (NDIt.get_local_linear_id() == 0) {
if (IsUpdateOfUserVar)
PSum = BOp(Reduction::getOutPointer(Out)[E], PSum);
Expand Down Expand Up @@ -1556,7 +1553,7 @@ void reduAuxCGFuncFastReduceImpl(handler &CGH, bool UniformWG,
(UniformWG || (GID < NWorkItems))
? In[GID * NElements + E]
: Reduction::reducer_type::getIdentity();
PSum = ext::oneapi::reduce(NDIt.get_group(), PSum, BOp);
PSum = reduce_over_group(NDIt.get_group(), PSum, BOp);
if (NDIt.get_local_linear_id() == 0) {
if (IsUpdateOfUserVar)
PSum = BOp(Reduction::getOutPointer(Out)[E], PSum);
Expand Down Expand Up @@ -2587,7 +2584,7 @@ reduction(accessor<T, Dims, AccMode, access::target::device, IsPH> &Acc,
/// The identity value is not passed to this version as it is statically known.
template <typename T, class BinaryOperation, int Dims, access::mode AccMode,
access::placeholder IsPH>
std::enable_if_t<detail::IsKnownIdentityOp<T, BinaryOperation>::value,
std::enable_if_t<sycl::detail::IsKnownIdentityOp<T, BinaryOperation>::value,
detail::reduction_impl<
T, BinaryOperation, 0, 1,
detail::default_reduction_algorithm<false, IsPH, Dims>>>
Expand Down Expand Up @@ -2615,7 +2612,7 @@ reduction(T *VarPtr, const T &Identity, BinaryOperation BOp) {
/// The identity value is not passed to this version as it is statically known.
template <typename T, class BinaryOperation>
std::enable_if_t<
detail::IsKnownIdentityOp<T, BinaryOperation>::value,
sycl::detail::IsKnownIdentityOp<T, BinaryOperation>::value,
detail::reduction_impl<T, BinaryOperation, 0, 1,
detail::default_reduction_algorithm<
true, access::placeholder::false_t, 1>>>
Expand Down