Skip to content

Commit ab8a600

Browse files
author
JackAKirk
committed
Renamed has_atomic_add_float to has_atomic_add_float64.
Renamed has_atomic_add_float to has_atomic_add_float64, since its current general usage, which also covers float32, is only expected to be temporary. has_atomic_add_float64 is an alias for the value of IsReduOptForAtomic64Add. Updated the documentation to describe the current temporary usage of fp32 within IsReduOptForAtomic64Add. IsReduOptForFastFloatAtomicAdd has been renamed to IsReduOptForAtomic64Add to make clear that this boolean should only be used when the device has the SYCL 2020 atomic64 aspect, consistent with the naming convention used in other functions that are specializations for the atomic64 aspect. Signed-off-by: JackAKirk <[email protected]>
1 parent 70a2c54 commit ab8a600

File tree

2 files changed

+33
-19
lines changed

2 files changed

+33
-19
lines changed

sycl/include/CL/sycl/ONEAPI/reduction.hpp

Lines changed: 30 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -53,14 +53,20 @@ using IsReduOptForFastAtomicFetch =
5353
sycl::detail::IsBitAND<T, BinaryOperation>::value)>;
5454
#endif
5555

56-
// This type trait is used to detect if the group algorithm reduce() used with
57-
// operands of the type T and the operation Plus is available
58-
// for using in reduction. Note that this type trait is a subset of
56+
// This type trait is used to detect if the atomic operation BinaryOperation
57+
// used with operands of the type T is available for using in reduction, in
58+
// addition to the cases covered by "IsReduOptForFastAtomicFetch", if the device
59+
// has the atomic64 aspect. This type trait should only be used if the device
60+
// has the atomic64 aspect. Note that this type trait is currently a subset of
5961
// IsReduOptForFastReduce. The macro SYCL_REDUCTION_DETERMINISTIC prohibits
60-
// using the reduce() algorithm to produce stable results across same type
61-
// devices.
62+
// using the reduce_over_group() algorithm to produce stable results across same
63+
// type devices.
64+
// TODO 32 bit floating point atomics are eventually expected to be supported by
65+
// the has_fast_atomics specialization. Once the reducer class is updated to
66+
// replace the deprecated atomic class with atomic_ref, the (sizeof(T) == 4)
67+
// case should be removed here and replaced in IsReduOptForFastAtomicFetch.
6268
template <typename T, class BinaryOperation>
63-
using IsReduOptForFastFloatAtomicAdd =
69+
using IsReduOptForAtomic64Add =
6470
#ifdef SYCL_REDUCTION_DETERMINISTIC
6571
bool_constant<false>;
6672
#else
@@ -307,7 +313,7 @@ class reducer<T, BinaryOperation,
307313
/// Atomic ADD operation: for floating point using atomic_ref
308314
template <typename _T = T, class _BinaryOperation = BinaryOperation>
309315
enable_if_t<std::is_same<typename remove_AS<_T>::type, T>::value &&
310-
IsReduOptForFastFloatAtomicAdd<T, _BinaryOperation>::value>
316+
IsReduOptForAtomic64Add<T, _BinaryOperation>::value>
311317
atomic_combine(_T *ReduVarPtr) const {
312318

313319
atomic_ref<T, sycl::ONEAPI::memory_order::relaxed,
@@ -358,8 +364,8 @@ class reduction_impl : private reduction_impl_base {
358364
using local_accessor_type =
359365
accessor<T, buffer_dim, access::mode::read_write, access::target::local>;
360366

361-
static constexpr bool has_atomic_add_float =
362-
IsReduOptForFastFloatAtomicAdd<T, BinaryOperation>::value;
367+
static constexpr bool has_atomic_add_float64 =
368+
IsReduOptForAtomic64Add<T, BinaryOperation>::value;
363369
static constexpr bool has_fast_atomics =
364370
IsReduOptForFastAtomicFetch<T, BinaryOperation>::value;
365371
static constexpr bool has_fast_reduce =
@@ -667,8 +673,8 @@ class reduction_impl : private reduction_impl_base {
667673
/// accessor. Otherwise, create 1-element global buffer initialized with
668674
/// identity value and return an accessor to that buffer.
669675

670-
template <bool HasFastAtomics = has_fast_atomics>
671-
std::enable_if_t<HasFastAtomics || has_atomic_add_float, rw_accessor_type>
676+
template <bool HasFastAtomics = (has_fast_atomics || has_atomic_add_float64)>
677+
std::enable_if_t<HasFastAtomics, rw_accessor_type>
672678
getReadWriteAccessorToInitializedMem(handler &CGH) {
673679
if (!is_usm && !initializeToIdentity())
674680
return *MRWAcc;
@@ -1499,15 +1505,19 @@ void reduCGFunc(handler &CGH, KernelType KernelFunc,
14991505
}
15001506

15011507
// Specialization for devices with the atomic64 aspect, which guarantees 64 (and
1502-
// 32) bit floating point support for atomic add.
1508+
// temporarily 32) bit floating point support for atomic add.
1509+
// TODO 32 bit floating point atomics are eventually expected to be supported by
1510+
// the has_fast_atomics specialization. Corresponding changes to
1511+
// IsReduOptForAtomic64Add, as prescribed in its documentation, should then also
1512+
// be made.
15031513
template <typename KernelName, typename KernelType, int Dims, class Reduction>
1504-
std::enable_if_t<Reduction::has_atomic_add_float>
1514+
std::enable_if_t<Reduction::has_atomic_add_float64>
15051515
reduCGFuncImplAtomic64(handler &CGH, KernelType KernelFunc,
15061516
const nd_range<Dims> &Range, Reduction &,
15071517
typename Reduction::rw_accessor_type Out) {
15081518
using Name = typename get_reduction_main_kernel_name_t<
15091519
KernelName, KernelType, Reduction::is_usm,
1510-
Reduction::has_atomic_add_float,
1520+
Reduction::has_atomic_add_float64,
15111521
typename Reduction::rw_accessor_type>::name;
15121522
CGH.parallel_for<Name>(Range, [=](nd_item<Dims> NDIt) {
15131523
// Call user's function. Reducer.MValue gets initialized there.
@@ -1523,9 +1533,13 @@ reduCGFuncImplAtomic64(handler &CGH, KernelType KernelFunc,
15231533
}
15241534

15251535
// Specialization for devices with the atomic64 aspect, which guarantees 64 (and
1526-
// 32) bit floating point support for atomic add.
1536+
// temporarily 32) bit floating point support for atomic add.
1537+
// TODO 32 bit floating point atomics are eventually expected to be supported by
1538+
// the has_fast_atomics specialization. Corresponding changes to
1539+
// IsReduOptForAtomic64Add, as prescribed in its documentation, should then also
1540+
// be made.
15271541
template <typename KernelName, typename KernelType, int Dims, class Reduction>
1528-
enable_if_t<Reduction::has_atomic_add_float>
1542+
enable_if_t<Reduction::has_atomic_add_float64>
15291543
reduCGFuncAtomic64(handler &CGH, KernelType KernelFunc,
15301544
const nd_range<Dims> &Range, Reduction &Redu) {
15311545

sycl/include/CL/sycl/handler.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ using cl::sycl::detail::enable_if_t;
211211
using cl::sycl::detail::queue_impl;
212212

213213
template <typename KernelName, typename KernelType, int Dims, class Reduction>
214-
enable_if_t<Reduction::has_atomic_add_float>
214+
enable_if_t<Reduction::has_atomic_add_float64>
215215
reduCGFuncAtomic64(handler &CGH, KernelType KernelFunc,
216216
const nd_range<Dims> &Range, Reduction &Redu);
217217

@@ -1396,7 +1396,7 @@ class __SYCL_EXPORT handler {
13961396
// seem efficient.
13971397
template <typename KernelName = detail::auto_name, typename KernelType,
13981398
int Dims, typename Reduction>
1399-
detail::enable_if_t<Reduction::has_atomic_add_float>
1399+
detail::enable_if_t<Reduction::has_atomic_add_float64>
14001400
parallel_for(nd_range<Dims> Range, Reduction Redu,
14011401
_KERNELFUNCPARAM(KernelFunc)) {
14021402

@@ -1438,7 +1438,7 @@ class __SYCL_EXPORT handler {
14381438
template <typename KernelName = detail::auto_name, typename KernelType,
14391439
int Dims, typename Reduction>
14401440
detail::enable_if_t<!Reduction::has_fast_atomics &&
1441-
!Reduction::has_atomic_add_float>
1441+
!Reduction::has_atomic_add_float64>
14421442
parallel_for(nd_range<Dims> Range, Reduction Redu,
14431443
_KERNELFUNCPARAM(KernelFunc)) {
14441444

0 commit comments

Comments
 (0)