@@ -53,14 +53,20 @@ using IsReduOptForFastAtomicFetch =
sycl::detail::IsBitAND<T, BinaryOperation>::value)>;
#endif

- // This type trait is used to detect if the group algorithm reduce() used with
- // operands of the type T and the operation Plus is available
- // for using in reduction. Note that this type trait is a subset of
+ // This type trait is used to detect if the atomic operation BinaryOperation
+ // used with operands of the type T is available for using in reduction, in
+ // addition to the cases covered by "IsReduOptForFastAtomicFetch", if the device
+ // has the atomic64 aspect. This type trait should only be used if the device
+ // has the atomic64 aspect. Note that this type trait is currently a subset of
// IsReduOptForFastReduce. The macro SYCL_REDUCTION_DETERMINISTIC prohibits
- // using the reduce() algorithm to produce stable results across same type
- // devices.
+ // using the reduce_over_group() algorithm to produce stable results across same
+ // type devices.
+ // TODO 32 bit floating point atomics are eventually expected to be supported by
+ // the has_fast_atomics specialization. Once the reducer class is updated to
+ // replace the deprecated atomic class with atomic_ref, the (sizeof(T) == 4)
+ // case should be removed here and replaced in IsReduOptForFastAtomicFetch.
template <typename T, class BinaryOperation>
- using IsReduOptForFastFloatAtomicAdd =
+ using IsReduOptForAtomic64Add =
#ifdef SYCL_REDUCTION_DETERMINISTIC
bool_constant<false>;
#else
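
The hunk cuts off before the #else branch, but the new comment pins down what the trait must express: a Plus operation over a 32- or 64-bit floating point type. For intuition, here is a self-contained stand-in sketch using std::plus and std::is_floating_point in place of the SYCL detail helpers (which this hunk does not show); it is an illustration of the intent, not the library code.

#include <functional>
#include <type_traits>

// Stand-in sketch: true for a 4- or 8-byte floating point T combined with a
// plus operation, mirroring what the comment above describes.
template <typename T, class BinaryOperation>
using IsReduOptForAtomic64AddSketch =
    std::bool_constant<std::is_same<BinaryOperation, std::plus<T>>::value &&
                       std::is_floating_point<T>::value &&
                       (sizeof(T) == 4 || sizeof(T) == 8)>;

static_assert(IsReduOptForAtomic64AddSketch<double, std::plus<double>>::value, "");
static_assert(!IsReduOptForAtomic64AddSketch<int, std::plus<int>>::value, "");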
@@ -307,7 +313,7 @@ class reducer<T, BinaryOperation,
/// Atomic ADD operation: for floating point using atomic_ref
template <typename _T = T, class _BinaryOperation = BinaryOperation>
enable_if_t<std::is_same<typename remove_AS<_T>::type, T>::value &&
- IsReduOptForFastFloatAtomicAdd<T, _BinaryOperation>::value>
+ IsReduOptForAtomic64Add<T, _BinaryOperation>::value>
atomic_combine(_T *ReduVarPtr) const {

atomic_ref<T, sycl::ONEAPI::memory_order::relaxed,
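
The /// comment points at the mechanism: the combine is a relaxed atomic_ref fetch-add on the global reduction variable. A minimal standalone sketch of that device-side operation follows; the free function, its names, and the memory scope / address space choices are illustrative assumptions, not the reducer's actual member.

#include <CL/sycl.hpp>

// Sketch: the kind of relaxed atomic add that atomic_combine performs on a
// 64-bit floating point reduction variable (intended for device code).
void atomicAddSketch(double *ReduVarPtr, double PartialSum) {
  sycl::ONEAPI::atomic_ref<double, sycl::ONEAPI::memory_order::relaxed,
                           sycl::ONEAPI::memory_scope::device,
                           sycl::access::address_space::global_space>
      Ref(*ReduVarPtr);
  Ref.fetch_add(PartialSum);
}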
@@ -358,8 +364,8 @@ class reduction_impl : private reduction_impl_base {
using local_accessor_type =
accessor<T, buffer_dim, access::mode::read_write, access::target::local>;

- static constexpr bool has_atomic_add_float =
- IsReduOptForFastFloatAtomicAdd<T, BinaryOperation>::value;
+ static constexpr bool has_atomic_add_float64 =
+ IsReduOptForAtomic64Add<T, BinaryOperation>::value;
static constexpr bool has_fast_atomics =
IsReduOptForFastAtomicFetch<T, BinaryOperation>::value;
static constexpr bool has_fast_reduce =
@@ -667,8 +673,8 @@ class reduction_impl : private reduction_impl_base {
/// accessor. Otherwise, create 1-element global buffer initialized with
/// identity value and return an accessor to that buffer.

- template <bool HasFastAtomics = has_fast_atomics>
- std::enable_if_t<HasFastAtomics || has_atomic_add_float, rw_accessor_type>
+ template <bool HasFastAtomics = (has_fast_atomics || has_atomic_add_float64)>
+ std::enable_if_t<HasFastAtomics, rw_accessor_type>
getReadWriteAccessorToInitializedMem(handler &CGH) {
if (!is_usm && !initializeToIdentity())
return *MRWAcc;
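
The revised signature folds both flags into the default template argument, so the enable_if condition is spelled out once and callers never pass the parameter. A minimal standalone illustration of this C++ pattern (the struct and member names here are hypothetical, not the library's):

#include <type_traits>

struct ReductionLike {
  static constexpr bool has_fast_atomics = false;
  static constexpr bool has_atomic_add_float64 = true;

  // The default template argument evaluates the combined condition; the
  // member only exists when either flag is set, and callers just call get().
  template <bool UseAtomics = (has_fast_atomics || has_atomic_add_float64)>
  std::enable_if_t<UseAtomics, int> get() const {
    return 0;
  }
};

int main() { return ReductionLike{}.get(); }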
@@ -1499,15 +1505,19 @@ void reduCGFunc(handler &CGH, KernelType KernelFunc,
}

// Specialization for devices with the atomic64 aspect, which guarantees 64 (and
- // 32) bit floating point support for atomic add.
+ // temporarily 32) bit floating point support for atomic add.
+ // TODO 32 bit floating point atomics are eventually expected to be supported by
+ // the has_fast_atomics specialization. Corresponding changes to
+ // IsReduOptForAtomic64Add, as prescribed in its documentation, should then also
+ // be made.
template <typename KernelName, typename KernelType, int Dims, class Reduction>
- std::enable_if_t<Reduction::has_atomic_add_float>
+ std::enable_if_t<Reduction::has_atomic_add_float64>
reduCGFuncImplAtomic64(handler &CGH, KernelType KernelFunc,
const nd_range<Dims> &Range, Reduction &,
typename Reduction::rw_accessor_type Out) {
using Name = typename get_reduction_main_kernel_name_t<
KernelName, KernelType, Reduction::is_usm,
- Reduction::has_atomic_add_float,
+ Reduction::has_atomic_add_float64,
typename Reduction::rw_accessor_type>::name;
CGH.parallel_for<Name>(Range, [=](nd_item<Dims> NDIt) {
// Call user's function. Reducer.MValue gets initialized there.
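
Because this overload is only valid on devices exposing the atomic64 aspect, selecting it implies a device capability query somewhere on the host side. A hedged sketch of that kind of check, using the standard aspect query; the helper function is hypothetical and not the library's actual dispatch code:

#include <CL/sycl.hpp>

// Sketch: take the atomic64 reduction path only when the target device
// reports the aspect; otherwise a non-atomic fallback is needed.
bool canUseAtomic64Path(const sycl::queue &Q) {
  return Q.get_device().has(sycl::aspect::atomic64);
}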
@@ -1523,9 +1533,13 @@ reduCGFuncImplAtomic64(handler &CGH, KernelType KernelFunc,
}

// Specialization for devices with the atomic64 aspect, which guarantees 64 (and
- // 32) bit floating point support for atomic add.
+ // temporarily 32) bit floating point support for atomic add.
+ // TODO 32 bit floating point atomics are eventually expected to be supported by
+ // the has_fast_atomics specialization. Corresponding changes to
+ // IsReduOptForAtomic64Add, as prescribed in its documentation, should then also
+ // be made.
template <typename KernelName, typename KernelType, int Dims, class Reduction>
- enable_if_t<Reduction::has_atomic_add_float>
+ enable_if_t<Reduction::has_atomic_add_float64>
reduCGFuncAtomic64(handler &CGH, KernelType KernelFunc,
const nd_range<Dims> &Range, Reduction &Redu) {
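
For reference, this is the kind of user-level reduction the atomic64 path serves: a sum over double, which the fast-atomics route alone does not cover. A sketch against the ONEAPI reduction extension interface of this period; the kernel name, sizes, and values are illustrative.

#include <CL/sycl.hpp>
using namespace sycl;

int main() {
  queue Q;
  // A 64-bit floating point sum reduction; on a device with aspect::atomic64
  // the implementation can combine partial results with atomic adds.
  double *Sum = malloc_shared<double>(1, Q);
  *Sum = 0.0;
  Q.submit([&](handler &CGH) {
     auto Redu = ONEAPI::reduction(Sum, ONEAPI::plus<double>());
     CGH.parallel_for<class Atomic64SumSketch>(
         nd_range<1>{range<1>{1024}, range<1>{64}}, Redu,
         [=](nd_item<1> It, auto &SumArg) { SumArg += 1.0; });
   }).wait();
  sycl::free(Sum, Q);
  return 0;
}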