@@ -200,9 +200,11 @@ template <typename ReducerT> class ReducerAccess {
200
200
ReducerT &MReducerRef;
201
201
};
202
202
203
- // Deduction guide to simplify the use of ReducerAccess.
204
- template <typename ReducerT>
205
- ReducerAccess (ReducerT &) -> ReducerAccess<ReducerT>;
203
+ // Helper function to simplify the use of ReducerAccess. This avoids the need
204
+ // for potentially unsupported deduction guides.
205
+ template <typename ReducerT> auto getReducerAccess (ReducerT &Reducer) {
206
+ return ReducerAccess<ReducerT>{Reducer};
207
+ }
206
208
207
209
/// Use CRTP to avoid redefining shorthand operators in terms of combine
208
210
///
@@ -283,7 +285,7 @@ template <class Reducer> class combiner {
283
285
auto AtomicRef = sycl::atomic_ref<T, memory_order::relaxed,
284
286
getMemoryScope<Space>(), Space>(
285
287
address_space_cast<Space, access::decorated::no>(ReduVarPtr)[E]);
286
- Functor (std::move (AtomicRef), ReducerAccess{ *reducer} .getElement (E));
288
+ Functor (std::move (AtomicRef), getReducerAccess ( *reducer) .getElement (E));
287
289
}
288
290
}
289
291
@@ -956,7 +958,7 @@ struct NDRangeReduction<reduction::strategy::local_atomic_and_atomic_cross_wg> {
956
958
// Work-group cooperates to initialize multiple reduction variables
957
959
auto LID = NDId.get_local_id (0 );
958
960
for (size_t E = LID; E < NElements; E += NDId.get_local_range (0 )) {
959
- GroupSum[E] = ReducerAccess (Reducer).getIdentity ();
961
+ GroupSum[E] = getReducerAccess (Reducer).getIdentity ();
960
962
}
961
963
workGroupBarrier ();
962
964
@@ -969,7 +971,7 @@ struct NDRangeReduction<reduction::strategy::local_atomic_and_atomic_cross_wg> {
969
971
workGroupBarrier ();
970
972
if (LID == 0 ) {
971
973
for (size_t E = 0 ; E < NElements; ++E) {
972
- ReducerAccess{ Reducer} .getElement (E) = GroupSum[E];
974
+ getReducerAccess ( Reducer) .getElement (E) = GroupSum[E];
973
975
}
974
976
Reducer.template atomic_combine (&Out[0 ]);
975
977
}
@@ -1019,7 +1021,7 @@ struct NDRangeReduction<
1019
1021
// reduce_over_group is only defined for each T, not for span<T, ...>
1020
1022
size_t LID = NDId.get_local_id (0 );
1021
1023
for (int E = 0 ; E < NElements; ++E) {
1022
- auto &RedElem = ReducerAccess{ Reducer} .getElement (E);
1024
+ auto &RedElem = getReducerAccess ( Reducer) .getElement (E);
1023
1025
RedElem = reduce_over_group (Group, RedElem, BOp);
1024
1026
if (LID == 0 ) {
1025
1027
if (NWorkGroups == 1 ) {
@@ -1030,7 +1032,7 @@ struct NDRangeReduction<
1030
1032
Out[E] = RedElem;
1031
1033
} else {
1032
1034
PartialSums[NDId.get_group_linear_id () * NElements + E] =
1033
- ReducerAccess{ Reducer} .getElement (E);
1035
+ getReducerAccess ( Reducer) .getElement (E);
1034
1036
}
1035
1037
}
1036
1038
}
@@ -1053,7 +1055,7 @@ struct NDRangeReduction<
1053
1055
// Reduce each result separately
1054
1056
// TODO: Opportunity to parallelize across elements.
1055
1057
for (int E = 0 ; E < NElements; ++E) {
1056
- auto LocalSum = ReducerAccess{ Reducer} .getIdentity ();
1058
+ auto LocalSum = getReducerAccess ( Reducer) .getIdentity ();
1057
1059
for (size_t I = LID; I < NWorkGroups; I += WGSize)
1058
1060
LocalSum = BOp (LocalSum, PartialSums[I * NElements + E]);
1059
1061
auto Result = reduce_over_group (Group, LocalSum, BOp);
@@ -1143,7 +1145,7 @@ template <> struct NDRangeReduction<reduction::strategy::range_basic> {
1143
1145
for (int E = 0 ; E < NElements; ++E) {
1144
1146
1145
1147
// Copy the element to local memory to prepare it for tree-reduction.
1146
- LocalReds[LID] = ReducerAccess{ Reducer} .getElement (E);
1148
+ LocalReds[LID] = getReducerAccess ( Reducer) .getElement (E);
1147
1149
1148
1150
doTreeReduction (WGSize, LID, false , Identity, LocalReds, BOp,
1149
1151
[&]() { workGroupBarrier (); });
@@ -1218,8 +1220,8 @@ struct NDRangeReduction<reduction::strategy::group_reduce_and_atomic_cross_wg> {
1218
1220
1219
1221
typename Reduction::binary_operation BOp;
1220
1222
for (int E = 0 ; E < NElements; ++E) {
1221
- ReducerAccess{ Reducer} .getElement (E) = reduce_over_group (
1222
- NDIt.get_group (), ReducerAccess{ Reducer} .getElement (E), BOp);
1223
+ getReducerAccess ( Reducer) .getElement (E) = reduce_over_group (
1224
+ NDIt.get_group (), getReducerAccess ( Reducer) .getElement (E), BOp);
1223
1225
}
1224
1226
if (NDIt.get_local_linear_id () == 0 )
1225
1227
Reducer.atomic_combine (&Out[0 ]);
@@ -1267,15 +1269,15 @@ struct NDRangeReduction<
1267
1269
for (int E = 0 ; E < NElements; ++E) {
1268
1270
1269
1271
// Copy the element to local memory to prepare it for tree-reduction.
1270
- LocalReds[LID] = ReducerAccess{ Reducer} .getElement (E);
1272
+ LocalReds[LID] = getReducerAccess ( Reducer) .getElement (E);
1271
1273
1272
1274
typename Reduction::binary_operation BOp;
1273
1275
doTreeReduction (WGSize, LID, IsPow2WG,
1274
- ReducerAccess{ Reducer} .getIdentity (), LocalReds, BOp ,
1275
- [&]() { NDIt.barrier (); });
1276
+ getReducerAccess ( Reducer) .getIdentity (), LocalReds,
1277
+ BOp, [&]() { NDIt.barrier (); });
1276
1278
1277
1279
if (LID == 0 ) {
1278
- ReducerAccess{ Reducer} .getElement (E) =
1280
+ getReducerAccess ( Reducer) .getElement (E) =
1279
1281
IsPow2WG ? LocalReds[0 ] : BOp (LocalReds[0 ], LocalReds[WGSize]);
1280
1282
}
1281
1283
@@ -1343,7 +1345,7 @@ struct NDRangeReduction<
1343
1345
typename Reduction::binary_operation BOp;
1344
1346
for (int E = 0 ; E < NElements; ++E) {
1345
1347
typename Reduction::result_type PSum;
1346
- PSum = ReducerAccess{ Reducer} .getElement (E);
1348
+ PSum = getReducerAccess ( Reducer) .getElement (E);
1347
1349
PSum = reduce_over_group (NDIt.get_group (), PSum, BOp);
1348
1350
if (NDIt.get_local_linear_id () == 0 ) {
1349
1351
if (IsUpdateOfUserVar)
@@ -1482,7 +1484,7 @@ template <> struct NDRangeReduction<reduction::strategy::basic> {
1482
1484
for (int E = 0 ; E < NElements; ++E) {
1483
1485
1484
1486
// Copy the element to local memory to prepare it for tree-reduction.
1485
- LocalReds[LID] = ReducerAccess{ Reducer} .getElement (E);
1487
+ LocalReds[LID] = getReducerAccess ( Reducer) .getElement (E);
1486
1488
1487
1489
doTreeReduction (WGSize, LID, IsPow2WG, ReduIdentity, LocalReds, BOp,
1488
1490
[&]() { NDIt.barrier (); });
@@ -1756,7 +1758,7 @@ void reduCGFuncImplScalar(
1756
1758
size_t LID = NDIt.get_local_linear_id ();
1757
1759
1758
1760
((std::get<Is>(LocalAccsTuple)[LID] =
1759
- ReducerAccess{ std::get<Is>(ReducersTuple)} .getElement (0 )),
1761
+ getReducerAccess ( std::get<Is>(ReducersTuple)) .getElement (0 )),
1760
1762
...);
1761
1763
1762
1764
// For work-groups, which size is not power of two, local accessors have
@@ -1807,7 +1809,7 @@ void reduCGFuncImplArrayHelper(bool Pow2WG, bool IsOneWG, nd_item<Dims> NDIt,
1807
1809
for (size_t E = 0 ; E < NElements; ++E) {
1808
1810
1809
1811
// Copy the element to local memory to prepare it for tree-reduction.
1810
- LocalReds[LID] = ReducerAccess{ Reducer} .getElement (E);
1812
+ LocalReds[LID] = getReducerAccess ( Reducer) .getElement (E);
1811
1813
1812
1814
doTreeReduction (WGSize, LID, Pow2WG, Identity, LocalReds, BOp,
1813
1815
[&]() { NDIt.barrier (); });
0 commit comments