@@ -27,11 +27,50 @@ namespace detail {
27
27
28
28
using cl::sycl::detail::bool_constant;
29
29
using cl::sycl::detail::enable_if_t ;
30
- using cl::sycl::detail::is_sgenfloat;
31
- using cl::sycl::detail::is_sgeninteger;
32
30
using cl::sycl::detail::queue_impl;
33
31
using cl::sycl::detail::remove_AS;
34
32
33
+ // This type trait is used to detect if the atomic operation BinaryOperation
34
+ // used with operands of the type T is available for using in reduction.
35
+ // The order in which the atomic operations are performed may be arbitrary and
36
+ // thus may cause different results from run to run even on the same elements
37
+ // and on same device. The macro SYCL_REDUCTION_DETERMINISTIC prohibits using
38
+ // atomic operations for reduction and helps to produce stable results.
39
+ // SYCL_REDUCTION_DETERMINISTIC is a short term solution, which perhaps become
40
+ // deprecated eventually and is replaced by a sycl property passed to reduction.
41
+ template <typename T, class BinaryOperation >
42
+ using IsReduOptForFastAtomicFetch =
43
+ #ifdef SYCL_REDUCTION_DETERMINISTIC
44
+ bool_constant<false >;
45
+ #else
46
+ bool_constant<sycl::detail::is_sgeninteger<T>::value &&
47
+ sycl::detail::IsValidAtomicType<T>::value &&
48
+ (sycl::detail::IsPlus<T, BinaryOperation>::value ||
49
+ sycl::detail::IsMinimum<T, BinaryOperation>::value ||
50
+ sycl::detail::IsMaximum<T, BinaryOperation>::value ||
51
+ sycl::detail::IsBitOR<T, BinaryOperation>::value ||
52
+ sycl::detail::IsBitXOR<T, BinaryOperation>::value ||
53
+ sycl::detail::IsBitAND<T, BinaryOperation>::value)>;
54
+ #endif
55
+
56
+ // This type trait is used to detect if the group algorithm reduce() used with
57
+ // operands of the type T and the operation BinaryOperation is available
58
+ // for using in reduction.
59
+ // The macro SYCL_REDUCTION_DETERMINISTIC prohibits using the reduce() algorithm
60
+ // to produce stable results across same type devices.
61
+ template <typename T, class BinaryOperation >
62
+ using IsReduOptForFastReduce =
63
+ #ifdef SYCL_REDUCTION_DETERMINISTIC
64
+ bool_constant<false >;
65
+ #else
66
+ bool_constant<((sycl::detail::is_sgeninteger<T>::value &&
67
+ (sizeof (T) == 4 || sizeof (T) == 8 )) ||
68
+ sycl::detail::is_sgenfloat<T>::value) &&
69
+ (sycl::detail::IsPlus<T, BinaryOperation>::value ||
70
+ sycl::detail::IsMinimum<T, BinaryOperation>::value ||
71
+ sycl::detail::IsMaximum<T, BinaryOperation>::value)>;
72
+ #endif
73
+
35
74
// std::tuple seems to be a) too heavy and b) not copyable to device now
36
75
// Thus sycl::detail::tuple is used instead.
37
76
// Switching from sycl::device::tuple to std::tuple can be done by re-defining
@@ -46,10 +85,6 @@ __SYCL_EXPORT size_t reduGetMaxWGSize(shared_ptr_class<queue_impl> Queue,
46
85
__SYCL_EXPORT size_t reduComputeWGSize (size_t NWorkItems, size_t MaxWGSize,
47
86
size_t &NWorkGroups);
48
87
49
-
50
-
51
-
52
-
53
88
// / Class that is used to represent objects that are passed to user's lambda
54
89
// / functions and representing users' reduction variable.
55
90
// / The generic version of the class represents those reductions of those
@@ -64,45 +99,45 @@ class reducer {
64
99
T getIdentity () const { return MIdentity; }
65
100
66
101
template <typename _T = T>
67
- enable_if_t <IsReduPlus <_T, BinaryOperation>::value &&
102
+ enable_if_t <sycl::detail::IsPlus <_T, BinaryOperation>::value &&
68
103
sycl::detail::is_geninteger<_T>::value>
69
104
operator ++() {
70
105
combine (static_cast <T>(1 ));
71
106
}
72
107
73
108
template <typename _T = T>
74
- enable_if_t <IsReduPlus <_T, BinaryOperation>::value &&
109
+ enable_if_t <sycl::detail::IsPlus <_T, BinaryOperation>::value &&
75
110
sycl::detail::is_geninteger<_T>::value>
76
111
operator ++(int ) {
77
112
combine (static_cast <T>(1 ));
78
113
}
79
114
80
115
template <typename _T = T>
81
- enable_if_t <IsReduPlus <_T, BinaryOperation>::value>
116
+ enable_if_t <sycl::detail::IsPlus <_T, BinaryOperation>::value>
82
117
operator +=(const _T &Partial) {
83
118
combine (Partial);
84
119
}
85
120
86
121
template <typename _T = T>
87
- enable_if_t <IsReduMultiplies <_T, BinaryOperation>::value>
122
+ enable_if_t <sycl::detail::IsMultiplies <_T, BinaryOperation>::value>
88
123
operator *=(const _T &Partial) {
89
124
combine (Partial);
90
125
}
91
126
92
127
template <typename _T = T>
93
- enable_if_t <IsReduBitOR <_T, BinaryOperation>::value>
128
+ enable_if_t <sycl::detail::IsBitOR <_T, BinaryOperation>::value>
94
129
operator |=(const _T &Partial) {
95
130
combine (Partial);
96
131
}
97
132
98
133
template <typename _T = T>
99
- enable_if_t <IsReduBitXOR <_T, BinaryOperation>::value>
134
+ enable_if_t <sycl::detail::IsBitXOR <_T, BinaryOperation>::value>
100
135
operator ^=(const _T &Partial) {
101
136
combine (Partial);
102
137
}
103
138
104
139
template <typename _T = T>
105
- enable_if_t <IsReduBitAND <_T, BinaryOperation>::value>
140
+ enable_if_t <sycl::detail::IsBitAND <_T, BinaryOperation>::value>
106
141
operator &=(const _T &Partial) {
107
142
combine (Partial);
108
143
}
@@ -150,45 +185,45 @@ class reducer<T, BinaryOperation,
150
185
}
151
186
152
187
template <typename _T = T>
153
- enable_if_t <IsReduPlus <_T, BinaryOperation>::value &&
188
+ enable_if_t <sycl::detail::IsPlus <_T, BinaryOperation>::value &&
154
189
sycl::detail::is_geninteger<_T>::value>
155
190
operator ++() {
156
191
combine (static_cast <T>(1 ));
157
192
}
158
193
159
194
template <typename _T = T>
160
- enable_if_t <IsReduPlus <_T, BinaryOperation>::value &&
195
+ enable_if_t <sycl::detail::IsPlus <_T, BinaryOperation>::value &&
161
196
sycl::detail::is_geninteger<_T>::value>
162
197
operator ++(int ) {
163
198
combine (static_cast <T>(1 ));
164
199
}
165
200
166
201
template <typename _T = T>
167
- enable_if_t <IsReduPlus <_T, BinaryOperation>::value>
202
+ enable_if_t <sycl::detail::IsPlus <_T, BinaryOperation>::value>
168
203
operator +=(const _T &Partial) {
169
204
combine (Partial);
170
205
}
171
206
172
207
template <typename _T = T>
173
- enable_if_t <IsReduMultiplies <_T, BinaryOperation>::value>
208
+ enable_if_t <sycl::detail::IsMultiplies <_T, BinaryOperation>::value>
174
209
operator *=(const _T &Partial) {
175
210
combine (Partial);
176
211
}
177
212
178
213
template <typename _T = T>
179
- enable_if_t <IsReduBitOR <_T, BinaryOperation>::value>
214
+ enable_if_t <sycl::detail::IsBitOR <_T, BinaryOperation>::value>
180
215
operator |=(const _T &Partial) {
181
216
combine (Partial);
182
217
}
183
218
184
219
template <typename _T = T>
185
- enable_if_t <IsReduBitXOR <_T, BinaryOperation>::value>
220
+ enable_if_t <sycl::detail::IsBitXOR <_T, BinaryOperation>::value>
186
221
operator ^=(const _T &Partial) {
187
222
combine (Partial);
188
223
}
189
224
190
225
template <typename _T = T>
191
- enable_if_t <IsReduBitAND <_T, BinaryOperation>::value>
226
+ enable_if_t <sycl::detail::IsBitAND <_T, BinaryOperation>::value>
192
227
operator &=(const _T &Partial) {
193
228
combine (Partial);
194
229
}
@@ -197,7 +232,7 @@ class reducer<T, BinaryOperation,
197
232
template <typename _T = T, class _BinaryOperation = BinaryOperation>
198
233
enable_if_t <std::is_same<typename remove_AS<_T>::type, T>::value &&
199
234
IsReduOptForFastAtomicFetch<T, _BinaryOperation>::value &&
200
- IsReduPlus <T, _BinaryOperation>::value>
235
+ sycl::detail::IsPlus <T, _BinaryOperation>::value>
201
236
atomic_combine (_T *ReduVarPtr) const {
202
237
atomic<T, access::address_space::global_space>(global_ptr<T>(ReduVarPtr))
203
238
.fetch_add (MValue);
@@ -207,7 +242,7 @@ class reducer<T, BinaryOperation,
207
242
template <typename _T = T, class _BinaryOperation = BinaryOperation>
208
243
enable_if_t <std::is_same<typename remove_AS<_T>::type, T>::value &&
209
244
IsReduOptForFastAtomicFetch<T, _BinaryOperation>::value &&
210
- IsReduBitOR <T, _BinaryOperation>::value>
245
+ sycl::detail::IsBitOR <T, _BinaryOperation>::value>
211
246
atomic_combine (_T *ReduVarPtr) const {
212
247
atomic<T, access::address_space::global_space>(global_ptr<T>(ReduVarPtr))
213
248
.fetch_or (MValue);
@@ -217,7 +252,7 @@ class reducer<T, BinaryOperation,
217
252
template <typename _T = T, class _BinaryOperation = BinaryOperation>
218
253
enable_if_t <std::is_same<typename remove_AS<_T>::type, T>::value &&
219
254
IsReduOptForFastAtomicFetch<T, _BinaryOperation>::value &&
220
- IsReduBitXOR <T, _BinaryOperation>::value>
255
+ sycl::detail::IsBitXOR <T, _BinaryOperation>::value>
221
256
atomic_combine (_T *ReduVarPtr) const {
222
257
atomic<T, access::address_space::global_space>(global_ptr<T>(ReduVarPtr))
223
258
.fetch_xor (MValue);
@@ -227,7 +262,7 @@ class reducer<T, BinaryOperation,
227
262
template <typename _T = T, class _BinaryOperation = BinaryOperation>
228
263
enable_if_t <std::is_same<typename remove_AS<_T>::type, T>::value &&
229
264
IsReduOptForFastAtomicFetch<T, _BinaryOperation>::value &&
230
- IsReduBitAND <T, _BinaryOperation>::value>
265
+ sycl::detail::IsBitAND <T, _BinaryOperation>::value>
231
266
atomic_combine (_T *ReduVarPtr) const {
232
267
atomic<T, access::address_space::global_space>(global_ptr<T>(ReduVarPtr))
233
268
.fetch_and (MValue);
@@ -237,7 +272,7 @@ class reducer<T, BinaryOperation,
237
272
template <typename _T = T, class _BinaryOperation = BinaryOperation>
238
273
enable_if_t <std::is_same<typename remove_AS<_T>::type, T>::value &&
239
274
IsReduOptForFastAtomicFetch<T, _BinaryOperation>::value &&
240
- IsReduMinimum <T, _BinaryOperation>::value>
275
+ sycl::detail::IsMinimum <T, _BinaryOperation>::value>
241
276
atomic_combine (_T *ReduVarPtr) const {
242
277
atomic<T, access::address_space::global_space>(global_ptr<T>(ReduVarPtr))
243
278
.fetch_min (MValue);
@@ -247,7 +282,7 @@ class reducer<T, BinaryOperation,
247
282
template <typename _T = T, class _BinaryOperation = BinaryOperation>
248
283
enable_if_t <std::is_same<typename remove_AS<_T>::type, T>::value &&
249
284
IsReduOptForFastAtomicFetch<T, _BinaryOperation>::value &&
250
- IsReduMaximum <T, _BinaryOperation>::value>
285
+ sycl::detail::IsMaximum <T, _BinaryOperation>::value>
251
286
atomic_combine (_T *ReduVarPtr) const {
252
287
atomic<T, access::address_space::global_space>(global_ptr<T>(ReduVarPtr))
253
288
.fetch_max (MValue);
0 commit comments