@@ -40,8 +40,14 @@ size_t getLinearId(range<3> Range, id<3> Id) {
40
40
return Id[0 ] * Range[1 ] * Range[2 ] + Id[1 ] * Range[2 ] + Id[2 ];
41
41
}
42
42
43
+ enum class submission_mode {
44
+ handler,
45
+ queue,
46
+ };
47
+
43
48
// Test a span and a regular sum
44
- template <size_t N, typename T, typename BinaryOperation, typename Range>
49
+ template <size_t N, typename T, typename BinaryOperation, typename Range,
50
+ submission_mode SubmissionMode>
45
51
void test1 (queue Q, Range Rng, T Identity, T Value) {
46
52
47
53
// Initialize output to identity value
@@ -51,17 +57,20 @@ void test1(queue Q, Range Rng, T Identity, T Value) {
51
57
Q.parallel_for (range<1 >{N}, [=](id<1 > I) { Output[I] = Identity; }).wait ();
52
58
53
59
// Perform generalized "histogram" with N bins
54
- // TODO: Test Q.parallel_for when code_location is fixed
55
- Q.submit ([&](handler &CGH) {
56
- CGH.parallel_for (
57
- Rng, reduction (Sum, plus<>()),
58
- reduction (span<T, N>(Output, N), Identity, BinaryOperation ()),
59
- [=](auto It, auto &ScalarReducer, auto &SpanReducer) {
60
- ScalarReducer++;
61
- size_t Index = getLinearId (Rng, It) % N;
62
- SpanReducer[Index].combine (Value);
63
- });
64
- }).wait ();
60
+ auto ScalarRedu = reduction (Sum, plus<>());
61
+ auto SpanRedu = reduction (span<T, N>(Output, N), Identity, BinaryOperation ());
62
+ auto Kern = [=](auto It, auto &ScalarReducer, auto &SpanReducer) {
63
+ ScalarReducer++;
64
+ size_t Index = getLinearId (Rng, It) % N;
65
+ SpanReducer[Index].combine (Value);
66
+ };
67
+ if constexpr (SubmissionMode == submission_mode::handler) {
68
+ Q.submit ([&](handler &CGH) {
69
+ CGH.parallel_for (Rng, ScalarRedu, SpanRedu, Kern);
70
+ }).wait ();
71
+ } else /* if (SubmissionMode == submission_mode::queue) */ {
72
+ Q.parallel_for (Rng, ScalarRedu, SpanRedu, Kern).wait ();
73
+ }
65
74
66
75
size_t Size = getLinearSize (Rng);
67
76
@@ -89,7 +98,8 @@ void test1(queue Q, Range Rng, T Identity, T Value) {
89
98
}
90
99
91
100
// Test two spans
92
- template <size_t N, typename T, typename BinaryOperation, typename Range>
101
+ template <size_t N, typename T, typename BinaryOperation, typename Range,
102
+ submission_mode SubmissionMode>
93
103
void test2 (queue Q, Range Rng, T Identity, T Value) {
94
104
95
105
// Initialize output to identity value
@@ -99,17 +109,20 @@ void test2(queue Q, Range Rng, T Identity, T Value) {
99
109
Q.parallel_for (range<1 >{N}, [=](id<1 > I) { Output2[I] = Identity; }).wait ();
100
110
101
111
// Perform generalized "histogram" with N bins
102
- // TODO: Test Q.parallel_for when code_location is fixed
103
- Q.submit ([&](handler &CGH) {
104
- CGH.parallel_for (
105
- Rng, reduction (span<int , N>(Output1, N), plus<>()),
106
- reduction (span<T, N>(Output2, N), Identity, BinaryOperation ()),
107
- [=](auto It, auto &Reducer1, auto &Reducer2) {
108
- size_t Index = getLinearId (Rng, It) % N;
109
- Reducer1[Index]++;
110
- Reducer2[Index].combine (Value);
111
- });
112
- }).wait ();
112
+ auto Redu1 = reduction (span<int , N>(Output1, N), plus<>());
113
+ auto Redu2 = reduction (span<T, N>(Output2, N), Identity, BinaryOperation ());
114
+ auto Kern = [=](auto It, auto &Reducer1, auto &Reducer2) {
115
+ size_t Index = getLinearId (Rng, It) % N;
116
+ Reducer1[Index]++;
117
+ Reducer2[Index].combine (Value);
118
+ };
119
+ if constexpr (SubmissionMode == submission_mode::handler) {
120
+ Q.submit ([&](handler &CGH) {
121
+ CGH.parallel_for (Rng, Redu1, Redu2, Kern);
122
+ }).wait ();
123
+ } else /* if (SubmissionMode == submission_mode::queue) */ {
124
+ Q.parallel_for (Rng, Redu1, Redu2, Kern).wait ();
125
+ }
113
126
114
127
size_t Size = getLinearSize (Rng);
115
128
bool Passed = true ;
@@ -165,10 +178,10 @@ struct CustomBinaryOperation {
165
178
}
166
179
};
167
180
168
- template <size_t N, typename T, typename BinaryOperation, typename Range>
181
+ template <size_t N, typename T, typename BinaryOperation, typename Range, submission_mode SubmissionMode >
169
182
void test (queue Q, Range Rng, T Identity, T Value) {
170
- test1<N, T, BinaryOperation, Range>(Q, Rng, Identity, Value);
171
- test2<N, T, BinaryOperation, Range>(Q, Rng, Identity, Value);
183
+ test1<N, T, BinaryOperation, Range, SubmissionMode >(Q, Rng, Identity, Value);
184
+ test2<N, T, BinaryOperation, Range, SubmissionMode >(Q, Rng, Identity, Value);
172
185
}
173
186
174
187
int main () {
@@ -177,18 +190,39 @@ int main() {
177
190
// Tests for small spans that can be privatized efficiently
178
191
// Each combination tests a different sycl::reduction implementation
179
192
// TODO: Enable range<> tests once parallel_for accepts pack
180
- /* test<16, int, std::plus<int>, sycl::range<1>>(Q, 24, 0, 1);
181
- test<16, float, std::plus<float>, sycl::range<1>>(Q, 24, 0, 1);
182
- test<16, int, std::multiplies<int>, sycl::range<1>>(Q, 24, 1, 2);
183
- test<16, CustomType, CustomBinaryOperation, sycl::range<1>>(Q, 24,
193
+ /* test<16, int, std::plus<int>, sycl::range<1>, submission_mode::handler>(Q,
194
+ 24, 0, 1); test<16, float, std::plus<float>, sycl::range<1>,
195
+ submission_mode::handler>(Q, 24, 0, 1); test<16, int, std::multiplies<int>,
196
+ sycl::range<1>, submission_mode::handler>(Q, 24, 1, 2); test<16, CustomType,
197
+ CustomBinaryOperation, sycl::range<1>, submission_mode::handler>(Q, 24,
198
+ CustomType{0}, CustomType{1});
199
+ test<16, int, std::plus<int>, sycl::range<1>, submission_mode::queue>(Q, 24,
200
+ 0, 1); test<16, float, std::plus<float>, sycl::range<1>,
201
+ submission_mode::queue>(Q, 24, 0, 1); test<16, int, std::multiplies<int>,
202
+ sycl::range<1>, submission_mode::queue>(Q, 24, 1, 2); test<16, CustomType,
203
+ CustomBinaryOperation, sycl::range<1>, submission_mode::queue>(Q, 24,
184
204
CustomType{0}, CustomType{1});*/
185
205
186
- test<16 , int , std::plus<int >, sycl::nd_range<1 >>(Q, {24 , 8 }, 0 , 1 );
187
- test<16 , float , std::plus<float >, sycl::nd_range<1 >>(Q, {24 , 8 }, 0 , 1 );
188
- test<16 , int , std::multiplies<int >, sycl::nd_range<1 >>(Q, {24 , 8 }, 1 , 2 );
189
- test<16 , int , std::bit_or<int >, sycl::nd_range<1 >>(Q, {24 , 8 }, 0 , 1 );
190
- test<16 , CustomType, CustomBinaryOperation, sycl::nd_range<1 >>(
191
- Q, {24 , 8 }, CustomType{0 }, CustomType{1 });
206
+ test<16 , int , std::plus<int >, sycl::nd_range<1 >, submission_mode::handler>(
207
+ Q, {24 , 8 }, 0 , 1 );
208
+ test<16 , float , std::plus<float >, sycl::nd_range<1 >,
209
+ submission_mode::handler>(Q, {24 , 8 }, 0 , 1 );
210
+ test<16 , int , std::multiplies<int >, sycl::nd_range<1 >,
211
+ submission_mode::handler>(Q, {24 , 8 }, 1 , 2 );
212
+ test<16 , int , std::bit_or<int >, sycl::nd_range<1 >, submission_mode::handler>(
213
+ Q, {24 , 8 }, 0 , 1 );
214
+ test<16 , CustomType, CustomBinaryOperation, sycl::nd_range<1 >,
215
+ submission_mode::handler>(Q, {24 , 8 }, CustomType{0 }, CustomType{1 });
216
+ test<16 , int , std::plus<int >, sycl::nd_range<1 >, submission_mode::queue>(
217
+ Q, {24 , 8 }, 0 , 1 );
218
+ test<16 , float , std::plus<float >, sycl::nd_range<1 >, submission_mode::queue>(
219
+ Q, {24 , 8 }, 0 , 1 );
220
+ test<16 , int , std::multiplies<int >, sycl::nd_range<1 >,
221
+ submission_mode::queue>(Q, {24 , 8 }, 1 , 2 );
222
+ test<16 , int , std::bit_or<int >, sycl::nd_range<1 >, submission_mode::queue>(
223
+ Q, {24 , 8 }, 0 , 1 );
224
+ test<16 , CustomType, CustomBinaryOperation, sycl::nd_range<1 >,
225
+ submission_mode::queue>(Q, {24 , 8 }, CustomType{0 }, CustomType{1 });
192
226
193
227
return NumErrors;
194
228
}
0 commit comments