@@ -178,96 +178,101 @@ sycl::event operation(sycl::queue q) {
178
178
}
179
179
180
180
int main (int argc, char *argv[]) {
181
- size_t count = 100 ;
181
+ try {
182
+ size_t count = 100 ;
182
183
183
- int size = 0 ;
184
- int rank = 0 ;
184
+ int size = 0 ;
185
+ int rank = 0 ;
185
186
186
- size_t num_iters = 20 ;
187
- size_t kernel_num = 3 ;
187
+ size_t num_iters = 20 ;
188
+ size_t kernel_num = 3 ;
188
189
189
- if (argc > 1 )
190
- kernel_num = atoi (argv[1 ]);
191
- if (argc > 2 )
192
- count = atoi (argv[2 ]);
193
- if (argc > 3 )
194
- num_iters = atoi (argv[3 ]);
190
+ if (argc > 1 )
191
+ kernel_num = atoi (argv[1 ]);
192
+ if (argc > 2 )
193
+ count = atoi (argv[2 ]);
194
+ if (argc > 3 )
195
+ num_iters = atoi (argv[3 ]);
195
196
196
- size_t byte_count = count * 4 ;
197
+ size_t byte_count = count * 4 ;
197
198
198
- sycl::property_list props{sycl::property::queue::in_order{},
199
- sycl::property::queue::enable_profiling{}};
200
- sycl::queue q{props};
199
+ sycl::property_list props{sycl::property::queue::in_order{},
200
+ sycl::property::queue::enable_profiling{}};
201
+ sycl::queue q{props};
201
202
202
- init ();
203
+ init ();
203
204
204
- // Store allocated mem ptrs to free them later
205
- std::vector<std::pair<float *, float *>> ptrs (kernel_num);
206
- // allocate all the buffers
207
- for (size_t i = 0 ; i < kernel_num; i++) {
208
- float *weight_buf = (float *)sycl::malloc_device (byte_count, q);
209
- float *weight_allreduce_buf = (float *)sycl::malloc_device (byte_count, q);
210
- ptrs[i] = {weight_buf, weight_allreduce_buf};
211
- }
205
+ // Store allocated mem ptrs to free them later
206
+ std::vector<std::pair<float *, float *>> ptrs (kernel_num);
207
+ // allocate all the buffers
208
+ for (size_t i = 0 ; i < kernel_num; i++) {
209
+ float *weight_buf = (float *)sycl::malloc_device (byte_count, q);
210
+ float *weight_allreduce_buf = (float *)sycl::malloc_device (byte_count, q);
211
+ ptrs[i] = {weight_buf, weight_allreduce_buf};
212
+ }
212
213
213
- std::vector<std::tuple<sycl::event, sycl::event>> kernel_events (num_iters *
214
- kernel_num);
214
+ std::vector<std::tuple<sycl::event, sycl::event>> kernel_events (num_iters *
215
+ kernel_num);
215
216
216
- std::vector<sycl::event> barrier_events;
217
+ std::vector<sycl::event> barrier_events;
217
218
218
- std::thread worker_thread (worker);
219
+ std::thread worker_thread (worker);
219
220
220
- for (size_t i = 0 ; i < num_iters; ++i) {
221
- std::cout << " Running iteration " << i << std::endl;
221
+ for (size_t i = 0 ; i < num_iters; ++i) {
222
+ std::cout << " Running iteration " << i << std::endl;
222
223
223
- for (size_t j = 0 ; j < kernel_num; j++) {
224
- size_t num = i * kernel_num + j;
225
- float *weight_buf = ptrs[j].first ;
226
- float *weight_allreduce_buf = ptrs[j].second ;
224
+ for (size_t j = 0 ; j < kernel_num; j++) {
225
+ size_t num = i * kernel_num + j;
226
+ float *weight_buf = ptrs[j].first ;
227
+ float *weight_allreduce_buf = ptrs[j].second ;
227
228
228
- // Step1: FWK kernel submission
229
- sycl::event submit_event;
230
- if (i == 0 ) {
231
- submit_event = q.submit ([&](auto &h) {
232
- h.parallel_for (count, [=](auto id) {
233
- // Initial weight in first iteration
234
- weight_buf[id] = j * (rank + 1 );
229
+ // Step1: FWK kernel submission
230
+ sycl::event submit_event;
231
+ if (i == 0 ) {
232
+ submit_event = q.submit ([&](auto &h) {
233
+ h.parallel_for (count, [=](auto id) {
234
+ // Initial weight in first iteration
235
+ weight_buf[id] = j * (rank + 1 );
236
+ });
235
237
});
236
- });
237
- } else {
238
- submit_event = q. submit ([& ](auto &h ) {
239
- h. parallel_for (count, [=]( auto id) {
240
- // Make weight differ in each iteration
241
- weight_buf[id] = weight_buf[id] + (j * (rank + 1 ) );
238
+ } else {
239
+ submit_event = q. submit ([&]( auto &h) {
240
+ h. parallel_for (count, [= ](auto id ) {
241
+ // Make weight differ in each iteration
242
+ weight_buf[id] = weight_buf[id] + (j * (rank + 1 ));
243
+ } );
242
244
});
243
- });
244
- }
245
+ }
245
246
246
- barrier_events.push_back (operation (q));
247
+ barrier_events.push_back (operation (q));
247
248
248
- // Step3: Weight update
249
- auto update_event = q.submit ([&](auto &h) {
250
- h.parallel_for (count, [=](auto id) {
251
- // Update weight in each iteration
252
- weight_buf[id] = weight_allreduce_buf[id] * 0.5 ;
249
+ // Step3: Weight update
250
+ auto update_event = q.submit ([&](auto &h) {
251
+ h.parallel_for (count, [=](auto id) {
252
+ // Update weight in each iteration
253
+ weight_buf[id] = weight_allreduce_buf[id] * 0 .5f ;
254
+ });
253
255
});
254
- });
255
256
256
- kernel_events[num] = {submit_event, update_event};
257
+ kernel_events[num] = {submit_event, update_event};
258
+ }
259
+ q.wait ();
257
260
}
258
- q.wait ();
259
- }
260
261
261
- // Make sure there is no exceptions in the queue
262
- q.wait_and_throw ();
262
+ // Make sure there is no exceptions in the queue
263
+ q.wait_and_throw ();
263
264
264
- for (auto p : ptrs) {
265
- sycl::free (p.first , q);
266
- sycl::free (p.second , q);
267
- }
265
+ for (auto p : ptrs) {
266
+ sycl::free (p.first , q);
267
+ sycl::free (p.second , q);
268
+ }
268
269
269
- stop_worker = true ;
270
- cv.notify_all ();
271
- worker_thread.join ();
270
+ stop_worker = true ;
271
+ cv.notify_all ();
272
+ worker_thread.join ();
273
+ } catch (std::exception &E) {
274
+ std::cout << E.what () << std::endl;
275
+ return 1 ;
276
+ }
272
277
return 0 ;
273
278
}
0 commit comments