@@ -207,7 +207,188 @@ bool testUSM(queue Q, uint32_t MaskStride, PropertiesT) {
207
207
}
208
208
} // end if (VS == 1)
209
209
Vals.copy_to (Out + GlobalID * N);
210
- // scatter(Out, ByteOffsets.template select<NOffsets, 1>(), Vals);
210
+ }).wait ();
211
+ } catch (sycl::exception const &e) {
212
+ std::cout << " SYCL exception caught: " << e.what () << ' \n ' ;
213
+ sycl::free (In, Q);
214
+ sycl::free (Out, Q);
215
+ return false ;
216
+ }
217
+
218
+ bool Passed = verify (In, Out, N, Size, VS, MaskStride, UseMask, UsePassThru);
219
+ if (!Passed)
220
+ std::cout << " Case FAILED" << std::endl;
221
+
222
+ sycl::free (In, Q);
223
+ sycl::free (Out, Q);
224
+ return Passed;
225
+ }
226
+
227
+ template <typename T, uint16_t N, uint16_t VS, bool UseMask, bool UsePassThru,
228
+ bool UseProperties, typename PropertiesT>
229
+ bool testACC (queue Q, uint32_t MaskStride, PropertiesT) {
230
+
231
+ static_assert (VS > 0 && N % VS == 0 ,
232
+ " Incorrect VS parameter. N must be divisible by VS." );
233
+ constexpr int NOffsets = N / VS;
234
+ static_assert (!UsePassThru || UseMask,
235
+ " PassThru cannot be used without using mask" );
236
+
237
+ uint32_t Groups = 8 ;
238
+ uint32_t Threads = 16 ;
239
+
240
+ std::cout << " Running case: T=" << esimd_test::type_name<T>() << " , N=" << N
241
+ << " , VS=" << VS << " , MaskStride=" << MaskStride
242
+ << " , Groups=" << Groups << " , Threads=" << Threads
243
+ << " , use_mask=" << UseMask << " , use_pass_thru=" << UsePassThru
244
+ << " , use_properties=" << UseProperties << std::endl;
245
+
246
+ uint16_t Size = Groups * Threads * N;
247
+ using Tuint = esimd_test::uint_type_t <sizeof (T)>;
248
+
249
+ sycl::range<1 > GlobalRange{Groups};
250
+ sycl::range<1 > LocalRange{Threads};
251
+ sycl::nd_range<1 > Range{GlobalRange * LocalRange, LocalRange};
252
+
253
+ T *Out = sycl::malloc_shared<T>(Size, Q);
254
+ std::memset (Out, 0 , Size * sizeof (T));
255
+
256
+ T *In = sycl::malloc_shared<T>(Size * 2 , Q);
257
+ for (int I = 0 ; I < Size; I++)
258
+ In[I] = esimd_test::getRandomValue<T>();
259
+
260
+ try {
261
+ buffer<T, 1 > InBuf (In, Size * 2 );
262
+ Q.submit ([&](handler &CGH) {
263
+ accessor InAcc{InBuf, CGH};
264
+ CGH.parallel_for (Range, [=](sycl::nd_item<1 > NDI) SYCL_ESIMD_KERNEL {
265
+ int GlobalID = NDI.get_global_id (0 );
266
+ PropertiesT Props{};
267
+
268
+ simd<OffsetT, NOffsets> ByteOffsets (GlobalID * N * sizeof (T),
269
+ VS * sizeof (T));
270
+ simd_view ByteOffsetsView = ByteOffsets.template select <NOffsets, 1 >();
271
+
272
+ simd_mask<NOffsets> Pred;
273
+ for (int I = 0 ; I < NOffsets; I++)
274
+ Pred[I] = (I % MaskStride == 0 ) ? 1 : 0 ;
275
+
276
+ using Tuint = esimd_test::uint_type_t <sizeof (T)>;
277
+ simd<Tuint, N> PassThruInt (GlobalID * N, 1 );
278
+ simd<T, N> PassThru = PassThruInt.template bit_cast_view <T>();
279
+ auto PassThruView = PassThru.template select <N, 1 >(0 );
280
+
281
+ simd<T, N> Vals;
282
+ if constexpr (VS > 1 ) { // VS > 1 requires specifying <T, N, VS>
283
+ if constexpr (UsePassThru) {
284
+ if constexpr (UseProperties) {
285
+ if (GlobalID % 4 == 0 ) // ByteOffset - simd, PassThru - simd
286
+ Vals = gather<T, N, VS>(InAcc, ByteOffsets, Pred, PassThru,
287
+ Props);
288
+ else if (GlobalID % 4 == 1 ) // ByteOffset - simd, PassThru - view
289
+ Vals = gather<T, N, VS>(InAcc, ByteOffsets, Pred, PassThruView,
290
+ Props);
291
+ else if (GlobalID % 4 == 2 ) // ByteOffset - view, PassThru - simd
292
+ Vals = gather<T, N, VS>(InAcc, ByteOffsetsView, Pred, PassThru,
293
+ Props);
294
+ else // ByteOffset - view, PassThru - view
295
+ Vals = gather<T, N, VS>(InAcc, ByteOffsetsView, Pred,
296
+ PassThruView, Props);
297
+ } else { // UseProperties is false
298
+ if (GlobalID % 4 == 0 ) // ByteOffset - simd, PassThru - simd
299
+ Vals = gather<T, N, VS>(InAcc, ByteOffsets, Pred, PassThru);
300
+ else if (GlobalID % 4 == 1 ) // ByteOffset - simd, PassThru - view
301
+ Vals =
302
+ gather<T, N, VS>(InAcc, ByteOffsets, Pred, PassThruView);
303
+ else if (GlobalID % 4 == 2 ) // ByteOffset - view, PassThru - simd
304
+ Vals =
305
+ gather<T, N, VS>(InAcc, ByteOffsetsView, Pred, PassThru);
306
+ else // ByteOffset - view, PassThru - view
307
+ Vals = gather<T, N, VS>(InAcc, ByteOffsetsView, Pred,
308
+ PassThruView);
309
+ }
310
+ } else if constexpr (UseMask) { // UsePassThru is false
311
+ if constexpr (UseProperties) {
312
+ if (GlobalID % 2 == 0 ) // ByteOffset - simd
313
+ Vals = gather<T, N, VS>(InAcc, ByteOffsets, Pred, Props);
314
+ else // ByteOffset - simd_view
315
+ Vals = gather<T, N, VS>(InAcc, ByteOffsetsView, Pred, Props);
316
+ } else { // UseProperties is false
317
+ if (GlobalID % 2 == 0 ) // ByteOffset - simd
318
+ Vals = gather<T, N, VS>(InAcc, ByteOffsets, Pred);
319
+ else // ByteOffset - simd_view
320
+ Vals = gather<T, N, VS>(InAcc, ByteOffsetsView, Pred);
321
+ }
322
+ } else { // UseMask is false, UsePassThru is false
323
+ if constexpr (UseProperties) {
324
+ if (GlobalID % 2 == 0 ) // ByteOffset - simd
325
+ Vals = gather<T, N, VS>(InAcc, ByteOffsets, Props);
326
+ else // ByteOffset - simd_view
327
+ Vals = gather<T, N, VS>(InAcc, ByteOffsetsView, Props);
328
+ } else { // UseProperties is false
329
+ if (GlobalID % 2 == 0 ) // ByteOffset - simd
330
+ Vals = gather<T, N, VS>(InAcc, ByteOffsets);
331
+ else // ByteOffset - simd_view
332
+ Vals = gather<T, N, VS>(InAcc, ByteOffsetsView);
333
+ }
334
+ }
335
+ } else {
336
+ // if (VS == 1) then <T, N, VS> can often be omitted - test it here.
337
+ // The variants accepting simd_view for 'PassThru' operand though
338
+ // still require <T, N> to be specified explicitly to help
339
+ // C++ FE do simd to simd_view matching.
340
+ if constexpr (UsePassThru) {
341
+ if constexpr (UseProperties) {
342
+ if (GlobalID % 4 == 0 ) // ByteOffset - simd, PassThru - simd
343
+ Vals = gather<T>(InAcc, ByteOffsets, Pred, PassThru, Props);
344
+ else if (GlobalID % 4 == 1 ) // ByteOffset - simd, PassThru - view
345
+ Vals = gather<T, N>(InAcc, ByteOffsets, Pred, PassThruView,
346
+ Props);
347
+ else if (GlobalID % 4 == 2 ) // ByteOffset - view, PassThru - simd
348
+ Vals = gather (InAcc, ByteOffsetsView, Pred, PassThru, Props);
349
+ else // ByteOffset - view, PassThru - view
350
+ Vals = gather<T, N>(InAcc, ByteOffsetsView, Pred, PassThruView,
351
+ Props);
352
+ } else { // UseProperties is false
353
+ if (GlobalID % 4 == 0 ) // ByteOffset - simd, PassThru - simd
354
+ Vals = gather (InAcc, ByteOffsets, Pred, PassThru);
355
+ else if (GlobalID % 4 == 1 ) // ByteOffset - simd, PassThru - view
356
+ Vals = gather<T, N>(InAcc, ByteOffsets, Pred, PassThruView);
357
+ else if (GlobalID % 4 == 2 ) // ByteOffset - view, PassThru - simd
358
+ Vals = gather<T, N>(InAcc, ByteOffsetsView, Pred, PassThru);
359
+ else // ByteOffset - view, PassThru - view
360
+ Vals =
361
+ gather<T, N>(InAcc, ByteOffsetsView, Pred, PassThruView);
362
+ }
363
+ } else if constexpr (UseMask) { // UsePassThru is false
364
+ if constexpr (UseProperties) {
365
+ if (GlobalID % 2 == 0 ) // ByteOffset - simd
366
+ Vals = gather<T>(InAcc, ByteOffsets, Pred, Props);
367
+ else // ByteOffset - simd_view
368
+ Vals = gather<T, N>(InAcc, ByteOffsetsView, Pred, Props);
369
+ } else { // UseProperties is false
370
+ if (GlobalID % 2 == 0 ) // ByteOffset - simd
371
+ Vals = gather<T>(InAcc, ByteOffsets, Pred);
372
+ else // ByteOffset - simd_view
373
+ Vals = gather<T, N>(InAcc, ByteOffsetsView, Pred);
374
+ }
375
+ } else { // UsePassThru is false, UseMask is false
376
+ if constexpr (UseProperties) {
377
+ if (GlobalID % 2 == 0 ) // ByteOffset - simd
378
+ Vals = gather<T>(InAcc, ByteOffsets, Props);
379
+ else // ByteOffset - simd_view
380
+ Vals = gather<T, N>(InAcc, ByteOffsetsView, Props);
381
+ } else {
382
+ if (GlobalID % 2 == 0 ) // ByteOffset - simd
383
+ Vals = gather<T>(InAcc, ByteOffsets);
384
+ else // ByteOffset - simd_view
385
+ Vals = gather<T, N>(InAcc, ByteOffsetsView);
386
+ }
387
+ }
388
+ } // end if (VS == 1)
389
+ Vals.copy_to (Out + GlobalID * N);
390
+ // scatter(Out, ByteOffsets.template select<NOffsets, 1>(), Vals);
391
+ });
211
392
}).wait ();
212
393
} catch (sycl::exception const &e) {
213
394
std::cout << " SYCL exception caught: " << e.what () << ' \n ' ;
@@ -286,3 +467,61 @@ template <typename T, TestFeatures Features> bool testUSM(queue Q) {
286
467
}
287
468
return Passed;
288
469
}
470
+
471
+ template <typename T, TestFeatures Features> bool testACC (queue Q) {
472
+ constexpr bool UseMask = true ;
473
+ constexpr bool UsePassThru = true ;
474
+ constexpr bool UseProperties = true ;
475
+
476
+ properties AlignElemProps{alignment<sizeof (T)>};
477
+
478
+ bool Passed = true ;
479
+ Passed &= testACC<T, 1 , 1 , !UseMask, !UsePassThru, !UseProperties>(
480
+ Q, 2 , AlignElemProps);
481
+ #ifdef __ESIMD_FORCE_STATELESS_MEM
482
+ Passed &= testACC<T, 2 , 1 , UseMask, !UsePassThru, !UseProperties>(
483
+ Q, 2 , AlignElemProps);
484
+ Passed &= testACC<T, 4 , 1 , UseMask, !UsePassThru, !UseProperties>(
485
+ Q, 2 , AlignElemProps);
486
+ #endif // __ESIMD_FORCE_STATELESS_MEM
487
+ Passed &= testACC<T, 8 , 1 , UseMask, !UsePassThru, !UseProperties>(
488
+ Q, 3 , AlignElemProps);
489
+ Passed &= testACC<T, 16 , 1 , UseMask, !UsePassThru, UseProperties>(
490
+ Q, 2 , AlignElemProps);
491
+ Passed &= testACC<T, 32 , 1 , UseMask, !UsePassThru, !UseProperties>(
492
+ Q, 3 , AlignElemProps);
493
+
494
+ if constexpr (Features == TestFeatures::PVC ||
495
+ Features == TestFeatures::DG2) {
496
+ properties LSCProps{cache_hint_L1<cache_hint::streaming>,
497
+ cache_hint_L2<cache_hint::cached>,
498
+ alignment<sizeof (T)>};
499
+ Passed &=
500
+ testACC<T, 1 , 1 , !UseMask, !UsePassThru, UseProperties>(Q, 2 , LSCProps);
501
+ Passed &=
502
+ testACC<T, 2 , 1 , UseMask, !UsePassThru, UseProperties>(Q, 2 , LSCProps);
503
+ Passed &=
504
+ testACC<T, 4 , 1 , UseMask, UsePassThru, UseProperties>(Q, 2 , LSCProps);
505
+ Passed &=
506
+ testACC<T, 8 , 1 , UseMask, UsePassThru, UseProperties>(Q, 3 , LSCProps);
507
+
508
+ Passed &=
509
+ testACC<T, 32 , 1 , UseMask, UsePassThru, UseProperties>(Q, 2 , LSCProps);
510
+
511
+ // Check VS > 1. GPU supports only dwords and qwords in this mode.
512
+ if constexpr (sizeof (T) >= 4 ) {
513
+ // TODO: This test case causes flaky fail. Enable it after the issue
514
+ // in GPU driver is fixed.
515
+ // Passed &= testACC<T, 16, 2, UseMask, !UsePassThru, UseProperties>(
516
+ // Q, 3, AlignElemProps);
517
+
518
+ Passed &= testACC<T, 32 , 2 , !UseMask, !UsePassThru, UseProperties>(
519
+ Q, 3 , AlignElemProps);
520
+ Passed &= testACC<T, 32 , 2 , UseMask, !UsePassThru, UseProperties>(
521
+ Q, 3 , AlignElemProps);
522
+ Passed &= testACC<T, 32 , 2 , UseMask, UsePassThru, UseProperties>(
523
+ Q, 3 , AlignElemProps);
524
+ }
525
+ }
526
+ return Passed;
527
+ }
0 commit comments