@@ -225,6 +225,188 @@ bool testUSM(queue Q, uint32_t MaskStride, PropertiesT) {
225
225
return Passed;
226
226
}
227
227
228
+ template <typename T, uint16_t N, uint16_t VS, bool UseMask, bool UsePassThru,
229
+ bool UseProperties, typename PropertiesT>
230
+ bool testACC (queue Q, uint32_t MaskStride, PropertiesT) {
231
+
232
+ static_assert (VS > 0 && N % VS == 0 ,
233
+ " Incorrect VS parameter. N must be divisible by VS." );
234
+ constexpr int NOffsets = N / VS;
235
+ static_assert (!UsePassThru || UseMask,
236
+ " PassThru cannot be used without using mask" );
237
+
238
+ uint32_t Groups = 8 ;
239
+ uint32_t Threads = 16 ;
240
+
241
+ std::cout << " Running case: T=" << esimd_test::type_name<T>() << " , N=" << N
242
+ << " , VS=" << VS << " , MaskStride=" << MaskStride
243
+ << " , Groups=" << Groups << " , Threads=" << Threads
244
+ << " , use_mask=" << UseMask << " , use_pass_thru=" << UsePassThru
245
+ << " , use_properties=" << UseProperties << std::endl;
246
+
247
+ uint16_t Size = Groups * Threads * N;
248
+ using Tuint = esimd_test::uint_type_t <sizeof (T)>;
249
+
250
+ sycl::range<1 > GlobalRange{Groups};
251
+ sycl::range<1 > LocalRange{Threads};
252
+ sycl::nd_range<1 > Range{GlobalRange * LocalRange, LocalRange};
253
+
254
+ T *Out = sycl::malloc_shared<T>(Size, Q);
255
+ std::memset (Out, 0 , Size * sizeof (T));
256
+
257
+ T *In = sycl::malloc_shared<T>(Size * 2 , Q);
258
+ for (int I = 0 ; I < Size; I++)
259
+ In[I] = esimd_test::getRandomValue<T>();
260
+
261
+ try {
262
+ buffer<T, 1 > InBuf (In, Size * 2 );
263
+ Q.submit ([&](handler &CGH) {
264
+ accessor InAcc{InBuf, CGH};
265
+ CGH.parallel_for (Range, [=](sycl::nd_item<1 > NDI) SYCL_ESIMD_KERNEL {
266
+ int GlobalID = NDI.get_global_id (0 );
267
+ PropertiesT Props{};
268
+
269
+ simd<OffsetT, NOffsets> ByteOffsets (GlobalID * N * sizeof (T),
270
+ VS * sizeof (T));
271
+ simd_view ByteOffsetsView = ByteOffsets.template select <NOffsets, 1 >();
272
+
273
+ simd_mask<NOffsets> Pred;
274
+ for (int I = 0 ; I < NOffsets; I++)
275
+ Pred[I] = (I % MaskStride == 0 ) ? 1 : 0 ;
276
+
277
+ using Tuint = esimd_test::uint_type_t <sizeof (T)>;
278
+ simd<Tuint, N> PassThruInt (GlobalID * N, 1 );
279
+ simd<T, N> PassThru = PassThruInt.template bit_cast_view <T>();
280
+ auto PassThruView = PassThru.template select <N, 1 >(0 );
281
+
282
+ simd<T, N> Vals;
283
+ if constexpr (VS > 1 ) { // VS > 1 requires specifying <T, N, VS>
284
+ if constexpr (UsePassThru) {
285
+ if constexpr (UseProperties) {
286
+ if (GlobalID % 4 == 0 ) // ByteOffset - simd, PassThru - simd
287
+ Vals = gather<T, N, VS>(InAcc, ByteOffsets, Pred, PassThru,
288
+ Props);
289
+ else if (GlobalID % 4 == 1 ) // ByteOffset - simd, PassThru - view
290
+ Vals = gather<T, N, VS>(InAcc, ByteOffsets, Pred, PassThruView,
291
+ Props);
292
+ else if (GlobalID % 4 == 2 ) // ByteOffset - view, PassThru - simd
293
+ Vals = gather<T, N, VS>(InAcc, ByteOffsetsView, Pred, PassThru,
294
+ Props);
295
+ else // ByteOffset - view, PassThru - view
296
+ Vals = gather<T, N, VS>(InAcc, ByteOffsetsView, Pred,
297
+ PassThruView, Props);
298
+ } else { // UseProperties is false
299
+ if (GlobalID % 4 == 0 ) // ByteOffset - simd, PassThru - simd
300
+ Vals = gather<T, N, VS>(InAcc, ByteOffsets, Pred, PassThru);
301
+ else if (GlobalID % 4 == 1 ) // ByteOffset - simd, PassThru - view
302
+ Vals =
303
+ gather<T, N, VS>(InAcc, ByteOffsets, Pred, PassThruView);
304
+ else if (GlobalID % 4 == 2 ) // ByteOffset - view, PassThru - simd
305
+ Vals =
306
+ gather<T, N, VS>(InAcc, ByteOffsetsView, Pred, PassThru);
307
+ else // ByteOffset - view, PassThru - view
308
+ Vals = gather<T, N, VS>(InAcc, ByteOffsetsView, Pred,
309
+ PassThruView);
310
+ }
311
+ } else if constexpr (UseMask) { // UsePassThru is false
312
+ if constexpr (UseProperties) {
313
+ if (GlobalID % 2 == 0 ) // ByteOffset - simd
314
+ Vals = gather<T, N, VS>(InAcc, ByteOffsets, Pred, Props);
315
+ else // ByteOffset - simd_view
316
+ Vals = gather<T, N, VS>(InAcc, ByteOffsetsView, Pred, Props);
317
+ } else { // UseProperties is false
318
+ if (GlobalID % 2 == 0 ) // ByteOffset - simd
319
+ Vals = gather<T, N, VS>(InAcc, ByteOffsets, Pred);
320
+ else // ByteOffset - simd_view
321
+ Vals = gather<T, N, VS>(InAcc, ByteOffsetsView, Pred);
322
+ }
323
+ } else { // UseMask is false, UsePassThru is false
324
+ if constexpr (UseProperties) {
325
+ if (GlobalID % 2 == 0 ) // ByteOffset - simd
326
+ Vals = gather<T, N, VS>(InAcc, ByteOffsets, Props);
327
+ else // ByteOffset - simd_view
328
+ Vals = gather<T, N, VS>(InAcc, ByteOffsetsView, Props);
329
+ } else { // UseProperties is false
330
+ if (GlobalID % 2 == 0 ) // ByteOffset - simd
331
+ Vals = gather<T, N, VS>(InAcc, ByteOffsets);
332
+ else // ByteOffset - simd_view
333
+ Vals = gather<T, N, VS>(InAcc, ByteOffsetsView);
334
+ }
335
+ }
336
+ } else {
337
+ // if (VS == 1) then <T, N, VS> can often be omitted - test it here.
338
+ // The variants accepting simd_view for 'PassThru' operand though
339
+ // still require <T, N> to be specified explicitly to help
340
+ // C++ FE do simd to simd_view matching.
341
+ if constexpr (UsePassThru) {
342
+ if constexpr (UseProperties) {
343
+ if (GlobalID % 4 == 0 ) // ByteOffset - simd, PassThru - simd
344
+ Vals = gather<T>(InAcc, ByteOffsets, Pred, PassThru, Props);
345
+ else if (GlobalID % 4 == 1 ) // ByteOffset - simd, PassThru - view
346
+ Vals = gather<T, N>(InAcc, ByteOffsets, Pred, PassThruView,
347
+ Props);
348
+ else if (GlobalID % 4 == 2 ) // ByteOffset - view, PassThru - simd
349
+ Vals = gather (InAcc, ByteOffsetsView, Pred, PassThru, Props);
350
+ else // ByteOffset - view, PassThru - view
351
+ Vals = gather<T, N>(InAcc, ByteOffsetsView, Pred, PassThruView,
352
+ Props);
353
+ } else { // UseProperties is false
354
+ if (GlobalID % 4 == 0 ) // ByteOffset - simd, PassThru - simd
355
+ Vals = gather (InAcc, ByteOffsets, Pred, PassThru);
356
+ else if (GlobalID % 4 == 1 ) // ByteOffset - simd, PassThru - view
357
+ Vals = gather<T, N>(InAcc, ByteOffsets, Pred, PassThruView);
358
+ else if (GlobalID % 4 == 2 ) // ByteOffset - view, PassThru - simd
359
+ Vals = gather<T, N>(InAcc, ByteOffsetsView, Pred, PassThru);
360
+ else // ByteOffset - view, PassThru - view
361
+ Vals =
362
+ gather<T, N>(InAcc, ByteOffsetsView, Pred, PassThruView);
363
+ }
364
+ } else if constexpr (UseMask) { // UsePassThru is false
365
+ if constexpr (UseProperties) {
366
+ if (GlobalID % 2 == 0 ) // ByteOffset - simd
367
+ Vals = gather<T>(InAcc, ByteOffsets, Pred, Props);
368
+ else // ByteOffset - simd_view
369
+ Vals = gather<T, N>(InAcc, ByteOffsetsView, Pred, Props);
370
+ } else { // UseProperties is false
371
+ if (GlobalID % 2 == 0 ) // ByteOffset - simd
372
+ Vals = gather<T>(InAcc, ByteOffsets, Pred);
373
+ else // ByteOffset - simd_view
374
+ Vals = gather<T, N>(InAcc, ByteOffsetsView, Pred);
375
+ }
376
+ } else { // UsePassThru is false, UseMask is false
377
+ if constexpr (UseProperties) {
378
+ if (GlobalID % 2 == 0 ) // ByteOffset - simd
379
+ Vals = gather<T>(InAcc, ByteOffsets, Props);
380
+ else // ByteOffset - simd_view
381
+ Vals = gather<T, N>(InAcc, ByteOffsetsView, Props);
382
+ } else {
383
+ if (GlobalID % 2 == 0 ) // ByteOffset - simd
384
+ Vals = gather<T>(InAcc, ByteOffsets);
385
+ else // ByteOffset - simd_view
386
+ Vals = gather<T, N>(InAcc, ByteOffsetsView);
387
+ }
388
+ }
389
+ } // end if (VS == 1)
390
+ Vals.copy_to (Out + GlobalID * N);
391
+ // scatter(Out, ByteOffsets.template select<NOffsets, 1>(), Vals);
392
+ });
393
+ }).wait ();
394
+ } catch (sycl::exception const &e) {
395
+ std::cout << " SYCL exception caught: " << e.what () << ' \n ' ;
396
+ sycl::free (In, Q);
397
+ sycl::free (Out, Q);
398
+ return false ;
399
+ }
400
+
401
+ bool Passed = verify (In, Out, N, Size, VS, MaskStride, UseMask, UsePassThru);
402
+ if (!Passed)
403
+ std::cout << " Case FAILED" << std::endl;
404
+
405
+ sycl::free (In, Q);
406
+ sycl::free (Out, Q);
407
+ return Passed;
408
+ }
409
+
228
410
template <typename T, TestFeatures Features> bool testUSM (queue Q) {
229
411
constexpr bool UseMask = true ;
230
412
constexpr bool UsePassThru = true ;
@@ -286,3 +468,61 @@ template <typename T, TestFeatures Features> bool testUSM(queue Q) {
286
468
}
287
469
return Passed;
288
470
}
471
+
472
+ template <typename T, TestFeatures Features> bool testACC (queue Q) {
473
+ constexpr bool UseMask = true ;
474
+ constexpr bool UsePassThru = true ;
475
+ constexpr bool UseProperties = true ;
476
+
477
+ properties AlignElemProps{alignment<sizeof (T)>};
478
+
479
+ bool Passed = true ;
480
+ Passed &= testACC<T, 1 , 1 , !UseMask, !UsePassThru, !UseProperties>(
481
+ Q, 2 , AlignElemProps);
482
+ #ifdef __ESIMD_FORCE_STATELESS_MEM
483
+ Passed &= testACC<T, 2 , 1 , UseMask, !UsePassThru, !UseProperties>(
484
+ Q, 2 , AlignElemProps);
485
+ Passed &= testACC<T, 4 , 1 , UseMask, !UsePassThru, !UseProperties>(
486
+ Q, 2 , AlignElemProps);
487
+ #endif // __ESIMD_FORCE_STATELESS_MEM
488
+ Passed &= testACC<T, 8 , 1 , UseMask, !UsePassThru, !UseProperties>(
489
+ Q, 3 , AlignElemProps);
490
+ Passed &= testACC<T, 16 , 1 , UseMask, !UsePassThru, UseProperties>(
491
+ Q, 2 , AlignElemProps);
492
+ Passed &= testACC<T, 32 , 1 , UseMask, !UsePassThru, !UseProperties>(
493
+ Q, 3 , AlignElemProps);
494
+
495
+ if constexpr (Features == TestFeatures::PVC ||
496
+ Features == TestFeatures::DG2) {
497
+ properties LSCProps{cache_hint_L1<cache_hint::streaming>,
498
+ cache_hint_L2<cache_hint::cached>,
499
+ alignment<sizeof (T)>};
500
+ Passed &=
501
+ testACC<T, 1 , 1 , !UseMask, !UsePassThru, UseProperties>(Q, 2 , LSCProps);
502
+ Passed &=
503
+ testACC<T, 2 , 1 , UseMask, !UsePassThru, UseProperties>(Q, 2 , LSCProps);
504
+ Passed &=
505
+ testACC<T, 4 , 1 , UseMask, UsePassThru, UseProperties>(Q, 2 , LSCProps);
506
+ Passed &=
507
+ testACC<T, 8 , 1 , UseMask, UsePassThru, UseProperties>(Q, 3 , LSCProps);
508
+
509
+ Passed &=
510
+ testACC<T, 32 , 1 , UseMask, UsePassThru, UseProperties>(Q, 2 , LSCProps);
511
+
512
+ // Check VS > 1. GPU supports only dwords and qwords in this mode.
513
+ if constexpr (sizeof (T) >= 4 ) {
514
+ // TODO: This test case causes flaky fail. Enable it after the issue
515
+ // in GPU driver is fixed.
516
+ // Passed &= testACC<T, 16, 2, UseMask, !UsePassThru, UseProperties>(
517
+ // Q, 3, AlignElemProps);
518
+
519
+ Passed &= testACC<T, 32 , 2 , !UseMask, !UsePassThru, UseProperties>(
520
+ Q, 3 , AlignElemProps);
521
+ Passed &= testACC<T, 32 , 2 , UseMask, !UsePassThru, UseProperties>(
522
+ Q, 3 , AlignElemProps);
523
+ Passed &= testACC<T, 32 , 2 , UseMask, UsePassThru, UseProperties>(
524
+ Q, 3 , AlignElemProps);
525
+ }
526
+ }
527
+ return Passed;
528
+ }
0 commit comments