8
8
// This test checks LSC atomic operations.
9
9
// ===----------------------------------------------------------------------===//
10
10
// REQUIRES: gpu-intel-pvc
11
- // UNSUPPORTED: cuda || hip
12
- // RUN: %clangxx -fsycl -DUSE_LSC_ATOMICS %s -o %t.out
11
+ // TODO: esimd_emulator fails due to unsupported __esimd_svm_atomic0/1/2
12
+ // XFAIL: esimd_emulator
13
+ // RUN: %clangxx -fsycl %s -o %t.out
13
14
// RUN: %GPU_RUN_PLACEHOLDER %t.out
14
15
15
16
#include " ../esimd_test_utils.hpp"
18
19
#include < iostream>
19
20
#include < sycl/ext/intel/esimd.hpp>
20
21
21
- #ifdef UNDEF_USE_LSC_ATOMICS
22
- #undef USE_LSC_ATOMICS
23
- #endif
24
-
25
22
using namespace sycl ;
26
23
using namespace sycl ::ext::intel::esimd;
27
24
using namespace sycl ::ext::intel::experimental::esimd;
@@ -39,7 +36,7 @@ struct Config {
39
36
#define PREFER_FULL_BARRIER 0
40
37
#endif // PREFER_FULL_BARRIER
41
38
42
- #if PREFER_FULL_BARRIER && defined(USE_LSC_ATOMICS )
39
+ #if PREFER_FULL_BARRIER && ! defined(USE_DWORD_ATOMICS )
43
40
#define USE_FULL_BARRIER 1
44
41
#else
45
42
#define USE_FULL_BARRIER 0
@@ -59,20 +56,20 @@ using LSCAtomicOp = sycl::ext::intel::esimd::native::lsc::atomic_op;
59
56
using DWORDAtomicOp = sycl::ext::intel::esimd::atomic_op;
60
57
61
58
// This macro selects between DWORD ("legacy") and LSC-based atomics.
62
- #ifdef USE_LSC_ATOMICS
63
- using AtomicOp = LSCAtomicOp;
64
- constexpr char MODE[] = " LSC" ;
65
- #else
59
+ #ifdef USE_DWORD_ATOMICS
66
60
using AtomicOp = DWORDAtomicOp;
67
61
constexpr char MODE[] = " DWORD" ;
68
- #endif // USE_LSC_ATOMICS
62
+ #else
63
+ using AtomicOp = LSCAtomicOp;
64
+ constexpr char MODE[] = " LSC" ;
65
+ #endif // USE_DWORD_ATOMICS
69
66
70
- #ifdef USE_LSC_ATOMICS
67
+ #ifndef USE_DWORD_ATOMICS
71
68
// Reads the 32-bit value at `addr` through a single-element (N = 1) LSC
// atomic `load` operation and returns element 0 of the resulting vector.
// NOTE(review): the trailing arguments `0` and `1` are presumably the offset
// and the predicate mask passed to atomic_update — confirm against the ESIMD
// API.
uint32_t atomic_load (uint32_t *addr) {
72
69
auto v = atomic_update<LSCAtomicOp::load, uint32_t , 1 >(addr, 0 , 1 );
73
70
// Only one element was requested, so the loaded value is in lane 0.
return v[0 ];
74
71
}
75
- #endif // USE_LSC_ATOMICS
72
+ #endif // USE_DWORD_ATOMICS
76
73
77
74
template <class , int , template <class , int > class > class TestID ;
78
75
@@ -415,7 +412,7 @@ struct ImplSMax : ImplMax<T, N, DWORDAtomicOp, DWORDAtomicOp::smax> {};
415
412
// ImplUMax: ImplMax instantiated with the DWORD atomic operation `umax`.
template <class T , int N>
416
413
struct ImplUMax : ImplMax<T, N, DWORDAtomicOp, DWORDAtomicOp::umax> {};
417
414
418
- #ifdef USE_LSC_ATOMICS
415
+ #ifndef USE_DWORD_ATOMICS
419
416
// These will be redirected by the API implementation to the LSC ones:
420
417
// ImplFadd: ImplAdd instantiated with the DWORD atomic operation `fadd`.
template <class T , int N>
421
418
struct ImplFadd : ImplAdd<T, N, DWORDAtomicOp, DWORDAtomicOp::fadd> {};
@@ -434,7 +431,7 @@ template <class T, int N>
434
431
struct ImplLSCFmin : ImplMin<T, N, LSCAtomicOp, LSCAtomicOp::fmin> {};
435
432
// ImplLSCFmax: ImplMax instantiated with the LSC atomic operation `fmax`.
template <class T , int N>
436
433
struct ImplLSCFmax : ImplMax<T, N, LSCAtomicOp, LSCAtomicOp::fmax> {};
437
- #endif // USE_LSC_ATOMICS
434
+ #endif // USE_DWORD_ATOMICS
438
435
439
436
template <class T , int N, class C , C Op> struct ImplCmpxchgBase {
440
437
static constexpr C atomic_op = Op;
@@ -461,7 +458,7 @@ template <class T, int N>
461
458
struct ImplCmpxchg
462
459
: ImplCmpxchgBase<T, N, DWORDAtomicOp, DWORDAtomicOp::cmpxchg> {};
463
460
464
- #ifdef USE_LSC_ATOMICS
461
+ #ifndef USE_DWORD_ATOMICS
465
462
// This will be redirected by the API implementation to the LSC one:
466
463
template <class T , int N>
467
464
struct ImplFcmpwr
@@ -470,7 +467,7 @@ struct ImplFcmpwr
470
467
// ImplLSCFcmpwr: ImplCmpxchgBase instantiated with the LSC atomic operation
// `fcmpxchg`.
template <class T , int N>
471
468
struct ImplLSCFcmpwr
472
469
: ImplCmpxchgBase<T, N, LSCAtomicOp, LSCAtomicOp::fcmpxchg> {};
473
- #endif // USE_LSC_ATOMICS
470
+ #endif // USE_DWORD_ATOMICS
474
471
475
472
// ----------------- Main function and test combinations.
476
473
@@ -500,6 +497,7 @@ int main(void) {
500
497
};
501
498
502
499
bool passed = true ;
500
+ #ifndef CMPXCHG_TEST
503
501
// Template params:
504
502
// - element type, simd size, threads per group, num groups, atomic op,
505
503
// verification function, argument generation functions...
@@ -525,7 +523,7 @@ int main(void) {
525
523
526
524
// TODO: add other operations
527
525
528
- #ifdef USE_LSC_ATOMICS
526
+ #ifndef USE_DWORD_ATOMICS
529
527
passed &= test<float , 8 , ImplFadd>(q, cfg);
530
528
passed &= test<float , 8 , ImplFsub>(q, cfg);
531
529
passed &= test<float , 16 , ImplFadd>(q, cfg);
@@ -545,15 +543,19 @@ int main(void) {
545
543
passed &= test<float , 16 , ImplLSCFmin>(q, cfg);
546
544
passed &= test<float , 16 , ImplLSCFmax>(q, cfg);
547
545
passed &= test<float , 32 , ImplLSCFmin>(q, cfg);
548
- #endif // USE_LSC_ATOMICS
549
-
546
+ #endif // USE_DWORD_ATOMICS
547
+ # else // CMPXCHG_TEST
550
548
// Can't easily reset input to initial state, so just 1 iteration for CAS.
551
549
cfg.repeat = 1 ;
550
+ // Decrease the number of threads to reduce the risk of the driver halting the kernel.
551
+ cfg.n_groups = 7 ;
552
+ cfg.threads_per_group = 3 ;
552
553
passed &= test_int_types<8 , ImplCmpxchg>(q, cfg);
553
- #ifdef USE_LSC_ATOMICS
554
+ #ifndef USE_DWORD_ATOMICS
554
555
passed &= test<float , 8 , ImplFcmpwr>(q, cfg);
555
556
passed &= test<float , 8 , ImplLSCFcmpwr>(q, cfg);
556
- #endif // USE_LSC_ATOMICS
557
+ #endif // USE_DWORD_ATOMICS
558
+ #endif // CMPXCHG_TEST
557
559
// TODO: check double and other vector lengths in LSC mode.
558
560
559
561
std::cout << (passed ? " Passed\n " : " FAILED\n " );
0 commit comments