Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit 6393422

Browse files
authored
[ESIMD] Separate out cmpxchg tests which can take more time to execute. (#1227)
Also decrease number of threads to reduce risk of halting kernel by the driver. Signed-off-by: Konstantin S Bobrovsky <[email protected]>
1 parent 6c5ca79 commit 6393422

File tree

4 files changed

+74
-29
lines changed

4 files changed

+74
-29
lines changed

SYCL/ESIMD/dword_atomic_cmpxchg.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
//==---------------- dword_atomic_smoke.cpp - DPC++ ESIMD on-device test --==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
// This test checks DWORD compare-and-exchange atomic operations.
9+
//===----------------------------------------------------------------------===//
10+
// REQUIRES: gpu
11+
// UNSUPPORTED: cuda || hip
12+
// TODO: esimd_emulator fails due to unsupported __esimd_svm_atomic0/1/2
13+
// XFAIL: esimd_emulator
14+
// RUN: %clangxx -fsycl %s -o %t.out
15+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
16+
17+
// This macro enables only cmpxch tests. They may require more time to execute,
18+
// and have higher probablity to hit kernel execution time limit, so they are
19+
// separated.
20+
#define CMPXCHG_TEST
21+
22+
// This macro enforces usage of dword atomics in the included test.
23+
#define USE_DWORD_ATOMICS
24+
25+
#include "lsc/atomic_smoke.cpp"

SYCL/ESIMD/dword_atomic_smoke.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,12 @@
99
//===----------------------------------------------------------------------===//
1010
// REQUIRES: gpu
1111
// UNSUPPORTED: cuda || hip
12-
//
1312
// TODO: esimd_emulator fails due to unsupported __esimd_svm_atomic0/1/2
14-
// TODO: fails on a regular gpu with
15-
// "SYCL exception caught: Native API failed. Native API returns: -1"
16-
// REQUIRES: TEMPORARY_DISABLED
13+
// XFAIL: esimd_emulator
1714
// RUN: %clangxx -fsycl %s -o %t.out
1815
// RUN: %GPU_RUN_PLACEHOLDER %t.out
1916

20-
// This macro disables usage of LSC atomics in the included test.
21-
#define UNDEF_USE_LSC_ATOMICS
17+
// This macro enforces usage of dword atomics in the included test.
18+
#define USE_DWORD_ATOMICS
2219

2320
#include "lsc/atomic_smoke.cpp"

SYCL/ESIMD/lsc/atomic_cmpxchg.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
//==---------------- atomic_cmpxchg.cpp - DPC++ ESIMD on-device test ------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
// This test checks LSC compare-and-exchange atomic operations.
9+
//===----------------------------------------------------------------------===//
10+
// REQUIRES: gpu-intel-pvc
11+
// TODO: esimd_emulator fails due to unsupported __esimd_svm_atomic0/1/2
12+
// XFAIL: esimd_emulator
13+
// RUN: %clangxx -fsycl %s -o %t.out
14+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
15+
16+
// This macro enables only cmpxch tests. They may require more time to execute,
17+
// and have higher probablity to hit kernel execution time limit, so they are
18+
// separated.
19+
#define CMPXCHG_TEST
20+
21+
#include "lsc/atomic_smoke.cpp"

SYCL/ESIMD/lsc/atomic_smoke.cpp

Lines changed: 25 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,9 @@
88
// This test checks LSC atomic operations.
99
//===----------------------------------------------------------------------===//
1010
// REQUIRES: gpu-intel-pvc
11-
// UNSUPPORTED: cuda || hip
12-
// RUN: %clangxx -fsycl -DUSE_LSC_ATOMICS %s -o %t.out
11+
// TODO: esimd_emulator fails due to unsupported __esimd_svm_atomic0/1/2
12+
// XFAIL: esimd_emulator
13+
// RUN: %clangxx -fsycl %s -o %t.out
1314
// RUN: %GPU_RUN_PLACEHOLDER %t.out
1415

1516
#include "../esimd_test_utils.hpp"
@@ -18,10 +19,6 @@
1819
#include <iostream>
1920
#include <sycl/ext/intel/esimd.hpp>
2021

21-
#ifdef UNDEF_USE_LSC_ATOMICS
22-
#undef USE_LSC_ATOMICS
23-
#endif
24-
2522
using namespace sycl;
2623
using namespace sycl::ext::intel::esimd;
2724
using namespace sycl::ext::intel::experimental::esimd;
@@ -39,7 +36,7 @@ struct Config {
3936
#define PREFER_FULL_BARRIER 0
4037
#endif // PREFER_FULL_BARRIER
4138

42-
#if PREFER_FULL_BARRIER && defined(USE_LSC_ATOMICS)
39+
#if PREFER_FULL_BARRIER && !defined(USE_DWORD_ATOMICS)
4340
#define USE_FULL_BARRIER 1
4441
#else
4542
#define USE_FULL_BARRIER 0
@@ -59,20 +56,20 @@ using LSCAtomicOp = sycl::ext::intel::esimd::native::lsc::atomic_op;
5956
using DWORDAtomicOp = sycl::ext::intel::esimd::atomic_op;
6057

6158
// This macro selects between DWORD ("legacy") and LSC-based atomics.
62-
#ifdef USE_LSC_ATOMICS
63-
using AtomicOp = LSCAtomicOp;
64-
constexpr char MODE[] = "LSC";
65-
#else
59+
#ifdef USE_DWORD_ATOMICS
6660
using AtomicOp = DWORDAtomicOp;
6761
constexpr char MODE[] = "DWORD";
68-
#endif // USE_LSC_ATOMICS
62+
#else
63+
using AtomicOp = LSCAtomicOp;
64+
constexpr char MODE[] = "LSC";
65+
#endif // USE_DWORD_ATOMICS
6966

70-
#ifdef USE_LSC_ATOMICS
67+
#ifndef USE_DWORD_ATOMICS
7168
uint32_t atomic_load(uint32_t *addr) {
7269
auto v = atomic_update<LSCAtomicOp::load, uint32_t, 1>(addr, 0, 1);
7370
return v[0];
7471
}
75-
#endif // USE_LSC_ATOMICS
72+
#endif // USE_DWORD_ATOMICS
7673

7774
template <class, int, template <class, int> class> class TestID;
7875

@@ -415,7 +412,7 @@ struct ImplSMax : ImplMax<T, N, DWORDAtomicOp, DWORDAtomicOp::smax> {};
415412
template <class T, int N>
416413
struct ImplUMax : ImplMax<T, N, DWORDAtomicOp, DWORDAtomicOp::umax> {};
417414

418-
#ifdef USE_LSC_ATOMICS
415+
#ifndef USE_DWORD_ATOMICS
419416
// These will be redirected by API implementation to LSC ones:
420417
template <class T, int N>
421418
struct ImplFadd : ImplAdd<T, N, DWORDAtomicOp, DWORDAtomicOp::fadd> {};
@@ -434,7 +431,7 @@ template <class T, int N>
434431
struct ImplLSCFmin : ImplMin<T, N, LSCAtomicOp, LSCAtomicOp::fmin> {};
435432
template <class T, int N>
436433
struct ImplLSCFmax : ImplMax<T, N, LSCAtomicOp, LSCAtomicOp::fmax> {};
437-
#endif // USE_LSC_ATOMICS
434+
#endif // USE_DWORD_ATOMICS
438435

439436
template <class T, int N, class C, C Op> struct ImplCmpxchgBase {
440437
static constexpr C atomic_op = Op;
@@ -461,7 +458,7 @@ template <class T, int N>
461458
struct ImplCmpxchg
462459
: ImplCmpxchgBase<T, N, DWORDAtomicOp, DWORDAtomicOp::cmpxchg> {};
463460

464-
#ifdef USE_LSC_ATOMICS
461+
#ifndef USE_DWORD_ATOMICS
465462
// This will be redirected by API implementation to LSC one:
466463
template <class T, int N>
467464
struct ImplFcmpwr
@@ -470,7 +467,7 @@ struct ImplFcmpwr
470467
template <class T, int N>
471468
struct ImplLSCFcmpwr
472469
: ImplCmpxchgBase<T, N, LSCAtomicOp, LSCAtomicOp::fcmpxchg> {};
473-
#endif // USE_LSC_ATOMICS
470+
#endif // USE_DWORD_ATOMICS
474471

475472
// ----------------- Main function and test combinations.
476473

@@ -500,6 +497,7 @@ int main(void) {
500497
};
501498

502499
bool passed = true;
500+
#ifndef CMPXCHG_TEST
503501
// Template params:
504502
// - element type, simd size, threads per group, num groups, atomic op,
505503
// verification function, argument generation functions...
@@ -525,7 +523,7 @@ int main(void) {
525523

526524
// TODO: add other operations
527525

528-
#ifdef USE_LSC_ATOMICS
526+
#ifndef USE_DWORD_ATOMICS
529527
passed &= test<float, 8, ImplFadd>(q, cfg);
530528
passed &= test<float, 8, ImplFsub>(q, cfg);
531529
passed &= test<float, 16, ImplFadd>(q, cfg);
@@ -545,15 +543,19 @@ int main(void) {
545543
passed &= test<float, 16, ImplLSCFmin>(q, cfg);
546544
passed &= test<float, 16, ImplLSCFmax>(q, cfg);
547545
passed &= test<float, 32, ImplLSCFmin>(q, cfg);
548-
#endif // USE_LSC_ATOMICS
549-
546+
#endif // USE_DWORD_ATOMICS
547+
#else // CMPXCHG_TEST
550548
// Can't easily reset input to initial state, so just 1 iteration for CAS.
551549
cfg.repeat = 1;
550+
// Decrease number of threads to reduce risk of halting kernel by the driver.
551+
cfg.n_groups = 7;
552+
cfg.threads_per_group = 3;
552553
passed &= test_int_types<8, ImplCmpxchg>(q, cfg);
553-
#ifdef USE_LSC_ATOMICS
554+
#ifndef USE_DWORD_ATOMICS
554555
passed &= test<float, 8, ImplFcmpwr>(q, cfg);
555556
passed &= test<float, 8, ImplLSCFcmpwr>(q, cfg);
556-
#endif // USE_LSC_ATOMICS
557+
#endif // USE_DWORD_ATOMICS
558+
#endif // CMPXCHG_TEST
557559
// TODO: check double other vector lengths in LSC mode.
558560

559561
std::cout << (passed ? "Passed\n" : "FAILED\n");

0 commit comments

Comments
 (0)