This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit a5f90c0

[SYCL] Speed up atomic_ref tests (#879)

1 parent e8fa63d


64 files changed: +250 −7889 lines (large commit; only SYCL/AtomicRef/add.cpp and add.h are shown below)

SYCL/AtomicRef/add.cpp

Lines changed: 1 addition & 144 deletions
@@ -1,150 +1,7 @@
-// Each combination of 64/32 bit atomic, relaxed/acquire/release/acq_rel
-// semantic order and sub_group/work_group/device/system scope is tested
-// separately. This is controlled by macros, defined by RUN commands. Defaults
-// (no macro for a group) are: 32 bit, relaxed and device.
-
 // See https://github.com/intel/llvm-test-suite/issues/867 for detailed status
 // UNSUPPORTED: hip
 
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 -DATOMIC64
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 -DWORK_GROUP
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 -DWORK_GROUP -DATOMIC64
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 -DSYSTEM
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 -DSYSTEM -DATOMIC64
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE -DATOMIC64
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE -DWORK_GROUP
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE -DWORK_GROUP -DATOMIC64
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE -DSYSTEM
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE -DSYSTEM -DATOMIC64
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE -DATOMIC64
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE -DWORK_GROUP
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE -DWORK_GROUP -DATOMIC64
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE -DSYSTEM
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE -DSYSTEM -DATOMIC64
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL -DATOMIC64
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL -DWORK_GROUP
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL -DWORK_GROUP -DATOMIC64
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL -DSYSTEM
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL -DSYSTEM -DATOMIC64
+// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-device-code-split=per_kernel -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70
 // RUN: %HOST_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
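The 24 compile-and-run configurations deleted above existed only to sweep the macro matrix one cell at a time; the new single RUN line, with -fsycl-device-code-split=per_kernel, builds one binary that covers the whole matrix at runtime. Below is a minimal sketch of the compile-time selection each deleted -D flag drove, reconstructed from the deleted add.h code shown further down; it is not part of this commit, and the name "order" is illustrative.

#include <sycl/sycl.hpp> // assumes a DPC++ toolchain

// Exactly one memory order was baked into each compilation; per the deleted
// header comment, the defaults (no macro) were 32 bit, relaxed and device.
#if defined(ACQ_REL)
constexpr auto order = sycl::memory_order::acq_rel;
#elif defined(ACQUIRE)
constexpr auto order = sycl::memory_order::acquire;
#elif defined(RELEASE)
constexpr auto order = sycl::memory_order::release;
#else
constexpr auto order = sycl::memory_order::relaxed;
#endif

int main() { return 0; } // the selection happens entirely at compile time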

SYCL/AtomicRef/add.h

Lines changed: 20 additions & 45 deletions
@@ -243,14 +243,18 @@ void add_test(queue q, size_t N) {
       (space == access::address_space::generic_space && !TEST_GENERIC_IN_LOCAL);
   constexpr bool do_ext_tests = space != access::address_space::generic_space;
   if constexpr (do_local_tests) {
+#ifdef RUN_DEPRECATED
     if constexpr (do_ext_tests) {
       add_fetch_local_test<::sycl::ext::oneapi::atomic_ref, space, T,
                            Difference, order, scope>(q, N);
     }
+#else
     add_fetch_local_test<::sycl::atomic_ref, space, T, Difference, order,
                          scope>(q, N);
+#endif
   }
   if constexpr (do_global_tests) {
+#ifdef RUN_DEPRECATED
     if constexpr (do_ext_tests) {
       add_fetch_test<::sycl::ext::oneapi::atomic_ref, space, T, Difference,
                      order, scope>(q, N);
@@ -263,6 +267,7 @@ void add_test(queue q, size_t N) {
                         order, scope>(q, N);
       }
     }
+#else
     add_fetch_test<::sycl::atomic_ref, space, T, Difference, order, scope>(q,
                    N);
     add_plus_equal_test<::sycl::atomic_ref, space, T, Difference, order, scope>(
@@ -273,6 +278,7 @@ void add_test(queue q, size_t N) {
     add_post_inc_test<::sycl::atomic_ref, space, T, Difference, order, scope>(
         q, N);
     }
+#endif
   }
 }
 
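The three hunks above compile the deprecated sycl::ext::oneapi::atomic_ref paths only when RUN_DEPRECATED is defined; by default the standard SYCL 2020 sycl::atomic_ref is tested instead. A condensed sketch of the pattern follows, with a hypothetical helper run_add_tests standing in for add_fetch_test, add_plus_equal_test and the other helpers (signature simplified; assumes a DPC++ toolchain).

#include <sycl/sycl.hpp> // assumes a DPC++ toolchain
#include <cstddef>

// Hypothetical stand-in for the add_* helpers in this header; the real ones
// also take the atomic_ref class template as a template template parameter.
template <template <typename, sycl::memory_order, sycl::memory_scope,
                    sycl::access::address_space> class AtomicRef,
          typename T>
void run_add_tests(sycl::queue &, std::size_t) { /* kernels elided */ }

template <typename T> void dispatch(sycl::queue &q, std::size_t n) {
#ifdef RUN_DEPRECATED
  // Opt-in: exercise the deprecated extension class instead.
  run_add_tests<sycl::ext::oneapi::atomic_ref, T>(q, n);
#else
  // Default: test only the standard SYCL 2020 atomic_ref.
  run_add_tests<sycl::atomic_ref, T>(q, n);
#endif
}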
@@ -281,74 +287,46 @@ template <access::address_space space, typename T, typename Difference = T,
 void add_test_scopes(queue q, size_t N) {
   std::vector<memory_scope> scopes =
       q.get_device().get_info<info::device::atomic_memory_scope_capabilities>();
-#if defined(SYSTEM)
-  if (std::find(scopes.begin(), scopes.end(), memory_scope::system) ==
+  if (std::find(scopes.begin(), scopes.end(), memory_scope::system) !=
       scopes.end()) {
-    std::cout << "Skipping test\n";
-    return;
+    add_test<space, T, Difference, order, memory_scope::system>(q, N);
   }
-  add_test<space, T, Difference, order, memory_scope::system>(q, N);
-#elif defined(WORK_GROUP)
-  if (std::find(scopes.begin(), scopes.end(), memory_scope::system) ==
+  if (std::find(scopes.begin(), scopes.end(), memory_scope::work_group) !=
       scopes.end()) {
-    std::cout << "Skipping test\n";
-    return;
+    add_test<space, T, Difference, order, memory_scope::work_group>(q, N);
   }
-  add_test<space, T, Difference, order, memory_scope::work_group>(q, N);
-#elif defined(SUB_GROUP)
-  if (std::find(scopes.begin(), scopes.end(), memory_scope::system) ==
+  if (std::find(scopes.begin(), scopes.end(), memory_scope::sub_group) !=
       scopes.end()) {
-    std::cout << "Skipping test\n";
-    return;
+    add_test<space, T, Difference, order, memory_scope::sub_group>(q, N);
   }
-  add_test<space, T, Difference, order, memory_scope::sub_group>(q, N);
-#else
   add_test<space, T, Difference, order, memory_scope::device>(q, N);
-#endif
 }
 
 template <access::address_space space, typename T, typename Difference = T>
 void add_test_orders_scopes(queue q, size_t N) {
   std::vector<memory_order> orders =
       q.get_device().get_info<info::device::atomic_memory_order_capabilities>();
-#if defined(ACQ_REL)
-  if (std::find(orders.begin(), orders.end(), memory_order::acq_rel) ==
+  if (std::find(orders.begin(), orders.end(), memory_order::acq_rel) !=
      orders.end()) {
-    std::cout << "Skipping test\n";
-    return;
+    add_test_scopes<space, T, Difference, memory_order::acq_rel>(q, N);
   }
-  add_test_scopes<space, T, Difference, memory_order::acq_rel>(q, N);
-#elif defined(ACQUIRE)
-  if (std::find(orders.begin(), orders.end(), memory_order::acquire) ==
+  if (std::find(orders.begin(), orders.end(), memory_order::acquire) !=
      orders.end()) {
-    std::cout << "Skipping test\n";
-    return;
+    add_test_scopes<space, T, Difference, memory_order::acquire>(q, N);
  }
-  add_test_scopes<space, T, Difference, memory_order::acquire>(q, N);
-#elif defined(RELEASE)
-  if (std::find(orders.begin(), orders.end(), memory_order::release) ==
+  if (std::find(orders.begin(), orders.end(), memory_order::release) !=
      orders.end()) {
-    std::cout << "Skipping test\n";
-    return;
+    add_test_scopes<space, T, Difference, memory_order::release>(q, N);
   }
-  add_test_scopes<space, T, Difference, memory_order::release>(q, N);
-#else
   add_test_scopes<space, T, Difference, memory_order::relaxed>(q, N);
-#endif
 }
 
 template <access::address_space space> void add_test_all() {
   queue q;
 
   constexpr int N = 32;
-#ifdef ATOMIC64
-  if (!q.get_device().has(aspect::atomic64)) {
-    std::cout << "Skipping test\n";
-    return;
-  }
-
+#ifdef FULL_ATOMIC64_COVERAGE
   add_test_orders_scopes<space, double>(q, N);
-#ifndef FP_TESTS_ONLY
   if constexpr (sizeof(long) == 8) {
     add_test_orders_scopes<space, long>(q, N);
     add_test_orders_scopes<space, unsigned long>(q, N);
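The hunk above removes the compile-time SYSTEM/WORK_GROUP/SUB_GROUP and ACQ_REL/ACQUIRE/RELEASE selection entirely: add_test_scopes and add_test_orders_scopes now query the device and exercise every scope and order it reports, silently skipping unsupported ones instead of printing "Skipping test" and returning. A self-contained sketch of this query-then-run pattern (the printed strings are placeholders, not the test's real output; assumes a SYCL 2020 toolchain):

#include <sycl/sycl.hpp>
#include <algorithm>
#include <iostream>
#include <vector>

int main() {
  sycl::queue q;
  // Same query the new add_test_orders_scopes performs.
  std::vector<sycl::memory_order> orders =
      q.get_device()
          .get_info<sycl::info::device::atomic_memory_order_capabilities>();
  auto have = [&](sycl::memory_order o) {
    return std::find(orders.begin(), orders.end(), o) != orders.end();
  };
  // Run each supported order in one pass; skip the rest without failing.
  if (have(sycl::memory_order::acq_rel))
    std::cout << "would run acq_rel variants\n";
  if (have(sycl::memory_order::acquire))
    std::cout << "would run acquire variants\n";
  if (have(sycl::memory_order::release))
    std::cout << "would run release variants\n";
  std::cout << "would run relaxed variants (always supported)\n";
  return 0;
}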
@@ -361,9 +339,8 @@ template <access::address_space space> void add_test_all() {
     add_test_orders_scopes<space, char *, ptrdiff_t>(q, N);
   }
 #endif
-#else
   add_test_orders_scopes<space, float>(q, N);
-#ifndef FP_TESTS_ONLY
+#ifdef FULL_ATOMIC32_COVERAGE
   add_test_orders_scopes<space, int>(q, N);
   add_test_orders_scopes<space, unsigned int>(q, N);
   if constexpr (sizeof(long) == 4) {
@@ -374,7 +351,5 @@ template <access::address_space space> void add_test_all() {
     add_test_orders_scopes<space, char *, ptrdiff_t>(q, N);
   }
 #endif
-#endif
-
   std::cout << "Test passed." << std::endl;
 }
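The remaining hunks make wide type coverage opt-in: the old ATOMIC64 macro and its aspect::atomic64 guard are gone, add_test_all always runs float, and the 64-bit types visible in the diff (double, long, unsigned long, char * with ptrdiff_t) plus the extra 32-bit integer types compile only under FULL_ATOMIC64_COVERAGE / FULL_ATOMIC32_COVERAGE. A hypothetical lit invocation, not present in this commit, that would restore full coverage for a local run:

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-device-code-split=per_kernel -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DFULL_ATOMIC64_COVERAGE -DFULL_ATOMIC32_COVERAGE
// RUN: %GPU_RUN_PLACEHOLDER %t.out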
