Skip to content

Commit 05a784f

Browse files
t4c1bb-sycl
authored andcommitted
[SYCL] Add tests for atomics with various memory orders and scopes (intel#534)
Added tests for atomics with various memory orders and scopes. Reductions tests also have updated sm requirements, as they call work group atomics, which are now implemented and have higher sm requirements than device scoped ones. This adds tests for changes introduced in intel/llvm#4820 and intel/llvm#5192.
1 parent 92a40ff commit 05a784f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

67 files changed

+8530
-256
lines changed

SYCL/AtomicRef/add.cpp

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,154 @@
1+
<<<<<<< HEAD
12
// See https://github.com/intel/llvm-test-suite/issues/867 for detailed status
23
// UNSUPPORTED: hip
34

45
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-device-code-split=per_kernel -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70
6+
=======
7+
// Each combination of 64/32 bit atomic, relaxed/acquire/release/acq_rel
8+
// semantic order and sub_group/work_group/device/system scope is tested
9+
// separately. This is controlled by macros, defined by RUN commands. Defaults
10+
// (no macro for a group) are: 32 bit, relaxed and device.
11+
12+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60
13+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
14+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
15+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
16+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
17+
18+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 -DATOMIC64
19+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
20+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
21+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
22+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
23+
24+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 -DWORK_GROUP
25+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
26+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
27+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
28+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
29+
30+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 -DWORK_GROUP -DATOMIC64
31+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
32+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
33+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
34+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
35+
36+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 -DSYSTEM
37+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
38+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
39+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
40+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
41+
42+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 -DSYSTEM -DATOMIC64
43+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
44+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
45+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
46+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
47+
48+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE
49+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
50+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
51+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
52+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
53+
54+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE -DATOMIC64
55+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
56+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
57+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
58+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
59+
60+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE -DWORK_GROUP
61+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
62+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
63+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
64+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
65+
66+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE -DWORK_GROUP -DATOMIC64
67+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
68+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
69+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
70+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
71+
72+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE -DSYSTEM
73+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
74+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
75+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
76+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
77+
78+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE -DSYSTEM -DATOMIC64
79+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
80+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
81+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
82+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
83+
84+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE
85+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
86+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
87+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
88+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
89+
90+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE -DATOMIC64
91+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
92+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
93+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
94+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
95+
96+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE -DWORK_GROUP
97+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
98+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
99+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
100+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
101+
102+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE -DWORK_GROUP -DATOMIC64
103+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
104+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
105+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
106+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
107+
108+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE -DSYSTEM
109+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
110+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
111+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
112+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
113+
114+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE -DSYSTEM -DATOMIC64
115+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
116+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
117+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
118+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
119+
120+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL
121+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
122+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
123+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
124+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
125+
126+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL -DATOMIC64
127+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
128+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
129+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
130+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
131+
132+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL -DWORK_GROUP
133+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
134+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
135+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
136+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
137+
138+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL -DWORK_GROUP -DATOMIC64
139+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
140+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
141+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
142+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
143+
144+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL -DSYSTEM
145+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
146+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
147+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
148+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
149+
150+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL -DSYSTEM -DATOMIC64
151+
>>>>>>> 88ee9d1a0 ([SYCL] Add tests for atomics with various memory orders and scopes (#534))
5152
// RUN: %HOST_RUN_PLACEHOLDER %t.out
6153
// RUN: %GPU_RUN_PLACEHOLDER %t.out
7154
// RUN: %CPU_RUN_PLACEHOLDER %t.out

SYCL/AtomicRef/add.h

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,18 +243,28 @@ void add_test(queue q, size_t N) {
243243
(space == access::address_space::generic_space && !TEST_GENERIC_IN_LOCAL);
244244
constexpr bool do_ext_tests = space != access::address_space::generic_space;
245245
if constexpr (do_local_tests) {
246+
<<<<<<< HEAD
246247
#ifdef RUN_DEPRECATED
248+
=======
249+
>>>>>>> 88ee9d1a0 ([SYCL] Add tests for atomics with various memory orders and scopes (#534))
247250
if constexpr (do_ext_tests) {
248251
add_fetch_local_test<::sycl::ext::oneapi::atomic_ref, space, T,
249252
Difference, order, scope>(q, N);
250253
}
254+
<<<<<<< HEAD
251255
#else
252256
add_fetch_local_test<::sycl::atomic_ref, space, T, Difference, order,
253257
scope>(q, N);
254258
#endif
255259
}
256260
if constexpr (do_global_tests) {
257261
#ifdef RUN_DEPRECATED
262+
=======
263+
add_fetch_local_test<::sycl::atomic_ref, space, T, Difference, order,
264+
scope>(q, N);
265+
}
266+
if constexpr (do_global_tests) {
267+
>>>>>>> 88ee9d1a0 ([SYCL] Add tests for atomics with various memory orders and scopes (#534))
258268
if constexpr (do_ext_tests) {
259269
add_fetch_test<::sycl::ext::oneapi::atomic_ref, space, T, Difference,
260270
order, scope>(q, N);
@@ -267,7 +277,10 @@ void add_test(queue q, size_t N) {
267277
order, scope>(q, N);
268278
}
269279
}
280+
<<<<<<< HEAD
270281
#else
282+
=======
283+
>>>>>>> 88ee9d1a0 ([SYCL] Add tests for atomics with various memory orders and scopes (#534))
271284
add_fetch_test<::sycl::atomic_ref, space, T, Difference, order, scope>(q,
272285
N);
273286
add_plus_equal_test<::sycl::atomic_ref, space, T, Difference, order, scope>(
@@ -278,7 +291,10 @@ void add_test(queue q, size_t N) {
278291
add_post_inc_test<::sycl::atomic_ref, space, T, Difference, order, scope>(
279292
q, N);
280293
}
294+
<<<<<<< HEAD
281295
#endif
296+
=======
297+
>>>>>>> 88ee9d1a0 ([SYCL] Add tests for atomics with various memory orders and scopes (#534))
282298
}
283299
}
284300

@@ -287,6 +303,7 @@ template <access::address_space space, typename T, typename Difference = T,
287303
void add_test_scopes(queue q, size_t N) {
288304
std::vector<memory_scope> scopes =
289305
q.get_device().get_info<info::device::atomic_memory_scope_capabilities>();
306+
<<<<<<< HEAD
290307
if (std::find(scopes.begin(), scopes.end(), memory_scope::system) !=
291308
scopes.end()) {
292309
add_test<space, T, Difference, order, memory_scope::system>(q, N);
@@ -300,12 +317,39 @@ void add_test_scopes(queue q, size_t N) {
300317
add_test<space, T, Difference, order, memory_scope::sub_group>(q, N);
301318
}
302319
add_test<space, T, Difference, order, memory_scope::device>(q, N);
320+
=======
321+
#if defined(SYSTEM)
322+
if (std::find(scopes.begin(), scopes.end(), memory_scope::system) ==
323+
scopes.end()) {
324+
std::cout << "Skipping test\n";
325+
return;
326+
}
327+
add_test<space, T, Difference, order, memory_scope::system>(q, N);
328+
#elif defined(WORK_GROUP)
329+
if (std::find(scopes.begin(), scopes.end(), memory_scope::system) ==
330+
scopes.end()) {
331+
std::cout << "Skipping test\n";
332+
return;
333+
}
334+
add_test<space, T, Difference, order, memory_scope::work_group>(q, N);
335+
#elif defined(SUB_GROUP)
336+
if (std::find(scopes.begin(), scopes.end(), memory_scope::system) ==
337+
scopes.end()) {
338+
std::cout << "Skipping test\n";
339+
return;
340+
}
341+
add_test<space, T, Difference, order, memory_scope::sub_group>(q, N);
342+
#else
343+
add_test<space, T, Difference, order, memory_scope::device>(q, N);
344+
#endif
345+
>>>>>>> 88ee9d1a0 ([SYCL] Add tests for atomics with various memory orders and scopes (#534))
303346
}
304347

305348
template <access::address_space space, typename T, typename Difference = T>
306349
void add_test_orders_scopes(queue q, size_t N) {
307350
std::vector<memory_order> orders =
308351
q.get_device().get_info<info::device::atomic_memory_order_capabilities>();
352+
<<<<<<< HEAD
309353
if (std::find(orders.begin(), orders.end(), memory_order::acq_rel) !=
310354
orders.end()) {
311355
add_test_scopes<space, T, Difference, memory_order::acq_rel>(q, N);
@@ -319,14 +363,51 @@ void add_test_orders_scopes(queue q, size_t N) {
319363
add_test_scopes<space, T, Difference, memory_order::release>(q, N);
320364
}
321365
add_test_scopes<space, T, Difference, memory_order::relaxed>(q, N);
366+
=======
367+
#if defined(ACQ_REL)
368+
if (std::find(orders.begin(), orders.end(), memory_order::acq_rel) ==
369+
orders.end()) {
370+
std::cout << "Skipping test\n";
371+
return;
372+
}
373+
add_test_scopes<space, T, Difference, memory_order::acq_rel>(q, N);
374+
#elif defined(ACQUIRE)
375+
if (std::find(orders.begin(), orders.end(), memory_order::acquire) ==
376+
orders.end()) {
377+
std::cout << "Skipping test\n";
378+
return;
379+
}
380+
add_test_scopes<space, T, Difference, memory_order::acquire>(q, N);
381+
#elif defined(RELEASE)
382+
if (std::find(orders.begin(), orders.end(), memory_order::release) ==
383+
orders.end()) {
384+
std::cout << "Skipping test\n";
385+
return;
386+
}
387+
add_test_scopes<space, T, Difference, memory_order::release>(q, N);
388+
#else
389+
add_test_scopes<space, T, Difference, memory_order::relaxed>(q, N);
390+
#endif
391+
>>>>>>> 88ee9d1a0 ([SYCL] Add tests for atomics with various memory orders and scopes (#534))
322392
}
323393

324394
template <access::address_space space> void add_test_all() {
325395
queue q;
326396

327397
constexpr int N = 32;
398+
<<<<<<< HEAD
328399
#ifdef FULL_ATOMIC64_COVERAGE
329400
add_test_orders_scopes<space, double>(q, N);
401+
=======
402+
#ifdef ATOMIC64
403+
if (!q.get_device().has(aspect::atomic64)) {
404+
std::cout << "Skipping test\n";
405+
return;
406+
}
407+
408+
add_test_orders_scopes<space, double>(q, N);
409+
#ifndef FP_TESTS_ONLY
410+
>>>>>>> 88ee9d1a0 ([SYCL] Add tests for atomics with various memory orders and scopes (#534))
330411
if constexpr (sizeof(long) == 8) {
331412
add_test_orders_scopes<space, long>(q, N);
332413
add_test_orders_scopes<space, unsigned long>(q, N);
@@ -339,8 +420,14 @@ template <access::address_space space> void add_test_all() {
339420
add_test_orders_scopes<space, char *, ptrdiff_t>(q, N);
340421
}
341422
#endif
423+
<<<<<<< HEAD
342424
add_test_orders_scopes<space, float>(q, N);
343425
#ifdef FULL_ATOMIC32_COVERAGE
426+
=======
427+
#else
428+
add_test_orders_scopes<space, float>(q, N);
429+
#ifndef FP_TESTS_ONLY
430+
>>>>>>> 88ee9d1a0 ([SYCL] Add tests for atomics with various memory orders and scopes (#534))
344431
add_test_orders_scopes<space, int>(q, N);
345432
add_test_orders_scopes<space, unsigned int>(q, N);
346433
if constexpr (sizeof(long) == 4) {
@@ -351,5 +438,10 @@ template <access::address_space space> void add_test_all() {
351438
add_test_orders_scopes<space, char *, ptrdiff_t>(q, N);
352439
}
353440
#endif
441+
<<<<<<< HEAD
442+
=======
443+
#endif
444+
445+
>>>>>>> 88ee9d1a0 ([SYCL] Add tests for atomics with various memory orders and scopes (#534))
354446
std::cout << "Test passed." << std::endl;
355447
}

SYCL/AtomicRef/add_atomic64.cpp

Lines changed: 0 additions & 53 deletions
This file was deleted.

0 commit comments

Comments
 (0)