Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit 88ee9d1

Browse files
authored
[SYCL] Add tests for atomics with various memory orders and scopes (#534)
Added tests for atomics with various memory orders and scopes. Reduction tests also have updated sm requirements, as they call work-group atomics, which are now implemented and have higher sm requirements than device-scoped ones. This adds tests for changes introduced in intel/llvm#4820 and intel/llvm#5192.
1 parent 803958e commit 88ee9d1

File tree

83 files changed

+10084
-1176
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

83 files changed

+10084
-1176
lines changed

SYCL/AtomicRef/add.cpp

Lines changed: 146 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,152 @@
1-
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out \
2-
// RUN: -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60
1+
// Each combination of 64/32 bit atomic, relaxed/acquire/release/acq_rel
2+
// memory order and sub_group/work_group/device/system scope is tested
3+
// separately. This is controlled by macros, defined by RUN commands. Defaults
4+
// (no macro for a group) are: 32 bit, relaxed and device.
5+
6+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60
7+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
8+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
9+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
10+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
11+
12+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 -DATOMIC64
13+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
14+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
15+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
16+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
17+
18+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 -DWORK_GROUP
19+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
20+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
21+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
22+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
23+
24+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 -DWORK_GROUP -DATOMIC64
25+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
26+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
27+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
28+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
29+
30+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 -DSYSTEM
31+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
32+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
33+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
34+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
35+
36+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 -DSYSTEM -DATOMIC64
37+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
38+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
39+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
40+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
41+
42+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE
43+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
44+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
45+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
46+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
47+
48+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE -DATOMIC64
49+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
50+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
51+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
52+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
53+
54+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE -DWORK_GROUP
55+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
56+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
57+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
58+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
59+
60+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE -DWORK_GROUP -DATOMIC64
61+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
62+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
63+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
64+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
65+
66+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE -DSYSTEM
67+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
68+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
69+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
70+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
71+
72+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE -DSYSTEM -DATOMIC64
73+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
74+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
75+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
76+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
77+
78+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE
79+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
80+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
81+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
82+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
83+
84+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE -DATOMIC64
85+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
86+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
87+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
88+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
89+
90+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE -DWORK_GROUP
91+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
92+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
93+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
94+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
95+
96+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE -DWORK_GROUP -DATOMIC64
97+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
98+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
99+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
100+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
101+
102+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE -DSYSTEM
103+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
104+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
105+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
106+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
107+
108+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE -DSYSTEM -DATOMIC64
109+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
110+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
111+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
112+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
113+
114+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL
115+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
116+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
117+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
118+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
119+
120+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL -DATOMIC64
121+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
122+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
123+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
124+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
125+
126+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL -DWORK_GROUP
127+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
128+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
129+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
130+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
131+
132+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL -DWORK_GROUP -DATOMIC64
133+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
134+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
135+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
136+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
137+
138+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL -DSYSTEM
139+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
140+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
141+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
142+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
143+
144+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL -DSYSTEM -DATOMIC64
3 145
// RUN: %HOST_RUN_PLACEHOLDER %t.out
4 146
// RUN: %GPU_RUN_PLACEHOLDER %t.out
5 147
// RUN: %CPU_RUN_PLACEHOLDER %t.out
6 148
// RUN: %ACC_RUN_PLACEHOLDER %t.out
7 149

8 150
#include "add.h"
9-
#include <iostream>
10-
using namespace sycl;
11-
12-
// Floating-point types do not support pre- or post-increment
13-
template <> void add_test<float>(queue q, size_t N) {
14-
add_fetch_test<::sycl::ext::oneapi::atomic_ref,
15-
access::address_space::global_space, float>(q, N);
16-
add_fetch_test<::sycl::atomic_ref, access::address_space::global_space,
17-
float>(q, N);
18-
add_plus_equal_test<::sycl::ext::oneapi::atomic_ref,
19-
access::address_space::global_space, float>(q, N);
20-
add_plus_equal_test<::sycl::atomic_ref, access::address_space::global_space,
21-
float>(q, N);
22-
}
23-
24-
int main() {
25-
queue q;
26-
27-
constexpr int N = 32;
28-
add_test<int>(q, N);
29-
add_test<unsigned int>(q, N);
30-
add_test<float>(q, N);
31-
32-
// Include long tests if they are 32 bits wide
33-
if constexpr (sizeof(long) == 4) {
34-
add_test<long>(q, N);
35-
add_test<unsigned long>(q, N);
36-
}
37-
38-
// Include pointer tests if they are 32 bits wide
39-
if constexpr (sizeof(char *) == 4) {
40-
add_test<char *, ptrdiff_t>(q, N);
41-
}
42-
43-
std::cout << "Test passed." << std::endl;
44-
}
151+
152+
// Entry point: invokes add_test_all (declared outside this file, presumably
// in add.h) instantiated for the global address space.
int main() {
  add_test_all<access::address_space::global_space>();
  return 0;
}

0 commit comments

Comments
 (0)