Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit 82a7eec

Browse files
steffenlarsenSteffen Larsen
andauthored
[SYCL][CUDA] Adds tests for atomic memory ordering (#363)
* [SYCL][CUDA] Adds tests for atomic memory ordering Signed-off-by: Steffen Larsen <[email protected]> * Fix formatting Signed-off-by: Steffen Larsen <[email protected]> Co-authored-by: Steffen Larsen <[email protected]>
1 parent b36a1f9 commit 82a7eec

File tree

10 files changed

+177
-42
lines changed

10 files changed

+177
-42
lines changed

SYCL/AtomicRef/accessor.cpp

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -26,21 +26,21 @@ template <typename T> void accessor_test(queue q, size_t N) {
2626
static_assert(
2727
std::is_same<decltype(atomic_accessor(sum_buf, cgh, relaxed_order,
2828
device_scope)),
29-
atomic_accessor<T, 1, ONEAPI::memory_order::relaxed,
30-
ONEAPI::memory_scope::device>>::value,
29+
atomic_accessor<T, 1, memory_order::relaxed,
30+
memory_scope::device>>::value,
3131
"atomic_accessor type incorrectly deduced");
3232
#endif
33-
auto sum = atomic_accessor<T, 1, ONEAPI::memory_order::relaxed,
34-
ONEAPI::memory_scope::device>(sum_buf, cgh);
33+
auto sum =
34+
atomic_accessor<T, 1, memory_order::relaxed, memory_scope::device>(
35+
sum_buf, cgh);
3536
auto out =
3637
output_buf.template get_access<access::mode::discard_write>(cgh);
3738
cgh.parallel_for(range<1>(N), [=](item<1> it) {
3839
int gid = it.get_id(0);
3940
static_assert(
4041
std::is_same<
4142
decltype(sum[0]),
42-
atomic_ref<T, ONEAPI::memory_order::relaxed,
43-
ONEAPI::memory_scope::device,
43+
atomic_ref<T, memory_order::relaxed, memory_scope::device,
4444
access::address_space::global_space>>::value,
4545
"atomic_accessor returns incorrect atomic_ref");
4646
out[gid] = sum[0].fetch_add(T(1));
@@ -70,19 +70,18 @@ void local_accessor_test(queue q, size_t N, size_t L = 8) {
7070
buffer<T> output_buf(output.data(), output.size());
7171
q.submit([&](handler &cgh) {
7272
auto sum =
73-
atomic_accessor<T, 1, ONEAPI::memory_order::relaxed,
74-
ONEAPI::memory_scope::device, access::target::local>(
75-
1, cgh);
73+
atomic_accessor<T, 1, memory_order::relaxed, memory_scope::device,
74+
access::target::local>(1, cgh);
7675
auto out = output_buf.template get_access<access::mode::read_write>(cgh);
7776
cgh.parallel_for(nd_range<1>(N, L), [=](nd_item<1> it) {
7877
int grp = it.get_group(0);
7978
sum[0].store(0);
8079
it.barrier();
8180
static_assert(
82-
std::is_same<decltype(sum[0]),
83-
atomic_ref<T, ONEAPI::memory_order::relaxed,
84-
ONEAPI::memory_scope::device,
85-
access::address_space::local_space>>::value,
81+
std::is_same<
82+
decltype(sum[0]),
83+
atomic_ref<T, memory_order::relaxed, memory_scope::device,
84+
access::address_space::local_space>>::value,
8685
"local atomic_accessor returns incorrect atomic_ref");
8786
T result = sum[0].fetch_add(T(1));
8887
if (result == it.get_local_range(0) - 1) {

SYCL/AtomicRef/add.cpp

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,7 @@ void add_fetch_test(queue q, size_t N) {
2828
output_buf.template get_access<access::mode::discard_write>(cgh);
2929
cgh.parallel_for(range<1>(N), [=](item<1> it) {
3030
int gid = it.get_id(0);
31-
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
32-
ONEAPI::memory_scope::device,
31+
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
3332
access::address_space::global_space>(sum[0]);
3433
out[gid] = atm.fetch_add(Difference(1));
3534
});
@@ -64,8 +63,7 @@ void add_plus_equal_test(queue q, size_t N) {
6463
output_buf.template get_access<access::mode::discard_write>(cgh);
6564
cgh.parallel_for(range<1>(N), [=](item<1> it) {
6665
int gid = it.get_id(0);
67-
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
68-
ONEAPI::memory_scope::device,
66+
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
6967
access::address_space::global_space>(sum[0]);
7068
out[gid] = atm += Difference(1);
7169
});
@@ -100,8 +98,7 @@ void add_pre_inc_test(queue q, size_t N) {
10098
output_buf.template get_access<access::mode::discard_write>(cgh);
10199
cgh.parallel_for(range<1>(N), [=](item<1> it) {
102100
int gid = it.get_id(0);
103-
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
104-
ONEAPI::memory_scope::device,
101+
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
105102
access::address_space::global_space>(sum[0]);
106103
out[gid] = ++atm;
107104
});
@@ -136,8 +133,7 @@ void add_post_inc_test(queue q, size_t N) {
136133
output_buf.template get_access<access::mode::discard_write>(cgh);
137134
cgh.parallel_for(range<1>(N), [=](item<1> it) {
138135
int gid = it.get_id(0);
139-
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
140-
ONEAPI::memory_scope::device,
136+
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
141137
access::address_space::global_space>(sum[0]);
142138
out[gid] = atm++;
143139
});
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
3+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
4+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
5+
// L0, OpenCL, and ROCm backends don't currently support
6+
// info::device::atomic_memory_order_capabilities and aspect::atomic64
7+
// XFAIL: level_zero || opencl || rocm
8+
9+
// NOTE: Tests load and store for supported memory orderings.
10+
11+
#include <CL/sycl.hpp>
12+
#include <algorithm>
13+
#include <cassert>
14+
#include <numeric>
15+
#include <vector>
16+
using namespace sycl;
17+
using namespace sycl::ONEAPI;
18+
19+
template <typename T, memory_order MO> class memory_order_kernel;
20+
21+
template <typename T> void acq_rel_test(queue q, size_t N) {
22+
T a = 0;
23+
{
24+
buffer<T> a_buf(&a, 1);
25+
26+
q.submit([&](handler &cgh) {
27+
auto a_acc = a_buf.template get_access<access::mode::read_write>(cgh);
28+
cgh.parallel_for<memory_order_kernel<T, memory_order::acq_rel>>(
29+
range<1>(N), [=](item<1> it) {
30+
int gid = it.get_id(0);
31+
auto aar =
32+
atomic_ref<T, memory_order::acq_rel, memory_scope::device,
33+
access::address_space::global_space>(a_acc[0]);
34+
auto ld = aar.load();
35+
ld += 1;
36+
aar.store(ld);
37+
});
38+
});
39+
}
40+
41+
// All work-items increment by 1, so final value should be equal to N
42+
assert(a == T(N));
43+
}
44+
45+
template <typename T> void seq_cst_test(queue q, size_t N) {
46+
T a = 0;
47+
T b = 0;
48+
{
49+
buffer<T> a_buf(&a, 1);
50+
buffer<T> b_buf(&b, 1);
51+
52+
q.submit([&](handler &cgh) {
53+
auto a_acc = a_buf.template get_access<access::mode::read_write>(cgh);
54+
auto b_acc = b_buf.template get_access<access::mode::read_write>(cgh);
55+
cgh.parallel_for<memory_order_kernel<T, memory_order::seq_cst>>(
56+
range<1>(N), [=](item<1> it) {
57+
int gid = it.get_id(0);
58+
auto aar =
59+
atomic_ref<T, memory_order::seq_cst, memory_scope::device,
60+
access::address_space::global_space>(a_acc[0]);
61+
auto bar =
62+
atomic_ref<T, memory_order::seq_cst, memory_scope::device,
63+
access::address_space::global_space>(b_acc[0]);
64+
auto ald = aar.load();
65+
auto bld = bar.load();
66+
ald += 1;
67+
bld += ald;
68+
bar.store(bld);
69+
aar.store(ald);
70+
});
71+
});
72+
}
73+
74+
// All work-items increment a by 1, so final value should be equal to N
75+
assert(a == T(N));
76+
// b is the sum of [1..N]
77+
size_t rsum = 0;
78+
for (size_t i = 1; i <= N; ++i)
79+
rsum += i;
80+
assert(b == T(rsum));
81+
}
82+
83+
bool is_supported(std::vector<memory_order> capabilities,
84+
memory_order mem_order) {
85+
return std::find(capabilities.begin(), capabilities.end(), mem_order) !=
86+
capabilities.end();
87+
}
88+
89+
int main() {
90+
queue q;
91+
92+
std::vector<memory_order> supported_memory_orders =
93+
q.get_device().get_info<info::device::atomic_memory_order_capabilities>();
94+
bool atomic64_support = q.get_device().has(aspect::atomic64);
95+
96+
constexpr int N = 32;
97+
98+
// Relaxed memory order must be supported. This ordering is used in other
99+
// tests.
100+
assert(is_supported(supported_memory_orders, memory_order::relaxed));
101+
102+
if (is_supported(supported_memory_orders, memory_order::acq_rel)) {
103+
// Acquire-release memory order must also support both acquire and release
104+
// orderings.
105+
assert(is_supported(supported_memory_orders, memory_order::acquire) &&
106+
is_supported(supported_memory_orders, memory_order::release));
107+
acq_rel_test<int>(q, N);
108+
acq_rel_test<unsigned int>(q, N);
109+
acq_rel_test<float>(q, N);
110+
if (sizeof(long) == 4) {
111+
// long is 32-bit
112+
acq_rel_test<long>(q, N);
113+
acq_rel_test<unsigned long>(q, N);
114+
}
115+
if (atomic64_support) {
116+
if (sizeof(long) == 8) {
117+
// long is 64-bit
118+
acq_rel_test<long>(q, N);
119+
acq_rel_test<unsigned long>(q, N);
120+
}
121+
acq_rel_test<long long>(q, N);
122+
acq_rel_test<unsigned long long>(q, N);
123+
acq_rel_test<double>(q, N);
124+
}
125+
}
126+
127+
if (is_supported(supported_memory_orders, memory_order::seq_cst)) {
128+
seq_cst_test<int>(q, N);
129+
seq_cst_test<unsigned int>(q, N);
130+
seq_cst_test<float>(q, N);
131+
if (sizeof(long) == 4) {
132+
// long is 32-bit
133+
seq_cst_test<long>(q, N);
134+
seq_cst_test<unsigned long>(q, N);
135+
}
136+
if (atomic64_support) {
137+
if (sizeof(long) == 8) {
138+
// long is 64-bit
139+
seq_cst_test<long>(q, N);
140+
seq_cst_test<unsigned long>(q, N);
141+
}
142+
seq_cst_test<long long>(q, N);
143+
seq_cst_test<unsigned long long>(q, N);
144+
seq_cst_test<double>(q, N);
145+
}
146+
}
147+
148+
std::cout << "Test passed." << std::endl;
149+
}

SYCL/AtomicRef/compare_exchange.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,9 @@ template <typename T> void compare_exchange_test(queue q, size_t N) {
3131
cgh.parallel_for<compare_exchange_kernel<T>>(
3232
range<1>(N), [=](item<1> it) {
3333
size_t gid = it.get_id(0);
34-
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
35-
ONEAPI::memory_scope::device,
36-
access::address_space::global_space>(exc[0]);
34+
auto atm =
35+
atomic_ref<T, memory_order::relaxed, memory_scope::device,
36+
access::address_space::global_space>(exc[0]);
3737
T result = T(N); // Avoid copying pointer
3838
bool success = atm.compare_exchange_strong(result, (T)gid);
3939
if (success) {

SYCL/AtomicRef/exchange.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,7 @@ template <typename T> void exchange_test(queue q, size_t N) {
2929
output_buf.template get_access<access::mode::discard_write>(cgh);
3030
cgh.parallel_for<exchange_kernel<T>>(range<1>(N), [=](item<1> it) {
3131
size_t gid = it.get_id(0);
32-
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
33-
ONEAPI::memory_scope::device,
32+
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
3433
access::address_space::global_space>(exc[0]);
3534
out[gid] = atm.exchange(T(gid));
3635
});

SYCL/AtomicRef/load.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,7 @@ template <typename T> void load_test(queue q, size_t N) {
2828
output_buf.template get_access<access::mode::discard_write>(cgh);
2929
cgh.parallel_for<load_kernel<T>>(range<1>(N), [=](item<1> it) {
3030
size_t gid = it.get_id(0);
31-
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
32-
ONEAPI::memory_scope::device,
31+
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
3332
access::address_space::global_space>(ld[0]);
3433
out[gid] = atm.load();
3534
});

SYCL/AtomicRef/max.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,7 @@ template <typename T> void max_test(queue q, size_t N) {
2727
output_buf.template get_access<access::mode::discard_write>(cgh);
2828
cgh.parallel_for(range<1>(N), [=](item<1> it) {
2929
int gid = it.get_id(0);
30-
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
31-
ONEAPI::memory_scope::device,
30+
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
3231
access::address_space::global_space>(val[0]);
3332

3433
// +1 accounts for lowest() returning 0 for unsigned types

SYCL/AtomicRef/min.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,7 @@ template <typename T> void min_test(queue q, size_t N) {
2727
output_buf.template get_access<access::mode::discard_write>(cgh);
2828
cgh.parallel_for(range<1>(N), [=](item<1> it) {
2929
int gid = it.get_id(0);
30-
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
31-
ONEAPI::memory_scope::device,
30+
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
3231
access::address_space::global_space>(val[0]);
3332
out[gid] = atm.fetch_min(T(gid));
3433
});

SYCL/AtomicRef/store.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,7 @@ template <typename T> void store_test(queue q, size_t N) {
2222
auto st = store_buf.template get_access<access::mode::read_write>(cgh);
2323
cgh.parallel_for<store_kernel<T>>(range<1>(N), [=](item<1> it) {
2424
size_t gid = it.get_id(0);
25-
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
26-
ONEAPI::memory_scope::device,
25+
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
2726
access::address_space::global_space>(st[0]);
2827
atm.store(T(gid));
2928
});

SYCL/AtomicRef/sub.cpp

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,7 @@ void sub_fetch_test(queue q, size_t N) {
2828
output_buf.template get_access<access::mode::discard_write>(cgh);
2929
cgh.parallel_for(range<1>(N), [=](item<1> it) {
3030
int gid = it.get_id(0);
31-
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
32-
ONEAPI::memory_scope::device,
31+
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
3332
access::address_space::global_space>(val[0]);
3433
out[gid] = atm.fetch_sub(Difference(1));
3534
});
@@ -64,8 +63,7 @@ void sub_plus_equal_test(queue q, size_t N) {
6463
output_buf.template get_access<access::mode::discard_write>(cgh);
6564
cgh.parallel_for(range<1>(N), [=](item<1> it) {
6665
int gid = it.get_id(0);
67-
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
68-
ONEAPI::memory_scope::device,
66+
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
6967
access::address_space::global_space>(val[0]);
7068
out[gid] = atm -= Difference(1);
7169
});
@@ -100,8 +98,7 @@ void sub_pre_dec_test(queue q, size_t N) {
10098
output_buf.template get_access<access::mode::discard_write>(cgh);
10199
cgh.parallel_for(range<1>(N), [=](item<1> it) {
102100
int gid = it.get_id(0);
103-
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
104-
ONEAPI::memory_scope::device,
101+
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
105102
access::address_space::global_space>(val[0]);
106103
out[gid] = --atm;
107104
});
@@ -136,8 +133,7 @@ void sub_post_dec_test(queue q, size_t N) {
136133
output_buf.template get_access<access::mode::discard_write>(cgh);
137134
cgh.parallel_for(range<1>(N), [=](item<1> it) {
138135
int gid = it.get_id(0);
139-
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
140-
ONEAPI::memory_scope::device,
136+
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
141137
access::address_space::global_space>(val[0]);
142138
out[gid] = atm--;
143139
});

0 commit comments

Comments
 (0)