Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit a081db5

Browse files
authored
[SYCL] add extra tests (#37)
- add tests from intel/llvm; - make SYCL/ESIMD/mandelbrot tests generate data files in build directory and remove data file from GIT; - rename regression directory to match llvm-test-suite naming (Regression); - fix clang-format issues.
1 parent 5274b13 commit a081db5

File tree

82 files changed

+3901
-230
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

82 files changed

+3901
-230
lines changed

SYCL/AtomicRef/accessor.cpp

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out
2+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
3+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
4+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
5+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
6+
7+
#include <CL/sycl.hpp>
8+
#include <algorithm>
9+
#include <cassert>
10+
#include <iostream>
11+
#include <numeric>
12+
#include <vector>
13+
using namespace sycl;
14+
using namespace sycl::ONEAPI;
15+
16+
// Equivalent to add_test from add.cpp
17+
// Uses atomic_accessor instead of atomic_ref
18+
template <typename T> void accessor_test(queue q, size_t N) {
19+
T sum = 0;
20+
std::vector<T> output(N, 0);
21+
{
22+
buffer<T> sum_buf(&sum, 1);
23+
buffer<T> output_buf(output.data(), output.size());
24+
25+
q.submit([&](handler &cgh) {
26+
#if __cplusplus > 201402L
27+
static_assert(
28+
std::is_same<decltype(atomic_accessor(sum_buf, cgh, relaxed_order,
29+
device_scope)),
30+
atomic_accessor<T, 1, ONEAPI::memory_order::relaxed,
31+
ONEAPI::memory_scope::device>>::value,
32+
"atomic_accessor type incorrectly deduced");
33+
#endif
34+
auto sum = atomic_accessor<T, 1, ONEAPI::memory_order::relaxed,
35+
ONEAPI::memory_scope::device>(sum_buf, cgh);
36+
auto out =
37+
output_buf.template get_access<access::mode::discard_write>(cgh);
38+
cgh.parallel_for(range<1>(N), [=](item<1> it) {
39+
int gid = it.get_id(0);
40+
static_assert(
41+
std::is_same<
42+
decltype(sum[0]),
43+
atomic_ref<T, ONEAPI::memory_order::relaxed,
44+
ONEAPI::memory_scope::device,
45+
access::address_space::global_space>>::value,
46+
"atomic_accessor returns incorrect atomic_ref");
47+
out[gid] = sum[0].fetch_add(T(1));
48+
});
49+
});
50+
}
51+
52+
// All work-items increment by 1, so final value should be equal to N
53+
assert(sum == N);
54+
55+
// Intermediate values should be unique
56+
std::sort(output.begin(), output.end());
57+
assert(std::unique(output.begin(), output.end()) == output.end());
58+
59+
// Fetch returns original value: will be in [0, N-1]
60+
auto min_e = output[0];
61+
auto max_e = output[output.size() - 1];
62+
assert(min_e == 0 && max_e == N - 1);
63+
}
64+
65+
// Simplified form of accessor_test for local memory
66+
template <typename T>
67+
void local_accessor_test(queue q, size_t N, size_t L = 8) {
68+
assert(N % L == 0);
69+
std::vector<T> output(N / L, 0);
70+
{
71+
buffer<T> output_buf(output.data(), output.size());
72+
q.submit([&](handler &cgh) {
73+
auto sum =
74+
atomic_accessor<T, 1, ONEAPI::memory_order::relaxed,
75+
ONEAPI::memory_scope::device, access::target::local>(
76+
1, cgh);
77+
auto out = output_buf.template get_access<access::mode::read_write>(cgh);
78+
cgh.parallel_for(nd_range<1>(N, L), [=](nd_item<1> it) {
79+
int grp = it.get_group(0);
80+
sum[0].store(0);
81+
it.barrier();
82+
static_assert(
83+
std::is_same<decltype(sum[0]),
84+
atomic_ref<T, ONEAPI::memory_order::relaxed,
85+
ONEAPI::memory_scope::device,
86+
access::address_space::local_space>>::value,
87+
"local atomic_accessor returns incorrect atomic_ref");
88+
T result = sum[0].fetch_add(T(1));
89+
if (result == it.get_local_range(0) - 1) {
90+
out[grp] = result;
91+
}
92+
});
93+
});
94+
}
95+
96+
// All work-items increment by 1, and last in the group writes out old value
97+
// All values should be L-1
98+
assert(std::all_of(output.begin(), output.end(),
99+
[=](T x) { return x == L - 1; }));
100+
}
101+
102+
int main() {
103+
queue q;
104+
constexpr int N = 32;
105+
accessor_test<int>(q, N);
106+
// TODO: Enable local accessor test for host when barrier is supported
107+
if (!q.get_device().is_host()) {
108+
local_accessor_test<int>(q, N);
109+
}
110+
std::cout << "Test passed." << std::endl;
111+
}

SYCL/AtomicRef/add.cpp

Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out
2+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
3+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
4+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
5+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
6+
7+
#include <CL/sycl.hpp>
8+
#include <algorithm>
9+
#include <cassert>
10+
#include <iostream>
11+
#include <numeric>
12+
#include <vector>
13+
using namespace sycl;
14+
using namespace sycl::ONEAPI;
15+
16+
template <typename T, typename Difference = T>
17+
void add_fetch_test(queue q, size_t N) {
18+
T sum = 0;
19+
std::vector<T> output(N);
20+
std::fill(output.begin(), output.end(), T(0));
21+
{
22+
buffer<T> sum_buf(&sum, 1);
23+
buffer<T> output_buf(output.data(), output.size());
24+
25+
q.submit([&](handler &cgh) {
26+
auto sum = sum_buf.template get_access<access::mode::read_write>(cgh);
27+
auto out =
28+
output_buf.template get_access<access::mode::discard_write>(cgh);
29+
cgh.parallel_for(range<1>(N), [=](item<1> it) {
30+
int gid = it.get_id(0);
31+
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
32+
ONEAPI::memory_scope::device,
33+
access::address_space::global_space>(sum[0]);
34+
out[gid] = atm.fetch_add(Difference(1));
35+
});
36+
});
37+
}
38+
39+
// All work-items increment by 1, so final value should be equal to N
40+
assert(sum == T(N));
41+
42+
// Fetch returns original value: will be in [0, N-1]
43+
auto min_e = std::min_element(output.begin(), output.end());
44+
auto max_e = std::max_element(output.begin(), output.end());
45+
assert(*min_e == T(0) && *max_e == T(N - 1));
46+
47+
// Intermediate values should be unique
48+
std::sort(output.begin(), output.end());
49+
assert(std::unique(output.begin(), output.end()) == output.end());
50+
}
51+
52+
template <typename T, typename Difference = T>
53+
void add_plus_equal_test(queue q, size_t N) {
54+
T sum = 0;
55+
std::vector<T> output(N);
56+
std::fill(output.begin(), output.end(), T(0));
57+
{
58+
buffer<T> sum_buf(&sum, 1);
59+
buffer<T> output_buf(output.data(), output.size());
60+
61+
q.submit([&](handler &cgh) {
62+
auto sum = sum_buf.template get_access<access::mode::read_write>(cgh);
63+
auto out =
64+
output_buf.template get_access<access::mode::discard_write>(cgh);
65+
cgh.parallel_for(range<1>(N), [=](item<1> it) {
66+
int gid = it.get_id(0);
67+
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
68+
ONEAPI::memory_scope::device,
69+
access::address_space::global_space>(sum[0]);
70+
out[gid] = atm += Difference(1);
71+
});
72+
});
73+
}
74+
75+
// All work-items increment by 1, so final value should be equal to N
76+
assert(sum == T(N));
77+
78+
// += returns updated value: will be in [1, N]
79+
auto min_e = std::min_element(output.begin(), output.end());
80+
auto max_e = std::max_element(output.begin(), output.end());
81+
assert(*min_e == T(1) && *max_e == T(N));
82+
83+
// Intermediate values should be unique
84+
std::sort(output.begin(), output.end());
85+
assert(std::unique(output.begin(), output.end()) == output.end());
86+
}
87+
88+
template <typename T, typename Difference = T>
89+
void add_pre_inc_test(queue q, size_t N) {
90+
T sum = 0;
91+
std::vector<T> output(N);
92+
std::fill(output.begin(), output.end(), T(0));
93+
{
94+
buffer<T> sum_buf(&sum, 1);
95+
buffer<T> output_buf(output.data(), output.size());
96+
97+
q.submit([&](handler &cgh) {
98+
auto sum = sum_buf.template get_access<access::mode::read_write>(cgh);
99+
auto out =
100+
output_buf.template get_access<access::mode::discard_write>(cgh);
101+
cgh.parallel_for(range<1>(N), [=](item<1> it) {
102+
int gid = it.get_id(0);
103+
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
104+
ONEAPI::memory_scope::device,
105+
access::address_space::global_space>(sum[0]);
106+
out[gid] = ++atm;
107+
});
108+
});
109+
}
110+
111+
// All work-items increment by 1, so final value should be equal to N
112+
assert(sum == T(N));
113+
114+
// Pre-increment returns updated value: will be in [1, N]
115+
auto min_e = std::min_element(output.begin(), output.end());
116+
auto max_e = std::max_element(output.begin(), output.end());
117+
assert(*min_e == T(1) && *max_e == T(N));
118+
119+
// Intermediate values should be unique
120+
std::sort(output.begin(), output.end());
121+
assert(std::unique(output.begin(), output.end()) == output.end());
122+
}
123+
124+
template <typename T, typename Difference = T>
125+
void add_post_inc_test(queue q, size_t N) {
126+
T sum = 0;
127+
std::vector<T> output(N);
128+
std::fill(output.begin(), output.end(), T(0));
129+
{
130+
buffer<T> sum_buf(&sum, 1);
131+
buffer<T> output_buf(output.data(), output.size());
132+
133+
q.submit([&](handler &cgh) {
134+
auto sum = sum_buf.template get_access<access::mode::read_write>(cgh);
135+
auto out =
136+
output_buf.template get_access<access::mode::discard_write>(cgh);
137+
cgh.parallel_for(range<1>(N), [=](item<1> it) {
138+
int gid = it.get_id(0);
139+
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
140+
ONEAPI::memory_scope::device,
141+
access::address_space::global_space>(sum[0]);
142+
out[gid] = atm++;
143+
});
144+
});
145+
}
146+
147+
// All work-items increment by 1, so final value should be equal to N
148+
assert(sum == T(N));
149+
150+
// Post-increment returns original value: will be in [0, N-1]
151+
auto min_e = std::min_element(output.begin(), output.end());
152+
auto max_e = std::max_element(output.begin(), output.end());
153+
assert(*min_e == T(0) && *max_e == T(N - 1));
154+
155+
// Intermediate values should be unique
156+
std::sort(output.begin(), output.end());
157+
assert(std::unique(output.begin(), output.end()) == output.end());
158+
}
159+
160+
template <typename T, typename Difference = T>
161+
void add_test(queue q, size_t N) {
162+
add_fetch_test<T, Difference>(q, N);
163+
add_plus_equal_test<T, Difference>(q, N);
164+
add_pre_inc_test<T, Difference>(q, N);
165+
add_post_inc_test<T, Difference>(q, N);
166+
}
167+
168+
// Floating-point types do not support pre- or post-increment
169+
template <> void add_test<float>(queue q, size_t N) {
170+
add_fetch_test<float>(q, N);
171+
add_plus_equal_test<float>(q, N);
172+
}
173+
template <> void add_test<double>(queue q, size_t N) {
174+
add_fetch_test<double>(q, N);
175+
add_plus_equal_test<double>(q, N);
176+
}
177+
178+
int main() {
179+
queue q;
180+
std::string version = q.get_device().get_info<info::device::version>();
181+
if (version < std::string("2.0")) {
182+
std::cout << "Skipping test\n";
183+
return 0;
184+
}
185+
186+
constexpr int N = 32;
187+
add_test<int>(q, N);
188+
add_test<unsigned int>(q, N);
189+
add_test<long>(q, N);
190+
add_test<unsigned long>(q, N);
191+
add_test<long long>(q, N);
192+
add_test<unsigned long long>(q, N);
193+
add_test<float>(q, N);
194+
add_test<double>(q, N);
195+
add_test<char *, ptrdiff_t>(q, N);
196+
197+
std::cout << "Test passed." << std::endl;
198+
}

SYCL/AtomicRef/compare_exchange.cpp

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
3+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
4+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
5+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
6+
7+
#include <CL/sycl.hpp>
8+
#include <algorithm>
9+
#include <cassert>
10+
#include <numeric>
11+
#include <vector>
12+
using namespace sycl;
13+
using namespace sycl::ONEAPI;
14+
15+
template <typename T> class compare_exchange_kernel;
16+
17+
template <typename T> void compare_exchange_test(queue q, size_t N) {
18+
const T initial = T(N);
19+
T compare_exchange = initial;
20+
std::vector<T> output(N);
21+
std::fill(output.begin(), output.end(), T(0));
22+
{
23+
buffer<T> compare_exchange_buf(&compare_exchange, 1);
24+
buffer<T> output_buf(output.data(), output.size());
25+
26+
q.submit([&](handler &cgh) {
27+
auto exc =
28+
compare_exchange_buf.template get_access<access::mode::read_write>(
29+
cgh);
30+
auto out =
31+
output_buf.template get_access<access::mode::discard_write>(cgh);
32+
cgh.parallel_for<compare_exchange_kernel<T>>(
33+
range<1>(N), [=](item<1> it) {
34+
size_t gid = it.get_id(0);
35+
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
36+
ONEAPI::memory_scope::device,
37+
access::address_space::global_space>(exc[0]);
38+
T result = T(N); // Avoid copying pointer
39+
bool success = atm.compare_exchange_strong(result, (T)gid);
40+
if (success) {
41+
out[gid] = result;
42+
} else {
43+
out[gid] = T(gid);
44+
}
45+
});
46+
});
47+
}
48+
49+
// Only one work-item should have received the initial sentinel value
50+
assert(std::count(output.begin(), output.end(), initial) == 1);
51+
52+
// All other values should be the index itself or the sentinel value
53+
for (size_t i = 0; i < N; ++i) {
54+
assert(output[i] == T(i) || output[i] == initial);
55+
}
56+
}
57+
58+
int main() {
59+
queue q;
60+
std::string version = q.get_device().get_info<info::device::version>();
61+
if (version < std::string("2.0")) {
62+
std::cout << "Skipping test\n";
63+
return 0;
64+
}
65+
66+
constexpr int N = 32;
67+
compare_exchange_test<int>(q, N);
68+
compare_exchange_test<unsigned int>(q, N);
69+
compare_exchange_test<long>(q, N);
70+
compare_exchange_test<unsigned long>(q, N);
71+
compare_exchange_test<long long>(q, N);
72+
compare_exchange_test<unsigned long long>(q, N);
73+
compare_exchange_test<float>(q, N);
74+
compare_exchange_test<double>(q, N);
75+
compare_exchange_test<char *>(q, N);
76+
77+
std::cout << "Test passed." << std::endl;
78+
}

0 commit comments

Comments
 (0)