Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit 70c275c

Browse files
author
Artem Gindinson
committed
Address the atomics' macro rework: emulation runs by default
Signed-off-by: Artem Gindinson <[email protected]>
1 parent e2ea274 commit 70c275c

File tree

5 files changed

+400
-16
lines changed

5 files changed

+400
-16
lines changed

SYCL/AtomicRef/add.cpp renamed to SYCL/AtomicRef/add-emulated.cpp

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,7 @@
1-
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s \
2-
// RUN: -o %t.out %gpu_atomics_config
1+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out
32
// RUN: %HOST_RUN_PLACEHOLDER %t.out
43
// RUN: %GPU_RUN_PLACEHOLDER %t.out
5-
// TODO: Drop the separate "emulation" compilation once "native" OpenCL CPU
6-
// support is provided.
7-
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s \
8-
// RUN: -o %t.out.emulated -D__SYCL_EMULATE_FLOAT_ATOMICS__=1
9-
// RUN: %CPU_RUN_PLACEHOLDER %t.out.emulated
4+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
105

116
#include <CL/sycl.hpp>
127
#include <algorithm>

SYCL/AtomicRef/add-native.cpp

Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -DSYCL_USE_NATIVE_FP_ATOMICS \
2+
// RUN: -fsycl-targets=%sycl_triple %s -o %t.out
3+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
4+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
5+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
6+
// TODO: Remove XFAIL/UNSUPPORTED once corresponding backends support
7+
// "native" SPIR-V
8+
// XFAIL: cpu
9+
// UNSUPPORTED: cuda
10+
11+
#include <CL/sycl.hpp>
12+
#include <algorithm>
13+
#include <cassert>
14+
#include <iostream>
15+
#include <numeric>
16+
#include <vector>
17+
using namespace sycl;
18+
using namespace sycl::ONEAPI;
19+
20+
template <typename T, typename Difference = T>
21+
void add_fetch_test(queue q, size_t N) {
22+
T sum = 0;
23+
std::vector<T> output(N);
24+
std::fill(output.begin(), output.end(), T(0));
25+
{
26+
buffer<T> sum_buf(&sum, 1);
27+
buffer<T> output_buf(output.data(), output.size());
28+
29+
q.submit([&](handler &cgh) {
30+
auto sum = sum_buf.template get_access<access::mode::read_write>(cgh);
31+
auto out =
32+
output_buf.template get_access<access::mode::discard_write>(cgh);
33+
cgh.parallel_for(range<1>(N), [=](item<1> it) {
34+
int gid = it.get_id(0);
35+
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
36+
ONEAPI::memory_scope::device,
37+
access::address_space::global_space>(sum[0]);
38+
out[gid] = atm.fetch_add(Difference(1));
39+
});
40+
});
41+
}
42+
43+
// All work-items increment by 1, so final value should be equal to N
44+
assert(sum == T(N));
45+
46+
// Fetch returns original value: will be in [0, N-1]
47+
auto min_e = std::min_element(output.begin(), output.end());
48+
auto max_e = std::max_element(output.begin(), output.end());
49+
assert(*min_e == T(0) && *max_e == T(N - 1));
50+
51+
// Intermediate values should be unique
52+
std::sort(output.begin(), output.end());
53+
assert(std::unique(output.begin(), output.end()) == output.end());
54+
}
55+
56+
template <typename T, typename Difference = T>
57+
void add_plus_equal_test(queue q, size_t N) {
58+
T sum = 0;
59+
std::vector<T> output(N);
60+
std::fill(output.begin(), output.end(), T(0));
61+
{
62+
buffer<T> sum_buf(&sum, 1);
63+
buffer<T> output_buf(output.data(), output.size());
64+
65+
q.submit([&](handler &cgh) {
66+
auto sum = sum_buf.template get_access<access::mode::read_write>(cgh);
67+
auto out =
68+
output_buf.template get_access<access::mode::discard_write>(cgh);
69+
cgh.parallel_for(range<1>(N), [=](item<1> it) {
70+
int gid = it.get_id(0);
71+
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
72+
ONEAPI::memory_scope::device,
73+
access::address_space::global_space>(sum[0]);
74+
out[gid] = atm += Difference(1);
75+
});
76+
});
77+
}
78+
79+
// All work-items increment by 1, so final value should be equal to N
80+
assert(sum == T(N));
81+
82+
// += returns updated value: will be in [1, N]
83+
auto min_e = std::min_element(output.begin(), output.end());
84+
auto max_e = std::max_element(output.begin(), output.end());
85+
assert(*min_e == T(1) && *max_e == T(N));
86+
87+
// Intermediate values should be unique
88+
std::sort(output.begin(), output.end());
89+
assert(std::unique(output.begin(), output.end()) == output.end());
90+
}
91+
92+
template <typename T, typename Difference = T>
93+
void add_pre_inc_test(queue q, size_t N) {
94+
T sum = 0;
95+
std::vector<T> output(N);
96+
std::fill(output.begin(), output.end(), T(0));
97+
{
98+
buffer<T> sum_buf(&sum, 1);
99+
buffer<T> output_buf(output.data(), output.size());
100+
101+
q.submit([&](handler &cgh) {
102+
auto sum = sum_buf.template get_access<access::mode::read_write>(cgh);
103+
auto out =
104+
output_buf.template get_access<access::mode::discard_write>(cgh);
105+
cgh.parallel_for(range<1>(N), [=](item<1> it) {
106+
int gid = it.get_id(0);
107+
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
108+
ONEAPI::memory_scope::device,
109+
access::address_space::global_space>(sum[0]);
110+
out[gid] = ++atm;
111+
});
112+
});
113+
}
114+
115+
// All work-items increment by 1, so final value should be equal to N
116+
assert(sum == T(N));
117+
118+
// Pre-increment returns updated value: will be in [1, N]
119+
auto min_e = std::min_element(output.begin(), output.end());
120+
auto max_e = std::max_element(output.begin(), output.end());
121+
assert(*min_e == T(1) && *max_e == T(N));
122+
123+
// Intermediate values should be unique
124+
std::sort(output.begin(), output.end());
125+
assert(std::unique(output.begin(), output.end()) == output.end());
126+
}
127+
128+
template <typename T, typename Difference = T>
129+
void add_post_inc_test(queue q, size_t N) {
130+
T sum = 0;
131+
std::vector<T> output(N);
132+
std::fill(output.begin(), output.end(), T(0));
133+
{
134+
buffer<T> sum_buf(&sum, 1);
135+
buffer<T> output_buf(output.data(), output.size());
136+
137+
q.submit([&](handler &cgh) {
138+
auto sum = sum_buf.template get_access<access::mode::read_write>(cgh);
139+
auto out =
140+
output_buf.template get_access<access::mode::discard_write>(cgh);
141+
cgh.parallel_for(range<1>(N), [=](item<1> it) {
142+
int gid = it.get_id(0);
143+
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
144+
ONEAPI::memory_scope::device,
145+
access::address_space::global_space>(sum[0]);
146+
out[gid] = atm++;
147+
});
148+
});
149+
}
150+
151+
// All work-items increment by 1, so final value should be equal to N
152+
assert(sum == T(N));
153+
154+
// Post-increment returns original value: will be in [0, N-1]
155+
auto min_e = std::min_element(output.begin(), output.end());
156+
auto max_e = std::max_element(output.begin(), output.end());
157+
assert(*min_e == T(0) && *max_e == T(N - 1));
158+
159+
// Intermediate values should be unique
160+
std::sort(output.begin(), output.end());
161+
assert(std::unique(output.begin(), output.end()) == output.end());
162+
}
163+
164+
template <typename T, typename Difference = T>
165+
void add_test(queue q, size_t N) {
166+
add_fetch_test<T, Difference>(q, N);
167+
add_plus_equal_test<T, Difference>(q, N);
168+
add_pre_inc_test<T, Difference>(q, N);
169+
add_post_inc_test<T, Difference>(q, N);
170+
}
171+
172+
// Floating-point types do not support pre- or post-increment
173+
template <> void add_test<float>(queue q, size_t N) {
174+
add_fetch_test<float>(q, N);
175+
add_plus_equal_test<float>(q, N);
176+
}
177+
// Same restriction for double: no pre-/post-increment on floating-point
// atomics, so only fetch_add and += are exercised.
template <> void add_test<double>(queue q, size_t N) {
  add_fetch_test<double>(q, N);
  add_plus_equal_test<double>(q, N);
}
181+
182+
// Test driver: exercises atomic addition for every supported integral,
// floating-point, and pointer type on the default-selected device.
int main() {
  queue q;
  // NOTE(review): the original queried info::device::version into a local
  // that was never read (dead code, likely a leftover from version-gating);
  // the unused query has been removed.

  constexpr int N = 32;
  add_test<int>(q, N);
  add_test<unsigned int>(q, N);
  add_test<long>(q, N);
  add_test<unsigned long>(q, N);
  add_test<long long>(q, N);
  add_test<unsigned long long>(q, N);
  add_test<float>(q, N);
  add_test<double>(q, N);
  // Pointer atomics advance by ptrdiff_t-sized steps.
  add_test<char *, ptrdiff_t>(q, N);

  std::cout << "Test passed." << std::endl;
}

SYCL/AtomicRef/sub.cpp renamed to SYCL/AtomicRef/sub-emulated.cpp

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,7 @@
1-
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s \
2-
// RUN: -o %t.out %gpu_atomics_config
1+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out
32
// RUN: %HOST_RUN_PLACEHOLDER %t.out
43
// RUN: %GPU_RUN_PLACEHOLDER %t.out
5-
// TODO: Drop the separate "emulation" compilation once "native" OpenCL CPU
6-
// support is provided.
7-
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s \
8-
// RUN: -o %t.out.emulated -D__SYCL_EMULATE_FLOAT_ATOMICS__=1
9-
// RUN: %CPU_RUN_PLACEHOLDER %t.out.emulated
4+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
105

116
#include <CL/sycl.hpp>
127
#include <algorithm>

0 commit comments

Comments
 (0)