Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit 6cf85c6

Browse files
author
Artem Gindinson
authored
Address the atomics' macro rework: emulation runs by default (#104)
Signed-off-by: Artem Gindinson <[email protected]>
1 parent e2ea274 commit 6cf85c6

File tree

5 files changed

+398
-16
lines changed

5 files changed

+398
-16
lines changed

SYCL/AtomicRef/add.cpp renamed to SYCL/AtomicRef/add-emulated.cpp

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,7 @@
1-
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s \
2-
// RUN: -o %t.out %gpu_atomics_config
1+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out
32
// RUN: %HOST_RUN_PLACEHOLDER %t.out
43
// RUN: %GPU_RUN_PLACEHOLDER %t.out
5-
// TODO: Drop the separate "emulation" compilation once "native" OpenCL CPU
6-
// support is provided.
7-
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s \
8-
// RUN: -o %t.out.emulated -D__SYCL_EMULATE_FLOAT_ATOMICS__=1
9-
// RUN: %CPU_RUN_PLACEHOLDER %t.out.emulated
4+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
105

116
#include <CL/sycl.hpp>
127
#include <algorithm>

SYCL/AtomicRef/add-native.cpp

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -DSYCL_USE_NATIVE_FP_ATOMICS \
2+
// RUN: -fsycl-targets=%sycl_triple %s -o %t.out
3+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
4+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
5+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
6+
// TODO: Remove items from UNSUPPORTED once corresponding backends support
7+
// "native" implementation
8+
// UNSUPPORTED: cpu, cuda
9+
10+
#include <CL/sycl.hpp>
11+
#include <algorithm>
12+
#include <cassert>
13+
#include <iostream>
14+
#include <numeric>
15+
#include <vector>
16+
using namespace sycl;
17+
using namespace sycl::ONEAPI;
18+
19+
template <typename T, typename Difference = T>
20+
void add_fetch_test(queue q, size_t N) {
21+
T sum = 0;
22+
std::vector<T> output(N);
23+
std::fill(output.begin(), output.end(), T(0));
24+
{
25+
buffer<T> sum_buf(&sum, 1);
26+
buffer<T> output_buf(output.data(), output.size());
27+
28+
q.submit([&](handler &cgh) {
29+
auto sum = sum_buf.template get_access<access::mode::read_write>(cgh);
30+
auto out =
31+
output_buf.template get_access<access::mode::discard_write>(cgh);
32+
cgh.parallel_for(range<1>(N), [=](item<1> it) {
33+
int gid = it.get_id(0);
34+
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
35+
ONEAPI::memory_scope::device,
36+
access::address_space::global_space>(sum[0]);
37+
out[gid] = atm.fetch_add(Difference(1));
38+
});
39+
});
40+
}
41+
42+
// All work-items increment by 1, so final value should be equal to N
43+
assert(sum == T(N));
44+
45+
// Fetch returns original value: will be in [0, N-1]
46+
auto min_e = std::min_element(output.begin(), output.end());
47+
auto max_e = std::max_element(output.begin(), output.end());
48+
assert(*min_e == T(0) && *max_e == T(N - 1));
49+
50+
// Intermediate values should be unique
51+
std::sort(output.begin(), output.end());
52+
assert(std::unique(output.begin(), output.end()) == output.end());
53+
}
54+
55+
template <typename T, typename Difference = T>
56+
void add_plus_equal_test(queue q, size_t N) {
57+
T sum = 0;
58+
std::vector<T> output(N);
59+
std::fill(output.begin(), output.end(), T(0));
60+
{
61+
buffer<T> sum_buf(&sum, 1);
62+
buffer<T> output_buf(output.data(), output.size());
63+
64+
q.submit([&](handler &cgh) {
65+
auto sum = sum_buf.template get_access<access::mode::read_write>(cgh);
66+
auto out =
67+
output_buf.template get_access<access::mode::discard_write>(cgh);
68+
cgh.parallel_for(range<1>(N), [=](item<1> it) {
69+
int gid = it.get_id(0);
70+
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
71+
ONEAPI::memory_scope::device,
72+
access::address_space::global_space>(sum[0]);
73+
out[gid] = atm += Difference(1);
74+
});
75+
});
76+
}
77+
78+
// All work-items increment by 1, so final value should be equal to N
79+
assert(sum == T(N));
80+
81+
// += returns updated value: will be in [1, N]
82+
auto min_e = std::min_element(output.begin(), output.end());
83+
auto max_e = std::max_element(output.begin(), output.end());
84+
assert(*min_e == T(1) && *max_e == T(N));
85+
86+
// Intermediate values should be unique
87+
std::sort(output.begin(), output.end());
88+
assert(std::unique(output.begin(), output.end()) == output.end());
89+
}
90+
91+
template <typename T, typename Difference = T>
92+
void add_pre_inc_test(queue q, size_t N) {
93+
T sum = 0;
94+
std::vector<T> output(N);
95+
std::fill(output.begin(), output.end(), T(0));
96+
{
97+
buffer<T> sum_buf(&sum, 1);
98+
buffer<T> output_buf(output.data(), output.size());
99+
100+
q.submit([&](handler &cgh) {
101+
auto sum = sum_buf.template get_access<access::mode::read_write>(cgh);
102+
auto out =
103+
output_buf.template get_access<access::mode::discard_write>(cgh);
104+
cgh.parallel_for(range<1>(N), [=](item<1> it) {
105+
int gid = it.get_id(0);
106+
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
107+
ONEAPI::memory_scope::device,
108+
access::address_space::global_space>(sum[0]);
109+
out[gid] = ++atm;
110+
});
111+
});
112+
}
113+
114+
// All work-items increment by 1, so final value should be equal to N
115+
assert(sum == T(N));
116+
117+
// Pre-increment returns updated value: will be in [1, N]
118+
auto min_e = std::min_element(output.begin(), output.end());
119+
auto max_e = std::max_element(output.begin(), output.end());
120+
assert(*min_e == T(1) && *max_e == T(N));
121+
122+
// Intermediate values should be unique
123+
std::sort(output.begin(), output.end());
124+
assert(std::unique(output.begin(), output.end()) == output.end());
125+
}
126+
127+
template <typename T, typename Difference = T>
128+
void add_post_inc_test(queue q, size_t N) {
129+
T sum = 0;
130+
std::vector<T> output(N);
131+
std::fill(output.begin(), output.end(), T(0));
132+
{
133+
buffer<T> sum_buf(&sum, 1);
134+
buffer<T> output_buf(output.data(), output.size());
135+
136+
q.submit([&](handler &cgh) {
137+
auto sum = sum_buf.template get_access<access::mode::read_write>(cgh);
138+
auto out =
139+
output_buf.template get_access<access::mode::discard_write>(cgh);
140+
cgh.parallel_for(range<1>(N), [=](item<1> it) {
141+
int gid = it.get_id(0);
142+
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
143+
ONEAPI::memory_scope::device,
144+
access::address_space::global_space>(sum[0]);
145+
out[gid] = atm++;
146+
});
147+
});
148+
}
149+
150+
// All work-items increment by 1, so final value should be equal to N
151+
assert(sum == T(N));
152+
153+
// Post-increment returns original value: will be in [0, N-1]
154+
auto min_e = std::min_element(output.begin(), output.end());
155+
auto max_e = std::max_element(output.begin(), output.end());
156+
assert(*min_e == T(0) && *max_e == T(N - 1));
157+
158+
// Intermediate values should be unique
159+
std::sort(output.begin(), output.end());
160+
assert(std::unique(output.begin(), output.end()) == output.end());
161+
}
162+
163+
template <typename T, typename Difference = T>
164+
void add_test(queue q, size_t N) {
165+
add_fetch_test<T, Difference>(q, N);
166+
add_plus_equal_test<T, Difference>(q, N);
167+
add_pre_inc_test<T, Difference>(q, N);
168+
add_post_inc_test<T, Difference>(q, N);
169+
}
170+
171+
// Floating-point types do not support pre- or post-increment
172+
template <> void add_test<float>(queue q, size_t N) {
173+
add_fetch_test<float>(q, N);
174+
add_plus_equal_test<float>(q, N);
175+
}
176+
// Same restriction as float: double has no pre-/post-increment support.
template <> void add_test<double>(queue q, size_t N) {
  add_fetch_test<double>(q, N);
  add_plus_equal_test<double>(q, N);
}
180+
181+
// Entry point: runs the native-FP-atomics add tests over every supported
// integral, floating-point, and pointer type on the default device.
int main() {
  queue q;
  // NOTE: a previous revision queried info::device::version here but never
  // used the result; the dead local has been removed.

  constexpr int N = 32;
  // Integral types.
  add_test<int>(q, N);
  add_test<unsigned int>(q, N);
  add_test<long>(q, N);
  add_test<unsigned long>(q, N);
  add_test<long long>(q, N);
  add_test<unsigned long long>(q, N);
  // Floating-point types (fetch_add and += only; see specializations above).
  add_test<float>(q, N);
  add_test<double>(q, N);
  // Pointer arithmetic with an explicit difference type.
  add_test<char *, ptrdiff_t>(q, N);

  std::cout << "Test passed." << std::endl;
}

SYCL/AtomicRef/sub.cpp renamed to SYCL/AtomicRef/sub-emulated.cpp

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,7 @@
1-
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s \
2-
// RUN: -o %t.out %gpu_atomics_config
1+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out
32
// RUN: %HOST_RUN_PLACEHOLDER %t.out
43
// RUN: %GPU_RUN_PLACEHOLDER %t.out
5-
// TODO: Drop the separate "emulation" compilation once "native" OpenCL CPU
6-
// support is provided.
7-
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s \
8-
// RUN: -o %t.out.emulated -D__SYCL_EMULATE_FLOAT_ATOMICS__=1
9-
// RUN: %CPU_RUN_PLACEHOLDER %t.out.emulated
4+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
105

116
#include <CL/sycl.hpp>
127
#include <algorithm>

0 commit comments

Comments
 (0)