Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit 5aca873

Browse files
author
Artem Gindinson
authored
Rework float atomic tests for min/max (#109)
A follow-up to #104 that will set up the infrastructure for min/max implementation changes. The native tests will be temporarily disabled on all targets, and then enabled in steps (hence `UNSUPPORTED: *` is not a good choice). Signed-off-by: Artem Gindinson <[email protected]>
1 parent f189f84 commit 5aca873

File tree

4 files changed

+142
-0
lines changed

4 files changed

+142
-0
lines changed
File renamed without changes.

SYCL/AtomicRef/max-native.cpp

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -DSYCL_USE_NATIVE_FP_ATOMICS \
2+
// RUN: -fsycl-targets=%sycl_triple %s -o %t.out
3+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
4+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
5+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
6+
// TODO: Remove items from UNSUPPORTED once corresponding backends support
7+
// "native" implementation
8+
// UNSUPPORTED: gpu, cpu, cuda
9+
10+
#include <CL/sycl.hpp>
11+
#include <algorithm>
12+
#include <cassert>
13+
#include <iostream>
14+
#include <numeric>
15+
#include <vector>
16+
using namespace sycl;
17+
using namespace sycl::ONEAPI;
18+
19+
/// Exercises atomic_ref<T>::fetch_max on one shared value from N work-items.
///
/// The shared value starts at std::numeric_limits<T>::lowest() and work-item
/// `gid` submits the operand `gid + 1`. Once the buffers have been released,
/// the host asserts that:
///   * the shared value equals N (the largest operand submitted),
///   * exactly one work-item observed the initial value, and
///   * no observed value is below the initial value.
///
/// NOTE(review): std::numeric_limits is declared in <limits>; confirm that the
/// translation unit includes it rather than relying on transitive includes.
///
/// \tparam T arithmetic type under test.
/// \param q  queue the kernel is submitted to.
/// \param N  number of work-items; also the expected final maximum.
template <typename T> void max_test(queue q, size_t N) {
  const T initial = std::numeric_limits<T>::lowest();
  T val = initial;
  // Pre-filled with max() so that a slot the kernel failed to overwrite can
  // never be mistaken for the initial value counted below.
  std::vector<T> output(N, std::numeric_limits<T>::max());
  {
    buffer<T> val_buf(&val, 1);
    buffer<T> output_buf(output.data(), output.size());

    q.submit([&](handler &cgh) {
      // Named val_acc (not val) so the accessor does not shadow the host value.
      auto val_acc =
          val_buf.template get_access<access::mode::read_write>(cgh);
      auto out =
          output_buf.template get_access<access::mode::discard_write>(cgh);
      cgh.parallel_for(range<1>(N), [=](item<1> it) {
        const size_t gid = it.get_id(0);
        auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
                              ONEAPI::memory_scope::device,
                              access::address_space::global_space>(val_acc[0]);

        // +1 accounts for lowest() returning 0 for unsigned types
        out[gid] = atm.fetch_max(static_cast<T>(gid) + 1);
      });
    });
  } // Leaving this scope destroys the buffers before val/output are read.

  // Final value should be equal to N
  assert(val == static_cast<T>(N));

  // Only one work-item should have received the initial value
  assert(std::count(output.begin(), output.end(), initial) == 1);

  // fetch_max returns original value
  // Intermediate values should all be >= initial value
  assert(std::all_of(output.begin(), output.end(),
                     [initial](const T &seen) { return seen >= initial; }));
}
56+
57+
/// Runs max_test on a default-constructed queue for every signed/unsigned
/// integer width and both floating-point types, then reports success.
int main() {
  queue q;

  // Work-item count per test; max_test takes it as size_t.
  constexpr size_t N = 32;
  max_test<int>(q, N);
  max_test<unsigned int>(q, N);
  max_test<long>(q, N);
  max_test<unsigned long>(q, N);
  max_test<long long>(q, N);
  max_test<unsigned long long>(q, N);
  max_test<float>(q, N);
  max_test<double>(q, N);

  std::cout << "Test passed." << std::endl;
}
File renamed without changes.

SYCL/AtomicRef/min-native.cpp

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -DSYCL_USE_NATIVE_FP_ATOMICS \
2+
// RUN: -fsycl-targets=%sycl_triple %s -o %t.out
3+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
4+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
5+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
6+
// TODO: Remove items from UNSUPPORTED once corresponding backends support
7+
// "native" implementation
8+
// UNSUPPORTED: gpu, cpu, cuda
9+
10+
#include <CL/sycl.hpp>
11+
#include <algorithm>
12+
#include <cassert>
13+
#include <iostream>
14+
#include <numeric>
15+
#include <vector>
16+
using namespace sycl;
17+
using namespace sycl::ONEAPI;
18+
19+
/// Exercises atomic_ref<T>::fetch_min on one shared value from N work-items.
///
/// The shared value starts at std::numeric_limits<T>::max() and work-item
/// `gid` submits the operand `gid`. Once the buffers have been released, the
/// host asserts that:
///   * the shared value equals 0 (the smallest operand submitted),
///   * exactly one work-item observed the initial value, and
///   * no observed value is above the initial value.
///
/// NOTE(review): std::numeric_limits is declared in <limits>; confirm that the
/// translation unit includes it rather than relying on transitive includes.
///
/// \tparam T arithmetic type under test.
/// \param q  queue the kernel is submitted to.
/// \param N  number of work-items.
template <typename T> void min_test(queue q, size_t N) {
  const T initial = std::numeric_limits<T>::max();
  T val = initial;
  // Pre-filled with 0 so that a slot the kernel failed to overwrite can never
  // be mistaken for the initial value counted below.
  std::vector<T> output(N, T(0));
  {
    buffer<T> val_buf(&val, 1);
    buffer<T> output_buf(output.data(), output.size());

    q.submit([&](handler &cgh) {
      // Named val_acc (not val) so the accessor does not shadow the host value.
      auto val_acc =
          val_buf.template get_access<access::mode::read_write>(cgh);
      auto out =
          output_buf.template get_access<access::mode::discard_write>(cgh);
      cgh.parallel_for(range<1>(N), [=](item<1> it) {
        const size_t gid = it.get_id(0);
        auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
                              ONEAPI::memory_scope::device,
                              access::address_space::global_space>(val_acc[0]);
        out[gid] = atm.fetch_min(static_cast<T>(gid));
      });
    });
  } // Leaving this scope destroys the buffers before val/output are read.

  // Final value should be equal to 0
  assert(val == T(0));

  // Only one work-item should have received the initial value
  assert(std::count(output.begin(), output.end(), initial) == 1);

  // fetch_min returns original value
  // Intermediate values should all be <= initial value
  assert(std::all_of(output.begin(), output.end(),
                     [initial](const T &seen) { return seen <= initial; }));
}
54+
55+
/// Runs min_test on a default-constructed queue for every signed/unsigned
/// integer width and both floating-point types, then reports success.
int main() {
  queue q;

  // Work-item count per test; min_test takes it as size_t.
  constexpr size_t N = 32;
  min_test<int>(q, N);
  min_test<unsigned int>(q, N);
  min_test<long>(q, N);
  min_test<unsigned long>(q, N);
  min_test<long long>(q, N);
  min_test<unsigned long long>(q, N);
  min_test<float>(q, N);
  min_test<double>(q, N);

  std::cout << "Test passed." << std::endl;
}

0 commit comments

Comments
 (0)