Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

[SYCL][CUDA] Adds tests for atomic memory ordering #363

Merged
merged 2 commits into from
Aug 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 12 additions & 13 deletions SYCL/AtomicRef/accessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,21 +26,21 @@ template <typename T> void accessor_test(queue q, size_t N) {
static_assert(
std::is_same<decltype(atomic_accessor(sum_buf, cgh, relaxed_order,
device_scope)),
atomic_accessor<T, 1, ONEAPI::memory_order::relaxed,
ONEAPI::memory_scope::device>>::value,
atomic_accessor<T, 1, memory_order::relaxed,
memory_scope::device>>::value,
"atomic_accessor type incorrectly deduced");
#endif
auto sum = atomic_accessor<T, 1, ONEAPI::memory_order::relaxed,
ONEAPI::memory_scope::device>(sum_buf, cgh);
auto sum =
atomic_accessor<T, 1, memory_order::relaxed, memory_scope::device>(
sum_buf, cgh);
auto out =
output_buf.template get_access<access::mode::discard_write>(cgh);
cgh.parallel_for(range<1>(N), [=](item<1> it) {
int gid = it.get_id(0);
static_assert(
std::is_same<
decltype(sum[0]),
atomic_ref<T, ONEAPI::memory_order::relaxed,
ONEAPI::memory_scope::device,
atomic_ref<T, memory_order::relaxed, memory_scope::device,
access::address_space::global_space>>::value,
"atomic_accessor returns incorrect atomic_ref");
out[gid] = sum[0].fetch_add(T(1));
Expand Down Expand Up @@ -70,19 +70,18 @@ void local_accessor_test(queue q, size_t N, size_t L = 8) {
buffer<T> output_buf(output.data(), output.size());
q.submit([&](handler &cgh) {
auto sum =
atomic_accessor<T, 1, ONEAPI::memory_order::relaxed,
ONEAPI::memory_scope::device, access::target::local>(
1, cgh);
atomic_accessor<T, 1, memory_order::relaxed, memory_scope::device,
access::target::local>(1, cgh);
auto out = output_buf.template get_access<access::mode::read_write>(cgh);
cgh.parallel_for(nd_range<1>(N, L), [=](nd_item<1> it) {
int grp = it.get_group(0);
sum[0].store(0);
it.barrier();
static_assert(
std::is_same<decltype(sum[0]),
atomic_ref<T, ONEAPI::memory_order::relaxed,
ONEAPI::memory_scope::device,
access::address_space::local_space>>::value,
std::is_same<
decltype(sum[0]),
atomic_ref<T, memory_order::relaxed, memory_scope::device,
access::address_space::local_space>>::value,
"local atomic_accessor returns incorrect atomic_ref");
T result = sum[0].fetch_add(T(1));
if (result == it.get_local_range(0) - 1) {
Expand Down
12 changes: 4 additions & 8 deletions SYCL/AtomicRef/add.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@ void add_fetch_test(queue q, size_t N) {
output_buf.template get_access<access::mode::discard_write>(cgh);
cgh.parallel_for(range<1>(N), [=](item<1> it) {
int gid = it.get_id(0);
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
ONEAPI::memory_scope::device,
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
access::address_space::global_space>(sum[0]);
out[gid] = atm.fetch_add(Difference(1));
});
Expand Down Expand Up @@ -64,8 +63,7 @@ void add_plus_equal_test(queue q, size_t N) {
output_buf.template get_access<access::mode::discard_write>(cgh);
cgh.parallel_for(range<1>(N), [=](item<1> it) {
int gid = it.get_id(0);
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
ONEAPI::memory_scope::device,
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
access::address_space::global_space>(sum[0]);
out[gid] = atm += Difference(1);
});
Expand Down Expand Up @@ -100,8 +98,7 @@ void add_pre_inc_test(queue q, size_t N) {
output_buf.template get_access<access::mode::discard_write>(cgh);
cgh.parallel_for(range<1>(N), [=](item<1> it) {
int gid = it.get_id(0);
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
ONEAPI::memory_scope::device,
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
access::address_space::global_space>(sum[0]);
out[gid] = ++atm;
});
Expand Down Expand Up @@ -136,8 +133,7 @@ void add_post_inc_test(queue q, size_t N) {
output_buf.template get_access<access::mode::discard_write>(cgh);
cgh.parallel_for(range<1>(N), [=](item<1> it) {
int gid = it.get_id(0);
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
ONEAPI::memory_scope::device,
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
access::address_space::global_space>(sum[0]);
out[gid] = atm++;
});
Expand Down
149 changes: 149 additions & 0 deletions SYCL/AtomicRef/atomic_memory_order.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// L0, OpenCL, and ROCm backends don't currently support
// info::device::atomic_memory_order_capabilities and aspect::atomic64
// XFAIL: level_zero || opencl || rocm

// NOTE: Tests load and store for supported memory orderings.

#include <CL/sycl.hpp>
#include <algorithm>
#include <cassert>
#include <numeric>
#include <vector>
using namespace sycl;
using namespace sycl::ONEAPI;

template <typename T, memory_order MO> class memory_order_kernel;

template <typename T> void acq_rel_test(queue q, size_t N) {
T a = 0;
{
buffer<T> a_buf(&a, 1);

q.submit([&](handler &cgh) {
auto a_acc = a_buf.template get_access<access::mode::read_write>(cgh);
cgh.parallel_for<memory_order_kernel<T, memory_order::acq_rel>>(
range<1>(N), [=](item<1> it) {
int gid = it.get_id(0);
auto aar =
atomic_ref<T, memory_order::acq_rel, memory_scope::device,
access::address_space::global_space>(a_acc[0]);
auto ld = aar.load();
ld += 1;
aar.store(ld);
});
});
}

// All work-items increment by 1, so final value should be equal to N
assert(a == T(N));
}

template <typename T> void seq_cst_test(queue q, size_t N) {
T a = 0;
T b = 0;
{
buffer<T> a_buf(&a, 1);
buffer<T> b_buf(&b, 1);

q.submit([&](handler &cgh) {
auto a_acc = a_buf.template get_access<access::mode::read_write>(cgh);
auto b_acc = b_buf.template get_access<access::mode::read_write>(cgh);
cgh.parallel_for<memory_order_kernel<T, memory_order::seq_cst>>(
range<1>(N), [=](item<1> it) {
int gid = it.get_id(0);
auto aar =
atomic_ref<T, memory_order::seq_cst, memory_scope::device,
access::address_space::global_space>(a_acc[0]);
auto bar =
atomic_ref<T, memory_order::seq_cst, memory_scope::device,
access::address_space::global_space>(b_acc[0]);
auto ald = aar.load();
auto bld = bar.load();
ald += 1;
bld += ald;
bar.store(bld);
aar.store(ald);
});
});
}

// All work-items increment a by 1, so final value should be equal to N
assert(a == T(N));
// b is the sum of [1..N]
size_t rsum = 0;
for (size_t i = 1; i <= N; ++i)
rsum += i;
assert(b == T(rsum));
}

bool is_supported(std::vector<memory_order> capabilities,
memory_order mem_order) {
return std::find(capabilities.begin(), capabilities.end(), mem_order) !=
capabilities.end();
}

int main() {
queue q;

std::vector<memory_order> supported_memory_orders =
q.get_device().get_info<info::device::atomic_memory_order_capabilities>();
bool atomic64_support = q.get_device().has(aspect::atomic64);

constexpr int N = 32;

// Relaxed memory order must be supported. This ordering is used in other
// tests.
assert(is_supported(supported_memory_orders, memory_order::relaxed));

if (is_supported(supported_memory_orders, memory_order::acq_rel)) {
// Acquire-release memory order must also support both acquire and release
// orderings.
assert(is_supported(supported_memory_orders, memory_order::acquire) &&
is_supported(supported_memory_orders, memory_order::release));
acq_rel_test<int>(q, N);
acq_rel_test<unsigned int>(q, N);
acq_rel_test<float>(q, N);
if (sizeof(long) == 4) {
// long is 32-bit
acq_rel_test<long>(q, N);
acq_rel_test<unsigned long>(q, N);
}
if (atomic64_support) {
if (sizeof(long) == 8) {
// long is 64-bit
acq_rel_test<long>(q, N);
acq_rel_test<unsigned long>(q, N);
}
acq_rel_test<long long>(q, N);
acq_rel_test<unsigned long long>(q, N);
acq_rel_test<double>(q, N);
}
}

if (is_supported(supported_memory_orders, memory_order::seq_cst)) {
seq_cst_test<int>(q, N);
seq_cst_test<unsigned int>(q, N);
seq_cst_test<float>(q, N);
if (sizeof(long) == 4) {
// long is 32-bit
seq_cst_test<long>(q, N);
seq_cst_test<unsigned long>(q, N);
}
if (atomic64_support) {
if (sizeof(long) == 8) {
// long is 64-bit
seq_cst_test<long>(q, N);
seq_cst_test<unsigned long>(q, N);
}
seq_cst_test<long long>(q, N);
seq_cst_test<unsigned long long>(q, N);
seq_cst_test<double>(q, N);
}
}

std::cout << "Test passed." << std::endl;
}
6 changes: 3 additions & 3 deletions SYCL/AtomicRef/compare_exchange.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ template <typename T> void compare_exchange_test(queue q, size_t N) {
cgh.parallel_for<compare_exchange_kernel<T>>(
range<1>(N), [=](item<1> it) {
size_t gid = it.get_id(0);
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
ONEAPI::memory_scope::device,
access::address_space::global_space>(exc[0]);
auto atm =
atomic_ref<T, memory_order::relaxed, memory_scope::device,
access::address_space::global_space>(exc[0]);
T result = T(N); // Avoid copying pointer
bool success = atm.compare_exchange_strong(result, (T)gid);
if (success) {
Expand Down
3 changes: 1 addition & 2 deletions SYCL/AtomicRef/exchange.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@ template <typename T> void exchange_test(queue q, size_t N) {
output_buf.template get_access<access::mode::discard_write>(cgh);
cgh.parallel_for<exchange_kernel<T>>(range<1>(N), [=](item<1> it) {
size_t gid = it.get_id(0);
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
ONEAPI::memory_scope::device,
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
access::address_space::global_space>(exc[0]);
out[gid] = atm.exchange(T(gid));
});
Expand Down
3 changes: 1 addition & 2 deletions SYCL/AtomicRef/load.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@ template <typename T> void load_test(queue q, size_t N) {
output_buf.template get_access<access::mode::discard_write>(cgh);
cgh.parallel_for<load_kernel<T>>(range<1>(N), [=](item<1> it) {
size_t gid = it.get_id(0);
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
ONEAPI::memory_scope::device,
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
access::address_space::global_space>(ld[0]);
out[gid] = atm.load();
});
Expand Down
3 changes: 1 addition & 2 deletions SYCL/AtomicRef/max.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@ template <typename T> void max_test(queue q, size_t N) {
output_buf.template get_access<access::mode::discard_write>(cgh);
cgh.parallel_for(range<1>(N), [=](item<1> it) {
int gid = it.get_id(0);
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
ONEAPI::memory_scope::device,
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
access::address_space::global_space>(val[0]);

// +1 accounts for lowest() returning 0 for unsigned types
Expand Down
3 changes: 1 addition & 2 deletions SYCL/AtomicRef/min.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@ template <typename T> void min_test(queue q, size_t N) {
output_buf.template get_access<access::mode::discard_write>(cgh);
cgh.parallel_for(range<1>(N), [=](item<1> it) {
int gid = it.get_id(0);
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
ONEAPI::memory_scope::device,
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
access::address_space::global_space>(val[0]);
out[gid] = atm.fetch_min(T(gid));
});
Expand Down
3 changes: 1 addition & 2 deletions SYCL/AtomicRef/store.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@ template <typename T> void store_test(queue q, size_t N) {
auto st = store_buf.template get_access<access::mode::read_write>(cgh);
cgh.parallel_for<store_kernel<T>>(range<1>(N), [=](item<1> it) {
size_t gid = it.get_id(0);
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
ONEAPI::memory_scope::device,
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
access::address_space::global_space>(st[0]);
atm.store(T(gid));
});
Expand Down
12 changes: 4 additions & 8 deletions SYCL/AtomicRef/sub.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@ void sub_fetch_test(queue q, size_t N) {
output_buf.template get_access<access::mode::discard_write>(cgh);
cgh.parallel_for(range<1>(N), [=](item<1> it) {
int gid = it.get_id(0);
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
ONEAPI::memory_scope::device,
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
access::address_space::global_space>(val[0]);
out[gid] = atm.fetch_sub(Difference(1));
});
Expand Down Expand Up @@ -64,8 +63,7 @@ void sub_plus_equal_test(queue q, size_t N) {
output_buf.template get_access<access::mode::discard_write>(cgh);
cgh.parallel_for(range<1>(N), [=](item<1> it) {
int gid = it.get_id(0);
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
ONEAPI::memory_scope::device,
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
access::address_space::global_space>(val[0]);
out[gid] = atm -= Difference(1);
});
Expand Down Expand Up @@ -100,8 +98,7 @@ void sub_pre_dec_test(queue q, size_t N) {
output_buf.template get_access<access::mode::discard_write>(cgh);
cgh.parallel_for(range<1>(N), [=](item<1> it) {
int gid = it.get_id(0);
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
ONEAPI::memory_scope::device,
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
access::address_space::global_space>(val[0]);
out[gid] = --atm;
});
Expand Down Expand Up @@ -136,8 +133,7 @@ void sub_post_dec_test(queue q, size_t N) {
output_buf.template get_access<access::mode::discard_write>(cgh);
cgh.parallel_for(range<1>(N), [=](item<1> it) {
int gid = it.get_id(0);
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
ONEAPI::memory_scope::device,
auto atm = atomic_ref<T, memory_order::relaxed, memory_scope::device,
access::address_space::global_space>(val[0]);
out[gid] = atm--;
});
Expand Down