Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

[SYCL] add extra tests #37

Merged
merged 4 commits into from
Oct 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 111 additions & 0 deletions SYCL/AtomicRef/accessor.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

#include <CL/sycl.hpp>
#include <algorithm>
#include <cassert>
#include <iostream>
#include <numeric>
#include <vector>
using namespace sycl;
using namespace sycl::ONEAPI;

// Equivalent to add_test from add.cpp
// Uses atomic_accessor instead of atomic_ref
template <typename T> void accessor_test(queue q, size_t N) {
T sum = 0;
std::vector<T> output(N, 0);
{
buffer<T> sum_buf(&sum, 1);
buffer<T> output_buf(output.data(), output.size());

q.submit([&](handler &cgh) {
#if __cplusplus > 201402L
static_assert(
std::is_same<decltype(atomic_accessor(sum_buf, cgh, relaxed_order,
device_scope)),
atomic_accessor<T, 1, ONEAPI::memory_order::relaxed,
ONEAPI::memory_scope::device>>::value,
"atomic_accessor type incorrectly deduced");
#endif
auto sum = atomic_accessor<T, 1, ONEAPI::memory_order::relaxed,
ONEAPI::memory_scope::device>(sum_buf, cgh);
auto out =
output_buf.template get_access<access::mode::discard_write>(cgh);
cgh.parallel_for(range<1>(N), [=](item<1> it) {
int gid = it.get_id(0);
static_assert(
std::is_same<
decltype(sum[0]),
atomic_ref<T, ONEAPI::memory_order::relaxed,
ONEAPI::memory_scope::device,
access::address_space::global_space>>::value,
"atomic_accessor returns incorrect atomic_ref");
out[gid] = sum[0].fetch_add(T(1));
});
});
}

// All work-items increment by 1, so final value should be equal to N
assert(sum == N);

// Intermediate values should be unique
std::sort(output.begin(), output.end());
assert(std::unique(output.begin(), output.end()) == output.end());

// Fetch returns original value: will be in [0, N-1]
auto min_e = output[0];
auto max_e = output[output.size() - 1];
assert(min_e == 0 && max_e == N - 1);
}

// Simplified form of accessor_test for local memory
template <typename T>
void local_accessor_test(queue q, size_t N, size_t L = 8) {
assert(N % L == 0);
std::vector<T> output(N / L, 0);
{
buffer<T> output_buf(output.data(), output.size());
q.submit([&](handler &cgh) {
auto sum =
atomic_accessor<T, 1, ONEAPI::memory_order::relaxed,
ONEAPI::memory_scope::device, access::target::local>(
1, cgh);
auto out = output_buf.template get_access<access::mode::read_write>(cgh);
cgh.parallel_for(nd_range<1>(N, L), [=](nd_item<1> it) {
int grp = it.get_group(0);
sum[0].store(0);
it.barrier();
static_assert(
std::is_same<decltype(sum[0]),
atomic_ref<T, ONEAPI::memory_order::relaxed,
ONEAPI::memory_scope::device,
access::address_space::local_space>>::value,
"local atomic_accessor returns incorrect atomic_ref");
T result = sum[0].fetch_add(T(1));
if (result == it.get_local_range(0) - 1) {
out[grp] = result;
}
});
});
}

// All work-items increment by 1, and last in the group writes out old value
// All values should be L-1
assert(std::all_of(output.begin(), output.end(),
[=](T x) { return x == L - 1; }));
}

int main() {
queue q;
constexpr int N = 32;
accessor_test<int>(q, N);
// TODO: Enable local accessor test for host when barrier is supported
if (!q.get_device().is_host()) {
local_accessor_test<int>(q, N);
}
std::cout << "Test passed." << std::endl;
}
198 changes: 198 additions & 0 deletions SYCL/AtomicRef/add.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

#include <CL/sycl.hpp>
#include <algorithm>
#include <cassert>
#include <iostream>
#include <numeric>
#include <vector>
using namespace sycl;
using namespace sycl::ONEAPI;

template <typename T, typename Difference = T>
void add_fetch_test(queue q, size_t N) {
T sum = 0;
std::vector<T> output(N);
std::fill(output.begin(), output.end(), T(0));
{
buffer<T> sum_buf(&sum, 1);
buffer<T> output_buf(output.data(), output.size());

q.submit([&](handler &cgh) {
auto sum = sum_buf.template get_access<access::mode::read_write>(cgh);
auto out =
output_buf.template get_access<access::mode::discard_write>(cgh);
cgh.parallel_for(range<1>(N), [=](item<1> it) {
int gid = it.get_id(0);
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
ONEAPI::memory_scope::device,
access::address_space::global_space>(sum[0]);
out[gid] = atm.fetch_add(Difference(1));
});
});
}

// All work-items increment by 1, so final value should be equal to N
assert(sum == T(N));

// Fetch returns original value: will be in [0, N-1]
auto min_e = std::min_element(output.begin(), output.end());
auto max_e = std::max_element(output.begin(), output.end());
assert(*min_e == T(0) && *max_e == T(N - 1));

// Intermediate values should be unique
std::sort(output.begin(), output.end());
assert(std::unique(output.begin(), output.end()) == output.end());
}

template <typename T, typename Difference = T>
void add_plus_equal_test(queue q, size_t N) {
T sum = 0;
std::vector<T> output(N);
std::fill(output.begin(), output.end(), T(0));
{
buffer<T> sum_buf(&sum, 1);
buffer<T> output_buf(output.data(), output.size());

q.submit([&](handler &cgh) {
auto sum = sum_buf.template get_access<access::mode::read_write>(cgh);
auto out =
output_buf.template get_access<access::mode::discard_write>(cgh);
cgh.parallel_for(range<1>(N), [=](item<1> it) {
int gid = it.get_id(0);
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
ONEAPI::memory_scope::device,
access::address_space::global_space>(sum[0]);
out[gid] = atm += Difference(1);
});
});
}

// All work-items increment by 1, so final value should be equal to N
assert(sum == T(N));

// += returns updated value: will be in [1, N]
auto min_e = std::min_element(output.begin(), output.end());
auto max_e = std::max_element(output.begin(), output.end());
assert(*min_e == T(1) && *max_e == T(N));

// Intermediate values should be unique
std::sort(output.begin(), output.end());
assert(std::unique(output.begin(), output.end()) == output.end());
}

template <typename T, typename Difference = T>
void add_pre_inc_test(queue q, size_t N) {
T sum = 0;
std::vector<T> output(N);
std::fill(output.begin(), output.end(), T(0));
{
buffer<T> sum_buf(&sum, 1);
buffer<T> output_buf(output.data(), output.size());

q.submit([&](handler &cgh) {
auto sum = sum_buf.template get_access<access::mode::read_write>(cgh);
auto out =
output_buf.template get_access<access::mode::discard_write>(cgh);
cgh.parallel_for(range<1>(N), [=](item<1> it) {
int gid = it.get_id(0);
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
ONEAPI::memory_scope::device,
access::address_space::global_space>(sum[0]);
out[gid] = ++atm;
});
});
}

// All work-items increment by 1, so final value should be equal to N
assert(sum == T(N));

// Pre-increment returns updated value: will be in [1, N]
auto min_e = std::min_element(output.begin(), output.end());
auto max_e = std::max_element(output.begin(), output.end());
assert(*min_e == T(1) && *max_e == T(N));

// Intermediate values should be unique
std::sort(output.begin(), output.end());
assert(std::unique(output.begin(), output.end()) == output.end());
}

template <typename T, typename Difference = T>
void add_post_inc_test(queue q, size_t N) {
T sum = 0;
std::vector<T> output(N);
std::fill(output.begin(), output.end(), T(0));
{
buffer<T> sum_buf(&sum, 1);
buffer<T> output_buf(output.data(), output.size());

q.submit([&](handler &cgh) {
auto sum = sum_buf.template get_access<access::mode::read_write>(cgh);
auto out =
output_buf.template get_access<access::mode::discard_write>(cgh);
cgh.parallel_for(range<1>(N), [=](item<1> it) {
int gid = it.get_id(0);
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
ONEAPI::memory_scope::device,
access::address_space::global_space>(sum[0]);
out[gid] = atm++;
});
});
}

// All work-items increment by 1, so final value should be equal to N
assert(sum == T(N));

// Post-increment returns original value: will be in [0, N-1]
auto min_e = std::min_element(output.begin(), output.end());
auto max_e = std::max_element(output.begin(), output.end());
assert(*min_e == T(0) && *max_e == T(N - 1));

// Intermediate values should be unique
std::sort(output.begin(), output.end());
assert(std::unique(output.begin(), output.end()) == output.end());
}

template <typename T, typename Difference = T>
void add_test(queue q, size_t N) {
add_fetch_test<T, Difference>(q, N);
add_plus_equal_test<T, Difference>(q, N);
add_pre_inc_test<T, Difference>(q, N);
add_post_inc_test<T, Difference>(q, N);
}

// Floating-point types do not support pre- or post-increment
template <> void add_test<float>(queue q, size_t N) {
add_fetch_test<float>(q, N);
add_plus_equal_test<float>(q, N);
}
template <> void add_test<double>(queue q, size_t N) {
add_fetch_test<double>(q, N);
add_plus_equal_test<double>(q, N);
}

int main() {
queue q;
std::string version = q.get_device().get_info<info::device::version>();
if (version < std::string("2.0")) {
std::cout << "Skipping test\n";
return 0;
}

constexpr int N = 32;
add_test<int>(q, N);
add_test<unsigned int>(q, N);
add_test<long>(q, N);
add_test<unsigned long>(q, N);
add_test<long long>(q, N);
add_test<unsigned long long>(q, N);
add_test<float>(q, N);
add_test<double>(q, N);
add_test<char *, ptrdiff_t>(q, N);

std::cout << "Test passed." << std::endl;
}
78 changes: 78 additions & 0 deletions SYCL/AtomicRef/compare_exchange.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

#include <CL/sycl.hpp>
#include <algorithm>
#include <cassert>
#include <numeric>
#include <vector>
using namespace sycl;
using namespace sycl::ONEAPI;

template <typename T> class compare_exchange_kernel;

template <typename T> void compare_exchange_test(queue q, size_t N) {
const T initial = T(N);
T compare_exchange = initial;
std::vector<T> output(N);
std::fill(output.begin(), output.end(), T(0));
{
buffer<T> compare_exchange_buf(&compare_exchange, 1);
buffer<T> output_buf(output.data(), output.size());

q.submit([&](handler &cgh) {
auto exc =
compare_exchange_buf.template get_access<access::mode::read_write>(
cgh);
auto out =
output_buf.template get_access<access::mode::discard_write>(cgh);
cgh.parallel_for<compare_exchange_kernel<T>>(
range<1>(N), [=](item<1> it) {
size_t gid = it.get_id(0);
auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
ONEAPI::memory_scope::device,
access::address_space::global_space>(exc[0]);
T result = T(N); // Avoid copying pointer
bool success = atm.compare_exchange_strong(result, (T)gid);
if (success) {
out[gid] = result;
} else {
out[gid] = T(gid);
}
});
});
}

// Only one work-item should have received the initial sentinel value
assert(std::count(output.begin(), output.end(), initial) == 1);

// All other values should be the index itself or the sentinel value
for (size_t i = 0; i < N; ++i) {
assert(output[i] == T(i) || output[i] == initial);
}
}

int main() {
queue q;
std::string version = q.get_device().get_info<info::device::version>();
if (version < std::string("2.0")) {
std::cout << "Skipping test\n";
return 0;
}

constexpr int N = 32;
compare_exchange_test<int>(q, N);
compare_exchange_test<unsigned int>(q, N);
compare_exchange_test<long>(q, N);
compare_exchange_test<unsigned long>(q, N);
compare_exchange_test<long long>(q, N);
compare_exchange_test<unsigned long long>(q, N);
compare_exchange_test<float>(q, N);
compare_exchange_test<double>(q, N);
compare_exchange_test<char *>(q, N);

std::cout << "Test passed." << std::endl;
}
Loading