Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

[SYCL] Extend coverage of USM memcpy, fill, and memset #144

Merged
merged 1 commit into from
Feb 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions SYCL/USM/allocator_container.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out
// RUN: %HOST_RUN_PLACEHOLDER %t1.out
// RUN: %CPU_RUN_PLACEHOLDER %t1.out
// RUN: %GPU_RUN_PLACEHOLDER %t1.out
// RUN: %ACC_RUN_PLACEHOLDER %t1.out

//==------ allocator_container.cpp - USM allocator in containers test ------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <CL/sycl.hpp>

#include <cassert>

using namespace cl::sycl;

// Number of ints held by the USM-backed vector and summed by the test kernel.
constexpr int N = 100;

/// Exercises usm_allocator as the allocator of a std::vector.
///
/// A vector of N ints allocated through usm_allocator<int, AllocMode> is
/// filled with 0..N-1 on the host. A single-task kernel named \p KernelName
/// then adds elements 1..N-1 into element 0, and the host asserts that
/// element 0 equals N*(N-1)/2.
///
/// \param dev  device handed to the allocator.
/// \param ctxt context handed to the allocator.
/// \param q    queue the kernel is submitted to.
template <usm::alloc AllocMode, class KernelName>
void runTest(device dev, context ctxt, queue q) {
  using Allocator = usm_allocator<int, AllocMode>;
  Allocator usmAlloc(ctxt, dev);

  std::vector<int, Allocator> data(usmAlloc);
  data.resize(N);

  // Element k holds the value k.
  int next = 0;
  for (int &slot : data)
    slot = next++;

  // The kernel lambda copies this raw pointer rather than the vector itself.
  int *raw = data.data();

  auto done = q.submit([=](handler &cgh) {
    cgh.single_task<KernelName>([=]() {
      int total = raw[0];
      for (int idx = 1; idx < N; ++idx)
        total += raw[idx];
      raw[0] = total;
    });
  });
  done.wait();

  assert(raw[0] == (N * (N - 1)) / 2);
}

int main() {
queue q;
auto dev = q.get_device();
auto ctxt = q.get_context();

if (dev.get_info<info::device::usm_shared_allocations>()) {
runTest<usm::alloc::shared, class shared_test>(dev, ctxt, q);
}

if (dev.get_info<info::device::usm_host_allocations>()) {
runTest<usm::alloc::host, class host_test>(dev, ctxt, q);
}

// usm::alloc::device is not supported by usm_allocator

return 0;
}
149 changes: 137 additions & 12 deletions SYCL/USM/fill.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,24 +16,149 @@

using namespace cl::sycl;

constexpr int count = 100;
constexpr int pattern = 42;
// Kernel-name tags for the two copy-back kernels in runDeviceTests. They are
// templated on the element type, so every runDeviceTests<T> instantiation
// names its kernels with a distinct type.
template <typename T> class usm_device_transfer;
template <typename T> class usm_aligned_device_transfer;

// Number of elements in every allocation made by the run*Tests helpers.
static constexpr int N = 100;

// Aggregate fill pattern with one member of each scalar type that the fill
// tests also exercise on their own. Member order matters: main() initializes
// the object positionally.
struct test_struct {
short a;
int b;
long c;
long long d;
sycl::half e;
float f;
double g;
};

/// Member-wise equality for test_struct.
///
/// Members are compared in declaration order (a through g); the first pair
/// that does not compare equal makes the result false.
bool operator==(const test_struct &lhs, const test_struct &rhs) {
  if (!(lhs.a == rhs.a))
    return false;
  if (!(lhs.b == rhs.b))
    return false;
  if (!(lhs.c == rhs.c))
    return false;
  if (!(lhs.d == rhs.d))
    return false;
  if (!(lhs.e == rhs.e))
    return false;
  if (!(lhs.f == rhs.f))
    return false;
  return lhs.g == rhs.g;
}

/// Checks handler::fill on host USM allocations of element type \p T.
///
/// Two allocations of N elements are tested one after the other: the first
/// from malloc_host, the second from aligned_alloc_host with
/// alignof(long long) alignment. Each is filled with \p val through a queue
/// submission, every element is asserted equal to \p val on the host, and the
/// allocation is then freed.
///
/// \param dev  unused here; kept so all run*Tests helpers share a signature.
/// \param ctxt context used for the aligned allocation and for free.
/// \param q    queue used for the plain allocation and for the fill.
/// \param val  pattern written to every element.
template <typename T>
void runHostTests(device dev, context ctxt, queue q, T val) {
  // Fill one allocation, verify it on the host, then release it.
  auto fillAndCheck = [&](T *data) {
    q.submit([&](handler &cgh) { cgh.fill(data, val, N); }).wait();
    for (int idx = 0; idx < N; ++idx)
      assert(data[idx] == val);
    free(data, ctxt);
  };

  fillAndCheck(static_cast<T *>(malloc_host(N * sizeof(T), q)));
  fillAndCheck(static_cast<T *>(
      aligned_alloc_host(alignof(long long), N * sizeof(T), ctxt)));
}

/// Checks handler::fill on shared USM allocations of element type \p T.
///
/// Pass 0 uses malloc_shared; pass 1 uses aligned_alloc_shared with
/// alignof(long long) alignment. In each pass the N-element allocation is
/// filled with \p val through a queue submission, every element is asserted
/// equal to \p val on the host, and the allocation is freed before the next
/// pass allocates.
///
/// \param dev  device used for the aligned allocation.
/// \param ctxt context used for the aligned allocation and for free.
/// \param q    queue used for the plain allocation and for the fill.
/// \param val  pattern written to every element.
template <typename T>
void runSharedTests(device dev, context ctxt, queue q, T val) {
  for (int pass = 0; pass < 2; ++pass) {
    T *data = nullptr;
    if (pass == 0) {
      data = static_cast<T *>(malloc_shared(N * sizeof(T), q));
    } else {
      data = static_cast<T *>(
          aligned_alloc_shared(alignof(long long), N * sizeof(T), dev, ctxt));
    }

    q.submit([&](handler &cgh) { cgh.fill(data, val, N); }).wait();

    for (int idx = 0; idx < N; ++idx)
      assert(data[idx] == val);

    free(data, ctxt);
  }
}

/// Checks handler::fill on device USM allocations of element type \p T.
///
/// Two N-element allocations are tested in turn: one from malloc_device and
/// one from aligned_alloc_device with alignof(long long) alignment. After the
/// fill, a kernel copies every element into a buffer that wraps the host
/// vector `out`. The buffer lives in its own scope, so it is destroyed before
/// `out` is compared against \p val.
///
/// \param dev  device used for the aligned allocation.
/// \param ctxt context used for the aligned allocation and for free.
/// \param q    queue used for the plain allocation, the fills and the kernels.
/// \param val  pattern written to every element.
template <typename T>
void runDeviceTests(device dev, context ctxt, queue q, T val) {
  std::vector<T> out(N);

  // Host-side verification of whatever the last copy-back kernel produced.
  auto expectAllEqualVal = [&]() {
    for (int idx = 0; idx < N; ++idx)
      assert(out[idx] == val);
  };

  // --- plain device allocation ---
  T *plainMem = static_cast<T *>(malloc_device(N * sizeof(T), q));
  q.submit([&](handler &cgh) { cgh.fill(plainMem, val, N); }).wait();

  {
    buffer<T, 1> staging{out.data(), range<1>{N}};
    q.submit([&](handler &cgh) {
      auto dst = staging.template get_access<access::mode::write>(cgh);
      cgh.parallel_for<usm_device_transfer<T>>(
          range<1>(N), [=](id<1> idx) { dst[idx] = plainMem[idx]; });
    }).wait();
  }

  expectAllEqualVal();
  free(plainMem, ctxt);

  // Reset the host copy so the second check cannot pass on stale data.
  out.assign(N, T{});

  // --- aligned device allocation ---
  T *alignedMem = static_cast<T *>(
      aligned_alloc_device(alignof(long long), N * sizeof(T), dev, ctxt));
  q.submit([&](handler &cgh) { cgh.fill(alignedMem, val, N); }).wait();

  {
    buffer<T, 1> staging{out.data(), range<1>{N}};
    q.submit([&](handler &cgh) {
      auto dst = staging.template get_access<access::mode::write>(cgh);
      cgh.parallel_for<usm_aligned_device_transfer<T>>(
          range<1>(N), [=](id<1> idx) { dst[idx] = alignedMem[idx]; });
    }).wait();
  }

  expectAllEqualVal();
  free(alignedMem, ctxt);
}

/// Entry point of the USM fill test.
///
/// Queries the default queue's device for host, shared and device USM
/// support. For every supported kind it runs the matching run*Tests helper
/// once per element type: short, int, long, long long, sycl::half, float,
/// double and test_struct. Prints "Passed" when all helpers have returned.
///
/// \returns 0 always; failures surface through the asserts in the helpers.
int main() {
  queue q;
  auto dev = q.get_device();
  auto ctxt = q.get_context();

  // Aggregate pattern; members are initialized positionally (a..g).
  test_struct test_obj{4, 42, 424, 4242, 4.2f, 4.242, 4.24242};

  if (dev.get_info<info::device::usm_host_allocations>()) {
    runHostTests<short>(dev, ctxt, q, 4);
    runHostTests<int>(dev, ctxt, q, 42);
    runHostTests<long>(dev, ctxt, q, 424);
    runHostTests<long long>(dev, ctxt, q, 4242);
    runHostTests<sycl::half>(dev, ctxt, q, sycl::half(4.2f));
    runHostTests<float>(dev, ctxt, q, 4.242f);
    runHostTests<double>(dev, ctxt, q, 4.24242);
    runHostTests<test_struct>(dev, ctxt, q, test_obj);
  }

  if (dev.get_info<info::device::usm_shared_allocations>()) {
    runSharedTests<short>(dev, ctxt, q, 4);
    runSharedTests<int>(dev, ctxt, q, 42);
    runSharedTests<long>(dev, ctxt, q, 424);
    runSharedTests<long long>(dev, ctxt, q, 4242);
    runSharedTests<sycl::half>(dev, ctxt, q, sycl::half(4.2f));
    runSharedTests<float>(dev, ctxt, q, 4.242f);
    runSharedTests<double>(dev, ctxt, q, 4.24242);
    runSharedTests<test_struct>(dev, ctxt, q, test_obj);
  }

  if (dev.get_info<info::device::usm_device_allocations>()) {
    runDeviceTests<short>(dev, ctxt, q, 4);
    runDeviceTests<int>(dev, ctxt, q, 42);
    runDeviceTests<long>(dev, ctxt, q, 424);
    runDeviceTests<long long>(dev, ctxt, q, 4242);
    runDeviceTests<sycl::half>(dev, ctxt, q, sycl::half(4.2f));
    runDeviceTests<float>(dev, ctxt, q, 4.242f);
    runDeviceTests<double>(dev, ctxt, q, 4.24242);
    runDeviceTests<test_struct>(dev, ctxt, q, test_obj);
  }

  std::cout << "Passed\n";

  return 0;
}
Loading