Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

[SYCL] Add tests for USM copy, dependencies, and mem_advise #342

Merged
merged 2 commits into from
Jul 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 163 additions & 0 deletions SYCL/USM/copy.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
//==---- copy.cpp - USM copy test ------------------------------------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out
// RUN: %HOST_RUN_PLACEHOLDER %t1.out
// RUN: %CPU_RUN_PLACEHOLDER %t1.out
// RUN: %GPU_RUN_PLACEHOLDER %t1.out
// RUN: %ACC_RUN_PLACEHOLDER %t1.out

#include <CL/sycl.hpp>

using namespace sycl;
using namespace sycl::usm;

template <typename T> class transfer;

static constexpr int N = 100; // should be even

struct test_struct {
short a;
int b;
long c;
long long d;
half e;
float f;
double g;
};

bool operator==(const test_struct &lhs, const test_struct &rhs) {
return lhs.a == rhs.a && lhs.b == rhs.b && lhs.c == rhs.c && lhs.d == rhs.d &&
lhs.e == rhs.e && lhs.f == rhs.f && lhs.g == rhs.g;
}

template <typename T> T *regular(queue q, alloc kind) {
return malloc<T>(N, q, kind);
}

template <typename T> T *aligned(queue q, alloc kind) {
return aligned_alloc<T>(alignof(long long), N, q, kind);
}

template <typename T> void test(queue q, T val, T *src, T *dst, bool dev_dst) {
q.fill(src, val, N).wait();

// Use queue::copy for the first half and handler::copy for the second
q.copy(src, dst, N / 2).wait();
q.submit([&](handler &h) { h.copy(src + N / 2, dst + N / 2, N / 2); }).wait();

T *out = dst;

std::array<T, N> arr;
if (dev_dst) { // if copied to device, transfer data back to host
buffer buf{arr};
q.submit([&](handler &h) {
accessor acc{buf, h};
h.parallel_for<transfer<T>>(N, [=](id<1> i) { acc[i] = dst[i]; });
});
out = arr.data();
}

for (int i = 0; i < N; ++i) {
assert(out[i] == val);
}

free(src, q);
free(dst, q);
}

template <typename T> void runTests(queue q, T val, alloc kind1, alloc kind2) {
bool dev_dst1 = (kind1 == alloc::device);
bool dev_dst2 = (kind2 == alloc::device);
test(q, val, regular<T>(q, kind1), regular<T>(q, kind2), dev_dst2);
test(q, val, regular<T>(q, kind2), regular<T>(q, kind1), dev_dst1);
test(q, val, aligned<T>(q, kind1), aligned<T>(q, kind2), dev_dst2);
test(q, val, aligned<T>(q, kind2), aligned<T>(q, kind1), dev_dst1);
test(q, val, regular<T>(q, kind1), aligned<T>(q, kind2), dev_dst2);
test(q, val, regular<T>(q, kind2), aligned<T>(q, kind1), dev_dst1);
test(q, val, aligned<T>(q, kind1), regular<T>(q, kind2), dev_dst2);
test(q, val, aligned<T>(q, kind2), regular<T>(q, kind1), dev_dst1);
}

int main() {
queue q;
auto dev = q.get_device();

test_struct test_obj{4, 42, 424, 4242, 4.2f, 4.242f, 4.24242};

if (dev.has(aspect::usm_host_allocations)) {
runTests<short>(q, 4, alloc::host, alloc::host);
runTests<int>(q, 42, alloc::host, alloc::host);
runTests<long>(q, 424, alloc::host, alloc::host);
runTests<long long>(q, 4242, alloc::host, alloc::host);
runTests<half>(q, half(4.2f), alloc::host, alloc::host);
runTests<float>(q, 4.242f, alloc::host, alloc::host);
runTests<double>(q, 4.24242, alloc::host, alloc::host);
runTests<test_struct>(q, test_obj, alloc::host, alloc::host);
}

if (dev.has(aspect::usm_shared_allocations)) {
runTests<short>(q, 4, alloc::shared, alloc::shared);
runTests<int>(q, 42, alloc::shared, alloc::shared);
runTests<long>(q, 424, alloc::shared, alloc::shared);
runTests<long long>(q, 4242, alloc::shared, alloc::shared);
runTests<half>(q, half(4.2f), alloc::shared, alloc::shared);
runTests<float>(q, 4.242f, alloc::shared, alloc::shared);
runTests<double>(q, 4.24242, alloc::shared, alloc::shared);
runTests<test_struct>(q, test_obj, alloc::shared, alloc::shared);
}

if (dev.has(aspect::usm_device_allocations)) {
runTests<short>(q, 4, alloc::device, alloc::device);
runTests<int>(q, 42, alloc::device, alloc::device);
runTests<long>(q, 424, alloc::device, alloc::device);
runTests<long long>(q, 4242, alloc::device, alloc::device);
runTests<half>(q, half(4.2f), alloc::device, alloc::device);
runTests<float>(q, 4.242f, alloc::device, alloc::device);
runTests<double>(q, 4.24242, alloc::device, alloc::device);
runTests<test_struct>(q, test_obj, alloc::device, alloc::device);
}

if (dev.has(aspect::usm_host_allocations) &&
dev.has(aspect::usm_shared_allocations)) {
runTests<short>(q, 4, alloc::host, alloc::shared);
runTests<int>(q, 42, alloc::host, alloc::shared);
runTests<long>(q, 424, alloc::host, alloc::shared);
runTests<long long>(q, 4242, alloc::host, alloc::shared);
runTests<half>(q, half(4.2f), alloc::host, alloc::shared);
runTests<float>(q, 4.242f, alloc::host, alloc::shared);
runTests<double>(q, 4.24242, alloc::host, alloc::shared);
runTests<test_struct>(q, test_obj, alloc::host, alloc::shared);
}

if (dev.has(aspect::usm_host_allocations) &&
dev.has(aspect::usm_device_allocations)) {
runTests<short>(q, 4, alloc::host, alloc::device);
runTests<int>(q, 42, alloc::host, alloc::device);
runTests<long>(q, 424, alloc::host, alloc::device);
runTests<long long>(q, 4242, alloc::host, alloc::device);
runTests<half>(q, half(4.2f), alloc::host, alloc::device);
runTests<float>(q, 4.242f, alloc::host, alloc::device);
runTests<double>(q, 4.24242, alloc::host, alloc::device);
runTests<test_struct>(q, test_obj, alloc::host, alloc::device);
}

if (dev.has(aspect::usm_shared_allocations) &&
dev.has(aspect::usm_device_allocations)) {
runTests<short>(q, 4, alloc::shared, alloc::device);
runTests<int>(q, 42, alloc::shared, alloc::device);
runTests<long>(q, 424, alloc::shared, alloc::device);
runTests<long long>(q, 4242, alloc::shared, alloc::device);
runTests<half>(q, half(4.2f), alloc::shared, alloc::device);
runTests<float>(q, 4.242f, alloc::shared, alloc::device);
runTests<double>(q, 4.24242, alloc::shared, alloc::device);
runTests<test_struct>(q, test_obj, alloc::shared, alloc::device);
}

return 0;
}
45 changes: 45 additions & 0 deletions SYCL/USM/dep_events.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
//==---- dep_events.cpp - USM dependency test ------------------------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out
// RUN: %HOST_RUN_PLACEHOLDER %t1.out
// RUN: %CPU_RUN_PLACEHOLDER %t1.out
// RUN: %GPU_RUN_PLACEHOLDER %t1.out
// RUN: %ACC_RUN_PLACEHOLDER %t1.out
//
// XFAIL: cuda
// TODO enable the test when cuda_piextUSMEnqueuePrefetch starts handling flags

#include <CL/sycl.hpp>

using namespace sycl;

int main() {
queue q;

int *x = malloc_shared<int>(1, q);
int *y = malloc_shared<int>(1, q);
int *z = malloc_shared<int>(1, q);

event eMemset1 = q.memset(x, 0, sizeof(int), event{}); // x = 0
event eMemset2 = q.memset(y, 0, sizeof(int), std::vector<event>{}); // y = 0
event eFill = q.fill(x, 1, 1, {eMemset1, eMemset2}); // x = 1
event eMemcpy = q.memcpy(y, x, sizeof(int), eFill); // y = 1
event eCopy = q.copy(y, z, 1, eMemcpy); // z = 1
event ePrefetch = q.prefetch(z, sizeof(int), eCopy); //
q.single_task<class kernel>(ePrefetch, [=] { *z *= 2; }).wait(); // z = 2

int error = (*z != 2) ? 1 : 0;
std::cout << (error ? "failed\n" : "passed\n");

free(x, q);
free(y, q);
free(z, q);

return error;
}
10 changes: 7 additions & 3 deletions SYCL/USM/memadvise.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// RUN: %HOST_RUN_PLACEHOLDER %t1.out
// RUN: %CPU_RUN_PLACEHOLDER %t1.out
// RUN: %GPU_RUN_PLACEHOLDER %t1.out
// RUN: %ACC_RUN_PLACEHOLDER %t1.out

//==---------------- memadvise.cpp - Shared Memory Linked List test --------==//
//
Expand Down Expand Up @@ -37,7 +38,8 @@ int main() {
if (s_head == nullptr) {
return -1;
}
q.mem_advise(s_head, sizeof(Node), (pi_mem_advice)mem_advice);
// Test queue::mem_advise
q.mem_advise(s_head, sizeof(Node), pi_mem_advice(mem_advice));
Node *s_cur = s_head;

for (int i = 0; i < numNodes; i++) {
Expand All @@ -48,7 +50,10 @@ int main() {
if (s_cur->pNext == nullptr) {
return -1;
}
q.mem_advise(s_cur->pNext, sizeof(Node), (pi_mem_advice)mem_advice);
// Test handler::mem_advise
q.submit([&](handler &cgh) {
cgh.mem_advise(s_cur->pNext, sizeof(Node), pi_mem_advice(mem_advice));
});
} else {
s_cur->pNext = nullptr;
}
Expand All @@ -69,7 +74,6 @@ int main() {
e1.wait();

s_cur = s_head;
int mismatches = 0;
for (int i = 0; i < numNodes; i++) {
const int want = i * 4 + 1;
if (s_cur->Num != want) {
Expand Down