Skip to content

Commit 44fb86d

Browse files
dnmokhovsmaslov-intel
authored andcommitted
[SYCL] Add tests for USM copy, dependencies, and mem_advise (intel#342)
1 parent cd36342 commit 44fb86d

File tree

3 files changed

+215
-3
lines changed

3 files changed

+215
-3
lines changed

SYCL/USM/copy.cpp

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
//==---- copy.cpp - USM copy test ------------------------------------------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out
10+
// RUN: %HOST_RUN_PLACEHOLDER %t1.out
11+
// RUN: %CPU_RUN_PLACEHOLDER %t1.out
12+
// RUN: %GPU_RUN_PLACEHOLDER %t1.out
13+
// RUN: %ACC_RUN_PLACEHOLDER %t1.out
14+
15+
#include <CL/sycl.hpp>
16+
17+
using namespace sycl;
18+
using namespace sycl::usm;
19+
20+
template <typename T> class transfer;
21+
22+
static constexpr int N = 100; // should be even
23+
24+
struct test_struct {
25+
short a;
26+
int b;
27+
long c;
28+
long long d;
29+
half e;
30+
float f;
31+
double g;
32+
};
33+
34+
bool operator==(const test_struct &lhs, const test_struct &rhs) {
35+
return lhs.a == rhs.a && lhs.b == rhs.b && lhs.c == rhs.c && lhs.d == rhs.d &&
36+
lhs.e == rhs.e && lhs.f == rhs.f && lhs.g == rhs.g;
37+
}
38+
39+
template <typename T> T *regular(queue q, alloc kind) {
40+
return malloc<T>(N, q, kind);
41+
}
42+
43+
template <typename T> T *aligned(queue q, alloc kind) {
44+
return aligned_alloc<T>(alignof(long long), N, q, kind);
45+
}
46+
47+
template <typename T> void test(queue q, T val, T *src, T *dst, bool dev_dst) {
48+
q.fill(src, val, N).wait();
49+
50+
// Use queue::copy for the first half and handler::copy for the second
51+
q.copy(src, dst, N / 2).wait();
52+
q.submit([&](handler &h) { h.copy(src + N / 2, dst + N / 2, N / 2); }).wait();
53+
54+
T *out = dst;
55+
56+
std::array<T, N> arr;
57+
if (dev_dst) { // if copied to device, transfer data back to host
58+
buffer buf{arr};
59+
q.submit([&](handler &h) {
60+
accessor acc{buf, h};
61+
h.parallel_for<transfer<T>>(N, [=](id<1> i) { acc[i] = dst[i]; });
62+
});
63+
out = arr.data();
64+
}
65+
66+
for (int i = 0; i < N; ++i) {
67+
assert(out[i] == val);
68+
}
69+
70+
free(src, q);
71+
free(dst, q);
72+
}
73+
74+
template <typename T> void runTests(queue q, T val, alloc kind1, alloc kind2) {
75+
bool dev_dst1 = (kind1 == alloc::device);
76+
bool dev_dst2 = (kind2 == alloc::device);
77+
test(q, val, regular<T>(q, kind1), regular<T>(q, kind2), dev_dst2);
78+
test(q, val, regular<T>(q, kind2), regular<T>(q, kind1), dev_dst1);
79+
test(q, val, aligned<T>(q, kind1), aligned<T>(q, kind2), dev_dst2);
80+
test(q, val, aligned<T>(q, kind2), aligned<T>(q, kind1), dev_dst1);
81+
test(q, val, regular<T>(q, kind1), aligned<T>(q, kind2), dev_dst2);
82+
test(q, val, regular<T>(q, kind2), aligned<T>(q, kind1), dev_dst1);
83+
test(q, val, aligned<T>(q, kind1), regular<T>(q, kind2), dev_dst2);
84+
test(q, val, aligned<T>(q, kind2), regular<T>(q, kind1), dev_dst1);
85+
}
86+
87+
int main() {
88+
queue q;
89+
auto dev = q.get_device();
90+
91+
test_struct test_obj{4, 42, 424, 4242, 4.2f, 4.242f, 4.24242};
92+
93+
if (dev.has(aspect::usm_host_allocations)) {
94+
runTests<short>(q, 4, alloc::host, alloc::host);
95+
runTests<int>(q, 42, alloc::host, alloc::host);
96+
runTests<long>(q, 424, alloc::host, alloc::host);
97+
runTests<long long>(q, 4242, alloc::host, alloc::host);
98+
runTests<half>(q, half(4.2f), alloc::host, alloc::host);
99+
runTests<float>(q, 4.242f, alloc::host, alloc::host);
100+
runTests<double>(q, 4.24242, alloc::host, alloc::host);
101+
runTests<test_struct>(q, test_obj, alloc::host, alloc::host);
102+
}
103+
104+
if (dev.has(aspect::usm_shared_allocations)) {
105+
runTests<short>(q, 4, alloc::shared, alloc::shared);
106+
runTests<int>(q, 42, alloc::shared, alloc::shared);
107+
runTests<long>(q, 424, alloc::shared, alloc::shared);
108+
runTests<long long>(q, 4242, alloc::shared, alloc::shared);
109+
runTests<half>(q, half(4.2f), alloc::shared, alloc::shared);
110+
runTests<float>(q, 4.242f, alloc::shared, alloc::shared);
111+
runTests<double>(q, 4.24242, alloc::shared, alloc::shared);
112+
runTests<test_struct>(q, test_obj, alloc::shared, alloc::shared);
113+
}
114+
115+
if (dev.has(aspect::usm_device_allocations)) {
116+
runTests<short>(q, 4, alloc::device, alloc::device);
117+
runTests<int>(q, 42, alloc::device, alloc::device);
118+
runTests<long>(q, 424, alloc::device, alloc::device);
119+
runTests<long long>(q, 4242, alloc::device, alloc::device);
120+
runTests<half>(q, half(4.2f), alloc::device, alloc::device);
121+
runTests<float>(q, 4.242f, alloc::device, alloc::device);
122+
runTests<double>(q, 4.24242, alloc::device, alloc::device);
123+
runTests<test_struct>(q, test_obj, alloc::device, alloc::device);
124+
}
125+
126+
if (dev.has(aspect::usm_host_allocations) &&
127+
dev.has(aspect::usm_shared_allocations)) {
128+
runTests<short>(q, 4, alloc::host, alloc::shared);
129+
runTests<int>(q, 42, alloc::host, alloc::shared);
130+
runTests<long>(q, 424, alloc::host, alloc::shared);
131+
runTests<long long>(q, 4242, alloc::host, alloc::shared);
132+
runTests<half>(q, half(4.2f), alloc::host, alloc::shared);
133+
runTests<float>(q, 4.242f, alloc::host, alloc::shared);
134+
runTests<double>(q, 4.24242, alloc::host, alloc::shared);
135+
runTests<test_struct>(q, test_obj, alloc::host, alloc::shared);
136+
}
137+
138+
if (dev.has(aspect::usm_host_allocations) &&
139+
dev.has(aspect::usm_device_allocations)) {
140+
runTests<short>(q, 4, alloc::host, alloc::device);
141+
runTests<int>(q, 42, alloc::host, alloc::device);
142+
runTests<long>(q, 424, alloc::host, alloc::device);
143+
runTests<long long>(q, 4242, alloc::host, alloc::device);
144+
runTests<half>(q, half(4.2f), alloc::host, alloc::device);
145+
runTests<float>(q, 4.242f, alloc::host, alloc::device);
146+
runTests<double>(q, 4.24242, alloc::host, alloc::device);
147+
runTests<test_struct>(q, test_obj, alloc::host, alloc::device);
148+
}
149+
150+
if (dev.has(aspect::usm_shared_allocations) &&
151+
dev.has(aspect::usm_device_allocations)) {
152+
runTests<short>(q, 4, alloc::shared, alloc::device);
153+
runTests<int>(q, 42, alloc::shared, alloc::device);
154+
runTests<long>(q, 424, alloc::shared, alloc::device);
155+
runTests<long long>(q, 4242, alloc::shared, alloc::device);
156+
runTests<half>(q, half(4.2f), alloc::shared, alloc::device);
157+
runTests<float>(q, 4.242f, alloc::shared, alloc::device);
158+
runTests<double>(q, 4.24242, alloc::shared, alloc::device);
159+
runTests<test_struct>(q, test_obj, alloc::shared, alloc::device);
160+
}
161+
162+
return 0;
163+
}

SYCL/USM/dep_events.cpp

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
//==---- dep_events.cpp - USM dependency test ------------------------------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out
10+
// RUN: %HOST_RUN_PLACEHOLDER %t1.out
11+
// RUN: %CPU_RUN_PLACEHOLDER %t1.out
12+
// RUN: %GPU_RUN_PLACEHOLDER %t1.out
13+
// RUN: %ACC_RUN_PLACEHOLDER %t1.out
14+
//
15+
// XFAIL: cuda
16+
// TODO enable the test when cuda_piextUSMEnqueuePrefetch starts handling flags
17+
18+
#include <CL/sycl.hpp>
19+
20+
using namespace sycl;
21+
22+
int main() {
23+
queue q;
24+
25+
int *x = malloc_shared<int>(1, q);
26+
int *y = malloc_shared<int>(1, q);
27+
int *z = malloc_shared<int>(1, q);
28+
29+
event eMemset1 = q.memset(x, 0, sizeof(int), event{}); // x = 0
30+
event eMemset2 = q.memset(y, 0, sizeof(int), std::vector<event>{}); // y = 0
31+
event eFill = q.fill(x, 1, 1, {eMemset1, eMemset2}); // x = 1
32+
event eMemcpy = q.memcpy(y, x, sizeof(int), eFill); // y = 1
33+
event eCopy = q.copy(y, z, 1, eMemcpy); // z = 1
34+
event ePrefetch = q.prefetch(z, sizeof(int), eCopy); //
35+
q.single_task<class kernel>(ePrefetch, [=] { *z *= 2; }).wait(); // z = 2
36+
37+
int error = (*z != 2) ? 1 : 0;
38+
std::cout << (error ? "failed\n" : "passed\n");
39+
40+
free(x, q);
41+
free(y, q);
42+
free(z, q);
43+
44+
return error;
45+
}

SYCL/USM/memadvise.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// RUN: %HOST_RUN_PLACEHOLDER %t1.out
33
// RUN: %CPU_RUN_PLACEHOLDER %t1.out
44
// RUN: %GPU_RUN_PLACEHOLDER %t1.out
5+
// RUN: %ACC_RUN_PLACEHOLDER %t1.out
56

67
//==---------------- memadvise.cpp - Shared Memory Linked List test --------==//
78
//
@@ -37,7 +38,8 @@ int main() {
3738
if (s_head == nullptr) {
3839
return -1;
3940
}
40-
q.mem_advise(s_head, sizeof(Node), (pi_mem_advice)mem_advice);
41+
// Test queue::mem_advise
42+
q.mem_advise(s_head, sizeof(Node), pi_mem_advice(mem_advice));
4143
Node *s_cur = s_head;
4244

4345
for (int i = 0; i < numNodes; i++) {
@@ -48,7 +50,10 @@ int main() {
4850
if (s_cur->pNext == nullptr) {
4951
return -1;
5052
}
51-
q.mem_advise(s_cur->pNext, sizeof(Node), (pi_mem_advice)mem_advice);
53+
// Test handler::mem_advise
54+
q.submit([&](handler &cgh) {
55+
cgh.mem_advise(s_cur->pNext, sizeof(Node), pi_mem_advice(mem_advice));
56+
});
5257
} else {
5358
s_cur->pNext = nullptr;
5459
}
@@ -69,7 +74,6 @@ int main() {
6974
e1.wait();
7075

7176
s_cur = s_head;
72-
int mismatches = 0;
7377
for (int i = 0; i < numNodes; i++) {
7478
const int want = i * 4 + 1;
7579
if (s_cur->Num != want) {

0 commit comments

Comments
 (0)