Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit d7cf954

Browse files
authored
[SYCL][ESIMD] Add new test for simd::copy_to/from with alignment flags (#560)
* [SYCL][ESIMD] Add new test for simd::copy_to/from with alignment flags Signed-off-by: Sergey Dmitriev <[email protected]>
1 parent 083def3 commit d7cf954

File tree

1 file changed

+246
-0
lines changed

1 file changed

+246
-0
lines changed

SYCL/ESIMD/api/simd_copy_to_from.cpp

Lines changed: 246 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,246 @@
1+
//==----- simd_copy_to_from.cpp - DPC++ ESIMD simd::copy_to/from test -----==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
// REQUIRES: gpu
9+
// UNSUPPORTED: cuda || hip
10+
// RUN: %clangxx -fsycl %s -o %t.out
11+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
12+
13+
// This test checks simd::copy_from/to methods with alignment flags.
14+
15+
#include "../esimd_test_utils.hpp"
16+
17+
#include <CL/sycl.hpp>
18+
#include <CL/sycl/builtins_esimd.hpp>
19+
#include <algorithm>
20+
#include <array>
21+
#include <cstdlib>
22+
#include <iostream>
23+
#ifdef _WIN32
24+
#include <malloc.h>
25+
#endif // _WIN32
26+
27+
#include <sycl/ext/intel/experimental/esimd.hpp>
28+
29+
// Workaround for absense of std::aligned_alloc on Windows.
30+
#ifdef _WIN32
31+
#define aligned_malloc(align, size) _aligned_malloc(size, align)
32+
#define aligned_free(ptr) _aligned_free(ptr)
33+
#else // _WIN32
34+
#define aligned_malloc(align, size) std::aligned_alloc(align, size)
35+
#define aligned_free(ptr) std::free(ptr)
36+
#endif // _WIN32
37+
38+
using namespace cl::sycl;
39+
using namespace sycl::ext::intel::experimental;
40+
using namespace sycl::ext::intel::experimental::esimd;
41+
42+
template <typename T, int N, typename Flags>
43+
bool testUSM(queue &Q, T *Src, T *Dst, unsigned Off, const std::string &Title,
44+
Flags) {
45+
std::cout << " Running USM " << Title << " test, N=" << N << "...\n";
46+
47+
for (int I = 0; I < N; ++I) {
48+
Src[I + Off] = I + 1;
49+
Dst[I + Off] = 0;
50+
}
51+
52+
try {
53+
Q.submit([&](handler &CGH) {
54+
CGH.parallel_for(sycl::range<1>{1}, [=](id<1>) SYCL_ESIMD_KERNEL {
55+
simd<T, N> Vals;
56+
Vals.copy_from(Src + Off, Flags{});
57+
Vals.copy_to(Dst + Off, Flags{});
58+
});
59+
}).wait();
60+
} catch (cl::sycl::exception const &E) {
61+
std::cout << "ERROR. SYCL exception caught: " << E.what() << std::endl;
62+
return false;
63+
}
64+
65+
unsigned NumErrs = 0;
66+
for (int I = 0; I < N; ++I)
67+
if (Dst[I + Off] != Src[I + Off])
68+
if (++NumErrs <= 10)
69+
std::cout << "failed at " << I << ": " << Dst[I + Off]
70+
<< " (Dst) != " << Src[I + Off] << " (Src)\n";
71+
72+
std::cout << (NumErrs == 0 ? " Passed\n" : " FAILED\n");
73+
return NumErrs == 0;
74+
}
75+
76+
template <typename T, int N, typename Flags>
77+
bool testAcc(queue &Q, T *Src, T *Dst, unsigned Off, const std::string &Title,
78+
Flags) {
79+
std::cout << " Running accessor " << Title << " test, N=" << N << "...\n";
80+
81+
for (int I = 0; I < N; ++I) {
82+
Src[I + Off] = I + 1;
83+
Dst[I + Off] = 0;
84+
}
85+
86+
try {
87+
buffer<T, 1> SrcB(Src, range<1>(Off + N));
88+
buffer<T, 1> DstB(Dst, range<1>(Off + N));
89+
90+
Q.submit([&](handler &CGH) {
91+
auto SrcA = SrcB.template get_access<access::mode::read>(CGH);
92+
auto DstA = DstB.template get_access<access::mode::write>(CGH);
93+
94+
CGH.parallel_for(sycl::range<1>{1}, [=](id<1>) SYCL_ESIMD_KERNEL {
95+
simd<T, N> Vals;
96+
Vals.copy_from(SrcA, Off * sizeof(T), Flags{});
97+
Vals.copy_to(DstA, Off * sizeof(T), Flags{});
98+
});
99+
}).wait();
100+
} catch (cl::sycl::exception const &E) {
101+
std::cout << "ERROR. SYCL exception caught: " << E.what() << std::endl;
102+
return false;
103+
}
104+
105+
unsigned NumErrs = 0;
106+
for (int I = 0; I < N; ++I)
107+
if (Dst[I + Off] != Src[I + Off])
108+
if (++NumErrs <= 10)
109+
std::cout << "failed at " << I << ": " << Dst[I + Off]
110+
<< " (Dst) != " << Src[I + Off] << " (Src)\n";
111+
112+
std::cout << (NumErrs == 0 ? " Passed\n" : " FAILED\n");
113+
return NumErrs == 0;
114+
}
115+
116+
template <typename T, int N> bool testUSM(const std::string &Type, queue &Q) {
117+
struct Deleter {
118+
queue Q;
119+
void operator()(T *Ptr) {
120+
if (Ptr) {
121+
sycl::free(Ptr, Q);
122+
}
123+
}
124+
};
125+
126+
std::unique_ptr<T, Deleter> Src(sycl::aligned_alloc_shared<T>(1024u, 512u, Q),
127+
Deleter{Q});
128+
std::unique_ptr<T, Deleter> Dst(sycl::aligned_alloc_shared<T>(1024u, 512u, Q),
129+
Deleter{Q});
130+
131+
constexpr unsigned VecAlignOffset = esimd::detail::getNextPowerOf2<N>();
132+
133+
bool Pass = true;
134+
135+
Pass &= testUSM<T, N>(Q, Src.get(), Dst.get(), VecAlignOffset + 1u,
136+
Type + " element_aligned", element_aligned);
137+
Pass &= testUSM<T, N>(Q, Src.get(), Dst.get(), VecAlignOffset,
138+
Type + " vector_aligned", vector_aligned);
139+
Pass &= testUSM<T, N>(Q, Src.get(), Dst.get(), 128u / sizeof(T),
140+
Type + " overaligned<128>", overaligned<128u>);
141+
142+
return Pass;
143+
}
144+
145+
template <typename T> bool testUSM(const std::string &Type, queue &Q) {
146+
bool Pass = true;
147+
148+
Pass &= testUSM<T, 1>(Type, Q);
149+
Pass &= testUSM<T, 2>(Type, Q);
150+
Pass &= testUSM<T, 3>(Type, Q);
151+
Pass &= testUSM<T, 4>(Type, Q);
152+
153+
Pass &= testUSM<T, 7>(Type, Q);
154+
Pass &= testUSM<T, 8>(Type, Q);
155+
156+
Pass &= testUSM<T, 15>(Type, Q);
157+
Pass &= testUSM<T, 16>(Type, Q);
158+
159+
if constexpr (sizeof(T) < 8) {
160+
Pass &= testUSM<T, 24>(Type, Q);
161+
Pass &= testUSM<T, 25>(Type, Q);
162+
163+
Pass &= testUSM<T, 31>(Type, Q);
164+
Pass &= testUSM<T, 32>(Type, Q);
165+
}
166+
167+
return Pass;
168+
}
169+
170+
template <typename T, int N> bool testAcc(const std::string &Type, queue &Q) {
171+
struct Deleter {
172+
void operator()(T *Ptr) {
173+
if (Ptr) {
174+
aligned_free(Ptr);
175+
}
176+
}
177+
};
178+
179+
std::unique_ptr<T, Deleter> Src(
180+
static_cast<T *>(aligned_malloc(1024u, 512u * sizeof(T))), Deleter{});
181+
std::unique_ptr<T, Deleter> Dst(
182+
static_cast<T *>(aligned_malloc(1024u, 512u * sizeof(T))), Deleter{});
183+
184+
constexpr unsigned VecAlignOffset = esimd::detail::getNextPowerOf2<N>();
185+
186+
bool Pass = true;
187+
188+
Pass &= testAcc<T, N>(Q, Src.get(), Dst.get(), VecAlignOffset + 1u,
189+
Type + " element_aligned", element_aligned);
190+
Pass &= testAcc<T, N>(Q, Src.get(), Dst.get(), VecAlignOffset,
191+
Type + " vector_aligned", vector_aligned);
192+
Pass &= testAcc<T, N>(Q, Src.get(), Dst.get(), 128u / sizeof(T),
193+
Type + " overaligned<128>", overaligned<128u>);
194+
195+
return Pass;
196+
}
197+
198+
template <typename T> bool testAcc(const std::string &Type, queue &Q) {
199+
bool Pass = true;
200+
201+
Pass &= testAcc<T, 1>(Type, Q);
202+
Pass &= testAcc<T, 2>(Type, Q);
203+
Pass &= testAcc<T, 3>(Type, Q);
204+
Pass &= testAcc<T, 4>(Type, Q);
205+
206+
Pass &= testAcc<T, 7>(Type, Q);
207+
Pass &= testAcc<T, 8>(Type, Q);
208+
209+
Pass &= testAcc<T, 15>(Type, Q);
210+
Pass &= testAcc<T, 16>(Type, Q);
211+
212+
if constexpr (sizeof(T) < 8) {
213+
Pass &= testAcc<T, 24>(Type, Q);
214+
Pass &= testAcc<T, 25>(Type, Q);
215+
216+
Pass &= testAcc<T, 31>(Type, Q);
217+
Pass &= testAcc<T, 32>(Type, Q);
218+
}
219+
220+
return Pass;
221+
}
222+
223+
int main(void) {
224+
queue Q(esimd_test::ESIMDSelector{}, esimd_test::createExceptionHandler());
225+
auto Dev = Q.get_device();
226+
std::cout << "Running on " << Dev.get_info<info::device::name>() << "\n";
227+
228+
bool Pass = true;
229+
230+
Pass &= testUSM<int8_t>("int8_t", Q);
231+
Pass &= testUSM<int16_t>("int16_t", Q);
232+
Pass &= testUSM<int32_t>("int32_t", Q);
233+
Pass &= testUSM<int64_t>("int64_t", Q);
234+
Pass &= testUSM<float>("float", Q);
235+
Pass &= testUSM<double>("double", Q);
236+
237+
Pass &= testAcc<int8_t>("int8_t", Q);
238+
Pass &= testAcc<int16_t>("int16_t", Q);
239+
Pass &= testAcc<int32_t>("int32_t", Q);
240+
Pass &= testAcc<int64_t>("int64_t", Q);
241+
Pass &= testAcc<float>("float", Q);
242+
Pass &= testAcc<double>("double", Q);
243+
244+
std::cout << (Pass ? "Test Passed\n" : "Test FAILED\n");
245+
return Pass ? 0 : 1;
246+
}

0 commit comments

Comments
 (0)