Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit 8e3692c

Browse files
authored
[ESIMD] Extend tests to cover sycl::half. (#640)
* [ESIMD] Add infra for C++ ops testing, add heavy 'half' tests, extend existing. Signed-off-by: Konstantin S Bobrovsky <[email protected]>
1 parent 195d427 commit 8e3692c

12 files changed

+1083
-153
lines changed
Lines changed: 300 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,300 @@
1+
//==--------------- bin_un_cmp_ops_heavy.cpp - DPC++ ESIMD on-device test -==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
// REQUIRES: gpu
9+
// UNSUPPORTED: cuda || hip
10+
// RUN: %clangxx -fsycl %s -o %t.out
11+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
12+
13+
// Tests various binary operations applied to simd objects.
14+
15+
// TODO
16+
// Arithmetic operations behaviour depends on Gen's control regiter's rounding
17+
// mode, which is RTNE by default:
18+
// cr0.5:4 is 00b = Round to Nearest or Even (RTNE)
19+
// For half this leads to divergence between Gen and host (emulated) results
20+
// larger than certain threshold. Might need to tune the cr0 once this feature
21+
// is available in ESIMD.
22+
//
23+
24+
#include "../esimd_test_utils.hpp"
25+
26+
#include <CL/sycl.hpp>
27+
#include <iostream>
28+
#include <sycl/ext/intel/experimental/esimd.hpp>
29+
30+
using namespace cl::sycl;
31+
using namespace sycl::ext::intel::experimental::esimd;
32+
33+
template <class T1, class T2, int VL, class OpClass, class Ops> class TestID;
34+
35+
// Result type of a scalar binary Op
36+
template <class T1, class T2, class OpClass>
37+
using scalar_comp_t =
38+
std::conditional_t<std::is_same_v<OpClass, esimd_test::CmpOp>,
39+
typename simd_mask<8>::element_type,
40+
__SEIEED::computation_type_t<T1, T2>>;
41+
42+
// Result type of a vector binary Op
43+
template <class T1, class T2, class OpClass, int N = 0>
44+
using comp_t = std::conditional_t<
45+
N == 0, scalar_comp_t<T1, T2, OpClass>,
46+
std::conditional_t<std::is_same_v<OpClass, esimd_test::CmpOp>, simd_mask<N>,
47+
simd<__SEIEED::computation_type_t<T1, T2>, N>>>;
48+
49+
// Helpers for printing
50+
template <class T> auto cast(T val) { return val; }
51+
template <> auto cast<char>(char val) { return (int)val; }
52+
template <> auto cast<unsigned char>(unsigned char val) {
53+
return (unsigned int)val;
54+
}
55+
#ifdef __SYCL_DEVICE_ONLY__
56+
template <> auto cast<_Float16>(_Float16 val) { return (float)val; }
57+
#endif
58+
59+
// Main test function.
60+
// T1, T2 - operand types,
61+
// VL - vector length,
62+
// OpClass - binary or comparison operations,
63+
// VerifyF and InitF - verification and initialization function types
64+
// (instantiated within the test function),
65+
// Ops - a compile-time sequence of operations to test.
66+
//
67+
template <class T1, class T2, int VL, class OpClass,
68+
template <class, class, class> class VerifyF,
69+
template <class, class, class> class InitF, class Ops>
70+
bool test(Ops ops, queue &q, comp_t<T1, T2, OpClass> epsilon = 0) {
71+
// Log test case info
72+
std::cout << "Testing T1=" << typeid(T1).name() << " T2=" << typeid(T2).name()
73+
<< ", VL=" << VL << " ...\n";
74+
std::cout << "Operations:";
75+
esimd_test::iterate_ops(ops, [=](OpClass op) {
76+
std::cout << " '" << esimd_test::Op2Str(op) << "'";
77+
});
78+
std::cout << "\n";
79+
80+
// initialize test data
81+
constexpr int Size = 1024 * 7;
82+
T1 *A = sycl::malloc_shared<T1>(Size, q);
83+
T2 *B = sycl::malloc_shared<T2>(Size, q);
84+
constexpr int NumOps = (int)Ops::size;
85+
int CSize = NumOps * Size;
86+
using T = comp_t<T1, T2, OpClass>;
87+
// Result array. For each pair of A[i] and B[i] elements it reserves NumOps
88+
// elements to store result of all operations under test applied to the A[i]
89+
// and B[i]
90+
T *C = sycl::malloc_shared<T>(CSize, q);
91+
InitF<T1, T2, OpClass> init;
92+
93+
for (int i = 0; i < Size; ++i) {
94+
init(A, B, C, i);
95+
}
96+
97+
// submit the kernel
98+
try {
99+
auto e = q.submit([&](handler &cgh) {
100+
cgh.parallel_for<TestID<T1, T2, VL, OpClass, Ops>>(
101+
Size / VL, [=](id<1> i) SYCL_ESIMD_KERNEL {
102+
unsigned off = i * VL;
103+
simd<T1, VL> va(A + off, vector_aligned_tag{});
104+
simd<T2, VL> vb(B + off, vector_aligned_tag{});
105+
106+
// applies each of the input operations to the va and vb vectors,
107+
// then invokes the lambda below, passing the result of the
108+
// operation, its ID and sequential number within the input sequence
109+
esimd_test::apply_ops(
110+
ops, va, vb,
111+
[=](comp_t<T1, T2, OpClass, VL> res, OpClass op,
112+
unsigned op_num) {
113+
unsigned res_off = off * NumOps + op_num * VL;
114+
res.copy_to(C + res_off, vector_aligned_tag{});
115+
});
116+
});
117+
});
118+
e.wait();
119+
} catch (sycl::exception const &e) {
120+
std::cout << "SYCL exception caught: " << e.what() << '\n';
121+
sycl::free(A, q);
122+
sycl::free(B, q);
123+
sycl::free(C, q);
124+
return false;
125+
}
126+
127+
int err_cnt = 0;
128+
129+
// now verify the results using provided verification function type
130+
for (unsigned i = 0; i < Size / VL; ++i) {
131+
unsigned off = i * VL;
132+
133+
for (int j = 0; j < VL; ++j) {
134+
T1 a = A[off + j];
135+
T2 b = B[off + j];
136+
137+
esimd_test::apply_ops(
138+
ops, a, b, [&](T Gold, OpClass op, unsigned op_num) {
139+
unsigned res_off = off * NumOps + op_num * VL;
140+
T Res = C[res_off + j];
141+
using Tint = esimd_test::int_type_t<sizeof(T)>;
142+
Tint ResBits = *(Tint *)&Res;
143+
Tint GoldBits = *(Tint *)&Gold;
144+
VerifyF<T1, T2, OpClass> verify_f(epsilon);
145+
146+
if (!verify_f(Gold, Res, op)) {
147+
if (++err_cnt < 10) {
148+
std::cout << " failed at index " << (res_off + j) << ", op "
149+
<< esimd_test::Op2Str(op) << ": " << cast(Res)
150+
<< "(0x" << std::hex << ResBits << ")"
151+
<< " != " << std::dec << cast(Gold) << "(0x"
152+
<< std::hex << GoldBits << ") [" << std::dec
153+
<< cast(a) << " " << esimd_test::Op2Str(op) << " "
154+
<< cast(b) << "]\n";
155+
}
156+
}
157+
});
158+
}
159+
}
160+
if (err_cnt > 0) {
161+
auto Size1 = NumOps * Size;
162+
std::cout << " pass rate: "
163+
<< ((float)(Size1 - err_cnt) / (float)Size1) * 100.0f << "% ("
164+
<< (Size1 - err_cnt) << "/" << Size1 << ")\n";
165+
}
166+
167+
free(A, q);
168+
free(B, q);
169+
free(C, q);
170+
std::cout << (err_cnt > 0 ? " FAILED\n" : " Passed\n");
171+
return err_cnt == 0;
172+
}
173+
174+
// Flavours of verification function types.
175+
176+
template <class T1, class T2, class OpClass> struct verify_strict {
177+
using T = comp_t<T1, T2, OpClass>;
178+
179+
verify_strict(T) {}
180+
181+
bool operator()(T res, T gold, OpClass op) { return res == gold; }
182+
};
183+
184+
#define EQ(x, y, epsilon) \
185+
((x) > (y) ? (x) - (y) <= epsilon : (y) - (x) <= epsilon)
186+
187+
template <class T1, class T2, class OpClass> struct verify_epsilon {
188+
using T = comp_t<T1, T2, OpClass>;
189+
T epsilon;
190+
verify_epsilon(T epsilon) : epsilon(epsilon) {}
191+
192+
bool operator()(T res, T gold, OpClass op) {
193+
if constexpr (std::is_same_v<OpClass, esimd_test::BinaryOp>) {
194+
if (op == esimd_test::BinaryOp::div) {
195+
return EQ(res, gold, epsilon);
196+
}
197+
}
198+
return res == gold;
199+
}
200+
};
201+
202+
template <class T1, class T2, class OpClass> struct verify_n {
203+
using T = comp_t<T1, T2, OpClass>;
204+
int n;
205+
verify_n(int n) : n(n) {}
206+
207+
bool operator()(T res, T gold, OpClass op) {
208+
using Tint = esimd_test::int_type_t<sizeof(T)>;
209+
Tint res_bits = *(Tint *)&res;
210+
Tint gold_bits = *(Tint *)&gold;
211+
return (abs(gold_bits - res_bits) > n) ? false : true;
212+
}
213+
};
214+
215+
// Flavours of initialization function types.
216+
217+
template <class T1, class T2, class OpClass> struct init_default {
218+
using T = comp_t<T1, T2, OpClass>;
219+
220+
void operator()(T1 *A, T2 *B, T *C, int i) {
221+
A[i] = (i % 3) * 90 + 10; /*10, 100, 190, 10, ...*/
222+
if constexpr (std::is_unsigned_v<T2>) {
223+
B[i] = (i % 3) * 99 + 1 /*1, 100, 199, 1, ...*/;
224+
} else {
225+
B[i] = (i % 4) * 180 - 170; /*-170, 10, 190, 370, -170,...*/
226+
}
227+
C[i] = 0;
228+
}
229+
};
230+
231+
template <class T1, class T2, class OpClass> struct init_for_shift {
232+
using T = comp_t<T1, T2, OpClass>;
233+
234+
void operator()(T1 *A, T2 *B, T *C, int i) {
235+
if constexpr (std::is_unsigned_v<T1>) {
236+
A[i] = (i % 3) + 100; /*100, 101, 102, 100, ...*/
237+
} else {
238+
A[i] = (i % 4) * 100 - 150; /*-150, -50, 50, 150, -150, ...*/
239+
}
240+
B[i] = (i % 3);
241+
C[i] = 0;
242+
}
243+
};
244+
245+
// shortcuts for less clutter
246+
template <class T1, class T2, class C> using VSf = verify_strict<T1, T2, C>;
247+
template <class T1, class T2, class C> using VEf = verify_epsilon<T1, T2, C>;
248+
template <class T1, class T2, class C> using VNf = verify_n<T1, T2, C>;
249+
template <class T1, class T2, class C> using IDf = init_default<T1, T2, C>;
250+
template <class T1, class T2, class C> using ISf = init_for_shift<T1, T2, C>;
251+
252+
int main(void) {
253+
queue q(esimd_test::ESIMDSelector{}, esimd_test::createExceptionHandler());
254+
255+
auto dev = q.get_device();
256+
std::cout << "Running on " << dev.get_info<info::device::name>() << "\n";
257+
bool passed = true;
258+
using BinOp = esimd_test::BinaryOp;
259+
260+
auto arith_ops = esimd_test::ArithBinaryOps;
261+
passed &= test<unsigned char, int, 1, BinOp, VSf, IDf>(arith_ops, q);
262+
passed &= test<char, float, 7, BinOp, VEf, IDf>(arith_ops, q, 0.000001f);
263+
passed &= test<short, double, 7, BinOp, VSf, IDf>(arith_ops, q);
264+
passed &= test<float, float, 32, BinOp, VEf, IDf>(arith_ops, q, 0.000001f);
265+
passed &= test<half, char, 1, BinOp, verify_n, IDf>(arith_ops, q, 1);
266+
passed &= test<half, unsigned int, 32, BinOp, VSf, IDf>(arith_ops, q, 1);
267+
passed &= test<double, half, 7, BinOp, VSf, IDf>(arith_ops, q);
268+
passed &= test<short, uint64_t, 7, BinOp, VSf, IDf>(arith_ops, q);
269+
270+
auto int_ops =
271+
esimd_test::IntBinaryOpsNoShift; // different data needed for shift
272+
passed &= test<unsigned char, unsigned int, 1, BinOp, VSf, IDf>(int_ops, q);
273+
passed &= test<char, uint64_t, 1, BinOp, VSf, IDf>(int_ops, q);
274+
passed &= test<uint64_t, char, 32, BinOp, VSf, IDf>(int_ops, q);
275+
passed &= test<int, short, 1, BinOp, VSf, IDf>(int_ops, q);
276+
passed &= test<short, int, 8, BinOp, VSf, IDf>(int_ops, q);
277+
passed &= test<int, int, 7, BinOp, VSf, IDf>(int_ops, q);
278+
279+
auto sh_ops = esimd_test::BinaryOpSeq<BinOp::shl, BinOp::shr>{};
280+
passed &= test<unsigned char, unsigned int, 1, BinOp, VSf, ISf>(sh_ops, q);
281+
passed &= test<char, int64_t, 1, BinOp, VSf, ISf>(sh_ops, q);
282+
passed &= test<uint64_t, char, 32, BinOp, VSf, ISf>(sh_ops, q);
283+
passed &= test<int, short, 1, BinOp, VSf, ISf>(sh_ops, q);
284+
passed &= test<short, int, 8, BinOp, VSf, ISf>(sh_ops, q);
285+
passed &= test<int, int, 7, BinOp, VSf, ISf>(sh_ops, q);
286+
287+
using CmpOp = esimd_test::CmpOp;
288+
auto cmp_ops = esimd_test::CmpOps;
289+
passed &= test<unsigned char, int, 1, CmpOp, VSf, IDf>(cmp_ops, q);
290+
passed &= test<char, float, 7, CmpOp, VSf, IDf>(cmp_ops, q);
291+
passed &= test<short, double, 7, CmpOp, VSf, IDf>(cmp_ops, q);
292+
passed &= test<float, float, 32, CmpOp, VSf, IDf>(cmp_ops, q);
293+
passed &= test<half, char, 1, CmpOp, VSf, IDf>(cmp_ops, q, 1);
294+
passed &= test<half, unsigned int, 32, CmpOp, VSf, IDf>(cmp_ops, q, 1);
295+
passed &= test<double, half, 7, CmpOp, VSf, IDf>(cmp_ops, q);
296+
passed &= test<short, uint64_t, 7, CmpOp, VSf, IDf>(cmp_ops, q);
297+
298+
std::cout << (passed ? "Test PASSED\n" : "Test FAILED\n");
299+
return passed ? 0 : 1;
300+
}

0 commit comments

Comments
 (0)