Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit fa80b79

Browse files
authored
[ESIMD] Add scalar argument test cases to esimd_math.cpp (#864)
Signed-off-by: Konstantin S Bobrovsky <[email protected]>
1 parent 6f29028 commit fa80b79

File tree

1 file changed

+145
-78
lines changed

1 file changed

+145
-78
lines changed

SYCL/ESIMD/ext_math.cpp

Lines changed: 145 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ using namespace sycl::ext::intel::experimental::esimd;
3838

3939
constexpr int accuracy_limit = 32767 * 3.14 - 1;
4040

41-
template <class T> struct InitDataFuncTrig {
41+
template <class T> struct InitTrig {
4242
void operator()(T *In, T *Out, size_t Size) const {
4343
for (auto I = 0; I < Size; ++I) {
4444
In[I] = (I + 1) % accuracy_limit;
@@ -47,7 +47,7 @@ template <class T> struct InitDataFuncTrig {
4747
}
4848
};
4949

50-
template <class T> struct InitDataFuncWide {
50+
template <class T> struct InitWide {
5151
void operator()(T *In, T *Out, size_t Size) const {
5252
for (auto I = 0; I < Size; ++I) {
5353
In[I] = I + 1.0;
@@ -56,7 +56,7 @@ template <class T> struct InitDataFuncWide {
5656
}
5757
};
5858

59-
template <class T> struct InitDataFuncNarrow {
59+
template <class T> struct InitNarrow {
6060
void operator()(T *In, T *Out, size_t Size) const {
6161
for (auto I = 0; I < Size; ++I) {
6262
In[I] = 2.0f + 16.0f * ((T)I / (T)(Size - 1)); // in [2..18] range
@@ -65,7 +65,7 @@ template <class T> struct InitDataFuncNarrow {
6565
}
6666
};
6767

68-
template <class T> struct InitDataInRange0_5 {
68+
template <class T> struct InitInRange0_5 {
6969
void operator()(T *In, T *Out, size_t Size) const {
7070
for (auto I = 0; I < Size; ++I) {
7171
In[I] = 5.0f * ((T)I / (T)(Size - 1)); // in [0..5] range
@@ -74,7 +74,7 @@ template <class T> struct InitDataInRange0_5 {
7474
}
7575
};
7676

77-
template <class T> struct InitDataBinFuncNarrow {
77+
template <class T> struct InitBin {
7878
void operator()(T *In1, T *In2, T *Out, size_t Size) const {
7979
for (auto I = 0; I < Size; ++I) {
8080
In1[I] = I % 17 + 1;
@@ -104,9 +104,17 @@ enum class MathOp {
104104

105105
// --- Template functions calculating given math operation on host and device
106106

107-
template <class T, int VL, MathOp Op> struct FuncESIMD;
108-
template <class T, int VL, MathOp Op> struct BinFuncESIMD;
109-
template <class T, int VL, MathOp Op> struct FuncSYCL;
107+
enum ArgKind {
108+
AllVec,
109+
AllSca,
110+
Sca1Vec2,
111+
Sca2Vec1
112+
};
113+
114+
template <class T, int N, MathOp Op, int Args=AllVec> struct ESIMDf;
115+
template <class T, int N, MathOp Op, int Args=AllVec> struct BinESIMDf;
116+
template <class T, int N, MathOp Op, int Args=AllVec> struct SYCLf;
117+
110118
template <class T, MathOp Op> struct HostFunc;
111119

112120
#define DEFINE_HOST_OP(Op, HostOp) \
@@ -137,9 +145,14 @@ DEFINE_HOST_BIN_OP(pow, std::pow(X, Y));
137145
// --- Specializations per each extended math operation
138146

139147
#define DEFINE_ESIMD_DEVICE_OP(Op) \
140-
template <class T, int VL> struct FuncESIMD<T, VL, MathOp::Op> { \
141-
simd<T, VL> operator()(const simd<T, VL> &X) const SYCL_ESIMD_FUNCTION { \
142-
return esimd::Op<T, VL>(X); \
148+
template <class T, int N> struct ESIMDf<T, N, MathOp::Op, AllVec> { \
149+
simd<T, N> operator()(simd<T, N>X) const SYCL_ESIMD_FUNCTION { \
150+
return esimd::Op<T, N>(X); \
151+
} \
152+
}; \
153+
template <class T, int N> struct ESIMDf<T, N, MathOp::Op, AllSca> { \
154+
simd<T, N> operator()(T X) const SYCL_ESIMD_FUNCTION { \
155+
return esimd::Op<T, N>(X); \
143156
} \
144157
};
145158

@@ -156,21 +169,43 @@ DEFINE_ESIMD_DEVICE_OP(exp2);
156169
DEFINE_ESIMD_DEVICE_OP(log2);
157170

158171
#define DEFINE_ESIMD_DEVICE_BIN_OP(Op) \
159-
template <class T, int VL> struct BinFuncESIMD<T, VL, MathOp::Op> { \
160-
simd<T, VL> operator()(const simd<T, VL> &X, \
161-
const simd<T, VL> &Y) const SYCL_ESIMD_FUNCTION { \
162-
return esimd::Op<T, VL>(X, Y); \
172+
template <class T, int N> struct BinESIMDf<T, N, MathOp::Op, AllSca> { \
173+
simd<T, N> operator()(T X, \
174+
T Y) const SYCL_ESIMD_FUNCTION { \
175+
return esimd::Op<T, N>(X, Y); \
176+
} \
177+
}; \
178+
template <class T, int N> struct BinESIMDf<T, N, MathOp::Op, AllVec> { \
179+
simd<T, N> operator()(simd<T, N>X, \
180+
simd<T, N>Y) const SYCL_ESIMD_FUNCTION { \
181+
return esimd::Op<T, N>(X, Y); \
182+
} \
183+
}; \
184+
template <class T, int N> struct BinESIMDf<T, N, MathOp::Op, Sca1Vec2> { \
185+
simd<T, N> operator()(T X, simd<T, N> Y) const SYCL_ESIMD_FUNCTION { \
186+
return esimd::Op<T, N>(X, Y); \
187+
} \
188+
}; \
189+
template <class T, int N> struct BinESIMDf<T, N, MathOp::Op, Sca2Vec1> { \
190+
simd<T, N> operator()(simd<T, N>X, \
191+
T Y) const SYCL_ESIMD_FUNCTION { \
192+
return esimd::Op<T, N>(X, Y); \
163193
} \
164194
};
165195

166196
DEFINE_ESIMD_DEVICE_BIN_OP(div_ieee);
167197
DEFINE_ESIMD_DEVICE_BIN_OP(pow);
168198

169199
#define DEFINE_SYCL_DEVICE_OP(Op) \
170-
template <class T, int VL> struct FuncSYCL<T, VL, MathOp::Op> { \
171-
simd<T, VL> operator()(const simd<T, VL> &X) const SYCL_ESIMD_FUNCTION { \
200+
template <class T, int N> struct SYCLf<T, N, MathOp::Op, AllVec> { \
201+
simd<T, N> operator()(simd<T, N>X) const SYCL_ESIMD_FUNCTION { \
172202
/* T must be float for SYCL, so not a template parameter for sycl::Op*/ \
173-
return sycl::Op<VL>(X); \
203+
return sycl::Op<N>(X); \
204+
} \
205+
}; \
206+
template <class T, int N> struct SYCLf<T, N, MathOp::Op, AllSca> { \
207+
simd<T, N> operator()(T X) const SYCL_ESIMD_FUNCTION { \
208+
return sycl::Op<N>(X); \
174209
} \
175210
};
176211

@@ -181,8 +216,8 @@ DEFINE_SYCL_DEVICE_OP(log);
181216

182217
// --- Generic kernel calculating an extended math operation on array elements
183218

184-
template <class T, int VL, MathOp Op,
185-
template <class, int, MathOp> class Kernel, typename AccIn,
219+
template <class T, int N, MathOp Op,
220+
template <class, int, MathOp, int> class Kernel, typename AccIn,
186221
typename AccOut>
187222
struct UnaryDeviceFunc {
188223
AccIn In;
@@ -191,17 +226,27 @@ struct UnaryDeviceFunc {
191226
UnaryDeviceFunc(AccIn &In, AccOut &Out) : In(In), Out(Out) {}
192227

193228
void operator()(id<1> I) const SYCL_ESIMD_KERNEL {
194-
unsigned int Offset = I * VL * sizeof(T);
195-
simd<T, VL> Vx;
229+
unsigned int Offset = I * N * sizeof(T);
230+
simd<T, N> Vx;
196231
Vx.copy_from(In, Offset);
197-
Kernel<T, VL, Op> DevF{};
198-
Vx = DevF(Vx);
232+
233+
if (I.get(0) % 2 == 0) {
234+
for (int J = 0; J < N; J++) {
235+
Kernel<T, N, Op, AllSca> DevF{};
236+
T Val = Vx[J];
237+
simd<T, N> V = DevF(Val); // scalar arg
238+
Vx[J] = V[J];
239+
}
240+
} else {
241+
Kernel<T, N, Op, AllVec> DevF{};
242+
Vx = DevF(Vx); // vector arg
243+
}
199244
Vx.copy_to(Out, Offset);
200245
};
201246
};
202247

203-
template <class T, int VL, MathOp Op,
204-
template <class, int, MathOp> class Kernel, typename AccIn,
248+
template <class T, int N, MathOp Op,
249+
template <class, int, MathOp, int> class Kernel, typename AccIn,
205250
typename AccOut>
206251
struct BinaryDeviceFunc {
207252
AccIn In1;
@@ -212,22 +257,50 @@ struct BinaryDeviceFunc {
212257
: In1(In1), In2(In2), Out(Out) {}
213258

214259
void operator()(id<1> I) const SYCL_ESIMD_KERNEL {
215-
unsigned int Offset = I * VL * sizeof(T);
216-
simd<T, VL> V1(In1, Offset);
217-
simd<T, VL> V2(In2, Offset);
218-
Kernel<T, VL, Op> DevF{};
219-
simd<T, VL> V = DevF(V1, V2);
260+
unsigned int Offset = I * N * sizeof(T);
261+
simd<T, N> V1(In1, Offset);
262+
simd<T, N> V2(In2, Offset);
263+
simd<T, N> V;
264+
265+
if (I.get(0) % 2 == 0) {
266+
int Ind = 0;
267+
{
268+
Kernel<T, N, Op, AllSca> DevF{};
269+
T Val2 = V2[Ind];
270+
simd<T, N> Vv = DevF(V1[Ind], Val2); // both arguments are scalar
271+
V[Ind] = Vv[Ind];
272+
}
273+
Ind++;
274+
{
275+
Kernel<T, N, Op, Sca1Vec2> DevF{};
276+
T Val1 = V1[Ind];
277+
simd<T, N> Vv = DevF(Val1, V2); // scalar, vector
278+
V[Ind] = Vv[Ind];
279+
}
280+
Ind++;
281+
{
282+
for (int J = Ind; J < N; ++J) {
283+
Kernel<T, N, Op, Sca2Vec1> DevF{};
284+
T Val2 = V2[J];
285+
simd<T, N> Vv = DevF(V1, Val2); // scalar 2nd arg
286+
V[J] = Vv[J];
287+
}
288+
}
289+
} else {
290+
Kernel<T, N, Op, AllVec> DevF{};
291+
V = DevF(V1, V2); // vec 2nd arg
292+
}
220293
V.copy_to(Out, Offset);
221294
};
222295
};
223296

224297
// --- Generic test function for an extended math operation
225298

226-
template <class T, int VL, MathOp Op,
227-
template <class, int, MathOp> class Kernel,
228-
typename InitF = InitDataFuncNarrow<T>>
299+
template <class T, int N, MathOp Op,
300+
template <class, int, MathOp, int> class Kernel,
301+
typename InitF = InitNarrow<T>>
229302
bool test(queue &Q, const std::string &Name,
230-
InitF InitData = InitDataFuncNarrow<T>{}, float delta = 0.0f) {
303+
InitF Init = InitNarrow<T>{}, float delta = 0.0f) {
231304

232305
constexpr size_t Size = 1024 * 128;
233306
constexpr bool IsBinOp = (Op == MathOp::div_ieee) || (Op == MathOp::pow);
@@ -236,11 +309,12 @@ bool test(queue &Q, const std::string &Name,
236309
T *B = new T[Size];
237310
T *C = new T[Size];
238311
if constexpr (IsBinOp) {
239-
InitData(A, B, C, Size);
312+
Init(A, B, C, Size);
240313
} else {
241-
InitData(A, B, Size);
314+
Init(A, B, Size);
242315
}
243-
const char *kind = std::is_same_v<Kernel<T, VL, Op>, FuncESIMD<T, VL, Op>>
316+
const char *kind =
317+
std::is_same_v<Kernel<T, N, Op, AllVec>, ESIMDf<T, N, Op, AllVec>>
244318
? "ESIMD"
245319
: "SYCL";
246320
std::cout << " " << Name << " test, kind=" << kind << "...\n";
@@ -251,7 +325,7 @@ bool test(queue &Q, const std::string &Name,
251325
buffer<T, 1> BufC(C, range<1>(Size));
252326

253327
// number of workgroups
254-
cl::sycl::range<1> GlobalRange{Size / VL};
328+
cl::sycl::range<1> GlobalRange{Size / N};
255329

256330
// threads (workitems) in each workgroup
257331
cl::sycl::range<1> LocalRange{1};
@@ -261,12 +335,12 @@ bool test(queue &Q, const std::string &Name,
261335
auto PC = BufC.template get_access<access::mode::write>(CGH);
262336
if constexpr (IsBinOp) {
263337
auto PB = BufB.template get_access<access::mode::read>(CGH);
264-
BinaryDeviceFunc<T, VL, Op, Kernel, decltype(PA), decltype(PC)> F(
338+
BinaryDeviceFunc<T, N, Op, Kernel, decltype(PA), decltype(PC)> F(
265339
PA, PB, PC);
266340
CGH.parallel_for(nd_range<1>{GlobalRange, LocalRange}, F);
267341
} else {
268-
UnaryDeviceFunc<T, VL, Op, Kernel, decltype(PA), decltype(PC)> F(PA,
269-
PC);
342+
UnaryDeviceFunc<T, N, Op, Kernel, decltype(PA), decltype(PC)> F(PA,
343+
PC);
270344
CGH.parallel_for(nd_range<1>{GlobalRange, LocalRange}, F);
271345
}
272346
});
@@ -316,69 +390,62 @@ bool test(queue &Q, const std::string &Name,
316390

317391
// --- Tests all extended math operations with given vector length
318392

319-
template <class T, int VL> bool testESIMD(queue &Q) {
393+
template <class T, int N> bool testESIMD(queue &Q) {
320394
bool Pass = true;
321395

322396
std::cout << "--- TESTING ESIMD functions, T=" << typeid(T).name()
323-
<< ", VL = " << VL << "...\n";
324-
325-
Pass &=
326-
test<T, VL, MathOp::sqrt, FuncESIMD>(Q, "sqrt", InitDataFuncWide<T>{});
327-
Pass &= test<T, VL, MathOp::inv, FuncESIMD>(Q, "inv");
328-
Pass &= test<T, VL, MathOp::rsqrt, FuncESIMD>(Q, "rsqrt");
329-
Pass &= test<T, VL, MathOp::sin, FuncESIMD>(Q, "sin", InitDataFuncTrig<T>{});
330-
Pass &= test<T, VL, MathOp::cos, FuncESIMD>(Q, "cos", InitDataFuncTrig<T>{});
331-
Pass &=
332-
test<T, VL, MathOp::exp, FuncESIMD>(Q, "exp", InitDataInRange0_5<T>{});
333-
Pass &= test<T, VL, MathOp::log, FuncESIMD>(Q, "log", InitDataFuncWide<T>{});
334-
Pass &=
335-
test<T, VL, MathOp::exp2, FuncESIMD>(Q, "exp2", InitDataInRange0_5<T>{});
336-
Pass &=
337-
test<T, VL, MathOp::log2, FuncESIMD>(Q, "log2", InitDataFuncWide<T>{});
338-
Pass &=
339-
test<T, VL, MathOp::trunc, FuncESIMD>(Q, "trunc", InitDataFuncWide<T>{});
397+
<< ", N = " << N << "...\n";
398+
399+
Pass &= test<T, N, MathOp::sqrt, ESIMDf>(Q, "sqrt", InitWide<T>{});
400+
Pass &= test<T, N, MathOp::inv, ESIMDf>(Q, "inv");
401+
Pass &= test<T, N, MathOp::rsqrt, ESIMDf>(Q, "rsqrt");
402+
Pass &= test<T, N, MathOp::sin, ESIMDf>(Q, "sin", InitTrig<T>{});
403+
Pass &= test<T, N, MathOp::cos, ESIMDf>(Q, "cos", InitTrig<T>{});
404+
Pass &= test<T, N, MathOp::exp, ESIMDf>(Q, "exp", InitInRange0_5<T>{});
405+
Pass &= test<T, N, MathOp::log, ESIMDf>(Q, "log", InitWide<T>{});
406+
Pass &= test<T, N, MathOp::exp2, ESIMDf>(Q, "exp2", InitInRange0_5<T>{});
407+
Pass &= test<T, N, MathOp::log2, ESIMDf>(Q, "log2", InitWide<T>{});
408+
Pass &= test<T, N, MathOp::trunc, ESIMDf>(Q, "trunc", InitWide<T>{});
340409
return Pass;
341410
}
342411

343-
template <class T, int VL> bool testESIMDSqrtIEEE(queue &Q) {
412+
template <class T, int N> bool testESIMDSqrtIEEE(queue &Q) {
344413
bool Pass = true;
345414
std::cout << "--- TESTING ESIMD sqrt_ieee, T=" << typeid(T).name()
346-
<< ", VL = " << VL << "...\n";
347-
Pass &= test<T, VL, MathOp::sqrt_ieee, FuncESIMD>(Q, "sqrt_ieee",
348-
InitDataFuncWide<T>{});
415+
<< ", N = " << N << "...\n";
416+
Pass &= test<T, N, MathOp::sqrt_ieee, ESIMDf>(Q, "sqrt_ieee", InitWide<T>{});
349417
return Pass;
350418
}
351419

352-
template <class T, int VL> bool testESIMDDivIEEE(queue &Q) {
420+
template <class T, int N> bool testESIMDDivIEEE(queue &Q) {
353421
bool Pass = true;
354422
std::cout << "--- TESTING ESIMD div_ieee, T=" << typeid(T).name()
355-
<< ", VL = " << VL << "...\n";
356-
Pass &= test<T, VL, MathOp::div_ieee, BinFuncESIMD>(
357-
Q, "div_ieee", InitDataBinFuncNarrow<T>{});
423+
<< ", N = " << N << "...\n";
424+
Pass &= test<T, N, MathOp::div_ieee, BinESIMDf>(Q, "div_ieee", InitBin<T>{});
358425
return Pass;
359426
}
360427

361-
template <class T, int VL> bool testESIMDPow(queue &Q) {
428+
template <class T, int N> bool testESIMDPow(queue &Q) {
362429
bool Pass = true;
363430
std::cout << "--- TESTING ESIMD pow, T=" << typeid(T).name()
364-
<< ", VL = " << VL << "...\n";
365-
Pass &= test<T, VL, MathOp::pow, BinFuncESIMD>(
366-
Q, "pow", InitDataBinFuncNarrow<T>{}, 0.1);
431+
<< ", N = " << N << "...\n";
432+
Pass &= test<T, N, MathOp::pow, BinESIMDf>(
433+
Q, "pow", InitBin<T>{}, 0.1);
367434
return Pass;
368435
}
369436

370-
template <class T, int VL> bool testSYCL(queue &Q) {
437+
template <class T, int N> bool testSYCL(queue &Q) {
371438
bool Pass = true;
372439
// TODO SYCL currently supports only these 4 functions, extend the test when
373440
// more are available.
374441
std::cout << "--- TESTING SYCL functions, T=" << typeid(T).name()
375-
<< ", VL = " << VL << "...\n";
442+
<< ", N = " << N << "...\n";
376443
// SYCL functions will have good accuracy for any argument, unlike bare h/w
377444
// ESIMD versions, so init with "wide" data set.
378-
Pass &= test<T, VL, MathOp::sin, FuncSYCL>(Q, "sin", InitDataFuncWide<T>{});
379-
Pass &= test<T, VL, MathOp::cos, FuncSYCL>(Q, "cos", InitDataFuncWide<T>{});
380-
Pass &= test<T, VL, MathOp::exp, FuncSYCL>(Q, "exp", InitDataInRange0_5<T>{});
381-
Pass &= test<T, VL, MathOp::log, FuncSYCL>(Q, "log", InitDataFuncWide<T>{});
445+
Pass &= test<T, N, MathOp::sin, SYCLf>(Q, "sin", InitWide<T>{});
446+
Pass &= test<T, N, MathOp::cos, SYCLf>(Q, "cos", InitWide<T>{});
447+
Pass &= test<T, N, MathOp::exp, SYCLf>(Q, "exp", InitInRange0_5<T>{});
448+
Pass &= test<T, N, MathOp::log, SYCLf>(Q, "log", InitWide<T>{});
382449
return Pass;
383450
}
384451

0 commit comments

Comments
 (0)