@@ -38,7 +38,7 @@ using namespace sycl::ext::intel::experimental::esimd;
38
38
39
39
constexpr int accuracy_limit = 32767 * 3.14 - 1 ;
40
40
41
- template <class T > struct InitDataFuncTrig {
41
+ template <class T > struct InitTrig {
42
42
void operator ()(T *In, T *Out, size_t Size) const {
43
43
for (auto I = 0 ; I < Size; ++I) {
44
44
In[I] = (I + 1 ) % accuracy_limit;
@@ -47,7 +47,7 @@ template <class T> struct InitDataFuncTrig {
47
47
}
48
48
};
49
49
50
- template <class T > struct InitDataFuncWide {
50
+ template <class T > struct InitWide {
51
51
void operator ()(T *In, T *Out, size_t Size) const {
52
52
for (auto I = 0 ; I < Size; ++I) {
53
53
In[I] = I + 1.0 ;
@@ -56,7 +56,7 @@ template <class T> struct InitDataFuncWide {
56
56
}
57
57
};
58
58
59
- template <class T > struct InitDataFuncNarrow {
59
+ template <class T > struct InitNarrow {
60
60
void operator ()(T *In, T *Out, size_t Size) const {
61
61
for (auto I = 0 ; I < Size; ++I) {
62
62
In[I] = 2 .0f + 16 .0f * ((T)I / (T)(Size - 1 )); // in [2..18] range
@@ -65,7 +65,7 @@ template <class T> struct InitDataFuncNarrow {
65
65
}
66
66
};
67
67
68
- template <class T > struct InitDataInRange0_5 {
68
+ template <class T > struct InitInRange0_5 {
69
69
void operator ()(T *In, T *Out, size_t Size) const {
70
70
for (auto I = 0 ; I < Size; ++I) {
71
71
In[I] = 5 .0f * ((T)I / (T)(Size - 1 )); // in [0..5] range
@@ -74,7 +74,7 @@ template <class T> struct InitDataInRange0_5 {
74
74
}
75
75
};
76
76
77
- template <class T > struct InitDataBinFuncNarrow {
77
+ template <class T > struct InitBin {
78
78
void operator ()(T *In1, T *In2, T *Out, size_t Size) const {
79
79
for (auto I = 0 ; I < Size; ++I) {
80
80
In1[I] = I % 17 + 1 ;
@@ -104,9 +104,17 @@ enum class MathOp {
104
104
105
105
// --- Template functions calculating given math operation on host and device
106
106
107
- template <class T , int VL, MathOp Op> struct FuncESIMD ;
108
- template <class T , int VL, MathOp Op> struct BinFuncESIMD ;
109
- template <class T , int VL, MathOp Op> struct FuncSYCL ;
107
+ enum ArgKind {
108
+ AllVec,
109
+ AllSca,
110
+ Sca1Vec2,
111
+ Sca2Vec1
112
+ };
113
+
114
+ template <class T , int N, MathOp Op, int Args=AllVec> struct ESIMDf ;
115
+ template <class T , int N, MathOp Op, int Args=AllVec> struct BinESIMDf ;
116
+ template <class T , int N, MathOp Op, int Args=AllVec> struct SYCLf ;
117
+
110
118
template <class T , MathOp Op> struct HostFunc ;
111
119
112
120
#define DEFINE_HOST_OP (Op, HostOp ) \
@@ -137,9 +145,14 @@ DEFINE_HOST_BIN_OP(pow, std::pow(X, Y));
137
145
// --- Specializations per each extended math operation
138
146
139
147
#define DEFINE_ESIMD_DEVICE_OP (Op ) \
140
- template <class T , int VL> struct FuncESIMD <T, VL, MathOp::Op> { \
141
- simd<T, VL> operator ()(const simd<T, VL> &X) const SYCL_ESIMD_FUNCTION { \
142
- return esimd::Op<T, VL>(X); \
148
+ template <class T , int N> struct ESIMDf <T, N, MathOp::Op, AllVec> { \
149
+ simd<T, N> operator ()(simd<T, N>X) const SYCL_ESIMD_FUNCTION { \
150
+ return esimd::Op<T, N>(X); \
151
+ } \
152
+ }; \
153
+ template <class T , int N> struct ESIMDf <T, N, MathOp::Op, AllSca> { \
154
+ simd<T, N> operator ()(T X) const SYCL_ESIMD_FUNCTION { \
155
+ return esimd::Op<T, N>(X); \
143
156
} \
144
157
};
145
158
@@ -156,21 +169,43 @@ DEFINE_ESIMD_DEVICE_OP(exp2);
156
169
DEFINE_ESIMD_DEVICE_OP (log2);
157
170
158
171
#define DEFINE_ESIMD_DEVICE_BIN_OP (Op ) \
159
- template <class T , int VL> struct BinFuncESIMD <T, VL, MathOp::Op> { \
160
- simd<T, VL> operator ()(const simd<T, VL> &X, \
161
- const simd<T, VL> &Y) const SYCL_ESIMD_FUNCTION { \
162
- return esimd::Op<T, VL>(X, Y); \
172
+ template <class T , int N> struct BinESIMDf <T, N, MathOp::Op, AllSca> { \
173
+ simd<T, N> operator ()(T X, \
174
+ T Y) const SYCL_ESIMD_FUNCTION { \
175
+ return esimd::Op<T, N>(X, Y); \
176
+ } \
177
+ }; \
178
+ template <class T , int N> struct BinESIMDf <T, N, MathOp::Op, AllVec> { \
179
+ simd<T, N> operator ()(simd<T, N>X, \
180
+ simd<T, N>Y) const SYCL_ESIMD_FUNCTION { \
181
+ return esimd::Op<T, N>(X, Y); \
182
+ } \
183
+ }; \
184
+ template <class T , int N> struct BinESIMDf <T, N, MathOp::Op, Sca1Vec2> { \
185
+ simd<T, N> operator ()(T X, simd<T, N> Y) const SYCL_ESIMD_FUNCTION { \
186
+ return esimd::Op<T, N>(X, Y); \
187
+ } \
188
+ }; \
189
+ template <class T , int N> struct BinESIMDf <T, N, MathOp::Op, Sca2Vec1> { \
190
+ simd<T, N> operator ()(simd<T, N>X, \
191
+ T Y) const SYCL_ESIMD_FUNCTION { \
192
+ return esimd::Op<T, N>(X, Y); \
163
193
} \
164
194
};
165
195
166
196
DEFINE_ESIMD_DEVICE_BIN_OP (div_ieee);
167
197
DEFINE_ESIMD_DEVICE_BIN_OP (pow);
168
198
169
199
#define DEFINE_SYCL_DEVICE_OP (Op ) \
170
- template <class T , int VL > struct FuncSYCL <T, VL , MathOp::Op> { \
171
- simd<T, VL > operator ()(const simd<T, VL> & X) const SYCL_ESIMD_FUNCTION { \
200
+ template <class T , int N > struct SYCLf <T, N , MathOp::Op, AllVec > { \
201
+ simd<T, N > operator ()(simd<T, N> X) const SYCL_ESIMD_FUNCTION { \
172
202
/* T must be float for SYCL, so not a template parameter for sycl::Op*/ \
173
- return sycl::Op<VL>(X); \
203
+ return sycl::Op<N>(X); \
204
+ } \
205
+ }; \
206
+ template <class T , int N> struct SYCLf <T, N, MathOp::Op, AllSca> { \
207
+ simd<T, N> operator ()(T X) const SYCL_ESIMD_FUNCTION { \
208
+ return sycl::Op<N>(X); \
174
209
} \
175
210
};
176
211
@@ -181,8 +216,8 @@ DEFINE_SYCL_DEVICE_OP(log);
181
216
182
217
// --- Generic kernel calculating an extended math operation on array elements
183
218
184
- template <class T , int VL , MathOp Op,
185
- template <class , int , MathOp> class Kernel , typename AccIn,
219
+ template <class T , int N , MathOp Op,
220
+ template <class , int , MathOp, int > class Kernel , typename AccIn,
186
221
typename AccOut>
187
222
struct UnaryDeviceFunc {
188
223
AccIn In;
@@ -191,17 +226,27 @@ struct UnaryDeviceFunc {
191
226
UnaryDeviceFunc (AccIn &In, AccOut &Out) : In(In), Out(Out) {}
192
227
193
228
void operator ()(id<1 > I) const SYCL_ESIMD_KERNEL {
194
- unsigned int Offset = I * VL * sizeof (T);
195
- simd<T, VL > Vx;
229
+ unsigned int Offset = I * N * sizeof (T);
230
+ simd<T, N > Vx;
196
231
Vx.copy_from (In, Offset);
197
- Kernel<T, VL, Op> DevF{};
198
- Vx = DevF (Vx);
232
+
233
+ if (I.get (0 ) % 2 == 0 ) {
234
+ for (int J = 0 ; J < N; J++) {
235
+ Kernel<T, N, Op, AllSca> DevF{};
236
+ T Val = Vx[J];
237
+ simd<T, N> V = DevF (Val); // scalar arg
238
+ Vx[J] = V[J];
239
+ }
240
+ } else {
241
+ Kernel<T, N, Op, AllVec> DevF{};
242
+ Vx = DevF (Vx); // vector arg
243
+ }
199
244
Vx.copy_to (Out, Offset);
200
245
};
201
246
};
202
247
203
- template <class T , int VL , MathOp Op,
204
- template <class , int , MathOp> class Kernel , typename AccIn,
248
+ template <class T , int N , MathOp Op,
249
+ template <class , int , MathOp, int > class Kernel , typename AccIn,
205
250
typename AccOut>
206
251
struct BinaryDeviceFunc {
207
252
AccIn In1;
@@ -212,22 +257,50 @@ struct BinaryDeviceFunc {
212
257
: In1(In1), In2(In2), Out(Out) {}
213
258
214
259
void operator ()(id<1 > I) const SYCL_ESIMD_KERNEL {
215
- unsigned int Offset = I * VL * sizeof (T);
216
- simd<T, VL> V1 (In1, Offset);
217
- simd<T, VL> V2 (In2, Offset);
218
- Kernel<T, VL, Op> DevF{};
219
- simd<T, VL> V = DevF (V1, V2);
260
+ unsigned int Offset = I * N * sizeof (T);
261
+ simd<T, N> V1 (In1, Offset);
262
+ simd<T, N> V2 (In2, Offset);
263
+ simd<T, N> V;
264
+
265
+ if (I.get (0 ) % 2 == 0 ) {
266
+ int Ind = 0 ;
267
+ {
268
+ Kernel<T, N, Op, AllSca> DevF{};
269
+ T Val2 = V2[Ind];
270
+ simd<T, N> Vv = DevF (V1[Ind], Val2); // both arguments are scalar
271
+ V[Ind] = Vv[Ind];
272
+ }
273
+ Ind++;
274
+ {
275
+ Kernel<T, N, Op, Sca1Vec2> DevF{};
276
+ T Val1 = V1[Ind];
277
+ simd<T, N> Vv = DevF (Val1, V2); // scalar, vector
278
+ V[Ind] = Vv[Ind];
279
+ }
280
+ Ind++;
281
+ {
282
+ for (int J = Ind; J < N; ++J) {
283
+ Kernel<T, N, Op, Sca2Vec1> DevF{};
284
+ T Val2 = V2[J];
285
+ simd<T, N> Vv = DevF (V1, Val2); // scalar 2nd arg
286
+ V[J] = Vv[J];
287
+ }
288
+ }
289
+ } else {
290
+ Kernel<T, N, Op, AllVec> DevF{};
291
+ V = DevF (V1, V2); // vec 2nd arg
292
+ }
220
293
V.copy_to (Out, Offset);
221
294
};
222
295
};
223
296
224
297
// --- Generic test function for an extended math operation
225
298
226
- template <class T , int VL , MathOp Op,
227
- template <class , int , MathOp> class Kernel ,
228
- typename InitF = InitDataFuncNarrow <T>>
299
+ template <class T , int N , MathOp Op,
300
+ template <class , int , MathOp, int > class Kernel ,
301
+ typename InitF = InitNarrow <T>>
229
302
bool test (queue &Q, const std::string &Name,
230
- InitF InitData = InitDataFuncNarrow <T>{}, float delta = 0 .0f ) {
303
+ InitF Init = InitNarrow <T>{}, float delta = 0 .0f ) {
231
304
232
305
constexpr size_t Size = 1024 * 128 ;
233
306
constexpr bool IsBinOp = (Op == MathOp::div_ieee) || (Op == MathOp::pow);
@@ -236,11 +309,12 @@ bool test(queue &Q, const std::string &Name,
236
309
T *B = new T[Size];
237
310
T *C = new T[Size];
238
311
if constexpr (IsBinOp) {
239
- InitData (A, B, C, Size);
312
+ Init (A, B, C, Size);
240
313
} else {
241
- InitData (A, B, Size);
314
+ Init (A, B, Size);
242
315
}
243
- const char *kind = std::is_same_v<Kernel<T, VL, Op>, FuncESIMD<T, VL, Op>>
316
+ const char *kind =
317
+ std::is_same_v<Kernel<T, N, Op, AllVec>, ESIMDf<T, N, Op, AllVec>>
244
318
? " ESIMD"
245
319
: " SYCL" ;
246
320
std::cout << " " << Name << " test, kind=" << kind << " ...\n " ;
@@ -251,7 +325,7 @@ bool test(queue &Q, const std::string &Name,
251
325
buffer<T, 1 > BufC (C, range<1 >(Size));
252
326
253
327
// number of workgroups
254
- cl::sycl::range<1 > GlobalRange{Size / VL };
328
+ cl::sycl::range<1 > GlobalRange{Size / N };
255
329
256
330
// threads (workitems) in each workgroup
257
331
cl::sycl::range<1 > LocalRange{1 };
@@ -261,12 +335,12 @@ bool test(queue &Q, const std::string &Name,
261
335
auto PC = BufC.template get_access <access::mode::write>(CGH);
262
336
if constexpr (IsBinOp) {
263
337
auto PB = BufB.template get_access <access::mode::read>(CGH);
264
- BinaryDeviceFunc<T, VL , Op, Kernel, decltype (PA), decltype (PC)> F (
338
+ BinaryDeviceFunc<T, N , Op, Kernel, decltype (PA), decltype (PC)> F (
265
339
PA, PB, PC);
266
340
CGH.parallel_for (nd_range<1 >{GlobalRange, LocalRange}, F);
267
341
} else {
268
- UnaryDeviceFunc<T, VL , Op, Kernel, decltype (PA), decltype (PC)> F (PA,
269
- PC);
342
+ UnaryDeviceFunc<T, N , Op, Kernel, decltype (PA), decltype (PC)> F (PA,
343
+ PC);
270
344
CGH.parallel_for (nd_range<1 >{GlobalRange, LocalRange}, F);
271
345
}
272
346
});
@@ -316,69 +390,62 @@ bool test(queue &Q, const std::string &Name,
316
390
317
391
// --- Tests all extended math operations with given vector length
318
392
319
- template <class T , int VL > bool testESIMD (queue &Q) {
393
+ template <class T , int N > bool testESIMD (queue &Q) {
320
394
bool Pass = true ;
321
395
322
396
std::cout << " --- TESTING ESIMD functions, T=" << typeid (T).name ()
323
- << " , VL = " << VL << " ...\n " ;
324
-
325
- Pass &=
326
- test<T, VL, MathOp::sqrt, FuncESIMD>(Q, " sqrt" , InitDataFuncWide<T>{});
327
- Pass &= test<T, VL, MathOp::inv, FuncESIMD>(Q, " inv" );
328
- Pass &= test<T, VL, MathOp::rsqrt, FuncESIMD>(Q, " rsqrt" );
329
- Pass &= test<T, VL, MathOp::sin, FuncESIMD>(Q, " sin" , InitDataFuncTrig<T>{});
330
- Pass &= test<T, VL, MathOp::cos, FuncESIMD>(Q, " cos" , InitDataFuncTrig<T>{});
331
- Pass &=
332
- test<T, VL, MathOp::exp, FuncESIMD>(Q, " exp" , InitDataInRange0_5<T>{});
333
- Pass &= test<T, VL, MathOp::log, FuncESIMD>(Q, " log" , InitDataFuncWide<T>{});
334
- Pass &=
335
- test<T, VL, MathOp::exp2, FuncESIMD>(Q, " exp2" , InitDataInRange0_5<T>{});
336
- Pass &=
337
- test<T, VL, MathOp::log2, FuncESIMD>(Q, " log2" , InitDataFuncWide<T>{});
338
- Pass &=
339
- test<T, VL, MathOp::trunc, FuncESIMD>(Q, " trunc" , InitDataFuncWide<T>{});
397
+ << " , N = " << N << " ...\n " ;
398
+
399
+ Pass &= test<T, N, MathOp::sqrt, ESIMDf>(Q, " sqrt" , InitWide<T>{});
400
+ Pass &= test<T, N, MathOp::inv, ESIMDf>(Q, " inv" );
401
+ Pass &= test<T, N, MathOp::rsqrt, ESIMDf>(Q, " rsqrt" );
402
+ Pass &= test<T, N, MathOp::sin, ESIMDf>(Q, " sin" , InitTrig<T>{});
403
+ Pass &= test<T, N, MathOp::cos, ESIMDf>(Q, " cos" , InitTrig<T>{});
404
+ Pass &= test<T, N, MathOp::exp, ESIMDf>(Q, " exp" , InitInRange0_5<T>{});
405
+ Pass &= test<T, N, MathOp::log, ESIMDf>(Q, " log" , InitWide<T>{});
406
+ Pass &= test<T, N, MathOp::exp2, ESIMDf>(Q, " exp2" , InitInRange0_5<T>{});
407
+ Pass &= test<T, N, MathOp::log2, ESIMDf>(Q, " log2" , InitWide<T>{});
408
+ Pass &= test<T, N, MathOp::trunc, ESIMDf>(Q, " trunc" , InitWide<T>{});
340
409
return Pass;
341
410
}
342
411
343
- template <class T , int VL > bool testESIMDSqrtIEEE (queue &Q) {
412
+ template <class T , int N > bool testESIMDSqrtIEEE (queue &Q) {
344
413
bool Pass = true ;
345
414
std::cout << " --- TESTING ESIMD sqrt_ieee, T=" << typeid (T).name ()
346
- << " , VL = " << VL << " ...\n " ;
347
- Pass &= test<T, VL, MathOp::sqrt_ieee, FuncESIMD>(Q, " sqrt_ieee" ,
348
- InitDataFuncWide<T>{});
415
+ << " , N = " << N << " ...\n " ;
416
+ Pass &= test<T, N, MathOp::sqrt_ieee, ESIMDf>(Q, " sqrt_ieee" , InitWide<T>{});
349
417
return Pass;
350
418
}
351
419
352
- template <class T , int VL > bool testESIMDDivIEEE (queue &Q) {
420
+ template <class T , int N > bool testESIMDDivIEEE (queue &Q) {
353
421
bool Pass = true ;
354
422
std::cout << " --- TESTING ESIMD div_ieee, T=" << typeid (T).name ()
355
- << " , VL = " << VL << " ...\n " ;
356
- Pass &= test<T, VL, MathOp::div_ieee, BinFuncESIMD>(
357
- Q, " div_ieee" , InitDataBinFuncNarrow<T>{});
423
+ << " , N = " << N << " ...\n " ;
424
+ Pass &= test<T, N, MathOp::div_ieee, BinESIMDf>(Q, " div_ieee" , InitBin<T>{});
358
425
return Pass;
359
426
}
360
427
361
- template <class T , int VL > bool testESIMDPow (queue &Q) {
428
+ template <class T , int N > bool testESIMDPow (queue &Q) {
362
429
bool Pass = true ;
363
430
std::cout << " --- TESTING ESIMD pow, T=" << typeid (T).name ()
364
- << " , VL = " << VL << " ...\n " ;
365
- Pass &= test<T, VL , MathOp::pow, BinFuncESIMD >(
366
- Q, " pow" , InitDataBinFuncNarrow <T>{}, 0.1 );
431
+ << " , N = " << N << " ...\n " ;
432
+ Pass &= test<T, N , MathOp::pow, BinESIMDf >(
433
+ Q, " pow" , InitBin <T>{}, 0.1 );
367
434
return Pass;
368
435
}
369
436
370
- template <class T , int VL > bool testSYCL (queue &Q) {
437
+ template <class T , int N > bool testSYCL (queue &Q) {
371
438
bool Pass = true ;
372
439
// TODO SYCL currently supports only these 4 functions, extend the test when
373
440
// more are available.
374
441
std::cout << " --- TESTING SYCL functions, T=" << typeid (T).name ()
375
- << " , VL = " << VL << " ...\n " ;
442
+ << " , N = " << N << " ...\n " ;
376
443
// SYCL functions will have good accuracy for any argument, unlike bare h/w
377
444
// ESIMD versions, so init with "wide" data set.
378
- Pass &= test<T, VL , MathOp::sin, FuncSYCL >(Q, " sin" , InitDataFuncWide <T>{});
379
- Pass &= test<T, VL , MathOp::cos, FuncSYCL >(Q, " cos" , InitDataFuncWide <T>{});
380
- Pass &= test<T, VL , MathOp::exp, FuncSYCL >(Q, " exp" , InitDataInRange0_5 <T>{});
381
- Pass &= test<T, VL , MathOp::log, FuncSYCL >(Q, " log" , InitDataFuncWide <T>{});
445
+ Pass &= test<T, N , MathOp::sin, SYCLf >(Q, " sin" , InitWide <T>{});
446
+ Pass &= test<T, N , MathOp::cos, SYCLf >(Q, " cos" , InitWide <T>{});
447
+ Pass &= test<T, N , MathOp::exp, SYCLf >(Q, " exp" , InitInRange0_5 <T>{});
448
+ Pass &= test<T, N , MathOp::log, SYCLf >(Q, " log" , InitWide <T>{});
382
449
return Pass;
383
450
}
384
451
0 commit comments