@@ -1270,60 +1270,106 @@ ESIMD_NODEBUG ESIMD_INLINE
1270
1270
return esimd_pack_mask (src_0);
1271
1271
}
1272
1272
1273
- // / Count component-wise the total bits set in source operand.
1273
+ // / Count number of bits set in the source operand per element.
1274
+ // / @param src the source operand to count bits in.
1275
+ // / @return a vector of \c uint32_t, where each element is set to bit count of
1276
+ // / the corresponding element of the source operand.
1274
1277
template <typename T, int N>
1275
- ESIMD_NODEBUG ESIMD_INLINE
1276
- typename sycl::detail::enable_if_t <std::is_integral<T>::value,
1277
- simd<uint, N>>
1278
- esimd_cbit (simd<T, N> src0) {
1279
- return __esimd_cbit<T, N>(src0.data ());
1278
+ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t <
1279
+ std::is_integral<T>::value && sizeof (T) <= 4 , simd<uint32_t , N>>
1280
+ esimd_cbit (simd<T, N> src) {
1281
+ return __esimd_cbit<T, N>(src.data ());
1280
1282
}
1281
1283
1284
+ // / Scalar version of \c esimd_cbit - both input and output are scalars rather
1285
+ // / than vectors.
1282
1286
template <typename T>
1283
1287
ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t <
1284
- std::is_integral<T>::value && detail::is_esimd_scalar<T>::value, uint >
1288
+ std::is_integral<T>::value && sizeof (T) <= 4 , uint32_t >
1285
1289
esimd_cbit (T src) {
1286
1290
simd<T, 1 > Src = src;
1287
- simd<uint , 1 > Result = esimd_cbit (Src);
1291
+ simd<uint32_t , 1 > Result = esimd_cbit (Src);
1288
1292
return Result[0 ];
1289
1293
}
1290
1294
1295
+ // / Find the per element number of the first bit set in the source operand
1296
+ // / starting from the least significant bit.
1297
+ // / @param src the source operand to find the first set bit in.
1298
+ // / @return a vector of the same type as the source operand, where each element
1299
+ // / is set to the number of the first bit set in corresponding element of the
1300
+ // / source operand. \c 0xFFFFffff is returned for an element equal to \c 0.
1291
1301
// / Find component-wise the first bit from LSB side
1292
- template <int N>
1293
- ESIMD_NODEBUG ESIMD_INLINE simd<unsigned , N> esimd_fbl (simd<unsigned , N> src) {
1294
- return __esimd_fbl (src.data ());
1302
+ template <typename T, int N>
1303
+ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t <
1304
+ std::is_integral<T>::value && (sizeof (T) == 4 ), simd<T, N>>
1305
+ esimd_fbl (simd<T, N> src) {
1306
+ return __esimd_fbl<T, N>(src.data ());
1295
1307
}
1296
1308
1297
- template <typename T = void >
1298
- ESIMD_NODEBUG ESIMD_INLINE unsigned esimd_fbl (unsigned src) {
1299
- simd<unsigned , 1 > Src = src;
1300
- simd<unsigned , 1 > Result = esimd_fbl (Src);
1309
+ // / Scalar version of \c esimd_fbl - both input and output are scalars rather
1310
+ // / than vectors.
1311
+ template <typename T>
1312
+ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t <
1313
+ std::is_integral<T>::value && (sizeof (T) == 4 ), T>
1314
+ esimd_fbl (T src) {
1315
+ simd<T, 1 > Src = src;
1316
+ simd<T, 1 > Result = esimd_fbl (Src);
1301
1317
return Result[0 ];
1302
1318
}
1303
1319
1304
- // / Find component-wise the first bit from MSB side.
1305
- template <int N>
1306
- ESIMD_NODEBUG ESIMD_INLINE simd<int , N> esimd_fbh (simd<int , N> src) {
1307
- return __esimd_sfbh (src.data ());
1320
+ // / Find the per element number of the first bit set in the source operand
1321
+ // / starting from the most significant bit (sign bit is skipped).
1322
+ // / @param src the source operand to find the first set bit in.
1323
+ // / @return a vector of the same type as the source operand, where each element
1324
+ // / is set to the number of the first bit set in corresponding element of the
1325
+ // / source operand. \c 0xFFFFffff is returned for an element equal to \c 0
1326
+ // / or \c -1.
1327
+ template <typename T, int N>
1328
+ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t <
1329
+ std::is_integral<T>::value && std::is_signed<T>::value && (sizeof (T) == 4 ),
1330
+ simd<T, N>>
1331
+ esimd_fbh (simd<T, N> src) {
1332
+ return __esimd_sfbh<T, N>(src.data ());
1308
1333
}
1309
1334
1310
- template <int N>
1311
- ESIMD_NODEBUG ESIMD_INLINE simd<unsigned int , N>
1312
- esimd_fbh (simd<unsigned int , N> src) {
1313
- return __esimd_ufbh (src.data ());
1335
+ // / Scalar version of \c esimd_fbh - both input and output are scalars rather
1336
+ // / than vectors.
1337
+ template <typename T>
1338
+ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t <
1339
+ std::is_integral<T>::value && std::is_signed<T>::value && (sizeof (T) == 4 ),
1340
+ T>
1341
+ esimd_fbh (T src) {
1342
+ simd<T, 1 > Src = src;
1343
+ simd<T, 1 > Result = esimd_fbh (Src);
1344
+ return Result[0 ];
1314
1345
}
1315
1346
1347
+ // / Find the per element number of the first bit set in the source operand
1348
+ // / starting from the most significant bit (sign bit is counted).
1349
+ // / @param src the source operand to find the first set bit in.
1350
+ // / @return a vector of the same type as the source operand, where each element
1351
+ // / is set to the number of the first bit set in corresponding element of the
1352
+ // / source operand. \c 0xFFFFffff is returned for an element equal to \c 0.
1353
+ template <typename T, int N>
1354
+ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t <
1355
+ std::is_integral<T>::value && !std::is_signed<T>::value && (sizeof (T) == 4 ),
1356
+ simd<T, N>>
1357
+ esimd_fbh (simd<T, N> src) {
1358
+ return __esimd_ufbh<T, N>(src.data ());
1359
+ }
1360
+
1361
+ // / Scalar unsigned version of \c esimd_fbh - both input and output are unsigned
1362
+ // / scalars rather than vectors.
1316
1363
template <typename T>
1317
1364
ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t <
1318
- detail::is_dword_type<T>::value && detail::is_esimd_scalar<T>::value, T>
1365
+ std::is_integral<T>::value && !std::is_signed<T>::value && (sizeof (T) == 4 ),
1366
+ T>
1319
1367
esimd_fbh (T src) {
1320
1368
simd<T, 1 > Src = src;
1321
1369
simd<T, 1 > Result = esimd_fbh (Src);
1322
1370
return Result[0 ];
1323
1371
}
1324
1372
1325
- template <typename T = void > simd<uint, 4 > esimd_rdtsc ();
1326
-
1327
1373
// / \brief DP4A.
1328
1374
// /
1329
1375
// / @param src0 the first source operand of dp4a operation.
@@ -1749,7 +1795,7 @@ ESIMD_INLINE simd<float, N> esimd_tanh_cody_waite_impl(simd<float, N> x) {
1749
1795
* | x | rational polynomial | 1 - 2/(1 + exp(2*x)) | 1
1750
1796
*
1751
1797
* rational polynomial for single precision = x + x * (g * (p[1] * g + p[0]) /
1752
- * (g + q[0]) g = x^2 p0 = − 0.82377 28127 E+00 p1 = − 0.38310 10665 E-02 q0 =
1798
+ * (g + q[0]) g = x^2 p0 = - 0.82377 28127 E+00 p1 = - 0.38310 10665 E-02 q0 =
1753
1799
* 0.24713 19654 E+01 q1 = 1.00000 00000 E+00
1754
1800
*
1755
1801
*/
0 commit comments