@@ -406,82 +406,94 @@ ror(T1 src0, T2 src1) {
406
406
// / @addtogroup sycl_esimd_math
407
407
// / @{
408
408
409
- // imul
410
- #ifndef ESIMD_HAS_LONG_LONG
411
- // use mulh instruction for high half: rmd receives Src0 * Src1 computed in ComputationTy, and the returned high half comes from __esimd_umulh when T0 is unsigned or __esimd_smulh otherwise.
412
- template <typename T0, typename T1, typename U, int SZ>
413
- ESIMD_NODEBUG
414
- ESIMD_INLINE std::enable_if_t <__ESIMD_DNS::is_dword_type<T0>::value &&
415
- __ESIMD_DNS::is_dword_type<T1>::value &&
416
- __ESIMD_DNS::is_dword_type<U>::value,
417
- __ESIMD_NS::simd<T0, SZ>>
418
- imul (__ESIMD_NS::simd<T0, SZ> &rmd, __ESIMD_NS::simd<T1, SZ> src0, U src1) {
419
- using ComputationTy = __ESIMD_DNS::computation_type_t <decltype (src0), U>;
420
- ComputationTy Src0 = src0;
421
- ComputationTy Src1 = src1;
422
- rmd = Src0 * Src1;
423
- if constexpr (std::is_unsigned<T0>::value)
424
- return __esimd_umulh (Src0.data (), Src1.data ());
425
- else
426
- return __esimd_smulh (Src0.data (), Src1.data ());
427
- }
428
-
429
- #else
430
- // imul bdw+ version: use qw=dw*dw multiply, i.e. convert src0 to long long, multiply by src1, then split the product through a T0-typed view.
430
- // This overload handles SZ==1, which is special-cased to avoid "error: when select size is 1, the
431
- // stride must also be 1" on the selects (the SZ != 1 overload selects with stride 2).
432
- template <typename T0, typename T1, typename U, int SZ>
433
- __ESIMD_API
434
- std::enable_if_t <__ESIMD_DNS::is_dword_type<T0>::value &&
435
- __ESIMD_DNS::is_dword_type<T1>::value &&
436
- __ESIMD_DNS::is_dword_type<U>::value && SZ == 1 ,
437
- __ESIMD_NS::simd<T0, SZ>>
438
- imul (__ESIMD_NS::simd<T0, SZ> &rmd, __ESIMD_NS::simd<T1, SZ> src0, U src1) {
439
- using ComputationTy =
440
- __ESIMD_DNS::computation_type_t <decltype (rmd), long long >;
441
- ComputationTy Product = convert<long long >(src0);
442
- Product *= src1;
443
- rmd = Product.bit_cast_view <T0>().select <1 , 1 >[0 ]; // element 0 of the T0 view is stored in rmd
444
- return Product.bit_cast_view <T0>().select <1 , 1 >[1 ]; // element 1 of the T0 view is the return value
445
- }
447
-
448
- template <typename T0, typename T1, typename U, int SZ> // SZ != 1 overload of the long long based imul
449
- __ESIMD_API
450
- std::enable_if_t <__ESIMD_DNS::is_dword_type<T0>::value &&
451
- __ESIMD_DNS::is_dword_type<T1>::value &&
452
- __ESIMD_DNS::is_dword_type<U>::value && SZ != 1 ,
453
- __ESIMD_NS::simd<T0, SZ>>
454
- imul (__ESIMD_NS::simd<T0, SZ> &rmd, __ESIMD_NS::simd<T1, SZ> src0, U src1) {
455
- using ComputationTy =
456
- __ESIMD_DNS::computation_type_t <decltype (rmd), long long >;
457
- ComputationTy Product = convert<long long >(src0); // convert src0 to long long before multiplying
458
- Product *= src1;
459
- rmd = Product.bit_cast_view <T0>().select <SZ, 2 >(0 ); // SZ elements of the T0 view, stride 2, starting at element 0
460
- return Product.bit_cast_view <T0>().select <SZ, 2 >(1 ); // SZ elements of the T0 view, stride 2, starting at element 1
461
- }
462
- #endif
463
-
464
- // TODO: document (scalar-first overload: forwards to imul with the \p src0 and \p src1 operands swapped)
465
- template <typename T0, typename T1, typename U, int SZ>
466
- __ESIMD_API std::enable_if_t <__ESIMD_DNS::is_esimd_scalar<U>::value,
467
- __ESIMD_NS::simd<T0, SZ>>
468
- imul (__ESIMD_NS::simd<T0, SZ> &rmd, U src0, __ESIMD_NS::simd<T1, SZ> src1) {
469
- return esimd::imul (rmd, src1, src0);
409
+ // / Multiplies the 32-bit integer vectors \p src0 and \p src1 element by element,
410
+ // / producing a 64-bit product per element. Each product is returned as two
411
+ // / 32-bit halves: the low 32 bits of every product are written to the output
412
+ // / parameter \p rmd and the upper 32 bits of every product are returned from
413
+ // / the function. \p T, \p T0 and \p T1 must all be 32-bit integer types.
414
+ template <typename T, typename T0, typename T1, int N>
415
+ __ESIMD_API __ESIMD_NS::simd<T, N> imul_impl (__ESIMD_NS::simd<T, N> &rmd,
416
+ __ESIMD_NS::simd<T0, N> src0,
417
+ __ESIMD_NS::simd<T1, N> src1) {
418
+ static_assert (__ESIMD_DNS::is_dword_type<T>::value &&
419
+ __ESIMD_DNS::is_dword_type<T0>::value &&
420
+ __ESIMD_DNS::is_dword_type<T1>::value,
421
+ " expected 32-bit integer vector operands." );
422
+ using Comp32T = __ESIMD_DNS::computation_type_t <T0, T1>;
423
+ auto Src0 = src0.template bit_cast_view <Comp32T>();
424
+ auto Src1 = src1.template bit_cast_view <Comp32T>();
425
+
426
+ // Widen to 64 bits (int64_t when Comp32T is signed, uint64_t otherwise) and multiply there.
427
+ using Comp64T =
428
+ std::conditional_t <std::is_signed_v<Comp32T>, int64_t , uint64_t >;
429
+ __ESIMD_NS::simd<Comp64T, N> Product64 = Src0;
430
+ Product64 *= Src1;
431
+
432
+ // View the 64-bit products as elements of type T and split them: the stride-2 selection starting at element 0 goes to rmd, the one starting at element 1 is returned (N == 1 uses plain indexing instead).
433
+ auto Product32 = Product64.template bit_cast_view <T>();
434
+ if constexpr (N == 1 ) {
435
+ rmd = Product32[0 ];
436
+ return Product32[1 ];
437
+ } else {
438
+ rmd = Product32.template select <N, 2 >(0 );
439
+ return Product32.template select <N, 2 >(1 );
440
+ }
470
441
}
471
442
472
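- // TODO: document (all-scalar overload: wraps \p src0 and \p src1 in 1-element vectors, calls the vector imul, and returns element 0 of its result)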
473
- template <typename T0, typename T, typename U>
474
- ESIMD_NODEBUG
475
- ESIMD_INLINE std::enable_if_t <__ESIMD_DNS::is_esimd_scalar<T>::value &&
476
- __ESIMD_DNS::is_esimd_scalar<U>::value &&
477
- __ESIMD_DNS::is_esimd_scalar<T0>::value,
478
- T0>
479
- imul (__ESIMD_NS::simd<T0, 1 > &rmd, T src0, U src1) {
480
- __ESIMD_NS::simd<T, 1 > src_0 = src0;
481
- __ESIMD_NS::simd<U, 1 > src_1 = src1;
482
- __ESIMD_NS::simd<T0, 1 > res =
483
- esimd::imul (rmd, src_0.select_all (), src_1.select_all ());
484
- return res[0 ];
443
+ // / Computes the 64-bit products of the 32-bit integer vectors \p src0 and
444
+ // / \p src1. Each product is returned as two separate 32-bit values: the low
445
+ // / 32-bit parts of the products are written to the output parameter \p rmd
446
+ // / and the upper 32-bit parts are returned. Forwards directly to imul_impl.
447
+ template <typename T, typename T0, typename T1, int N>
448
+ __ESIMD_API __ESIMD_NS::simd<T, N> imul (__ESIMD_NS::simd<T, N> &rmd,
449
+ __ESIMD_NS::simd<T0, N> src0,
450
+ __ESIMD_NS::simd<T1, N> src1) {
451
+ return imul_impl<T, T0, T1, N>(rmd, src0, src1);
452
+ }
453
+
454
+ // / Computes the 64-bit products of the 32-bit integer vector \p src0 and the
455
+ // / 32-bit integer scalar \p src1. The results are returned in two separate
456
+ // / 32-bit vectors: the low 32-bit parts of the products are written to the
457
+ // / output parameter \p rmd and the upper 32-bit parts are returned from the
458
+ // / function. \p src1 is first converted to an N-element vector of \p T1.
459
+ template <typename T, typename T0, typename T1, int N>
460
+ __ESIMD_API std::enable_if_t <__ESIMD_DNS::is_dword_type<T1>::value,
461
+ __ESIMD_NS::simd<T, N>>
462
+ imul (__ESIMD_NS::simd<T, N> &rmd, __ESIMD_NS::simd<T0, N> src0, T1 src1) {
463
+ __ESIMD_NS::simd<T1, N> Src1V = src1;
464
+ return esimd::imul_impl<T, T0, T1, N>(rmd, src0, Src1V);
465
+ }
466
+
467
+ // / Computes the 64-bit products of the 32-bit integer scalar \p src0 and the
468
+ // / 32-bit integer vector \p src1. The results are returned in two separate
469
+ // / 32-bit vectors: the low 32-bit parts of the products are written to the
470
+ // / output parameter \p rmd and the upper 32-bit parts are returned from the
471
+ // / function. \p src0 is first converted to an N-element vector of \p T0.
472
+ template <typename T, typename T0, typename T1, int N>
473
+ __ESIMD_API std::enable_if_t <__ESIMD_DNS::is_dword_type<T0>::value,
474
+ __ESIMD_NS::simd<T, N>>
475
+ imul (__ESIMD_NS::simd<T, N> &rmd, T0 src0, __ESIMD_NS::simd<T1, N> src1) {
476
+ __ESIMD_NS::simd<T0, N> Src0V = src0;
477
+ return esimd::imul_impl<T, T0, T1, N>(rmd, Src0V, src1);
478
+ }
479
+
480
+ // / Computes the 64-bit product of the two scalar 32-bit integer values
480
+ // / \p src0 and \p src1. The product is returned as two separate 32-bit scalars:
481
+ // / the low 32-bit part is written to the output parameter \p rmd and the upper
482
+ // / 32-bit part is returned. The scalars are wrapped in 1-element vectors for imul_impl.
483
+ template <typename T, typename T0, typename T1>
484
+ __ESIMD_API std::enable_if_t <__ESIMD_DNS::is_dword_type<T>::value &&
485
+ __ESIMD_DNS::is_dword_type<T0>::value &&
486
+ __ESIMD_DNS::is_dword_type<T1>::value,
487
+ T>
488
+ imul (T &rmd, T0 src0, T1 src1) {
489
+ __ESIMD_NS::simd<T, 1 > RmdV = rmd;
490
+ __ESIMD_NS::simd<T0, 1 > Src0V = src0;
491
+ __ESIMD_NS::simd<T1, 1 > Src1V = src1;
492
+ __ESIMD_NS::simd<T, 1 > Res =
493
+ esimd::imul_impl<T, T0, T1, 1 >(RmdV, Src0V, Src1V);
494
+ rmd = RmdV[0 ]; // copy the low part written by imul_impl back into the caller's scalar
495
+ return Res[0 ]; // the single element of the upper-part vector
485
497
}
486
498
487
499
// / Integral quotient (vector version)
0 commit comments