Skip to content

Commit 2517aa1

Browse files
authored
[ESIMD] Fix imul() implementation and allow imul() for 2 vectors (#8176)
Previously the operation supported only vector-by-scalar, scalar-by-vector and scalar-by-scalar multiplication. This patch enables vector-by-vector multiplication as well. The corresponding LIT test PR: intel/llvm-test-suite#1566 Signed-off-by: Vyacheslav N Klochkov <[email protected]> Signed-off-by: Vyacheslav N Klochkov <[email protected]>
1 parent 823e1b7 commit 2517aa1

File tree

1 file changed

+86
-74
lines changed
  • sycl/include/sycl/ext/intel/experimental/esimd

1 file changed

+86
-74
lines changed

sycl/include/sycl/ext/intel/experimental/esimd/math.hpp

Lines changed: 86 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -406,82 +406,94 @@ ror(T1 src0, T2 src1) {
406406
/// @addtogroup sycl_esimd_math
407407
/// @{
408408

409-
// imul
410-
#ifndef ESIMD_HAS_LONG_LONG
411-
// use mulh instruction for high half
412-
template <typename T0, typename T1, typename U, int SZ>
413-
ESIMD_NODEBUG
414-
ESIMD_INLINE std::enable_if_t<__ESIMD_DNS::is_dword_type<T0>::value &&
415-
__ESIMD_DNS::is_dword_type<T1>::value &&
416-
__ESIMD_DNS::is_dword_type<U>::value,
417-
__ESIMD_NS::simd<T0, SZ>>
418-
imul(__ESIMD_NS::simd<T0, SZ> &rmd, __ESIMD_NS::simd<T1, SZ> src0, U src1) {
419-
using ComputationTy = __ESIMD_DNS::computation_type_t<decltype(src0), U>;
420-
ComputationTy Src0 = src0;
421-
ComputationTy Src1 = src1;
422-
rmd = Src0 * Src1;
423-
if constexpr (std::is_unsigned<T0>::value)
424-
return __esimd_umulh(Src0.data(), Src1.data());
425-
else
426-
return __esimd_smulh(Src0.data(), Src1.data());
427-
}
428-
429-
#else
430-
// imul bdw+ version: use qw=dw*dw multiply.
431-
// We need to special case SZ==1 to avoid "error: when select size is 1, the
432-
// stride must also be 1" on the selects.
433-
template <typename T0, typename T1, typename U, int SZ>
434-
__ESIMD_API
435-
std::enable_if_t<__ESIMD_DNS::is_dword_type<T0>::value &&
436-
__ESIMD_DNS::is_dword_type<T1>::value &&
437-
__ESIMD_DNS::is_dword_type<U>::value && SZ == 1,
438-
__ESIMD_NS::simd<T0, SZ>>
439-
imul(__ESIMD_NS::simd<T0, SZ> &rmd, __ESIMD_NS::simd<T1, SZ> src0, U src1) {
440-
using ComputationTy =
441-
__ESIMD_DNS::computation_type_t<decltype(rmd), long long>;
442-
ComputationTy Product = convert<long long>(src0);
443-
Product *= src1;
444-
rmd = Product.bit_cast_view<T0>().select<1, 1>[0];
445-
return Product.bit_cast_view<T0>().select<1, 1>[1];
446-
}
447-
448-
template <typename T0, typename T1, typename U, int SZ>
449-
__ESIMD_API
450-
std::enable_if_t<__ESIMD_DNS::is_dword_type<T0>::value &&
451-
__ESIMD_DNS::is_dword_type<T1>::value &&
452-
__ESIMD_DNS::is_dword_type<U>::value && SZ != 1,
453-
__ESIMD_NS::simd<T0, SZ>>
454-
imul(__ESIMD_NS::simd<T0, SZ> &rmd, __ESIMD_NS::simd<T1, SZ> src0, U src1) {
455-
using ComputationTy =
456-
__ESIMD_DNS::computation_type_t<decltype(rmd), long long>;
457-
ComputationTy Product = convert<long long>(src0);
458-
Product *= src1;
459-
rmd = Product.bit_cast_view<T0>().select<SZ, 2>(0);
460-
return Product.bit_cast_view<T0>().select<SZ, 2>(1);
461-
}
462-
#endif
463-
464-
// TODO: document
465-
template <typename T0, typename T1, typename U, int SZ>
466-
__ESIMD_API std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<U>::value,
467-
__ESIMD_NS::simd<T0, SZ>>
468-
imul(__ESIMD_NS::simd<T0, SZ> &rmd, U src0, __ESIMD_NS::simd<T1, SZ> src1) {
469-
return esimd::imul(rmd, src1, src0);
409+
/// Computes the 64-bit result of two 32-bit element vectors \p src0 and
410+
/// \p src1 multiplication. The result is returned in two separate 32-bit
411+
/// vectors. The low 32-bit parts of the results are written to the output
412+
/// parameter \p rmd and the upper parts of the results are returned from
413+
/// the function.
414+
template <typename T, typename T0, typename T1, int N>
415+
__ESIMD_API __ESIMD_NS::simd<T, N> imul_impl(__ESIMD_NS::simd<T, N> &rmd,
416+
__ESIMD_NS::simd<T0, N> src0,
417+
__ESIMD_NS::simd<T1, N> src1) {
418+
static_assert(__ESIMD_DNS::is_dword_type<T>::value &&
419+
__ESIMD_DNS::is_dword_type<T0>::value &&
420+
__ESIMD_DNS::is_dword_type<T1>::value,
421+
"expected 32-bit integer vector operands.");
422+
using Comp32T = __ESIMD_DNS::computation_type_t<T0, T1>;
423+
auto Src0 = src0.template bit_cast_view<Comp32T>();
424+
auto Src1 = src1.template bit_cast_view<Comp32T>();
425+
426+
// Compute the result using 64-bit multiplication operation.
427+
using Comp64T =
428+
std::conditional_t<std::is_signed_v<Comp32T>, int64_t, uint64_t>;
429+
__ESIMD_NS::simd<Comp64T, N> Product64 = Src0;
430+
Product64 *= Src1;
431+
432+
// Split the 32-bit high and low parts to return them from this function.
433+
auto Product32 = Product64.template bit_cast_view<T>();
434+
if constexpr (N == 1) {
435+
rmd = Product32[0];
436+
return Product32[1];
437+
} else {
438+
rmd = Product32.template select<N, 2>(0);
439+
return Product32.template select<N, 2>(1);
440+
}
470441
}
471442

472-
// TODO: document
473-
template <typename T0, typename T, typename U>
474-
ESIMD_NODEBUG
475-
ESIMD_INLINE std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T>::value &&
476-
__ESIMD_DNS::is_esimd_scalar<U>::value &&
477-
__ESIMD_DNS::is_esimd_scalar<T0>::value,
478-
T0>
479-
imul(__ESIMD_NS::simd<T0, 1> &rmd, T src0, U src1) {
480-
__ESIMD_NS::simd<T, 1> src_0 = src0;
481-
__ESIMD_NS::simd<U, 1> src_1 = src1;
482-
__ESIMD_NS::simd<T0, 1> res =
483-
esimd::imul(rmd, src_0.select_all(), src_1.select_all());
484-
return res[0];
443+
/// Computes the 64-bit multiply result of two 32-bit integer vectors \p src0
444+
/// and \p src1. The result is returned in two separate 32-bit vectors.
445+
/// The low 32-bit parts of the result are written to the output parameter
446+
/// \p rmd and the upper parts of the result are returned from the function.
447+
template <typename T, typename T0, typename T1, int N>
448+
__ESIMD_API __ESIMD_NS::simd<T, N> imul(__ESIMD_NS::simd<T, N> &rmd,
449+
__ESIMD_NS::simd<T0, N> src0,
450+
__ESIMD_NS::simd<T1, N> src1) {
451+
return imul_impl<T, T0, T1, N>(rmd, src0, src1);
452+
}
453+
454+
/// Computes the 64-bit multiply result of 32-bit integer vector \p src0 and
455+
/// 32-bit integer scalar \p src1. The result is returned in two separate 32-bit
456+
/// vectors. The low 32-bit parts of the result is written to the output
457+
/// parameter \p rmd and the upper part of the results is returned from
458+
/// the function.
459+
template <typename T, typename T0, typename T1, int N>
460+
__ESIMD_API std::enable_if_t<__ESIMD_DNS::is_dword_type<T1>::value,
461+
__ESIMD_NS::simd<T, N>>
462+
imul(__ESIMD_NS::simd<T, N> &rmd, __ESIMD_NS::simd<T0, N> src0, T1 src1) {
463+
__ESIMD_NS::simd<T1, N> Src1V = src1;
464+
return esimd::imul_impl<T, T0, T1, N>(rmd, src0, Src1V);
465+
}
466+
467+
/// Computes the 64-bit multiply result of a scalar 32-bit integer \p src0 and
468+
/// 32-bit integer vector \p src1. The result is returned in two separate 32-bit
469+
/// vectors. The low 32-bit parts of the result is written to the output
470+
/// parameter \p rmd and the upper part of the results is returned from
471+
/// the function.
472+
template <typename T, typename T0, typename T1, int N>
473+
__ESIMD_API std::enable_if_t<__ESIMD_DNS::is_dword_type<T0>::value,
474+
__ESIMD_NS::simd<T, N>>
475+
imul(__ESIMD_NS::simd<T, N> &rmd, T0 src0, __ESIMD_NS::simd<T1, N> src1) {
476+
__ESIMD_NS::simd<T0, N> Src0V = src0;
477+
return esimd::imul_impl<T, T0, T1, N>(rmd, Src0V, src1);
478+
}
479+
480+
/// Computes the 64-bit multiply result of two scalar 32-bit integer values
481+
/// \p src0 and \p src1. The result is returned in two separate 32-bit scalars.
482+
/// The low 32-bit part of the result is written to the output parameter \p rmd
483+
/// and the upper part of the result is returned from the function.
484+
template <typename T, typename T0, typename T1>
485+
__ESIMD_API std::enable_if_t<__ESIMD_DNS::is_dword_type<T>::value &&
486+
__ESIMD_DNS::is_dword_type<T0>::value &&
487+
__ESIMD_DNS::is_dword_type<T1>::value,
488+
T>
489+
imul(T &rmd, T0 src0, T1 src1) {
490+
__ESIMD_NS::simd<T, 1> RmdV = rmd;
491+
__ESIMD_NS::simd<T0, 1> Src0V = src0;
492+
__ESIMD_NS::simd<T1, 1> Src1V = src1;
493+
__ESIMD_NS::simd<T, 1> Res =
494+
esimd::imul_impl<T, T0, T1, 1>(RmdV, Src0V, Src1V);
495+
rmd = RmdV[0];
496+
return Res[0];
485497
}
486498

487499
/// Integral quotient (vector version)

0 commit comments

Comments
 (0)