Skip to content

[ESIMD] Fix imul() implementation and allow imul() for 2 vectors #8176

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 7, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 86 additions & 74 deletions sycl/include/sycl/ext/intel/experimental/esimd/math.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -406,82 +406,94 @@ ror(T1 src0, T2 src1) {
/// @addtogroup sycl_esimd_math
/// @{

// imul
#ifndef ESIMD_HAS_LONG_LONG
// use mulh instruction for high half
template <typename T0, typename T1, typename U, int SZ>
ESIMD_NODEBUG
ESIMD_INLINE std::enable_if_t<__ESIMD_DNS::is_dword_type<T0>::value &&
__ESIMD_DNS::is_dword_type<T1>::value &&
__ESIMD_DNS::is_dword_type<U>::value,
__ESIMD_NS::simd<T0, SZ>>
imul(__ESIMD_NS::simd<T0, SZ> &rmd, __ESIMD_NS::simd<T1, SZ> src0, U src1) {
using ComputationTy = __ESIMD_DNS::computation_type_t<decltype(src0), U>;
ComputationTy Src0 = src0;
ComputationTy Src1 = src1;
rmd = Src0 * Src1;
if constexpr (std::is_unsigned<T0>::value)
return __esimd_umulh(Src0.data(), Src1.data());
else
return __esimd_smulh(Src0.data(), Src1.data());
}

#else
// imul bdw+ version: use qw=dw*dw multiply.
// We need to special case SZ==1 to avoid "error: when select size is 1, the
// stride must also be 1" on the selects.
template <typename T0, typename T1, typename U, int SZ>
__ESIMD_API
std::enable_if_t<__ESIMD_DNS::is_dword_type<T0>::value &&
__ESIMD_DNS::is_dword_type<T1>::value &&
__ESIMD_DNS::is_dword_type<U>::value && SZ == 1,
__ESIMD_NS::simd<T0, SZ>>
imul(__ESIMD_NS::simd<T0, SZ> &rmd, __ESIMD_NS::simd<T1, SZ> src0, U src1) {
using ComputationTy =
__ESIMD_DNS::computation_type_t<decltype(rmd), long long>;
ComputationTy Product = convert<long long>(src0);
Product *= src1;
rmd = Product.bit_cast_view<T0>().select<1, 1>[0];
return Product.bit_cast_view<T0>().select<1, 1>[1];
}

template <typename T0, typename T1, typename U, int SZ>
__ESIMD_API
std::enable_if_t<__ESIMD_DNS::is_dword_type<T0>::value &&
__ESIMD_DNS::is_dword_type<T1>::value &&
__ESIMD_DNS::is_dword_type<U>::value && SZ != 1,
__ESIMD_NS::simd<T0, SZ>>
imul(__ESIMD_NS::simd<T0, SZ> &rmd, __ESIMD_NS::simd<T1, SZ> src0, U src1) {
using ComputationTy =
__ESIMD_DNS::computation_type_t<decltype(rmd), long long>;
ComputationTy Product = convert<long long>(src0);
Product *= src1;
rmd = Product.bit_cast_view<T0>().select<SZ, 2>(0);
return Product.bit_cast_view<T0>().select<SZ, 2>(1);
}
#endif

// TODO: document
template <typename T0, typename T1, typename U, int SZ>
__ESIMD_API std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<U>::value,
__ESIMD_NS::simd<T0, SZ>>
imul(__ESIMD_NS::simd<T0, SZ> &rmd, U src0, __ESIMD_NS::simd<T1, SZ> src1) {
return esimd::imul(rmd, src1, src0);
/// Computes the 64-bit result of two 32-bit element vectors \p src0 and
/// \p src1 multiplication. The result is returned in two separate 32-bit
/// vectors. The low 32-bit parts of the results are written to the output
/// parameter \p rmd and the upper parts of the results are returned from
/// the function.
template <typename T, typename T0, typename T1, int N>
__ESIMD_API __ESIMD_NS::simd<T, N> imul_impl(__ESIMD_NS::simd<T, N> &rmd,
__ESIMD_NS::simd<T0, N> src0,
__ESIMD_NS::simd<T1, N> src1) {
static_assert(__ESIMD_DNS::is_dword_type<T>::value &&
__ESIMD_DNS::is_dword_type<T0>::value &&
__ESIMD_DNS::is_dword_type<T1>::value,
"expected 32-bit integer vector operands.");
using Comp32T = __ESIMD_DNS::computation_type_t<T0, T1>;
auto Src0 = src0.template bit_cast_view<Comp32T>();
auto Src1 = src1.template bit_cast_view<Comp32T>();

// Compute the result using 64-bit multiplication operation.
using Comp64T =
std::conditional_t<std::is_signed_v<Comp32T>, int64_t, uint64_t>;
__ESIMD_NS::simd<Comp64T, N> Product64 = Src0;
Product64 *= Src1;

// Split the 32-bit high and low parts to return them from this function.
auto Product32 = Product64.template bit_cast_view<T>();
if constexpr (N == 1) {
rmd = Product32[0];
return Product32[1];
} else {
rmd = Product32.template select<N, 2>(0);
return Product32.template select<N, 2>(1);
}
}

// TODO: document
template <typename T0, typename T, typename U>
ESIMD_NODEBUG
ESIMD_INLINE std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T>::value &&
__ESIMD_DNS::is_esimd_scalar<U>::value &&
__ESIMD_DNS::is_esimd_scalar<T0>::value,
T0>
imul(__ESIMD_NS::simd<T0, 1> &rmd, T src0, U src1) {
__ESIMD_NS::simd<T, 1> src_0 = src0;
__ESIMD_NS::simd<U, 1> src_1 = src1;
__ESIMD_NS::simd<T0, 1> res =
esimd::imul(rmd, src_0.select_all(), src_1.select_all());
return res[0];
/// Computes the 64-bit multiply result of two 32-bit integer vectors \p src0
/// and \p src1. The result is returned in two separate 32-bit vectors.
/// The low 32-bit parts of the result are written to the output parameter
/// \p rmd and the upper parts of the result are returned from the function.
template <typename T, typename T0, typename T1, int N>
__ESIMD_API __ESIMD_NS::simd<T, N> imul(__ESIMD_NS::simd<T, N> &rmd,
__ESIMD_NS::simd<T0, N> src0,
__ESIMD_NS::simd<T1, N> src1) {
return imul_impl<T, T0, T1, N>(rmd, src0, src1);
}

/// Computes the 64-bit multiply result of 32-bit integer vector \p src0 and
/// 32-bit integer scalar \p src1. The result is returned in two separate 32-bit
/// vectors. The low 32-bit parts of the result is written to the output
/// parameter \p rmd and the upper part of the results is returned from
/// the function.
template <typename T, typename T0, typename T1, int N>
__ESIMD_API std::enable_if_t<__ESIMD_DNS::is_dword_type<T1>::value,
__ESIMD_NS::simd<T, N>>
imul(__ESIMD_NS::simd<T, N> &rmd, __ESIMD_NS::simd<T0, N> src0, T1 src1) {
__ESIMD_NS::simd<T1, N> Src1V = src1;
return esimd::imul_impl<T, T0, T1, N>(rmd, src0, Src1V);
}

/// Computes the 64-bit multiply result of a scalar 32-bit integer \p src0 and
/// 32-bit integer vector \p src1. The result is returned in two separate 32-bit
/// vectors. The low 32-bit parts of the result is written to the output
/// parameter \p rmd and the upper part of the results is returned from
/// the function.
template <typename T, typename T0, typename T1, int N>
__ESIMD_API std::enable_if_t<__ESIMD_DNS::is_dword_type<T0>::value,
__ESIMD_NS::simd<T, N>>
imul(__ESIMD_NS::simd<T, N> &rmd, T0 src0, __ESIMD_NS::simd<T1, N> src1) {
__ESIMD_NS::simd<T0, N> Src0V = src0;
return esimd::imul_impl<T, T0, T1, N>(rmd, Src0V, src1);
}

/// Computes the 64-bit multiply result of two scalar 32-bit integer values
/// \p src0 and \p src1. The result is returned in two separate 32-bit scalars.
/// The low 32-bit part of the result is written to the output parameter \p rmd
/// and the upper part of the result is returned from the function.
template <typename T, typename T0, typename T1>
__ESIMD_API std::enable_if_t<__ESIMD_DNS::is_dword_type<T>::value &&
__ESIMD_DNS::is_dword_type<T0>::value &&
__ESIMD_DNS::is_dword_type<T1>::value,
T>
imul(T &rmd, T0 src0, T1 src1) {
__ESIMD_NS::simd<T, 1> RmdV = rmd;
__ESIMD_NS::simd<T0, 1> Src0V = src0;
__ESIMD_NS::simd<T1, 1> Src1V = src1;
__ESIMD_NS::simd<T, 1> Res =
esimd::imul_impl<T, T0, T1, 1>(RmdV, Src0V, Src1V);
rmd = RmdV[0];
return Res[0];
}

/// Integral quotient (vector version)
Expand Down