Skip to content

[SYCL][ESIMD] Add new raw_send APIs moving compile time params to template params #10167

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 151 additions & 6 deletions sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,50 @@ raw_sends(__ESIMD_NS::simd<T1, n1> msgDst, __ESIMD_NS::simd<T2, n2> msgSrc0,
msgDesc, msgSrc0.data(), msgSrc1.data(), msgDst.data());
}

/// Raw sends - the "split" flavour of raw send, i.e. the one that takes two
/// source operands. Execution size, SFID, GRF counts and the EOT/sendc flags
/// are supplied as template arguments.
///
/// @tparam execSize is the execution size.
/// @tparam sfid is the shared function ID.
/// @tparam numSrc0 is the number of GRFs for source-0.
/// @tparam numSrc1 is the number of GRFs for source-1.
/// @tparam numDst is the number of GRFs for destination.
/// @tparam isEOT is the flag that indicates whether this is an EOT message
/// (optional - defaults to 0).
/// @tparam isSendc is the flag that indicates whether sendc should be used
/// (optional - defaults to 0).
/// @param msgDst is the old value of the destination operand.
/// @param msgSrc0 is the first source operand of send message.
/// @param msgSrc1 is the second source operand of send message.
/// @param exDesc is the extended message descriptor.
/// @param msgDesc is the message descriptor.
/// @param mask is the predicate to specify enabled channels (optional -
/// defaults to on).
/// @return the vector value read from memory.
template <uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numSrc1,
          uint8_t numDst, uint8_t isEOT = 0, uint8_t isSendc = 0, typename T1,
          int n1, typename T2, int n2, typename T3, int n3, int N = 16>
__ESIMD_API __ESIMD_NS::simd<T1, n1>
raw_sends(__ESIMD_NS::simd<T1, n1> msgDst, __ESIMD_NS::simd<T2, n2> msgSrc0,
          __ESIMD_NS::simd<T3, n3> msgSrc1, uint32_t exDesc, uint32_t msgDesc,
          __ESIMD_NS::simd_mask<N> mask = 1) {
  // Byte sizes of the three operands; each must be a multiple of 32 bytes.
  constexpr unsigned DstBytes = n1 * sizeof(T1);
  constexpr unsigned Src0Bytes = n2 * sizeof(T2);
  constexpr unsigned Src1Bytes = n3 * sizeof(T3);
  static_assert(DstBytes % 32 == 0, "Invalid size for raw send rspVar");
  static_assert(Src0Bytes % 32 == 0, "Invalid size for raw send msgSrc0");
  static_assert(Src1Bytes % 32 == 0, "Invalid size for raw send msgSrc1");

  // Raw element types handed to the intrinsic.
  using DstRawT = __ESIMD_DNS::__raw_t<T1>;
  using Src0RawT = __ESIMD_DNS::__raw_t<T2>;
  using Src1RawT = __ESIMD_DNS::__raw_t<T3>;

  // Bit 1 holds the EOT flag, bit 0 holds the sendc flag.
  constexpr uint8_t SendModifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);

  return __esimd_raw_sends2<DstRawT, n1, Src0RawT, n2, Src1RawT, n3, N>(
      SendModifier, execSize, mask.data(), numSrc0, numSrc1, numDst, sfid,
      exDesc, msgDesc, msgSrc0.data(), msgSrc1.data(), msgDst.data());
}

/// Raw send.
///
/// @param msgDst is the old value of the destination operand.
Expand Down Expand Up @@ -129,6 +173,43 @@ raw_send(__ESIMD_NS::simd<T1, n1> msgDst, __ESIMD_NS::simd<T2, n2> msgSrc0,
msgSrc0.data(), msgDst.data());
}

/// Raw send with a single source operand and a destination. Execution size,
/// SFID, GRF counts and the EOT/sendc flags are supplied as template
/// arguments.
///
/// @tparam execSize is the execution size.
/// @tparam sfid is the shared function ID.
/// @tparam numSrc0 is the number of GRFs for source-0.
/// @tparam numDst is the number of GRFs for destination.
/// @tparam isEOT is the flag that indicates whether this is an EOT message
/// (optional - defaults to 0).
/// @tparam isSendc is the flag that indicates whether sendc should be used
/// (optional - defaults to 0).
/// @param msgDst is the old value of the destination operand.
/// @param msgSrc0 is the first source operand of send message.
/// @param exDesc is the extended message descriptor.
/// @param msgDesc is the message descriptor.
/// @param mask is the predicate to specify enabled channels (optional -
/// defaults to on).
/// @return the vector value read from memory.
template <uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numDst,
          uint8_t isEOT = 0, uint8_t isSendc = 0, typename T1, int n1,
          typename T2, int n2, int N = 16>
__ESIMD_API __ESIMD_NS::simd<T1, n1>
raw_send(__ESIMD_NS::simd<T1, n1> msgDst, __ESIMD_NS::simd<T2, n2> msgSrc0,
         uint32_t exDesc, uint32_t msgDesc, __ESIMD_NS::simd_mask<N> mask = 1) {
  // Byte sizes of the operands; each must be a multiple of 32 bytes.
  constexpr unsigned DstBytes = n1 * sizeof(T1);
  constexpr unsigned Src0Bytes = n2 * sizeof(T2);
  static_assert(DstBytes % 32 == 0, "Invalid size for raw send rspVar");
  static_assert(Src0Bytes % 32 == 0, "Invalid size for raw send msgSrc0");

  // Raw element types handed to the intrinsic.
  using DstRawT = __ESIMD_DNS::__raw_t<T1>;
  using Src0RawT = __ESIMD_DNS::__raw_t<T2>;

  // Bit 1 holds the EOT flag, bit 0 holds the sendc flag.
  constexpr uint8_t SendModifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);

  return __esimd_raw_send2<DstRawT, n1, Src0RawT, n2, N>(
      SendModifier, execSize, mask.data(), numSrc0, numDst, sfid, exDesc,
      msgDesc, msgSrc0.data(), msgDst.data());
}

/// Raw sends. "s" suffix designates "split" variant - i.e. two sources.
///
/// @param msgSrc0 is the first source operand of send message.
Expand Down Expand Up @@ -169,6 +250,43 @@ raw_sends(__ESIMD_NS::simd<T1, n1> msgSrc0, __ESIMD_NS::simd<T2, n2> msgSrc1,
msgSrc0.data(), msgSrc1.data());
}

/// Raw sends - the "split" flavour of raw send, i.e. the one that takes two
/// source operands. This overload has no destination operand and returns
/// nothing. Execution size, SFID, GRF counts and the EOT/sendc flags are
/// supplied as template arguments.
///
/// @tparam execSize is the execution size.
/// @tparam sfid is the shared function ID.
/// @tparam numSrc0 is the number of GRFs for source-0.
/// @tparam numSrc1 is the number of GRFs for source-1.
/// @tparam isEOT is the flag that indicates whether this is an EOT message
/// (optional - defaults to 0).
/// @tparam isSendc is the flag that indicates whether sendc should be used
/// (optional - defaults to 0).
/// @param msgSrc0 is the first source operand of send message.
/// @param msgSrc1 is the second source operand of send message.
/// @param exDesc is the extended message descriptor.
/// @param msgDesc is the message descriptor.
/// @param mask is the predicate to specify enabled channels (optional -
/// defaults to on).
template <uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numSrc1,
          uint8_t isEOT = 0, uint8_t isSendc = 0, typename T1, int n1,
          typename T2, int n2, int N = 16>
__ESIMD_API void raw_sends(__ESIMD_NS::simd<T1, n1> msgSrc0,
                           __ESIMD_NS::simd<T2, n2> msgSrc1, uint32_t exDesc,
                           uint32_t msgDesc,
                           __ESIMD_NS::simd_mask<N> mask = 1) {
  // Byte sizes of the two sources; each must be a multiple of 32 bytes.
  constexpr unsigned Src0Bytes = n1 * sizeof(T1);
  constexpr unsigned Src1Bytes = n2 * sizeof(T2);
  static_assert(Src0Bytes % 32 == 0, "Invalid size for raw send msgSrc0");
  static_assert(Src1Bytes % 32 == 0, "Invalid size for raw send msgSrc1");

  // Raw element types handed to the intrinsic.
  using Src0RawT = __ESIMD_DNS::__raw_t<T1>;
  using Src1RawT = __ESIMD_DNS::__raw_t<T2>;

  // Bit 1 holds the EOT flag, bit 0 holds the sendc flag.
  constexpr uint8_t SendModifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);

  __esimd_raw_sends2_noresult<Src0RawT, n1, Src1RawT, n2, N>(
      SendModifier, execSize, mask.data(), numSrc0, numSrc1, sfid, exDesc,
      msgDesc, msgSrc0.data(), msgSrc1.data());
}

/// Raw send. Generates a \c send or \c sendc instruction for the message
/// gateway.
///
Expand Down Expand Up @@ -201,6 +319,34 @@ raw_send(__ESIMD_NS::simd<T1, n1> msgSrc0, uint32_t exDesc, uint32_t msgDesc,
msgSrc0.data());
}

/// Raw send. Generates a \c send or \c sendc instruction for the message
/// gateway. This overload has a single source operand, no destination, and
/// returns nothing. Execution size, SFID, GRF count and the EOT/sendc flags
/// are supplied as template arguments.
///
/// @tparam execSize is the execution size.
/// @tparam sfid is the shared function ID.
/// @tparam numSrc0 is the number of GRFs for source-0.
/// @tparam isEOT is the flag that indicates whether this is an EOT message
/// (optional - defaults to 0).
/// @tparam isSendc is the flag that indicates whether sendc should be used
/// (optional - defaults to 0).
/// @param msgSrc0 is the first source operand of send message.
/// @param exDesc is the extended message descriptor.
/// @param msgDesc is the message descriptor.
/// @param mask is the predicate to specify enabled channels (optional -
/// defaults to on).
template <uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t isEOT = 0,
          uint8_t isSendc = 0, typename T1, int n1, int N = 16>
__ESIMD_API void raw_send(__ESIMD_NS::simd<T1, n1> msgSrc0, uint32_t exDesc,
                          uint32_t msgDesc, __ESIMD_NS::simd_mask<N> mask = 1) {
  // The source must span a multiple of 32 bytes.
  constexpr unsigned Src0Bytes = n1 * sizeof(T1);
  static_assert(Src0Bytes % 32 == 0, "Invalid size for raw send msgSrc0");

  // Raw element type handed to the intrinsic.
  using Src0RawT = __ESIMD_DNS::__raw_t<T1>;

  // Bit 1 holds the EOT flag, bit 0 holds the sendc flag.
  constexpr uint8_t SendModifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);

  __esimd_raw_send2_noresult<Src0RawT, n1, N>(SendModifier, execSize,
                                              mask.data(), numSrc0, sfid,
                                              exDesc, msgDesc, msgSrc0.data());
}

/// @} sycl_esimd_raw_send

/// @defgroup sycl_esimd_memory_nbarrier Named barrier APIs.
Expand Down Expand Up @@ -2628,9 +2774,8 @@ ESIMD_INLINE SYCL_ESIMD_FUNCTION __ESIMD_NS::simd<T, N> lsc_load_2d(
constexpr uint8_t sfid = 0xF;
constexpr uint8_t numSrc0 = 0x1;
constexpr uint8_t numDst = (N * sizeof(T)) / 64;
__ESIMD_NS::simd<T, ActualN> Raw =
raw_send(oldDst, payload.get_raw_data(), exDesc, desc, execSize, sfid,
numSrc0, numDst);
// Template arguments must follow raw_send's declared order
// <execSize, sfid, numSrc0, numDst>; all four are uint8_t, so a wrong order
// compiles silently and encodes the wrong SFID and GRF counts.
__ESIMD_NS::simd<T, ActualN> Raw = raw_send<execSize, sfid, numSrc0, numDst>(
    oldDst, payload.get_raw_data(), exDesc, desc);

if constexpr (ActualN == N) {
return Raw;
Expand Down Expand Up @@ -2693,7 +2838,7 @@ ESIMD_INLINE SYCL_ESIMD_FUNCTION void lsc_prefetch_2d(
constexpr uint8_t execSize = 0x0;
constexpr uint8_t sfid = 0xF;
constexpr uint8_t numDst = (N * sizeof(T)) / 64;
raw_send(payload.get_raw_data(), exDesc, desc, execSize, sfid, numDst);
// Template arguments must follow raw_send's declared order
// <execSize, sfid, numSrc0>. The local named numDst is passed in the numSrc0
// slot, exactly as the runtime-argument call this replaces did.
raw_send<execSize, sfid, numDst>(payload.get_raw_data(), exDesc, desc);
}

/// A variation of \c 2D stateless block store \c with parameters passed as
Expand Down Expand Up @@ -2733,8 +2878,8 @@ lsc_store_2d(config_2d_mem_access<T, BlockWidth, BlockHeight, NBlocks> &payload,
constexpr uint8_t numSrc0 = 0x1;
constexpr uint8_t numSrc1 = (N * sizeof(T)) / 64;

raw_sends(payload.get_raw_data(), Data, exDesc, desc, execSize, sfid, numSrc0,
numSrc1);
// Template arguments must follow raw_sends' declared order
// <execSize, sfid, numSrc0, numSrc1>; all are uint8_t, so a wrong order
// compiles silently and encodes the wrong SFID and GRF counts.
raw_sends<execSize, sfid, numSrc0, numSrc1>(payload.get_raw_data(), Data,
                                            exDesc, desc);
}

/// SLM atomic.
Expand Down
14 changes: 11 additions & 3 deletions sycl/test-e2e/ESIMD/histogram_raw_send.cpp
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
//==------------ histogram_raw_send.cpp - DPC++ ESIMD on-device test
//-------==//
//==-histogram_raw_send.cpp - DPC++ ESIMD on-device test-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------===//
// REQUIRES: gpu-intel-gen9
// UNSUPPORTED: gpu-intel-dg1,gpu-intel-dg2,gpu-intel-pvc
// UNSUPPORTED: ze_debug
// RUN: %{build} -o %t1.out
// RUN: %{run} %t1.out
// RUN: %{build} -DUSE_CONSTEXPR_API -o %t2.out
// RUN: %{run} %t2.out

// The test checks raw send functionality with atomic write implementation
// on SKL. It does not work on DG1 due to send instruction incompatibility.
Expand Down Expand Up @@ -85,9 +86,16 @@ ESIMD_INLINE void atomic_write(T *bins, simd<unsigned, n> offset,
constexpr uint8_t numSrc1 = 0x1;
constexpr uint8_t isEOT = 0;
constexpr uint8_t isSendc = 0;

#ifdef USE_CONSTEXPR_API
experimental::esimd::raw_sends<execSize, sfid, numSrc0, numSrc1, numDst,
isEOT, isSendc>(oldDst, vAddr, src0, exDesc,
desc, pred);
#else
experimental::esimd::raw_sends(oldDst, vAddr, src0, exDesc, desc, execSize,
sfid, numSrc0, numSrc1, numDst, isEOT, isSendc,
pred);
#endif
}

int main(int argc, char *argv[]) {
Expand Down
18 changes: 17 additions & 1 deletion sycl/test-e2e/ESIMD/vadd_raw_send.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
// XFAIL: esimd_emulator
// RUN: %{build} -fno-sycl-esimd-force-stateless-mem -o %t1.out
// RUN: %{run} %t1.out

// RUN: %{build} -fno-sycl-esimd-force-stateless-mem -DUSE_CONSTEXPR_API -o %t2.out
// RUN: %{run} %t2.out
// The test checks raw send functionality with block read/write implementation
// on SKL. It does not work on DG1 due to send instruction incompatibility.

Expand Down Expand Up @@ -40,8 +41,13 @@ ESIMD_INLINE simd<T, N> dwaligned_block_read(AccessorTy acc,
constexpr uint8_t sfid = 0x0;
constexpr uint8_t numSrc0 = 0x1;
constexpr uint8_t numDst = 0x2;
#ifdef USE_CONSTEXPR_API
return experimental::esimd::raw_send<execSize, sfid, numSrc0, numDst>(
oldDst, src0, exDesc, desc);
#else
return experimental::esimd::raw_send(oldDst, src0, exDesc, desc, execSize,
sfid, numSrc0, numDst);
#endif
}

template <typename T, int N, typename AccessorTy>
Expand All @@ -57,8 +63,13 @@ ESIMD_INLINE void block_write1(AccessorTy acc, unsigned int offset,
constexpr uint8_t sfid = 0x0;
constexpr uint8_t numSrc0 = 0x1;
constexpr uint8_t numSrc1 = 0x1;
#ifdef USE_CONSTEXPR_API
return experimental::esimd::raw_sends<execSize, sfid, numSrc0, numSrc1>(
src0, data, exDesc, desc);
#else
return experimental::esimd::raw_sends(src0, data, exDesc, desc, execSize,
sfid, numSrc0, numSrc1);
#endif
}

template <typename T, int N, typename AccessorTy>
Expand All @@ -77,8 +88,13 @@ ESIMD_INLINE void block_write2(AccessorTy acc, unsigned int offset,
constexpr uint8_t execSize = 0x83;
constexpr uint8_t sfid = 0x0;
constexpr uint8_t numSrc0 = 0x2;
#ifdef USE_CONSTEXPR_API
return experimental::esimd::raw_send<execSize, sfid, numSrc0>(src0, exDesc,
desc);
#else
return experimental::esimd::raw_send(src0, exDesc, desc, execSize, sfid,
numSrc0);
#endif
}

template <typename T> int test(queue q) {
Expand Down