Skip to content

Commit 6c75e0e

Browse files
authored
[ESIMD] Add lsc_slm_block_load() with merging semantics (#8552)
The new lsc_slm_block_load() overload takes an additional operand, 'old_values', which holds the values the function returns when the predicate passed to it is 0. Corresponding LIT test: intel/llvm-test-suite#1637. Signed-off-by: Vyacheslav N Klochkov <[email protected]>
1 parent 27ec896 commit 6c75e0e

File tree

1 file changed

+50
-11
lines changed
  • sycl/include/sycl/ext/intel/experimental/esimd

1 file changed

+50
-11
lines changed

sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp

Lines changed: 50 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -493,26 +493,65 @@ lsc_slm_gather(__ESIMD_NS::simd<uint32_t, N> offsets,
493493
/// @tparam NElts is the number of elements to load per address.
494494
/// @tparam DS is the data size.
495495
/// @param offset is the zero-based offset for SLM buffer in bytes.
496+
/// @param pred is the predicate; if it contains 0, then the actual load
497+
/// is not performed and the returned value is undefined.
496498
/// @return is a vector of type T and size NElts.
497499
///
498500
template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size>
499-
__ESIMD_API __ESIMD_NS::simd<T, NElts> lsc_slm_block_load(uint32_t offset) {
501+
__ESIMD_API __ESIMD_NS::simd<T, NElts>
502+
lsc_slm_block_load(uint32_t offset, __ESIMD_NS::simd_mask<1> pred = 1) {
500503
detail::check_lsc_vector_size<NElts>();
501504
detail::check_lsc_data_size<T, DS>();
502-
constexpr uint16_t _AddressScale = 1;
503-
constexpr int _ImmOffset = 0;
504-
constexpr lsc_data_size _DS = detail::finalize_data_size<T, DS>();
505-
static_assert(_DS == lsc_data_size::u32 || _DS == lsc_data_size::u64,
505+
constexpr uint16_t AddressScale = 1;
506+
constexpr int ImmOffset = 0;
507+
constexpr lsc_data_size FDS = detail::finalize_data_size<T, DS>();
508+
static_assert(FDS == lsc_data_size::u32 || FDS == lsc_data_size::u64,
506509
"Transposed load is supported only for data size u32 or u64");
507-
constexpr detail::lsc_vector_size _VS = detail::to_lsc_vector_size<NElts>();
508-
constexpr detail::lsc_data_order _Transposed =
509-
detail::lsc_data_order::transpose;
510+
constexpr detail::lsc_vector_size VS = detail::to_lsc_vector_size<NElts>();
511+
constexpr auto Transposed = detail::lsc_data_order::transpose;
510512
constexpr int N = 1;
511-
__ESIMD_NS::simd_mask<N> pred = 1;
512513
__ESIMD_NS::simd<uint32_t, N> offsets = offset;
513514
return __esimd_lsc_load_slm<T, cache_hint::none, cache_hint::none,
514-
_AddressScale, _ImmOffset, _DS, _VS, _Transposed,
515-
N>(pred.data(), offsets.data());
515+
AddressScale, ImmOffset, FDS, VS, Transposed, N>(
516+
pred.data(), offsets.data());
517+
}
518+
519+
/// Transposed SLM gather with 1 channel and merging semantics.
520+
/// Supported platforms: DG2, PVC
521+
/// VISA instruction: lsc_load.slm
522+
///
523+
/// Collects elements located at the given SLM offset and returns them
524+
/// as a single \ref simd object.
525+
///
526+
/// @tparam T is element type.
527+
/// @tparam NElts is the number of elements to load per address.
528+
/// @tparam DS is the data size.
529+
/// @param offset is the zero-based offset for SLM buffer in bytes.
530+
/// @param pred is the predicate; if it contains 0, then the actual load
531+
/// is not performed and \p old_values is returned.
532+
/// @param old_values contains the vector that is returned if
533+
/// the parameter \p pred contains 0.
534+
/// @return is a vector of type T and size NElts.
535+
///
536+
template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size>
537+
__ESIMD_API __ESIMD_NS::simd<T, NElts>
538+
lsc_slm_block_load(uint32_t offset, __ESIMD_NS::simd_mask<1> pred,
539+
__ESIMD_NS::simd<T, NElts> old_values) {
540+
// Validate the template arguments; presumably compile-time checks, since no
// runtime arguments are passed to either helper.
detail::check_lsc_vector_size<NElts>();
541+
detail::check_lsc_data_size<T, DS>();
542+
// Fixed addressing parameters handed to the intrinsic: scale of 1 and no
// immediate offset.
constexpr uint16_t AddressScale = 1;
543+
constexpr int ImmOffset = 0;
544+
// Resolve the data size to use from T and DS; only u32 and u64 pass the
// static_assert below.
constexpr lsc_data_size FDS = detail::finalize_data_size<T, DS>();
545+
static_assert(FDS == lsc_data_size::u32 || FDS == lsc_data_size::u64,
546+
"Transposed load is supported only for data size u32 or u64");
547+
constexpr detail::lsc_vector_size VS = detail::to_lsc_vector_size<NElts>();
548+
// The load is always issued with the transposed data order.
constexpr auto Transposed = detail::lsc_data_order::transpose;
549+
// A single lane: the scalar offset is wrapped in a 1-element vector to match
// the 1-element predicate.
constexpr int N = 1;
550+
__ESIMD_NS::simd<uint32_t, N> offsets = offset;
551+
// Forward the predicate, the offset vector and old_values to the merging SLM
// load intrinsic; old_values supplies the result when pred is 0 (see the
// @param old_values description for this overload).
return __esimd_lsc_load_merge_slm<T, cache_hint::none, cache_hint::none,
552+
AddressScale, ImmOffset, FDS, VS,
553+
Transposed, N>(pred.data(), offsets.data(),
554+
old_values.data());
516555
}
517556

518557
/// USM pointer gather.

0 commit comments

Comments
 (0)