Skip to content

Commit 1f89573

Browse files
Added SLM gather/scatter for 1- and 2-byte block sizes
1 parent e9a6bf9 commit 1f89573

File tree

2 files changed

+64
-16
lines changed

2 files changed

+64
-16
lines changed

sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp

Lines changed: 62 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -649,25 +649,73 @@ inline ESIMD_NODEBUG void esimd_sbarrier(split_barrier_action flag) {
649649
/// Declare per-work-group slm size.
650650
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void slm_init(uint32_t size);
651651

652-
/// SLM gather.
653-
///
654-
/// Only allow simd-16 and simd-32.
655-
template <typename T, int n>
652+
/// SLM gather (version for 4-byte block size).
653+
/// \tparam T element type of the input vector.
654+
/// \tparam N size of the \p offsets , \p pred and returned vectors. Must be 16
655+
/// or 32.
656+
/// @param offsets byte-offsets within the SLM.
657+
/// @param pred predication control used for masking lanes.
658+
/// @return vector of read values of type \p T.
659+
/// \ingroup sycl_esimd
660+
template <typename T, int N>
656661
ESIMD_INLINE ESIMD_NODEBUG
657-
typename sycl::detail::enable_if_t<(n == 16 || n == 32) && (sizeof(T) <= 4),
658-
simd<T, n>>
659-
slm_load(simd<uint32_t, n> offsets, simd<uint16_t, n> pred = 1) {
660-
return __esimd_slm_read<T, n>(offsets.data(), pred.data());
662+
typename sycl::detail::enable_if_t<(N == 16 || N == 32) && (sizeof(T) == 4),
663+
simd<T, N>>
664+
slm_load(simd<uint32_t, N> offsets, simd<uint16_t, N> pred = 1) {
665+
return __esimd_slm_read<T, N>(offsets.data(), pred.data());
661666
}
662667

663-
/// SLM scatter.
664-
template <typename T, int n>
668+
/// SLM gather (version for 1- and 2-byte block size).
669+
/// \tparam T element type of the input vector.
670+
/// \tparam N size of the \p offsets , \p pred and returned vectors. Must be 16
671+
/// or 32.
672+
/// @param offsets byte-offsets within the SLM.
673+
/// @param pred predication control used for masking lanes.
674+
/// @return vector of read values of type \p T.
675+
/// \ingroup sycl_esimd
676+
template <typename T, int N>
677+
ESIMD_INLINE ESIMD_NODEBUG typename sycl::detail::enable_if_t<
678+
(N == 16 || N == 32) && (sizeof(T) == 1 || sizeof(T) == 2), simd<T, N>>
679+
slm_load(simd<uint32_t, N> offsets, simd<uint16_t, N> pred = 1) {
680+
typedef typename detail::dword_type<T>::type T1;
681+
simd<T1, N> temp = __esimd_slm_read<T1, N>(offsets.data(), pred.data());
682+
simd<T, N> res = temp;
683+
return res;
684+
}
685+
686+
/// SLM scatter (version for 4-byte block size).
687+
/// \tparam T element type of the input vector.
688+
/// \tparam N size of the \p offsets , \p pred and \p vals vectors. Must be 16
689+
/// or 32.
690+
/// @param vals values to be written.
691+
/// @param offsets byte-offsets within the SLM.
692+
/// @param pred predication control used for masking lanes.
693+
/// \ingroup sycl_esimd
694+
template <typename T, int N>
665695
ESIMD_INLINE ESIMD_NODEBUG
666-
typename sycl::detail::enable_if_t<(n == 16 || n == 32) && (sizeof(T) <= 4),
696+
typename sycl::detail::enable_if_t<(N == 16 || N == 32) && (sizeof(T) == 4),
667697
void>
668-
slm_store(simd<T, n> vals, simd<uint32_t, n> offsets,
669-
simd<uint16_t, n> pred = 1) {
670-
__esimd_slm_write<T, n>(offsets.data(), vals.data(), pred.data());
698+
slm_store(simd<T, N> vals, simd<uint32_t, N> offsets,
699+
simd<uint16_t, N> pred = 1) {
700+
__esimd_slm_write<T, N>(offsets.data(), vals.data(), pred.data());
701+
}
702+
703+
/// SLM scatter (version for 1- and 2-byte block size).
704+
/// \tparam T element type of the input vector.
705+
/// \tparam N size of the \p offsets , \p pred and \p vals vectors. Must be 16
706+
/// or 32.
707+
/// @param vals values to be written.
708+
/// @param offsets byte-offsets within the SLM.
709+
/// @param pred predication control used for masking lanes.
710+
/// \ingroup sycl_esimd
711+
template <typename T, int N>
712+
ESIMD_INLINE ESIMD_NODEBUG typename sycl::detail::enable_if_t<
713+
(N == 16 || N == 32) && (sizeof(T) == 1 || sizeof(T) == 2), void>
714+
slm_store(simd<T, N> vals, simd<uint32_t, N> offsets,
715+
simd<uint16_t, N> pred = 1) {
716+
typedef typename detail::dword_type<T>::type T1;
717+
simd<T1, N> temp = vals;
718+
__esimd_slm_write<T1, N>(offsets.data(), temp.data(), pred.data());
671719
}
672720

673721
/// SLM gather4.

sycl/test/esimd/slm_load.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ void slm_supported_types() __attribute__((sycl_device)) {
2929
auto v3 = slm_load<int, 32>(offsets);
3030
auto v4 = slm_load<float, 32>(offsets);
3131
// expected-error@+2 {{no matching function for call to 'slm_load'}}
32-
// expected-note@sycl/ext/intel/experimental/esimd/memory.hpp:* {{candidate template ignored}}
32+
// expected-note@sycl/ext/intel/experimental/esimd/memory.hpp:* 2 {{candidate template ignored}}
3333
auto v5 = slm_load<double, 32>(offsets);
3434

3535
slm_store<char, 32>(v1, offsets);
@@ -38,6 +38,6 @@ void slm_supported_types() __attribute__((sycl_device)) {
3838
slm_store<float, 32>(v4, offsets);
3939
simd<double, 32> v6(0, 1);
4040
// expected-error@+2 {{no matching function for call to 'slm_store'}}
41-
// expected-note@sycl/ext/intel/experimental/esimd/memory.hpp:* {{candidate template ignored}}
41+
// expected-note@sycl/ext/intel/experimental/esimd/memory.hpp:* 2 {{candidate template ignored}}
4242
slm_store<double, 32>(v6, offsets);
4343
}

0 commit comments

Comments
 (0)