Skip to content

Commit 646ab08

Browse files
authored
[SYCL][ESIMD] Implement unified memory API - block_store slm and local accessors (#11921)
This change implements the last piece for block_store: local accessors and SLM. --------- Signed-off-by: Sarnie, Nick <[email protected]>
1 parent a37c661 commit 646ab08

File tree

11 files changed

+938
-77
lines changed

11 files changed

+938
-77
lines changed

sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,39 @@ __ESIMD_INTRIN void __esimd_slm_block_st(uint32_t offset,
157157
}
158158
#endif // __SYCL_DEVICE_ONLY__
159159

160+
/// SLM block_store/scatter.
161+
/// Supported platforms: DG2, PVC
162+
///
163+
/// Scatters elements to SLM (shared local memory).
164+
///
165+
/// @tparam Ty is element type.
166+
/// @tparam L1H is L1 cache hint.
167+
/// @tparam L2H is L2 cache hint.
168+
/// @tparam AddressScale is the address scale.
169+
/// @tparam ImmOffset is the immediate offset added to each address.
170+
/// @tparam DS is the data size.
171+
/// @tparam VS is the number of elements to store per address.
172+
/// @tparam _Transposed indicates if the data is transposed during the transfer.
173+
/// @tparam N is the SIMD size of operation (the number of addresses to access)
174+
/// @param pred is the predicate mask (one element per address).
175+
/// @param offsets are the zero-based byte offsets into the SLM buffer.
176+
/// @param vals are the values to store.
177+
template <typename Ty, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
178+
uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
179+
__ESIMD_DNS::lsc_vector_size VS,
180+
__ESIMD_DNS::lsc_data_order _Transposed, int N>
181+
__ESIMD_INTRIN void __esimd_lsc_store_slm(
182+
__ESIMD_DNS::simd_mask_storage_t<N> pred,
183+
__ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
184+
__ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> vals)
185+
#ifdef __SYCL_DEVICE_ONLY__
186+
;
187+
#else // __SYCL_DEVICE_ONLY__
188+
{
189+
__ESIMD_UNSUPPORTED_ON_HOST;
190+
}
191+
#endif // __SYCL_DEVICE_ONLY__
192+
160193
// Read a block of data from SLM at the given offset.
161194
template <typename Ty, int N, size_t Align>
162195
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>

sycl/include/sycl/ext/intel/esimd/memory.hpp

Lines changed: 279 additions & 6 deletions
Large diffs are not rendered by default.

sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp

Lines changed: 0 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -154,39 +154,6 @@ __esimd_lsc_prefetch_stateless(__ESIMD_DNS::simd_mask_storage_t<N> pred,
154154
}
155155
#endif // __SYCL_DEVICE_ONLY__
156156

157-
/// SLM scatter.
158-
/// Supported platforms: DG2, PVC
159-
///
160-
/// Scatters elements located to slm.
161-
///
162-
/// @tparam Ty is element type.
163-
/// @tparam L1H is L1 cache hint.
164-
/// @tparam L3H is L3 cache hint.
165-
/// @tparam AddressScale is the address scale.
166-
/// @tparam ImmOffset is the immediate offset added to each address.
167-
/// @tparam DS is the data size.
168-
/// @tparam VS is the number of elements to load per address.
169-
/// @tparam Transposed indicates if the data is transposed during the transfer.
170-
/// @tparam N is the SIMD size of operation (the number of addresses to access)
171-
/// @param pred is predicates.
172-
/// @param offsets is the zero-based offsets for SLM buffer in bytes.
173-
/// @param vals is values to store.
174-
template <typename Ty, __ESIMD_ENS::cache_hint L1H, __ESIMD_ENS::cache_hint L3H,
175-
uint16_t AddressScale, int ImmOffset, __ESIMD_ENS::lsc_data_size DS,
176-
__ESIMD_EDNS::lsc_vector_size VS,
177-
__ESIMD_EDNS::lsc_data_order _Transposed, int N>
178-
__ESIMD_INTRIN void __esimd_lsc_store_slm(
179-
__ESIMD_DNS::simd_mask_storage_t<N> pred,
180-
__ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
181-
__ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_EDNS::to_int<VS>()> vals)
182-
#ifdef __SYCL_DEVICE_ONLY__
183-
;
184-
#else // __SYCL_DEVICE_ONLY__
185-
{
186-
__ESIMD_UNSUPPORTED_ON_HOST;
187-
}
188-
#endif // __SYCL_DEVICE_ONLY__
189-
190157
/// 2D USM pointer block load.
191158
/// Supported platforms: PVC
192159
///

sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp

Lines changed: 6 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1516,29 +1516,18 @@ __ESIMD_API void lsc_slm_scatter(__ESIMD_NS::simd<uint32_t, N> offsets,
15161516
///
15171517
/// @tparam T is element type.
15181518
/// @tparam NElts is the number of elements to store per address.
1519-
/// @tparam DS is the data size.
1519+
/// @tparam DS is the data size (unused/obsolete).
15201520
/// @param offset is the zero-based offset for SLM buffer in bytes.
15211521
/// @param vals is values to store.
15221522
///
15231523
template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size>
15241524
__ESIMD_API void lsc_slm_block_store(uint32_t offset,
15251525
__ESIMD_NS::simd<T, NElts> vals) {
1526-
detail::check_lsc_vector_size<NElts>();
1527-
detail::check_lsc_data_size<T, DS>();
1528-
constexpr uint16_t _AddressScale = 1;
1529-
constexpr int _ImmOffset = 0;
1530-
constexpr lsc_data_size _DS = detail::finalize_data_size<T, DS>();
1531-
static_assert(_DS == lsc_data_size::u32 || _DS == lsc_data_size::u64,
1532-
"Transposed store is supported only for data size u32 or u64");
1533-
constexpr detail::lsc_vector_size _VS = detail::to_lsc_vector_size<NElts>();
1534-
constexpr detail::lsc_data_order _Transposed =
1535-
detail::lsc_data_order::transpose;
1536-
constexpr int N = 1;
1537-
__ESIMD_NS::simd_mask<N> pred = 1;
1538-
__ESIMD_NS::simd<uint32_t, N> offsets = offset;
1539-
__esimd_lsc_store_slm<T, cache_hint::none, cache_hint::none, _AddressScale,
1540-
_ImmOffset, _DS, _VS, _Transposed, N>(
1541-
pred.data(), offsets.data(), vals.data());
1526+
// Make sure we generate an LSC block store
1527+
constexpr size_t DefaultAlignment = sizeof(T) <= 4 ? 4 : sizeof(T);
1528+
__ESIMD_NS::properties Props{__ESIMD_NS::alignment<DefaultAlignment>};
1529+
__ESIMD_NS::simd_mask<1> pred = 1;
1530+
__ESIMD_NS::slm_block_store<T, NElts>(offset, vals, pred, Props);
15421531
}
15431532

15441533
/// USM pointer scatter.

sycl/test-e2e/ESIMD/unified_memory_api/Inputs/block_load.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -793,8 +793,8 @@ template <typename T, bool TestPVCFeatures> bool testSLM(queue Q) {
793793

794794
constexpr int I32Factor =
795795
std::max(static_cast<int>(sizeof(int) / sizeof(T)), 1);
796-
constexpr size_t ReqiredAlignment = sizeof(T) <= 4 ? 4 : 8;
797-
properties PVCProps{alignment<ReqiredAlignment>};
796+
constexpr size_t RequiredAlignment = sizeof(T) <= 4 ? 4 : 8;
797+
properties PVCProps{alignment<RequiredAlignment>};
798798

799799
// Test block_load() that is available on PVC:
800800
// 1, 2, 3, 4, 8, ... N elements (up to 512-bytes).

0 commit comments

Comments
 (0)