Skip to content

Commit 960d898

Browse files
authored
[SYCL][ESIMD] Implement unified memory API - block_store(acc,...) (#11830)
This change implements the new compile time properties API for block_store with accessors. I also updated the old E2E test to fix alignment issues, added compile-time tests and updated the naming of the old functions. --------- Signed-off-by: Sarnie, Nick <[email protected]>
1 parent 2581123 commit 960d898

File tree

10 files changed

+897
-211
lines changed

10 files changed

+897
-211
lines changed

sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -842,6 +842,43 @@ __ESIMD_INTRIN void __esimd_lsc_store_stateless(
842842
}
843843
#endif // __SYCL_DEVICE_ONLY__
844844

845+
/// Surface-based scatter.
846+
/// Supported platforms: DG2, PVC
847+
///
848+
/// Scatters elements to surface.
849+
///
850+
/// @tparam Ty is element type.
851+
/// @tparam L1H is L1 cache hint.
852+
/// @tparam L2H is L2 cache hint.
853+
/// @tparam AddressScale is the address scale.
854+
/// @tparam ImmOffset is the immediate offset added to each address.
855+
/// @tparam DS is the data size.
856+
/// @tparam VS is the number of elements to load per address.
857+
/// @tparam Transposed indicates if the data is transposed during the transfer.
858+
/// @tparam N is the SIMD size of operation (the number of addresses to access)
859+
/// @tparam SurfIndAliasTy is the \ref sycl::accessor type.
860+
/// @param pred is predicates.
861+
/// @param offsets is the zero-based offsets in bytes.
862+
/// @param vals is values to store.
863+
/// @param surf_ind is the surface index.
864+
template <typename Ty, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
865+
uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
866+
__ESIMD_DNS::lsc_vector_size VS,
867+
__ESIMD_DNS::lsc_data_order _Transposed, int N,
868+
typename SurfIndAliasTy>
869+
__ESIMD_INTRIN void __esimd_lsc_store_bti(
870+
__ESIMD_DNS::simd_mask_storage_t<N> pred,
871+
__ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
872+
__ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> vals,
873+
SurfIndAliasTy surf_ind)
874+
#ifdef __SYCL_DEVICE_ONLY__
875+
;
876+
#else // __SYCL_DEVICE_ONLY__
877+
{
878+
__ESIMD_UNSUPPORTED_ON_HOST;
879+
}
880+
#endif // __SYCL_DEVICE_ONLY__
881+
845882
// \brief Raw sends.
846883
//
847884
// @param modifier the send message flags (Bit-0: isSendc, Bit-1: isEOT).

sycl/include/sycl/ext/intel/esimd/memory.hpp

Lines changed: 423 additions & 40 deletions
Large diffs are not rendered by default.

sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp

Lines changed: 0 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -187,43 +187,6 @@ __ESIMD_INTRIN void __esimd_lsc_store_slm(
187187
}
188188
#endif // __SYCL_DEVICE_ONLY__
189189

190-
/// Surface-based scatter.
191-
/// Supported platforms: DG2, PVC
192-
///
193-
/// Scatters elements to surface.
194-
///
195-
/// @tparam Ty is element type.
196-
/// @tparam L1H is L1 cache hint.
197-
/// @tparam L3H is L3 cache hint.
198-
/// @tparam AddressScale is the address scale.
199-
/// @tparam ImmOffset is the immediate offset added to each address.
200-
/// @tparam DS is the data size.
201-
/// @tparam VS is the number of elements to load per address.
202-
/// @tparam Transposed indicates if the data is transposed during the transfer.
203-
/// @tparam N is the SIMD size of operation (the number of addresses to access)
204-
/// @tparam SurfIndAliasTy is the \ref sycl::accessor type.
205-
/// @param pred is predicates.
206-
/// @param offsets is the zero-based offsets in bytes.
207-
/// @param vals is values to store.
208-
/// @param surf_ind is the surface index.
209-
template <typename Ty, __ESIMD_ENS::cache_hint L1H, __ESIMD_ENS::cache_hint L3H,
210-
uint16_t AddressScale, int ImmOffset, __ESIMD_ENS::lsc_data_size DS,
211-
__ESIMD_EDNS::lsc_vector_size VS,
212-
__ESIMD_EDNS::lsc_data_order _Transposed, int N,
213-
typename SurfIndAliasTy>
214-
__ESIMD_INTRIN void __esimd_lsc_store_bti(
215-
__ESIMD_DNS::simd_mask_storage_t<N> pred,
216-
__ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
217-
__ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_EDNS::to_int<VS>()> vals,
218-
SurfIndAliasTy surf_ind)
219-
#ifdef __SYCL_DEVICE_ONLY__
220-
;
221-
#else // __SYCL_DEVICE_ONLY__
222-
{
223-
__ESIMD_UNSUPPORTED_ON_HOST;
224-
}
225-
#endif // __SYCL_DEVICE_ONLY__
226-
227190
/// 2D USM pointer block load.
228191
/// Supported platforms: PVC
229192
///

sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp

Lines changed: 3 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1792,7 +1792,7 @@ lsc_block_store(T *p, __ESIMD_NS::simd<T, NElts> vals, FlagsT flags) {
17921792
///
17931793
/// @tparam T is element type.
17941794
/// @tparam NElts is the number of elements to store per address.
1795-
/// @tparam DS is the data size.
1795+
/// @tparam DS is the data size (unused/obsolete).
17961796
/// @tparam L1H is L1 cache hint.
17971797
/// @tparam L3H is L3 cache hint.
17981798
/// @tparam AccessorTy is the \ref sycl::accessor type.
@@ -1815,69 +1815,8 @@ __ESIMD_API std::enable_if_t<
18151815
lsc_block_store(AccessorTy acc, __ESIMD_DNS::DeviceAccessorOffsetT offset,
18161816
__ESIMD_NS::simd<T, NElts> vals,
18171817
__ESIMD_NS::simd_mask<1> pred = 1, FlagsT flags = FlagsT{}) {
1818-
#ifdef __ESIMD_FORCE_STATELESS_MEM
1819-
lsc_block_store<T, NElts, DS, L1H, L3H>(
1820-
__ESIMD_DNS::accessorToPointer<T>(acc, offset), vals, pred, flags);
1821-
#else
1822-
detail::check_lsc_data_size<T, DS>();
1823-
detail::check_lsc_cache_hint<detail::lsc_action::store, L1H, L3H>();
1824-
constexpr auto Alignment =
1825-
FlagsT::template alignment<__ESIMD_DNS::__raw_t<T>>;
1826-
static_assert(
1827-
(Alignment >= __ESIMD_DNS::OperandSize::DWORD && sizeof(T) <= 4) ||
1828-
(Alignment >= __ESIMD_DNS::OperandSize::QWORD && sizeof(T) > 4),
1829-
"Incorrect alignment for the data type");
1830-
// Prepare template arguments for the call of intrinsic.
1831-
constexpr uint16_t _AddressScale = 1;
1832-
constexpr int _ImmOffset = 0;
1833-
constexpr lsc_data_size _DS = detail::finalize_data_size<T, DS>();
1834-
static_assert(_DS == lsc_data_size::u16 || _DS == lsc_data_size::u8 ||
1835-
_DS == lsc_data_size::u32 || _DS == lsc_data_size::u64,
1836-
"Conversion data types are not supported");
1837-
constexpr detail::lsc_data_order _Transposed =
1838-
detail::lsc_data_order::transpose;
1839-
constexpr int N = 1;
1840-
1841-
__ESIMD_NS::simd<uint32_t, N> Offsets = offset;
1842-
auto si = __ESIMD_NS::get_surface_index(acc);
1843-
1844-
constexpr int SmallIntFactor32Bit =
1845-
(_DS == lsc_data_size::u16) ? 2 : (_DS == lsc_data_size::u8 ? 4 : 1);
1846-
static_assert(NElts > 0 && NElts % SmallIntFactor32Bit == 0,
1847-
"Number of elements is not supported by Transposed store");
1848-
1849-
constexpr bool Use64BitData =
1850-
Alignment >= __ESIMD_DNS::OperandSize::QWORD &&
1851-
(sizeof(T) == 8 ||
1852-
(DS == lsc_data_size::default_size && NElts / SmallIntFactor32Bit > 64 &&
1853-
(NElts * sizeof(T)) % 8 == 0));
1854-
constexpr int SmallIntFactor64Bit =
1855-
(_DS == lsc_data_size::u16)
1856-
? 4
1857-
: (_DS == lsc_data_size::u8 ? 8
1858-
: (_DS == lsc_data_size::u32 ? 2 : 1));
1859-
constexpr int SmallIntFactor =
1860-
Use64BitData ? SmallIntFactor64Bit : SmallIntFactor32Bit;
1861-
constexpr int FactoredNElts = NElts / SmallIntFactor;
1862-
constexpr lsc_data_size ActualDS = Use64BitData
1863-
? __ESIMD_ENS::lsc_data_size::u64
1864-
: __ESIMD_ENS::lsc_data_size::u32;
1865-
1866-
detail::check_lsc_vector_size<FactoredNElts>();
1867-
constexpr detail::lsc_vector_size _VS =
1868-
detail::to_lsc_vector_size<FactoredNElts>();
1869-
1870-
using StoreType = __ESIMD_DNS::__raw_t<
1871-
std::conditional_t<SmallIntFactor == 1, T,
1872-
std::conditional_t<Use64BitData, uint64_t, uint32_t>>>;
1873-
1874-
__esimd_lsc_store_bti<StoreType, L1H, L3H, _AddressScale, _ImmOffset,
1875-
ActualDS, _VS, _Transposed, N>(
1876-
pred.data(), Offsets.data(),
1877-
sycl::bit_cast<__ESIMD_DNS::vector_type_t<StoreType, FactoredNElts>>(
1878-
vals.data()),
1879-
si);
1880-
#endif
1818+
__ESIMD_DNS::block_store_impl<T, NElts, L1H, L3H>(acc, offset, vals, pred,
1819+
flags);
18811820
}
18821821

18831822
template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,

0 commit comments

Comments
 (0)