Skip to content

Commit 656b8be

Browse files
fineg74sarnex
andauthored
[SYCL][ESIMD] Implement accessor based prefetch API that accepting compile time properties (#12878)
Co-authored-by: Nick Sarnie <[email protected]>
1 parent e4d2873 commit 656b8be

File tree

8 files changed

+864
-70
lines changed

8 files changed

+864
-70
lines changed

sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,34 @@ __ESIMD_INTRIN void __esimd_lsc_prefetch_stateless(
169169
__ESIMD_DNS::simd_mask_storage_t<N> pred,
170170
__ESIMD_DNS::vector_type_t<uintptr_t, N> addrs) __ESIMD_INTRIN_END;
171171

172+
/// Surface-based prefetch gather.
173+
/// Supported platforms: DG2, PVC
174+
///
175+
/// Prefetches elements located at surface.
176+
///
177+
/// @tparam Ty is element type.
178+
/// @tparam L1H is L1 cache hint.
179+
/// @tparam L2H is L2 cache hint.
180+
/// @tparam AddressScale is the address scale.
181+
/// @tparam ImmOffset is the immediate offset added to each address.
182+
/// @tparam DS is the data size.
183+
/// @tparam VS is the number of elements to load per address.
184+
/// @tparam Transposed indicates if the data is transposed during the transfer.
185+
/// @tparam N is the SIMD size of operation (the number of addresses to access)
186+
/// @tparam SurfIndAliasTy is the \ref sycl::accessor type.
187+
/// @param pred is predicates.
188+
/// @param offsets is the zero-based offsets in bytes.
189+
/// @param surf_ind is the surface index.
190+
template <typename Ty, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
191+
uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
192+
__ESIMD_DNS::lsc_vector_size VS,
193+
__ESIMD_DNS::lsc_data_order Transposed, int N,
194+
typename SurfIndAliasTy>
195+
__ESIMD_INTRIN void
196+
__esimd_lsc_prefetch_bti(__ESIMD_DNS::simd_mask_storage_t<N> pred,
197+
__ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
198+
SurfIndAliasTy surf_ind) __ESIMD_INTRIN_END;
199+
172200
// Read a block of data from SLM at the given offset.
173201
template <typename Ty, int N, size_t Align>
174202
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>

sycl/include/sycl/ext/intel/esimd/memory.hpp

Lines changed: 460 additions & 0 deletions
Large diffs are not rendered by default.

sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -62,34 +62,6 @@ __ESIMD_INTRIN void __esimd_raw_send_nbarrier_signal(
6262
__ESIMD_DNS::vector_type_t<Ty, N> msg_var,
6363
uint16_t pred = 1) __ESIMD_INTRIN_END;
6464

65-
/// Surface-based prefetch gather.
66-
/// Supported platforms: DG2, PVC
67-
///
68-
/// Prefetches elements located at surface.
69-
///
70-
/// @tparam Ty is element type.
71-
/// @tparam L1H is L1 cache hint.
72-
/// @tparam L2H is L2 cache hint.
73-
/// @tparam AddressScale is the address scale.
74-
/// @tparam ImmOffset is the immediate offset added to each address.
75-
/// @tparam DS is the data size.
76-
/// @tparam VS is the number of elements to load per address.
77-
/// @tparam Transposed indicates if the data is transposed during the transfer.
78-
/// @tparam N is the SIMD size of operation (the number of addresses to access)
79-
/// @tparam SurfIndAliasTy is the \ref sycl::accessor type.
80-
/// @param pred is predicates.
81-
/// @param offsets is the zero-based offsets in bytes.
82-
/// @param surf_ind is the surface index.
83-
template <typename Ty, __ESIMD_ENS::cache_hint L1H, __ESIMD_ENS::cache_hint L2H,
84-
uint16_t AddressScale, int ImmOffset, __ESIMD_ENS::lsc_data_size DS,
85-
__ESIMD_EDNS::lsc_vector_size VS,
86-
__ESIMD_EDNS::lsc_data_order _Transposed, int N,
87-
typename SurfIndAliasTy>
88-
__ESIMD_INTRIN void
89-
__esimd_lsc_prefetch_bti(__ESIMD_DNS::simd_mask_storage_t<N> pred,
90-
__ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
91-
SurfIndAliasTy surf_ind) __ESIMD_INTRIN_END;
92-
9365
/// 2D USM pointer block load.
9466
/// Supported platforms: PVC
9567
///

sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp

Lines changed: 6 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1293,30 +1293,13 @@ template <typename T, int NElts = 1,
12931293
__ESIMD_API std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v<
12941294
AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read>>
12951295
lsc_prefetch(AccessorTy acc,
1296-
#ifdef __ESIMD_FORCE_STATELESS_MEM
1297-
__ESIMD_NS::simd<uint64_t, N> offsets,
1298-
#else
1299-
__ESIMD_NS::simd<uint32_t, N> offsets,
1300-
#endif
1296+
__ESIMD_NS::simd<__ESIMD_DNS::DeviceAccessorOffsetT, N> offsets,
13011297
__ESIMD_NS::simd_mask<N> pred = 1) {
13021298
#ifdef __ESIMD_FORCE_STATELESS_MEM
1303-
return lsc_prefetch<T, NElts, DS, L1H, L2H>(
1304-
__ESIMD_DNS::accessorToPointer<T>(acc), offsets, pred);
1299+
lsc_prefetch<T, NElts, DS, L1H, L2H>(__ESIMD_DNS::accessorToPointer<T>(acc),
1300+
offsets, pred);
13051301
#else
1306-
detail::check_lsc_vector_size<NElts>();
1307-
detail::check_lsc_data_size<T, DS>();
1308-
detail::check_lsc_cache_hint<detail::lsc_action::prefetch, L1H, L2H>();
1309-
constexpr uint16_t _AddressScale = 1;
1310-
constexpr int _ImmOffset = 0;
1311-
constexpr lsc_data_size _DS =
1312-
detail::expand_data_size(detail::finalize_data_size<T, DS>());
1313-
constexpr detail::lsc_vector_size _VS = detail::to_lsc_vector_size<NElts>();
1314-
constexpr detail::lsc_data_order _Transposed =
1315-
detail::lsc_data_order::nontranspose;
1316-
using MsgT = typename detail::lsc_expand_type<T>::type;
1317-
auto si = __ESIMD_NS::get_surface_index(acc);
1318-
__esimd_lsc_prefetch_bti<MsgT, L1H, L2H, _AddressScale, _ImmOffset, _DS, _VS,
1319-
_Transposed, N>(pred.data(), offsets.data(), si);
1302+
__ESIMD_DNS::prefetch_impl<T, NElts, DS, L1H, L2H>(acc, offsets, pred);
13201303
#endif
13211304
}
13221305

@@ -1362,24 +1345,8 @@ lsc_prefetch(AccessorTy acc, __ESIMD_DNS::DeviceAccessorOffsetT offset) {
13621345
lsc_prefetch<T, NElts, DS, L1H, L2H>(
13631346
__ESIMD_DNS::accessorToPointer<T>(acc, offset));
13641347
#else
1365-
detail::check_lsc_vector_size<NElts>();
1366-
detail::check_lsc_data_size<T, DS>();
1367-
detail::check_lsc_cache_hint<detail::lsc_action::prefetch, L1H, L2H>();
1368-
constexpr uint16_t _AddressScale = 1;
1369-
constexpr int _ImmOffset = 0;
1370-
constexpr lsc_data_size _DS = detail::finalize_data_size<T, DS>();
1371-
static_assert(
1372-
_DS == lsc_data_size::u32 || _DS == lsc_data_size::u64,
1373-
"Transposed prefetch is supported only for data size u32 or u64");
1374-
constexpr detail::lsc_vector_size _VS = detail::to_lsc_vector_size<NElts>();
1375-
constexpr detail::lsc_data_order _Transposed =
1376-
detail::lsc_data_order::transpose;
1377-
constexpr int N = 1;
1378-
__ESIMD_NS::simd_mask<N> pred = 1;
1379-
__ESIMD_NS::simd<uint32_t, N> offsets = offset;
1380-
auto si = __ESIMD_NS::get_surface_index(acc);
1381-
__esimd_lsc_prefetch_bti<T, L1H, L2H, _AddressScale, _ImmOffset, _DS, _VS,
1382-
_Transposed, N>(pred.data(), offsets.data(), si);
1348+
__ESIMD_NS::simd_mask<1> Mask = 1;
1349+
__ESIMD_DNS::prefetch_impl<T, NElts, DS, L1H, L2H>(acc, offset, Mask);
13831350
#endif
13841351
}
13851352

0 commit comments

Comments
 (0)