Skip to content

Commit 91e8d45

Browse files
committed
[ESIMD] Unified memory API - part2 - block_load(acc, ...)
Signed-off-by: Vyacheslav N Klochkov <[email protected]>
1 parent abe3efb commit 91e8d45

File tree

11 files changed

+1179
-536
lines changed

11 files changed

+1179
-536
lines changed

llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -534,7 +534,7 @@ class ESIMDIntrinDescTable {
534534
{"lsc_load_merge_bti",
535535
{"lsc.load.merge.bti",
536536
{ai1(0), c8(lsc_subopcode::load), t8(1), t8(2), t16(3), t32(4), t8(5),
537-
t8(6), t8(7), c8(0), a(1), aSI(2), a(2)}}},
537+
t8(6), t8(7), c8(0), a(1), aSI(2), a(3)}}},
538538
{"lsc_load_stateless",
539539
{"lsc.load.stateless",
540540
{ai1(0), c8(lsc_subopcode::load), t8(1), t8(2), t16(3), t32(4), t8(5),
@@ -1422,7 +1422,7 @@ static void translateESIMDIntrinsicCall(CallInst &CI) {
14221422
GenXArgs.erase(GenXArgs.begin());
14231423
}
14241424

1425-
CallInst *NewCI = IntrinsicInst::Create(
1425+
CallInst *NewCI = IntrinsicInst::Create( // xxyy
14261426
NewFDecl, GenXArgs,
14271427
NewFDecl->getReturnType()->isVoidTy() ? "" : CI.getName() + ".esimd",
14281428
&CI);

sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp

Lines changed: 64 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,64 @@ __esimd_svm_block_st(__ESIMD_DNS::vector_type_t<Ty, N> *addr,
182182
}
183183
#endif // __SYCL_DEVICE_ONLY__
184184

185+
/// Surface-based gather.
186+
/// Supported platforms: DG2, PVC
187+
///
188+
/// Collects elements located at surface and returns them
189+
/// as a single \ref simd object.
190+
///
191+
/// @tparam T is element type.
192+
/// @tparam L1H is L1 cache hint.
193+
/// @tparam L2H is L2 cache hint.
194+
/// @tparam AddressScale is the address scale.
195+
/// @tparam ImmOffset is the immediate offset added to each address.
196+
/// @tparam DS is the data size.
197+
/// @tparam VS is the number of elements to load per address.
198+
/// @tparam Transposed indicates if the data is transposed during the transfer.
199+
/// @tparam N is the SIMD size of operation (the number of addresses to access)
200+
/// @tparam SurfIndAliasTy is the \ref sycl::accessor type.
201+
/// @param pred is predicates.
202+
/// @param offsets is the zero-based offsets in bytes.
203+
/// @param surf_ind is the surface index.
204+
/// @param OldValues contains the vector which elements are copied
205+
/// to the returned result when the corresponding element of \p pred is 0.
206+
/// @return is a vector of type T and N * to_int<VS>()
207+
template <typename T, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
208+
uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
209+
__ESIMD_DNS::lsc_vector_size VS,
210+
__ESIMD_DNS::lsc_data_order Transposed, int N, typename SurfIndAliasT>
211+
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N * __ESIMD_DNS::to_int<VS>()>
212+
__esimd_lsc_load_merge_bti(
213+
__ESIMD_DNS::simd_mask_storage_t<N> pred,
214+
__ESIMD_DNS::vector_type_t<uint32_t, N> offsets, SurfIndAliasT surf_ind,
215+
__ESIMD_DNS::vector_type_t<T, N * __ESIMD_DNS::to_int<VS>()> OldValues = 0)
216+
#ifdef __SYCL_DEVICE_ONLY__
217+
;
218+
#else // __SYCL_DEVICE_ONLY__
219+
{
220+
__ESIMD_UNSUPPORTED_ON_HOST;
221+
}
222+
#endif // __SYCL_DEVICE_ONLY__
223+
224+
/// Similar to __esimd_lsc_load_merge_bti(), but the argument OldValues is not
225+
/// explicitly specified, which results into random values in those elements of
226+
/// the returned result for which the corresponding element in \p pred is 0.
227+
template <typename T, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
228+
uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
229+
__ESIMD_DNS::lsc_vector_size VS,
230+
__ESIMD_DNS::lsc_data_order Transposed, int N, typename SurfIndAliasT>
231+
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N * __ESIMD_DNS::to_int<VS>()>
232+
__esimd_lsc_load_bti(__ESIMD_DNS::simd_mask_storage_t<N> pred,
233+
__ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
234+
SurfIndAliasT surf_ind)
235+
#ifdef __SYCL_DEVICE_ONLY__
236+
;
237+
#else // __SYCL_DEVICE_ONLY__
238+
{
239+
__ESIMD_UNSUPPORTED_ON_HOST;
240+
}
241+
#endif // __SYCL_DEVICE_ONLY__
242+
185243
// flat_read4 does flat-address gather4
186244
template <typename Ty, int N, __ESIMD_NS::rgba_channel_mask Mask>
187245
__ESIMD_DNS::vector_type_t<Ty, N * get_num_channels_enabled(Mask)>
@@ -223,7 +281,7 @@ __ESIMD_INTRIN void __esimd_svm_scatter4_scaled(
223281
// 0 - 1 byte, 1 - 2 bytes, 2 - 4 bytes
224282
// @tparam Scale - offset scaling factor; must be zero currently
225283
// @tparam L1H - L1 cache hint
226-
// @tparam L3H - L3 cache hint
284+
// @tparam L2H - L2 cache hint
227285
//
228286
// Formal parameters:
229287
// @param surf_ind - the surface index, taken from the SYCL memory object
@@ -260,7 +318,7 @@ __esimd_gather_scaled2(SurfIndAliasTy surf_ind, uint32_t global_offset,
260318
// 0 - 1 byte, 1 - 2 bytes, 2 - 4 bytes
261319
// @tparam Scale - offset scale; only 0 is supported for now
262320
// @tparam L1H - L1 cache hint
263-
// @tparam L3H - L3 cache hint
321+
// @tparam L2H - L2 cache hint
264322
//
265323
// Formal parameters:
266324
// @param pred - per-element predicates; elements with zero corresponding
@@ -589,7 +647,7 @@ ESIMD_INLINE __ESIMD_NS::SurfaceIndex __esimd_get_surface_index(MemObjTy obj)
589647
///
590648
/// @tparam Ty is element type.
591649
/// @tparam L1H is L1 cache hint.
592-
/// @tparam L3H is L3 cache hint.
650+
/// @tparam L2H is L2 cache hint.
593651
/// @tparam AddressScale is the address scale.
594652
/// @tparam ImmOffset is the immediate offset added to each address.
595653
/// @tparam DS is the data size.
@@ -601,7 +659,7 @@ ESIMD_INLINE __ESIMD_NS::SurfaceIndex __esimd_get_surface_index(MemObjTy obj)
601659
/// @param old_values is the vector of values copied to the result when the
602660
/// corresponding element in \p pred is unset.
603661
/// @return is a vector of type T and N * to_int<VS>()
604-
template <typename Ty, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L3H,
662+
template <typename Ty, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
605663
uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
606664
__ESIMD_DNS::lsc_vector_size VS,
607665
__ESIMD_DNS::lsc_data_order Transposed, int N>
@@ -627,7 +685,7 @@ __esimd_lsc_load_merge_stateless(
627685
///
628686
/// @tparam Ty is element type.
629687
/// @tparam L1H is L1 cache hint.
630-
/// @tparam L3H is L3 cache hint.
688+
/// @tparam L2H is L2 cache hint.
631689
/// @tparam AddressScale is the address scale.
632690
/// @tparam ImmOffset is the immediate offset added to each address.
633691
/// @tparam DS is the data size.
@@ -637,7 +695,7 @@ __esimd_lsc_load_merge_stateless(
637695
/// @param pred is predicates.
638696
/// @param addrs is the load addresses.
639697
/// @return is a vector of type T and N * to_int<VS>()
640-
template <typename Ty, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L3H,
698+
template <typename Ty, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
641699
uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
642700
__ESIMD_DNS::lsc_vector_size VS,
643701
__ESIMD_DNS::lsc_data_order Transposed, int N>

0 commit comments

Comments
 (0)