Skip to content

[ESIMD] Implement unified memory API - part2 - block_load(acc, ...) #11545

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Oct 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -534,7 +534,7 @@ class ESIMDIntrinDescTable {
{"lsc_load_merge_bti",
{"lsc.load.merge.bti",
{ai1(0), c8(lsc_subopcode::load), t8(1), t8(2), t16(3), t32(4), t8(5),
t8(6), t8(7), c8(0), a(1), aSI(2), a(2)}}},
t8(6), t8(7), c8(0), a(1), aSI(2), a(3)}}},
{"lsc_load_stateless",
{"lsc.load.stateless",
{ai1(0), c8(lsc_subopcode::load), t8(1), t8(2), t16(3), t32(4), t8(5),
Expand Down
75 changes: 66 additions & 9 deletions sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,64 @@ __esimd_svm_block_st(__ESIMD_DNS::vector_type_t<Ty, N> *addr,
}
#endif // __SYCL_DEVICE_ONLY__

/// Surface-based gather.
/// Supported platforms: DG2, PVC
///
/// Collects elements located at surface and returns them
/// as a single \ref simd object.
///
/// @tparam T is element type.
/// @tparam L1H is L1 cache hint.
/// @tparam L2H is L2 cache hint.
/// @tparam AddressScale is the address scale.
/// @tparam ImmOffset is the immediate offset added to each address.
/// @tparam DS is the data size.
/// @tparam VS is the number of elements to load per address.
/// @tparam Transposed indicates if the data is transposed during the transfer.
/// @tparam N is the SIMD size of operation (the number of addresses to access)
/// @tparam SurfIndAliasTy is the \ref sycl::accessor type.
/// @param pred is predicates.
/// @param offsets is the zero-based offsets in bytes.
/// @param surf_ind is the surface index.
/// @param PassThru contains the vector which elements are copied
/// to the returned result when the corresponding element of \p pred is 0.
/// @return is a vector of type T and N * to_int<VS>()
template <typename T, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
__ESIMD_DNS::lsc_vector_size VS,
__ESIMD_DNS::lsc_data_order Transposed, int N, typename SurfIndAliasT>
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N * __ESIMD_DNS::to_int<VS>()>
__esimd_lsc_load_merge_bti(
__ESIMD_DNS::simd_mask_storage_t<N> pred,
__ESIMD_DNS::vector_type_t<uint32_t, N> offsets, SurfIndAliasT surf_ind,
__ESIMD_DNS::vector_type_t<T, N * __ESIMD_DNS::to_int<VS>()> PassThru = 0)
#ifdef __SYCL_DEVICE_ONLY__
;
#else // __SYCL_DEVICE_ONLY__
{
__ESIMD_UNSUPPORTED_ON_HOST;
}
#endif // __SYCL_DEVICE_ONLY__

/// Similar to __esimd_lsc_load_merge_bti(), but the argument PassThru is not
/// explicitly specified, which results into random values in those elements of
/// the returned result for which the corresponding element in \p pred is 0.
template <typename T, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
__ESIMD_DNS::lsc_vector_size VS,
__ESIMD_DNS::lsc_data_order Transposed, int N, typename SurfIndAliasT>
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N * __ESIMD_DNS::to_int<VS>()>
__esimd_lsc_load_bti(__ESIMD_DNS::simd_mask_storage_t<N> pred,
__ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
SurfIndAliasT surf_ind)
#ifdef __SYCL_DEVICE_ONLY__
;
#else // __SYCL_DEVICE_ONLY__
{
__ESIMD_UNSUPPORTED_ON_HOST;
}
#endif // __SYCL_DEVICE_ONLY__

// flat_read4 does flat-address gather4
template <typename Ty, int N, __ESIMD_NS::rgba_channel_mask Mask>
__ESIMD_DNS::vector_type_t<Ty, N * get_num_channels_enabled(Mask)>
Expand Down Expand Up @@ -223,7 +281,7 @@ __ESIMD_INTRIN void __esimd_svm_scatter4_scaled(
// 0 - 1 byte, 1 - 2 bytes, 2 - 4 bytes
// @tparam Scale - offset scaling factor; must be zero currently
// @tparam L1H - L1 cache hint
// @tparam L3H - L3 cache hint
// @tparam L2H - L2 cache hint
//
// Formal parameters:
// @param surf_ind - the surface index, taken from the SYCL memory object
Expand Down Expand Up @@ -260,7 +318,7 @@ __esimd_gather_scaled2(SurfIndAliasTy surf_ind, uint32_t global_offset,
// 0 - 1 byte, 1 - 2 bytes, 2 - 4 bytes
// @tparam Scale - offset scale; only 0 is supported for now
// @tparam L1H - L1 cache hint
// @tparam L3H - L3 cache hint
// @tparam L2H - L2 cache hint
//
// Formal parameters:
// @param pred - per-element predicates; elements with zero corresponding
Expand Down Expand Up @@ -589,7 +647,7 @@ ESIMD_INLINE __ESIMD_NS::SurfaceIndex __esimd_get_surface_index(MemObjTy obj)
///
/// @tparam Ty is element type.
/// @tparam L1H is L1 cache hint.
/// @tparam L3H is L3 cache hint.
/// @tparam L2H is L2 cache hint.
/// @tparam AddressScale is the address scale.
/// @tparam ImmOffset is the immediate offset added to each address.
/// @tparam DS is the data size.
Expand All @@ -598,19 +656,18 @@ ESIMD_INLINE __ESIMD_NS::SurfaceIndex __esimd_get_surface_index(MemObjTy obj)
/// @tparam N is the SIMD size of operation (the number of addresses to access)
/// @param pred is predicates.
/// @param addrs is the load addresses.
/// @param old_values is the vector of values copied to the result when the
/// @param pass_thru is the vector of values copied to the result when the
/// corresponding element in \p pred is unset.
/// @return is a vector of type T and N * to_int<VS>()
template <typename Ty, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L3H,
template <typename Ty, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
__ESIMD_DNS::lsc_vector_size VS,
__ESIMD_DNS::lsc_data_order Transposed, int N>
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()>
__esimd_lsc_load_merge_stateless(
__ESIMD_DNS::simd_mask_storage_t<N> pred,
__ESIMD_DNS::vector_type_t<uintptr_t, N> addrs,
__ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> old_values =
0)
__ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> pass_thru = 0)
#ifdef __SYCL_DEVICE_ONLY__
;
#else // __SYCL_DEVICE_ONLY__
Expand All @@ -627,7 +684,7 @@ __esimd_lsc_load_merge_stateless(
///
/// @tparam Ty is element type.
/// @tparam L1H is L1 cache hint.
/// @tparam L3H is L3 cache hint.
/// @tparam L2H is L2 cache hint.
/// @tparam AddressScale is the address scale.
/// @tparam ImmOffset is the immediate offset added to each address.
/// @tparam DS is the data size.
Expand All @@ -637,7 +694,7 @@ __esimd_lsc_load_merge_stateless(
/// @param pred is predicates.
/// @param addrs is the load addresses.
/// @return is a vector of type T and N * to_int<VS>()
template <typename Ty, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L3H,
template <typename Ty, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
__ESIMD_DNS::lsc_vector_size VS,
__ESIMD_DNS::lsc_data_order Transposed, int N>
Expand Down
Loading