Skip to content

Commit d286f4a

Browse files
fineg74 and v-klochkov authored
[SYCL][ESIMD]Implement gather(lacc) accepting compile time properties (#12533)
This implements the unified memory API for gather with local accessors --------- Co-authored-by: Vyacheslav Klochkov <[email protected]>
1 parent 62c86d7 commit d286f4a

File tree

5 files changed

+717
-8
lines changed

5 files changed

+717
-8
lines changed

sycl/include/sycl/ext/intel/esimd/memory.hpp

Lines changed: 314 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3043,7 +3043,7 @@ gather(AccessorT acc, simd<OffsetT, N / VS> byte_offsets,
30433043
/// simd<T, N> gather(AccessorT acc, simd<OffsetT, N / VS> byte_offsets,
30443044
/// simd_mask<N / VS> mask,
30453045
/// PropertyListT props = {}); // (acc-ga-2)
3046-
/// Supported platforms: DG2, PVC in most cases. The DG2/PVC is not required if
3046+
/// Supported platforms: DG2, PVC in most cases. DG2/PVC is not required if
30473047
/// VS == 1 and no L1/L2 cache hints used and sizeof(T) <= 4 and N = {1,8,16,32}
30483048
///
30493049
/// Loads ("gathers") elements of the type 'T' from memory locations addressed
@@ -3111,7 +3111,7 @@ gather(AccessorT acc, simd<OffsetT, N / VS> byte_offsets,
31113111
/// typename PropertyListT = empty_properties_t>
31123112
/// simd<T, N> gather(AccessorT acc, simd<OffsetT, N / VS> byte_offsets,
31133113
/// PropertyListT props = {}); // (acc-ga-3)
3114-
/// Supported platforms: DG2, PVC in most cases. The DG2/PVC is not required if
3114+
/// Supported platforms: DG2, PVC in most cases. DG2/PVC is not required if
31153115
/// VS == 1 and no L1/L2 cache hints used and sizeof(T) <= 4 and N = {1,8,16,32}
31163116
///
31173117
/// Loads ("gathers") elements of the type 'T' from memory locations addressed
@@ -7389,6 +7389,317 @@ __ESIMD_API
73897389
flags);
73907390
}
73917391

7392+
/// Variant of gather that uses local accessor as a parameter
7393+
/// template <typename T, int N, int VS, typename AccessorT,
7394+
/// typename PropertyListT = empty_properties_t>
7395+
/// simd<T, N> gather(AccessorT acc, simd<uint32_t, N / VS> byte_offsets,
7396+
/// simd_mask<N / VS> mask, simd<T, N> pass_thru,
7397+
/// PropertyListT props = {}); // (lacc-ga-1)
7398+
/// simd<T, N> gather(AccessorT acc, simd<uint32_t, N / VS> byte_offsets,
7399+
/// simd_mask<N / VS> mask,
7400+
/// PropertyListT props = {}); // (lacc-ga-2)
7401+
/// simd<T, N> gather(AccessorT acc, simd<uint32_t, N / VS> byte_offsets,
7402+
/// PropertyListT props = {}); // (lacc-ga-3)
7403+
///
7404+
/// The next 3 functions are similar to (lacc-ga-1,2,3), but they don't have
7405+
/// the template parameter 'VS'. These functions are added for convenience and
7406+
/// to make it possible for the user to omit the template parameters T and N,
7407+
/// e.g. 'auto res = gather(acc, byte_offsets);'.
7408+
/// template <typename T, int N, typename AccessorT,
7409+
/// typename PropertyListT = empty_properties_t>
7410+
/// simd<T, N> gather(AccessorT acc, simd<uint32_t, N> byte_offsets,
7411+
/// simd_mask<N> mask, simd<T, N> pass_thru,
7412+
/// PropertyListT props = {}); // (lacc-ga-4)
7413+
/// simd<T, N> gather(AccessorT acc, simd<uint32_t, N> byte_offsets,
7414+
/// simd_mask<N> mask, PropertyListT props = {});//(lacc-ga-5)
7415+
/// simd<T, N> gather(AccessorT acc, simd<uint32_t, N> byte_offsets,
7416+
/// PropertyListT props = {}); // (lacc-ga-6)
7417+
///
7418+
/// The next 3 functions are similar to (lacc-ga-1,2,3), but accept the
7419+
/// \p byte_offsets as a \c simd_view argument:
7420+
/// template <typename T, int N, int VS = 1, typename AccessorT,
7421+
/// typename OffsetSimdViewT,
7422+
/// typename PropertyListT = empty_properties_t>
7423+
/// simd<T, N> gather(AccessorT acc, OffsetSimdViewT byte_offsets,
7424+
/// simd_mask<N / VS> mask, simd<T, N> pass_thru,
7425+
/// PropertyListT props = {}); // (lacc-ga-7)
7426+
/// simd<T, N> gather(AccessorT acc, OffsetSimdViewT byte_offsets,
7427+
/// simd_mask<N / VS> mask,
7428+
/// PropertyListT props = {}); // (lacc-ga-8)
7429+
/// simd<T, N> gather(AccessorT acc, OffsetSimdViewT byte_offsets,
7430+
/// PropertyListT props = {}); // (lacc-ga-9)
7431+
7432+
/// template <typename T, int N, int VS, typename AccessorT,
7433+
/// typename PropertyListT = empty_properties_t>
7434+
/// simd<T, N> gather(AccessorT acc, simd<uint32_t, N / VS> byte_offsets,
7435+
/// simd_mask<N / VS> mask, simd<T, N> pass_thru,
7436+
/// PropertyListT props = {}); // (lacc-ga-1)
7437+
/// Supported platforms: DG2, PVC only - Temporary restriction for the variant
7438+
/// with pass_thru operand. The only exception: DG2/PVC is not required if
7439+
/// the __ESIMD_GATHER_SCATTER_LLVM_IR macro is used.
7440+
///
7441+
/// Loads ("gathers") elements of the type 'T' from memory locations addressed
7442+
/// by the local accessor \p acc and byte offsets \p byte_offsets, and returns
7443+
/// the loaded elements.
7444+
/// Access to any element's memory location can be disabled via the input vector
7445+
/// of predicates \p mask. If mask[i] is unset, then the load from
7446+
/// (acc + byte_offsets[i]) is skipped and the corresponding i-th element from
7447+
/// \p pass_thru operand is returned.
7448+
/// @tparam T Element type.
7449+
/// @tparam N Number of elements to read.
7450+
/// @tparam VS Vector size. It can also be read as the number of reads per each
7451+
/// address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported
7452+
/// only on DG2 and PVC and only for 4- and 8-byte element vectors.
7453+
/// @param acc Accessor referencing the data to load.
7454+
/// @param byte_offsets the vector of 32-bit offsets in bytes.
7455+
/// For each i, (acc + byte_offsets[i]) must be element size aligned.
7456+
/// If the alignment property is not passed, then it is assumed that each
7457+
/// accessed address is aligned by element-size.
7458+
/// @param mask The access mask.
7459+
/// @param pass_thru The vector pass through values.
7460+
/// @param props The optional compile-time properties. Only 'alignment'
7461+
/// property is used.
7462+
/// @return A vector of elements read.
7463+
template <typename T, int N, int VS, typename AccessorT,
7464+
typename PropertyListT =
7465+
ext::oneapi::experimental::detail::empty_properties_t>
7466+
__ESIMD_API std::enable_if_t<
7467+
(detail::is_local_accessor_with_v<AccessorT,
7468+
detail::accessor_mode_cap::can_read> &&
7469+
ext::oneapi::experimental::is_property_list_v<PropertyListT>),
7470+
simd<T, N>>
7471+
gather(AccessorT acc, simd<uint32_t, N / VS> byte_offsets,
7472+
simd_mask<N / VS> mask, simd<T, N> pass_thru, PropertyListT props = {}) {
7473+
return slm_gather<T, N, VS>(byte_offsets +
7474+
__ESIMD_DNS::localAccessorToOffset(acc),
7475+
mask, pass_thru, props);
7476+
}
7477+
7478+
/// template <typename T, int N, int VS, typename AccessorT,
7479+
/// typename PropertyListT = empty_properties_t>
7480+
/// simd<T, N> gather(AccessorT acc, simd<uint32_t, N / VS> byte_offsets,
7481+
/// simd_mask<N / VS> mask,
7482+
/// PropertyListT props = {}); // (lacc-ga-2)
7483+
/// Supported platforms: DG2, PVC in most cases. DG2/PVC is not required if
7484+
/// VS == 1 and the __ESIMD_GATHER_SCATTER_LLVM_IR macro is used or sizeof(T) <=
7485+
/// 4 and N = {1,2,4,8,16,32}
7486+
///
7487+
/// Loads ("gathers") elements of the type 'T' from memory locations addressed
7488+
/// by the local accessor \p acc and byte offsets \p byte_offsets, and returns
7489+
/// the loaded elements.
7490+
/// Access to any element's memory location can be disabled via the input vector
7491+
/// of predicates \p mask. If mask[i] is unset, then the load from
7492+
/// (acc + byte_offsets[i]) is skipped and the corresponding i-th element of
7493+
/// the returned vector is undefined.
7494+
/// @tparam T Element type.
7495+
/// @tparam N Number of elements to read.
7496+
/// @tparam VS Vector size. It can also be read as the number of reads per each
7497+
/// address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported
7498+
/// only on DG2 and PVC and only for 4- and 8-byte element vectors.
7499+
/// @param acc Accessor referencing the data to load.
7500+
/// @param byte_offsets the vector of 32-bit offsets in bytes.
7501+
/// For each i, (acc + byte_offsets[i]) must be element size aligned.
7502+
/// If the alignment property is not passed, then it is assumed that each
7503+
/// accessed address is aligned by element-size.
7504+
/// @param mask The access mask.
7505+
/// @param props The optional compile-time properties. Only 'alignment'
7506+
/// property is used.
7507+
/// @return A vector of elements read.
7508+
template <typename T, int N, int VS, typename AccessorT,
7509+
typename PropertyListT =
7510+
ext::oneapi::experimental::detail::empty_properties_t>
7511+
__ESIMD_API std::enable_if_t<
7512+
(detail::is_local_accessor_with_v<AccessorT,
7513+
detail::accessor_mode_cap::can_read> &&
7514+
ext::oneapi::experimental::is_property_list_v<PropertyListT>),
7515+
simd<T, N>>
7516+
gather(AccessorT acc, simd<uint32_t, N / VS> byte_offsets,
7517+
simd_mask<N / VS> mask, PropertyListT props = {}) {
7518+
return slm_gather<T, N, VS>(
7519+
byte_offsets + __ESIMD_DNS::localAccessorToOffset(acc), mask, props);
7520+
}
7521+
7522+
/// template <typename T, int N, int VS, typename AccessorT,
7523+
/// typename PropertyListT = empty_properties_t>
7524+
/// simd<T, N> gather(AccessorT acc, simd<uint32_t, N / VS> byte_offsets,
7525+
/// PropertyListT props = {}); // (lacc-ga-3)
7526+
/// Supported platforms: DG2, PVC in most cases. DG2/PVC is not required if
7527+
/// VS == 1 and the __ESIMD_GATHER_SCATTER_LLVM_IR macro is used or sizeof(T) <=
7528+
/// 4 and N = {1,2,4,8,16,32}
7529+
///
7530+
/// Loads ("gathers") elements of the type 'T' from memory locations addressed
7531+
/// by the local accessor \p acc and byte offsets \p byte_offsets, and returns
7532+
/// the loaded elements.
7533+
/// @tparam T Element type.
7534+
/// @tparam N Number of elements to read.
7535+
/// @tparam VS Vector size. It can also be read as the number of reads per each
7536+
/// address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported
7537+
/// only on DG2 and PVC and only for 4- and 8-byte element vectors.
7538+
/// @param acc Accessor referencing the data to load.
7539+
/// @param byte_offsets the vector of 32-bit offsets in bytes.
7540+
/// For each i, (acc + byte_offsets[i]) must be element size aligned.
7541+
/// If the alignment property is not passed, then it is assumed that each
7542+
/// accessed address is aligned by element-size.
7543+
/// @param props The optional compile-time properties. Only 'alignment'
7544+
/// property is used.
7545+
/// @return A vector of elements read.
7546+
template <typename T, int N, int VS, typename AccessorT,
7547+
typename PropertyListT =
7548+
ext::oneapi::experimental::detail::empty_properties_t>
7549+
__ESIMD_API std::enable_if_t<
7550+
(detail::is_local_accessor_with_v<AccessorT,
7551+
detail::accessor_mode_cap::can_read> &&
7552+
ext::oneapi::experimental::is_property_list_v<PropertyListT>),
7553+
simd<T, N>>
7554+
gather(AccessorT acc, simd<uint32_t, N / VS> byte_offsets,
7555+
PropertyListT props = {}) {
7556+
return slm_gather<T, N, VS>(
7557+
byte_offsets + __ESIMD_DNS::localAccessorToOffset(acc), props);
7558+
}
7559+
7560+
/// template <typename T, int N, typename AccessorT,
7561+
/// typename PropertyListT = empty_properties_t>
7562+
/// simd<T, N> gather(AccessorT acc, simd<uint32_t, N> byte_offsets,
7563+
/// simd_mask<N> mask, simd<T, N> pass_thru,
7564+
/// PropertyListT props = {}); // (lacc-ga-4)
7565+
/// This function is identical to (lacc-ga-1) except that vector size is fixed
7566+
/// to 1. This variant is added for convenience and lets the user omit the
7567+
/// template arguments and call the function as 'gather(acc, byte_offsets, mask,
7568+
/// pass_thru);'.
7569+
// Dev note: the mask type was turned into template parameter `MaskT` to
7570+
// avoid the conflicts of this prototype with the old gather() function
7571+
// accepting a 'global_offset' parameter and avoid 'ambiguous call' errors
7572+
// for calls like this: gather(acc, byte_offsets_simd, 0, mask);
7573+
template <typename T, int N, typename AccessorT, typename MaskT,
7574+
typename PropertyListT =
7575+
ext::oneapi::experimental::detail::empty_properties_t>
7576+
__ESIMD_API std::enable_if_t<
7577+
(detail::is_local_accessor_with_v<AccessorT,
7578+
detail::accessor_mode_cap::can_read> &&
7579+
std::is_same_v<MaskT, simd_mask<N>> &&
7580+
ext::oneapi::experimental::is_property_list_v<PropertyListT>),
7581+
simd<T, N>>
7582+
gather(AccessorT acc, simd<uint32_t, N> byte_offsets, MaskT mask,
7583+
simd<T, N> pass_thru, PropertyListT props = {}) {
7584+
return slm_gather<T, N>(byte_offsets +
7585+
__ESIMD_DNS::localAccessorToOffset(acc),
7586+
mask, pass_thru, props);
7587+
}
7588+
7589+
/// template <typename T, int N, typename AccessorT,
7590+
/// typename PropertyListT = empty_properties_t>
7591+
/// simd<T, N> gather(AccessorT acc, simd<uint32_t, N> byte_offsets,
7592+
/// simd_mask<N> mask, PropertyListT props = {}); // (lacc-ga-5)
7593+
/// This function is identical to (lacc-ga-2) except that vector size is fixed
7594+
/// to 1. This variant is added for convenience and lets the user omit the template
7595+
/// arguments and call the function as 'gather(acc, byte_offsets, mask);'.
7596+
// Dev note: the mask type was turned into template parameter `MaskT` to
7597+
// avoid the conflicts of this prototype with the old gather() function
7598+
// accepting a 'global_offset' parameter and avoid 'ambiguous call' errors
7599+
// for calls like this: gather(acc, byte_offsets_simd, 0);
7600+
template <typename T, int N, typename AccessorT, typename MaskT,
7601+
typename PropertyListT =
7602+
ext::oneapi::experimental::detail::empty_properties_t>
7603+
__ESIMD_API std::enable_if_t<
7604+
(detail::is_local_accessor_with_v<AccessorT,
7605+
detail::accessor_mode_cap::can_read> &&
7606+
std::is_same_v<MaskT, simd_mask<N>> &&
7607+
ext::oneapi::experimental::is_property_list_v<PropertyListT>),
7608+
simd<T, N>>
7609+
gather(AccessorT acc, simd<uint32_t, N> byte_offsets, MaskT mask,
7610+
PropertyListT props = {}) {
7611+
return slm_gather<T, N>(
7612+
byte_offsets + __ESIMD_DNS::localAccessorToOffset(acc), mask, props);
7613+
}
7614+
7615+
/// template <typename T, int N, typename AccessorT,
7616+
/// typename PropertyListT = empty_properties_t>
7617+
/// simd<T, N> gather(AccessorT acc, simd<uint32_t, N> byte_offsets,
7618+
/// PropertyListT props = {}); // (lacc-ga-6)
7619+
/// This function is identical to (lacc-ga-3) except that vector size is fixed
7620+
/// to 1. This variant is added for convenience and lets the user omit the template
7621+
/// arguments and call the function as 'gather(acc, byte_offsets);'.
7622+
template <typename T, int N, typename AccessorT,
7623+
typename PropertyListT =
7624+
ext::oneapi::experimental::detail::empty_properties_t>
7625+
__ESIMD_API std::enable_if_t<
7626+
(detail::is_local_accessor_with_v<AccessorT,
7627+
detail::accessor_mode_cap::can_read> &&
7628+
ext::oneapi::experimental::is_property_list_v<PropertyListT>),
7629+
simd<T, N>>
7630+
gather(AccessorT acc, simd<uint32_t, N> byte_offsets,
7631+
PropertyListT props = {}) {
7632+
return slm_gather<T, N>(
7633+
byte_offsets + __ESIMD_DNS::localAccessorToOffset(acc), props);
7634+
}
7635+
7636+
/// template <typename T, int N, int VS = 1, typename AccessorT,
7637+
/// typename OffsetSimdViewT,
7638+
/// typename PropertyListT = empty_properties_t>
7639+
/// simd<T, N> gather(AccessorT acc, OffsetSimdViewT byte_offsets,
7640+
/// simd_mask<N / VS> mask, simd<T, N> pass_thru,
7641+
/// PropertyListT props = {}); // (lacc-ga-7)
7642+
/// This function is identical to (lacc-ga-1) except that the \p byte_offsets
7643+
/// is represented as \c simd_view.
7644+
template <typename T, int N, int VS = 1, typename AccessorT,
7645+
typename OffsetSimdViewT,
7646+
typename PropertyListT =
7647+
ext::oneapi::experimental::detail::empty_properties_t>
7648+
__ESIMD_API std::enable_if_t<
7649+
(detail::is_local_accessor_with_v<AccessorT,
7650+
detail::accessor_mode_cap::can_read> &&
7651+
detail::is_simd_view_type_v<OffsetSimdViewT> &&
7652+
ext::oneapi::experimental::is_property_list_v<PropertyListT>),
7653+
simd<T, N>>
7654+
gather(AccessorT acc, OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask,
7655+
simd<T, N> pass_thru, PropertyListT props = {}) {
7656+
return gather<T, N, VS>(acc, byte_offsets.read(), mask, pass_thru, props);
7657+
}
7658+
7659+
/// template <typename T, int N, int VS = 1, typename AccessorT,
7660+
/// typename OffsetSimdViewT,
7661+
/// typename PropertyListT = empty_properties_t>
7662+
/// simd<T, N> gather(AccessorT acc, OffsetSimdViewT byte_offsets,
7663+
/// simd_mask<N / VS> mask,
7664+
/// PropertyListT props = {}); // (lacc-ga-8)
7665+
/// This function is identical to (lacc-ga-2) except that the \p byte_offsets
7666+
/// is represented as \c simd_view.
7667+
template <typename T, int N, int VS = 1, typename AccessorT,
7668+
typename OffsetSimdViewT,
7669+
typename PropertyListT =
7670+
ext::oneapi::experimental::detail::empty_properties_t>
7671+
__ESIMD_API std::enable_if_t<
7672+
(detail::is_local_accessor_with_v<AccessorT,
7673+
detail::accessor_mode_cap::can_read> &&
7674+
detail::is_simd_view_type_v<OffsetSimdViewT> &&
7675+
ext::oneapi::experimental::is_property_list_v<PropertyListT>),
7676+
simd<T, N>>
7677+
gather(AccessorT acc, OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask,
7678+
PropertyListT props = {}) {
7679+
return gather<T, N, VS>(acc, byte_offsets.read(), mask, props);
7680+
}
7681+
7682+
/// template <typename T, int N, int VS = 1, typename AccessorT,
7683+
/// typename OffsetSimdViewT,
7684+
/// typename PropertyListT = empty_properties_t>
7685+
/// simd<T, N> gather(AccessorT acc, OffsetSimdViewT byte_offsets,
7686+
/// PropertyListT props = {}); // (lacc-ga-9)
7687+
/// This function is identical to (lacc-ga-3) except that the \p byte_offsets
7688+
/// is represented as \c simd_view.
7689+
template <typename T, int N, int VS = 1, typename AccessorT,
7690+
typename OffsetSimdViewT,
7691+
typename PropertyListT =
7692+
ext::oneapi::experimental::detail::empty_properties_t>
7693+
__ESIMD_API std::enable_if_t<
7694+
(detail::is_local_accessor_with_v<AccessorT,
7695+
detail::accessor_mode_cap::can_read> &&
7696+
detail::is_simd_view_type_v<OffsetSimdViewT> &&
7697+
ext::oneapi::experimental::is_property_list_v<PropertyListT>),
7698+
simd<T, N>>
7699+
gather(AccessorT acc, OffsetSimdViewT byte_offsets, PropertyListT props = {}) {
7700+
return gather<T, N, VS>(acc, byte_offsets.read(), props);
7701+
}
7702+
73927703
/// Variant of gather that uses local accessor as a parameter
73937704
///
73947705
/// Collects elements located at given offsets in an accessor and returns them
@@ -7411,7 +7722,7 @@ __ESIMD_API
74117722
std::enable_if_t<detail::is_local_accessor_with_v<
74127723
AccessorTy, detail::accessor_mode_cap::can_read>,
74137724
simd<T, N>>
7414-
gather(AccessorTy acc, simd<uint32_t, N> offsets, uint32_t glob_offset = 0,
7725+
gather(AccessorTy acc, simd<uint32_t, N> offsets, uint32_t glob_offset,
74157726
simd_mask<N> mask = 1) {
74167727
return slm_gather<T, N>(
74177728
offsets + glob_offset + __ESIMD_DNS::localAccessorToOffset(acc), mask);

0 commit comments

Comments
 (0)