Skip to content

Commit 5d839a0

Browse files
authored
[ESIMD] Allow full autodeduction for USM gather APIs accepting simd_view (#13920)
1 parent 704a2a3 commit 5d839a0

File tree

2 files changed

+311
-7
lines changed

2 files changed

+311
-7
lines changed

sycl/include/sycl/ext/intel/esimd/memory.hpp

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -542,6 +542,129 @@ gather(const T *p, OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask,
542542
return gather<T, N, VS>(p, byte_offsets.read(), mask, pass_thru, props);
543543
}
544544

545+
/// template <int VS = 1, typename OffsetT, typename T, typename
546+
/// PassThruSimdViewT, int N = PassThruSimdViewT::getSizeX() *
547+
/// PassThruSimdViewT::getSizeY(),
548+
/// typename PropertyListT = empty_props_t>
549+
/// simd <T, N> gather(const T *p,
550+
/// simd<OffsetT, N / VS> byte_offsets,
551+
/// simd_mask<N / VS> mask, PassThruSimdViewT pass_thru,
552+
/// PropertyListT props = {});
553+
/// Variation of the API that allows using \c simd_view without specifying \c T
554+
/// and \c N template parameters. Loads ("gathers") elements of the type 'T'
555+
/// from memory locations addressed by the base pointer \p p and byte offsets \p
556+
/// byte_offsets, and returns the loaded elements. Access to any element's
557+
/// memory location can be disabled via the input vector of predicates \p mask.
558+
/// If mask[i] is unset, then the load from (p + byte_offsets[i]) is skipped and
559+
/// the corresponding i-th element from \p pass_thru operand is returned.
560+
/// @tparam VS Vector size. It can also be read as the number of reads per each
561+
/// address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported
562+
/// only on DG2 and PVC.
563+
/// @param p The base address.
564+
/// @param byte_offsets the vector of 32-bit or 64-bit offsets in bytes.
565+
/// For each i, ((byte*)p + byte_offsets[i]) must be element size aligned.
566+
/// If the alignment property is not passed, then it is assumed that each
567+
/// accessed address is aligned by element-size.
568+
/// @param mask The access mask.
569+
/// @param pass_thru The \c simd_view of pass-through values.
570+
/// @param props The optional compile-time properties. Only 'alignment'
571+
/// and cache hint properties are used.
572+
/// @return A vector of elements read.
573+
template <
574+
int VS = 1, typename OffsetT, typename T, typename PassThruSimdViewT,
575+
int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(),
576+
typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
577+
__ESIMD_API std::enable_if_t<
578+
ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
579+
detail::is_simd_view_type_v<PassThruSimdViewT>,
580+
simd<T, N>>
581+
gather(const T *p, simd<OffsetT, N / VS> byte_offsets, simd_mask<N / VS> mask,
582+
PassThruSimdViewT pass_thru, PropertyListT props = {}) {
583+
return gather<T, N, VS>(p, byte_offsets, mask, pass_thru.read(), props);
584+
}
585+
586+
/// template <int VS = 1, typename OffsetSimdViewT, typename T, typename
587+
/// PassThruSimdViewT, int N = PassThruSimdViewT::getSizeX() *
588+
/// PassThruSimdViewT::getSizeY(),
589+
/// typename PropertyListT = empty_props_t>
590+
/// simd <T, N> gather(const T *p,
591+
/// OffsetSimdViewT byte_offsets,
592+
/// simd_mask<N / VS> mask, PassThruSimdViewT pass_thru,
593+
/// PropertyListT props = {});
594+
/// Variation of the API that allows using \c simd_view without specifying \c T
595+
/// and \c N template parameters. Loads ("gathers") elements of the type 'T'
596+
/// from memory locations addressed by the base pointer \p p and byte offsets \p
597+
/// byte_offsets, and returns the loaded elements. Access to any element's
598+
/// memory location can be disabled via the input vector of predicates \p mask.
599+
/// If mask[i] is unset, then the load from (p + byte_offsets[i]) is skipped and
600+
/// the corresponding i-th element from \p pass_thru operand is returned.
601+
/// @tparam VS Vector size. It can also be read as the number of reads per each
602+
/// address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported
603+
/// only on DG2 and PVC.
604+
/// @param p The base address.
605+
/// @param byte_offsets the vector of 32-bit or 64-bit offsets in bytes.
606+
/// For each i, ((byte*)p + byte_offsets[i]) must be element size aligned.
607+
/// If the alignment property is not passed, then it is assumed that each
608+
/// accessed address is aligned by element-size.
609+
/// @param mask The access mask.
610+
/// @param pass_thru The \c simd_view of pass-through values.
611+
/// @param props The optional compile-time properties. Only 'alignment'
612+
/// and cache hint properties are used.
613+
/// @return A vector of elements read.
614+
template <
615+
int VS = 1, typename OffsetSimdViewT, typename T,
616+
typename PassThruSimdViewT,
617+
int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(),
618+
typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
619+
__ESIMD_API std::enable_if_t<
620+
ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
621+
detail::is_simd_view_type_v<OffsetSimdViewT> &&
622+
detail::is_simd_view_type_v<PassThruSimdViewT>,
623+
simd<T, N>>
624+
gather(const T *p, OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask,
625+
PassThruSimdViewT pass_thru, PropertyListT props = {}) {
626+
return gather<T, N, VS>(p, byte_offsets.read(), mask, pass_thru.read(),
627+
props);
628+
}
629+
630+
/// template <int VS, typename OffsetSimdViewT, typename T, int N,
631+
/// typename PropertyListT = empty_props_t>
632+
/// simd <T, N> gather(const T *p,
633+
/// OffsetSimdViewT byte_offsets,
634+
/// simd_mask<N / VS> mask, simd<T, N> pass_thru,
635+
/// PropertyListT props = {});
636+
/// Variation of the API that allows using \c simd_view without specifying \c T
637+
/// and \c N template parameters. Loads ("gathers") elements of the type 'T'
638+
/// from memory locations addressed by the base pointer \p p and byte offsets \p
639+
/// byte_offsets, and returns the loaded elements. Access to any element's
640+
/// memory location can be disabled via the input vector of predicates \p mask.
641+
/// If mask[i] is unset, then the load from (p + byte_offsets[i]) is skipped and
642+
/// the corresponding i-th element from \p pass_thru operand is returned.
643+
/// @tparam VS Vector size. It can also be read as the number of reads per each
644+
/// address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported
645+
/// only on DG2 and PVC.
646+
/// @param p The base address.
647+
/// @param byte_offsets the vector of 32-bit or 64-bit offsets in bytes.
648+
/// For each i, ((byte*)p + byte_offsets[i]) must be element size aligned.
649+
/// If the alignment property is not passed, then it is assumed that each
650+
/// accessed address is aligned by element-size.
651+
/// @param mask The access mask.
652+
/// @param pass_thru The vector pass through values.
653+
/// @param props The optional compile-time properties. Only 'alignment'
654+
/// and cache hint properties are used.
655+
/// @return A vector of elements read.
656+
template <
657+
int VS, typename OffsetSimdViewT, typename T, int N,
658+
typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
659+
__ESIMD_API std::enable_if_t<
660+
ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
661+
detail::is_simd_view_type_v<OffsetSimdViewT>,
662+
simd<T, N>>
663+
gather(const T *p, OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask,
664+
simd<T, N> pass_thru, PropertyListT props = {}) {
665+
return gather<T, N, VS>(p, byte_offsets.read(), mask, pass_thru, props);
666+
}
667+
545668
/// simd <T, N> gather(const T *p,
546669
/// OffsetSimdViewT byte_offsets,
547670
/// simd_mask<N / VS> mask, PropertyListT props = {}); // (usm-ga-8)
@@ -577,6 +700,40 @@ gather(const T *p, OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask,
577700
return gather<T, N, VS>(p, byte_offsets.read(), mask, props);
578701
}
579702

703+
/// simd <T, N> gather(const T *p,
704+
/// OffsetSimdViewT byte_offsets,
705+
/// simd_mask<N / VS> mask, PropertyListT props = {});
706+
/// Variation of the API that allows using \c simd_view without specifying \c T
707+
/// and \c N template parameters. Loads ("gathers") elements of the type 'T'
708+
/// from memory locations addressed by the base pointer \p p and byte offsets \p
709+
/// byte_offsets, and returns the loaded elements. Access to any element's
710+
/// memory location can be disabled via the input vector of predicates \p mask.
711+
/// If mask[i] is unset, then the load from (p + byte_offsets[i]) is skipped and
712+
/// the corresponding i-th element of the returned vector is undefined.
713+
/// @tparam VS Vector size. It can also be read as the number of reads per each
714+
/// address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported
715+
/// only on DG2 and PVC.
716+
/// @param p The base address.
717+
/// @param byte_offsets the vector of 32-bit or 64-bit offsets in bytes.
718+
/// For each i, ((byte*)p + byte_offsets[i]) must be element size aligned.
719+
/// @param mask The access mask.
720+
/// @param props The optional compile-time properties. Only 'alignment'
721+
/// and cache hint properties are used.
722+
/// @return A vector of elements read. Elements in masked out lanes are
723+
/// undefined.
724+
template <
725+
int VS = 1, typename OffsetSimdViewT, typename T,
726+
int N = OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY() * VS,
727+
typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
728+
__ESIMD_API std::enable_if_t<
729+
ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
730+
detail::is_simd_view_type_v<OffsetSimdViewT>,
731+
simd<T, N>>
732+
gather(const T *p, OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask,
733+
PropertyListT props = {}) {
734+
return gather<T, N, VS>(p, byte_offsets.read(), mask, props);
735+
}
736+
580737
/// simd <T, N> gather(const T *p,
581738
/// OffsetSimdViewT byte_offsets,
582739
/// PropertyListT props = {}); // (usm-ga-9)
@@ -604,6 +761,33 @@ __ESIMD_API std::enable_if_t<
604761
gather(const T *p, OffsetSimdViewT byte_offsets, PropertyListT props = {}) {
605762
return gather<T, N, VS>(p, byte_offsets.read(), props);
606763
}
764+
/// simd <T, N> gather(const T *p,
765+
/// OffsetSimdViewT byte_offsets,
766+
/// PropertyListT props = {});
767+
/// Variation of the API that allows using \c simd_view without specifying \c T
768+
/// and \c N template parameters. Loads ("gathers") elements of the type 'T'
769+
/// from memory locations addressed by the base pointer \p p and byte offsets \p
770+
/// byte_offsets, and returns the loaded elements.
771+
/// @tparam VS Vector size. It can also be read as the number of reads per each
772+
/// address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported
773+
/// only on DG2 and PVC.
774+
/// @param p The base address.
775+
/// @param byte_offsets the vector of 32-bit or 64-bit offsets in bytes.
776+
/// For each i, ((byte*)p + byte_offsets[i]) must be element size aligned.
777+
/// @param props The optional compile-time properties. Only 'alignment'
778+
/// and cache hint properties are used.
779+
/// @return A vector of elements read.
780+
template <
781+
int VS = 1, typename OffsetSimdViewT, typename T,
782+
int N = OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY() * VS,
783+
typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
784+
__ESIMD_API std::enable_if_t<
785+
ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
786+
detail::is_simd_view_type_v<OffsetSimdViewT>,
787+
simd<T, N>>
788+
gather(const T *p, OffsetSimdViewT byte_offsets, PropertyListT props = {}) {
789+
return gather<T, N, VS>(p, byte_offsets.read(), props);
790+
}
607791

608792
/// A variation of \c gather API with \c offsets represented as scalar.
609793
///

0 commit comments

Comments
 (0)