@@ -542,6 +542,129 @@ gather(const T *p, OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask,
542
542
return gather<T, N, VS>(p, byte_offsets.read(), mask, pass_thru, props);
543
543
}
544
544
545
+ /// template <int VS = 1, typename OffsetT, typename T, typename
546
+ /// PassThruSimdViewT, int N = PassThruSimdViewT::getSizeX() *
547
+ /// PassThruSimdViewT::getSizeY(),
548
+ /// typename PropertyListT = empty_props_t>
549
+ /// simd <T, N> gather(const T *p,
550
+ /// simd<OffsetT, N / VS> byte_offsets,
551
+ /// simd_mask<N / VS> mask, PassThruSimdViewT pass_thru,
552
+ /// PropertyListT props = {});
553
+ /// Variation of the API that takes \p pass_thru as a \c simd_view without
554
+ /// specifying \c T and \c N explicitly. Loads ("gathers") elements of type 'T'
555
+ /// from memory locations addressed by the base pointer \p p and byte offsets \p
556
+ /// byte_offsets, and returns the loaded elements. Access to any element's
557
+ /// memory location can be disabled via the input vector of predicates \p mask.
558
+ /// If mask[i] is unset, then the load from (p + byte_offsets[i]) is skipped and
559
+ /// the corresponding i-th element of the \p pass_thru operand is returned.
560
+ /// @tparam VS Vector size. It can also be read as the number of reads per
561
+ /// address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported
562
+ /// only on DG2 and PVC.
563
+ /// @param p The base address.
564
+ /// @param byte_offsets The vector of 32-bit or 64-bit offsets in bytes.
565
+ /// For each i, ((byte*)p + byte_offsets[i]) must be element size aligned.
566
+ /// If the alignment property is not passed, then it is assumed that each
567
+ /// accessed address is aligned by element-size.
568
+ /// @param mask The access mask.
569
+ /// @param pass_thru The simd_view of pass-through values; its size is the default N.
570
+ /// @param props The optional compile-time properties. Only 'alignment'
571
+ /// and cache hint properties are used.
572
+ /// @return A vector of elements read.
573
+ template <
574
+ int VS = 1, typename OffsetT, typename T, typename PassThruSimdViewT,
575
+ int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(),
576
+ typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
577
+ __ESIMD_API std::enable_if_t<
578
+ ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
579
+ detail::is_simd_view_type_v<PassThruSimdViewT>,
580
+ simd<T, N>>
581
+ gather(const T *p, simd<OffsetT, N / VS> byte_offsets, simd_mask<N / VS> mask,
582
+ PassThruSimdViewT pass_thru, PropertyListT props = {}) {
583
+ return gather<T, N, VS>(p, byte_offsets, mask, pass_thru.read(), props);
584
+ }
585
+
586
+ /// template <int VS = 1, typename OffsetSimdViewT, typename T, typename
587
+ /// PassThruSimdViewT, int N = PassThruSimdViewT::getSizeX() *
588
+ /// PassThruSimdViewT::getSizeY(),
589
+ /// typename PropertyListT = empty_props_t>
590
+ /// simd <T, N> gather(const T *p,
591
+ /// OffsetSimdViewT byte_offsets,
592
+ /// simd_mask<N / VS> mask, PassThruSimdViewT pass_thru,
593
+ /// PropertyListT props = {});
594
+ /// Variation of the API that allows using \c simd_view arguments without
595
+ /// specifying \c T and \c N explicitly. Loads ("gathers") elements of type 'T'
596
+ /// from memory locations addressed by the base pointer \p p and byte offsets \p
597
+ /// byte_offsets, and returns the loaded elements. Access to any element's
598
+ /// memory location can be disabled via the input vector of predicates \p mask.
599
+ /// If mask[i] is unset, then the load from (p + byte_offsets[i]) is skipped and
600
+ /// the corresponding i-th element of the \p pass_thru operand is returned.
601
+ /// @tparam VS Vector size. It can also be read as the number of reads per
602
+ /// address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported
603
+ /// only on DG2 and PVC.
604
+ /// @param p The base address.
605
+ /// @param byte_offsets The simd_view of 32-bit or 64-bit offsets in bytes.
606
+ /// For each i, ((byte*)p + byte_offsets[i]) must be element size aligned.
607
+ /// If the alignment property is not passed, then it is assumed that each
608
+ /// accessed address is aligned by element-size.
609
+ /// @param mask The access mask.
610
+ /// @param pass_thru The simd_view of pass-through values; its size is the default N.
611
+ /// @param props The optional compile-time properties. Only 'alignment'
612
+ /// and cache hint properties are used.
613
+ /// @return A vector of elements read.
614
+ template <
615
+ int VS = 1, typename OffsetSimdViewT, typename T,
616
+ typename PassThruSimdViewT,
617
+ int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(),
618
+ typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
619
+ __ESIMD_API std::enable_if_t<
620
+ ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
621
+ detail::is_simd_view_type_v<OffsetSimdViewT> &&
622
+ detail::is_simd_view_type_v<PassThruSimdViewT>,
623
+ simd<T, N>>
624
+ gather(const T *p, OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask,
625
+ PassThruSimdViewT pass_thru, PropertyListT props = {}) {
626
+ return gather<T, N, VS>(p, byte_offsets.read(), mask, pass_thru.read(),
627
+ props);
628
+ }
629
+
630
+ /// template <int VS, typename OffsetSimdViewT, typename T, int N,
631
+ /// typename PropertyListT = empty_props_t>
632
+ /// simd <T, N> gather(const T *p,
633
+ /// OffsetSimdViewT byte_offsets,
634
+ /// simd_mask<N / VS> mask, simd<T, N> pass_thru,
635
+ /// PropertyListT props = {});
636
+ /// Variation of the API that takes \p byte_offsets as a \c simd_view without
637
+ /// specifying \c T and \c N explicitly. Loads ("gathers") elements of type 'T'
638
+ /// from memory locations addressed by the base pointer \p p and byte offsets \p
639
+ /// byte_offsets, and returns the loaded elements. Access to any element's
640
+ /// memory location can be disabled via the input vector of predicates \p mask.
641
+ /// If mask[i] is unset, then the load from (p + byte_offsets[i]) is skipped and
642
+ /// the corresponding i-th element of the \p pass_thru operand is returned.
643
+ /// @tparam VS Vector size. It can also be read as the number of reads per
644
+ /// address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported
645
+ /// only on DG2 and PVC. This overload has no default VS; pass it explicitly.
646
+ /// @param p The base address.
647
+ /// @param byte_offsets The simd_view of 32-bit or 64-bit offsets in bytes.
648
+ /// For each i, ((byte*)p + byte_offsets[i]) must be element size aligned.
649
+ /// If the alignment property is not passed, then it is assumed that each
650
+ /// accessed address is aligned by element-size.
651
+ /// @param mask The access mask.
652
+ /// @param pass_thru The vector of pass-through values.
653
+ /// @param props The optional compile-time properties. Only 'alignment'
654
+ /// and cache hint properties are used.
655
+ /// @return A vector of elements read.
656
+ template <
657
+ int VS, typename OffsetSimdViewT, typename T, int N,
658
+ typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
659
+ __ESIMD_API std::enable_if_t<
660
+ ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
661
+ detail::is_simd_view_type_v<OffsetSimdViewT>,
662
+ simd<T, N>>
663
+ gather(const T *p, OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask,
664
+ simd<T, N> pass_thru, PropertyListT props = {}) {
665
+ return gather<T, N, VS>(p, byte_offsets.read(), mask, pass_thru, props);
666
+ }
667
+
545
668
/// simd <T, N> gather(const T *p,
546
669
/// OffsetSimdViewT byte_offsets,
547
670
/// simd_mask<N / VS> mask, PropertyListT props = {}); // (usm-ga-8)
@@ -577,6 +700,40 @@ gather(const T *p, OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask,
577
700
return gather<T, N, VS>(p, byte_offsets.read(), mask, props);
578
701
}
579
702
703
+ /// simd <T, N> gather(const T *p,
704
+ /// OffsetSimdViewT byte_offsets,
705
+ /// simd_mask<N / VS> mask, PropertyListT props = {});
706
+ /// Variation of the API that takes \p byte_offsets as a \c simd_view without
707
+ /// specifying \c T and \c N explicitly. Loads ("gathers") elements of type 'T'
708
+ /// from memory locations addressed by the base pointer \p p and byte offsets \p
709
+ /// byte_offsets, and returns the loaded elements. Access to any element's
710
+ /// memory location can be disabled via the input vector of predicates \p mask.
711
+ /// If mask[i] is unset, then the load from (p + byte_offsets[i]) is skipped and
712
+ /// the corresponding i-th element of the returned vector is undefined.
713
+ /// @tparam VS Vector size. It can also be read as the number of reads per
714
+ /// address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported
715
+ /// only on DG2 and PVC. N defaults to the offsets view size times VS.
716
+ /// @param p The base address.
717
+ /// @param byte_offsets The simd_view of 32-bit or 64-bit offsets in bytes.
718
+ /// For each i, ((byte*)p + byte_offsets[i]) must be element size aligned.
719
+ /// @param mask The access mask.
720
+ /// @param props The optional compile-time properties. Only 'alignment'
721
+ /// and cache hint properties are used.
722
+ /// @return A vector of elements read. Elements in masked-out lanes are
723
+ /// undefined.
724
+ template <
725
+ int VS = 1, typename OffsetSimdViewT, typename T,
726
+ int N = OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY() * VS,
727
+ typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
728
+ __ESIMD_API std::enable_if_t<
729
+ ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
730
+ detail::is_simd_view_type_v<OffsetSimdViewT>,
731
+ simd<T, N>>
732
+ gather(const T *p, OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask,
733
+ PropertyListT props = {}) {
734
+ return gather<T, N, VS>(p, byte_offsets.read(), mask, props);
735
+ }
736
+
580
737
/// simd <T, N> gather(const T *p,
581
738
/// OffsetSimdViewT byte_offsets,
582
739
/// PropertyListT props = {}); // (usm-ga-9)
@@ -604,6 +761,33 @@ __ESIMD_API std::enable_if_t<
604
761
gather(const T *p, OffsetSimdViewT byte_offsets, PropertyListT props = {}) {
605
762
return gather<T, N, VS>(p, byte_offsets.read(), props);
606
763
}
764
+ /// simd <T, N> gather(const T *p,
765
+ /// OffsetSimdViewT byte_offsets,
766
+ /// PropertyListT props = {});
767
+ /// Variation of the API that takes \p byte_offsets as a \c simd_view without
768
+ /// specifying \c T and \c N explicitly. Loads ("gathers") elements of type 'T'
769
+ /// from memory locations addressed by the base pointer \p p and byte offsets \p
770
+ /// byte_offsets, and returns the loaded elements.
771
+ /// @tparam VS Vector size. It can also be read as the number of reads per
772
+ /// address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported
773
+ /// only on DG2 and PVC. N defaults to the offsets view size times VS.
774
+ /// @param p The base address.
775
+ /// @param byte_offsets The simd_view of 32-bit or 64-bit offsets in bytes.
776
+ /// For each i, ((byte*)p + byte_offsets[i]) must be element size aligned.
777
+ /// @param props The optional compile-time properties. Only 'alignment'
778
+ /// and cache hint properties are used.
779
+ /// @return A vector of elements read.
780
+ template <
781
+ int VS = 1, typename OffsetSimdViewT, typename T,
782
+ int N = OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY() * VS,
783
+ typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
784
+ __ESIMD_API std::enable_if_t<
785
+ ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
786
+ detail::is_simd_view_type_v<OffsetSimdViewT>,
787
+ simd<T, N>>
788
+ gather(const T *p, OffsetSimdViewT byte_offsets, PropertyListT props = {}) {
789
+ return gather<T, N, VS>(p, byte_offsets.read(), props);
790
+ }
607
791
608
792
/// A variation of \c gather API with \c offsets represented as scalar.
609
793
///
0 commit comments