@@ -493,26 +493,65 @@ lsc_slm_gather(__ESIMD_NS::simd<uint32_t, N> offsets,
493
493
// / @tparam NElts is the number of elements to load per address.
494
494
// / @tparam DS is the data size.
495
495
// / @param offset is the zero-based offset for SLM buffer in bytes.
496
+ // / @param pred is the predicate; if it contains 0, then the actual load
497
+ // / is not performed and the returned value is undefined.
496
498
// / @return is a vector of type T and size NElts
497
499
// /
498
500
// REVIEW NOTE: this span is a rendered diff hunk, not compilable source.
// Bare integers are old/new line-number gutters, lines starting with `-` are
// the removed text and lines starting with `+` are its replacement.
//
// lsc_slm_block_load(offset, pred): transposed SLM load that uses a single
// address (N == 1) and returns a simd<T, NElts>.
//
// What the hunk changes in this function:
//  * `pred` used to be a local `simd_mask<N> pred = 1` (with N == 1); it is
//    now a caller-supplied `simd_mask<1>` parameter whose default is 1, so a
//    call that passes only `offset` still compiles and forwards predicate 1.
//  * The local constants lose their leading underscore (_AddressScale ->
//    AddressScale, _ImmOffset -> ImmOffset, _DS -> FDS, _VS -> VS,
//    _Transposed -> Transposed). Presumably this avoids identifiers of the
//    form underscore + uppercase letter, which C++ reserves for the
//    implementation - TODO confirm against the commit message.
//  * `Transposed` is now declared with `auto` on a single line.
template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size>
499
- __ESIMD_API __ESIMD_NS::simd<T, NElts> lsc_slm_block_load (uint32_t offset) {
501
+ __ESIMD_API __ESIMD_NS::simd<T, NElts>
502
+ lsc_slm_block_load (uint32_t offset, __ESIMD_NS::simd_mask<1 > pred = 1 ) {
// Compile-time validation of NElts and of the T/DS combination, delegated to
// the detail:: helpers (their definitions are outside this hunk).
500
503
detail::check_lsc_vector_size<NElts>();
501
504
detail::check_lsc_data_size<T, DS>();
502
- constexpr uint16_t _AddressScale = 1 ;
503
- constexpr int _ImmOffset = 0 ;
504
- constexpr lsc_data_size _DS = detail::finalize_data_size<T, DS>();
505
- static_assert (_DS == lsc_data_size::u32 || _DS == lsc_data_size::u64 ,
505
+ constexpr uint16_t AddressScale = 1 ;
506
+ constexpr int ImmOffset = 0 ;
// FDS is the data size after detail::finalize_data_size; the static_assert
// below rejects anything other than u32 or u64 at compile time.
507
+ constexpr lsc_data_size FDS = detail::finalize_data_size<T, DS>();
508
+ static_assert (FDS == lsc_data_size::u32 || FDS == lsc_data_size::u64 ,
506
509
" Transposed load is supported only for data size u32 or u64" );
507
- constexpr detail::lsc_vector_size _VS = detail::to_lsc_vector_size<NElts>();
508
- constexpr detail::lsc_data_order _Transposed =
509
- detail::lsc_data_order::transpose;
510
+ constexpr detail::lsc_vector_size VS = detail::to_lsc_vector_size<NElts>();
511
+ constexpr auto Transposed = detail::lsc_data_order::transpose;
// One address only: `offsets` is a one-element vector initialized from `offset`.
510
512
constexpr int N = 1 ;
// Removed below: the hard-coded local predicate that the new parameter replaces.
511
- __ESIMD_NS::simd_mask<N> pred = 1 ;
512
513
__ESIMD_NS::simd<uint32_t , N> offsets = offset;
// Both cache hints are cache_hint::none; the raw predicate and offset data
// are forwarded to the __esimd_lsc_load_slm intrinsic.
513
514
return __esimd_lsc_load_slm<T, cache_hint::none, cache_hint::none,
514
- _AddressScale, _ImmOffset, _DS, _VS, _Transposed,
515
- N>(pred.data (), offsets.data ());
515
+ AddressScale, ImmOffset, FDS, VS, Transposed, N>(
516
+ pred.data (), offsets.data ());
517
+ }
518
+
519
+ // / Transposed SLM gather with 1 channel.
520
+ // / Supported platforms: DG2, PVC
521
+ // / VISA instruction: lsc_load.slm
522
+ // /
523
+ // / Collects elements located at slm and returns them
524
+ // / as a single \ref simd object.
525
+ // /
526
+ // / @tparam T is element type.
527
+ // / @tparam NElts is the number of elements to load per address.
528
+ // / @tparam DS is the data size.
529
+ // / @param offset is the zero-based offset for SLM buffer in bytes.
530
+ // / @param pred is the predicate; if it contains 0, then the actual load
531
+ // / is not performed and \p old_values is returned.
532
+ // / @param old_values contains the vector that is returned if
533
+ // / the parameter \p pred contains 0.
534
+ // / @return is a vector of type T and size NElts.
535
+ // /
536
// REVIEW NOTE: every line of this overload is an added (`+`) line of the diff,
// except the closing brace, which is the context line reused from the old
// function; bare integers are line-number gutters.
//
// lsc_slm_block_load(offset, pred, old_values): merge variant of the
// transposed single-address SLM load. It differs from the
// lsc_slm_block_load(offset, pred = 1) overload earlier in this hunk in two
// visible ways: it takes an extra `old_values` vector, and it calls
// __esimd_lsc_load_merge_slm with old_values.data() as a third argument.
// `pred` has no default here; C++ would not allow one without also defaulting
// the trailing `old_values` parameter.
// NOTE(review): the compile-time setup below repeats the other overload
// line for line - a shared helper could remove the duplication.
+ template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size>
537
+ __ESIMD_API __ESIMD_NS::simd<T, NElts>
538
+ lsc_slm_block_load (uint32_t offset, __ESIMD_NS::simd_mask<1 > pred,
539
+ __ESIMD_NS::simd<T, NElts> old_values) {
// Compile-time validation of NElts and of the T/DS combination, delegated to
// the detail:: helpers (their definitions are outside this hunk).
540
+ detail::check_lsc_vector_size<NElts>();
541
+ detail::check_lsc_data_size<T, DS>();
542
+ constexpr uint16_t AddressScale = 1 ;
543
+ constexpr int ImmOffset = 0 ;
// FDS is the data size after detail::finalize_data_size; only u32 and u64
// pass the static_assert below.
544
+ constexpr lsc_data_size FDS = detail::finalize_data_size<T, DS>();
545
+ static_assert (FDS == lsc_data_size::u32 || FDS == lsc_data_size::u64 ,
546
+ " Transposed load is supported only for data size u32 or u64" );
547
+ constexpr detail::lsc_vector_size VS = detail::to_lsc_vector_size<NElts>();
548
+ constexpr auto Transposed = detail::lsc_data_order::transpose;
// One address only: `offsets` is a one-element vector initialized from `offset`.
549
+ constexpr int N = 1 ;
550
+ __ESIMD_NS::simd<uint32_t , N> offsets = offset;
// Both cache hints are cache_hint::none; predicate, offset and old_values
// data are forwarded to the merge intrinsic.
551
+ return __esimd_lsc_load_merge_slm<T, cache_hint::none, cache_hint::none,
552
+ AddressScale, ImmOffset, FDS, VS,
553
+ Transposed, N>(pred.data (), offsets.data (),
554
+ old_values.data ());
516
555
}
517
556
518
557
// / USM pointer gather.
0 commit comments