@@ -521,6 +521,7 @@ gather_impl(AccessorTy acc, simd<uint32_t, N> offsets, uint32_t glob_offset,
521
521
// / \c float.
522
522
// / @tparam N The number of vector elements. Can be \c 1, \c 8, \c 16 or \c 32.
523
523
// / @tparam AccessorTy The accessor type.
524
+ // / @tparam Toffset The offset type. Must be an integral type.
524
525
// / @param acc The accessor to gather from.
525
526
// / @param offsets Per-element offsets in bytes.
526
527
// / @param glob_offset Offset in bytes added to each individual element's offset
@@ -529,12 +530,17 @@ gather_impl(AccessorTy acc, simd<uint32_t, N> offsets, uint32_t glob_offset,
529
530
// / predicate are not accessed, their values in the resulting vector are
530
531
// / undefined.
531
532
// /
532
- template <typename T, int N, typename AccessorTy>
533
- __ESIMD_API std::enable_if_t <(sizeof (T) <= 4 ) &&
534
- (N == 1 || N == 8 || N == 16 || N == 32 ) &&
535
- !std::is_pointer<AccessorTy>::value,
536
- simd<T, N>>
537
- gather (AccessorTy acc, simd<uint32_t , N> offsets, uint32_t glob_offset = 0 ,
533
+ template <typename T, int N, typename AccessorTy, typename Toffset>
534
+ __ESIMD_API std::enable_if_t <
535
+ (sizeof (T) <= 4 ) && (N == 1 || N == 8 || N == 16 || N == 32 ) &&
536
+ !std::is_pointer<AccessorTy>::value && std::is_integral_v<Toffset>,
537
+ simd<T, N>>
538
+ gather (AccessorTy acc, simd<Toffset, N> offsets,
539
+ #ifdef __ESIMD_FORCE_STATELESS_MEM
540
+ uint64_t glob_offset = 0 ,
541
+ #else
542
+ uint32_t glob_offset = 0 ,
543
+ #endif
538
544
simd_mask<N> mask = 1 ) {
539
545
#ifdef __ESIMD_FORCE_STATELESS_MEM
540
546
return gather<T, N>(__ESIMD_DNS::accessorToPointer<T>(acc, glob_offset),
@@ -554,6 +560,7 @@ gather(AccessorTy acc, simd<uint32_t, N> offsets, uint32_t glob_offset = 0,
554
560
// / \c float.
555
561
// / @tparam N The number of vector elements. Can be \c 1, \c 8, \c 16 or \c 32.
556
562
// / @tparam AccessorTy The accessor type.
563
+ // / @tparam Toffset The offset type. Must be an integral type.
557
564
// / @param acc The accessor to scatter to.
558
565
// / @param offsets Per-element offsets in bytes.
559
566
// / @param vals Values to write.
@@ -563,12 +570,17 @@ gather(AccessorTy acc, simd<uint32_t, N> offsets, uint32_t glob_offset = 0,
563
570
// / predicate are not accessed.
564
571
// /
565
572
// /
566
- template <typename T, int N, typename AccessorTy>
567
- __ESIMD_API std::enable_if_t <(sizeof (T) <= 4 ) &&
568
- (N == 1 || N == 8 || N == 16 || N == 32 ) &&
569
- !std::is_pointer<AccessorTy>::value>
570
- scatter (AccessorTy acc, simd<uint32_t , N> offsets, simd<T, N> vals,
571
- uint32_t glob_offset = 0 , simd_mask<N> mask = 1 ) {
573
+ template <typename T, int N, typename AccessorTy, typename Toffset>
574
+ __ESIMD_API std::enable_if_t <
575
+ (sizeof (T) <= 4 ) && (N == 1 || N == 8 || N == 16 || N == 32 ) &&
576
+ !std::is_pointer<AccessorTy>::value && std::is_integral_v<Toffset>>
577
+ scatter (AccessorTy acc, simd<Toffset, N> offsets, simd<T, N> vals,
578
+ #ifdef __ESIMD_FORCE_STATELESS_MEM
579
+ uint64_t glob_offset = 0 ,
580
+ #else
581
+ uint32_t glob_offset = 0 ,
582
+ #endif
583
+ simd_mask<N> mask = 1 ) {
572
584
#ifdef __ESIMD_FORCE_STATELESS_MEM
573
585
scatter<T, N>(__ESIMD_DNS::accessorToPointer<T>(acc, glob_offset), offsets,
574
586
vals, mask);
0 commit comments