@@ -47,8 +47,8 @@ class AccessorPrivateProxy {
47
47
};
48
48
49
49
template <int ElemsPerAddr,
50
- typename = sycl::detail:: enable_if_t <
51
- (ElemsPerAddr == 1 || ElemsPerAddr == 2 || ElemsPerAddr == 4 )>>
50
+ typename = std:: enable_if_t <(ElemsPerAddr == 1 || ElemsPerAddr == 2 ||
51
+ ElemsPerAddr == 4 )>>
52
52
constexpr unsigned int ElemsPerAddrEncoding () {
53
53
// encoding requires log2 of ElemsPerAddr
54
54
if constexpr (ElemsPerAddr == 1 )
@@ -342,7 +342,7 @@ __esimd_gather_scaled2(SurfIndAliasTy surf_ind, uint32_t global_offset,
342
342
;
343
343
#else
344
344
{
345
- static_assert (N == 1 || N == 8 || N == 16 );
345
+ static_assert (N == 1 || N == 8 || N == 16 || N == 32 );
346
346
static_assert (TySizeLog2 <= 2 && Scale == 0 );
347
347
static_assert (std::is_integral<Ty>::value || TySizeLog2 == 2 );
348
348
throw cl::sycl::feature_not_supported ();
@@ -392,9 +392,6 @@ __esimd_scatter_scaled(__SEIEED::simd_mask_storage_t<N> pred,
392
392
}
393
393
#endif // __SYCL_DEVICE_ONLY__
394
394
395
- // TODO bring the parameter order of __esimd* intrinsics in accordance with the
396
- // correponsing BE intrinsicics parameter order.
397
-
398
395
// flat_atomic: flat-address atomic
399
396
template <__SEIEE::atomic_op Op, typename Ty, int N,
400
397
__SEIEE::CacheHint L1H = __SEIEE::CacheHint::None,
@@ -473,7 +470,7 @@ __ESIMD_INTRIN void __esimd_fence(uint8_t cntl)
473
470
474
471
// Scaled gather from a surface.
475
472
template <typename Ty, int N, typename SurfIndAliasTy, int TySizeLog2,
476
- int16_t SCALE = 0 >
473
+ int16_t Scale = 0 >
477
474
__ESIMD_INTRIN __SEIEED::vector_type_t <Ty, N>
478
475
__esimd_gather_scaled (__SEIEED::simd_mask_storage_t <N> pred,
479
476
SurfIndAliasTy surf_ind, uint32_t global_offset,
@@ -486,6 +483,40 @@ __esimd_gather_scaled(__SEIEED::simd_mask_storage_t<N> pred,
486
483
}
487
484
#endif // __SYCL_DEVICE_ONLY__
488
485
486
+ // / Predicated (masked) scaled gather from a surface.
487
+ // /
488
+ // / Template (compile-time constant) parameters:
489
+ // / @tparam Ty - element type
490
+ // / @tparam N - the number of elements to read
491
+ // / @tparam SurfIndAliasTy - "surface index alias" type - internal type in the
492
+ // / accessor used to denote the surface
493
+ // / @tparam TySizeLog2 - Log2 of the number of bytes read per element:
494
+ // / 0 - 1 byte, 1 - 2 bytes, 2 - 4 bytes
495
+ // / @tparam Scale - offset scale; only 0 is supported for now
496
+ // /
497
+ // / Formal parameters:
498
+ // / @param surf_ind - the surface index, taken from the SYCL memory object
499
+ // / @param global_offset - offset added to each individual element's offset to
500
+ // / compute actual memory access offset for that element
501
+ // / @param offsets - per-element offsets
502
+ // / @param pred - per-element predicates; elements with zero corresponding
503
+ // / predicates are not read
504
+ // / @return - elements read ("gathered") from memory
505
+
506
+ template <typename Ty, int N, typename SurfIndAliasTy, int TySizeLog2,
507
+ int16_t Scale = 0 >
508
+ __ESIMD_INTRIN __SEIEED::vector_type_t <Ty, N>
509
+ __esimd_gather_masked_scaled2 (SurfIndAliasTy surf_ind, uint32_t global_offset,
510
+ __SEIEED::vector_type_t <uint32_t , N> offsets,
511
+ __SEIEED::simd_mask_storage_t <N> pred)
512
+ #ifdef __SYCL_DEVICE_ONLY__
513
+ ;
514
+ #else
515
+ {
516
+ throw cl::sycl::feature_not_supported ();
517
+ }
518
+ #endif // __SYCL_DEVICE_ONLY__
519
+
489
520
// Reads a block of data from given surface at given offset, offset must be
490
521
// 16-byte-aligned.
491
522
template <typename Ty, int N, typename SurfIndAliasTy, int32_t IsModified = 0 >
@@ -705,7 +736,6 @@ __ESIMD_INTRIN void __esimd_media_st(TACC handle, unsigned x, unsigned y,
705
736
}
706
737
#endif // __SYCL_DEVICE_ONLY__
707
738
708
- #ifdef __SYCL_DEVICE_ONLY__
709
739
// / \brief Converts given value to a surface index.
710
740
// / The input must always be a result of
711
741
// / detail::AccessorPrivateProxy::getNativeImageObj(acc)
@@ -724,15 +754,17 @@ __ESIMD_INTRIN void __esimd_media_st(TACC handle, unsigned x, unsigned y,
724
754
// / pointer, where we can do ptr to uint32_t conversion.
725
755
// / This intrinsic can be called only from the device code, as
726
756
// / accessor => memory handle translation for host is different.
727
- // /
728
- // / @param SYCL accessor's native memory object extracted from it via
757
+ // / @param obj the SYCL accessor's native memory object, extracted from it via
729
758
// / getNativeImageObj.
730
- // /
731
- // / Returns the surface index (binding table index) value 'sid' corresponds to.
732
- // /
733
- template <typename SurfIndAliasTy>
734
- __ESIMD_INTRIN __SEIEE::SurfaceIndex
735
- __esimd_get_surface_index (SurfIndAliasTy sid);
759
+ // / @return the surface index (binding table index) value that obj corresponds to.
760
+ template <typename MemObjTy>
761
+ __ESIMD_INTRIN __SEIEE::SurfaceIndex __esimd_get_surface_index (MemObjTy obj)
762
+ #ifdef __SYCL_DEVICE_ONLY__
763
+ ;
764
+ #else
765
+ {
766
+ throw cl::sycl::feature_not_supported ();
767
+ }
736
768
#endif // __SYCL_DEVICE_ONLY__
737
769
738
770
// / \brief Raw sends load.
0 commit comments