@@ -182,6 +182,64 @@ __esimd_svm_block_st(__ESIMD_DNS::vector_type_t<Ty, N> *addr,
182
182
}
183
183
#endif // __SYCL_DEVICE_ONLY__
184
184
185
+ // / Surface-based gather.
186
+ // / Supported platforms: DG2, PVC
187
+ // /
188
+ // / Collects elements located at the surface and returns them
189
+ // / as a single \ref simd object.
190
+ // /
191
+ // / @tparam T is the element type.
192
+ // / @tparam L1H is L1 cache hint.
193
+ // / @tparam L2H is L2 cache hint.
194
+ // / @tparam AddressScale is the address scale.
195
+ // / @tparam ImmOffset is the immediate offset added to each address.
196
+ // / @tparam DS is the data size.
197
+ // / @tparam VS is the number of elements to load per address.
198
+ // / @tparam Transposed indicates if the data is transposed during the transfer.
199
+ // / @tparam N is the SIMD size of operation (the number of addresses to access)
200
+ // / @tparam SurfIndAliasT is the \ref sycl::accessor type.
201
+ // / @param pred is the per-element predicates.
202
+ // / @param offsets is the zero-based offsets in bytes.
203
+ // / @param surf_ind is the surface index.
204
+ // / @param OldValues is the vector whose elements are copied
205
+ // / to the returned result when the corresponding element of \p pred is 0.
206
+ // / @return is a vector of type T and size N * to_int<VS>()
207
+ template <typename T, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
208
+ uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
209
+ __ESIMD_DNS::lsc_vector_size VS,
210
+ __ESIMD_DNS::lsc_data_order Transposed, int N, typename SurfIndAliasT>
211
+ __ESIMD_INTRIN __ESIMD_DNS::vector_type_t <T, N * __ESIMD_DNS::to_int<VS>()>
212
+ __esimd_lsc_load_merge_bti(
213
+ __ESIMD_DNS::simd_mask_storage_t <N> pred,
214
+ __ESIMD_DNS::vector_type_t <uint32_t , N> offsets, SurfIndAliasT surf_ind,
215
+ __ESIMD_DNS::vector_type_t <T, N * __ESIMD_DNS::to_int<VS>()> OldValues = 0)
216
+ #ifdef __SYCL_DEVICE_ONLY__
217
+ ;
218
+ #else // __SYCL_DEVICE_ONLY__
219
+ {
220
+ __ESIMD_UNSUPPORTED_ON_HOST;
221
+ }
222
+ #endif // __SYCL_DEVICE_ONLY__
223
+
224
+ // / Similar to __esimd_lsc_load_merge_bti(), but the argument OldValues is not
225
+ // / explicitly specified, which results in random values in those elements of
226
+ // / the returned result for which the corresponding element in \p pred is 0.
227
+ template <typename T, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
228
+ uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
229
+ __ESIMD_DNS::lsc_vector_size VS,
230
+ __ESIMD_DNS::lsc_data_order Transposed, int N, typename SurfIndAliasT>
231
+ __ESIMD_INTRIN __ESIMD_DNS::vector_type_t <T, N * __ESIMD_DNS::to_int<VS>()>
232
+ __esimd_lsc_load_bti (__ESIMD_DNS::simd_mask_storage_t <N> pred,
233
+ __ESIMD_DNS::vector_type_t <uint32_t , N> offsets,
234
+ SurfIndAliasT surf_ind)
235
+ #ifdef __SYCL_DEVICE_ONLY__
236
+ ;
237
+ #else // __SYCL_DEVICE_ONLY__
238
+ {
239
+ __ESIMD_UNSUPPORTED_ON_HOST;
240
+ }
241
+ #endif // __SYCL_DEVICE_ONLY__
242
+
185
243
// flat_read4 does flat-address gather4
186
244
template <typename Ty, int N, __ESIMD_NS::rgba_channel_mask Mask>
187
245
__ESIMD_DNS::vector_type_t <Ty, N * get_num_channels_enabled (Mask)>
@@ -223,7 +281,7 @@ __ESIMD_INTRIN void __esimd_svm_scatter4_scaled(
223
281
// 0 - 1 byte, 1 - 2 bytes, 2 - 4 bytes
224
282
// @tparam Scale - offset scaling factor; must be zero currently
225
283
// @tparam L1H - L1 cache hint
226
- // @tparam L3H - L3 cache hint
284
+ // @tparam L2H - L2 cache hint
227
285
//
228
286
// Formal parameters:
229
287
// @param surf_ind - the surface index, taken from the SYCL memory object
@@ -260,7 +318,7 @@ __esimd_gather_scaled2(SurfIndAliasTy surf_ind, uint32_t global_offset,
260
318
// 0 - 1 byte, 1 - 2 bytes, 2 - 4 bytes
261
319
// @tparam Scale - offset scale; only 0 is supported for now
262
320
// @tparam L1H - L1 cache hint
263
- // @tparam L3H - L3 cache hint
321
+ // @tparam L2H - L2 cache hint
264
322
//
265
323
// Formal parameters:
266
324
// @param pred - per-element predicates; elements with zero corresponding
@@ -589,7 +647,7 @@ ESIMD_INLINE __ESIMD_NS::SurfaceIndex __esimd_get_surface_index(MemObjTy obj)
589
647
// /
590
648
// / @tparam Ty is element type.
591
649
// / @tparam L1H is L1 cache hint.
592
- // / @tparam L3H is L3 cache hint.
650
+ // / @tparam L2H is L2 cache hint.
593
651
// / @tparam AddressScale is the address scale.
594
652
// / @tparam ImmOffset is the immediate offset added to each address.
595
653
// / @tparam DS is the data size.
@@ -601,7 +659,7 @@ ESIMD_INLINE __ESIMD_NS::SurfaceIndex __esimd_get_surface_index(MemObjTy obj)
601
659
// / @param old_values is the vector of values copied to the result when the
602
660
// / corresponding element in \p pred is unset.
603
661
// / @return is a vector of type Ty and size N * to_int<VS>()
604
- template <typename Ty, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L3H ,
662
+ template <typename Ty, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H ,
605
663
uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
606
664
__ESIMD_DNS::lsc_vector_size VS,
607
665
__ESIMD_DNS::lsc_data_order Transposed, int N>
@@ -627,7 +685,7 @@ __esimd_lsc_load_merge_stateless(
627
685
// /
628
686
// / @tparam Ty is element type.
629
687
// / @tparam L1H is L1 cache hint.
630
- // / @tparam L3H is L3 cache hint.
688
+ // / @tparam L2H is L2 cache hint.
631
689
// / @tparam AddressScale is the address scale.
632
690
// / @tparam ImmOffset is the immediate offset added to each address.
633
691
// / @tparam DS is the data size.
@@ -637,7 +695,7 @@ __esimd_lsc_load_merge_stateless(
637
695
// / @param pred is predicates.
638
696
// / @param addrs is the load addresses.
639
697
// / @return is a vector of type Ty and size N * to_int<VS>()
640
- template <typename Ty, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L3H ,
698
+ template <typename Ty, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H ,
641
699
uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
642
700
__ESIMD_DNS::lsc_vector_size VS,
643
701
__ESIMD_DNS::lsc_data_order Transposed, int N>
0 commit comments