@@ -3928,7 +3928,7 @@ atomic_update_impl(AccessorTy acc, simd<Toffset, N> byte_offsets,
3928
3928
// / @tparam L2H is L2 cache hint.
3929
3929
// / @tparam AccessorTy is the \ref sycl::accessor type.
3930
3930
// / @param acc is the SYCL accessor.
3931
- // / @param offsets is the zero-based offsets.
3931
+ // / @param byte_offset is the zero-based offsets.
3932
3932
// / @param src0 is the first atomic operand.
3933
3933
// / @param pred is predicates.
3934
3934
// /
@@ -3971,6 +3971,63 @@ atomic_update_impl(AccessorTy acc, simd<Toffset, N> byte_offset,
3971
3971
return lsc_format_ret<T>(Tmp);
3972
3972
#endif
3973
3973
}
3974
+
3975
+ // / Accessor-based atomic.
3976
+ // / Supported platforms: DG2, PVC
3977
+ // / VISA instruction: lsc_atomic_<OP>.ugm
3978
+ // /
3979
+ // / @tparam Op is operation type.
3980
+ // / @tparam T is element type.
3981
+ // / @tparam N is the number of channels (platform dependent).
3982
+ // / @tparam DS is the data size.
3983
+ // / @tparam L1H is L1 cache hint.
3984
+ // / @tparam L2H is L2 cache hint.
3985
+ // / @tparam AccessorTy is the \ref sycl::accessor type.
3986
+ // / @param acc is the SYCL accessor.
3987
+ // / @param byte_offset is the zero-based offsets.
3988
+ // / @param src0 is the first atomic operand (expected value).
3989
+ // / @param src1 is the second atomic operand (new value).
3990
+ // / @param pred is predicates.
3991
+ // /
3992
+ // / @return A vector of the old values at the memory locations before the
3993
+ // / update.
3994
+ template <atomic_op Op, typename T, int N, lsc_data_size DS, cache_hint L1H,
3995
+ cache_hint L2H, typename AccessorTy, typename Toffset>
3996
+ __ESIMD_API std::enable_if_t <
3997
+ get_num_args<Op>() == 2 &&
3998
+ sycl::detail::acc_properties::is_accessor_v<AccessorTy> &&
3999
+ !sycl::detail::acc_properties::is_local_accessor_v<AccessorTy>,
4000
+ simd<T, N>>
4001
+ atomic_update_impl (AccessorTy acc, simd<Toffset, N> byte_offset,
4002
+ simd<T, N> src0, simd<T, N> src1, simd_mask<N> pred) {
4003
+ #ifdef __ESIMD_FORCE_STATELESS_MEM
4004
+ return atomic_update_impl<Op, T, N, DS, L1H, L2H>(
4005
+ __ESIMD_DNS::accessorToPointer<T>(acc), byte_offset, src0, src1, pred);
4006
+ #else
4007
+ static_assert (std::is_integral_v<Toffset> && sizeof (Toffset) == 4 ,
4008
+ " Unsupported offset type" );
4009
+ check_lsc_vector_size<1 >();
4010
+ check_lsc_data_size<T, DS>();
4011
+ check_atomic<Op, T, N, 2 , /* IsLSC*/ true >();
4012
+ check_cache_hint<cache_action::atomic, L1H, L2H>();
4013
+ constexpr uint16_t AddressScale = 1 ;
4014
+ constexpr int ImmOffset = 0 ;
4015
+ constexpr lsc_data_size EDS = expand_data_size (finalize_data_size<T, DS>());
4016
+ constexpr lsc_vector_size VS = to_lsc_vector_size<1 >();
4017
+ constexpr lsc_data_order Transposed = lsc_data_order::nontranspose;
4018
+ using MsgT = typename lsc_expand_type<T>::type;
4019
+ constexpr int IOp = lsc_to_internal_atomic_op<T, Op>();
4020
+ simd<MsgT, N> Msg_data0 = lsc_format_input<MsgT>(src0);
4021
+ simd<MsgT, N> Msg_data1 = lsc_format_input<MsgT>(src1);
4022
+ auto si = get_surface_index (acc);
4023
+ simd<MsgT, N> Tmp =
4024
+ __esimd_lsc_xatomic_bti_2<MsgT, IOp, L1H, L2H, AddressScale, ImmOffset,
4025
+ EDS, VS, Transposed, N>(
4026
+ pred.data (), byte_offset.data (), Msg_data0.data (), Msg_data1.data (),
4027
+ si);
4028
+ return lsc_format_ret<T>(Tmp);
4029
+ #endif
4030
+ }
3974
4031
} // namespace detail
3975
4032
3976
4033
// / @addtogroup sycl_esimd_memory_atomics
@@ -6143,58 +6200,220 @@ atomic_update(AccessorTy acc, Toffset offset, simd<Tx, N> src0,
6143
6200
}
6144
6201
6145
6202
// / @anchor accessor_atomic_update2
6203
+ // / @brief Two-argument variant of the atomic update operation.
6204
+ // /
6205
+ // / simd<T, N>
6206
+ // / atomic_update(AccessorTy acc, simd<Toffset, N> byte_offset,
6207
+ // / simd<T, N> src0, simd<T, N> src1,
6208
+ // simd_mask<N> mask,props = {}); // (acc-au2-1)
6209
+ // /
6210
+ // / simd<T, N>
6211
+ // / atomic_update(AccessorTy acc, simd<Toffset, N> byte_offset,
6212
+ // / simd<T, N> src0, simd<T, N> src1,
6213
+ // / props = {}); // (acc-au2-2)
6214
+ // / simd<T, N>
6215
+ // / atomic_update(AccessorTy acc, simd_view<OffsetObjT, OffsetRegionTy>
6216
+ // / byte_offset, simd<T, N> src0, simd<T, N> src1,
6217
+ // / simd_mask<N> mask, props = {}); // (acc-au2-3)
6218
+ // /
6219
+ // / simd<T, N>
6220
+ // / atomic_update(AccessorTy acc,
6221
+ // / simd_view<OffsetObjT, OffsetRegionTy>, byte_offset,
6222
+ // / simd<T, N> src0, simd<T, N> src1, props = {}); // (acc-au2-4)
6223
+ // /
6224
+ // / simd<T, N>
6225
+ // / atomic_update(AccessorTy acc, simd<Toffset, N> byte_offset,
6226
+ // / simd<T, N> src0, simd<T, N> src1,
6227
+ // simd_mask<N> mask,props = {}); // (acc-au2-1)
6228
+ // /
6146
6229
// / Atomically updates \c N memory locations represented by an accessor and
6147
6230
// / a vector of offsets and returns a vector of old
6148
6231
// / values found at the memory locations before update. The update operation
6149
6232
// / has 2 additional arguments.
6150
6233
// /
6151
6234
// / @tparam Op The atomic operation - can be one of the following:
6152
6235
// / \c atomic_op::cmpxchg, \c atomic_op::fcmpxchg.
6153
- // / @tparam Tx The vector element type.
6236
+ // / @tparam T The vector element type.
6154
6237
// / @tparam N The number of memory locations to update.
6155
6238
// / @tparam AccessorTy type of the SYCL accessor.
6156
6239
// / @param acc The SYCL accessor.
6157
- // / @param offset The vector of 32-bit or 64-bit offsets in bytes. 64-bit
6240
+ // / @param byte_offset The vector of 32-bit or 64-bit offsets in bytes. 64-bit
6158
6241
// / offsets are supported only when stateless memory accesses are enforced,
6159
6242
// / i.e. accessor based accesses are automatically converted to stateless
6160
6243
// / accesses.
6161
6244
// / @param src0 The first additional argument (new value).
6162
6245
// / @param src1 The second additional argument (expected value).
6163
6246
// / @param mask Operation mask, only locations with non-zero in the
6164
6247
// / corresponding mask element are updated.
6248
+ // / @param props The parameter 'props' specifies the optional compile-time
6249
+ // / properties list. Only L1/L2 properties are used.
6250
+ // Other properties are ignored.
6165
6251
// / @return A vector of the old values at the memory locations before the
6166
6252
// / update.
6167
6253
// /
6168
- template <atomic_op Op, typename Tx, int N, typename Toffset,
6169
- typename AccessorTy>
6254
+ template <atomic_op Op, typename T, int N, typename Toffset,
6255
+ typename AccessorTy,
6256
+ typename PropertyListT =
6257
+ ext::oneapi::experimental::detail::empty_properties_t >
6170
6258
__ESIMD_API std::enable_if_t <
6171
- std::is_integral_v<Toffset> &&
6172
- sycl::detail::acc_properties::is_accessor_v <AccessorTy> &&
6173
- !sycl::detail::acc_properties::is_local_accessor_v<AccessorTy >,
6174
- simd<Tx , N>>
6175
- atomic_update (AccessorTy acc, simd<Toffset, N> offset , simd<Tx , N> src0,
6176
- simd<Tx , N> src1, simd_mask<N> mask) {
6259
+ __ESIMD_DNS::get_num_args<Op>() == 2 && std::is_integral_v<Toffset> &&
6260
+ __ESIMD_DNS::is_rw_device_accessor_v <AccessorTy> &&
6261
+ ext::oneapi::experimental::is_property_list_v<PropertyListT >,
6262
+ simd<T , N>>
6263
+ atomic_update (AccessorTy acc, simd<Toffset, N> byte_offset , simd<T , N> src0,
6264
+ simd<T , N> src1, simd_mask<N> mask, PropertyListT props = {} ) {
6177
6265
#ifdef __ESIMD_FORCE_STATELESS_MEM
6178
- return atomic_update<Op, Tx , N>(__ESIMD_DNS::accessorToPointer<Tx >(acc),
6179
- offset , src0, src1, mask);
6266
+ return atomic_update<Op, T , N>(__ESIMD_DNS::accessorToPointer<T >(acc),
6267
+ byte_offset , src0, src1, mask, props );
6180
6268
#else
6269
+ constexpr auto L1Hint =
6270
+ detail::getPropertyValue<PropertyListT, cache_hint_L1_key>(
6271
+ cache_hint::none);
6272
+
6273
+ constexpr auto L2Hint =
6274
+ detail::getPropertyValue<PropertyListT, cache_hint_L2_key>(
6275
+ cache_hint::none);
6276
+
6277
+ static_assert (!PropertyListT::template has_property<cache_hint_L3_key>(),
6278
+ " L3 cache hint is reserved. The old/experimental L3 LSC cache "
6279
+ " hint is cache_level::L2 now." );
6181
6280
static_assert (std::is_integral_v<Toffset>, " Unsupported offset type" );
6182
6281
static_assert (sizeof (Toffset) == 4 , " Only 32 bit offset is supported" );
6183
- if constexpr (Op == atomic_op::fcmpxchg) {
6184
- // Auto-convert FP atomics to LSC version.
6185
- return atomic_update<detail::to_lsc_atomic_op<Op>(), Tx, N>(
6186
- acc, offset, src0, src1, mask);
6282
+ // Use LSC atomic when cache hints are present, FP atomics is used,
6283
+ // non-power of two length is used, or operation width greater than 32.
6284
+ if constexpr (L1Hint != cache_hint::none || L2Hint != cache_hint::none ||
6285
+ Op == atomic_op::fcmpxchg || !__ESIMD_DNS::isPowerOf2 (N, 32 )) {
6286
+ // 2-argument lsc_atomic_update arguments order matches the standard one -
6287
+ // expected value first, then new value. But atomic_update uses reverse
6288
+ // order, hence the src1/src0 swap.
6289
+ return detail::atomic_update_impl<
6290
+ Op, T, N, detail::lsc_data_size::default_size, L1Hint, L2Hint>(
6291
+ acc, byte_offset, src1, src0, mask);
6187
6292
} else {
6188
- detail::check_atomic<Op, Tx , N, 2 >();
6189
- static_assert (sizeof (Tx ) == 4 , " Only 32 bit data is supported" );
6293
+ detail::check_atomic<Op, T , N, 2 >();
6294
+ static_assert (sizeof (T ) == 4 , " Only 32 bit data is supported" );
6190
6295
const auto si = __ESIMD_NS::get_surface_index (acc);
6191
- using T = typename detail::__raw_t <Tx>;
6192
- return __esimd_dword_atomic2<Op, T, N>(mask.data (), si, offset.data (),
6193
- src0.data (), src1.data ());
6296
+ using Tx = typename detail::__raw_t <T>;
6297
+ return __esimd_dword_atomic2<Op, Tx, N>(
6298
+ mask.data (), si, byte_offset.data (),
6299
+ sycl::bit_cast<__ESIMD_DNS::vector_type_t <Tx, N>>(src0.data ()),
6300
+ sycl::bit_cast<__ESIMD_DNS::vector_type_t <Tx, N>>(src1.data ()));
6194
6301
}
6195
6302
#endif
6196
6303
}
6197
6304
6305
+ // / simd<T, N>
6306
+ // / atomic_update(AccessorTy acc, simd<Toffset, N> byte_offset,
6307
+ // / simd<T, N> src0, simd<T, N> src1,
6308
+ // / props = {}); // (acc-au2-2)
6309
+ // /
6310
+ // / A variation of \c atomic_update API with no mask operand.
6311
+ // /
6312
+ // / @tparam Op The atomic operation - can be one of the following:
6313
+ // / \c atomic_op::cmpxchg, \c atomic_op::fcmpxchg.
6314
+ // / @tparam T The vector element type.
6315
+ // / @tparam N The number of memory locations to update.
6316
+ // / @param acc The SYCL accessor.
6317
+ // / @param byte_offset The vector of 32-bit or 64-bit offsets in bytes.
6318
+ // / @param src0 The first additional argument (new value).
6319
+ // / @param src1 The second additional argument (expected value).
6320
+ // / @param props The parameter 'props' specifies the optional compile-time
6321
+ // / properties list. Only L1/L2 properties are used.
6322
+ // Other properties are ignored.
6323
+ // / @return A vector of the old values at the memory locations before the
6324
+ // / update.
6325
+ // /
6326
+ template <atomic_op Op, typename T, int N, typename Toffset,
6327
+ typename AccessorTy,
6328
+ typename PropertyListT =
6329
+ ext::oneapi::experimental::detail::empty_properties_t >
6330
+ __ESIMD_API std::enable_if_t <
6331
+ __ESIMD_DNS::get_num_args<Op>() == 2 &&
6332
+ __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy> &&
6333
+ ext::oneapi::experimental::is_property_list_v<PropertyListT>,
6334
+ simd<T, N>>
6335
+ atomic_update (AccessorTy acc, simd<Toffset, N> byte_offset, simd<T, N> src0,
6336
+ simd<T, N> src1, PropertyListT props = {}) {
6337
+ simd_mask<N> mask = 1 ;
6338
+ return atomic_update<Op, T, N>(acc, byte_offset, src0, src1, mask, props);
6339
+ }
6340
+
6341
+ // / simd<T, N>
6342
+ // / atomic_update(AccessorTy acc, simd_view<OffsetObjT, OffsetRegionTy>
6343
+ // / byte_offset, simd<T, N> src0, simd<T, N> src1,
6344
+ // / simd_mask<N> mask, props = {}); // (acc-au2-3)
6345
+ // /
6346
+ // / A variation of \c atomic_update API with \c byte_offset represented as
6347
+ // / a \c simd_view object.
6348
+ // /
6349
+ // / @tparam Op The atomic operation - can be one of the following:
6350
+ // / \c atomic_op::cmpxchg, \c atomic_op::fcmpxchg.
6351
+ // / @tparam T The vector element type.
6352
+ // / @tparam N The number of memory locations to update.
6353
+ // / @param acc The SYCL accessor.
6354
+ // / @param byte_offset The vector of 32-bit or 64-bit offsets in bytes.
6355
+ // / @param src0 The first additional argument (new value).
6356
+ // / @param src1 The second additional argument (expected value).
6357
+ // / @param mask Operation mask, only locations with non-zero in the
6358
+ // / corresponding mask element are updated.
6359
+ // / @param props The parameter 'props' specifies the optional compile-time
6360
+ // / properties list. Only L1/L2 properties are used.
6361
+ // Other properties are ignored.
6362
+ // / @return A vector of the old values at the memory locations before the
6363
+ // / update.
6364
+ template <atomic_op Op, typename T, int N, typename OffsetObjT,
6365
+ typename AccessorTy, typename OffsetRegionTy,
6366
+ typename PropertyListT =
6367
+ ext::oneapi::experimental::detail::empty_properties_t >
6368
+ __ESIMD_API std::enable_if_t <
6369
+ __ESIMD_DNS::get_num_args<Op>() == 2 &&
6370
+ __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy> &&
6371
+ ext::oneapi::experimental::is_property_list_v<PropertyListT>,
6372
+ simd<T, N>>
6373
+ atomic_update (AccessorTy acc, simd_view<OffsetObjT, OffsetRegionTy> byte_offset,
6374
+ simd<T, N> src0, simd<T, N> src1, simd_mask<N> mask,
6375
+ PropertyListT props = {}) {
6376
+ return atomic_update<Op, T, N>(acc, byte_offset.read (), src0, src1, mask,
6377
+ props);
6378
+ }
6379
+
6380
+ // / simd<T, N>
6381
+ // / atomic_update(AccessorTy acc,
6382
+ // / simd_view<OffsetObjT, OffsetRegionTy>, byte_offset,
6383
+ // / simd<T, N> src0, simd<T, N> src1, props = {}); // (acc-au2-4)
6384
+ // /
6385
+ // / A variation of \c atomic_update API with \c byte_offset represented as
6386
+ // / a \c simd_view object and no mask operand.
6387
+ // /
6388
+ // / @tparam Op The atomic operation - can be one of the following:
6389
+ // / \c atomic_op::cmpxchg, \c atomic_op::fcmpxchg.
6390
+ // / @tparam T The vector element type.
6391
+ // / @tparam N The number of memory locations to update.
6392
+ // / @param acc The SYCL accessor.
6393
+ // / @param byte_offset The vector of 32-bit or 64-bit offsets in bytes.
6394
+ // / @param src0 The first additional argument (new value).
6395
+ // / @param src1 The second additional argument (expected value).
6396
+ // / @param props The parameter 'props' specifies the optional compile-time
6397
+ // / properties list. Only L1/L2 properties are used.
6398
+ // Other properties are ignored.
6399
+ // / @return A vector of the old values at the memory locations before the
6400
+ // / update.
6401
+ template <atomic_op Op, typename T, int N, typename OffsetObjT,
6402
+ typename AccessorTy, typename OffsetRegionTy,
6403
+ typename PropertyListT =
6404
+ ext::oneapi::experimental::detail::empty_properties_t >
6405
+ __ESIMD_API std::enable_if_t <
6406
+ __ESIMD_DNS::get_num_args<Op>() == 2 &&
6407
+ __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy> &&
6408
+ ext::oneapi::experimental::is_property_list_v<PropertyListT>,
6409
+ simd<T, N>>
6410
+ atomic_update (AccessorTy acc, simd_view<OffsetObjT, OffsetRegionTy> byte_offset,
6411
+ simd<T, N> src0, simd<T, N> src1, PropertyListT props = {}) {
6412
+ simd_mask<N> mask = 1 ;
6413
+ return atomic_update<Op, T, N>(acc, byte_offset.read (), src0, src1, mask,
6414
+ props);
6415
+ }
6416
+
6198
6417
// / Variant of \c atomic_update that uses \c local_accessor as a parameter.
6199
6418
// / Atomically updates \c N memory locations represented by an accessor and
6200
6419
// / a vector of offsets and returns a vector of old
@@ -6232,35 +6451,6 @@ atomic_update(AccessorTy acc, simd<uint32_t, N> offset, simd<Tx, N> src0,
6232
6451
}
6233
6452
}
6234
6453
6235
- // / A variation of \c atomic_update API with \c offsets represented as
6236
- // / \c simd_view object.
6237
- // /
6238
- // / @tparam Op The atomic operation - can be one of the following:
6239
- // / \c atomic_op::cmpxchg, \c atomic_op::fcmpxchg.
6240
- // / @tparam Tx The vector element type.
6241
- // / @tparam N The number of memory locations to update.
6242
- // / @tparam AccessorTy type of the SYCL accessor.
6243
- // / @param acc The SYCL accessor.
6244
- // / @param offset The simd_view of 32-bit or 64-bit offsets in bytes. 64-bit
6245
- // / offsets are supported only when stateless memory accesses are enforced,
6246
- // / i.e. accessor based accesses are automatically converted to stateless
6247
- // / accesses.
6248
- // / @param src0 The first additional argument (new value).
6249
- // / @param src1 The second additional argument (expected value).
6250
- // / @param mask Operation mask, only locations with non-zero in the
6251
- // / corresponding mask element are updated.
6252
- // / @return A vector of the old values at the memory locations before the
6253
- // / update.
6254
- // /
6255
- template <atomic_op Op, typename Tx, int N, typename Toffset,
6256
- typename AccessorTy, typename RegionTy = region1d_t <Toffset, N, 1 >>
6257
- __ESIMD_API std::enable_if_t <
6258
- std::is_integral_v<Toffset> && !std::is_pointer_v<AccessorTy>, simd<Tx, N>>
6259
- atomic_update (AccessorTy acc, simd_view<Toffset, RegionTy> offsets,
6260
- simd<Tx, N> src0, simd<Tx, N> src1, simd_mask<N> mask) {
6261
- return atomic_update<Op, Tx, N>(acc, offsets.read (), src0, src1, mask);
6262
- }
6263
-
6264
6454
// / A variation of \c atomic_update API with \c offsets represented as
6265
6455
// / scalar.
6266
6456
// /
0 commit comments