Skip to content

Commit 5332773

Browse files
authored
[SYCL][ESIMD] atomic_update with data size less than 4 bytes should use LSC atomics (#13340)
SVM doesn't support less than 4 bytes on Gen12, we either get an error or the wrong answer. --------- Signed-off-by: Sarnie, Nick <[email protected]>
1 parent 05644a4 commit 5332773

File tree

2 files changed

+142
-16
lines changed

2 files changed

+142
-16
lines changed

sycl/include/sycl/ext/intel/esimd/memory.hpp

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5862,8 +5862,8 @@ __ESIMD_API simd<T, N> slm_atomic_update_impl(simd<uint32_t, N> offsets,
58625862
template <atomic_op Op, typename T, int N>
58635863
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 0, simd<T, N>>
58645864
slm_atomic_update(simd<uint32_t, N> byte_offset, simd_mask<N> mask = 1) {
5865-
// 2 byte, 8 byte types, non-power of two, and operations wider than 32 are
5866-
// supported only by LSC.
5865+
// 2 byte, 8 byte types, non-power of two, and operations wider than
5866+
// 32 are supported only by LSC.
58675867
if constexpr (sizeof(T) == 2 || sizeof(T) == 8 ||
58685868
!__ESIMD_DNS::isPowerOf2(N, 32)) {
58695869
return slm_atomic_update_impl<Op, T, N,
@@ -5942,8 +5942,8 @@ template <atomic_op Op, typename T, int N>
59425942
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 1, simd<T, N>>
59435943
slm_atomic_update(simd<uint32_t, N> byte_offset, simd<T, N> src0,
59445944
simd_mask<N> mask = 1) {
5945-
// 2 byte, 8 byte types, non-power of two, and operations wider than 32 are
5946-
// supported only by LSC.
5945+
// 2 byte, 8 byte types, non-power of two, and operations wider than
5946+
// 32 are supported only by LSC.
59475947
if constexpr (sizeof(T) == 2 || sizeof(T) == 8 ||
59485948
!__ESIMD_DNS::isPowerOf2(N, 32)) {
59495949
// half and short are supported in LSC.
@@ -6031,8 +6031,8 @@ template <atomic_op Op, typename T, int N>
60316031
__ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2, simd<T, N>>
60326032
slm_atomic_update(simd<uint32_t, N> byte_offset, simd<T, N> src0,
60336033
simd<T, N> src1, simd_mask<N> mask = 1) {
6034-
// 2 byte, 8 byte types, non-power of two, and operations wider than 32 are
6035-
// supported only by LSC.
6034+
// 2 byte, 8 byte types, non-power of two, and operations wider than
6035+
// 32 are supported only by LSC.
60366036
if constexpr (sizeof(T) == 2 || sizeof(T) == 8 ||
60376037
!__ESIMD_DNS::isPowerOf2(N, 32)) {
60386038
// 2-argument lsc_atomic_update arguments order matches the standard one -
@@ -6417,7 +6417,7 @@ atomic_update(T *p, simd<Toffset, N> byte_offset, simd_mask<N> mask,
64176417
static_assert(std::is_integral_v<Toffset>, "Unsupported offset type");
64186418

64196419
if constexpr (detail::has_cache_hints<PropertyListT>() ||
6420-
!__ESIMD_DNS::isPowerOf2(N, 32)) {
6420+
!__ESIMD_DNS::isPowerOf2(N, 32) || sizeof(T) < 4) {
64216421
return detail::atomic_update_impl<
64226422
Op, T, N, detail::lsc_data_size::default_size, PropertyListT, Toffset>(
64236423
p, byte_offset, mask);
@@ -6640,7 +6640,7 @@ atomic_update(T *p, simd<Toffset, N> byte_offset, simd<T, N> src0,
66406640
if constexpr (detail::has_cache_hints<PropertyListT>() ||
66416641
(Op == atomic_op::fmin) || (Op == atomic_op::fmax) ||
66426642
(Op == atomic_op::fadd) || (Op == atomic_op::fsub) ||
6643-
!__ESIMD_DNS::isPowerOf2(N, 32)) {
6643+
!__ESIMD_DNS::isPowerOf2(N, 32) || sizeof(T) < 4) {
66446644
return detail::atomic_update_impl<
66456645
Op, T, N, detail::lsc_data_size::default_size, PropertyListT, Toffset>(
66466646
p, byte_offset, src0, mask);
@@ -6888,9 +6888,11 @@ atomic_update(T *p, simd<Toffset, N> byte_offset, simd<T, N> src0,
68886888
static_assert(std::is_integral_v<Toffset>, "Unsupported offset type");
68896889

68906890
// Use LSC atomic when cache hints are present, FP atomics is used,
6891-
// non-power of two length is used, or operation width greater than 32.
6891+
// non-power of two length is used, or operation width greater than 32, or the
6892+
// data size is less than 4 bytes.
68926893
if constexpr (detail::has_cache_hints<PropertyListT>() ||
6893-
Op == atomic_op::fcmpxchg || !__ESIMD_DNS::isPowerOf2(N, 32)) {
6894+
Op == atomic_op::fcmpxchg || !__ESIMD_DNS::isPowerOf2(N, 32) ||
6895+
sizeof(T) < 4) {
68946896
// 2-argument lsc_atomic_update arguments order matches the standard one -
68956897
// expected value first, then new value. But atomic_update uses reverse
68966898
// order, hence the src1/src0 swap.
@@ -7116,7 +7118,7 @@ atomic_update(AccessorTy acc, simd<Toffset, N> byte_offset, simd_mask<N> mask,
71167118
static_assert(std::is_integral_v<Toffset>, "Unsupported offset type");
71177119

71187120
if constexpr (detail::has_cache_hints<PropertyListT>() ||
7119-
!detail::isPowerOf2(N, 32)) {
7121+
!detail::isPowerOf2(N, 32) || sizeof(T) < 4) {
71207122
return detail::atomic_update_impl<
71217123
Op, T, N, detail::lsc_data_size::default_size, PropertyListT>(
71227124
acc, byte_offset, mask);
@@ -7384,7 +7386,7 @@ atomic_update(AccessorTy acc, simd<Toffset, N> byte_offset, simd<T, N> src0,
73847386
if constexpr (detail::has_cache_hints<PropertyListT>() ||
73857387
Op == atomic_op::fmin || Op == atomic_op::fmax ||
73867388
Op == atomic_op::fadd || Op == atomic_op::fsub ||
7387-
!__ESIMD_DNS::isPowerOf2(N, 32)) {
7389+
!__ESIMD_DNS::isPowerOf2(N, 32) || sizeof(T) < 4) {
73887390
return detail::atomic_update_impl<
73897391
Op, T, N, detail::lsc_data_size::default_size, PropertyListT>(
73907392
acc, byte_offset, src0, mask);
@@ -7681,9 +7683,11 @@ atomic_update(AccessorTy acc, simd<Toffset, N> byte_offset, simd<T, N> src0,
76817683
static_assert(std::is_integral_v<Toffset>, "Unsupported offset type");
76827684
static_assert(sizeof(Toffset) == 4, "Only 32 bit offset is supported");
76837685
// Use LSC atomic when cache hints are present, FP atomics is used,
7684-
// non-power of two length is used, or operation width greater than 32.
7686+
// non-power of two length is used, operation width greater than 32, or the
7687+
// data size is less than 4 bytes,
76857688
if constexpr (detail::has_cache_hints<PropertyListT>() ||
7686-
Op == atomic_op::fcmpxchg || !__ESIMD_DNS::isPowerOf2(N, 32)) {
7689+
Op == atomic_op::fcmpxchg || !__ESIMD_DNS::isPowerOf2(N, 32) ||
7690+
sizeof(T) < 4) {
76877691
// 2-argument lsc_atomic_update arguments order matches the standard one -
76887692
// expected value first, then new value. But atomic_update uses reverse
76897693
// order, hence the src1/src0 swap.

sycl/test/esimd/memory_properties.cpp

Lines changed: 124 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,17 @@ test_atomic_update(AccType &acc, LocalAccTypeInt local_acc, float *ptrf,
322322
atomic_update<atomic_op::inc, int, VL>(ptr, offsets, pred);
323323
}
324324

325+
// Try with int16_t to check that LSC atomic is generated
326+
// The result is later casted to int16, not captured here.
327+
// CHECK: call <8 x i32> @llvm.genx.lsc.xatomic.stateless.v8i32.v8i1.v8i64(<8 x i1> {{[^)]+}}, i8 8, i8 0, i8 0, i16 1, i32 0, i8 6, i8 1, i8 1, i8 0, <8 x i64> {{[^)]+}}, <8 x i32> undef, <8 x i32> undef, i32 0, <8 x i32> undef)
328+
{
329+
int16_t *ptr = 0;
330+
constexpr int VL = 8;
331+
simd<uint32_t, VL> offsets = simd<uint32_t, VL>(1) * sizeof(int16_t);
332+
auto atomic_res =
333+
atomic_update<atomic_op::inc, int16_t, VL>(ptr, offsets);
334+
}
335+
325336
// Accessor
326337

327338
// CHECK-STATEFUL: call <4 x i32> @llvm.genx.lsc.xatomic.bti.v4i32.v4i1.v4i32(<4 x i1> {{[^)]+}}, i8 8, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i32> {{[^)]+}}, <4 x i32> undef, <4 x i32> undef, i32 {{[^)]+}}, <4 x i32> undef)
@@ -377,6 +388,19 @@ test_atomic_update(AccType &acc, LocalAccTypeInt local_acc, float *ptrf,
377388
auto atomic_res_acc =
378389
atomic_update<atomic_op::inc, int, VL>(acc, offsets, pred);
379390
}
391+
// Try with int16_t to check that LSC atomic is generated
392+
// The result is later casted to int16, not captured here.
393+
// CHECK-STATEFUL: call <8 x i32> @llvm.genx.lsc.xatomic.bti.v8i32.v8i1.v8i32(<8 x i1> {{[^)]+}}, i8 8, i8 0, i8 0, i16 1, i32 0, i8 6, i8 1, i8 1, i8 0, <8 x i32> {{[^)]+}}, <8 x i32> undef, <8 x i32> undef, i32 {{[^)]+}}, <8 x i32> undef)
394+
// CHECK-STATELESS: call <8 x i32> @llvm.genx.lsc.xatomic.stateless.v8i32.v8i1.v8i64(<8 x i1> {{[^)]+}}, i8 8, i8 0, i8 0, i16 1, i32 0, i8 6, i8 1, i8 1, i8 0, <8 x i64> {{[^)]+}}, <8 x i32> undef, <8 x i32> undef, i32 0, <8 x i32> undef)
395+
{
396+
using AccType =
397+
sycl::accessor<int16_t, 1, sycl::access::mode::read_write>;
398+
AccType *acc = nullptr;
399+
constexpr int VL = 8;
400+
simd<uint32_t, VL> offsets = simd<uint32_t, VL>(1) * sizeof(int16_t);
401+
auto atomic_res =
402+
atomic_update<atomic_op::inc, int16_t, VL>(*acc, offsets);
403+
}
380404
}
381405

382406
// Test atomic update with one operand.
@@ -432,6 +456,18 @@ test_atomic_update(AccType &acc, LocalAccTypeInt local_acc, float *ptrf,
432456
auto res_atomic_8 =
433457
atomic_update<atomic_op::add, int>(ptr, offsets, add, pred);
434458

459+
// Try with int16_t to check that LSC atomic is generated
460+
// The result is later casted to int16, not captured here.
461+
// CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 12, i8 0, i8 0, i16 1, i32 0, i8 6, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32>{{[^)]+}}, <4 x i32> undef, i32 0, <4 x i32> undef)
462+
{
463+
int16_t *ptr = 0;
464+
constexpr int VL = 4;
465+
simd<uint32_t, VL> offsets = simd<uint32_t, VL>(1) * sizeof(int16_t);
466+
auto add = simd<int16_t, VL>(5);
467+
auto atomic_res =
468+
atomic_update<atomic_op::add, int16_t, VL>(ptr, offsets, add);
469+
}
470+
435471
// Accessors
436472

437473
// CHECK-STATEFUL-COUNT-14: call <4 x i32> @llvm.genx.lsc.xatomic.bti.v4i32.v4i1.v4i32(<4 x i1> {{[^)]+}}, i8 12, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> undef, i32 {{[^)]+}}, <4 x i32> undef)
@@ -483,6 +519,21 @@ test_atomic_update(AccType &acc, LocalAccTypeInt local_acc, float *ptrf,
483519
// CHECK-STATELESS: call <4 x i32> @llvm.genx.svm.atomic.sub.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> undef)
484520
auto res_atomic_17 =
485521
atomic_update<atomic_op::sub, int>(acc, offsets, add, pred);
522+
523+
// Try with int16_t to check that LSC atomic is generated
524+
// The result is later casted to int16, not captured here.
525+
// CHECK-STATEFUL: call <4 x i32> @llvm.genx.lsc.xatomic.bti.v4i32.v4i1.v4i32(<4 x i1> {{[^)]+}}, i8 12, i8 0, i8 0, i16 1, i32 0, i8 6, i8 1, i8 1, i8 0, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> undef, i32 {{[^)]+}}, <4 x i32> undef)
526+
// CHECK-STATELESS: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 12, i8 0, i8 0, i16 1, i32 0, i8 6, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> undef, i32 0, <4 x i32> undef)
527+
{
528+
using AccType =
529+
sycl::accessor<int16_t, 1, sycl::access::mode::read_write>;
530+
AccType *acc = nullptr;
531+
constexpr int VL = 4;
532+
simd<uint32_t, VL> offsets = simd<uint32_t, VL>(1) * sizeof(int16_t);
533+
auto add = simd<int16_t, VL>(5);
534+
auto atomic_res =
535+
atomic_update<atomic_op::add, int16_t, VL>(*acc, offsets, add);
536+
}
486537
}
487538

488539
// Test atomic update with two operands.
@@ -626,6 +677,19 @@ test_atomic_update(AccType &acc, LocalAccTypeInt local_acc, float *ptrf,
626677
auto res_atomic_100 = atomic_update<atomic_op::cmpxchg, int, VL>(
627678
ptr, offsets, swap, compare, pred);
628679

680+
// Try with int16_t to check that LSC atomic is generated
681+
// The result is later casted to int16, not captured here.
682+
// CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 0, i8 0, i16 1, i32 0, i8 6, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef)
683+
{
684+
int16_t *ptr = 0;
685+
constexpr int VL = 4;
686+
simd<uint32_t, VL> offsets = simd<uint32_t, VL>(1) * sizeof(int16_t);
687+
simd<int16_t, VL> swap = simd<int16_t, VL>(1) * sizeof(int);
688+
auto compare = swap * 2;
689+
auto atomic_res = atomic_update<atomic_op::cmpxchg, int16_t, VL>(
690+
ptr, offsets, swap, compare);
691+
}
692+
629693
// Accessors
630694

631695
// CHECK-STATEFUL-COUNT-30: call <4 x i32> @llvm.genx.lsc.xatomic.bti.v4i32.v4i1.v4i32(<4 x i1> {{[^)]+}}, i8 18, i8 1, i8 3, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 {{[^)]+}}, <4 x i32> undef)
@@ -751,6 +815,22 @@ test_atomic_update(AccType &acc, LocalAccTypeInt local_acc, float *ptrf,
751815
// CHECK-STATELESS: call <4 x i32> @llvm.genx.svm.atomic.cmpxchg.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> undef)
752816
auto res_atomic_33 = atomic_update<atomic_op::cmpxchg, int, VL>(
753817
acc, offsets, swap, compare, pred);
818+
819+
// Try with int16_t to check that LSC atomic is generated
820+
// The result is later casted to int16, not captured here.
821+
// CHECK-STATEFUL: call <4 x i32> @llvm.genx.lsc.xatomic.bti.v4i32.v4i1.v4i32(<4 x i1> {{[^)]+}}, i8 18, i8 0, i8 0, i16 1, i32 0, i8 6, i8 1, i8 1, i8 0, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 {{[^)]+}}, <4 x i32> undef)
822+
// CHECK-STATELESS: call <4 x i32> @llvm.genx.lsc.xatomic.stateless.v4i32.v4i1.v4i64(<4 x i1> {{[^)]+}}, i8 18, i8 0, i8 0, i16 1, i32 0, i8 6, i8 1, i8 1, i8 0, <4 x i64> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef)
823+
{
824+
using AccType =
825+
sycl::accessor<int16_t, 1, sycl::access::mode::read_write>;
826+
AccType *acc = nullptr;
827+
constexpr int VL = 4;
828+
simd<uint32_t, VL> offsets = simd<uint32_t, VL>(1) * sizeof(int16_t);
829+
simd<int16_t, VL> swap = simd<int16_t, VL>(1) * sizeof(int);
830+
auto compare = swap * 2;
831+
auto atomic_res = atomic_update<atomic_op::cmpxchg, int16_t, VL>(
832+
*acc, offsets, compare, swap);
833+
}
754834
}
755835

756836
// Test slm_atomic_update without operands.
@@ -824,12 +904,11 @@ test_atomic_update(AccType &acc, LocalAccTypeInt local_acc, float *ptrf,
824904
{
825905
constexpr int VL = 16;
826906
simd<uint32_t, VL> offsets = simd<uint32_t, VL>(1) * sizeof(int16_t);
827-
auto pred = simd_mask<VL>(1);
828907
simd<int16_t, VL> add = simd<int16_t, VL>(1) * sizeof(int);
829908

830909
// CHECK: call <16 x i32> @llvm.genx.lsc.xatomic.slm.v16i32.v16i1.v16i32(<16 x i1> {{[^)]+}}, i8 12, i8 0, i8 0, i16 1, i32 0, i8 6, i8 1, i8 1, i8 0, <16 x i32> {{[^)]+}}, <16 x i32> {{[^)]+}}, <16 x i32> undef, i32 0, <16 x i32> undef)
831910
auto res_slm_atomic_0 =
832-
slm_atomic_update<atomic_op::add, int16_t>(offsets, add, pred);
911+
slm_atomic_update<atomic_op::add, int16_t>(offsets, add);
833912
}
834913
// Expect DWORD for fmin.
835914
{
@@ -934,6 +1013,19 @@ test_atomic_update(AccType &acc, LocalAccTypeInt local_acc, float *ptrf,
9341013
offsets_view.select<VL, 1>(), swap_view.select<VL, 1>(),
9351014
compare_view.select<VL, 1>());
9361015

1016+
// Expect LSC for short.
1017+
{
1018+
constexpr int VL = 16;
1019+
simd<uint32_t, VL> offsets = simd<uint32_t, VL>(1) * sizeof(int16_t);
1020+
auto compare = simd<int16_t, VL>(VL, 1);
1021+
auto swap = compare * 2;
1022+
1023+
// CHECK: call <16 x i32> @llvm.genx.lsc.xatomic.slm.v16i32.v16i1.v16i32(<16 x i1> {{[^)]+}}, i8 18, i8 0, i8 0, i16 1, i32 0, i8 6, i8 1, i8 1, i8 0, <16 x i32> {{[^)]+}}, <16 x i32> {{[^)]+}}, <16 x i32> {{[^)]+}}, i32 0, <16 x i32> undef)
1024+
auto res_slm_atomic_0 =
1025+
slm_atomic_update<atomic_op::cmpxchg, int16_t, VL>(offsets, swap,
1026+
compare);
1027+
}
1028+
9371029
// Expect LSC for int64_t.
9381030
{
9391031
constexpr int VL = 16;
@@ -964,6 +1056,15 @@ test_atomic_update(AccType &acc, LocalAccTypeInt local_acc, float *ptrf,
9641056
local_acc, offsets_view.select<VL, 1>(), pred);
9651057
auto res_slm_atomic_6 = atomic_update<atomic_op::inc, int, VL>(
9661058
local_acc, offsets_view.select<VL, 1>());
1059+
1060+
// Expect LSC for short.
1061+
{
1062+
using LocalAccType = sycl::local_accessor<int16_t, 1>;
1063+
LocalAccType *local_acc = nullptr;
1064+
// CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.slm.v4i32.v4i1.v4i32(<4 x i1> {{[^)]+}}, i8 8, i8 0, i8 0, i16 1, i32 0, i8 6, i8 1, i8 1, i8 0, <4 x i32> {{[^)]+}}, <4 x i32> undef, <4 x i32> undef, i32 0, <4 x i32> undef)
1065+
auto res_slm_atomic_1 =
1066+
atomic_update<atomic_op::inc, int16_t>(*local_acc, offsets);
1067+
}
9671068
}
9681069
// One operand atomic.
9691070
{
@@ -997,6 +1098,16 @@ test_atomic_update(AccType &acc, LocalAccTypeInt local_acc, float *ptrf,
9971098
pred);
9981099
res_slm_atomic_8 = atomic_update<atomic_op::add, int, VL>(
9991100
local_acc, offsets_view.select<VL, 1>(), add_view.select<VL, 1>());
1101+
1102+
// Expect LSC for short.
1103+
{
1104+
using LocalAccType = sycl::local_accessor<int16_t, 1>;
1105+
LocalAccType *local_acc = nullptr;
1106+
simd<int16_t, VL> add = simd<int16_t, VL>(1) * sizeof(int);
1107+
// CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.slm.v4i32.v4i1.v4i32(<4 x i1> {{[^)]+}}, i8 12, i8 0, i8 0, i16 1, i32 0, i8 6, i8 1, i8 1, i8 0, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> undef, i32 0, <4 x i32> undef)
1108+
auto res_slm_atomic_1 =
1109+
atomic_update<atomic_op::add, int16_t>(*local_acc, offsets, add);
1110+
}
10001111
}
10011112
// Two operand atomic.
10021113
{
@@ -1069,6 +1180,17 @@ test_atomic_update(AccType &acc, LocalAccTypeInt local_acc, float *ptrf,
10691180
res_slm_atomic_16 = atomic_update<atomic_op::cmpxchg, int, VL>(
10701181
local_acc, offsets_view.select<VL, 1>(), swap_view.select<VL, 1>(),
10711182
compare_view.select<VL, 1>());
1183+
1184+
// Expect LSC for short.
1185+
{
1186+
using LocalAccType = sycl::local_accessor<int16_t, 1>;
1187+
LocalAccType *local_acc = nullptr;
1188+
auto compare = simd<int16_t, VL>(VL, 1);
1189+
auto swap = compare * 2;
1190+
// CHECK: call <4 x i32> @llvm.genx.lsc.xatomic.slm.v4i32.v4i1.v4i32(<4 x i1> {{[^)]+}}, i8 18, i8 0, i8 0, i16 1, i32 0, i8 6, i8 1, i8 1, i8 0, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 0, <4 x i32> undef)
1191+
auto res_slm_atomic_1 = atomic_update<atomic_op::cmpxchg, int16_t, VL>(
1192+
*local_acc, offsets, swap, compare);
1193+
}
10721194
}
10731195
}
10741196

0 commit comments

Comments
 (0)