Skip to content

Commit a6a0dea

Browse files
[SYCL][ESIMD][EMU] Add implementation of atomic operations in ESIMD emulator (#6661)
Co-authored-by: kbobrovs <[email protected]>
1 parent dd70c33 commit a6a0dea

File tree

4 files changed

+288
-209
lines changed

4 files changed

+288
-209
lines changed

sycl/include/sycl/ext/intel/esimd/detail/atomic_intrin.hpp

Lines changed: 107 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -16,126 +16,205 @@ namespace __ESIMD_DNS {
1616
// This function implements atomic update of pre-existing variable in the
1717
// absense of C++ 20's atomic_ref.
1818

19-
template <typename Ty> Ty atomic_load(Ty *ptr) {
19+
// __atomic_* functions support only integral types. In order to
20+
// support floating types for certain operations like min/max,
21+
// 'cmpxchg' operation is applied for result values using
22+
// 'bridging' variables in integral type.
23+
template <typename Ty> using CmpxchgTy = __ESIMD_DNS::uint_type_t<sizeof(Ty)>;
24+
25+
template <typename Ty> inline Ty atomic_load(Ty *ptr) {
2026
#ifdef _WIN32
2127
// TODO: Windows will be supported soon
2228
__ESIMD_UNSUPPORTED_ON_HOST;
2329
#else
24-
return __atomic_load(ptr, __ATOMIC_SEQ_CST);
30+
__ESIMD_UNSUPPORTED_ON_HOST;
31+
// TODO : Enable with unit test
32+
/* return sycl::bit_cast<Ty>(__atomic_load_n((CmpxchgTy<Ty> *)ptr,
33+
__ATOMIC_SEQ_CST)); */
2534
#endif
2635
}
2736

28-
template <typename Ty> Ty atomic_store(Ty *ptr, Ty val) {
37+
template <typename Ty> inline Ty atomic_store(Ty *ptr, Ty val) {
2938
#ifdef _WIN32
3039
// TODO: Windows will be supported soon
3140
__ESIMD_UNSUPPORTED_ON_HOST;
3241
#else
33-
__atomic_store(ptr, val, __ATOMIC_SEQ_CST);
42+
Ty ret = atomic_load<Ty>((CmpxchgTy<Ty> *)ptr);
43+
__atomic_store_n((CmpxchgTy<Ty> *)ptr, val, __ATOMIC_SEQ_CST);
44+
return ret;
3445
#endif
3546
}
3647

37-
template <typename Ty> Ty atomic_add_fetch(Ty *ptr, Ty val) {
48+
template <typename Ty> inline Ty atomic_add(Ty *ptr, Ty val) {
3849
#ifdef _WIN32
3950
// TODO: Windows will be supported soon
4051
__ESIMD_UNSUPPORTED_ON_HOST;
4152
#else
42-
return __atomic_add_fetch(ptr, val, __ATOMIC_SEQ_CST);
53+
if constexpr (std::is_integral_v<Ty>) {
54+
return __atomic_fetch_add(ptr, val, __ATOMIC_SEQ_CST);
55+
} else {
56+
// For Floating type
57+
Ty _old, _new;
58+
CmpxchgTy<Ty> _old_bits, _new_bits;
59+
do {
60+
_old = *ptr;
61+
_new = _old + val;
62+
_old_bits = *(CmpxchgTy<Ty> *)&_old;
63+
_new_bits = *(CmpxchgTy<Ty> *)&_new;
64+
} while (!__atomic_compare_exchange_n((CmpxchgTy<Ty> *)ptr, &_old_bits,
65+
_new_bits, false, __ATOMIC_SEQ_CST,
66+
__ATOMIC_SEQ_CST));
67+
return _old;
68+
}
4369
#endif
4470
}
4571

46-
template <typename Ty> Ty atomic_sub_fetch(Ty *ptr, Ty val) {
72+
template <typename Ty> inline Ty atomic_sub(Ty *ptr, Ty val) {
4773
#ifdef _WIN32
4874
// TODO: Windows will be supported soon
4975
__ESIMD_UNSUPPORTED_ON_HOST;
5076
#else
51-
return __atomic_sub_fetch(ptr, val, __ATOMIC_SEQ_CST);
77+
if constexpr (std::is_integral_v<Ty>) {
78+
return __atomic_fetch_sub(ptr, val, __ATOMIC_SEQ_CST);
79+
} else {
80+
// For Floating type
81+
Ty _old, _new;
82+
CmpxchgTy<Ty> _old_bits, _new_bits;
83+
do {
84+
_old = *ptr;
85+
_new = _old - val;
86+
_old_bits = *(CmpxchgTy<Ty> *)&_old;
87+
_new_bits = *(CmpxchgTy<Ty> *)&_new;
88+
} while (!__atomic_compare_exchange_n((CmpxchgTy<Ty> *)ptr, &_old_bits,
89+
_new_bits, false, __ATOMIC_SEQ_CST,
90+
__ATOMIC_SEQ_CST));
91+
return _old;
92+
}
5293
#endif
5394
}
5495

55-
template <typename Ty> Ty atomic_and_fetch(Ty *ptr, Ty val) {
96+
template <typename Ty> inline Ty atomic_and(Ty *ptr, Ty val) {
5697
#ifdef _WIN32
5798
// TODO: Windows will be supported soon
5899
__ESIMD_UNSUPPORTED_ON_HOST;
59100
#else
60-
return __atomic_and_fetch(ptr, val, __ATOMIC_SEQ_CST);
101+
static_assert(std::is_integral<Ty>::value);
102+
return __atomic_fetch_and(ptr, val, __ATOMIC_SEQ_CST);
61103
#endif
62104
}
63105

64-
template <typename Ty> Ty atomic_or_fetch(Ty *ptr, Ty val) {
106+
template <typename Ty> inline Ty atomic_or(Ty *ptr, Ty val) {
65107
#ifdef _WIN32
66108
// TODO: Windows will be supported soon
67109
__ESIMD_UNSUPPORTED_ON_HOST;
68110
#else
69-
return __atomic_or_fetch(ptr, val, __ATOMIC_SEQ_CST);
111+
static_assert(std::is_integral<Ty>::value);
112+
return __atomic_fetch_or(ptr, val, __ATOMIC_SEQ_CST);
70113
#endif
71114
}
72115

73-
template <typename Ty> Ty atomic_xor_fetch(Ty *ptr, Ty val) {
116+
template <typename Ty> inline Ty atomic_xor(Ty *ptr, Ty val) {
74117
#ifdef _WIN32
75118
// TODO: Windows will be supported soon
76119
__ESIMD_UNSUPPORTED_ON_HOST;
77120
#else
78-
return __atomic_xor_fetch(ptr, val, __ATOMIC_SEQ_CST);
121+
static_assert(std::is_integral<Ty>::value);
122+
return __atomic_fetch_xor(ptr, val, __ATOMIC_SEQ_CST);
79123
#endif
80124
}
81125

82-
template <typename Ty> Ty atomic_min(Ty *ptr, Ty val) {
126+
template <typename Ty> inline Ty atomic_min(Ty *ptr, Ty val) {
83127
#ifdef _WIN32
84128
// TODO: Windows will be supported soon
85129
__ESIMD_UNSUPPORTED_ON_HOST;
86130
#else
87-
// TODO FIXME: fix implementation for FP types.
88131
if constexpr (std::is_integral_v<Ty>) {
89132
Ty _old, _new;
90133
do {
91134
_old = *ptr;
92135
_new = std::min<Ty>(_old, val);
93136
} while (!__atomic_compare_exchange_n(ptr, &_old, _new, false,
94137
__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
95-
return _new;
138+
return _old;
96139
} else {
97-
__ESIMD_UNSUPPORTED_ON_HOST;
140+
Ty _old, _new;
141+
CmpxchgTy<Ty> _old_bits, _new_bits;
142+
do {
143+
_old = *ptr;
144+
_new = std::min(_old, val);
145+
_old_bits = *(CmpxchgTy<Ty> *)&_old;
146+
_new_bits = *(CmpxchgTy<Ty> *)&_new;
147+
} while (!__atomic_compare_exchange_n((CmpxchgTy<Ty> *)ptr, &_old_bits,
148+
_new_bits, false, __ATOMIC_SEQ_CST,
149+
__ATOMIC_SEQ_CST));
150+
return _old;
98151
}
99152
#endif
100153
}
101154

102-
template <typename Ty> Ty atomic_max(Ty *ptr, Ty val) {
155+
template <typename Ty> inline Ty atomic_max(Ty *ptr, Ty val) {
103156
#ifdef _WIN32
104157
// TODO: Windows will be supported soon
105158
__ESIMD_UNSUPPORTED_ON_HOST;
106159
#else
107-
// TODO FIXME: fix implementation for FP types.
108160
if constexpr (std::is_integral_v<Ty>) {
109161
Ty _old, _new;
110162
do {
111163
_old = *ptr;
112164
_new = std::max<Ty>(_old, val);
113165
} while (!__atomic_compare_exchange_n(ptr, &_old, _new, false,
114166
__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
115-
return _new;
167+
return _old;
116168
} else {
117-
__ESIMD_UNSUPPORTED_ON_HOST;
169+
Ty _old, _new;
170+
CmpxchgTy<Ty> _old_bits, _new_bits;
171+
do {
172+
_old = *ptr;
173+
_new = std::max(_old, val);
174+
_old_bits = *(CmpxchgTy<Ty> *)&_old;
175+
_new_bits = *(CmpxchgTy<Ty> *)&_new;
176+
} while (!__atomic_compare_exchange_n((CmpxchgTy<Ty> *)(CmpxchgTy<Ty> *)ptr,
177+
&_old_bits, _new_bits, false,
178+
__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
179+
return _old;
118180
}
119181
#endif
120182
}
121183

122-
template <typename Ty> Ty atomic_cmpxchg(Ty *ptr, Ty expected, Ty desired) {
184+
template <typename Ty>
185+
inline Ty atomic_cmpxchg(Ty *ptr, Ty expected, Ty desired) {
123186
#ifdef _WIN32
124187
// TODO: Windows will be supported soon
125188
__ESIMD_UNSUPPORTED_ON_HOST;
126189
#else
127-
// TODO FIXME: fix implementation for FP types.
128190
if constexpr (std::is_integral_v<Ty>) {
129-
Ty _old = expected;
130-
__atomic_compare_exchange_n(ptr, &_old, desired, false, __ATOMIC_SEQ_CST,
191+
Ty local = expected;
192+
__atomic_compare_exchange_n(ptr, &local, desired, false, __ATOMIC_SEQ_CST,
131193
__ATOMIC_SEQ_CST);
132-
return *ptr;
194+
// if exchange occured, this means 'local=expected=*ptr'. So local
195+
// is returned as old val
196+
// if exchange did not occur, *ptr value compared against 'local'
197+
// is stored in 'local'. So local is returned as old val
198+
return local;
133199
} else {
134-
__ESIMD_UNSUPPORTED_ON_HOST;
200+
CmpxchgTy<Ty> desired_bits = *(CmpxchgTy<Ty> *)&desired;
201+
CmpxchgTy<Ty> local_bits = *(CmpxchgTy<Ty> *)&expected;
202+
__atomic_compare_exchange_n((CmpxchgTy<Ty> *)ptr, &local_bits, desired_bits,
203+
false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
204+
return *((Ty *)&local_bits);
135205
}
136206
#endif
137207
}
138208

209+
inline void atomic_fence() {
210+
#ifdef _WIN32
211+
// TODO: Windows will be supported soon
212+
__ESIMD_UNSUPPORTED_ON_HOST;
213+
#else
214+
__atomic_thread_fence(__ATOMIC_SEQ_CST);
215+
#endif
216+
}
217+
139218
} // namespace __ESIMD_DNS
140219

141220
/// @endcond ESIMD_DETAIL

sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp

Lines changed: 75 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -485,7 +485,25 @@ __esimd_svm_atomic0(__ESIMD_DNS::vector_type_t<uint64_t, N> addrs,
485485
;
486486
#else
487487
{
488-
__ESIMD_UNSUPPORTED_ON_HOST;
488+
__ESIMD_DNS::vector_type_t<Ty, N> Oldval = 0;
489+
490+
for (int AddrIdx = 0; AddrIdx < N; AddrIdx += 1) {
491+
if (pred[AddrIdx] == 0) {
492+
// Skip Oldval vector elements correpsonding to
493+
// predicates whose value is zero
494+
continue;
495+
}
496+
if constexpr (Op == __ESIMD_NS::atomic_op::load) {
497+
Oldval[AddrIdx] = __ESIMD_DNS::atomic_load<Ty>((Ty *)addrs[AddrIdx]);
498+
} else if constexpr (Op == __ESIMD_NS::atomic_op::inc) {
499+
Oldval[AddrIdx] =
500+
__ESIMD_DNS::atomic_add<Ty>((Ty *)addrs[AddrIdx], static_cast<Ty>(1));
501+
} else if constexpr (Op == __ESIMD_NS::atomic_op::dec) {
502+
Oldval[AddrIdx] =
503+
__ESIMD_DNS::atomic_sub<Ty>((Ty *)addrs[AddrIdx], static_cast<Ty>(1));
504+
}
505+
}
506+
return Oldval;
489507
}
490508
#endif // __SYCL_DEVICE_ONLY__
491509

@@ -498,23 +516,49 @@ __esimd_svm_atomic1(__ESIMD_DNS::vector_type_t<uint64_t, N> addrs,
498516
;
499517
#else
500518
{
501-
__ESIMD_DNS::vector_type_t<Ty, N> retv;
519+
__ESIMD_DNS::vector_type_t<Ty, N> Oldval;
502520

503-
for (int i = 0; i < N; i++) {
504-
if (pred[i]) {
505-
Ty *p = reinterpret_cast<Ty *>(addrs[i]);
521+
for (int AddrIdx = 0; AddrIdx < N; AddrIdx++) {
522+
if (pred[AddrIdx] == 0) {
523+
// Skip Output vector elements correpsonding to
524+
// predicates whose value is zero
525+
continue;
526+
}
506527

507-
switch (Op) {
508-
case __ESIMD_NS::atomic_op::add:
509-
retv[i] = __ESIMD_DNS::atomic_add_fetch<Ty>(p, src0[i]);
510-
break;
511-
default:
512-
__ESIMD_UNSUPPORTED_ON_HOST;
513-
}
528+
if constexpr (Op == __ESIMD_NS::atomic_op::store) {
529+
Oldval[AddrIdx] =
530+
__ESIMD_DNS::atomic_store<Ty>((Ty *)addrs[AddrIdx], src0[AddrIdx]);
531+
} else if constexpr ((Op == __ESIMD_NS::atomic_op::add) ||
532+
(Op == __ESIMD_NS::atomic_op::fadd)) {
533+
Oldval[AddrIdx] =
534+
__ESIMD_DNS::atomic_add<Ty>((Ty *)addrs[AddrIdx], src0[AddrIdx]);
535+
} else if constexpr ((Op == __ESIMD_NS::atomic_op::sub) ||
536+
(Op == __ESIMD_NS::atomic_op::fsub)) {
537+
Oldval[AddrIdx] =
538+
__ESIMD_DNS::atomic_sub<Ty>((Ty *)addrs[AddrIdx], src0[AddrIdx]);
539+
} else if constexpr ((Op == __ESIMD_NS::atomic_op::minsint) ||
540+
(Op == __ESIMD_NS::atomic_op::min) ||
541+
(Op == __ESIMD_NS::atomic_op::fmin)) {
542+
Oldval[AddrIdx] =
543+
__ESIMD_DNS::atomic_min<Ty>((Ty *)addrs[AddrIdx], src0[AddrIdx]);
544+
} else if constexpr ((Op == __ESIMD_NS::atomic_op::maxsint) ||
545+
(Op == __ESIMD_NS::atomic_op::max) ||
546+
(Op == __ESIMD_NS::atomic_op::fmax)) {
547+
Oldval[AddrIdx] =
548+
__ESIMD_DNS::atomic_max<Ty>((Ty *)addrs[AddrIdx], src0[AddrIdx]);
549+
} else if constexpr (Op == __ESIMD_NS::atomic_op::bit_and) {
550+
Oldval[AddrIdx] =
551+
__ESIMD_DNS::atomic_and<Ty>((Ty *)addrs[AddrIdx], src0[AddrIdx]);
552+
} else if constexpr (Op == __ESIMD_NS::atomic_op::bit_or) {
553+
Oldval[AddrIdx] =
554+
__ESIMD_DNS::atomic_or<Ty>((Ty *)addrs[AddrIdx], src0[AddrIdx]);
555+
} else if constexpr (Op == __ESIMD_NS::atomic_op::bit_xor) {
556+
Oldval[AddrIdx] =
557+
__ESIMD_DNS::atomic_xor<Ty>((Ty *)addrs[AddrIdx], src0[AddrIdx]);
514558
}
515559
}
516560

517-
return retv;
561+
return Oldval;
518562
}
519563
#endif // __SYCL_DEVICE_ONLY__
520564

@@ -528,7 +572,20 @@ __esimd_svm_atomic2(__ESIMD_DNS::vector_type_t<uint64_t, N> addrs,
528572
;
529573
#else
530574
{
531-
__ESIMD_UNSUPPORTED_ON_HOST;
575+
__ESIMD_DNS::vector_type_t<Ty, N> Oldval;
576+
577+
for (int AddrIdx = 0; AddrIdx < N; AddrIdx++) {
578+
if (pred[AddrIdx] == 0) {
579+
// Skip Output vector elements correpsonding to
580+
// predicates whose value is zero
581+
continue;
582+
}
583+
static_assert((Op == __ESIMD_NS::atomic_op::cmpxchg) ||
584+
(Op == __ESIMD_NS::atomic_op::fcmpxchg));
585+
Oldval[AddrIdx] = __ESIMD_DNS::atomic_cmpxchg((Ty *)addrs[AddrIdx],
586+
src0[AddrIdx], src1[AddrIdx]);
587+
}
588+
return Oldval;
532589
}
533590
#endif // __SYCL_DEVICE_ONLY__
534591

@@ -557,7 +614,9 @@ __ESIMD_INTRIN void __esimd_fence(uint8_t cntl)
557614
;
558615
#else
559616
{
560-
sycl::detail::getESIMDDeviceInterface()->cm_fence_ptr();
617+
// CM_EMU's 'cm_fence' is NOP. Disabled.
618+
// sycl::detail::getESIMDDeviceInterface()->cm_fence_ptr();
619+
__ESIMD_DNS::atomic_fence();
561620
}
562621
#endif // __SYCL_DEVICE_ONLY__
563622

@@ -849,7 +908,7 @@ __esimd_dword_atomic0(__ESIMD_DNS::simd_mask_storage_t<N> pred,
849908

850909
switch (Op) {
851910
case __ESIMD_NS::atomic_op::inc:
852-
retv[i] = __ESIMD_DNS::atomic_add_fetch<Ty>(p, 1);
911+
retv[i] = __ESIMD_DNS::atomic_add<Ty>(p, 1);
853912
break;
854913
default:
855914
__ESIMD_UNSUPPORTED_ON_HOST;

0 commit comments

Comments
 (0)