Skip to content

Commit 2eb9d4b

Browse files
authored
[ESIMD] Introduce atomic_update<native::lsc::fadd>(...) and similar ops. (#6629)
* [ESIMD] Introduce atomic_update<native::lsc::fadd>(...) and similar ops. - Introduce sycl::ext::intel::esimd::native::lsc::atomic_op - When esimd::atomic_update is invoked with esimd::native::lsc::atomic_op (rather than esimd::atomic_op), then implementation is redirected to Xe-specific LSC-based implementation (won't run until Gen12) - Using FP operations from esimd::atomic_op cause warning and are replaced with corresponding LSC operations. - A number of code chunks moved to fit header inclusion order (distribution of auxiliary code in headers needs to be revised some day)
1 parent 372cc94 commit 2eb9d4b

File tree

16 files changed

+660
-465
lines changed

16 files changed

+660
-465
lines changed

sycl/include/sycl/ext/intel/esimd.hpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,6 @@
8181

8282
#include <sycl/ext/intel/esimd/alt_ui.hpp>
8383
#include <sycl/ext/intel/esimd/common.hpp>
84-
#include <sycl/ext/intel/esimd/math.hpp>
85-
#include <sycl/ext/intel/esimd/memory.hpp>
8684
#include <sycl/ext/intel/esimd/simd.hpp>
8785
#include <sycl/ext/intel/esimd/simd_view.hpp>
8886
#include <sycl/ext/intel/experimental/esimd/kernel_properties.hpp>

sycl/include/sycl/ext/intel/esimd/common.hpp

Lines changed: 182 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@
1010

1111
#pragma once
1212

13+
#include <sycl/ext/intel/esimd/detail/defines_elementary.hpp>
14+
#include <sycl/ext/intel/esimd/native/common.hpp>
15+
#include <sycl/ext/intel/experimental/esimd/common.hpp>
16+
1317
#include <sycl/detail/defines.hpp>
1418

1519
#include <cstdint> // for uint* types
@@ -18,59 +22,13 @@
1822
/// @cond ESIMD_DETAIL
1923

2024
#ifdef __SYCL_DEVICE_ONLY__
21-
#define SYCL_ESIMD_KERNEL __attribute__((sycl_explicit_simd))
22-
#define SYCL_ESIMD_FUNCTION __attribute__((sycl_explicit_simd))
23-
24-
// Mark a function being nodebug.
25-
#define ESIMD_NODEBUG __attribute__((nodebug))
26-
// Mark a "ESIMD global": accessible from all functions in current translation
27-
// unit, separate copy per subgroup (work-item), mapped to SPIR-V private
28-
// storage class.
29-
#define ESIMD_PRIVATE \
30-
__attribute__((opencl_private)) __attribute__((sycl_explicit_simd))
31-
// Bind a ESIMD global variable to a specific register.
32-
#define ESIMD_REGISTER(n) __attribute__((register_num(n)))
33-
34-
#define __ESIMD_API ESIMD_NODEBUG ESIMD_INLINE
35-
3625
#define __ESIMD_UNSUPPORTED_ON_HOST
37-
3826
#else // __SYCL_DEVICE_ONLY__
39-
#define SYCL_ESIMD_KERNEL
40-
#define SYCL_ESIMD_FUNCTION
41-
42-
// TODO ESIMD define what this means on Windows host
43-
#define ESIMD_NODEBUG
44-
// On host device ESIMD global is a thread local static var. This assumes that
45-
// each work-item is mapped to a separate OS thread on host device.
46-
#define ESIMD_PRIVATE thread_local
47-
#define ESIMD_REGISTER(n)
48-
49-
#define __ESIMD_API ESIMD_INLINE
50-
5127
#define __ESIMD_UNSUPPORTED_ON_HOST \
5228
throw sycl::exception(sycl::errc::feature_not_supported, \
5329
"This ESIMD feature is not supported on HOST")
54-
5530
#endif // __SYCL_DEVICE_ONLY__
5631

57-
// Mark a function being noinline
58-
#define ESIMD_NOINLINE __attribute__((noinline))
59-
// Force a function to be inlined. 'inline' is used to preserve ODR for
60-
// functions defined in a header.
61-
#define ESIMD_INLINE inline __attribute__((always_inline))
62-
63-
// Macros for internal use
64-
#define __ESIMD_NS sycl::ext::intel::esimd
65-
#define __ESIMD_DNS sycl::ext::intel::esimd::detail
66-
#define __ESIMD_EMU_DNS sycl::ext::intel::esimd::emu::detail
67-
68-
#define __ESIMD_QUOTE1(m) #m
69-
#define __ESIMD_QUOTE(m) __ESIMD_QUOTE1(m)
70-
#define __ESIMD_NS_QUOTED __ESIMD_QUOTE(__ESIMD_NS)
71-
#define __ESIMD_DEPRECATED(new_api) \
72-
__SYCL_DEPRECATED("use " __ESIMD_NS_QUOTED "::" __ESIMD_QUOTE(new_api))
73-
7432
/// @endcond ESIMD_DETAIL
7533

7634
namespace sycl {
@@ -106,6 +64,19 @@ enum class rgba_channel : uint8_t { R, G, B, A };
10664
using SurfaceIndex = unsigned int;
10765

10866
namespace detail {
67+
68+
/// Check if a given 32 bit positive integer is a power of 2 at compile time.
69+
ESIMD_INLINE constexpr bool isPowerOf2(unsigned int n) {
70+
return (n & (n - 1)) == 0;
71+
}
72+
73+
/// Check at compile time if given 32 bit positive integer is both:
74+
/// - a power of 2
75+
/// - less or equal to given limit
76+
ESIMD_INLINE constexpr bool isPowerOf2(unsigned int n, unsigned int limit) {
77+
return (n & (n - 1)) == 0 && n <= limit;
78+
}
79+
10980
template <rgba_channel Ch>
11081
static inline constexpr uint8_t ch = 1 << static_cast<int>(Ch);
11182
static inline constexpr uint8_t chR = ch<rgba_channel::R>;
@@ -151,6 +122,10 @@ constexpr int get_num_channels_enabled(rgba_channel_mask M) {
151122
is_channel_enabled(M, rgba_channel::A);
152123
}
153124

125+
#define __ESIMD_USM_DWORD_ATOMIC_TO_LSC \
126+
" is supported only on ACM, PVC. USM-based atomic will be auto-converted " \
127+
"to LSC version."
128+
154129
/// Represents an atomic operation. Operations always return the old value(s) of
155130
/// the target memory location(s) as it was before the operation was applied.
156131
/// Each operation is annotated with a pseudocode illustrating its semantics,
@@ -167,9 +142,11 @@ enum class atomic_op : uint8_t {
167142
/// Decrement: <code>*addr = *addr - 1</code>.
168143
dec = 0x3,
169144
/// Minimum: <code>*addr = min(*addr, src0)</code>.
170-
min = 0x4,
145+
umin = 0x4,
146+
min __SYCL_DEPRECATED("use umin") = umin,
171147
/// Maximum: <code>*addr = max(*addr, src0)</code>.
172-
max = 0x5,
148+
umax = 0x5,
149+
max __SYCL_DEPRECATED("use smax") = umax,
173150
/// Exchange. <code>*addr == src0;</code>
174151
xchg = 0x6,
175152
/// Compare and exchange. <code>if (*addr == src0) *sddr = src1;</code>
@@ -181,27 +158,177 @@ enum class atomic_op : uint8_t {
181158
/// Bit \c xor: <code>*addr = *addr | src0</code>.
182159
bit_xor = 0xa,
183160
/// Minimum (signed integer): <code>*addr = min(*addr, src0)</code>.
184-
minsint = 0xb,
161+
smin = 0xb,
162+
minsint __SYCL_DEPRECATED("use smin") = smin,
185163
/// Maximum (signed integer): <code>*addr = max(*addr, src0)</code>.
186-
maxsint = 0xc,
164+
smax = 0xc,
165+
maxsint __SYCL_DEPRECATED("use smax") = 0xc,
187166
/// Minimum (floating point): <code>*addr = min(*addr, src0)</code>.
188-
fmax = 0x10,
167+
fmax __SYCL_DEPRECATED("fmax" __ESIMD_USM_DWORD_ATOMIC_TO_LSC) = 0x10,
189168
/// Maximum (floating point): <code>*addr = max(*addr, src0)</code>.
190-
fmin = 0x11,
169+
fmin __SYCL_DEPRECATED("fmin" __ESIMD_USM_DWORD_ATOMIC_TO_LSC) = 0x11,
191170
/// Compare and exchange (floating point).
192171
/// <code>if (*addr == src0) *addr = src1;</code>
193-
fcmpwr = 0x12,
194-
fadd = 0x13,
195-
fsub = 0x14,
172+
fcmpxchg = 0x12,
173+
fcmpwr __SYCL_DEPRECATED("fcmpwr" __ESIMD_USM_DWORD_ATOMIC_TO_LSC) = fcmpxchg,
174+
fadd __SYCL_DEPRECATED("fadd" __ESIMD_USM_DWORD_ATOMIC_TO_LSC) = 0x13,
175+
fsub __SYCL_DEPRECATED("fsub" __ESIMD_USM_DWORD_ATOMIC_TO_LSC) = 0x14,
196176
load = 0x15,
197177
store = 0x16,
198178
/// Decrement: <code>*addr = *addr - 1</code>. The only operation which
199179
/// returns new value of the destination rather than old.
200180
predec = 0xff,
201181
};
202182

183+
#undef __ESIMD_USM_DWORD_TO_LSC_MSG
184+
203185
/// @} sycl_esimd_core
204186

187+
namespace detail {
188+
template <__ESIMD_NS::native::lsc::atomic_op Op> constexpr int get_num_args() {
189+
if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::inc ||
190+
Op == __ESIMD_NS::native::lsc::atomic_op::dec ||
191+
Op == __ESIMD_NS::native::lsc::atomic_op::load) {
192+
return 0;
193+
} else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::store ||
194+
Op == __ESIMD_NS::native::lsc::atomic_op::add ||
195+
Op == __ESIMD_NS::native::lsc::atomic_op::sub ||
196+
Op == __ESIMD_NS::native::lsc::atomic_op::smin ||
197+
Op == __ESIMD_NS::native::lsc::atomic_op::smax ||
198+
Op == __ESIMD_NS::native::lsc::atomic_op::umin ||
199+
Op == __ESIMD_NS::native::lsc::atomic_op::umax ||
200+
Op == __ESIMD_NS::native::lsc::atomic_op::fadd ||
201+
Op == __ESIMD_NS::native::lsc::atomic_op::fsub ||
202+
Op == __ESIMD_NS::native::lsc::atomic_op::fmin ||
203+
Op == __ESIMD_NS::native::lsc::atomic_op::fmax ||
204+
Op == __ESIMD_NS::native::lsc::atomic_op::bit_and ||
205+
Op == __ESIMD_NS::native::lsc::atomic_op::bit_or ||
206+
Op == __ESIMD_NS::native::lsc::atomic_op::bit_xor) {
207+
return 1;
208+
} else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::cmpxchg ||
209+
Op == __ESIMD_NS::native::lsc::atomic_op::fcmpxchg) {
210+
return 2;
211+
} else {
212+
return -1; // error
213+
}
214+
}
215+
216+
template <__ESIMD_NS::atomic_op Op> constexpr bool has_lsc_equivalent() {
217+
switch (Op) {
218+
case __ESIMD_NS::atomic_op::xchg:
219+
case __ESIMD_NS::atomic_op::predec:
220+
return false;
221+
default:
222+
return true;
223+
}
224+
}
225+
226+
template <__ESIMD_NS::atomic_op Op>
227+
constexpr __ESIMD_NS::native::lsc::atomic_op to_lsc_atomic_op() {
228+
switch (Op) {
229+
case __ESIMD_NS::atomic_op::add:
230+
return __ESIMD_NS::native::lsc::atomic_op::add;
231+
case __ESIMD_NS::atomic_op::sub:
232+
return __ESIMD_NS::native::lsc::atomic_op::sub;
233+
case __ESIMD_NS::atomic_op::inc:
234+
return __ESIMD_NS::native::lsc::atomic_op::inc;
235+
case __ESIMD_NS::atomic_op::dec:
236+
return __ESIMD_NS::native::lsc::atomic_op::dec;
237+
case __ESIMD_NS::atomic_op::min:
238+
return __ESIMD_NS::native::lsc::atomic_op::umin;
239+
case __ESIMD_NS::atomic_op::max:
240+
return __ESIMD_NS::native::lsc::atomic_op::umax;
241+
case __ESIMD_NS::atomic_op::cmpxchg:
242+
return __ESIMD_NS::native::lsc::atomic_op::cmpxchg;
243+
case __ESIMD_NS::atomic_op::bit_and:
244+
return __ESIMD_NS::native::lsc::atomic_op::bit_and;
245+
case __ESIMD_NS::atomic_op::bit_or:
246+
return __ESIMD_NS::native::lsc::atomic_op::bit_or;
247+
case __ESIMD_NS::atomic_op::bit_xor:
248+
return __ESIMD_NS::native::lsc::atomic_op::bit_xor;
249+
case __ESIMD_NS::atomic_op::minsint:
250+
return __ESIMD_NS::native::lsc::atomic_op::smin;
251+
case __ESIMD_NS::atomic_op::maxsint:
252+
return __ESIMD_NS::native::lsc::atomic_op::smax;
253+
case __ESIMD_NS::atomic_op::fmax:
254+
return __ESIMD_NS::native::lsc::atomic_op::fmax;
255+
case __ESIMD_NS::atomic_op::fmin:
256+
return __ESIMD_NS::native::lsc::atomic_op::fmin;
257+
case __ESIMD_NS::atomic_op::fcmpwr:
258+
return __ESIMD_NS::native::lsc::atomic_op::fcmpxchg;
259+
case __ESIMD_NS::atomic_op::fadd:
260+
return __ESIMD_NS::native::lsc::atomic_op::fadd;
261+
case __ESIMD_NS::atomic_op::fsub:
262+
return __ESIMD_NS::native::lsc::atomic_op::fsub;
263+
case __ESIMD_NS::atomic_op::load:
264+
return __ESIMD_NS::native::lsc::atomic_op::load;
265+
case __ESIMD_NS::atomic_op::store:
266+
return __ESIMD_NS::native::lsc::atomic_op::store;
267+
default:
268+
static_assert(has_lsc_equivalent<Op>() && "Unsupported LSC atomic op");
269+
}
270+
}
271+
272+
template <__ESIMD_NS::native::lsc::atomic_op Op>
273+
constexpr __ESIMD_NS::atomic_op to_atomic_op() {
274+
switch (Op) {
275+
case __ESIMD_NS::native::lsc::atomic_op::add:
276+
return __ESIMD_NS::atomic_op::add;
277+
case __ESIMD_NS::native::lsc::atomic_op::sub:
278+
return __ESIMD_NS::atomic_op::sub;
279+
case __ESIMD_NS::native::lsc::atomic_op::inc:
280+
return __ESIMD_NS::atomic_op::inc;
281+
case __ESIMD_NS::native::lsc::atomic_op::dec:
282+
return __ESIMD_NS::atomic_op::dec;
283+
case __ESIMD_NS::native::lsc::atomic_op::umin:
284+
return __ESIMD_NS::atomic_op::min;
285+
case __ESIMD_NS::native::lsc::atomic_op::umax:
286+
return __ESIMD_NS::atomic_op::max;
287+
case __ESIMD_NS::native::lsc::atomic_op::cmpxchg:
288+
return __ESIMD_NS::atomic_op::cmpxchg;
289+
case __ESIMD_NS::native::lsc::atomic_op::bit_and:
290+
return __ESIMD_NS::atomic_op::bit_and;
291+
case __ESIMD_NS::native::lsc::atomic_op::bit_or:
292+
return __ESIMD_NS::atomic_op::bit_or;
293+
case __ESIMD_NS::native::lsc::atomic_op::bit_xor:
294+
return __ESIMD_NS::atomic_op::bit_xor;
295+
case __ESIMD_NS::native::lsc::atomic_op::smin:
296+
return __ESIMD_NS::atomic_op::minsint;
297+
case __ESIMD_NS::native::lsc::atomic_op::smax:
298+
return __ESIMD_NS::atomic_op::maxsint;
299+
case __ESIMD_NS::native::lsc::atomic_op::fmax:
300+
return __ESIMD_NS::atomic_op::fmax;
301+
case __ESIMD_NS::native::lsc::atomic_op::fmin:
302+
return __ESIMD_NS::atomic_op::fmin;
303+
case __ESIMD_NS::native::lsc::atomic_op::fcmpxchg:
304+
return __ESIMD_NS::atomic_op::fcmpwr;
305+
case __ESIMD_NS::native::lsc::atomic_op::fadd:
306+
return __ESIMD_NS::atomic_op::fadd;
307+
case __ESIMD_NS::native::lsc::atomic_op::fsub:
308+
return __ESIMD_NS::atomic_op::fsub;
309+
case __ESIMD_NS::native::lsc::atomic_op::load:
310+
return __ESIMD_NS::atomic_op::load;
311+
case __ESIMD_NS::native::lsc::atomic_op::store:
312+
return __ESIMD_NS::atomic_op::store;
313+
}
314+
}
315+
316+
template <__ESIMD_NS::atomic_op Op> constexpr int get_num_args() {
317+
if constexpr (has_lsc_equivalent<Op>()) {
318+
return get_num_args<to_lsc_atomic_op<Op>()>();
319+
} else {
320+
switch (Op) {
321+
case __ESIMD_NS::atomic_op::xchg:
322+
case __ESIMD_NS::atomic_op::predec:
323+
return 1;
324+
default:
325+
return -1; // error
326+
}
327+
}
328+
}
329+
330+
} // namespace detail
331+
205332
} // namespace ext::intel::esimd
206333
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
207334
} // namespace sycl

sycl/include/sycl/ext/intel/esimd/detail/atomic_intrin.hpp

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -84,13 +84,18 @@ template <typename Ty> Ty atomic_min(Ty *ptr, Ty val) {
8484
// TODO: Windows will be supported soon
8585
__ESIMD_UNSUPPORTED_ON_HOST;
8686
#else
87-
Ty _old, _new;
88-
do {
89-
_old = *ptr;
90-
_new = std::min<Ty>(_old, val);
91-
} while (!__atomic_compare_exchange_n(ptr, &_old, _new, false,
92-
__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
93-
return _new;
87+
// TODO FIXME: fix implementation for FP types.
88+
if constexpr (std::is_integral_v<Ty>) {
89+
Ty _old, _new;
90+
do {
91+
_old = *ptr;
92+
_new = std::min<Ty>(_old, val);
93+
} while (!__atomic_compare_exchange_n(ptr, &_old, _new, false,
94+
__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
95+
return _new;
96+
} else {
97+
__ESIMD_UNSUPPORTED_ON_HOST;
98+
}
9499
#endif
95100
}
96101

@@ -99,13 +104,18 @@ template <typename Ty> Ty atomic_max(Ty *ptr, Ty val) {
99104
// TODO: Windows will be supported soon
100105
__ESIMD_UNSUPPORTED_ON_HOST;
101106
#else
102-
Ty _old, _new;
103-
do {
104-
_old = *ptr;
105-
_new = std::max<Ty>(_old, val);
106-
} while (!__atomic_compare_exchange_n(ptr, &_old, _new, false,
107-
__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
108-
return _new;
107+
// TODO FIXME: fix implementation for FP types.
108+
if constexpr (std::is_integral_v<Ty>) {
109+
Ty _old, _new;
110+
do {
111+
_old = *ptr;
112+
_new = std::max<Ty>(_old, val);
113+
} while (!__atomic_compare_exchange_n(ptr, &_old, _new, false,
114+
__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
115+
return _new;
116+
} else {
117+
__ESIMD_UNSUPPORTED_ON_HOST;
118+
}
109119
#endif
110120
}
111121

@@ -114,10 +124,15 @@ template <typename Ty> Ty atomic_cmpxchg(Ty *ptr, Ty expected, Ty desired) {
114124
// TODO: Windows will be supported soon
115125
__ESIMD_UNSUPPORTED_ON_HOST;
116126
#else
117-
Ty _old = expected;
118-
__atomic_compare_exchange_n(ptr, &_old, desired, false, __ATOMIC_SEQ_CST,
119-
__ATOMIC_SEQ_CST);
120-
return *ptr;
127+
// TODO FIXME: fix implementation for FP types.
128+
if constexpr (std::is_integral_v<Ty>) {
129+
Ty _old = expected;
130+
__atomic_compare_exchange_n(ptr, &_old, desired, false, __ATOMIC_SEQ_CST,
131+
__ATOMIC_SEQ_CST);
132+
return *ptr;
133+
} else {
134+
__ESIMD_UNSUPPORTED_ON_HOST;
135+
}
121136
#endif
122137
}
123138

0 commit comments

Comments
 (0)