Skip to content

Commit 92c00d5

Browse files
committed
[ESIMD] Introduce atomic_update<native::lsc::fadd>(...) and similar ops.
Signed-off-by: Konstantin S Bobrovsky <[email protected]>
1 parent cdb0dfd commit 92c00d5

File tree

16 files changed

+676
-462
lines changed

16 files changed

+676
-462
lines changed

sycl/include/sycl/ext/intel/esimd.hpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,6 @@
8181

8282
#include <sycl/ext/intel/esimd/alt_ui.hpp>
8383
#include <sycl/ext/intel/esimd/common.hpp>
84-
#include <sycl/ext/intel/esimd/math.hpp>
85-
#include <sycl/ext/intel/esimd/memory.hpp>
8684
#include <sycl/ext/intel/esimd/simd.hpp>
8785
#include <sycl/ext/intel/esimd/simd_view.hpp>
8886
#include <sycl/ext/intel/experimental/esimd/kernel_properties.hpp>

sycl/include/sycl/ext/intel/esimd/common.hpp

Lines changed: 176 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@
1010

1111
#pragma once
1212

13+
#include <sycl/ext/intel/esimd/detail/defines_elementary.hpp>
14+
#include <sycl/ext/intel/esimd/native/common.hpp>
15+
#include <sycl/ext/intel/experimental/esimd/common.hpp>
16+
1317
#include <sycl/detail/defines.hpp>
1418

1519
#include <cstdint> // for uint* types
@@ -18,64 +22,20 @@
1822
/// @cond ESIMD_DETAIL
1923

2024
#ifdef __SYCL_DEVICE_ONLY__
21-
#define SYCL_ESIMD_KERNEL __attribute__((sycl_explicit_simd))
22-
#define SYCL_ESIMD_FUNCTION __attribute__((sycl_explicit_simd))
23-
24-
// Mark a function being nodebug.
25-
#define ESIMD_NODEBUG __attribute__((nodebug))
26-
// Mark a "ESIMD global": accessible from all functions in current translation
27-
// unit, separate copy per subgroup (work-item), mapped to SPIR-V private
28-
// storage class.
29-
#define ESIMD_PRIVATE \
30-
__attribute__((opencl_private)) __attribute__((sycl_explicit_simd))
31-
// Bind a ESIMD global variable to a specific register.
32-
#define ESIMD_REGISTER(n) __attribute__((register_num(n)))
33-
34-
#define __ESIMD_API ESIMD_NODEBUG ESIMD_INLINE
35-
3625
#define __ESIMD_UNSUPPORTED_ON_HOST
37-
3826
#else // __SYCL_DEVICE_ONLY__
39-
#define SYCL_ESIMD_KERNEL
40-
#define SYCL_ESIMD_FUNCTION
41-
42-
// TODO ESIMD define what this means on Windows host
43-
#define ESIMD_NODEBUG
44-
// On host device ESIMD global is a thread local static var. This assumes that
45-
// each work-item is mapped to a separate OS thread on host device.
46-
#define ESIMD_PRIVATE thread_local
47-
#define ESIMD_REGISTER(n)
48-
49-
#define __ESIMD_API ESIMD_INLINE
50-
5127
#define __ESIMD_UNSUPPORTED_ON_HOST \
5228
throw sycl::exception(sycl::errc::feature_not_supported, \
5329
"This ESIMD feature is not supported on HOST")
54-
5530
#endif // __SYCL_DEVICE_ONLY__
5631

57-
// Mark a function being noinline
58-
#define ESIMD_NOINLINE __attribute__((noinline))
59-
// Force a function to be inlined. 'inline' is used to preserve ODR for
60-
// functions defined in a header.
61-
#define ESIMD_INLINE inline __attribute__((always_inline))
62-
63-
// Macros for internal use
64-
#define __ESIMD_NS sycl::ext::intel::esimd
65-
#define __ESIMD_DNS sycl::ext::intel::esimd::detail
66-
#define __ESIMD_EMU_DNS sycl::ext::intel::esimd::emu::detail
67-
68-
#define __ESIMD_QUOTE1(m) #m
69-
#define __ESIMD_QUOTE(m) __ESIMD_QUOTE1(m)
70-
#define __ESIMD_NS_QUOTED __ESIMD_QUOTE(__ESIMD_NS)
71-
#define __ESIMD_DEPRECATED(new_api) \
72-
__SYCL_DEPRECATED("use " __ESIMD_NS_QUOTED "::" __ESIMD_QUOTE(new_api))
73-
7432
/// @endcond ESIMD_DETAIL
7533

7634
namespace sycl {
7735
__SYCL_INLINE_VER_NAMESPACE(_V1) {
78-
namespace ext::intel::esimd {
36+
namespace ext {
37+
namespace intel {
38+
namespace esimd {
7939

8040
/// @addtogroup sycl_esimd_core
8141
/// @{
@@ -106,6 +66,16 @@ enum class rgba_channel : uint8_t { R, G, B, A };
10666
using SurfaceIndex = unsigned int;
10767

10868
namespace detail {
69+
70+
/// Check if a given 32 bit positive integer is a power of 2 at compile time.
71+
ESIMD_INLINE constexpr bool isPowerOf2(unsigned int n) {
72+
return (n & (n - 1)) == 0;
73+
}
74+
75+
ESIMD_INLINE constexpr bool isPowerOf2(unsigned int n, unsigned int limit) {
76+
return (n & (n - 1)) == 0 && n <= limit;
77+
}
78+
10979
template <rgba_channel Ch>
11080
static inline constexpr uint8_t ch = 1 << static_cast<int>(Ch);
11181
static inline constexpr uint8_t chR = ch<rgba_channel::R>;
@@ -151,6 +121,10 @@ constexpr int get_num_channels_enabled(rgba_channel_mask M) {
151121
is_channel_enabled(M, rgba_channel::A);
152122
}
153123

124+
#define __ESIMD_USM_DWORD_ATOMIC_TO_LSC \
125+
" is supported only on ACM, PVC. USM-based atomic will be auto-converted " \
126+
"to LSC version."
127+
154128
/// Represents an atomic operation. Operations always return the old value(s) of
155129
/// the target memory location(s) as it was before the operation was applied.
156130
/// Each operation is annotated with a pseudocode illustrating its semantics,
@@ -185,23 +159,172 @@ enum class atomic_op : uint8_t {
185159
/// Maximum (signed integer): <code>*addr = max(*addr, src0)</code>.
186160
maxsint = 0xc,
187161
/// Minimum (floating point): <code>*addr = min(*addr, src0)</code>.
188-
fmax = 0x10,
162+
fmax __SYCL_DEPRECATED("fmax" __ESIMD_USM_DWORD_ATOMIC_TO_LSC) = 0x10,
189163
/// Maximum (floating point): <code>*addr = max(*addr, src0)</code>.
190-
fmin = 0x11,
164+
fmin __SYCL_DEPRECATED("fmin" __ESIMD_USM_DWORD_ATOMIC_TO_LSC) = 0x11,
191165
/// Compare and exchange (floating point).
192166
/// <code>if (*addr == src0) *addr = src1;</code>
193-
fcmpwr = 0x12,
194-
fadd = 0x13,
195-
fsub = 0x14,
167+
fcmpwr __SYCL_DEPRECATED("fcmpwr" __ESIMD_USM_DWORD_ATOMIC_TO_LSC) = 0x12,
168+
fadd __SYCL_DEPRECATED("fadd" __ESIMD_USM_DWORD_ATOMIC_TO_LSC) = 0x13,
169+
fsub __SYCL_DEPRECATED("fsub" __ESIMD_USM_DWORD_ATOMIC_TO_LSC) = 0x14,
196170
load = 0x15,
197171
store = 0x16,
198172
/// Decrement: <code>*addr = *addr - 1</code>. The only operation which
199173
/// returns new value of the destination rather than old.
200174
predec = 0xff,
201175
};
202176

177+
#undef __ESIMD_USM_DWORD_TO_LSC_MSG
178+
203179
/// @} sycl_esimd_core
204180

205-
} // namespace ext::intel::esimd
181+
namespace detail {
182+
template <__ESIMD_NS::native::lsc::atomic_op Op> constexpr int get_num_args() {
183+
if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::inc ||
184+
Op == __ESIMD_NS::native::lsc::atomic_op::dec ||
185+
Op == __ESIMD_NS::native::lsc::atomic_op::load) {
186+
return 0;
187+
} else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::store ||
188+
Op == __ESIMD_NS::native::lsc::atomic_op::add ||
189+
Op == __ESIMD_NS::native::lsc::atomic_op::sub ||
190+
Op == __ESIMD_NS::native::lsc::atomic_op::minsint ||
191+
Op == __ESIMD_NS::native::lsc::atomic_op::maxsint ||
192+
Op == __ESIMD_NS::native::lsc::atomic_op::min ||
193+
Op == __ESIMD_NS::native::lsc::atomic_op::max ||
194+
Op == __ESIMD_NS::native::lsc::atomic_op::fadd ||
195+
Op == __ESIMD_NS::native::lsc::atomic_op::fsub ||
196+
Op == __ESIMD_NS::native::lsc::atomic_op::fmin ||
197+
Op == __ESIMD_NS::native::lsc::atomic_op::fmax ||
198+
Op == __ESIMD_NS::native::lsc::atomic_op::bit_and ||
199+
Op == __ESIMD_NS::native::lsc::atomic_op::bit_or ||
200+
Op == __ESIMD_NS::native::lsc::atomic_op::bit_xor) {
201+
return 1;
202+
} else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::cmpxchg ||
203+
Op == __ESIMD_NS::native::lsc::atomic_op::fcmpwr) {
204+
return 2;
205+
} else {
206+
return -1; // error
207+
}
208+
}
209+
210+
template <__ESIMD_NS::atomic_op Op> constexpr bool has_lsc_equivalent() {
211+
switch (Op) {
212+
case __ESIMD_NS::atomic_op::xchg:
213+
case __ESIMD_NS::atomic_op::predec:
214+
return false;
215+
default:
216+
return true;
217+
}
218+
}
219+
220+
template <__ESIMD_NS::atomic_op Op>
221+
constexpr __ESIMD_NS::native::lsc::atomic_op to_lsc_atomic_op() {
222+
switch (Op) {
223+
case __ESIMD_NS::atomic_op::add:
224+
return __ESIMD_NS::native::lsc::atomic_op::add;
225+
case __ESIMD_NS::atomic_op::sub:
226+
return __ESIMD_NS::native::lsc::atomic_op::sub;
227+
case __ESIMD_NS::atomic_op::inc:
228+
return __ESIMD_NS::native::lsc::atomic_op::inc;
229+
case __ESIMD_NS::atomic_op::dec:
230+
return __ESIMD_NS::native::lsc::atomic_op::dec;
231+
case __ESIMD_NS::atomic_op::min:
232+
return __ESIMD_NS::native::lsc::atomic_op::min;
233+
case __ESIMD_NS::atomic_op::max:
234+
return __ESIMD_NS::native::lsc::atomic_op::max;
235+
case __ESIMD_NS::atomic_op::cmpxchg:
236+
return __ESIMD_NS::native::lsc::atomic_op::cmpxchg;
237+
case __ESIMD_NS::atomic_op::bit_and:
238+
return __ESIMD_NS::native::lsc::atomic_op::bit_and;
239+
case __ESIMD_NS::atomic_op::bit_or:
240+
return __ESIMD_NS::native::lsc::atomic_op::bit_or;
241+
case __ESIMD_NS::atomic_op::bit_xor:
242+
return __ESIMD_NS::native::lsc::atomic_op::bit_xor;
243+
case __ESIMD_NS::atomic_op::minsint:
244+
return __ESIMD_NS::native::lsc::atomic_op::minsint;
245+
case __ESIMD_NS::atomic_op::maxsint:
246+
return __ESIMD_NS::native::lsc::atomic_op::maxsint;
247+
case __ESIMD_NS::atomic_op::fmax:
248+
return __ESIMD_NS::native::lsc::atomic_op::fmax;
249+
case __ESIMD_NS::atomic_op::fmin:
250+
return __ESIMD_NS::native::lsc::atomic_op::fmin;
251+
case __ESIMD_NS::atomic_op::fcmpwr:
252+
return __ESIMD_NS::native::lsc::atomic_op::fcmpwr;
253+
case __ESIMD_NS::atomic_op::fadd:
254+
return __ESIMD_NS::native::lsc::atomic_op::fadd;
255+
case __ESIMD_NS::atomic_op::fsub:
256+
return __ESIMD_NS::native::lsc::atomic_op::fsub;
257+
case __ESIMD_NS::atomic_op::load:
258+
return __ESIMD_NS::native::lsc::atomic_op::load;
259+
case __ESIMD_NS::atomic_op::store:
260+
return __ESIMD_NS::native::lsc::atomic_op::store;
261+
default:
262+
static_assert(has_lsc_equivalent<Op>() && "Unsupported LSC atomic op");
263+
}
264+
}
265+
266+
template <__ESIMD_NS::native::lsc::atomic_op Op>
267+
constexpr __ESIMD_NS::atomic_op to_atomic_op() {
268+
switch (Op) {
269+
case __ESIMD_NS::native::lsc::atomic_op::add:
270+
return __ESIMD_NS::atomic_op::add;
271+
case __ESIMD_NS::native::lsc::atomic_op::sub:
272+
return __ESIMD_NS::atomic_op::sub;
273+
case __ESIMD_NS::native::lsc::atomic_op::inc:
274+
return __ESIMD_NS::atomic_op::inc;
275+
case __ESIMD_NS::native::lsc::atomic_op::dec:
276+
return __ESIMD_NS::atomic_op::dec;
277+
case __ESIMD_NS::native::lsc::atomic_op::min:
278+
return __ESIMD_NS::atomic_op::min;
279+
case __ESIMD_NS::native::lsc::atomic_op::max:
280+
return __ESIMD_NS::atomic_op::max;
281+
case __ESIMD_NS::native::lsc::atomic_op::cmpxchg:
282+
return __ESIMD_NS::atomic_op::cmpxchg;
283+
case __ESIMD_NS::native::lsc::atomic_op::bit_and:
284+
return __ESIMD_NS::atomic_op::bit_and;
285+
case __ESIMD_NS::native::lsc::atomic_op::bit_or:
286+
return __ESIMD_NS::atomic_op::bit_or;
287+
case __ESIMD_NS::native::lsc::atomic_op::bit_xor:
288+
return __ESIMD_NS::atomic_op::bit_xor;
289+
case __ESIMD_NS::native::lsc::atomic_op::minsint:
290+
return __ESIMD_NS::atomic_op::minsint;
291+
case __ESIMD_NS::native::lsc::atomic_op::maxsint:
292+
return __ESIMD_NS::atomic_op::maxsint;
293+
case __ESIMD_NS::native::lsc::atomic_op::fmax:
294+
return __ESIMD_NS::atomic_op::fmax;
295+
case __ESIMD_NS::native::lsc::atomic_op::fmin:
296+
return __ESIMD_NS::atomic_op::fmin;
297+
case __ESIMD_NS::native::lsc::atomic_op::fcmpwr:
298+
return __ESIMD_NS::atomic_op::fcmpwr;
299+
case __ESIMD_NS::native::lsc::atomic_op::fadd:
300+
return __ESIMD_NS::atomic_op::fadd;
301+
case __ESIMD_NS::native::lsc::atomic_op::fsub:
302+
return __ESIMD_NS::atomic_op::fsub;
303+
case __ESIMD_NS::native::lsc::atomic_op::load:
304+
return __ESIMD_NS::atomic_op::load;
305+
case __ESIMD_NS::native::lsc::atomic_op::store:
306+
return __ESIMD_NS::atomic_op::store;
307+
}
308+
}
309+
310+
template <__ESIMD_NS::atomic_op Op> constexpr int get_num_args() {
311+
if constexpr (has_lsc_equivalent<Op>()) {
312+
return get_num_args<to_lsc_atomic_op<Op>()>();
313+
} else {
314+
switch (Op) {
315+
case __ESIMD_NS::atomic_op::xchg:
316+
case __ESIMD_NS::atomic_op::predec:
317+
return 1;
318+
default:
319+
return -1; // error
320+
}
321+
}
322+
}
323+
324+
} // namespace detail
325+
326+
} // namespace esimd
327+
} // namespace intel
328+
} // namespace ext
206329
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
207330
} // namespace sycl

sycl/include/sycl/ext/intel/esimd/detail/atomic_intrin.hpp

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -84,13 +84,18 @@ template <typename Ty> Ty atomic_min(Ty *ptr, Ty val) {
8484
// TODO: Windows will be supported soon
8585
__ESIMD_UNSUPPORTED_ON_HOST;
8686
#else
87-
Ty _old, _new;
88-
do {
89-
_old = *ptr;
90-
_new = std::min<Ty>(_old, val);
91-
} while (!__atomic_compare_exchange_n(ptr, &_old, _new, false,
92-
__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
93-
return _new;
87+
// TODO FIXME: fix implementation for FP types.
88+
if constexpr (std::is_integral_v<Ty>) {
89+
Ty _old, _new;
90+
do {
91+
_old = *ptr;
92+
_new = std::min<Ty>(_old, val);
93+
} while (!__atomic_compare_exchange_n(ptr, &_old, _new, false,
94+
__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
95+
return _new;
96+
} else {
97+
__ESIMD_UNSUPPORTED_ON_HOST;
98+
}
9499
#endif
95100
}
96101

@@ -99,13 +104,18 @@ template <typename Ty> Ty atomic_max(Ty *ptr, Ty val) {
99104
// TODO: Windows will be supported soon
100105
__ESIMD_UNSUPPORTED_ON_HOST;
101106
#else
102-
Ty _old, _new;
103-
do {
104-
_old = *ptr;
105-
_new = std::max<Ty>(_old, val);
106-
} while (!__atomic_compare_exchange_n(ptr, &_old, _new, false,
107-
__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
108-
return _new;
107+
// TODO FIXME: fix implementation for FP types.
108+
if constexpr (std::is_integral_v<Ty>) {
109+
Ty _old, _new;
110+
do {
111+
_old = *ptr;
112+
_new = std::max<Ty>(_old, val);
113+
} while (!__atomic_compare_exchange_n(ptr, &_old, _new, false,
114+
__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
115+
return _new;
116+
} else {
117+
__ESIMD_UNSUPPORTED_ON_HOST;
118+
}
109119
#endif
110120
}
111121

@@ -114,10 +124,15 @@ template <typename Ty> Ty atomic_cmpxchg(Ty *ptr, Ty expected, Ty desired) {
114124
// TODO: Windows will be supported soon
115125
__ESIMD_UNSUPPORTED_ON_HOST;
116126
#else
117-
Ty _old = expected;
118-
__atomic_compare_exchange_n(ptr, &_old, desired, false, __ATOMIC_SEQ_CST,
119-
__ATOMIC_SEQ_CST);
120-
return *ptr;
127+
// TODO FIXME: fix implementation for FP types.
128+
if constexpr (std::is_integral_v<Ty>) {
129+
Ty _old = expected;
130+
__atomic_compare_exchange_n(ptr, &_old, desired, false, __ATOMIC_SEQ_CST,
131+
__ATOMIC_SEQ_CST);
132+
return *ptr;
133+
} else {
134+
__ESIMD_UNSUPPORTED_ON_HOST;
135+
}
121136
#endif
122137
}
123138

0 commit comments

Comments
 (0)