10
10
11
11
#pragma once

#include <sycl/ext/intel/esimd/detail/defines_elementary.hpp>
#include <sycl/ext/intel/esimd/native/common.hpp>
#include <sycl/ext/intel/experimental/esimd/common.hpp>

#include <sycl/detail/defines.hpp>

#include <cstdint> // for uint* types
18
22
/// @cond ESIMD_DETAIL

#ifdef __SYCL_DEVICE_ONLY__
// Supported on device - the guard expands to nothing.
#define __ESIMD_UNSUPPORTED_ON_HOST
#else // __SYCL_DEVICE_ONLY__
// Not implemented on host - report the attempt via an exception.
#define __ESIMD_UNSUPPORTED_ON_HOST                                            \
  throw sycl::exception(sycl::errc::feature_not_supported,                     \
                        "This ESIMD feature is not supported on HOST")
#endif // __SYCL_DEVICE_ONLY__

/// @endcond ESIMD_DETAIL
75
33
76
34
namespace sycl {
@@ -106,6 +64,19 @@ enum class rgba_channel : uint8_t { R, G, B, A };
106
64
using SurfaceIndex = unsigned int ;
107
65
108
66
namespace detail {
67
+
68
+ // / Check if a given 32 bit positive integer is a power of 2 at compile time.
69
+ ESIMD_INLINE constexpr bool isPowerOf2 (unsigned int n) {
70
+ return (n & (n - 1 )) == 0 ;
71
+ }
72
+
73
+ // / Check at compile time if given 32 bit positive integer is both:
74
+ // / - a power of 2
75
+ // / - less or equal to given limit
76
+ ESIMD_INLINE constexpr bool isPowerOf2 (unsigned int n, unsigned int limit) {
77
+ return (n & (n - 1 )) == 0 && n <= limit;
78
+ }
79
+
109
80
template <rgba_channel Ch>
110
81
static inline constexpr uint8_t ch = 1 << static_cast <int >(Ch);
111
82
static inline constexpr uint8_t chR = ch<rgba_channel::R>;
@@ -151,6 +122,10 @@ constexpr int get_num_channels_enabled(rgba_channel_mask M) {
151
122
is_channel_enabled (M, rgba_channel::A);
152
123
}
153
124
125
// Message fragment concatenated into deprecation warnings for USM-based dword
// atomics which are auto-converted to their LSC equivalents.
#define __ESIMD_USM_DWORD_ATOMIC_TO_LSC                                        \
  "is supported only on ACM, PVC. USM-based atomic will be auto-converted "    \
  "to LSC version."
128
+
154
129
// / Represents an atomic operation. Operations always return the old value(s) of
155
130
// / the target memory location(s) as it was before the operation was applied.
156
131
// / Each operation is annotated with a pseudocode illustrating its semantics,
@@ -167,9 +142,11 @@ enum class atomic_op : uint8_t {
167
142
// / Decrement: <code>*addr = *addr - 1</code>.
168
143
dec = 0x3 ,
169
144
// / Minimum: <code>*addr = min(*addr, src0)</code>.
170
- min = 0x4 ,
145
+ umin = 0x4 ,
146
+ min __SYCL_DEPRECATED (" use umin" ) = umin,
171
147
// / Maximum: <code>*addr = max(*addr, src0)</code>.
172
- max = 0x5 ,
148
+ umax = 0x5,
149
+ max __SYCL_DEPRECATED(" use smax" ) = umax,
173
150
// / Exchange. <code>*addr == src0;</code>
174
151
xchg = 0x6,
175
152
// / Compare and exchange. <code>if (*addr == src0) *sddr = src1;</code>
@@ -181,27 +158,177 @@ enum class atomic_op : uint8_t {
181
158
// / Bit \c xor: <code>*addr = *addr | src0</code>.
182
159
bit_xor = 0xa,
183
160
// / Minimum (signed integer): <code>*addr = min(*addr, src0)</code>.
184
- minsint = 0xb ,
161
+ smin = 0xb,
162
+ minsint __SYCL_DEPRECATED(" use smin" ) = smin,
185
163
// / Maximum (signed integer): <code>*addr = max(*addr, src0)</code>.
186
- maxsint = 0xc ,
164
+ smax = 0xc,
165
+ maxsint __SYCL_DEPRECATED(" use smax" ) = 0xc,
187
166
// / Minimum (floating point): <code>*addr = min(*addr, src0)</code>.
188
- fmax = 0x10 ,
167
+ fmax __SYCL_DEPRECATED( " fmax " __ESIMD_USM_DWORD_ATOMIC_TO_LSC) = 0x10,
189
168
// / Maximum (floating point): <code>*addr = max(*addr, src0)</code>.
190
- fmin = 0x11 ,
169
+ fmin __SYCL_DEPRECATED( " fmin " __ESIMD_USM_DWORD_ATOMIC_TO_LSC) = 0x11,
191
170
// / Compare and exchange (floating point).
192
171
// / <code>if (*addr == src0) *addr = src1;</code>
193
- fcmpwr = 0x12 ,
194
- fadd = 0x13 ,
195
- fsub = 0x14 ,
172
+ fcmpxchg = 0x12,
173
+ fcmpwr __SYCL_DEPRECATED(" fcmpwr" __ESIMD_USM_DWORD_ATOMIC_TO_LSC) = fcmpxchg,
174
+ fadd __SYCL_DEPRECATED(" fadd" __ESIMD_USM_DWORD_ATOMIC_TO_LSC) = 0x13,
175
+ fsub __SYCL_DEPRECATED(" fsub" __ESIMD_USM_DWORD_ATOMIC_TO_LSC) = 0x14,
196
176
load = 0x15,
197
177
store = 0x16,
198
178
// / Decrement: <code>*addr = *addr - 1</code>. The only operation which
199
179
// / returns new value of the destination rather than old.
200
180
predec = 0xff,
201
181
};
202
182
183
// Undef the helper so it does not leak out of this header. Fixed: the #undef
// previously named __ESIMD_USM_DWORD_TO_LSC_MSG, which was never defined, so
// __ESIMD_USM_DWORD_ATOMIC_TO_LSC stayed visible to every includer.
#undef __ESIMD_USM_DWORD_ATOMIC_TO_LSC
184
+
203
185
// / @} sycl_esimd_core
204
186
187
+ namespace detail {
188
+ template <__ESIMD_NS::native::lsc::atomic_op Op> constexpr int get_num_args () {
189
+ if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::inc ||
190
+ Op == __ESIMD_NS::native::lsc::atomic_op::dec ||
191
+ Op == __ESIMD_NS::native::lsc::atomic_op::load) {
192
+ return 0 ;
193
+ } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::store ||
194
+ Op == __ESIMD_NS::native::lsc::atomic_op::add ||
195
+ Op == __ESIMD_NS::native::lsc::atomic_op::sub ||
196
+ Op == __ESIMD_NS::native::lsc::atomic_op::smin ||
197
+ Op == __ESIMD_NS::native::lsc::atomic_op::smax ||
198
+ Op == __ESIMD_NS::native::lsc::atomic_op::umin ||
199
+ Op == __ESIMD_NS::native::lsc::atomic_op::umax ||
200
+ Op == __ESIMD_NS::native::lsc::atomic_op::fadd ||
201
+ Op == __ESIMD_NS::native::lsc::atomic_op::fsub ||
202
+ Op == __ESIMD_NS::native::lsc::atomic_op::fmin ||
203
+ Op == __ESIMD_NS::native::lsc::atomic_op::fmax ||
204
+ Op == __ESIMD_NS::native::lsc::atomic_op::bit_and ||
205
+ Op == __ESIMD_NS::native::lsc::atomic_op::bit_or ||
206
+ Op == __ESIMD_NS::native::lsc::atomic_op::bit_xor) {
207
+ return 1 ;
208
+ } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::cmpxchg ||
209
+ Op == __ESIMD_NS::native::lsc::atomic_op::fcmpxchg) {
210
+ return 2 ;
211
+ } else {
212
+ return -1 ; // error
213
+ }
214
+ }
215
+
216
+ template <__ESIMD_NS::atomic_op Op> constexpr bool has_lsc_equivalent () {
217
+ switch (Op) {
218
+ case __ESIMD_NS::atomic_op::xchg:
219
+ case __ESIMD_NS::atomic_op::predec:
220
+ return false ;
221
+ default :
222
+ return true ;
223
+ }
224
+ }
225
+
226
+ template <__ESIMD_NS::atomic_op Op>
227
+ constexpr __ESIMD_NS::native::lsc::atomic_op to_lsc_atomic_op () {
228
+ switch (Op) {
229
+ case __ESIMD_NS::atomic_op::add:
230
+ return __ESIMD_NS::native::lsc::atomic_op::add;
231
+ case __ESIMD_NS::atomic_op::sub:
232
+ return __ESIMD_NS::native::lsc::atomic_op::sub;
233
+ case __ESIMD_NS::atomic_op::inc:
234
+ return __ESIMD_NS::native::lsc::atomic_op::inc;
235
+ case __ESIMD_NS::atomic_op::dec:
236
+ return __ESIMD_NS::native::lsc::atomic_op::dec;
237
+ case __ESIMD_NS::atomic_op::min:
238
+ return __ESIMD_NS::native::lsc::atomic_op::umin;
239
+ case __ESIMD_NS::atomic_op::max:
240
+ return __ESIMD_NS::native::lsc::atomic_op::umax;
241
+ case __ESIMD_NS::atomic_op::cmpxchg:
242
+ return __ESIMD_NS::native::lsc::atomic_op::cmpxchg;
243
+ case __ESIMD_NS::atomic_op::bit_and:
244
+ return __ESIMD_NS::native::lsc::atomic_op::bit_and;
245
+ case __ESIMD_NS::atomic_op::bit_or:
246
+ return __ESIMD_NS::native::lsc::atomic_op::bit_or;
247
+ case __ESIMD_NS::atomic_op::bit_xor:
248
+ return __ESIMD_NS::native::lsc::atomic_op::bit_xor;
249
+ case __ESIMD_NS::atomic_op::minsint:
250
+ return __ESIMD_NS::native::lsc::atomic_op::smin;
251
+ case __ESIMD_NS::atomic_op::maxsint:
252
+ return __ESIMD_NS::native::lsc::atomic_op::smax;
253
+ case __ESIMD_NS::atomic_op::fmax:
254
+ return __ESIMD_NS::native::lsc::atomic_op::fmax;
255
+ case __ESIMD_NS::atomic_op::fmin:
256
+ return __ESIMD_NS::native::lsc::atomic_op::fmin;
257
+ case __ESIMD_NS::atomic_op::fcmpwr:
258
+ return __ESIMD_NS::native::lsc::atomic_op::fcmpxchg;
259
+ case __ESIMD_NS::atomic_op::fadd:
260
+ return __ESIMD_NS::native::lsc::atomic_op::fadd;
261
+ case __ESIMD_NS::atomic_op::fsub:
262
+ return __ESIMD_NS::native::lsc::atomic_op::fsub;
263
+ case __ESIMD_NS::atomic_op::load:
264
+ return __ESIMD_NS::native::lsc::atomic_op::load;
265
+ case __ESIMD_NS::atomic_op::store:
266
+ return __ESIMD_NS::native::lsc::atomic_op::store;
267
+ default :
268
+ static_assert (has_lsc_equivalent<Op>() && " Unsupported LSC atomic op" );
269
+ }
270
+ }
271
+
272
+ template <__ESIMD_NS::native::lsc::atomic_op Op>
273
+ constexpr __ESIMD_NS::atomic_op to_atomic_op () {
274
+ switch (Op) {
275
+ case __ESIMD_NS::native::lsc::atomic_op::add:
276
+ return __ESIMD_NS::atomic_op::add;
277
+ case __ESIMD_NS::native::lsc::atomic_op::sub:
278
+ return __ESIMD_NS::atomic_op::sub;
279
+ case __ESIMD_NS::native::lsc::atomic_op::inc:
280
+ return __ESIMD_NS::atomic_op::inc;
281
+ case __ESIMD_NS::native::lsc::atomic_op::dec:
282
+ return __ESIMD_NS::atomic_op::dec;
283
+ case __ESIMD_NS::native::lsc::atomic_op::umin:
284
+ return __ESIMD_NS::atomic_op::min;
285
+ case __ESIMD_NS::native::lsc::atomic_op::umax:
286
+ return __ESIMD_NS::atomic_op::max;
287
+ case __ESIMD_NS::native::lsc::atomic_op::cmpxchg:
288
+ return __ESIMD_NS::atomic_op::cmpxchg;
289
+ case __ESIMD_NS::native::lsc::atomic_op::bit_and:
290
+ return __ESIMD_NS::atomic_op::bit_and;
291
+ case __ESIMD_NS::native::lsc::atomic_op::bit_or:
292
+ return __ESIMD_NS::atomic_op::bit_or;
293
+ case __ESIMD_NS::native::lsc::atomic_op::bit_xor:
294
+ return __ESIMD_NS::atomic_op::bit_xor;
295
+ case __ESIMD_NS::native::lsc::atomic_op::smin:
296
+ return __ESIMD_NS::atomic_op::minsint;
297
+ case __ESIMD_NS::native::lsc::atomic_op::smax:
298
+ return __ESIMD_NS::atomic_op::maxsint;
299
+ case __ESIMD_NS::native::lsc::atomic_op::fmax:
300
+ return __ESIMD_NS::atomic_op::fmax;
301
+ case __ESIMD_NS::native::lsc::atomic_op::fmin:
302
+ return __ESIMD_NS::atomic_op::fmin;
303
+ case __ESIMD_NS::native::lsc::atomic_op::fcmpxchg:
304
+ return __ESIMD_NS::atomic_op::fcmpwr;
305
+ case __ESIMD_NS::native::lsc::atomic_op::fadd:
306
+ return __ESIMD_NS::atomic_op::fadd;
307
+ case __ESIMD_NS::native::lsc::atomic_op::fsub:
308
+ return __ESIMD_NS::atomic_op::fsub;
309
+ case __ESIMD_NS::native::lsc::atomic_op::load:
310
+ return __ESIMD_NS::atomic_op::load;
311
+ case __ESIMD_NS::native::lsc::atomic_op::store:
312
+ return __ESIMD_NS::atomic_op::store;
313
+ }
314
+ }
315
+
316
+ template <__ESIMD_NS::atomic_op Op> constexpr int get_num_args () {
317
+ if constexpr (has_lsc_equivalent<Op>()) {
318
+ return get_num_args<to_lsc_atomic_op<Op>()>();
319
+ } else {
320
+ switch (Op) {
321
+ case __ESIMD_NS::atomic_op::xchg:
322
+ case __ESIMD_NS::atomic_op::predec:
323
+ return 1 ;
324
+ default :
325
+ return -1 ; // error
326
+ }
327
+ }
328
+ }
329
+
330
+ } // namespace detail
331
+
205
332
} // namespace ext::intel::esimd
206
333
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
207
334
} // namespace sycl
0 commit comments