|
10 | 10 |
|
11 | 11 | #pragma once
|
12 | 12 |
|
| 13 | +#include <sycl/ext/intel/esimd/detail/defines_elementary.hpp> |
| 14 | +#include <sycl/ext/intel/esimd/native/common.hpp> |
| 15 | +#include <sycl/ext/intel/experimental/esimd/common.hpp> |
| 16 | + |
13 | 17 | #include <sycl/detail/defines.hpp>
|
14 | 18 |
|
15 | 19 | #include <cstdint> // for uint* types
|
|
18 | 22 | /// @cond ESIMD_DETAIL
|
19 | 23 |
|
20 | 24 | #ifdef __SYCL_DEVICE_ONLY__
|
21 |
| -#define SYCL_ESIMD_KERNEL __attribute__((sycl_explicit_simd)) |
22 |
| -#define SYCL_ESIMD_FUNCTION __attribute__((sycl_explicit_simd)) |
23 |
| - |
24 |
| -// Mark a function being nodebug. |
25 |
| -#define ESIMD_NODEBUG __attribute__((nodebug)) |
26 |
| -// Mark a "ESIMD global": accessible from all functions in current translation |
27 |
| -// unit, separate copy per subgroup (work-item), mapped to SPIR-V private |
28 |
| -// storage class. |
29 |
| -#define ESIMD_PRIVATE \ |
30 |
| - __attribute__((opencl_private)) __attribute__((sycl_explicit_simd)) |
31 |
| -// Bind a ESIMD global variable to a specific register. |
32 |
| -#define ESIMD_REGISTER(n) __attribute__((register_num(n))) |
33 |
| - |
34 |
| -#define __ESIMD_API ESIMD_NODEBUG ESIMD_INLINE |
35 |
| - |
36 | 25 | #define __ESIMD_UNSUPPORTED_ON_HOST
|
37 |
| - |
38 | 26 | #else // __SYCL_DEVICE_ONLY__
|
39 |
| -#define SYCL_ESIMD_KERNEL |
40 |
| -#define SYCL_ESIMD_FUNCTION |
41 |
| - |
42 |
| -// TODO ESIMD define what this means on Windows host |
43 |
| -#define ESIMD_NODEBUG |
44 |
| -// On host device ESIMD global is a thread local static var. This assumes that |
45 |
| -// each work-item is mapped to a separate OS thread on host device. |
46 |
| -#define ESIMD_PRIVATE thread_local |
47 |
| -#define ESIMD_REGISTER(n) |
48 |
| - |
49 |
| -#define __ESIMD_API ESIMD_INLINE |
50 |
| - |
51 | 27 | #define __ESIMD_UNSUPPORTED_ON_HOST \
|
52 | 28 | throw sycl::exception(sycl::errc::feature_not_supported, \
|
53 | 29 | "This ESIMD feature is not supported on HOST")
|
54 |
| - |
55 | 30 | #endif // __SYCL_DEVICE_ONLY__
|
56 | 31 |
|
57 |
| -// Mark a function being noinline |
58 |
| -#define ESIMD_NOINLINE __attribute__((noinline)) |
59 |
| -// Force a function to be inlined. 'inline' is used to preserve ODR for |
60 |
| -// functions defined in a header. |
61 |
| -#define ESIMD_INLINE inline __attribute__((always_inline)) |
62 |
| - |
63 |
| -// Macros for internal use |
64 |
| -#define __ESIMD_NS sycl::ext::intel::esimd |
65 |
| -#define __ESIMD_DNS sycl::ext::intel::esimd::detail |
66 |
| -#define __ESIMD_EMU_DNS sycl::ext::intel::esimd::emu::detail |
67 |
| - |
68 |
| -#define __ESIMD_QUOTE1(m) #m |
69 |
| -#define __ESIMD_QUOTE(m) __ESIMD_QUOTE1(m) |
70 |
| -#define __ESIMD_NS_QUOTED __ESIMD_QUOTE(__ESIMD_NS) |
71 |
| -#define __ESIMD_DEPRECATED(new_api) \ |
72 |
| - __SYCL_DEPRECATED("use " __ESIMD_NS_QUOTED "::" __ESIMD_QUOTE(new_api)) |
73 |
| - |
74 | 32 | /// @endcond ESIMD_DETAIL
|
75 | 33 |
|
76 | 34 | namespace sycl {
|
77 | 35 | __SYCL_INLINE_VER_NAMESPACE(_V1) {
|
78 |
| -namespace ext::intel::esimd { |
| 36 | +namespace ext { |
| 37 | +namespace intel { |
| 38 | +namespace esimd { |
79 | 39 |
|
80 | 40 | /// @addtogroup sycl_esimd_core
|
81 | 41 | /// @{
|
@@ -106,6 +66,16 @@ enum class rgba_channel : uint8_t { R, G, B, A };
|
106 | 66 | using SurfaceIndex = unsigned int;
|
107 | 67 |
|
108 | 68 | namespace detail {
|
| 69 | + |
| 70 | +/// Check if a given 32 bit positive integer is a power of 2 at compile time. |
| 71 | +ESIMD_INLINE constexpr bool isPowerOf2(unsigned int n) { |
| 72 | + return (n & (n - 1)) == 0; |
| 73 | +} |
| 74 | + |
| 75 | +ESIMD_INLINE constexpr bool isPowerOf2(unsigned int n, unsigned int limit) { |
| 76 | + return (n & (n - 1)) == 0 && n <= limit; |
| 77 | +} |
| 78 | + |
// Single-bit mask for RGBA channel \p Ch: bit position is the enumerator's
// underlying value (R=bit0, G=bit1, B=bit2, A=bit3).
template <rgba_channel Ch>
static inline constexpr uint8_t ch = 1 << static_cast<int>(Ch);
// Convenience constant: mask with only the R channel bit set.
static inline constexpr uint8_t chR = ch<rgba_channel::R>;
|
@@ -151,6 +121,10 @@ constexpr int get_num_channels_enabled(rgba_channel_mask M) {
|
151 | 121 | is_channel_enabled(M, rgba_channel::A);
|
152 | 122 | }
|
153 | 123 |
|
// Message suffix for the deprecation warnings on USM-based DWORD floating
// point atomics that get auto-converted to their LSC equivalents on newer
// hardware. Deliberately a bare string fragment: it is concatenated after
// the operation name inside __SYCL_DEPRECATED("fmax" ...) etc. below.
#define __ESIMD_USM_DWORD_ATOMIC_TO_LSC                                        \
  " is supported only on ACM, PVC. USM-based atomic will be auto-converted "   \
  "to LSC version."
154 | 128 | /// Represents an atomic operation. Operations always return the old value(s) of
|
155 | 129 | /// the target memory location(s) as it was before the operation was applied.
|
156 | 130 | /// Each operation is annotated with a pseudocode illustrating its semantics,
|
@@ -185,23 +159,172 @@ enum class atomic_op : uint8_t {
|
185 | 159 | /// Maximum (signed integer): <code>*addr = max(*addr, src0)</code>.
|
186 | 160 | maxsint = 0xc,
|
187 | 161 | /// Minimum (floating point): <code>*addr = min(*addr, src0)</code>.
|
188 |
| - fmax = 0x10, |
| 162 | + fmax __SYCL_DEPRECATED("fmax" __ESIMD_USM_DWORD_ATOMIC_TO_LSC) = 0x10, |
189 | 163 | /// Maximum (floating point): <code>*addr = max(*addr, src0)</code>.
|
190 |
| - fmin = 0x11, |
| 164 | + fmin __SYCL_DEPRECATED("fmin" __ESIMD_USM_DWORD_ATOMIC_TO_LSC) = 0x11, |
191 | 165 | /// Compare and exchange (floating point).
|
192 | 166 | /// <code>if (*addr == src0) *addr = src1;</code>
|
193 |
| - fcmpwr = 0x12, |
194 |
| - fadd = 0x13, |
195 |
| - fsub = 0x14, |
| 167 | + fcmpwr __SYCL_DEPRECATED("fcmpwr" __ESIMD_USM_DWORD_ATOMIC_TO_LSC) = 0x12, |
| 168 | + fadd __SYCL_DEPRECATED("fadd" __ESIMD_USM_DWORD_ATOMIC_TO_LSC) = 0x13, |
| 169 | + fsub __SYCL_DEPRECATED("fsub" __ESIMD_USM_DWORD_ATOMIC_TO_LSC) = 0x14, |
196 | 170 | load = 0x15,
|
197 | 171 | store = 0x16,
|
198 | 172 | /// Decrement: <code>*addr = *addr - 1</code>. The only operation which
|
199 | 173 | /// returns new value of the destination rather than old.
|
200 | 174 | predec = 0xff,
|
201 | 175 | };
|
202 | 176 |
|
// Undefine the deprecation-message helper so it does not leak out of this
// header. Fix: the previous directive undefined __ESIMD_USM_DWORD_TO_LSC_MSG,
// a name that is never defined anywhere; the macro actually defined above is
// __ESIMD_USM_DWORD_ATOMIC_TO_LSC, which therefore escaped to every includer.
#undef __ESIMD_USM_DWORD_ATOMIC_TO_LSC

203 | 179 | /// @} sycl_esimd_core
|
204 | 180 |
|
205 |
| -} // namespace ext::intel::esimd |
| 181 | +namespace detail { |
| 182 | +template <__ESIMD_NS::native::lsc::atomic_op Op> constexpr int get_num_args() { |
| 183 | + if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::inc || |
| 184 | + Op == __ESIMD_NS::native::lsc::atomic_op::dec || |
| 185 | + Op == __ESIMD_NS::native::lsc::atomic_op::load) { |
| 186 | + return 0; |
| 187 | + } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::store || |
| 188 | + Op == __ESIMD_NS::native::lsc::atomic_op::add || |
| 189 | + Op == __ESIMD_NS::native::lsc::atomic_op::sub || |
| 190 | + Op == __ESIMD_NS::native::lsc::atomic_op::minsint || |
| 191 | + Op == __ESIMD_NS::native::lsc::atomic_op::maxsint || |
| 192 | + Op == __ESIMD_NS::native::lsc::atomic_op::min || |
| 193 | + Op == __ESIMD_NS::native::lsc::atomic_op::max || |
| 194 | + Op == __ESIMD_NS::native::lsc::atomic_op::fadd || |
| 195 | + Op == __ESIMD_NS::native::lsc::atomic_op::fsub || |
| 196 | + Op == __ESIMD_NS::native::lsc::atomic_op::fmin || |
| 197 | + Op == __ESIMD_NS::native::lsc::atomic_op::fmax || |
| 198 | + Op == __ESIMD_NS::native::lsc::atomic_op::bit_and || |
| 199 | + Op == __ESIMD_NS::native::lsc::atomic_op::bit_or || |
| 200 | + Op == __ESIMD_NS::native::lsc::atomic_op::bit_xor) { |
| 201 | + return 1; |
| 202 | + } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::cmpxchg || |
| 203 | + Op == __ESIMD_NS::native::lsc::atomic_op::fcmpwr) { |
| 204 | + return 2; |
| 205 | + } else { |
| 206 | + return -1; // error |
| 207 | + } |
| 208 | +} |
| 209 | + |
| 210 | +template <__ESIMD_NS::atomic_op Op> constexpr bool has_lsc_equivalent() { |
| 211 | + switch (Op) { |
| 212 | + case __ESIMD_NS::atomic_op::xchg: |
| 213 | + case __ESIMD_NS::atomic_op::predec: |
| 214 | + return false; |
| 215 | + default: |
| 216 | + return true; |
| 217 | + } |
| 218 | +} |
| 219 | + |
/// Maps a generic ESIMD atomic operation to its native LSC counterpart at
/// compile time. Instantiating this with an operation that has no LSC
/// equivalent (xchg, predec - see has_lsc_equivalent()) fails to compile via
/// the static_assert in the default case.
template <__ESIMD_NS::atomic_op Op>
constexpr __ESIMD_NS::native::lsc::atomic_op to_lsc_atomic_op() {
  switch (Op) {
  case __ESIMD_NS::atomic_op::add:
    return __ESIMD_NS::native::lsc::atomic_op::add;
  case __ESIMD_NS::atomic_op::sub:
    return __ESIMD_NS::native::lsc::atomic_op::sub;
  case __ESIMD_NS::atomic_op::inc:
    return __ESIMD_NS::native::lsc::atomic_op::inc;
  case __ESIMD_NS::atomic_op::dec:
    return __ESIMD_NS::native::lsc::atomic_op::dec;
  case __ESIMD_NS::atomic_op::min:
    return __ESIMD_NS::native::lsc::atomic_op::min;
  case __ESIMD_NS::atomic_op::max:
    return __ESIMD_NS::native::lsc::atomic_op::max;
  case __ESIMD_NS::atomic_op::cmpxchg:
    return __ESIMD_NS::native::lsc::atomic_op::cmpxchg;
  case __ESIMD_NS::atomic_op::bit_and:
    return __ESIMD_NS::native::lsc::atomic_op::bit_and;
  case __ESIMD_NS::atomic_op::bit_or:
    return __ESIMD_NS::native::lsc::atomic_op::bit_or;
  case __ESIMD_NS::atomic_op::bit_xor:
    return __ESIMD_NS::native::lsc::atomic_op::bit_xor;
  case __ESIMD_NS::atomic_op::minsint:
    return __ESIMD_NS::native::lsc::atomic_op::minsint;
  case __ESIMD_NS::atomic_op::maxsint:
    return __ESIMD_NS::native::lsc::atomic_op::maxsint;
  case __ESIMD_NS::atomic_op::fmax:
    return __ESIMD_NS::native::lsc::atomic_op::fmax;
  case __ESIMD_NS::atomic_op::fmin:
    return __ESIMD_NS::native::lsc::atomic_op::fmin;
  case __ESIMD_NS::atomic_op::fcmpwr:
    return __ESIMD_NS::native::lsc::atomic_op::fcmpwr;
  case __ESIMD_NS::atomic_op::fadd:
    return __ESIMD_NS::native::lsc::atomic_op::fadd;
  case __ESIMD_NS::atomic_op::fsub:
    return __ESIMD_NS::native::lsc::atomic_op::fsub;
  case __ESIMD_NS::atomic_op::load:
    return __ESIMD_NS::native::lsc::atomic_op::load;
  case __ESIMD_NS::atomic_op::store:
    return __ESIMD_NS::native::lsc::atomic_op::store;
  default:
    // The condition is false exactly for the unmapped ops, so instantiating
    // with xchg/predec triggers this assert; for all mapped ops it is a
    // no-op true assertion.
    static_assert(has_lsc_equivalent<Op>() && "Unsupported LSC atomic op");
  }
}
| 265 | + |
/// Inverse of to_lsc_atomic_op(): maps a native LSC atomic operation back to
/// the generic ESIMD atomic_op at compile time.
/// NOTE(review): there is no default case - presumably the switch is
/// exhaustive over native::lsc::atomic_op; if an enumerator is ever added
/// without a mapping here, constant evaluation falls off the end of the
/// function and fails to produce a value. Verify against the enum definition.
template <__ESIMD_NS::native::lsc::atomic_op Op>
constexpr __ESIMD_NS::atomic_op to_atomic_op() {
  switch (Op) {
  case __ESIMD_NS::native::lsc::atomic_op::add:
    return __ESIMD_NS::atomic_op::add;
  case __ESIMD_NS::native::lsc::atomic_op::sub:
    return __ESIMD_NS::atomic_op::sub;
  case __ESIMD_NS::native::lsc::atomic_op::inc:
    return __ESIMD_NS::atomic_op::inc;
  case __ESIMD_NS::native::lsc::atomic_op::dec:
    return __ESIMD_NS::atomic_op::dec;
  case __ESIMD_NS::native::lsc::atomic_op::min:
    return __ESIMD_NS::atomic_op::min;
  case __ESIMD_NS::native::lsc::atomic_op::max:
    return __ESIMD_NS::atomic_op::max;
  case __ESIMD_NS::native::lsc::atomic_op::cmpxchg:
    return __ESIMD_NS::atomic_op::cmpxchg;
  case __ESIMD_NS::native::lsc::atomic_op::bit_and:
    return __ESIMD_NS::atomic_op::bit_and;
  case __ESIMD_NS::native::lsc::atomic_op::bit_or:
    return __ESIMD_NS::atomic_op::bit_or;
  case __ESIMD_NS::native::lsc::atomic_op::bit_xor:
    return __ESIMD_NS::atomic_op::bit_xor;
  case __ESIMD_NS::native::lsc::atomic_op::minsint:
    return __ESIMD_NS::atomic_op::minsint;
  case __ESIMD_NS::native::lsc::atomic_op::maxsint:
    return __ESIMD_NS::atomic_op::maxsint;
  case __ESIMD_NS::native::lsc::atomic_op::fmax:
    return __ESIMD_NS::atomic_op::fmax;
  case __ESIMD_NS::native::lsc::atomic_op::fmin:
    return __ESIMD_NS::atomic_op::fmin;
  case __ESIMD_NS::native::lsc::atomic_op::fcmpwr:
    return __ESIMD_NS::atomic_op::fcmpwr;
  case __ESIMD_NS::native::lsc::atomic_op::fadd:
    return __ESIMD_NS::atomic_op::fadd;
  case __ESIMD_NS::native::lsc::atomic_op::fsub:
    return __ESIMD_NS::atomic_op::fsub;
  case __ESIMD_NS::native::lsc::atomic_op::load:
    return __ESIMD_NS::atomic_op::load;
  case __ESIMD_NS::native::lsc::atomic_op::store:
    return __ESIMD_NS::atomic_op::store;
  }
}
| 309 | + |
| 310 | +template <__ESIMD_NS::atomic_op Op> constexpr int get_num_args() { |
| 311 | + if constexpr (has_lsc_equivalent<Op>()) { |
| 312 | + return get_num_args<to_lsc_atomic_op<Op>()>(); |
| 313 | + } else { |
| 314 | + switch (Op) { |
| 315 | + case __ESIMD_NS::atomic_op::xchg: |
| 316 | + case __ESIMD_NS::atomic_op::predec: |
| 317 | + return 1; |
| 318 | + default: |
| 319 | + return -1; // error |
| 320 | + } |
| 321 | + } |
| 322 | +} |
| 323 | + |
| 324 | +} // namespace detail |
| 325 | + |
| 326 | +} // namespace esimd |
| 327 | +} // namespace intel |
| 328 | +} // namespace ext |
206 | 329 | } // __SYCL_INLINE_VER_NAMESPACE(_V1)
|
207 | 330 | } // namespace sycl
|
0 commit comments