14
14
#include < CL/sycl/INTEL/esimd/detail/esimd_types.hpp>
15
15
#include < CL/sycl/INTEL/esimd/detail/esimd_util.hpp>
16
16
#include < CL/sycl/INTEL/esimd/esimd_enum.hpp>
17
- #include < CL/sycl/detail/accessor_impl.hpp>
18
17
19
18
#include < assert.h>
20
19
#include < cstdint>
21
20
21
+ #define __SIGD sycl::INTEL::gpu::detail
22
+
22
23
// \brief __esimd_rdregion: region access intrinsic.
23
24
//
24
25
// @param T the element data type, one of i8, i16, i32, i64, half, float,
63
64
//
64
65
// Declaration only (no body at this point in the file). Takes a
// vector_type_t<T, N> Input and a uint16_t Offset and returns a
// vector_type_t<T, M>. ParentWidth defaults to 0. The host implementation
// later in this file divides Offset by sizeof(T) and asserts that it is a
// multiple of sizeof(T), i.e. it treats Offset as a byte offset.
template <typename T, int N, int M, int VStride, int Width, int Stride,
65
66
int ParentWidth = 0 >
66
- SYCL_EXTERNAL sycl::INTEL::gpu ::vector_type_t <T, M>
67
- __esimd_rdregion (sycl::INTEL::gpu ::vector_type_t <T, N> Input, uint16_t Offset);
67
+ SYCL_EXTERNAL __SIGD ::vector_type_t <T, M>
68
+ __esimd_rdregion (__SIGD ::vector_type_t <T, N> Input, uint16_t Offset);
68
69
69
70
// Declaration only. Takes a vector_type_t<T, N> Input and a
// vector_type_t<uint16_t, M> Offset, and returns a vector_type_t<T, M>.
// ParentWidth defaults to 0. The host implementation later in this file
// loops over i in [0, M), divides Offset[i] by sizeof(T) and asserts that
// each Offset[i] is a multiple of sizeof(T).
template <typename T, int N, int M, int ParentWidth = 0 >
70
- SYCL_EXTERNAL sycl::INTEL::gpu ::vector_type_t <T, M>
71
- __esimd_rdindirect (sycl::INTEL::gpu ::vector_type_t <T, N> Input,
72
- sycl::INTEL::gpu ::vector_type_t <uint16_t , M> Offset);
71
+ SYCL_EXTERNAL __SIGD ::vector_type_t <T, M>
72
+ __esimd_rdindirect (__SIGD ::vector_type_t <T, N> Input,
73
+ __SIGD ::vector_type_t <uint16_t , M> Offset);
73
74
74
75
// __esimd_wrregion returns the updated vector with the region updated.
75
76
//
@@ -120,46 +121,28 @@ __esimd_rdindirect(sycl::INTEL::gpu::vector_type_t<T, N> Input,
120
121
//
121
122
// Declaration only. OldVal is a vector_type_t<T, N>, NewVal a
// vector_type_t<T, M>, Offset a uint16_t, and Mask a mask_type_t<M> that
// defaults to 1. Returns a vector_type_t<T, N>. ParentWidth defaults to 0.
// The host implementation later in this file starts its result from OldVal
// and divides Offset by sizeof(T), asserting it is a multiple of sizeof(T).
template <typename T, int N, int M, int VStride, int Width, int Stride,
122
123
int ParentWidth = 0 >
123
- SYCL_EXTERNAL sycl::INTEL::gpu ::vector_type_t <T, N>
124
- __esimd_wrregion (sycl::INTEL::gpu ::vector_type_t <T, N> OldVal,
125
- sycl::INTEL::gpu ::vector_type_t <T, M> NewVal, uint16_t Offset,
124
+ SYCL_EXTERNAL __SIGD ::vector_type_t <T, N>
125
+ __esimd_wrregion (__SIGD ::vector_type_t <T, N> OldVal,
126
+ __SIGD ::vector_type_t <T, M> NewVal, uint16_t Offset,
126
127
sycl::INTEL::gpu::mask_type_t <M> Mask = 1 );
127
128
128
129
// Declaration only. Same parameters as __esimd_wrregion except that Offset
// is a vector_type_t<uint16_t, M> rather than a single uint16_t. Mask
// defaults to 1 and ParentWidth to 0. Returns a vector_type_t<T, N>.
// The host implementation later in this file starts its result from OldVal
// and, for each i in [0, M) where Mask[i] is set, computes
// Offset[i] / sizeof(T) as the element offset.
// NOTE(review): presumably NewVal[i] is then stored at that element offset;
// the rest of the host body is not visible here, so confirm.
template <typename T, int N, int M, int ParentWidth = 0 >
129
- SYCL_EXTERNAL sycl::INTEL::gpu ::vector_type_t <T, N>
130
- __esimd_wrindirect (sycl::INTEL::gpu ::vector_type_t <T, N> OldVal,
131
- sycl::INTEL::gpu ::vector_type_t <T, M> NewVal,
132
- sycl::INTEL::gpu ::vector_type_t <uint16_t , M> Offset,
130
+ SYCL_EXTERNAL __SIGD ::vector_type_t <T, N>
131
+ __esimd_wrindirect (__SIGD ::vector_type_t <T, N> OldVal,
132
+ __SIGD ::vector_type_t <T, M> NewVal,
133
+ __SIGD ::vector_type_t <uint16_t , M> Offset,
133
134
sycl::INTEL::gpu::mask_type_t <M> Mask = 1 );
134
135
135
136
__SYCL_INLINE_NAMESPACE (cl) {
136
137
namespace sycl {
137
138
namespace INTEL {
138
139
namespace gpu {
139
- // TODO dependencies on the std SYCL concepts like images
140
- // should be refactored in a separate header
141
- class AccessorPrivateProxy {
142
- public:
143
- #ifdef __SYCL_DEVICE_ONLY__
144
- template <typename AccessorTy>
145
- static auto getNativeImageObj (const AccessorTy &Acc) {
146
- return Acc.getNativeImageObj ();
147
- }
148
- #else
149
- template <typename AccessorTy>
150
- static auto getImageRange (const AccessorTy &Acc) {
151
- return Acc.getAccessRange ();
152
- }
153
- static auto getElemSize (const sycl::detail::AccessorBaseHost &Acc) {
154
- return Acc.getElemSize ();
155
- }
156
- #endif
157
- };
140
+ namespace detail {
158
141
159
142
/// read from a basic region of a vector, return a vector
160
143
template <typename BT, int BN, typename RTy>
161
- vector_type_t <typename RTy::element_type, RTy::length>
162
- ESIMD_INLINE readRegion (const vector_type_t <BT, BN> &Base, RTy Region) {
144
+ __SIGD:: vector_type_t <typename RTy::element_type, RTy::length> ESIMD_INLINE
145
+ readRegion (const __SIGD:: vector_type_t <BT, BN> &Base, RTy Region) {
163
146
using ElemTy = typename RTy::element_type;
164
147
auto Base1 = bitcast<ElemTy, BT, BN>(Base);
165
148
constexpr int Bytes = BN * sizeof (BT);
@@ -180,8 +163,8 @@ vector_type_t<typename RTy::element_type, RTy::length>
180
163
181
164
/// read from a nested region of a vector, return a vector
182
165
template <typename BT, int BN, typename T, typename U>
183
- ESIMD_INLINE vector_type_t <typename T::element_type, T::length>
184
- readRegion (const vector_type_t <BT, BN> &Base, std::pair<T, U> Region) {
166
+ ESIMD_INLINE __SIGD:: vector_type_t <typename T::element_type, T::length>
167
+ readRegion (const __SIGD:: vector_type_t <BT, BN> &Base, std::pair<T, U> Region) {
185
168
// parent-region type
186
169
using PaTy = typename shape_type<U>::type;
187
170
constexpr int BN1 = PaTy::length;
@@ -222,6 +205,7 @@ readRegion(const vector_type_t<BT, BN> &Base, std::pair<T, U> Region) {
222
205
}
223
206
}
224
207
208
+ } // namespace detail
225
209
} // namespace gpu
226
210
} // namespace INTEL
227
211
} // namespace sycl
@@ -233,37 +217,37 @@ readRegion(const vector_type_t<BT, BN> &Base, std::pair<T, U> Region) {
233
217
// optimization on simd object
234
218
//
235
219
// Declaration only: takes a pointer to a const vector_type_t<T, N> and
// returns a vector_type_t<T, N> by value.
template <typename T, int N>
236
- SYCL_EXTERNAL sycl::INTEL::gpu ::vector_type_t <T, N>
237
- __esimd_vload (const sycl::INTEL::gpu ::vector_type_t <T, N> *ptr);
220
+ SYCL_EXTERNAL __SIGD ::vector_type_t <T, N>
221
+ __esimd_vload (const __SIGD ::vector_type_t <T, N> *ptr);
238
222
239
223
// vstore
240
224
//
241
225
// map to the backend vstore intrinsic, used by compiler to control
242
226
// optimization on simd object
243
227
// Declaration only: takes a pointer `ptr` to a vector_type_t<T, N> and a
// vector_type_t<T, N> `vals` by value; returns void.
template <typename T, int N>
244
- SYCL_EXTERNAL void __esimd_vstore (sycl::INTEL::gpu ::vector_type_t <T, N> *ptr,
245
- sycl::INTEL::gpu ::vector_type_t <T, N> vals);
228
+ SYCL_EXTERNAL void __esimd_vstore (__SIGD ::vector_type_t <T, N> *ptr,
229
+ __SIGD ::vector_type_t <T, N> vals);
246
230
247
231
// Declaration only: takes a vector_type_t<T, N> by value and returns a
// uint16_t.
// NOTE(review): presumably the result is non-zero when at least one element
// of src is set -- confirm against the backend intrinsic definition.
template <typename T, int N>
248
- SYCL_EXTERNAL uint16_t __esimd_any (sycl::INTEL::gpu ::vector_type_t <T, N> src);
232
+ SYCL_EXTERNAL uint16_t __esimd_any (__SIGD ::vector_type_t <T, N> src);
249
233
250
234
// Declaration only: takes a vector_type_t<T, N> by value and returns a
// uint16_t.
// NOTE(review): presumably the result is non-zero only when every element
// of src is set -- confirm against the backend intrinsic definition.
template <typename T, int N>
251
- SYCL_EXTERNAL uint16_t __esimd_all (sycl::INTEL::gpu ::vector_type_t <T, N> src);
235
+ SYCL_EXTERNAL uint16_t __esimd_all (__SIGD ::vector_type_t <T, N> src);
252
236
253
237
#ifndef __SYCL_DEVICE_ONLY__
254
238
255
239
// Implementations of ESIMD intrinsics for the SYCL host device
256
240
template <typename T, int N, int M, int VStride, int Width, int Stride,
257
241
int ParentWidth>
258
- SYCL_EXTERNAL sycl::INTEL::gpu ::vector_type_t <T, M>
259
- __esimd_rdregion (sycl::INTEL::gpu ::vector_type_t <T, N> Input, uint16_t Offset) {
242
+ SYCL_EXTERNAL __SIGD ::vector_type_t <T, M>
243
+ __esimd_rdregion (__SIGD ::vector_type_t <T, N> Input, uint16_t Offset) {
260
244
uint16_t EltOffset = Offset / sizeof (T);
261
245
assert (Offset % sizeof (T) == 0 );
262
246
263
247
int NumRows = M / Width;
264
248
assert (M % Width == 0 );
265
249
266
- sycl::INTEL::gpu ::vector_type_t <T, M> Result;
250
+ __SIGD ::vector_type_t <T, M> Result;
267
251
int Index = 0 ;
268
252
for (int i = 0 ; i < NumRows; ++i) {
269
253
for (int j = 0 ; j < Width; ++j) {
@@ -274,10 +258,10 @@ __esimd_rdregion(sycl::INTEL::gpu::vector_type_t<T, N> Input, uint16_t Offset) {
274
258
}
275
259
276
260
template <typename T, int N, int M, int ParentWidth>
277
- SYCL_EXTERNAL sycl::INTEL::gpu ::vector_type_t <T, M>
278
- __esimd_rdindirect (sycl::INTEL::gpu ::vector_type_t <T, N> Input,
279
- sycl::INTEL::gpu ::vector_type_t <uint16_t , M> Offset) {
280
- sycl::INTEL::gpu ::vector_type_t <T, M> Result;
261
+ SYCL_EXTERNAL __SIGD ::vector_type_t <T, M>
262
+ __esimd_rdindirect (__SIGD ::vector_type_t <T, N> Input,
263
+ __SIGD ::vector_type_t <uint16_t , M> Offset) {
264
+ __SIGD ::vector_type_t <T, M> Result;
281
265
for (int i = 0 ; i < M; ++i) {
282
266
uint16_t EltOffset = Offset[i] / sizeof (T);
283
267
assert (Offset[i] % sizeof (T) == 0 );
@@ -289,17 +273,17 @@ __esimd_rdindirect(sycl::INTEL::gpu::vector_type_t<T, N> Input,
289
273
290
274
template <typename T, int N, int M, int VStride, int Width, int Stride,
291
275
int ParentWidth>
292
- SYCL_EXTERNAL sycl::INTEL::gpu ::vector_type_t <T, N>
293
- __esimd_wrregion (sycl::INTEL::gpu ::vector_type_t <T, N> OldVal,
294
- sycl::INTEL::gpu ::vector_type_t <T, M> NewVal, uint16_t Offset,
276
+ SYCL_EXTERNAL __SIGD ::vector_type_t <T, N>
277
+ __esimd_wrregion (__SIGD ::vector_type_t <T, N> OldVal,
278
+ __SIGD ::vector_type_t <T, M> NewVal, uint16_t Offset,
295
279
sycl::INTEL::gpu::mask_type_t <M> Mask) {
296
280
uint16_t EltOffset = Offset / sizeof (T);
297
281
assert (Offset % sizeof (T) == 0 );
298
282
299
283
int NumRows = M / Width;
300
284
assert (M % Width == 0 );
301
285
302
- sycl::INTEL::gpu ::vector_type_t <T, N> Result = OldVal;
286
+ __SIGD ::vector_type_t <T, N> Result = OldVal;
303
287
int Index = 0 ;
304
288
for (int i = 0 ; i < NumRows; ++i) {
305
289
for (int j = 0 ; j < Width; ++j) {
@@ -312,12 +296,12 @@ __esimd_wrregion(sycl::INTEL::gpu::vector_type_t<T, N> OldVal,
312
296
}
313
297
314
298
template <typename T, int N, int M, int ParentWidth>
315
- SYCL_EXTERNAL sycl::INTEL::gpu ::vector_type_t <T, N>
316
- __esimd_wrindirect (sycl::INTEL::gpu ::vector_type_t <T, N> OldVal,
317
- sycl::INTEL::gpu ::vector_type_t <T, M> NewVal,
318
- sycl::INTEL::gpu ::vector_type_t <uint16_t , M> Offset,
299
+ SYCL_EXTERNAL __SIGD ::vector_type_t <T, N>
300
+ __esimd_wrindirect (__SIGD ::vector_type_t <T, N> OldVal,
301
+ __SIGD ::vector_type_t <T, M> NewVal,
302
+ __SIGD ::vector_type_t <uint16_t , M> Offset,
319
303
sycl::INTEL::gpu::mask_type_t <M> Mask) {
320
- sycl::INTEL::gpu ::vector_type_t <T, N> Result = OldVal;
304
+ __SIGD ::vector_type_t <T, N> Result = OldVal;
321
305
for (int i = 0 ; i < M; ++i) {
322
306
if (Mask[i]) {
323
307
uint16_t EltOffset = Offset[i] / sizeof (T);
@@ -330,3 +314,5 @@ __esimd_wrindirect(sycl::INTEL::gpu::vector_type_t<T, N> OldVal,
330
314
}
331
315
332
316
#endif // __SYCL_DEVICE_ONLY__
317
+
318
+ #undef __SIGD
0 commit comments