Skip to content

Commit 9da3104

Browse files
authored
[SYCL][ESIMD][NFC] Fix namespace of ESIMD implementation details. (#3487)
* [SYCL][ESIMD] Fix namespace of ESIMD implementation details. - Move ESIMD API declarations that are implementaion details into sycl::INTEL::gpu::detail and sycl::INTEL::gpu::emu::detail namespaces. - Make enum { BYTE = 1, WORD = 2, ..., GRF = 32 } local to a struct to avoid global namespace pollution. - Relocate AccessorPrivateProxy to memory intrinsics header(s) where it is only supposed to be used. - get rid of extra __esimd namespace Signed-off-by: Konstantin S Bobrovsky <[email protected]>
1 parent 29d5893 commit 9da3104

14 files changed

+914
-805
lines changed

sycl/include/CL/sycl/INTEL/esimd/detail/esimd_host_util.hpp

Lines changed: 17 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -14,19 +14,19 @@
1414

1515
#define SIMDCF_ELEMENT_SKIP(i)
1616

17-
namespace cl {
17+
__SYCL_INLINE_NAMESPACE(cl) {
1818
namespace sycl {
19+
1920
namespace detail {
2021
namespace half_impl {
2122
class half;
2223
} // namespace half_impl
2324
} // namespace detail
24-
} // namespace sycl
25-
} // namespace cl
2625

27-
using half = cl::sycl::detail::half_impl::half;
28-
29-
namespace EsimdEmulSys {
26+
namespace INTEL {
27+
namespace gpu {
28+
namespace emu {
29+
namespace detail {
3030

3131
constexpr int sat_is_on = 1;
3232

@@ -44,14 +44,10 @@ template <typename RT> struct satur {
4444
return (RT)val;
4545
}
4646

47-
#ifdef max
48-
#undef max
49-
#endif
50-
#ifdef min
51-
#undef min
52-
#endif
53-
const RT t_max = std::numeric_limits<RT>::max();
54-
const RT t_min = std::numeric_limits<RT>::min();
47+
// min/max can be macros on Windows, so wrap them into parens to avoid their
48+
// expansion
49+
const RT t_max = (std::numeric_limits<RT>::max)();
50+
const RT t_min = (std::numeric_limits<RT>::min)();
5551

5652
if (val > t_max) {
5753
return t_max;
@@ -112,8 +108,6 @@ template <> struct SetSatur<double, true> {
112108
static unsigned int set() { return sat_is_on; }
113109
};
114110

115-
} // namespace EsimdEmulSys
116-
117111
// used for intermediate type in dp4a emulation
118112
template <typename T1, typename T2> struct restype_ex {
119113
private:
@@ -470,10 +464,11 @@ template <typename T> struct dwordtype;
470464
template <> struct dwordtype<int> { static const bool value = true; };
471465
template <> struct dwordtype<unsigned int> { static const bool value = true; };
472466

473-
template <unsigned int N1, unsigned int N2> struct ressize {
474-
static const unsigned int size = (N1 > N2) ? N1 : N2;
475-
static const bool conformable =
476-
check_true < N1 % size == 0 && N2 % size == 0 > ::value;
477-
};
467+
} // namespace detail
468+
} // namespace emu
469+
} // namespace gpu
470+
} // namespace INTEL
471+
} // namespace sycl
472+
} // __SYCL_INLINE_NAMESPACE(cl)
478473

479-
#endif
474+
#endif // #ifndef __SYCL_DEVICE_ONLY__

sycl/include/CL/sycl/INTEL/esimd/detail/esimd_intrin.hpp

Lines changed: 44 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,12 @@
1414
#include <CL/sycl/INTEL/esimd/detail/esimd_types.hpp>
1515
#include <CL/sycl/INTEL/esimd/detail/esimd_util.hpp>
1616
#include <CL/sycl/INTEL/esimd/esimd_enum.hpp>
17-
#include <CL/sycl/detail/accessor_impl.hpp>
1817

1918
#include <assert.h>
2019
#include <cstdint>
2120

21+
#define __SIGD sycl::INTEL::gpu::detail
22+
2223
// \brief __esimd_rdregion: region access intrinsic.
2324
//
2425
// @param T the element data type, one of i8, i16, i32, i64, half, float,
@@ -63,13 +64,13 @@
6364
//
6465
template <typename T, int N, int M, int VStride, int Width, int Stride,
6566
int ParentWidth = 0>
66-
SYCL_EXTERNAL sycl::INTEL::gpu::vector_type_t<T, M>
67-
__esimd_rdregion(sycl::INTEL::gpu::vector_type_t<T, N> Input, uint16_t Offset);
67+
SYCL_EXTERNAL __SIGD::vector_type_t<T, M>
68+
__esimd_rdregion(__SIGD::vector_type_t<T, N> Input, uint16_t Offset);
6869

6970
template <typename T, int N, int M, int ParentWidth = 0>
70-
SYCL_EXTERNAL sycl::INTEL::gpu::vector_type_t<T, M>
71-
__esimd_rdindirect(sycl::INTEL::gpu::vector_type_t<T, N> Input,
72-
sycl::INTEL::gpu::vector_type_t<uint16_t, M> Offset);
71+
SYCL_EXTERNAL __SIGD::vector_type_t<T, M>
72+
__esimd_rdindirect(__SIGD::vector_type_t<T, N> Input,
73+
__SIGD::vector_type_t<uint16_t, M> Offset);
7374

7475
// __esimd_wrregion returns the updated vector with the region updated.
7576
//
@@ -120,46 +121,28 @@ __esimd_rdindirect(sycl::INTEL::gpu::vector_type_t<T, N> Input,
120121
//
121122
template <typename T, int N, int M, int VStride, int Width, int Stride,
122123
int ParentWidth = 0>
123-
SYCL_EXTERNAL sycl::INTEL::gpu::vector_type_t<T, N>
124-
__esimd_wrregion(sycl::INTEL::gpu::vector_type_t<T, N> OldVal,
125-
sycl::INTEL::gpu::vector_type_t<T, M> NewVal, uint16_t Offset,
124+
SYCL_EXTERNAL __SIGD::vector_type_t<T, N>
125+
__esimd_wrregion(__SIGD::vector_type_t<T, N> OldVal,
126+
__SIGD::vector_type_t<T, M> NewVal, uint16_t Offset,
126127
sycl::INTEL::gpu::mask_type_t<M> Mask = 1);
127128

128129
template <typename T, int N, int M, int ParentWidth = 0>
129-
SYCL_EXTERNAL sycl::INTEL::gpu::vector_type_t<T, N>
130-
__esimd_wrindirect(sycl::INTEL::gpu::vector_type_t<T, N> OldVal,
131-
sycl::INTEL::gpu::vector_type_t<T, M> NewVal,
132-
sycl::INTEL::gpu::vector_type_t<uint16_t, M> Offset,
130+
SYCL_EXTERNAL __SIGD::vector_type_t<T, N>
131+
__esimd_wrindirect(__SIGD::vector_type_t<T, N> OldVal,
132+
__SIGD::vector_type_t<T, M> NewVal,
133+
__SIGD::vector_type_t<uint16_t, M> Offset,
133134
sycl::INTEL::gpu::mask_type_t<M> Mask = 1);
134135

135136
__SYCL_INLINE_NAMESPACE(cl) {
136137
namespace sycl {
137138
namespace INTEL {
138139
namespace gpu {
139-
// TODO dependencies on the std SYCL concepts like images
140-
// should be refactored in a separate header
141-
class AccessorPrivateProxy {
142-
public:
143-
#ifdef __SYCL_DEVICE_ONLY__
144-
template <typename AccessorTy>
145-
static auto getNativeImageObj(const AccessorTy &Acc) {
146-
return Acc.getNativeImageObj();
147-
}
148-
#else
149-
template <typename AccessorTy>
150-
static auto getImageRange(const AccessorTy &Acc) {
151-
return Acc.getAccessRange();
152-
}
153-
static auto getElemSize(const sycl::detail::AccessorBaseHost &Acc) {
154-
return Acc.getElemSize();
155-
}
156-
#endif
157-
};
140+
namespace detail {
158141

159142
/// read from a basic region of a vector, return a vector
160143
template <typename BT, int BN, typename RTy>
161-
vector_type_t<typename RTy::element_type, RTy::length>
162-
ESIMD_INLINE readRegion(const vector_type_t<BT, BN> &Base, RTy Region) {
144+
__SIGD::vector_type_t<typename RTy::element_type, RTy::length> ESIMD_INLINE
145+
readRegion(const __SIGD::vector_type_t<BT, BN> &Base, RTy Region) {
163146
using ElemTy = typename RTy::element_type;
164147
auto Base1 = bitcast<ElemTy, BT, BN>(Base);
165148
constexpr int Bytes = BN * sizeof(BT);
@@ -180,8 +163,8 @@ vector_type_t<typename RTy::element_type, RTy::length>
180163

181164
/// read from a nested region of a vector, return a vector
182165
template <typename BT, int BN, typename T, typename U>
183-
ESIMD_INLINE vector_type_t<typename T::element_type, T::length>
184-
readRegion(const vector_type_t<BT, BN> &Base, std::pair<T, U> Region) {
166+
ESIMD_INLINE __SIGD::vector_type_t<typename T::element_type, T::length>
167+
readRegion(const __SIGD::vector_type_t<BT, BN> &Base, std::pair<T, U> Region) {
185168
// parent-region type
186169
using PaTy = typename shape_type<U>::type;
187170
constexpr int BN1 = PaTy::length;
@@ -222,6 +205,7 @@ readRegion(const vector_type_t<BT, BN> &Base, std::pair<T, U> Region) {
222205
}
223206
}
224207

208+
} // namespace detail
225209
} // namespace gpu
226210
} // namespace INTEL
227211
} // namespace sycl
@@ -233,37 +217,37 @@ readRegion(const vector_type_t<BT, BN> &Base, std::pair<T, U> Region) {
233217
// optimization on simd object
234218
//
235219
template <typename T, int N>
236-
SYCL_EXTERNAL sycl::INTEL::gpu::vector_type_t<T, N>
237-
__esimd_vload(const sycl::INTEL::gpu::vector_type_t<T, N> *ptr);
220+
SYCL_EXTERNAL __SIGD::vector_type_t<T, N>
221+
__esimd_vload(const __SIGD::vector_type_t<T, N> *ptr);
238222

239223
// vstore
240224
//
241225
// map to the backend vstore intrinsic, used by compiler to control
242226
// optimization on simd object
243227
template <typename T, int N>
244-
SYCL_EXTERNAL void __esimd_vstore(sycl::INTEL::gpu::vector_type_t<T, N> *ptr,
245-
sycl::INTEL::gpu::vector_type_t<T, N> vals);
228+
SYCL_EXTERNAL void __esimd_vstore(__SIGD::vector_type_t<T, N> *ptr,
229+
__SIGD::vector_type_t<T, N> vals);
246230

247231
template <typename T, int N>
248-
SYCL_EXTERNAL uint16_t __esimd_any(sycl::INTEL::gpu::vector_type_t<T, N> src);
232+
SYCL_EXTERNAL uint16_t __esimd_any(__SIGD::vector_type_t<T, N> src);
249233

250234
template <typename T, int N>
251-
SYCL_EXTERNAL uint16_t __esimd_all(sycl::INTEL::gpu::vector_type_t<T, N> src);
235+
SYCL_EXTERNAL uint16_t __esimd_all(__SIGD::vector_type_t<T, N> src);
252236

253237
#ifndef __SYCL_DEVICE_ONLY__
254238

255239
// Implementations of ESIMD intrinsics for the SYCL host device
256240
template <typename T, int N, int M, int VStride, int Width, int Stride,
257241
int ParentWidth>
258-
SYCL_EXTERNAL sycl::INTEL::gpu::vector_type_t<T, M>
259-
__esimd_rdregion(sycl::INTEL::gpu::vector_type_t<T, N> Input, uint16_t Offset) {
242+
SYCL_EXTERNAL __SIGD::vector_type_t<T, M>
243+
__esimd_rdregion(__SIGD::vector_type_t<T, N> Input, uint16_t Offset) {
260244
uint16_t EltOffset = Offset / sizeof(T);
261245
assert(Offset % sizeof(T) == 0);
262246

263247
int NumRows = M / Width;
264248
assert(M % Width == 0);
265249

266-
sycl::INTEL::gpu::vector_type_t<T, M> Result;
250+
__SIGD::vector_type_t<T, M> Result;
267251
int Index = 0;
268252
for (int i = 0; i < NumRows; ++i) {
269253
for (int j = 0; j < Width; ++j) {
@@ -274,10 +258,10 @@ __esimd_rdregion(sycl::INTEL::gpu::vector_type_t<T, N> Input, uint16_t Offset) {
274258
}
275259

276260
template <typename T, int N, int M, int ParentWidth>
277-
SYCL_EXTERNAL sycl::INTEL::gpu::vector_type_t<T, M>
278-
__esimd_rdindirect(sycl::INTEL::gpu::vector_type_t<T, N> Input,
279-
sycl::INTEL::gpu::vector_type_t<uint16_t, M> Offset) {
280-
sycl::INTEL::gpu::vector_type_t<T, M> Result;
261+
SYCL_EXTERNAL __SIGD::vector_type_t<T, M>
262+
__esimd_rdindirect(__SIGD::vector_type_t<T, N> Input,
263+
__SIGD::vector_type_t<uint16_t, M> Offset) {
264+
__SIGD::vector_type_t<T, M> Result;
281265
for (int i = 0; i < M; ++i) {
282266
uint16_t EltOffset = Offset[i] / sizeof(T);
283267
assert(Offset[i] % sizeof(T) == 0);
@@ -289,17 +273,17 @@ __esimd_rdindirect(sycl::INTEL::gpu::vector_type_t<T, N> Input,
289273

290274
template <typename T, int N, int M, int VStride, int Width, int Stride,
291275
int ParentWidth>
292-
SYCL_EXTERNAL sycl::INTEL::gpu::vector_type_t<T, N>
293-
__esimd_wrregion(sycl::INTEL::gpu::vector_type_t<T, N> OldVal,
294-
sycl::INTEL::gpu::vector_type_t<T, M> NewVal, uint16_t Offset,
276+
SYCL_EXTERNAL __SIGD::vector_type_t<T, N>
277+
__esimd_wrregion(__SIGD::vector_type_t<T, N> OldVal,
278+
__SIGD::vector_type_t<T, M> NewVal, uint16_t Offset,
295279
sycl::INTEL::gpu::mask_type_t<M> Mask) {
296280
uint16_t EltOffset = Offset / sizeof(T);
297281
assert(Offset % sizeof(T) == 0);
298282

299283
int NumRows = M / Width;
300284
assert(M % Width == 0);
301285

302-
sycl::INTEL::gpu::vector_type_t<T, N> Result = OldVal;
286+
__SIGD::vector_type_t<T, N> Result = OldVal;
303287
int Index = 0;
304288
for (int i = 0; i < NumRows; ++i) {
305289
for (int j = 0; j < Width; ++j) {
@@ -312,12 +296,12 @@ __esimd_wrregion(sycl::INTEL::gpu::vector_type_t<T, N> OldVal,
312296
}
313297

314298
template <typename T, int N, int M, int ParentWidth>
315-
SYCL_EXTERNAL sycl::INTEL::gpu::vector_type_t<T, N>
316-
__esimd_wrindirect(sycl::INTEL::gpu::vector_type_t<T, N> OldVal,
317-
sycl::INTEL::gpu::vector_type_t<T, M> NewVal,
318-
sycl::INTEL::gpu::vector_type_t<uint16_t, M> Offset,
299+
SYCL_EXTERNAL __SIGD::vector_type_t<T, N>
300+
__esimd_wrindirect(__SIGD::vector_type_t<T, N> OldVal,
301+
__SIGD::vector_type_t<T, M> NewVal,
302+
__SIGD::vector_type_t<uint16_t, M> Offset,
319303
sycl::INTEL::gpu::mask_type_t<M> Mask) {
320-
sycl::INTEL::gpu::vector_type_t<T, N> Result = OldVal;
304+
__SIGD::vector_type_t<T, N> Result = OldVal;
321305
for (int i = 0; i < M; ++i) {
322306
if (Mask[i]) {
323307
uint16_t EltOffset = Offset[i] / sizeof(T);
@@ -330,3 +314,5 @@ __esimd_wrindirect(sycl::INTEL::gpu::vector_type_t<T, N> OldVal,
330314
}
331315

332316
#endif // __SYCL_DEVICE_ONLY__
317+
318+
#undef __SIGD

0 commit comments

Comments
 (0)