Skip to content

Commit 92da579

Browse files
authored
[SYCL][ESIMD] Move ESIMD APIs to sycl::ext::intel::experimental::esimd. (#3695)
* [SYCL][ESIMD] Move ESIMD APIs to sycl::ext::intel::experimental::esimd. This makes the ESIMD extension namespace conforming to the SYCL2020 spec. Necessary file renames will be done as a separate commit. Signed-off-by: kbobrovs <[email protected]>
1 parent 325d664 commit 92da579

35 files changed

+930
-869
lines changed

llvm/lib/SYCLLowerIR/LowerESIMD.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1243,7 +1243,9 @@ SmallPtrSet<Type *, 4> collectGenXVolatileTypes(Module &M) {
12431243
if (!PTy)
12441244
continue;
12451245
auto GTy = dyn_cast<StructType>(PTy->getPointerElementType());
1246-
if (!GTy || !GTy->getName().endswith("cl::sycl::INTEL::gpu::simd"))
1246+
// TODO FIXME relying on type name in LLVM IR is fragile, needs rework
1247+
if (!GTy || !GTy->getName().endswith(
1248+
"cl::sycl::ext::intel::experimental::esimd::simd"))
12471249
continue;
12481250
assert(GTy->getNumContainedTypes() == 1);
12491251
auto VTy = GTy->getContainedType(0);
@@ -1326,7 +1328,8 @@ size_t SYCLLowerESIMDPass::runOnFunction(Function &F,
13261328

13271329
// process ESIMD builtins that go through special handling instead of
13281330
// the translation procedure
1329-
if (Name.startswith("N2cl4sycl5INTEL3gpu8slm_init")) {
1331+
// TODO FIXME slm_init should be made top-level __esimd_slm_init
1332+
if (Name.startswith("N2cl4sycl3ext5intel12experimental5esimd8slm_init")) {
13301333
// tag the kernel with meta-data SLMSize, and remove this builtin
13311334
translateSLMInit(*CI);
13321335
ESIMDToErases.push_back(CI);

llvm/test/SYCLLowerIR/esimd_lower_intrins.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ define dso_local spir_func void @FUNC_29() {
167167

168168
define dso_local spir_kernel void @FUNC_30() {
169169
; CHECK: define dso_local spir_kernel void @FUNC_30()
170-
call spir_func void @_ZN2cl4sycl5INTEL3gpu8slm_initEj(i32 1023)
170+
call spir_func void @_ZN2cl4sycl3ext5intel12experimental5esimd8slm_initEj(i32 1023)
171171
ret void
172172
; CHECK-NEXT: ret void
173173
}
@@ -358,7 +358,7 @@ declare dso_local spir_func <32 x i32> @_Z24__esimd_media_block_loadIiLi4ELi8E14
358358
declare dso_local spir_func void @_Z25__esimd_media_block_storeIiLi4ELi8E14ocl_image2d_woEvjT2_jjjjN2cm3gen13__vector_typeIT_XmlT0_T1_EE4typeE(i32 %0, %opencl.image2d_wo_t addrspace(1)* %1, i32 %2, i32 %3, i32 %4, i32 %5, <32 x i32> %6)
359359
declare dso_local spir_func <32 x i32> @_Z13__esimd_vloadIiLi32EEN2cm3gen13__vector_typeIT_XT0_EE4typeEPKS5_(<32 x i32> addrspace(4)* %0)
360360
declare dso_local spir_func void @_Z14__esimd_vstoreIfLi16EEvPN2cm3gen13__vector_typeIT_XT0_EE4typeES5_(<16 x float> addrspace(4)* %0, <16 x float> %1)
361-
declare dso_local spir_func void @_ZN2cl4sycl5INTEL3gpu8slm_initEj(i32)
361+
declare dso_local spir_func void @_ZN2cl4sycl3ext5intel12experimental5esimd8slm_initEj(i32)
362362
declare dso_local spir_func <16 x i32> @_Z14__esimd_uudp4aIjjjjLi16EEN2cl4sycl5INTEL3gpu11vector_typeIT_XT3_EE4typeENS4_IT0_XT3_EE4typeENS4_IT1_XT3_EE4typeENS4_IT2_XT3_EE4typeE(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)
363363
declare dso_local spir_func <16 x i32> @_Z14__esimd_usdp4aIjiiiLi16EEN2cl4sycl5INTEL3gpu11vector_typeIT_XT3_EE4typeENS4_IT0_XT3_EE4typeENS4_IT1_XT3_EE4typeENS4_IT2_XT3_EE4typeE(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)
364364
declare dso_local spir_func <16 x i32> @_Z14__esimd_sudp4aIijjjLi16EEN2cl4sycl5INTEL3gpu11vector_typeIT_XT3_EE4typeENS4_IT0_XT3_EE4typeENS4_IT1_XT3_EE4typeENS4_IT2_XT3_EE4typeE(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)

sycl/include/CL/sycl/INTEL/esimd/detail/esimd_host_util.hpp

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,20 @@
1616

1717
__SYCL_INLINE_NAMESPACE(cl) {
1818
namespace sycl {
19-
2019
namespace detail {
2120
namespace half_impl {
2221
class half;
2322
} // namespace half_impl
2423
} // namespace detail
24+
} // namespace sycl
25+
} // __SYCL_INLINE_NAMESPACE(cl)
2526

26-
namespace INTEL {
27-
namespace gpu {
27+
__SYCL_INLINE_NAMESPACE(cl) {
28+
namespace sycl {
29+
namespace ext {
30+
namespace intel {
31+
namespace experimental {
32+
namespace esimd {
2833
namespace emu {
2934
namespace detail {
3035

@@ -466,8 +471,10 @@ template <> struct dwordtype<unsigned int> { static const bool value = true; };
466471

467472
} // namespace detail
468473
} // namespace emu
469-
} // namespace gpu
470-
} // namespace INTEL
474+
} // namespace esimd
475+
} // namespace experimental
476+
} // namespace intel
477+
} // namespace ext
471478
} // namespace sycl
472479
} // __SYCL_INLINE_NAMESPACE(cl)
473480

sycl/include/CL/sycl/INTEL/esimd/detail/esimd_intrin.hpp

Lines changed: 56 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818
#include <assert.h>
1919
#include <cstdint>
2020

21-
#define __SIGD sycl::INTEL::gpu::detail
21+
#define __SEIEED sycl::ext::intel::experimental::esimd::detail
22+
#define __SEIEE sycl::ext::intel::experimental::esimd
2223

2324
// \brief __esimd_rdregion: region access intrinsic.
2425
//
@@ -64,13 +65,13 @@
6465
//
6566
template <typename T, int N, int M, int VStride, int Width, int Stride,
6667
int ParentWidth = 0>
67-
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SIGD::vector_type_t<T, M>
68-
__esimd_rdregion(__SIGD::vector_type_t<T, N> Input, uint16_t Offset);
68+
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t<T, M>
69+
__esimd_rdregion(__SEIEED::vector_type_t<T, N> Input, uint16_t Offset);
6970

7071
template <typename T, int N, int M, int ParentWidth = 0>
71-
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SIGD::vector_type_t<T, M>
72-
__esimd_rdindirect(__SIGD::vector_type_t<T, N> Input,
73-
__SIGD::vector_type_t<uint16_t, M> Offset);
72+
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t<T, M>
73+
__esimd_rdindirect(__SEIEED::vector_type_t<T, N> Input,
74+
__SEIEED::vector_type_t<uint16_t, M> Offset);
7475

7576
// __esimd_wrregion returns the updated vector with the region updated.
7677
//
@@ -121,28 +122,30 @@ __esimd_rdindirect(__SIGD::vector_type_t<T, N> Input,
121122
//
122123
template <typename T, int N, int M, int VStride, int Width, int Stride,
123124
int ParentWidth = 0>
124-
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SIGD::vector_type_t<T, N>
125-
__esimd_wrregion(__SIGD::vector_type_t<T, N> OldVal,
126-
__SIGD::vector_type_t<T, M> NewVal, uint16_t Offset,
127-
sycl::INTEL::gpu::mask_type_t<M> Mask = 1);
125+
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t<T, N>
126+
__esimd_wrregion(__SEIEED::vector_type_t<T, N> OldVal,
127+
__SEIEED::vector_type_t<T, M> NewVal, uint16_t Offset,
128+
__SEIEE::mask_type_t<M> Mask = 1);
128129

129130
template <typename T, int N, int M, int ParentWidth = 0>
130-
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SIGD::vector_type_t<T, N>
131-
__esimd_wrindirect(__SIGD::vector_type_t<T, N> OldVal,
132-
__SIGD::vector_type_t<T, M> NewVal,
133-
__SIGD::vector_type_t<uint16_t, M> Offset,
134-
sycl::INTEL::gpu::mask_type_t<M> Mask = 1);
131+
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t<T, N>
132+
__esimd_wrindirect(__SEIEED::vector_type_t<T, N> OldVal,
133+
__SEIEED::vector_type_t<T, M> NewVal,
134+
__SEIEED::vector_type_t<uint16_t, M> Offset,
135+
__SEIEE::mask_type_t<M> Mask = 1);
135136

136137
__SYCL_INLINE_NAMESPACE(cl) {
137138
namespace sycl {
138-
namespace INTEL {
139-
namespace gpu {
139+
namespace ext {
140+
namespace intel {
141+
namespace experimental {
142+
namespace esimd {
140143
namespace detail {
141144

142145
/// read from a basic region of a vector, return a vector
143146
template <typename BT, int BN, typename RTy>
144-
__SIGD::vector_type_t<typename RTy::element_type, RTy::length> ESIMD_INLINE
145-
readRegion(const __SIGD::vector_type_t<BT, BN> &Base, RTy Region) {
147+
__SEIEED::vector_type_t<typename RTy::element_type, RTy::length> ESIMD_INLINE
148+
readRegion(const __SEIEED::vector_type_t<BT, BN> &Base, RTy Region) {
146149
using ElemTy = typename RTy::element_type;
147150
auto Base1 = bitcast<ElemTy, BT, BN>(Base);
148151
constexpr int Bytes = BN * sizeof(BT);
@@ -163,8 +166,9 @@ readRegion(const __SIGD::vector_type_t<BT, BN> &Base, RTy Region) {
163166

164167
/// read from a nested region of a vector, return a vector
165168
template <typename BT, int BN, typename T, typename U>
166-
ESIMD_INLINE __SIGD::vector_type_t<typename T::element_type, T::length>
167-
readRegion(const __SIGD::vector_type_t<BT, BN> &Base, std::pair<T, U> Region) {
169+
ESIMD_INLINE __SEIEED::vector_type_t<typename T::element_type, T::length>
170+
readRegion(const __SEIEED::vector_type_t<BT, BN> &Base,
171+
std::pair<T, U> Region) {
168172
// parent-region type
169173
using PaTy = typename shape_type<U>::type;
170174
constexpr int BN1 = PaTy::length;
@@ -206,8 +210,11 @@ readRegion(const __SIGD::vector_type_t<BT, BN> &Base, std::pair<T, U> Region) {
206210
}
207211

208212
} // namespace detail
209-
} // namespace gpu
210-
} // namespace INTEL
213+
214+
} // namespace esimd
215+
} // namespace experimental
216+
} // namespace intel
217+
} // namespace ext
211218
} // namespace sycl
212219
} // __SYCL_INLINE_NAMESPACE(cl)
213220

@@ -217,40 +224,40 @@ readRegion(const __SIGD::vector_type_t<BT, BN> &Base, std::pair<T, U> Region) {
217224
// optimization on simd object
218225
//
219226
template <typename T, int N>
220-
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SIGD::vector_type_t<T, N>
221-
__esimd_vload(const __SIGD::vector_type_t<T, N> *ptr);
227+
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t<T, N>
228+
__esimd_vload(const __SEIEED::vector_type_t<T, N> *ptr);
222229

223230
// vstore
224231
//
225232
// map to the backend vstore intrinsic, used by compiler to control
226233
// optimization on simd object
227234
template <typename T, int N>
228235
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void
229-
__esimd_vstore(__SIGD::vector_type_t<T, N> *ptr,
230-
__SIGD::vector_type_t<T, N> vals);
236+
__esimd_vstore(__SEIEED::vector_type_t<T, N> *ptr,
237+
__SEIEED::vector_type_t<T, N> vals);
231238

232239
template <typename T, int N>
233240
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION uint16_t
234-
__esimd_any(__SIGD::vector_type_t<T, N> src);
241+
__esimd_any(__SEIEED::vector_type_t<T, N> src);
235242

236243
template <typename T, int N>
237244
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION uint16_t
238-
__esimd_all(__SIGD::vector_type_t<T, N> src);
245+
__esimd_all(__SEIEED::vector_type_t<T, N> src);
239246

240247
#ifndef __SYCL_DEVICE_ONLY__
241248

242249
// Implementations of ESIMD intrinsics for the SYCL host device
243250
template <typename T, int N, int M, int VStride, int Width, int Stride,
244251
int ParentWidth>
245-
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SIGD::vector_type_t<T, M>
246-
__esimd_rdregion(__SIGD::vector_type_t<T, N> Input, uint16_t Offset) {
252+
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t<T, M>
253+
__esimd_rdregion(__SEIEED::vector_type_t<T, N> Input, uint16_t Offset) {
247254
uint16_t EltOffset = Offset / sizeof(T);
248255
assert(Offset % sizeof(T) == 0);
249256

250257
int NumRows = M / Width;
251258
assert(M % Width == 0);
252259

253-
__SIGD::vector_type_t<T, M> Result;
260+
__SEIEED::vector_type_t<T, M> Result;
254261
int Index = 0;
255262
for (int i = 0; i < NumRows; ++i) {
256263
for (int j = 0; j < Width; ++j) {
@@ -261,10 +268,10 @@ __esimd_rdregion(__SIGD::vector_type_t<T, N> Input, uint16_t Offset) {
261268
}
262269

263270
template <typename T, int N, int M, int ParentWidth>
264-
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SIGD::vector_type_t<T, M>
265-
__esimd_rdindirect(__SIGD::vector_type_t<T, N> Input,
266-
__SIGD::vector_type_t<uint16_t, M> Offset) {
267-
__SIGD::vector_type_t<T, M> Result;
271+
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t<T, M>
272+
__esimd_rdindirect(__SEIEED::vector_type_t<T, N> Input,
273+
__SEIEED::vector_type_t<uint16_t, M> Offset) {
274+
__SEIEED::vector_type_t<T, M> Result;
268275
for (int i = 0; i < M; ++i) {
269276
uint16_t EltOffset = Offset[i] / sizeof(T);
270277
assert(Offset[i] % sizeof(T) == 0);
@@ -276,17 +283,17 @@ __esimd_rdindirect(__SIGD::vector_type_t<T, N> Input,
276283

277284
template <typename T, int N, int M, int VStride, int Width, int Stride,
278285
int ParentWidth>
279-
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SIGD::vector_type_t<T, N>
280-
__esimd_wrregion(__SIGD::vector_type_t<T, N> OldVal,
281-
__SIGD::vector_type_t<T, M> NewVal, uint16_t Offset,
282-
sycl::INTEL::gpu::mask_type_t<M> Mask) {
286+
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t<T, N>
287+
__esimd_wrregion(__SEIEED::vector_type_t<T, N> OldVal,
288+
__SEIEED::vector_type_t<T, M> NewVal, uint16_t Offset,
289+
__SEIEE::mask_type_t<M> Mask) {
283290
uint16_t EltOffset = Offset / sizeof(T);
284291
assert(Offset % sizeof(T) == 0);
285292

286293
int NumRows = M / Width;
287294
assert(M % Width == 0);
288295

289-
__SIGD::vector_type_t<T, N> Result = OldVal;
296+
__SEIEED::vector_type_t<T, N> Result = OldVal;
290297
int Index = 0;
291298
for (int i = 0; i < NumRows; ++i) {
292299
for (int j = 0; j < Width; ++j) {
@@ -299,12 +306,12 @@ __esimd_wrregion(__SIGD::vector_type_t<T, N> OldVal,
299306
}
300307

301308
template <typename T, int N, int M, int ParentWidth>
302-
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SIGD::vector_type_t<T, N>
303-
__esimd_wrindirect(__SIGD::vector_type_t<T, N> OldVal,
304-
__SIGD::vector_type_t<T, M> NewVal,
305-
__SIGD::vector_type_t<uint16_t, M> Offset,
306-
sycl::INTEL::gpu::mask_type_t<M> Mask) {
307-
__SIGD::vector_type_t<T, N> Result = OldVal;
309+
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t<T, N>
310+
__esimd_wrindirect(__SEIEED::vector_type_t<T, N> OldVal,
311+
__SEIEED::vector_type_t<T, M> NewVal,
312+
__SEIEED::vector_type_t<uint16_t, M> Offset,
313+
__SEIEE::mask_type_t<M> Mask) {
314+
__SEIEED::vector_type_t<T, N> Result = OldVal;
308315
for (int i = 0; i < M; ++i) {
309316
if (Mask[i]) {
310317
uint16_t EltOffset = Offset[i] / sizeof(T);
@@ -318,4 +325,5 @@ __esimd_wrindirect(__SIGD::vector_type_t<T, N> OldVal,
318325

319326
#endif // __SYCL_DEVICE_ONLY__
320327

321-
#undef __SIGD
328+
#undef __SEIEE
329+
#undef __SEIEED

0 commit comments

Comments
 (0)