intel · pvchupin · May 6, 2021 · May 5, 2021 · May 5, 2021 · May 5, 2021
@@ -1243,7 +1243,9 @@ SmallPtrSet<Type *, 4> collectGenXVolatileTypes(Module &M) {
     if (!PTy)
       continue;
     auto GTy = dyn_cast<StructType>(PTy->getPointerElementType());
-    if (!GTy || !GTy->getName().endswith("cl::sycl::INTEL::gpu::simd"))
+    // TODO FIXME relying on type name in LLVM IR is fragile, needs rework
+    if (!GTy || !GTy->getName().endswith(
+                    "cl::sycl::ext::intel::experimental::esimd::simd"))
       continue;
     assert(GTy->getNumContainedTypes() == 1);
     auto VTy = GTy->getContainedType(0);
@@ -1326,7 +1328,8 @@ size_t SYCLLowerESIMDPass::runOnFunction(Function &F,
 
       // process ESIMD builtins that go through special handling instead of
       // the translation procedure
-      if (Name.startswith("N2cl4sycl5INTEL3gpu8slm_init")) {
+      // TODO FIXME slm_init should be made top-level __esimd_slm_init
+      if (Name.startswith("N2cl4sycl3ext5intel12experimental5esimd8slm_init")) {
         // tag the kernel with meta-data SLMSize, and remove this builtin
         translateSLMInit(*CI);
         ESIMDToErases.push_back(CI);

@@ -167,7 +167,7 @@ define dso_local spir_func void  @FUNC_29() {
 
 define dso_local spir_kernel void  @FUNC_30() {
 ; CHECK: define dso_local spir_kernel void  @FUNC_30()
-  call spir_func void @_ZN2cl4sycl5INTEL3gpu8slm_initEj(i32 1023)
+  call spir_func void @_ZN2cl4sycl3ext5intel12experimental5esimd8slm_initEj(i32 1023)
   ret void
 ; CHECK-NEXT: ret void
 }
@@ -358,7 +358,7 @@ declare dso_local spir_func <32 x i32> @_Z24__esimd_media_block_loadIiLi4ELi8E14
 declare dso_local spir_func void @_Z25__esimd_media_block_storeIiLi4ELi8E14ocl_image2d_woEvjT2_jjjjN2cm3gen13__vector_typeIT_XmlT0_T1_EE4typeE(i32 %0, %opencl.image2d_wo_t addrspace(1)* %1, i32 %2, i32 %3, i32 %4, i32 %5, <32 x i32> %6)
 declare dso_local spir_func <32 x i32> @_Z13__esimd_vloadIiLi32EEN2cm3gen13__vector_typeIT_XT0_EE4typeEPKS5_(<32 x i32> addrspace(4)* %0)
 declare dso_local spir_func void @_Z14__esimd_vstoreIfLi16EEvPN2cm3gen13__vector_typeIT_XT0_EE4typeES5_(<16 x float> addrspace(4)* %0, <16 x float> %1)
-declare dso_local spir_func void @_ZN2cl4sycl5INTEL3gpu8slm_initEj(i32)
+declare dso_local spir_func void @_ZN2cl4sycl3ext5intel12experimental5esimd8slm_initEj(i32)
 declare dso_local spir_func <16 x i32> @_Z14__esimd_uudp4aIjjjjLi16EEN2cl4sycl5INTEL3gpu11vector_typeIT_XT3_EE4typeENS4_IT0_XT3_EE4typeENS4_IT1_XT3_EE4typeENS4_IT2_XT3_EE4typeE(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)
 declare dso_local spir_func <16 x i32> @_Z14__esimd_usdp4aIjiiiLi16EEN2cl4sycl5INTEL3gpu11vector_typeIT_XT3_EE4typeENS4_IT0_XT3_EE4typeENS4_IT1_XT3_EE4typeENS4_IT2_XT3_EE4typeE(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)
 declare dso_local spir_func <16 x i32> @_Z14__esimd_sudp4aIijjjLi16EEN2cl4sycl5INTEL3gpu11vector_typeIT_XT3_EE4typeENS4_IT0_XT3_EE4typeENS4_IT1_XT3_EE4typeENS4_IT2_XT3_EE4typeE(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)

@@ -16,15 +16,20 @@
 
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
-
 namespace detail {
 namespace half_impl {
 class half;
 } // namespace half_impl
 } // namespace detail
+} // namespace sycl
+} // __SYCL_INLINE_NAMESPACE(cl)
 
-namespace INTEL {
-namespace gpu {
+__SYCL_INLINE_NAMESPACE(cl) {
+namespace sycl {
+namespace ext {
+namespace intel {
+namespace experimental {
+namespace esimd {
 namespace emu {
 namespace detail {
 
@@ -466,8 +471,10 @@ template <> struct dwordtype<unsigned int> { static const bool value = true; };
 
 } // namespace detail
 } // namespace emu
-} // namespace gpu
-} // namespace INTEL
+} // namespace esimd
+} // namespace experimental
+} // namespace intel
+} // namespace ext
 } // namespace sycl
 } // __SYCL_INLINE_NAMESPACE(cl)
 

@@ -18,7 +18,8 @@
 #include <assert.h>
 #include <cstdint>
 
-#define __SIGD sycl::INTEL::gpu::detail
+#define __SEIEED sycl::ext::intel::experimental::esimd::detail
+#define __SEIEE sycl::ext::intel::experimental::esimd
 
 // \brief __esimd_rdregion: region access intrinsic.
 //
@@ -64,13 +65,13 @@
 //
 template <typename T, int N, int M, int VStride, int Width, int Stride,
           int ParentWidth = 0>
-SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SIGD::vector_type_t<T, M>
-__esimd_rdregion(__SIGD::vector_type_t<T, N> Input, uint16_t Offset);
+SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t<T, M>
+__esimd_rdregion(__SEIEED::vector_type_t<T, N> Input, uint16_t Offset);
 
 template <typename T, int N, int M, int ParentWidth = 0>
-SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SIGD::vector_type_t<T, M>
-__esimd_rdindirect(__SIGD::vector_type_t<T, N> Input,
-                   __SIGD::vector_type_t<uint16_t, M> Offset);
+SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t<T, M>
+__esimd_rdindirect(__SEIEED::vector_type_t<T, N> Input,
+                   __SEIEED::vector_type_t<uint16_t, M> Offset);
 
 // __esimd_wrregion returns the updated vector with the region updated.
 //
@@ -121,28 +122,30 @@ __esimd_rdindirect(__SIGD::vector_type_t<T, N> Input,
 //
 template <typename T, int N, int M, int VStride, int Width, int Stride,
           int ParentWidth = 0>
-SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SIGD::vector_type_t<T, N>
-__esimd_wrregion(__SIGD::vector_type_t<T, N> OldVal,
-                 __SIGD::vector_type_t<T, M> NewVal, uint16_t Offset,
-                 sycl::INTEL::gpu::mask_type_t<M> Mask = 1);
+SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t<T, N>
+__esimd_wrregion(__SEIEED::vector_type_t<T, N> OldVal,
+                 __SEIEED::vector_type_t<T, M> NewVal, uint16_t Offset,
+                 __SEIEE::mask_type_t<M> Mask = 1);
 
 template <typename T, int N, int M, int ParentWidth = 0>
-SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SIGD::vector_type_t<T, N>
-__esimd_wrindirect(__SIGD::vector_type_t<T, N> OldVal,
-                   __SIGD::vector_type_t<T, M> NewVal,
-                   __SIGD::vector_type_t<uint16_t, M> Offset,
-                   sycl::INTEL::gpu::mask_type_t<M> Mask = 1);
+SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t<T, N>
+__esimd_wrindirect(__SEIEED::vector_type_t<T, N> OldVal,
+                   __SEIEED::vector_type_t<T, M> NewVal,
+                   __SEIEED::vector_type_t<uint16_t, M> Offset,
+                   __SEIEE::mask_type_t<M> Mask = 1);
 
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
-namespace INTEL {
-namespace gpu {
+namespace ext {
+namespace intel {
+namespace experimental {
+namespace esimd {
 namespace detail {
 
 /// read from a basic region of a vector, return a vector
 template <typename BT, int BN, typename RTy>
-__SIGD::vector_type_t<typename RTy::element_type, RTy::length> ESIMD_INLINE
-readRegion(const __SIGD::vector_type_t<BT, BN> &Base, RTy Region) {
+__SEIEED::vector_type_t<typename RTy::element_type, RTy::length> ESIMD_INLINE
+readRegion(const __SEIEED::vector_type_t<BT, BN> &Base, RTy Region) {
   using ElemTy = typename RTy::element_type;
   auto Base1 = bitcast<ElemTy, BT, BN>(Base);
   constexpr int Bytes = BN * sizeof(BT);
@@ -163,8 +166,9 @@ readRegion(const __SIGD::vector_type_t<BT, BN> &Base, RTy Region) {
 
 /// read from a nested region of a vector, return a vector
 template <typename BT, int BN, typename T, typename U>
-ESIMD_INLINE __SIGD::vector_type_t<typename T::element_type, T::length>
-readRegion(const __SIGD::vector_type_t<BT, BN> &Base, std::pair<T, U> Region) {
+ESIMD_INLINE __SEIEED::vector_type_t<typename T::element_type, T::length>
+readRegion(const __SEIEED::vector_type_t<BT, BN> &Base,
+           std::pair<T, U> Region) {
   // parent-region type
   using PaTy = typename shape_type<U>::type;
   constexpr int BN1 = PaTy::length;
@@ -206,8 +210,11 @@ readRegion(const __SIGD::vector_type_t<BT, BN> &Base, std::pair<T, U> Region) {
 }
 
 } // namespace detail
-} // namespace gpu
-} // namespace INTEL
+
+} // namespace esimd
+} // namespace experimental
+} // namespace intel
+} // namespace ext
 } // namespace sycl
 } // __SYCL_INLINE_NAMESPACE(cl)
 
@@ -217,40 +224,40 @@ readRegion(const __SIGD::vector_type_t<BT, BN> &Base, std::pair<T, U> Region) {
 // optimization on simd object
 //
 template <typename T, int N>
-SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SIGD::vector_type_t<T, N>
-__esimd_vload(const __SIGD::vector_type_t<T, N> *ptr);
+SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t<T, N>
+__esimd_vload(const __SEIEED::vector_type_t<T, N> *ptr);
 
 // vstore
 //
 // map to the backend vstore intrinsic, used by compiler to control
 // optimization on simd object
 template <typename T, int N>
 SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void
-__esimd_vstore(__SIGD::vector_type_t<T, N> *ptr,
-               __SIGD::vector_type_t<T, N> vals);
+__esimd_vstore(__SEIEED::vector_type_t<T, N> *ptr,
+               __SEIEED::vector_type_t<T, N> vals);
 
 template <typename T, int N>
 SYCL_EXTERNAL SYCL_ESIMD_FUNCTION uint16_t
-__esimd_any(__SIGD::vector_type_t<T, N> src);
+__esimd_any(__SEIEED::vector_type_t<T, N> src);
 
 template <typename T, int N>
 SYCL_EXTERNAL SYCL_ESIMD_FUNCTION uint16_t
-__esimd_all(__SIGD::vector_type_t<T, N> src);
+__esimd_all(__SEIEED::vector_type_t<T, N> src);
 
 #ifndef __SYCL_DEVICE_ONLY__
 
 // Implementations of ESIMD intrinsics for the SYCL host device
 template <typename T, int N, int M, int VStride, int Width, int Stride,
           int ParentWidth>
-SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SIGD::vector_type_t<T, M>
-__esimd_rdregion(__SIGD::vector_type_t<T, N> Input, uint16_t Offset) {
+SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t<T, M>
+__esimd_rdregion(__SEIEED::vector_type_t<T, N> Input, uint16_t Offset) {
   uint16_t EltOffset = Offset / sizeof(T);
   assert(Offset % sizeof(T) == 0);
 
   int NumRows = M / Width;
   assert(M % Width == 0);
 
-  __SIGD::vector_type_t<T, M> Result;
+  __SEIEED::vector_type_t<T, M> Result;
   int Index = 0;
   for (int i = 0; i < NumRows; ++i) {
     for (int j = 0; j < Width; ++j) {
@@ -261,10 +268,10 @@ __esimd_rdregion(__SIGD::vector_type_t<T, N> Input, uint16_t Offset) {
 }
 
 template <typename T, int N, int M, int ParentWidth>
-SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SIGD::vector_type_t<T, M>
-__esimd_rdindirect(__SIGD::vector_type_t<T, N> Input,
-                   __SIGD::vector_type_t<uint16_t, M> Offset) {
-  __SIGD::vector_type_t<T, M> Result;
+SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t<T, M>
+__esimd_rdindirect(__SEIEED::vector_type_t<T, N> Input,
+                   __SEIEED::vector_type_t<uint16_t, M> Offset) {
+  __SEIEED::vector_type_t<T, M> Result;
   for (int i = 0; i < M; ++i) {
     uint16_t EltOffset = Offset[i] / sizeof(T);
     assert(Offset[i] % sizeof(T) == 0);
@@ -276,17 +283,17 @@ __esimd_rdindirect(__SIGD::vector_type_t<T, N> Input,
 
 template <typename T, int N, int M, int VStride, int Width, int Stride,
           int ParentWidth>
-SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SIGD::vector_type_t<T, N>
-__esimd_wrregion(__SIGD::vector_type_t<T, N> OldVal,
-                 __SIGD::vector_type_t<T, M> NewVal, uint16_t Offset,
-                 sycl::INTEL::gpu::mask_type_t<M> Mask) {
+SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t<T, N>
+__esimd_wrregion(__SEIEED::vector_type_t<T, N> OldVal,
+                 __SEIEED::vector_type_t<T, M> NewVal, uint16_t Offset,
+                 __SEIEE::mask_type_t<M> Mask) {
   uint16_t EltOffset = Offset / sizeof(T);
   assert(Offset % sizeof(T) == 0);
 
   int NumRows = M / Width;
   assert(M % Width == 0);
 
-  __SIGD::vector_type_t<T, N> Result = OldVal;
+  __SEIEED::vector_type_t<T, N> Result = OldVal;
   int Index = 0;
   for (int i = 0; i < NumRows; ++i) {
     for (int j = 0; j < Width; ++j) {
@@ -299,12 +306,12 @@ __esimd_wrregion(__SIGD::vector_type_t<T, N> OldVal,
 }
 
 template <typename T, int N, int M, int ParentWidth>
-SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SIGD::vector_type_t<T, N>
-__esimd_wrindirect(__SIGD::vector_type_t<T, N> OldVal,
-                   __SIGD::vector_type_t<T, M> NewVal,
-                   __SIGD::vector_type_t<uint16_t, M> Offset,
-                   sycl::INTEL::gpu::mask_type_t<M> Mask) {
-  __SIGD::vector_type_t<T, N> Result = OldVal;
+SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t<T, N>
+__esimd_wrindirect(__SEIEED::vector_type_t<T, N> OldVal,
+                   __SEIEED::vector_type_t<T, M> NewVal,
+                   __SEIEED::vector_type_t<uint16_t, M> Offset,
+                   __SEIEE::mask_type_t<M> Mask) {
+  __SEIEED::vector_type_t<T, N> Result = OldVal;
   for (int i = 0; i < M; ++i) {
     if (Mask[i]) {
       uint16_t EltOffset = Offset[i] / sizeof(T);
@@ -318,4 +325,5 @@ __esimd_wrindirect(__SIGD::vector_type_t<T, N> OldVal,
 
 #endif // __SYCL_DEVICE_ONLY__
 
-#undef __SIGD
+#undef __SEIEE
+#undef __SEIEED