intel
diff --git a/‎SYCL/Basic/alloc_pinned_host_memory.cpp
Lines changed: 2 additions & 1 deletion b/‎SYCL/Basic/alloc_pinned_host_memory.cpp
Lines changed: 2 additions & 1 deletion
diff --git a/‎SYCL/Basic/event_profiling_info.cpp
Lines changed: 2 additions & 4 deletions b/‎SYCL/Basic/event_profiling_info.cpp
Lines changed: 2 additions & 4 deletions
diff --git a/‎SYCL/Basic/parallel_for_indexers.cpp
Lines changed: 0 additions & 4 deletions b/‎SYCL/Basic/parallel_for_indexers.cpp
Lines changed: 0 additions & 4 deletions
diff --git a/‎SYCL/ESIMD/BitonicSortK.cpp
Lines changed: 12 additions & 11 deletions b/‎SYCL/ESIMD/BitonicSortK.cpp
Lines changed: 12 additions & 11 deletions
diff --git a/‎SYCL/ESIMD/BitonicSortKv2.cpp
Lines changed: 8 additions & 8 deletions b/‎SYCL/ESIMD/BitonicSortKv2.cpp
Lines changed: 8 additions & 8 deletions
diff --git a/‎SYCL/ESIMD/PrefixSum.cpp
Lines changed: 1 addition & 1 deletion b/‎SYCL/ESIMD/PrefixSum.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎SYCL/ESIMD/Prefix_Local_sum1.cpp
Lines changed: 1 addition & 1 deletion b/‎SYCL/ESIMD/Prefix_Local_sum1.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎SYCL/ESIMD/Prefix_Local_sum2.cpp
Lines changed: 1 addition & 1 deletion b/‎SYCL/ESIMD/Prefix_Local_sum2.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎SYCL/ESIMD/Prefix_Local_sum3.cpp
Lines changed: 1 addition & 1 deletion b/‎SYCL/ESIMD/Prefix_Local_sum3.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎SYCL/ESIMD/Stencil.cpp
Lines changed: 1 addition & 1 deletion b/‎SYCL/ESIMD/Stencil.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎SYCL/ESIMD/accessor.cpp
Lines changed: 1 addition & 1 deletion b/‎SYCL/ESIMD/accessor.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎SYCL/ESIMD/accessor_gather_scatter.cpp
Lines changed: 1 addition & 1 deletion b/‎SYCL/ESIMD/accessor_gather_scatter.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎SYCL/ESIMD/accessor_load_store.cpp
Lines changed: 1 addition & 1 deletion b/‎SYCL/ESIMD/accessor_load_store.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎SYCL/ESIMD/dp4a.cpp
Lines changed: 1 addition & 1 deletion b/‎SYCL/ESIMD/dp4a.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎SYCL/ESIMD/ext_math.cpp
Lines changed: 1 addition & 1 deletion b/‎SYCL/ESIMD/ext_math.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎SYCL/ESIMD/fp_args_size/Inputs/fp_args_size_common.hpp
Lines changed: 1 addition & 1 deletion b/‎SYCL/ESIMD/fp_args_size/Inputs/fp_args_size_common.hpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎SYCL/ESIMD/fp_call_from_func.cpp
Lines changed: 1 addition & 1 deletion b/‎SYCL/ESIMD/fp_call_from_func.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎SYCL/ESIMD/fp_call_recursive.cpp
Lines changed: 7 additions & 7 deletions b/‎SYCL/ESIMD/fp_call_recursive.cpp
Lines changed: 7 additions & 7 deletions
diff --git a/‎SYCL/ESIMD/fp_in_phi.cpp
Lines changed: 12 additions & 12 deletions b/‎SYCL/ESIMD/fp_in_phi.cpp
Lines changed: 12 additions & 12 deletions
diff --git a/‎SYCL/ESIMD/fp_in_select.cpp
Lines changed: 7 additions & 7 deletions b/‎SYCL/ESIMD/fp_in_select.cpp
Lines changed: 7 additions & 7 deletions
diff --git a/‎SYCL/ESIMD/histogram.cpp
Lines changed: 1 addition & 1 deletion b/‎SYCL/ESIMD/histogram.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎SYCL/ESIMD/histogram_256_slm.cpp
Lines changed: 1 addition & 1 deletion b/‎SYCL/ESIMD/histogram_256_slm.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎SYCL/ESIMD/histogram_256_slm_spec.cpp
Lines changed: 1 addition & 1 deletion b/‎SYCL/ESIMD/histogram_256_slm_spec.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎SYCL/ESIMD/histogram_2d.cpp
Lines changed: 1 addition & 1 deletion b/‎SYCL/ESIMD/histogram_2d.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎SYCL/ESIMD/histogram_raw_send.cpp
Lines changed: 1 addition & 1 deletion b/‎SYCL/ESIMD/histogram_raw_send.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎SYCL/ESIMD/kmeans/kmeans.cpp
Lines changed: 1 addition & 1 deletion b/‎SYCL/ESIMD/kmeans/kmeans.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎SYCL/ESIMD/linear/linear.cpp
Lines changed: 1 addition & 1 deletion b/‎SYCL/ESIMD/linear/linear.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎SYCL/ESIMD/mandelbrot/mandelbrot.cpp
Lines changed: 1 addition & 1 deletion b/‎SYCL/ESIMD/mandelbrot/mandelbrot.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎SYCL/ESIMD/mandelbrot/mandelbrot_spec.cpp
Lines changed: 1 addition & 1 deletion b/‎SYCL/ESIMD/mandelbrot/mandelbrot_spec.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎SYCL/ESIMD/matrix_transpose.cpp
Lines changed: 1 addition & 1 deletion b/‎SYCL/ESIMD/matrix_transpose.cpp
Lines changed: 1 addition & 1 deletion
@@ -37,6 +37,7 @@ int main() {
   }
 }
 
+// CHECK:---> piMemBufferCreate
 // CHECK:---> piMemBufferCreate
 // CHECK-NEXT: {{.*}} : {{.*}}
-// CHECK-NEXT: {{.*}} : 9
+// CHECK-NEXT: {{.*}} : 17
@@ -1,9 +1,5 @@
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 //
-// Looks like there is a bug in the test. There are rare sporadic failures of
-// this test on different devices.
-// REQUIRES: TEMPORARY_DISABLED
-//
 // RUN: %HOST_RUN_PLACEHOLDER %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
@@ -67,6 +63,8 @@ int main() {
     event kernelEvent = kernelQueue.submit([&](sycl::handler &CGH) {
       CGH.single_task<class EmptyKernel>([=]() {});
     });
+    copyEvent.wait();
+    kernelEvent.wait();
 
     assert(verifyProfiling(copyEvent) && verifyProfiling(kernelEvent));
   }
 
@@ -6,10 +6,6 @@
 // RUN: %GPU_RUN_PLACEHOLDER %t2.out
 // RUN: %ACC_RUN_PLACEHOLDER %t2.out
 
-// TODO: Unexpected result
-// TODO: _indexers.cpp:37: int main(): Assertion `id == -1' failed.
-// XFAIL: level_zero&&gpu
-
 #include <CL/sycl.hpp>
 
 #include <cassert>
 
@@ -18,7 +18,7 @@
 #include <iostream>
 
 using namespace cl::sycl;
-using namespace sycl::INTEL::gpu;
+using namespace sycl::ext::intel::experimental::esimd;
 using namespace std;
 
 #define LOG2_ELEMENTS 16 // 24
@@ -598,11 +598,11 @@ int BitonicSort::Solve(uint32_t *pInputs, uint32_t *pOutputs, uint32_t size) {
     auto e = pQueue_->submit([&](handler &cgh) {
       auto acci = bufi.get_access<access::mode::read>(cgh);
       auto acco = bufo.get_access<access::mode::write>(cgh);
-      cgh.parallel_for<class Sort256>(SortGlobalRange * SortLocalRange,
-                                      [=](id<1> i) SYCL_ESIMD_KERNEL {
-                                        using namespace sycl::INTEL::gpu;
-                                        cmk_bitonic_sort_256(acci, acco, i);
-                                      });
+      cgh.parallel_for<class Sort256>(
+          SortGlobalRange * SortLocalRange, [=](id<1> i) SYCL_ESIMD_KERNEL {
+            using namespace sycl::ext::intel::experimental::esimd;
+            cmk_bitonic_sort_256(acci, acco, i);
+          });
     });
     e.wait();
     total_time += esimd_test::report_time("kernel time", e, e);
@@ -638,11 +638,12 @@ int BitonicSort::Solve(uint32_t *pInputs, uint32_t *pOutputs, uint32_t size) {
         buffer<uint32_t, 1> buf(pOutputs, range<1>(size));
         mergeEvent[k] = pQueue_->submit([&](handler &cgh) {
           auto acc = buf.get_access<access::mode::read_write>(cgh);
-          cgh.parallel_for<class Merge>(MergeGlobalRange * MergeLocalRange,
-                                        [=](id<1> tid) SYCL_ESIMD_KERNEL {
-                                          using namespace sycl::INTEL::gpu;
-                                          cmk_bitonic_merge(acc, j, i, tid);
-                                        });
+          cgh.parallel_for<class Merge>(
+              MergeGlobalRange * MergeLocalRange,
+              [=](id<1> tid) SYCL_ESIMD_KERNEL {
+                using namespace sycl::ext::intel::experimental::esimd;
+                cmk_bitonic_merge(acc, j, i, tid);
+              });
         });
         k++;
       }
 
@@ -19,7 +19,7 @@
 #include <iostream>
 
 using namespace cl::sycl;
-using namespace sycl::INTEL::gpu;
+using namespace sycl::ext::intel::experimental::esimd;
 using namespace std;
 
 #define LOG2_ELEMENTS 16 // 24
@@ -517,7 +517,7 @@ int BitonicSort::Solve(uint32_t *pInputs, uint32_t *pOutputs, uint32_t size) {
     auto e = pQueue_->submit([&](handler &cgh) {
       cgh.parallel_for<class Sort256>(
           SortGlobalRange * SortLocalRange, [=](id<1> i) SYCL_ESIMD_KERNEL {
-            using namespace sycl::INTEL::gpu;
+            using namespace sycl::ext::intel::experimental::esimd;
             cmk_bitonic_sort_256(pInputs, pOutputs, i);
           });
     });
@@ -553,12 +553,12 @@ int BitonicSort::Solve(uint32_t *pInputs, uint32_t *pOutputs, uint32_t size) {
       // locally.
       for (int j = i; j >= 8; j--) {
         mergeEvent[k] = pQueue_->submit([&](handler &cgh) {
-          cgh.parallel_for<class Merge>(MergeGlobalRange * MergeLocalRange,
-                                        [=](id<1> tid) SYCL_ESIMD_KERNEL {
-                                          using namespace sycl::INTEL::gpu;
-                                          cmk_bitonic_merge(pOutputs, j, i,
-                                                            tid);
-                                        });
+          cgh.parallel_for<class Merge>(
+              MergeGlobalRange * MergeLocalRange,
+              [=](id<1> tid) SYCL_ESIMD_KERNEL {
+                using namespace sycl::ext::intel::experimental::esimd;
+                cmk_bitonic_merge(pOutputs, j, i, tid);
+              });
         });
         // mergeEvent[k].wait();
         k++;
 
@@ -38,7 +38,7 @@
 #define REMAINING_ENTRIES 64
 
 using namespace cl::sycl;
-using namespace sycl::INTEL::gpu;
+using namespace sycl::ext::intel::experimental::esimd;
 
 void compute_local_prefixsum(unsigned int prefixSum[], unsigned int size,
                              unsigned elem_stride, unsigned thread_stride) {
 
@@ -36,7 +36,7 @@
 #define MIN_NUM_THREADS 1
 
 using namespace cl::sycl;
-using namespace sycl::INTEL::gpu;
+using namespace sycl::ext::intel::experimental::esimd;
 
 void compute_local_prefixsum(unsigned int input[], unsigned int prefixSum[],
                              unsigned int size) {
 
@@ -36,7 +36,7 @@
 #define MIN_NUM_THREADS 1
 
 using namespace cl::sycl;
-using namespace sycl::INTEL::gpu;
+using namespace sycl::ext::intel::experimental::esimd;
 
 void compute_local_prefixsum(unsigned int input[], unsigned int prefixSum[],
                              unsigned int size) {
 
@@ -38,7 +38,7 @@
 #define REMAINING_ENTRIES 64
 
 using namespace cl::sycl;
-using namespace sycl::INTEL::gpu;
+using namespace sycl::ext::intel::experimental::esimd;
 
 void compute_local_prefixsum(unsigned int prefixSum[], unsigned int size,
                              unsigned elem_stride, unsigned thread_stride,
 
@@ -107,7 +107,7 @@ int main(int argc, char *argv[]) {
     auto e = q.submit([&](handler &cgh) {
       cgh.parallel_for<class Stencil_kernel>(
           GlobalRange * LocalRange, [=](item<2> it) SYCL_ESIMD_KERNEL {
-            using namespace sycl::INTEL::gpu;
+            using namespace sycl::ext::intel::experimental::esimd;
             uint h_pos = it.get_id(0);
             uint v_pos = it.get_id(1);
 
 
@@ -50,7 +50,7 @@ int main() {
 
       cgh.parallel_for<class Test>(
           range<1>(1), [=](sycl::id<1> i) SYCL_ESIMD_KERNEL {
-            using namespace sycl::INTEL::gpu;
+            using namespace sycl::ext::intel::experimental::esimd;
             unsigned int offset = 0;
             for (int k = 0; k < VL / 16; k++) {
               simd<Ty, 16> var = block_load<Ty, 16>(acc0, offset);
 
@@ -32,7 +32,7 @@ template <typename T, unsigned VL, unsigned STRIDE> struct Kernel {
   Kernel(Acc<T> acc) : acc(acc) {}
 
   void operator()(id<1> i) const SYCL_ESIMD_KERNEL {
-    using namespace sycl::INTEL::gpu;
+    using namespace sycl::ext::intel::experimental::esimd;
     uint32_t ii = static_cast<uint32_t>(i.get(0));
     // every STRIDE threads (subgroups with sg_size=1) access contiguous block
     // of STRIDE*VL elements
 
@@ -30,7 +30,7 @@ template <typename T> struct Kernel {
   Kernel(Acc<T> acc) : acc(acc) {}
 
   void operator()(id<1> i) const SYCL_ESIMD_KERNEL {
-    using namespace sycl::INTEL::gpu;
+    using namespace sycl::ext::intel::experimental::esimd;
     uint32_t ii = static_cast<uint32_t>(i.get(0));
     T v = scalar_load<T>(acc, ii);
     v += ii;
 
@@ -57,7 +57,7 @@ int main(void) {
     auto e = q.submit([&](handler &cgh) {
       cgh.parallel_for<class Test>(
           Range, [=](nd_item<1> ndi) SYCL_ESIMD_KERNEL {
-            using namespace sycl::INTEL::gpu;
+            using namespace sycl::ext::intel::experimental::esimd;
 
             simd<DTYPE, SIZE> src0(0);
             src0 = block_load<DTYPE, SIZE>(S0);
 
@@ -19,7 +19,7 @@
 #include <iostream>
 
 using namespace cl::sycl;
-using namespace sycl::INTEL::gpu;
+using namespace sycl::ext::intel::experimental::esimd;
 
 // --- Data initialization functions
 
 
@@ -60,7 +60,7 @@ int main(void) {
     auto qq = q.submit([&](handler &cgh) {
       cgh.parallel_for<KernelID>(
           sycl::range<1>{1}, [=](id<1> i) SYCL_ESIMD_KERNEL {
-            using namespace sycl::INTEL::gpu;
+            using namespace sycl::ext::intel::experimental::esimd;
 
             simd<a_data_t, SIZE> va(0);
             simd<b_data_t, SIZE> vb(0);
 
@@ -27,7 +27,7 @@ class KernelID;
 ESIMD_NOINLINE int add(int A, int B) { return A + B; }
 
 template <typename AccTy> ESIMD_NOINLINE void test(AccTy acc, int A, int B) {
-  using namespace sycl::INTEL::gpu;
+  using namespace sycl::ext::intel::experimental::esimd;
 
   auto foo = &add;
   auto res = foo(A, B);
 
@@ -51,15 +51,15 @@ int main(int argc, char **argv) {
     q.submit([&](handler &cgh) {
       auto acc = buf.get_access<access::mode::write>(cgh);
 
-      cgh.parallel_for<KernelID>(sycl::range<1>{1},
-                                 [=](id<1> i) SYCL_ESIMD_KERNEL {
-                                   using namespace sycl::INTEL::gpu;
+      cgh.parallel_for<KernelID>(
+          sycl::range<1>{1}, [=](id<1> i) SYCL_ESIMD_KERNEL {
+            using namespace sycl::ext::intel::experimental::esimd;
 
-                                   auto foo = &add;
-                                   auto res = foo(in1, in2, in3);
+            auto foo = &add;
+            auto res = foo(in1, in2, in3);
 
-                                   scalar_store(acc, 0, res);
-                                 });
+            scalar_store(acc, 0, res);
+          });
     });
   } catch (cl::sycl::exception const &e) {
     std::cout << "SYCL exception caught: " << e.what() << std::endl;
 
@@ -50,21 +50,21 @@ bool test(queue q, bool flag) {
       auto o_acc = o_buf.get_access<access::mode::write>(cgh);
       auto y_acc = y_buf.get_access<access::mode::write>(cgh);
 
-      cgh.parallel_for<KernelID>(sycl::range<1>{1},
-                                 [=](id<1> i) SYCL_ESIMD_KERNEL {
-                                   using namespace sycl::INTEL::gpu;
-                                   using f = int (*)(int);
+      cgh.parallel_for<KernelID>(
+          sycl::range<1>{1}, [=](id<1> i) SYCL_ESIMD_KERNEL {
+            using namespace sycl::ext::intel::experimental::esimd;
+            using f = int (*)(int);
 
-                                   f a[] = {f1, f2};
-                                   if (flag) {
-                                     a[0] = f3;
-                                     scalar_store(y_acc, 0, 2);
-                                   }
+            f a[] = {f1, f2};
+            if (flag) {
+              a[0] = f3;
+              scalar_store(y_acc, 0, 2);
+            }
 
-                                   auto res = a[0](in1) + a[1](in2);
+            auto res = a[0](in1) + a[1](in2);
 
-                                   scalar_store(o_acc, 0, res);
-                                 });
+            scalar_store(o_acc, 0, res);
+          });
     });
   } catch (cl::sycl::exception const &e) {
     std::cout << "SYCL exception caught: " << e.what() << std::endl;
 
@@ -45,15 +45,15 @@ bool test(queue q, bool flag) {
     q.submit([&](handler &cgh) {
       auto acc = buf.get_access<access::mode::write>(cgh);
 
-      cgh.parallel_for<KernelID>(sycl::range<1>{1},
-                                 [=](id<1> i) SYCL_ESIMD_KERNEL {
-                                   using namespace sycl::INTEL::gpu;
+      cgh.parallel_for<KernelID>(
+          sycl::range<1>{1}, [=](id<1> i) SYCL_ESIMD_KERNEL {
+            using namespace sycl::ext::intel::experimental::esimd;
 
-                                   auto foo = flag ? &add : &sub;
-                                   auto res = foo(in1, in2);
+            auto foo = flag ? &add : &sub;
+            auto res = foo(in1, in2);
 
-                                   scalar_store(acc, 0, res);
-                                 });
+            scalar_store(acc, 0, res);
+          });
     });
   } catch (cl::sycl::exception const &e) {
     std::cout << "SYCL exception caught: " << e.what() << std::endl;
 
@@ -153,7 +153,7 @@ int main(int argc, char *argv[]) {
 
         cgh.parallel_for<class Hist>(
             Range, [=](nd_item<1> ndi) SYCL_ESIMD_KERNEL {
-              using namespace sycl::INTEL::gpu;
+              using namespace sycl::ext::intel::experimental::esimd;
 
               // Get thread origin offsets
               uint tid = ndi.get_group(0);
 
@@ -22,7 +22,7 @@ static constexpr int BLOCK_WIDTH = 32;
 static constexpr int NUM_BLOCKS = 32;
 
 using namespace cl::sycl;
-using namespace sycl::INTEL::gpu;
+using namespace sycl::ext::intel::experimental::esimd;
 
 // Histogram kernel: computes the distribution of pixel intensities
 ESIMD_INLINE void histogram_atomic(const uint32_t *input_ptr, uint32_t *output,
 
@@ -23,7 +23,7 @@ static constexpr int BLOCK_WIDTH = 32;
 static constexpr int NUM_BLOCKS = 32;
 
 using namespace cl::sycl;
-using namespace sycl::INTEL::gpu;
+using namespace sycl::ext::intel::experimental::esimd;
 
 // Histogram kernel: computes the distribution of pixel intensities
 ESIMD_INLINE void histogram_atomic(const uint32_t *input_ptr, uint32_t *output,
 
@@ -148,7 +148,7 @@ int main(int argc, char *argv[]) {
 
       cgh.parallel_for<class Hist>(
           Range, [=](nd_item<2> ndi) SYCL_ESIMD_KERNEL {
-            using namespace sycl::INTEL::gpu;
+            using namespace sycl::ext::intel::experimental::esimd;
 
             // Get thread origin offsets
             uint h_pos = ndi.get_group(0) * BLOCK_WIDTH;
 
@@ -62,7 +62,7 @@ int checkHistogram(unsigned int *refHistogram, unsigned int *hist) {
   return 1;
 }
 
-using namespace sycl::INTEL::gpu;
+using namespace sycl::ext::intel::experimental::esimd;
 template <EsimdAtomicOpType Op, typename T, int n>
 ESIMD_INLINE void atomic_write(T *bins, simd<unsigned, n> offset,
                                simd<T, n> src0, simd<ushort, n> pred) {
 
@@ -23,7 +23,7 @@
 #include <vector>
 
 using namespace cl::sycl;
-using namespace sycl::INTEL::gpu;
+using namespace sycl::ext::intel::experimental::esimd;
 using namespace std;
 
 inline float dist(Point p, Centroid c) {
 
@@ -87,7 +87,7 @@ int main(int argc, char *argv[]) {
 
       cgh.parallel_for<class Test>(
           GlobalRange * LocalRange, [=](item<2> it) SYCL_ESIMD_KERNEL {
-            using namespace sycl::INTEL::gpu;
+            using namespace sycl::ext::intel::experimental::esimd;
 
             simd<unsigned char, 8 * 32> vin;
             auto in = vin.format<unsigned char, 8, 32>();
 
@@ -18,7 +18,7 @@
 #include <memory>
 
 using namespace cl::sycl;
-using namespace sycl::INTEL::gpu;
+using namespace sycl::ext::intel::experimental::esimd;
 
 #ifdef _SIM_MODE_
 #define CRUNCH 32
 
@@ -19,7 +19,7 @@
 #include <memory>
 
 using namespace cl::sycl;
-using namespace sycl::INTEL::gpu;
+using namespace sycl::ext::intel::experimental::esimd;
 
 #ifdef _SIM_MODE_
 #define CRUNCH 32
 
@@ -18,7 +18,7 @@
 
 using namespace cl::sycl;
 using namespace std;
-using namespace sycl::INTEL::gpu;
+using namespace sycl::ext::intel::experimental::esimd;
 
 void initMatrix(int *M, unsigned N) {
   assert(N >= 8 && (((N - 1) & N) == 0) &&
Original file line number	Diff line number	Diff line change
`@@ -37,6 +37,7 @@ int main() {`
`37`	`37`	`}`
`38`	`38`	`}`
`39`	`39`
	`40`	`+// CHECK:---> piMemBufferCreate`
`40`	`41`	`// CHECK:---> piMemBufferCreate`
`41`	`42`	`// CHECK-NEXT: {{.*}} : {{.*}}`
`42`		`-// CHECK-NEXT: {{.*}} : 9`
	`43`	`+// CHECK-NEXT: {{.*}} : 17`
Original file line number	Diff line number	Diff line change
`@@ -62,7 +62,7 @@ int checkHistogram(unsigned int *refHistogram, unsigned int *hist) {`
`62`	`62`	`return 1;`
`63`	`63`	`}`
`64`	`64`
`65`		`-using namespace sycl::INTEL::gpu;`
	`65`	`+using namespace sycl::ext::intel::experimental::esimd;`
`66`	`66`	`template <EsimdAtomicOpType Op, typename T, int n>`
`67`	`67`	`ESIMD_INLINE void atomic_write(T *bins, simd<unsigned, n> offset,`
`68`	`68`	`simd<T, n> src0, simd<ushort, n> pred) {`