intel
diff --git a/SYCL/Assert/assert_in_simultaneous_kernels.cpp b/SYCL/Assert/assert_in_simultaneous_kernels.cpp
Lines changed: 2 additions & 2 deletions
diff --git a/SYCL/AtomicRef/assignment_atomic64.cpp b/SYCL/AtomicRef/assignment_atomic64.cpp
Lines changed: 8 additions & 3 deletions
diff --git a/SYCL/AtomicRef/assignment_atomic64_generic.cpp b/SYCL/AtomicRef/assignment_atomic64_generic.cpp
Lines changed: 8 additions & 3 deletions
diff --git a/SYCL/Basic/buffer/buffer.cpp b/SYCL/Basic/buffer/buffer.cpp
Lines changed: 13 additions & 13 deletions
diff --git a/SYCL/Basic/intel-ext-device.cpp b/SYCL/Basic/intel-ext-device.cpp
Lines changed: 56 additions & 11 deletions
diff --git a/SYCL/Basic/subdevice.cpp b/SYCL/Basic/subdevice.cpp
Lines changed: 0 additions & 5 deletions
diff --git a/SYCL/Basic/subsubdevice.cpp b/SYCL/Basic/subsubdevice.cpp
Lines changed: 0 additions & 5 deletions
@@ -1,6 +1,6 @@
 // REQUIRES: linux
-// FIXME: Flaky on HIP
-// UNSUPPORTED: hip
+// FIXME: Flaky on HIP and CUDA
+// UNSUPPORTED: hip || cuda
 // RUN: %clangxx -DSYCL_FALLBACK_ASSERT=1 -fsycl -fsycl-targets=%sycl_triple %s -o %t.out %threads_lib
 // RUN: %CPU_RUN_PLACEHOLDER %t.out &> %t.txt || true
 // RUN: %CPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt
 
@@ -1,4 +1,4 @@
-// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
+// RUN: %clangxx -fsycl -fsycl-device-code-split=per_kernel -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
@@ -10,13 +10,18 @@ using namespace sycl;
 int main() {
   queue q;
 
-  if (!q.get_device().has(aspect::atomic64)) {
+  device dev = q.get_device();
+
+  if (!dev.has(aspect::atomic64)) {
     std::cout << "Skipping test\n";
     return 0;
   }
 
+  const bool DoublesSupported = dev.has(sycl::aspect::fp64);
+
   constexpr int N = 32;
-  assignment_test<double>(q, N);
+  if (DoublesSupported)
+    assignment_test<double>(q, N);
 
   // Include long tests if they are 64 bits wide
   if constexpr (sizeof(long) == 8) {
 
@@ -1,4 +1,4 @@
-// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
+// RUN: %clangxx -fsycl -fsycl-device-code-split=per_kernel -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
@@ -13,13 +13,18 @@ using namespace sycl;
 int main() {
   queue q;
 
-  if (!q.get_device().has(aspect::atomic64)) {
+  device dev = q.get_device();
+
+  if (!dev.has(aspect::atomic64)) {
     std::cout << "Skipping test\n";
     return 0;
   }
 
+  const bool DoublesSupported = dev.has(sycl::aspect::fp64);
+
   constexpr int N = 32;
-  assignment_generic_test<double>(q, N);
+  if (DoublesSupported)
+    assignment_generic_test<double>(q, N);
 
   // Include long tests if they are 64 bits wide
   if constexpr (sizeof(long) == 8) {
 
@@ -510,15 +510,15 @@ int main() {
                                     [](bool *data) { delete[] data; });
     std::shared_ptr<int> int_shrd(new int[size],
                                   [](int *data) { delete[] data; });
-    std::shared_ptr<double> double_shrd(new double[size],
-                                        [](double *data) { delete[] data; });
+    std::shared_ptr<float> float_shrd(new float[size],
+                                      [](float *data) { delete[] data; });
 
     std::vector<bool> bool_vector;
     std::vector<int> int_vector;
-    std::vector<double> double_vector;
+    std::vector<float> float_vector;
     bool_vector.reserve(size);
     int_vector.reserve(size);
-    double_vector.reserve(size);
+    float_vector.reserve(size);
 
     sycl::queue Queue;
     std::mutex m;
@@ -529,40 +529,40 @@ int main() {
       sycl::buffer<int, dims> buf_int_shrd(
           int_shrd, r,
           sycl::property_list{sycl::property::buffer::use_mutex(m)});
-      sycl::buffer<double, dims> buf_double_shrd(
-          double_shrd, r,
+      sycl::buffer<float, dims> buf_float_shrd(
+          float_shrd, r,
           sycl::property_list{sycl::property::buffer::use_mutex(m)});
       m.lock();
       std::fill(bool_shrd.get(), (bool_shrd.get() + size), bool());
       std::fill(int_shrd.get(), (int_shrd.get() + size), int());
-      std::fill(double_shrd.get(), (double_shrd.get() + size), double());
+      std::fill(float_shrd.get(), (float_shrd.get() + size), float());
       m.unlock();
 
       buf_bool_shrd.set_final_data(bool_vector.begin());
       buf_int_shrd.set_final_data(int_vector.begin());
-      buf_double_shrd.set_final_data(double_vector.begin());
+      buf_float_shrd.set_final_data(float_vector.begin());
       buf_bool_shrd.set_write_back(true);
       buf_int_shrd.set_write_back(true);
-      buf_double_shrd.set_write_back(true);
+      buf_float_shrd.set_write_back(true);
 
       Queue.submit([&](sycl::handler &cgh) {
         auto Accessor_bool =
             buf_bool_shrd.get_access<sycl::access::mode::write>(cgh);
         auto Accessor_int =
             buf_int_shrd.get_access<sycl::access::mode::write>(cgh);
-        auto Accessor_double =
-            buf_double_shrd.get_access<sycl::access::mode::write>(cgh);
+        auto Accessor_float =
+            buf_float_shrd.get_access<sycl::access::mode::write>(cgh);
         cgh.parallel_for<class FillBuffer>(r, [=](sycl::id<1> WIid) {
           Accessor_bool[WIid] = true;
           Accessor_int[WIid] = 3;
-          Accessor_double[WIid] = 7.5;
+          Accessor_float[WIid] = 7.5;
         });
       });
     } // Data is copied back
 
     for (size_t i = 0; i < size; i++) {
       if (bool_vector[i] != true || int_vector[i] != 3 ||
-          double_vector[i] != 7.5) {
+          float_vector[i] != 7.5) {
         assert(false && "Data was not copied back");
         return 1;
       }
 
@@ -6,7 +6,6 @@
 // UNSUPPORTED: cuda
 // UNSUPPORTED: hip
 // Temporarily disable on L0 due to fails in CI
-// UNSUPPORTED: level_zero
 
 //==--------- intel-ext-device.cpp - SYCL device test ------------==//
 //
@@ -21,6 +20,7 @@
 
 #include <sycl/sycl.hpp>
 
+#include <cassert>
 #include <iostream>
 
 using namespace sycl;
@@ -66,43 +66,45 @@ int main(int argc, char **argv) {
 
             if (dev.has(aspect::ext_intel_pci_address)) {
               std::cout << "PCI address = "
-                        << dev.get_info<info::device::ext_intel_pci_address>()
+                        << dev.get_info<ext::intel::info::device::pci_address>()
                         << std::endl;
             }
             if (dev.has(aspect::ext_intel_gpu_eu_count)) {
-              totalEUs = dev.get_info<info::device::ext_intel_gpu_eu_count>();
+              totalEUs = dev.get_info<ext::intel::info::device::gpu_eu_count>();
               std::cout << "Number of EUs = " << totalEUs << std::endl;
             }
             if (dev.has(aspect::ext_intel_gpu_eu_simd_width)) {
-              int w = dev.get_info<info::device::ext_intel_gpu_eu_simd_width>();
+              int w =
+                  dev.get_info<ext::intel::info::device::gpu_eu_simd_width>();
               std::cout << "EU SIMD width = " << w << std::endl;
             }
             if (dev.has(aspect::ext_intel_gpu_slices)) {
-              numSlices = dev.get_info<info::device::ext_intel_gpu_slices>();
+              numSlices = dev.get_info<ext::intel::info::device::gpu_slices>();
               std::cout << "Number of slices = " << numSlices << std::endl;
             }
             if (dev.has(aspect::ext_intel_gpu_subslices_per_slice)) {
               numSubslices = dev.get_info<
-                  info::device::ext_intel_gpu_subslices_per_slice>();
+                  ext::intel::info::device::gpu_subslices_per_slice>();
               std::cout << "Number of subslices per slice = " << numSubslices
                         << std::endl;
             }
             if (dev.has(aspect::ext_intel_gpu_eu_count_per_subslice)) {
               numEUsPerSubslice = dev.get_info<
-                  info::device::ext_intel_gpu_eu_count_per_subslice>();
+                  ext::intel::info::device::gpu_eu_count_per_subslice>();
               std::cout << "Number of EUs per subslice = " << numEUsPerSubslice
                         << std::endl;
             }
-            if (dev.has(aspect::ext_intel_gpu_hw_threads_per_eu)) {
-              numHWThreadsPerEU =
-                  dev.get_info<info::device::ext_intel_gpu_hw_threads_per_eu>();
+            if (SYCL_EXT_INTEL_DEVICE_INFO >= 3 &&
+                dev.has(aspect::ext_intel_gpu_hw_threads_per_eu)) {
+              numHWThreadsPerEU = dev.get_info<
+                  ext::intel::info::device::gpu_hw_threads_per_eu>();
               std::cout << "Number of HW threads per EU = " << numHWThreadsPerEU
                         << std::endl;
             }
             if (dev.has(aspect::ext_intel_max_mem_bandwidth)) {
               // not supported yet
               long m =
-                  dev.get_info<info::device::ext_intel_max_mem_bandwidth>();
+                  dev.get_info<ext::intel::info::device::max_mem_bandwidth>();
               std::cout << "Maximum memory bandwidth = " << m << std::endl;
             }
             // This is the only data we can verify.
@@ -111,8 +113,51 @@ int main(int argc, char **argv) {
               std::cout << "Failed!" << std::endl;
               return 1;
             }
+            if (SYCL_EXT_INTEL_DEVICE_INFO >= 2 &&
+                dev.has(aspect::ext_intel_device_info_uuid)) {
+              auto UUID = dev.get_info<ext::intel::info::device::uuid>();
+              std::cout << "Device UUID = ";
+              for (int i = 0; i < 16; i++) {
+                std::cout << std::to_string(UUID[i]);
+              }
+              std::cout << "\n";
+            }
           } // SYCL_EXT_INTEL_DEVICE_INFO
         }
+
+// Check if this experimental feature is supported
+#ifdef SYCL_EXT_ONEAPI_MAX_WORK_GROUP_QUERY
+        sycl::id<1> groupD =
+            dev.get_info<sycl::ext::oneapi::experimental::info::device::
+                             max_work_groups<1>>();
+        std::cout << "Max work group size in 1D \n";
+        std::cout << "Dimension 1:" << groupD[0] << std::endl;
+
+        sycl::id<2> group2D =
+            dev.get_info<sycl::ext::oneapi::experimental::info::device::
+                             max_work_groups<2>>();
+        std::cout << "Max work group size in 2D \n";
+        std::cout << "Dimension 1:" << group2D[0] << "\n"
+                  << "Dimension 2:" << group2D[1] << std::endl;
+
+        sycl::id<3> group3D =
+            dev.get_info<sycl::ext::oneapi::experimental::info::device::
+                             max_work_groups<3>>();
+        std::cout << "Max work group size in 3D \n";
+        std::cout << "Dimension 1:" << group3D[0] << "\n"
+                  << "Dimension 2:" << group3D[1] << "\n"
+                  << "Dimension 3:" << group3D[2] << std::endl;
+
+        size_t group_max = dev.get_info<sycl::ext::oneapi::experimental::info::
+                                            device::max_global_work_groups>();
+        std::cout << "Max global work group size:" << group_max << "\n";
+
+        assert((group3D[0] <= group_max && group3D[1] <= group_max &&
+                group3D[2] <= group_max) &&
+               "Max work-group size of each dimension must be smaller than "
+               "global work-group size");
+#endif
+
         std::cout << std::endl;
       }
     }
 
@@ -1,5 +1,4 @@
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
@@ -25,10 +24,6 @@ int main() {
   try {
     auto devices = device::get_devices();
     for (const auto &dev : devices) {
-      // TODO: implement subdevices creation for host device
-      if (dev.is_host())
-        continue;
-
       assert(dev.get_info<info::device::partition_type_property>() ==
              info::partition_property::no_partition);
 
 
@@ -1,5 +1,4 @@
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
@@ -25,10 +24,6 @@ int main() {
   try {
     auto devices = device::get_devices();
     for (const auto &dev : devices) {
-      // TODO: implement subdevices creation for host device
-      if (dev.is_host())
-        continue;
-
       assert(dev.get_info<info::device::partition_type_property>() ==
              info::partition_property::no_partition);