intel
diff --git a/‎SYCL/AOT/multiple-devices.cpp
Lines changed: 0 additions & 4 deletions b/‎SYCL/AOT/multiple-devices.cpp
Lines changed: 0 additions & 4 deletions
diff --git a/‎SYCL/Assert/assert_in_simultaneously_multiple_tus.cpp
Lines changed: 3 additions & 0 deletions b/‎SYCL/Assert/assert_in_simultaneously_multiple_tus.cpp
Lines changed: 3 additions & 0 deletions
diff --git a/‎SYCL/Assert/assert_in_simultaneously_multiple_tus_one_ndebug.cpp
Lines changed: 2 additions & 0 deletions b/‎SYCL/Assert/assert_in_simultaneously_multiple_tus_one_ndebug.cpp
Lines changed: 2 additions & 0 deletions
diff --git a/‎SYCL/AtomicRef/accessor.cpp
Lines changed: 1 addition & 5 deletions b/‎SYCL/AtomicRef/accessor.cpp
Lines changed: 1 addition & 5 deletions
diff --git a/‎SYCL/AtomicRef/add.cpp
Lines changed: 0 additions & 1 deletion b/‎SYCL/AtomicRef/add.cpp
Lines changed: 0 additions & 1 deletion
diff --git a/‎SYCL/AtomicRef/add.h
Lines changed: 1 addition & 2 deletions b/‎SYCL/AtomicRef/add.h
Lines changed: 1 addition & 2 deletions
diff --git a/‎SYCL/AtomicRef/add_generic.cpp
Lines changed: 0 additions & 1 deletion b/‎SYCL/AtomicRef/add_generic.cpp
Lines changed: 0 additions & 1 deletion
diff --git a/‎SYCL/AtomicRef/add_generic_local.cpp
Lines changed: 1 addition & 3 deletions b/‎SYCL/AtomicRef/add_generic_local.cpp
Lines changed: 1 addition & 3 deletions
diff --git a/‎SYCL/AtomicRef/add_generic_local_native_fp.cpp
Lines changed: 1 addition & 4 deletions b/‎SYCL/AtomicRef/add_generic_local_native_fp.cpp
Lines changed: 1 addition & 4 deletions
diff --git a/‎SYCL/AtomicRef/add_generic_native_fp.cpp
Lines changed: 0 additions & 1 deletion b/‎SYCL/AtomicRef/add_generic_native_fp.cpp
Lines changed: 0 additions & 1 deletion
diff --git a/‎SYCL/AtomicRef/add_local.cpp
Lines changed: 2 additions & 4 deletions b/‎SYCL/AtomicRef/add_local.cpp
Lines changed: 2 additions & 4 deletions
diff --git a/‎SYCL/AtomicRef/add_local_native_fp.cpp
Lines changed: 2 additions & 4 deletions b/‎SYCL/AtomicRef/add_local_native_fp.cpp
Lines changed: 2 additions & 4 deletions
diff --git a/‎SYCL/AtomicRef/add_native_fp.cpp
Lines changed: 0 additions & 1 deletion b/‎SYCL/AtomicRef/add_native_fp.cpp
Lines changed: 0 additions & 1 deletion
diff --git a/‎SYCL/AtomicRef/and.cpp
Lines changed: 0 additions & 1 deletion b/‎SYCL/AtomicRef/and.cpp
Lines changed: 0 additions & 1 deletion
diff --git a/‎SYCL/AtomicRef/and.h
Lines changed: 1 addition & 2 deletions b/‎SYCL/AtomicRef/and.h
Lines changed: 1 addition & 2 deletions
diff --git a/‎SYCL/AtomicRef/and_generic.cpp
Lines changed: 0 additions & 1 deletion b/‎SYCL/AtomicRef/and_generic.cpp
Lines changed: 0 additions & 1 deletion
diff --git a/‎SYCL/AtomicRef/and_generic_local.cpp
Lines changed: 1 addition & 3 deletions b/‎SYCL/AtomicRef/and_generic_local.cpp
Lines changed: 1 addition & 3 deletions
diff --git a/‎SYCL/AtomicRef/and_local.cpp
Lines changed: 0 additions & 4 deletions b/‎SYCL/AtomicRef/and_local.cpp
Lines changed: 0 additions & 4 deletions
diff --git a/‎SYCL/AtomicRef/assignment.cpp
Lines changed: 0 additions & 1 deletion b/‎SYCL/AtomicRef/assignment.cpp
Lines changed: 0 additions & 1 deletion
diff --git a/‎SYCL/AtomicRef/assignment_atomic64.cpp
Lines changed: 0 additions & 1 deletion b/‎SYCL/AtomicRef/assignment_atomic64.cpp
Lines changed: 0 additions & 1 deletion
diff --git a/‎SYCL/AtomicRef/assignment_atomic64_generic.cpp
Lines changed: 0 additions & 1 deletion b/‎SYCL/AtomicRef/assignment_atomic64_generic.cpp
Lines changed: 0 additions & 1 deletion
diff --git a/‎SYCL/AtomicRef/assignment_generic.cpp
Lines changed: 0 additions & 1 deletion b/‎SYCL/AtomicRef/assignment_generic.cpp
Lines changed: 0 additions & 1 deletion
diff --git a/‎SYCL/AtomicRef/atomic_memory_order.cpp
Lines changed: 0 additions & 1 deletion b/‎SYCL/AtomicRef/atomic_memory_order.cpp
Lines changed: 0 additions & 1 deletion
diff --git a/‎SYCL/AtomicRef/atomic_memory_order_acq_rel.cpp
Lines changed: 2 additions & 8 deletions b/‎SYCL/AtomicRef/atomic_memory_order_acq_rel.cpp
Lines changed: 2 additions & 8 deletions
diff --git a/‎SYCL/AtomicRef/atomic_memory_order_seq_cst.cpp
Lines changed: 1 addition & 6 deletions b/‎SYCL/AtomicRef/atomic_memory_order_seq_cst.cpp
Lines changed: 1 addition & 6 deletions
diff --git a/‎SYCL/AtomicRef/compare_exchange.cpp
Lines changed: 0 additions & 1 deletion b/‎SYCL/AtomicRef/compare_exchange.cpp
Lines changed: 0 additions & 1 deletion
diff --git a/‎SYCL/AtomicRef/compare_exchange.h
Lines changed: 1 addition & 2 deletions b/‎SYCL/AtomicRef/compare_exchange.h
Lines changed: 1 addition & 2 deletions
diff --git a/‎SYCL/AtomicRef/compare_exchange_generic.cpp
Lines changed: 0 additions & 1 deletion b/‎SYCL/AtomicRef/compare_exchange_generic.cpp
Lines changed: 0 additions & 1 deletion
diff --git a/‎SYCL/AtomicRef/compare_exchange_generic_local.cpp
Lines changed: 1 addition & 3 deletions b/‎SYCL/AtomicRef/compare_exchange_generic_local.cpp
Lines changed: 1 addition & 3 deletions
diff --git a/‎SYCL/AtomicRef/compare_exchange_local.cpp
Lines changed: 0 additions & 4 deletions b/‎SYCL/AtomicRef/compare_exchange_local.cpp
Lines changed: 0 additions & 4 deletions
diff --git a/‎SYCL/AtomicRef/device_has_aspect_atomic64_cuda_and_hip.cpp
Lines changed: 19 additions & 0 deletions b/‎SYCL/AtomicRef/device_has_aspect_atomic64_cuda_and_hip.cpp
Lines changed: 19 additions & 0 deletions
diff --git a/‎SYCL/AtomicRef/device_has_aspect_atomic64_level_zero.cpp
Lines changed: 25 additions & 0 deletions b/‎SYCL/AtomicRef/device_has_aspect_atomic64_level_zero.cpp
Lines changed: 25 additions & 0 deletions
@@ -15,26 +15,22 @@
 
 // CPU, GPU, FPGA
 // RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64,spir64_gen,spir64_fpga -Xsycl-target-backend=spir64_gen %gpu_aot_target_opts %t.o -o %t_all_aot.out
-// RUN: %HOST_RUN_PLACEHOLDER %t_all_aot.out
 // RUN: %CPU_RUN_PLACEHOLDER %t_all_aot.out
 // RUN: %GPU_RUN_PLACEHOLDER %t_all_aot.out
 // RUN: %ACC_RUN_PLACEHOLDER %t_all_aot.out
 
 // CPU, GPU
 // RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64,spir64_gen -Xsycl-target-backend=spir64_gen %gpu_aot_target_opts %t.o -o %t_cpu_gpu.out
-// RUN: %HOST_RUN_PLACEHOLDER %t_cpu_gpu.out
 // RUN: %CPU_RUN_PLACEHOLDER %t_cpu_gpu.out
 // RUN: %GPU_RUN_PLACEHOLDER %t_cpu_gpu.out
 
 // CPU, FPGA
 // RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64,spir64_fpga %t.o -o %t_cpu_fpga.out
-// RUN: %HOST_RUN_PLACEHOLDER %t_cpu_fpga.out
 // RUN: %CPU_RUN_PLACEHOLDER %t_cpu_fpga.out
 // RUN: %ACC_RUN_PLACEHOLDER %t_cpu_fpga.out
 
 // GPU, FPGA
 // RUN: %clangxx -fsycl -fsycl-targets=spir64_gen,spir64_fpga -Xsycl-target-backend=spir64_gen %gpu_aot_target_opts %t.o -o %t_gpu_fpga.out
-// RUN: %HOST_RUN_PLACEHOLDER %t_gpu_fpga.out
 // RUN: %GPU_RUN_PLACEHOLDER %t_gpu_fpga.out
 // RUN: %ACC_RUN_PLACEHOLDER %t_gpu_fpga.out
 
 
@@ -1,3 +1,6 @@
+// FIXME flaky fail on CUDA and HIP
+// UNSUPPORTED: cuda || hip
+//
 // RUN: %clangxx -DSYCL_FALLBACK_ASSERT=1 -fsycl -fsycl-targets=%sycl_triple -I %S/Inputs %s %S/Inputs/kernels_in_file2.cpp -o %t.out %threads_lib
 // RUN: %CPU_RUN_PLACEHOLDER %t.out &> %t.txt || true
 // RUN: %CPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt
 
@@ -1,3 +1,5 @@
+// FIXME flaky fail on CUDA
+// UNSUPPORTED: cuda
 // RUN: %clangxx -DSYCL_FALLBACK_ASSERT=1 -fsycl -fsycl-targets=%sycl_triple -DDEFINE_NDEBUG_INFILE2 -I %S/Inputs %S/assert_in_simultaneously_multiple_tus.cpp %S/Inputs/kernels_in_file2.cpp -o %t.out %threads_lib
 // RUN: %CPU_RUN_PLACEHOLDER %t.out &> %t.txt || true
 // RUN: %CPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt
 
@@ -1,5 +1,4 @@
 // RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 
@@ -101,9 +100,6 @@ int main() {
   queue q;
   constexpr int N = 32;
   accessor_test<int>(q, N);
-  // TODO: Enable local accessor test for host when barrier is supported
-  if (!q.get_device().is_host()) {
-    local_accessor_test<int>(q, N);
-  }
+  local_accessor_test<int>(q, N);
   std::cout << "Test passed." << std::endl;
 }
@@ -2,7 +2,6 @@
 // UNSUPPORTED: hip
 
 // RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-device-code-split=per_kernel -fsycl-targets=%sycl_triple %s -o %t.out
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 
@@ -30,8 +30,7 @@ void add_fetch_local_test(queue q, size_t N) {
        auto sum = sum_buf.template get_access<access::mode::read_write>(cgh);
        auto out =
            output_buf.template get_access<access::mode::discard_write>(cgh);
-       accessor<T, 1, access::mode::read_write, access::target::local> loc(1,
-                                                                           cgh);
+       local_accessor<T, 1> loc(1, cgh);
 
        cgh.parallel_for(nd_range<1>(N, N), [=](nd_item<1> it) {
          int gid = it.get_global_id(0);
 
@@ -1,5 +1,4 @@
 // RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 
@@ -1,12 +1,10 @@
 // RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 
 // CUDA and HIP backends have had no support for the generic address space yet.
-// Host does not support barrier.
-// XFAIL: cuda || hip || host
+// XFAIL: cuda || hip
 
 #define TEST_GENERIC_IN_LOCAL 1
 
 
@@ -1,13 +1,10 @@
 // RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 
 // CUDA and HIP backends have had no support for the generic address space yet.
-// Host does not support barrier. HIP does not support native floating point
-// atomics
-// XFAIL: cuda, hip, host
+// XFAIL: cuda, hip
 
 #define SYCL_USE_NATIVE_FP_ATOMICS
 #define FP_TESTS_ONLY
 
@@ -1,5 +1,4 @@
 // RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 
@@ -1,12 +1,10 @@
 // RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 
-// Barrier is not supported on host. HIP does not support floating point
-// atomics.
-// XFAIL: host, hip
+// HIP does not support floating point atomics.
+// XFAIL: hip
 
 #include "add.h"
 
 
@@ -1,12 +1,10 @@
 // RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 
-// Barrier is not supported on host. HIP does not support floating point
-// atomics.
-// XFAIL: host, hip
+// HIP does not support floating point atomics.
+// XFAIL: hip
 
 #define SYCL_USE_NATIVE_FP_ATOMICS
 #define FP_TESTS_ONLY
 
@@ -1,5 +1,4 @@
 // RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 
@@ -1,5 +1,4 @@
 // RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 
@@ -30,8 +30,7 @@ void and_local_test(queue q) {
        auto cum = cum_buf.template get_access<access::mode::read_write>(cgh);
        auto out =
            output_buf.template get_access<access::mode::discard_write>(cgh);
-       accessor<T, 1, access::mode::read_write, access::target::local> loc(1,
-                                                                           cgh);
+       local_accessor<T, 1> loc(1, cgh);
 
        cgh.parallel_for(nd_range<1>(N, N), [=](nd_item<1> it) {
          int gid = it.get_global_id(0);
 
@@ -1,5 +1,4 @@
 // RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 
@@ -1,12 +1,10 @@
 // RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 
 // CUDA and HIP backends have had no support for the generic address space yet.
-// Host does not support barrier.
-// XFAIL: cuda || hip || host
+// XFAIL: cuda || hip
 
 #define TEST_GENERIC_IN_LOCAL 1
 
 
@@ -1,12 +1,8 @@
 // RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 
-// Barrier is not supported on host.
-// XFAIL: host
-
 #include "and.h"
 
 int main() { and_test_all<access::address_space::local_space>(); }
@@ -1,5 +1,4 @@
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 
@@ -1,5 +1,4 @@
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 
@@ -1,5 +1,4 @@
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 
@@ -1,5 +1,4 @@
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 
@@ -1,5 +1,4 @@
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 
@@ -1,15 +1,11 @@
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -O3 -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 // L0, OpenCL, and HIP backends don't currently support
 // info::device::atomic_memory_order_capabilities
 // UNSUPPORTED: level_zero, opencl, hip
 
-// host does not support barrier
-// XFAIL: host
-
 // NOTE: Tests fetch_add for acquire and release memory ordering.
 
 #include "atomic_memory_order.h"
@@ -76,8 +72,7 @@ template <memory_order order> void test_acquire_local() {
     q.submit([&](handler &cgh) {
        auto error =
            error_buf.template get_access<access::mode::read_write>(cgh);
-       accessor<int, 1, access::mode::read_write, access::target::local> val(
-           2, cgh);
+       local_accessor<int, 1> val(2, cgh);
        cgh.parallel_for(
            nd_range<1>(global_size, local_size), [=](nd_item<1> it) {
              size_t lid = it.get_local_id(0);
@@ -168,8 +163,7 @@ template <memory_order order> void test_release_local() {
     q.submit([&](handler &cgh) {
        auto error =
            error_buf.template get_access<access::mode::read_write>(cgh);
-       accessor<int, 1, access::mode::read_write, access::target::local> val(
-           2, cgh);
+       local_accessor<int, 1> val(2, cgh);
        cgh.parallel_for(
            nd_range<1>(global_size, local_size), [=](nd_item<1> it) {
              size_t lid = it.get_local_id(0);
 
@@ -1,15 +1,11 @@
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -O3 -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 // L0, OpenCL, and HIP backends don't currently support
 // info::device::atomic_memory_order_capabilities
 // UNSUPPORTED: level_zero, opencl, hip
 
-// host does not support barrier
-// XFAIL: host
-
 #include "atomic_memory_order.h"
 #include <iostream>
 #include <numeric>
@@ -120,8 +116,7 @@ template <memory_order order> void test_local() {
 
   q.submit([&](handler &cgh) {
      auto res = res_buf.template get_access<access::mode::discard_write>(cgh);
-     accessor<int, 1, access::mode::read_write, access::target::local> val(2,
-                                                                           cgh);
+     local_accessor<int, 1> val(2, cgh);
      cgh.parallel_for(nd_range<1>(N_items, N_items), [=](nd_item<1> it) {
        val[0] = 0;
        it.barrier(access::fence_space::local_space);
 
@@ -1,5 +1,4 @@
 // RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 
@@ -32,8 +32,7 @@ void compare_exchange_local_test(queue q, size_t N) {
                cgh);
        auto out =
            output_buf.template get_access<access::mode::discard_write>(cgh);
-       accessor<T, 1, access::mode::read_write, access::target::local> loc(1,
-                                                                           cgh);
+       local_accessor<T, 1> loc(1, cgh);
 
        cgh.parallel_for(nd_range<1>(N, N), [=](nd_item<1> it) {
          int gid = it.get_global_id(0);
 
@@ -1,5 +1,4 @@
 // RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 
@@ -1,12 +1,10 @@
 // RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 
 // CUDA and HIP backends have had no support for the generic address space yet.
-// Host does not support barrier.
-// XFAIL: cuda || hip || host
+// XFAIL: cuda || hip
 
 #define TEST_GENERIC_IN_LOCAL 1
 
 
@@ -1,12 +1,8 @@
 // RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 
-// Barrier is not supported on host.
-// XFAIL: host
-
 #include "compare_exchange.h"
 
 int main() { compare_exchange_test_all<access::address_space::local_space>(); }
@@ -0,0 +1,19 @@
+// REQUIRES: cuda || hip
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+
+// XFAIL: hip
+// Expected to fail: the HIP backend does not implement the atomic64 aspect check yet.
+
+#include <CL/sycl.hpp>
+#include <iostream>
+
+using namespace sycl;
+
+int main() {
+  queue Q;
+  // Print the aspect value so the has() query cannot be optimized away.
+  const bool HasAtomic64 = Q.get_device().has(aspect::atomic64);
+  std::cout << HasAtomic64 << std::endl;
+  return 0;
+}
@@ -0,0 +1,25 @@
+// REQUIRES: level_zero, level_zero_dev_kit
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out %level_zero_options
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+
+#include <CL/sycl.hpp>
+#include <level_zero/ze_api.h>
+
+using namespace sycl;
+
+int main() {
+  queue Queue;
+  device Dev = Queue.get_device();
+  // Level Zero needs stype/pNext initialized before the properties query.
+  ze_device_module_properties_t Properties{};
+  Properties.stype = ZE_STRUCTURE_TYPE_DEVICE_MODULE_PROPERTIES;
+  ze_result_t Status = zeDeviceGetModuleProperties(
+      get_native<backend::ext_oneapi_level_zero>(Dev), &Properties);
+  assert(Status == ZE_RESULT_SUCCESS && "zeDeviceGetModuleProperties failed");
+  const bool Result =
+      (Properties.flags & ZE_DEVICE_MODULE_FLAG_INT64_ATOMICS) != 0;
+  assert(Dev.has(aspect::atomic64) == Result &&
+         "The Result value differs from the implemented atomic64 check on "
+         "the L0 backend.");
+  return 0;
+}
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,5 @@`
	`1`	`+// FIXME flaky fail on CUDA`
	`2`	`+// UNSUPPORTED: cuda`
`1`	`3`	`// RUN: %clangxx -DSYCL_FALLBACK_ASSERT=1 -fsycl -fsycl-targets=%sycl_triple -DDEFINE_NDEBUG_INFILE2 -I %S/Inputs %S/assert_in_simultaneously_multiple_tus.cpp %S/Inputs/kernels_in_file2.cpp -o %t.out %threads_lib`
`2`	`4`	`// RUN: %CPU_RUN_PLACEHOLDER %t.out &> %t.txt \|\| true`
`3`	`5`	`// RUN: %CPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt`
Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,4 @@`
`1`	`1`	`// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out`
`2`		`-// RUN: %HOST_RUN_PLACEHOLDER %t.out`
`3`	`2`	`// RUN: %GPU_RUN_PLACEHOLDER %t.out`
`4`	`3`	`// RUN: %CPU_RUN_PLACEHOLDER %t.out`
`5`	`4`	`// RUN: %ACC_RUN_PLACEHOLDER %t.out`
Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,4 @@`
`1`	`1`	`// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out`
`2`		`-// RUN: %HOST_RUN_PLACEHOLDER %t.out`
`3`	`2`	`// RUN: %CPU_RUN_PLACEHOLDER %t.out`
`4`	`3`	`// RUN: %GPU_RUN_PLACEHOLDER %t.out`
`5`	`4`	`// RUN: %ACC_RUN_PLACEHOLDER %t.out`