intel
diff --git a/‎.github/CODEOWNERS
Lines changed: 2 additions & 1 deletion b/‎.github/CODEOWNERS
Lines changed: 2 additions & 1 deletion
diff --git a/‎SYCL/Assert/assert_in_kernels.cpp
Lines changed: 3 additions & 3 deletions b/‎SYCL/Assert/assert_in_kernels.cpp
Lines changed: 3 additions & 3 deletions
diff --git a/‎SYCL/Assert/assert_in_kernels_ndebug.cpp
Lines changed: 2 additions & 2 deletions b/‎SYCL/Assert/assert_in_kernels_ndebug.cpp
Lines changed: 2 additions & 2 deletions
diff --git a/‎SYCL/Assert/assert_in_kernels_win.cpp
Lines changed: 1 addition & 1 deletion b/‎SYCL/Assert/assert_in_kernels_win.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎SYCL/Assert/assert_in_multiple_tus.cpp
Lines changed: 3 additions & 3 deletions b/‎SYCL/Assert/assert_in_multiple_tus.cpp
Lines changed: 3 additions & 3 deletions
diff --git a/‎SYCL/Assert/assert_in_multiple_tus_one_ndebug.cpp
Lines changed: 3 additions & 3 deletions b/‎SYCL/Assert/assert_in_multiple_tus_one_ndebug.cpp
Lines changed: 3 additions & 3 deletions
diff --git a/‎SYCL/Assert/assert_in_multiple_tus_one_ndebug_win.cpp
Lines changed: 1 addition & 1 deletion b/‎SYCL/Assert/assert_in_multiple_tus_one_ndebug_win.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎SYCL/Assert/assert_in_multiple_tus_win.cpp
Lines changed: 1 addition & 1 deletion b/‎SYCL/Assert/assert_in_multiple_tus_win.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎SYCL/Assert/assert_in_one_kernel.cpp
Lines changed: 3 additions & 3 deletions b/‎SYCL/Assert/assert_in_one_kernel.cpp
Lines changed: 3 additions & 3 deletions
diff --git a/‎SYCL/Assert/assert_in_one_kernel_win.cpp
Lines changed: 1 addition & 1 deletion b/‎SYCL/Assert/assert_in_one_kernel_win.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎SYCL/Assert/assert_in_simultaneous_kernels.cpp
Lines changed: 10 additions & 4 deletions b/‎SYCL/Assert/assert_in_simultaneous_kernels.cpp
Lines changed: 10 additions & 4 deletions
diff --git a/‎SYCL/Assert/assert_in_simultaneous_kernels_win.cpp
Lines changed: 8 additions & 2 deletions b/‎SYCL/Assert/assert_in_simultaneous_kernels_win.cpp
Lines changed: 8 additions & 2 deletions
diff --git a/‎SYCL/Assert/assert_in_simultaneously_multiple_tus.cpp
Lines changed: 10 additions & 8 deletions b/‎SYCL/Assert/assert_in_simultaneously_multiple_tus.cpp
Lines changed: 10 additions & 8 deletions
diff --git a/‎SYCL/Assert/assert_in_simultaneously_multiple_tus_one_ndebug.cpp
Lines changed: 10 additions & 8 deletions b/‎SYCL/Assert/assert_in_simultaneously_multiple_tus_one_ndebug.cpp
Lines changed: 10 additions & 8 deletions
diff --git a/‎SYCL/Basic/device_code_dae.cpp
Lines changed: 0 additions & 76 deletions b/‎SYCL/Basic/device_code_dae.cpp
Lines changed: 0 additions & 76 deletions
diff --git a/‎SYCL/Basic/device_event.cpp
Lines changed: 3 additions & 2 deletions b/‎SYCL/Basic/device_event.cpp
Lines changed: 3 additions & 2 deletions
diff --git a/‎SYCL/Basic/diagnostics/handler.cpp
Lines changed: 4 additions & 0 deletions b/‎SYCL/Basic/diagnostics/handler.cpp
Lines changed: 4 additions & 0 deletions
diff --git a/‎SYCL/Basic/get_backend.cpp
Lines changed: 0 additions & 5 deletions b/‎SYCL/Basic/get_backend.cpp
Lines changed: 0 additions & 5 deletions
diff --git a/‎SYCL/Basic/group_async_copy.cpp
Lines changed: 22 additions & 8 deletions b/‎SYCL/Basic/group_async_copy.cpp
Lines changed: 22 additions & 8 deletions
@@ -23,12 +23,13 @@ SYCL/DeviceCodeSplit @AlexeySachkov  @Fznamznon
 
 # Device library
 SYCL/DeviceLib @vzakhari
+SYCL/DeviceLib/ITTAnnotations @vzakhari @MrSidims @AGindinson
 
 # dot_product API
 SYCL/DotProduct @rdeodhar
 
 # Explicit SIMD
-SYCL/ESIMD @kbobrovs @DenisBakhvalov
+SYCL/ESIMD @kbobrovs @v-klochkov
 
 # Functor
 SYCL/Functor @AlexeySachkov
 
@@ -1,7 +1,7 @@
 // REQUIRES: linux
-// FIXME unsuppoerted on CUDA until fallback libdevice becomes available
-// UNSUPPORTED: cuda
-// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
+// FIXME unsupported on CUDA and HIP until fallback libdevice becomes available
+// UNSUPPORTED: cuda || hip
+// RUN: %clangxx -DSYCL_ENABLE_FALLBACK_ASSERT -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out %CPU_ERR_REDIRECT %CPU_CHECK_PLACEHOLDER
 // RUN: %GPU_RUN_PLACEHOLDER %t.out %GPU_ERR_REDIRECT %GPU_CHECK_PLACEHOLDER
 // RUN: %ACC_RUN_PLACEHOLDER %t.out %ACC_ERR_REDIRECT %ACC_CHECK_PLACEHOLDER
 
@@ -1,5 +1,5 @@
-// FIXME unsuppoerted on CUDA until fallback libdevice becomes available
-// UNSUPPORTED: cuda
+// FIXME unsupported on CUDA and HIP until fallback libdevice becomes available
+// UNSUPPORTED: cuda || hip
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DNDEBUG %S/assert_in_kernels.cpp -o %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out %CPU_CHECK_PLACEHOLDER
 // RUN: %GPU_RUN_PLACEHOLDER %t.out %GPU_CHECK_PLACEHOLDER
 
@@ -1,5 +1,5 @@
 // REQUIRES: windows
-// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
+// RUN: %clangxx -DSYCL_ENABLE_FALLBACK_ASSERT -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out &> %t.txt || true
 // RUN: %CPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt
 // RUN: %GPU_RUN_PLACEHOLDER %t.out &> %t.txt || true
 
@@ -1,7 +1,7 @@
 // REQUIRES: linux
-// FIXME unsuppoerted on CUDA until fallback libdevice becomes available
-// UNSUPPORTED: cuda
-// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -I %S/Inputs %s %S/Inputs/kernels_in_file2.cpp -o %t.out
+// FIXME unsupported on CUDA and HIP until fallback libdevice becomes available
+// UNSUPPORTED: cuda || hip
+// RUN: %clangxx -DSYCL_ENABLE_FALLBACK_ASSERT -fsycl -fsycl-targets=%sycl_triple -I %S/Inputs %s %S/Inputs/kernels_in_file2.cpp -o %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out %CPU_ERR_REDIRECT %CPU_CHECK_PLACEHOLDER
 // RUN: %GPU_RUN_PLACEHOLDER %t.out %GPU_ERR_REDIRECT %GPU_CHECK_PLACEHOLDER
 // RUN: %ACC_RUN_PLACEHOLDER %t.out %ACC_ERR_REDIRECT %ACC_CHECK_PLACEHOLDER
 
@@ -1,7 +1,7 @@
 // REQUIRES: linux
-// FIXME unsuppoerted on CUDA until fallback libdevice becomes available
-// UNSUPPORTED: cuda
-// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DDEFINE_NDEBUG_INFILE2 -I %S/Inputs %S/assert_in_multiple_tus.cpp %S/Inputs/kernels_in_file2.cpp -o %t.out
+// FIXME unsupported on CUDA and HIP until fallback libdevice becomes available
+// UNSUPPORTED: cuda || hip
+// RUN: %clangxx -DSYCL_ENABLE_FALLBACK_ASSERT -fsycl -fsycl-targets=%sycl_triple -DDEFINE_NDEBUG_INFILE2 -I %S/Inputs %S/assert_in_multiple_tus.cpp %S/Inputs/kernels_in_file2.cpp -o %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out %CPU_ERR_REDIRECT %CPU_CHECK_PLACEHOLDER
 // RUN: %GPU_RUN_PLACEHOLDER %t.out %GPU_ERR_REDIRECT %GPU_CHECK_PLACEHOLDER
 // RUN: %ACC_RUN_PLACEHOLDER %t.out %ACC_ERR_REDIRECT %ACC_CHECK_PLACEHOLDER
 
@@ -1,5 +1,5 @@
 // REQUIRES: windows
-// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DDEFINE_NDEBUG_INFILE2 -I %S/Inputs %S/assert_in_multiple_tus.cpp %S/Inputs/kernels_in_file2.cpp -o %t.out
+// RUN: %clangxx -DSYCL_ENABLE_FALLBACK_ASSERT -fsycl -fsycl-targets=%sycl_triple -DDEFINE_NDEBUG_INFILE2 -I %S/Inputs %S/assert_in_multiple_tus.cpp %S/Inputs/kernels_in_file2.cpp -o %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out %CPU_ERR_REDIRECT %CPU_CHECK_PLACEHOLDER
 // RUN: %GPU_RUN_PLACEHOLDER %t.out %GPU_ERR_REDIRECT %GPU_CHECK_PLACEHOLDER
 // RUN: %ACC_RUN_PLACEHOLDER %t.out %ACC_ERR_REDIRECT %ACC_CHECK_PLACEHOLDER
 
@@ -1,5 +1,5 @@
 // REQUIRES: windows
-// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -I %S/Inputs %s %S/Inputs/kernels_in_file2.cpp -o %t.out
+// RUN: %clangxx -DSYCL_ENABLE_FALLBACK_ASSERT -fsycl -fsycl-targets=%sycl_triple -I %S/Inputs %s %S/Inputs/kernels_in_file2.cpp -o %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out %CPU_ERR_REDIRECT %CPU_CHECK_PLACEHOLDER
 // RUN: %GPU_RUN_PLACEHOLDER %t.out %GPU_ERR_REDIRECT %GPU_CHECK_PLACEHOLDER
 // RUN: %ACC_RUN_PLACEHOLDER %t.out %ACC_ERR_REDIRECT %ACC_CHECK_PLACEHOLDER
 
@@ -1,7 +1,7 @@
 // REQUIRES: linux
-// FIXME unsuppoerted on CUDA until fallback libdevice becomes available
-// UNSUPPORTED: cuda
-// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
+// FIXME unsupported on CUDA and HIP until fallback libdevice becomes available
+// UNSUPPORTED: cuda || hip
+// RUN: %clangxx -DSYCL_ENABLE_FALLBACK_ASSERT -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out %CPU_ERR_REDIRECT %CPU_CHECK_PLACEHOLDER
 // RUN: %GPU_RUN_PLACEHOLDER %t.out %GPU_ERR_REDIRECT %GPU_CHECK_PLACEHOLDER
 // RUN: %ACC_RUN_PLACEHOLDER %t.out %ACC_ERR_REDIRECT %ACC_CHECK_PLACEHOLDER
 
@@ -1,5 +1,5 @@
 // REQUIRES: windows
-// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
+// RUN: %clangxx -DSYCL_ENABLE_FALLBACK_ASSERT -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out %CPU_ERR_REDIRECT %CPU_CHECK_PLACEHOLDER
 // RUN: %GPU_RUN_PLACEHOLDER %t.out %GPU_ERR_REDIRECT %GPU_CHECK_PLACEHOLDER
 // RUN: %ACC_RUN_PLACEHOLDER %t.out %ACC_ERR_REDIRECT %ACC_CHECK_PLACEHOLDER
 
@@ -1,9 +1,15 @@
 // REQUIRES: linux
-// FIXME unsuppoerted on CUDA until fallback libdevice becomes available
-// UNSUPPORTED: cuda
-// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out %threads_lib
+// FIXME unsupported on CUDA and HIP until fallback libdevice becomes available
+// UNSUPPORTED: cuda || hip
+// RUN: %clangxx -DSYCL_ENABLE_FALLBACK_ASSERT -fsycl -fsycl-targets=%sycl_triple %s -o %t.out %threads_lib
 // RUN: %CPU_RUN_PLACEHOLDER %t.out %CPU_ERR_REDIRECT %CPU_CHECK_PLACEHOLDER
-// RUN: %GPU_RUN_PLACEHOLDER %t.out %GPU_ERR_REDIRECT %GPU_CHECK_PLACEHOLDER
+//
+// Since this is a multi-threaded application, enable the memory tracking and
+// deferred release feature in the Level Zero plugin to avoid releasing memory
+// too early. This is necessary because SYCL RT currently sets the indirect
+// access flag for all kernels and the Level Zero runtime doesn't support
+// deferred release yet.
+// RUN: %GPU_RUN_PLACEHOLDER env SYCL_PI_LEVEL_ZERO_TRACK_INDIRECT_ACCESS_MEMORY=1 %t.out %GPU_ERR_REDIRECT %GPU_CHECK_PLACEHOLDER
 // RUN: %ACC_RUN_PLACEHOLDER %t.out %ACC_ERR_REDIRECT %ACC_CHECK_PLACEHOLDER
 //
 // CHECK:      {{.*}}assert_in_simultaneous_kernels.hpp:12: void assertFunc(): global id: [9,7,0], local id: [0,0,0]
 
@@ -1,7 +1,13 @@
 // REQUIRES: windows
-// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out %threads_lib
+// RUN: %clangxx -DSYCL_ENABLE_FALLBACK_ASSERT -fsycl -fsycl-targets=%sycl_triple %s -o %t.out %threads_lib
 // RUN: %CPU_RUN_PLACEHOLDER %t.out %CPU_ERR_REDIRECT %CPU_CHECK_PLACEHOLDER
-// RUN: %GPU_RUN_PLACEHOLDER %t.out %GPU_ERR_REDIRECT %GPU_CHECK_PLACEHOLDER
+//
+// Since this is a multi-threaded application, enable the memory tracking and
+// deferred release feature in the Level Zero plugin to avoid releasing memory
+// too early. This is necessary because SYCL RT currently sets the indirect
+// access flag for all kernels and the Level Zero runtime doesn't support
+// deferred release yet.
+// RUN: %GPU_RUN_PLACEHOLDER env SYCL_PI_LEVEL_ZERO_TRACK_INDIRECT_ACCESS_MEMORY=1 %t.out %GPU_ERR_REDIRECT %GPU_CHECK_PLACEHOLDER
 // RUN: %ACC_RUN_PLACEHOLDER %t.out %ACC_ERR_REDIRECT %ACC_CHECK_PLACEHOLDER
 //
 // FIXME Windows version prints '(null)' instead of '<unknown func>' once in a
 
@@ -1,12 +1,14 @@
-// FIXME unsuppoerted on CUDA until fallback libdevice becomes available
-// UNSUPPORTED: cuda
-// FIXME disable the test on Linux on OpenCL on FPGA as there's some funny
-// failure taking place in this configuration on CI. The test should be enabled
-// with this configuration once it's debugged.
-// UNSUPPORTED: linux && opencl && fpga
-// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -I %S/Inputs %s %S/Inputs/kernels_in_file2.cpp -o %t.out %threads_lib
+// FIXME unsupported on CUDA and HIP until fallback libdevice becomes available
+// UNSUPPORTED: cuda || hip
+// RUN: %clangxx -DSYCL_ENABLE_FALLBACK_ASSERT -fsycl -fsycl-targets=%sycl_triple -I %S/Inputs %s %S/Inputs/kernels_in_file2.cpp -o %t.out %threads_lib
 // RUN: %CPU_RUN_PLACEHOLDER %t.out %CPU_ERR_REDIRECT %CPU_CHECK_PLACEHOLDER
-// RUN: %GPU_RUN_PLACEHOLDER %t.out %GPU_ERR_REDIRECT %GPU_CHECK_PLACEHOLDER
+//
+// Since this is a multi-threaded application, enable the memory tracking and
+// deferred release feature in the Level Zero plugin to avoid releasing memory
+// too early. This is necessary because SYCL RT currently sets the indirect
+// access flag for all kernels and the Level Zero runtime doesn't support
+// deferred release yet.
+// RUN: %GPU_RUN_PLACEHOLDER env SYCL_PI_LEVEL_ZERO_TRACK_INDIRECT_ACCESS_MEMORY=1 %t.out %GPU_ERR_REDIRECT %GPU_CHECK_PLACEHOLDER
 // RUN: %ACC_RUN_PLACEHOLDER %t.out %ACC_ERR_REDIRECT %ACC_CHECK_PLACEHOLDER
 //
 // CHECK:      {{this message from file1|this message from file2}}
 
@@ -1,12 +1,14 @@
-// FIXME unsuppoerted on CUDA until fallback libdevice becomes available
-// UNSUPPORTED: cuda
-// FIXME disable the test on Linux on OpenCL on FPGA as there's some funny
-// failure taking place in this configuration on CI. The test should be enabled
-// with this configuration once it's debugged.
-// UNSUPPORTED: linux && opencl && fpga
-// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DDEFINE_NDEBUG_INFILE2 -I %S/Inputs %S/assert_in_simultaneously_multiple_tus.cpp %S/Inputs/kernels_in_file2.cpp -o %t.out %threads_lib
+// FIXME unsupported on CUDA and HIP until fallback libdevice becomes available
+// UNSUPPORTED: cuda || hip
+// RUN: %clangxx -DSYCL_ENABLE_FALLBACK_ASSERT -fsycl -fsycl-targets=%sycl_triple -DDEFINE_NDEBUG_INFILE2 -I %S/Inputs %S/assert_in_simultaneously_multiple_tus.cpp %S/Inputs/kernels_in_file2.cpp -o %t.out %threads_lib
 // RUN: %CPU_RUN_PLACEHOLDER %t.out %CPU_ERR_REDIRECT %CPU_CHECK_PLACEHOLDER
-// RUN: %GPU_RUN_PLACEHOLDER %t.out %GPU_ERR_REDIRECT %GPU_CHECK_PLACEHOLDER
+//
+// Since this is a multi-threaded application, enable the memory tracking and
+// deferred release feature in the Level Zero plugin to avoid releasing memory
+// too early. This is necessary because SYCL RT currently sets the indirect
+// access flag for all kernels and the Level Zero runtime doesn't support
+// deferred release yet.
+// RUN: %GPU_RUN_PLACEHOLDER env SYCL_PI_LEVEL_ZERO_TRACK_INDIRECT_ACCESS_MEMORY=1 %t.out %GPU_ERR_REDIRECT %GPU_CHECK_PLACEHOLDER
 // RUN: %ACC_RUN_PLACEHOLDER %t.out %ACC_ERR_REDIRECT %ACC_CHECK_PLACEHOLDER
 //
 // CHECK:      this message from file1
 
@@ -5,8 +5,9 @@
 // TODO: nd_item::barrier() is not implemented on HOST
 // RUNx: %HOST_RUN_PLACEHOLDER %t.run
 //
-// Crashes on AMD
-// XFAIL: hip_amd
+// Crashes on AMD, returns error "Barrier is not supported on the host device
+// yet." with Nvidia.
+// XFAIL: hip_amd || hip_nvidia
 
 //==--------device_event.cpp - SYCL class device_event test ----------------==//
 //
 
@@ -1,5 +1,9 @@
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: %BE_RUN_PLACEHOLDER %t.out | FileCheck %s
+//
+// Appears to fail on HIP Nvidia because 'no device of requested type available'
+// when constructing a queue with an exception_list.
+// XFAIL: hip_nvidia
 //==------------------- handler.cpp ----------------------------------------==//
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 
@@ -46,11 +46,6 @@ int main() {
         return_fail();
       }
 
-      program prog(c);
-      if (prog.get_backend() != plt.get_backend()) {
-        return_fail();
-      }
-
       default_selector sel;
       queue q(c, sel);
       if (q.get_backend() != plt.get_backend()) {
 
@@ -1,4 +1,4 @@
-// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.run
+// RUN: %clangxx -fsycl -std=c++17 -fsycl-targets=%sycl_triple %s -o %t.run
 // RUN: %GPU_RUN_PLACEHOLDER %t.run
 // RUN: %CPU_RUN_PLACEHOLDER %t.run
 // RUN: %ACC_RUN_PLACEHOLDER %t.run
@@ -13,7 +13,11 @@
 
 using namespace cl::sycl;
 
-template <typename T> class KernelName;
+template <typename T> class TypeHelper;
+
+template <typename T>
+using KernelName = class TypeHelper<typename std::conditional<
+    std::is_same<T, std::byte>::value, unsigned char, T>::type>;
 
 // Define the number of work items to enqueue.
 const size_t NElems = 32;
@@ -24,14 +28,14 @@ template <typename T> void initInputBuffer(buffer<T, 1> &Buf, size_t Stride) {
   auto Acc = Buf.template get_access<access::mode::write>();
   for (size_t I = 0; I < Buf.get_count(); I += WorkGroupSize) {
     for (size_t J = 0; J < WorkGroupSize; J++)
-      Acc[I + J] = I + J + ((J % Stride == 0) ? 100 : 0);
+      Acc[I + J] = static_cast<T>(I + J + ((J % Stride == 0) ? 100 : 0));
   }
 }
 
 template <typename T> void initOutputBuffer(buffer<T, 1> &Buf) {
   auto Acc = Buf.template get_access<access::mode::write>();
   for (size_t I = 0; I < Buf.get_count(); I++)
-    Acc[I] = 0;
+    Acc[I] = static_cast<T>(0);
 }
 
 template <typename T> struct is_vec : std::false_type {};
@@ -48,9 +52,8 @@ template <typename T> bool checkEqual(vec<T, 4> A, size_t B) {
 }
 
 template <typename T>
-typename std::enable_if<!is_vec<T>::value, bool>::type checkEqual(T A,
-                                                                  size_t B) {
-  T TB = B;
+typename std::enable_if_t<!is_vec<T>::value, bool> checkEqual(T A, size_t B) {
+  T TB = static_cast<T>(B);
   return A == TB;
 }
 
@@ -67,7 +70,16 @@ template <typename T> std::string toString(vec<T, 4> A) {
 }
 
 template <typename T = void>
-typename std::enable_if<!is_vec<T>::value, std::string>::type toString(T A) {
+typename std::enable_if_t<
+    !is_vec<T>::value && std::is_same<T, std::byte>::value, std::string>
+toString(T A) {
+  return std::to_string((unsigned char)A);
+}
+
+template <typename T = void>
+typename std::enable_if_t<
+    !is_vec<T>::value && !std::is_same<T, std::byte>::value, std::string>
+toString(T A) {
   return std::to_string(A);
 }
 
@@ -156,6 +168,8 @@ int main() {
       return 1;
     if (test<cl::sycl::cl_bool>(Stride))
       return 1;
+    if (test<std::byte>(Stride))
+      return 1;
   }
 
   std::cout << "Test passed.\n";