intel · againull · Jun 11, 2020 · Jun 5, 2020 · Jun 6, 2020 · Jun 8, 2020
@@ -146,7 +146,9 @@ class ReturnHelper {
 pi_result _pi_mem::addMapping(void *MappedTo, size_t Offset, size_t Size) {
   std::lock_guard<std::mutex> Lock(MappingsMutex);
   auto Res = Mappings.insert({MappedTo, {Offset, Size}});
-  if (Res.second) {
+  // insert() returns a pair whose second member is false when nothing was
+  // inserted, i.e. a mapping for MappedTo already exists.
+  if (!Res.second) {
     zePrint("piEnqueueMemBufferMap: duplicate mapping detected\n");
     return PI_INVALID_VALUE;
   }

@@ -77,6 +77,16 @@ add_lit_target(check-sycl-inline-asm
   DEPENDS ${SYCL_TEST_DEPS}
   )
 
+# Lit suite that runs the SYCL regression tests with SYCL_BE=PI_LEVEL0 passed
+# as a lit parameter. EXCLUDE_FROM_CHECK_ALL keeps the suite out of the
+# umbrella check-all target.
+add_lit_testsuite(check-level0 "Running the SYCL regression tests for Level Zero"
+  ${CMAKE_CURRENT_BINARY_DIR}
+  ARGS ${RT_TEST_ARGS}
+  PARAMS "SYCL_BE=PI_LEVEL0"
+  DEPENDS ${SYCL_TEST_DEPS}
+  EXCLUDE_FROM_CHECK_ALL
+  )
+
+# check-sycl depends on check-level0, so building check-sycl also brings up
+# the Level Zero suite.
+add_dependencies(check-sycl check-level0)
+
 if(SYCL_BUILD_PI_CUDA)
   add_lit_testsuite(check-sycl-cuda "Running the SYCL regression tests for CUDA"
     ${CMAKE_CURRENT_BINARY_DIR}

@@ -2,6 +2,8 @@
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
+//
+// XFAIL: level0
 
 //==---------- reinterpret.cpp --- SYCL buffer reinterpret basic test ------==//
 //

@@ -5,6 +5,8 @@
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
+//
+// XFAIL: level0
 //==------------------- image_accessor_readsampler.cpp ---------------------==//
 //==-----------------image_accessor read API test with sampler--------------==//
 //

@@ -5,6 +5,8 @@
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
+//
+// XFAIL: windows && level0
 
 //==--------------------image_accessor_readwrite.cpp ----------------------==//
 //==----------image_accessor read without sampler & write API test---------==//

@@ -5,6 +5,8 @@
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
+//
+// XFAIL: windows && level0
 
 //==--------------------image_accessor_readwrite_half.cpp -------------------==//
 //==-image_accessor read (without sampler)& write API test for half datatype-==//

@@ -2,6 +2,9 @@
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
+//
+// TODO: The test fails intermittently on level0; enable when fixed.
+// UNSUPPORTED: level0
 
 //==--- kernel_info.cpp - SYCL kernel info test ----------------------------==//
 //

@@ -8,7 +8,7 @@
 
 // TODO: Unexpected result
 // TODO: _indexers.cpp:37: int main(): Assertion `id == -1' failed.
-// XFAIL: cuda
+// XFAIL: cuda || level0
 
 #include <CL/sycl.hpp>
 

@@ -1,4 +1,4 @@
-// XFAIL: cuda
+// XFAIL: cuda || level0
 // CUDA exposes broken hierarchical parallelism.
 
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple  %s -o %t.out

@@ -2,6 +2,9 @@
 // RUN: %CPU_RUN_PLACEHOLDER SYCL_PI_TRACE=-1 %t.out 2>&1 %CPU_CHECK_PLACEHOLDER
 // RUN: %GPU_RUN_PLACEHOLDER SYCL_PI_TRACE=-1 %t.out 2>&1 %GPU_CHECK_PLACEHOLDER
 // RUN: %ACC_RUN_PLACEHOLDER SYCL_PI_TRACE=-1 %t.out 2>&1 %ACC_CHECK_PLACEHOLDER
+//
+// TODO: Behaviour is unstable for level zero on Windows. Enable when fixed.
+// UNSUPPORTED: windows && level0
 
 #include <atomic>
 #include <condition_variable>

@@ -2,6 +2,9 @@
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
+//
+// TODO: Fails intermittently on Level Zero; temporarily marked as unsupported.
+// UNSUPPORTED: level0
 
 #include <CL/sycl.hpp>
 #include <vector>

@@ -81,6 +81,7 @@
 
 def getDeviceCount(device_type):
     is_cuda = False;
+    is_level0 = False;
     process = subprocess.Popen([get_device_count_by_type_path, device_type, backend],
         stdout=subprocess.PIPE)
     (output, err) = process.communicate()
@@ -100,15 +101,17 @@ def getDeviceCount(device_type):
             TYPE=device_type, BACKEND=backend, OUT=result[0]))
 
     # if we have found gpu and there is additional information, let's check
-    # whether this is CUDA device or not
+    # whether this is a CUDA device, a Level Zero device, or neither.
     if device_type == "gpu" and value > 0 and len(result[1]):
         if re.match(r".*cuda", result[1]):
             is_cuda = True;
+        if re.match(r".*level zero", result[1]):
+            is_level0 = True;
 
     if err:
         lit_config.warning("getDeviceCount {TYPE} {BACKEND} stderr:{ERR}".format(
             TYPE=device_type, BACKEND=backend, ERR=err))
-    return [value,is_cuda]
+    return [value,is_cuda,is_level0]
 
 # Every SYCL implementation provides a host implementation.
 config.available_features.add('host')
@@ -146,7 +149,8 @@ def getDeviceCount(device_type):
 gpu_check_on_linux_substitute = ""
 
 cuda = False
-[gpu_count, cuda] = getDeviceCount("gpu")
+level0 = False
+[gpu_count, cuda, level0] = getDeviceCount("gpu")
 
 if gpu_count > 0:
     found_at_least_one_device = True
@@ -156,6 +160,8 @@ def getDeviceCount(device_type):
     config.available_features.add('gpu')
     if cuda:
        config.available_features.add('cuda')
+    elif level0:
+       config.available_features.add('level0')
 
     if platform.system() == "Linux":
         gpu_run_on_linux_substitute = "env SYCL_DEVICE_TYPE=GPU SYCL_BE={SYCL_BE} ".format(SYCL_BE=backend)
@@ -181,8 +187,8 @@ def getDeviceCount(device_type):
 config.substitutions.append( ('%ACC_RUN_PLACEHOLDER',  acc_run_substitute) )
 config.substitutions.append( ('%ACC_CHECK_PLACEHOLDER',  acc_check_substitute) )
 
-# PI API either supports OpenCL or CUDA.
-if not cuda and found_at_least_one_device:
+# LIT testing supports OpenCL, CUDA, or Level Zero; OpenCL is assumed when a
+# device was found and it is neither CUDA nor Level Zero.
+if not cuda and not level0 and found_at_least_one_device:
     config.available_features.add('opencl')
 
 if cuda:

@@ -22,4 +22,6 @@ int main() {
 }
 
 // TODO: Address a Windows-specific issue with integration header filenames
-// XFAIL: system-windows
+// XFAIL: system-windows && !level0
+// TODO: The test fails intermittently on Windows for Level Zero. Enable when fixed.
+// UNSUPPORTED: system-windows && level0
@@ -4,9 +4,10 @@
 // RUN: env SYCL_PI_TRACE=2 %GPU_RUN_PLACEHOLDER %t.out 2>&1 %GPU_CHECK_PLACEHOLDER
 // TODO: For now PI checks are skipped for ACC device. To decide if it's good.
 // RUN: env %ACC_RUN_PLACEHOLDER %t.out
-
-// UNSUPPORTED: cuda
+//
+// UNSUPPORTED: cuda || windows && level0
 // CUDA cannot support OpenCL spec conform images.
+// TODO: test hangs on level0 (only Windows is excluded above), enable when fixed.
 
 //==-------------- image_access.cpp - SYCL image accessors test  -----------==//
 //

@@ -13,6 +13,8 @@
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
+//
+// XFAIL: linux && level0
 
 #include <CL/sycl.hpp>
 

@@ -1,6 +1,6 @@
 // UNSUPPORTED: cuda
 // CUDA does not support SPIR-V.
-
+//
 // >> ---- compile src1
 // >> device compilation...
 // RUN: %clangxx -fsycl-device-only -Xclang -fsycl-int-header=sycl_ihdr_a.h %s -c -o a_kernel.bc -I %sycl_include -Wno-sycl-strict

@@ -6,7 +6,7 @@
 // TODO: re-enable after CI drivers are updated to newer which support spec
 // constants:
 // XFAIL: linux && opencl
-// UNSUPPORTED: cuda
+// UNSUPPORTED: cuda || level0
 //
 //==----------- spec_const_hw.cpp ------------------------------------------==//
 //

@@ -6,7 +6,7 @@
 // TODO: re-enable after CI drivers are updated to newer which support spec
 // constants:
 // XFAIL: linux && opencl
-// UNSUPPORTED: cuda
+// UNSUPPORTED: cuda || level0
 //
 //==----------- spec_const_redefine.cpp ------------------------------------==//
 //

@@ -1,4 +1,4 @@
-// XFAIL: cuda
+// XFAIL: cuda || level0
 // piextUSM*Alloc functions for CUDA are not behaving as described in
 // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
 // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc

@@ -6,12 +6,21 @@ add_subdirectory(sycl-ls)
 
 # TODO: move each tool in its own sub-directory
 add_executable(get_device_count_by_type get_device_count_by_type.cpp)
-add_dependencies(get_device_count_by_type ocl-headers ocl-icd)
+add_dependencies(get_device_count_by_type ocl-headers ocl-icd l0-loader)
+
+# Full path of the Level Zero loader (ze_loader) library inside LLVM's library
+# output directory. With MSVC the static-library prefix/suffix is used to form
+# the file name; everywhere else the shared-library prefix/suffix is used.
+if(MSVC)
+  set(L0_LIBRARY
+      "${LLVM_LIBRARY_OUTPUT_INTDIR}/${CMAKE_STATIC_LIBRARY_PREFIX}ze_loader${CMAKE_STATIC_LIBRARY_SUFFIX}")
+else()
+  set(L0_LIBRARY
+      "${LLVM_LIBRARY_OUTPUT_INTDIR}/${CMAKE_SHARED_LIBRARY_PREFIX}ze_loader${CMAKE_SHARED_LIBRARY_SUFFIX}")
+endif()
 
 target_link_libraries(get_device_count_by_type
   PRIVATE
     OpenCL::Headers
     ${OpenCL_LIBRARIES}
+    ${L0_LIBRARY}
     $<$<BOOL:${SYCL_BUILD_PI_CUDA}>:cudadrv>
 )
 target_compile_definitions(get_device_count_by_type

@@ -14,6 +14,7 @@
 
 #include <CL/cl.h>
 #include <CL/cl_ext.h>
+#include <level_zero/zet_api.h>
 
 #ifdef USE_PI_CUDA
 #include <cuda.h>
@@ -31,7 +32,7 @@ static const std::string help =
     "   Help\n"
     "   Example: ./get_device_count_by_type cpu opencl\n"
     "   Supported device types: cpu/gpu/accelerator/default/all\n"
-    "   Supported backends: PI_CUDA/PI_OPENCL \n"
+    "   Supported backends: PI_CUDA/PI_OPENCL/PI_LEVEL0 \n"
     "   Output format: <number_of_devices>:<additional_Information>";
 
 // Return the string with all characters translated to lower case.
@@ -113,6 +114,56 @@ static bool queryOpenCL(cl_device_type deviceType, cl_uint &deviceCount,
   return true;
 }
 
+// Count the devices exposed by the first Level Zero driver.
+//
+// deviceType  - requested device type; DEFAULT, ALL and GPU are counted, any
+//               other type only produces a warning and a count of zero.
+// deviceCount - reset to 0 on entry; receives the device count on success.
+// msg         - "level zero " followed by the device type string on success,
+//               otherwise an ERROR/WARNING description.
+//
+// Returns false when Level Zero cannot be initialized or queried, or when no
+// driver is present; returns true otherwise (including the warning case).
+static bool queryLevelZero(cl_device_type deviceType, cl_uint &deviceCount,
+                           std::string &msg) {
+  deviceCount = 0u;
+  ze_result_t zeResult = zeInit(ZE_INIT_FLAG_NONE);
+  if (zeResult != ZE_RESULT_SUCCESS) {
+    msg = "ERROR: Level Zero initialization error";
+    return false;
+  }
+
+  // First call with a null buffer only reports how many drivers exist.
+  uint32_t zeDriverCount = 0;
+  zeResult = zeDriverGet(&zeDriverCount, nullptr);
+  if (zeResult != ZE_RESULT_SUCCESS) {
+    msg = "ERROR: Level Zero error querying driver count";
+    return false;
+  }
+
+  if (zeDriverCount == 0) {
+    msg = "ERROR: Level Zero no driver found";
+    return false;
+  }
+
+  // Only the first driver is inspected, and the buffer holds a single handle.
+  // Request exactly one handle: passing the full driver count here would let
+  // zeDriverGet write past zeDriver when several drivers are installed.
+  zeDriverCount = 1;
+  ze_driver_handle_t zeDriver = nullptr;
+  zeResult = zeDriverGet(&zeDriverCount, &zeDriver);
+  if (zeResult != ZE_RESULT_SUCCESS) {
+    msg = "ERROR: Level Zero error querying driver";
+    return false;
+  }
+
+  switch (deviceType) {
+  case CL_DEVICE_TYPE_DEFAULT: // Fall through.
+  case CL_DEVICE_TYPE_ALL:     // Fall through.
+  case CL_DEVICE_TYPE_GPU: {
+    // A null buffer makes zeDeviceGet report only the number of devices.
+    uint32_t zeDeviceCount = 0;
+    zeResult = zeDeviceGet(zeDriver, &zeDeviceCount, nullptr);
+    if (zeResult != ZE_RESULT_SUCCESS) {
+      msg = "ERROR: Level Zero error querying device count";
+      return false;
+    }
+    deviceCount = static_cast<cl_uint>(zeDeviceCount);
+    msg = "level zero ";
+    msg += deviceTypeToString(deviceType);
+    return true;
+  }
+  default:
+    // Not an error: other device types simply report zero devices.
+    msg = "WARNING: Level Zero unsupported device type ";
+    msg += deviceTypeToString(deviceType);
+    return true;
+  }
+}
+
 static bool queryCUDA(cl_device_type deviceType, cl_uint &deviceCount,
                       std::string &msg) {
   deviceCount = 0u;
@@ -208,6 +259,8 @@ int main(int argc, char *argv[]) {
 
   if (backend == "opencl" || backend == "pi_opencl") {
     querySuccess = queryOpenCL(deviceType, deviceCount, msg);
+  } else if (backend == "level0" || backend == "pi_level0") {
+    querySuccess = queryLevelZero(deviceType, deviceCount, msg);
   } else if (backend == "cuda" || backend == "pi_cuda") {
     querySuccess = queryCUDA(deviceType, deviceCount, msg);
   } else {