// REQUIRES: cuda

// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -lcuda %s -o %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

#define SYCL_EXT_ONEAPI_BACKEND_CUDA_EXPERIMENTAL 1
#include <sycl/ext/oneapi/experimental/backend/cuda.hpp>
#include <sycl/sycl.hpp>

#include <cuda.h>

#include <assert.h>

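// Assert that a CUDA Driver API call returned CUDA_SUCCESS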
void cuda_check(CUresult error) { assert(error == CUDA_SUCCESS); }

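// Compile-time check that a deduced interop type matches the expected type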
template <typename refT, typename T> void check_type(T var) {
  static_assert(std::is_same_v<decltype(var), refT>);
}

#define CUDA_CHECK(error) cuda_check(error)

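// Run a small vector-add on the given queue and verify the first result
// element on the host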
bool check_queue(sycl::queue &Q) {
  constexpr size_t vec_size = 5;
  double A_Data[vec_size] = {4.0};
  double B_Data[vec_size] = {-3.0};
  double C_Data[vec_size] = {0.0};

  sycl::buffer<double, 1> A_buff(A_Data, sycl::range<1>(vec_size));
  sycl::buffer<double, 1> B_buff(B_Data, sycl::range<1>(vec_size));
  sycl::buffer<double, 1> C_buff(C_Data, sycl::range<1>(vec_size));

  Q.submit([&](sycl::handler &cgh) {
    auto A_acc = A_buff.get_access<sycl::access::mode::read>(cgh);
    auto B_acc = B_buff.get_access<sycl::access::mode::read>(cgh);
    auto C_acc = C_buff.get_access<sycl::access::mode::write>(cgh);
    cgh.parallel_for(sycl::range<1>{vec_size}, [=](sycl::id<1> idx) {
      C_acc[idx] = A_acc[idx] + B_acc[idx];
    });
  }).wait();

  sycl::host_accessor C_acc(C_buff);
  return C_acc[0] == 1;
}

int main() {
  sycl::queue Q;

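  // Get the native CUstream behind Q and the CUcontext it was created in,
  // then wrap that context in a sycl context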
  CUcontext Q_cu_ctx;
  auto native_queue = sycl::get_native<sycl::backend::ext_oneapi_cuda>(Q);
  check_type<CUstream>(native_queue);
  CUDA_CHECK(cuStreamGetCtx(native_queue, &Q_cu_ctx));
  auto Q_sycl_ctx =
      sycl::make_context<sycl::backend::ext_oneapi_cuda>(Q_cu_ctx);

  // Create a sycl queue with the context constructed from Q's native CUDA
  // context and submit some work
  {
    sycl::queue new_Q(Q_sycl_ctx, sycl::default_selector());
    assert(check_queue(new_Q));
  }

  // Check Q still works
  assert(check_queue(Q));

  // Get native cuda device
  CUdevice cu_dev;
  CUDA_CHECK(cuDeviceGet(&cu_dev, 0));
  auto sycl_dev = sycl::make_device<sycl::backend::ext_oneapi_cuda>(cu_dev);
  auto native_dev = sycl::get_native<sycl::backend::ext_oneapi_cuda>(sycl_dev);

  check_type<sycl::device>(sycl_dev);
  check_type<CUdevice>(native_dev);
  assert(native_dev == cu_dev);

  // Create sycl queue with new device and submit some work
  {
    sycl::queue new_Q(sycl_dev);
    assert(check_queue(new_Q));
  }

  // Create a new native context; cuCtxCreate makes it current, so save and
  // restore the previously current context around it
  CUcontext curr_ctx, cu_ctx;
  CUDA_CHECK(cuCtxGetCurrent(&curr_ctx));
  CUDA_CHECK(cuCtxCreate(&cu_ctx, CU_CTX_MAP_HOST, cu_dev));
  CUDA_CHECK(cuCtxSetCurrent(curr_ctx));

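  // Wrap the new native context in a sycl context and check the interop types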
  auto sycl_ctx = sycl::make_context<sycl::backend::ext_oneapi_cuda>(cu_ctx);
  auto native_ctx = sycl::get_native<sycl::backend::ext_oneapi_cuda>(sycl_ctx);

  check_type<sycl::context>(sycl_ctx);
  check_type<std::vector<CUcontext>>(native_ctx);

  // Create sycl queue with the new context and submit some work
  {
    sycl::queue new_Q(sycl_ctx, sycl::default_selector());
    assert(check_queue(new_Q));
  }
}