// REQUIRES: cuda

// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -lcuda %s -o %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

#define SYCL_EXT_ONEAPI_BACKEND_CUDA_EXPERIMENTAL 1
#include <sycl/ext/oneapi/experimental/backend/cuda.hpp>
#include <sycl/sycl.hpp>

#include <cuda.h>

#include <assert.h>

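// Assert that a CUDA Driver API call returned CUDA_SUCCESS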
void cuda_check(CUresult error) { assert(error == CUDA_SUCCESS); }

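// Compile-time check that a deduced interop type matches the expected type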
template <typename refT, typename T> void check_type(T var) {
  static_assert(std::is_same_v<decltype(var), refT>);
}

#define CUDA_CHECK(error) cuda_check(error)

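// Run a small vector-add on the given queue and verify the first result
// element on the host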
bool check_queue(sycl::queue &Q) {
  constexpr size_t vec_size = 5;
  double A_Data[vec_size] = {4.0};
  double B_Data[vec_size] = {-3.0};
  double C_Data[vec_size] = {0.0};

  sycl::buffer<double, 1> A_buff(A_Data, sycl::range<1>(vec_size));
  sycl::buffer<double, 1> B_buff(B_Data, sycl::range<1>(vec_size));
  sycl::buffer<double, 1> C_buff(C_Data, sycl::range<1>(vec_size));

  Q.submit([&](sycl::handler &cgh) {
    auto A_acc = A_buff.get_access<sycl::access::mode::read>(cgh);
    auto B_acc = B_buff.get_access<sycl::access::mode::read>(cgh);
    auto C_acc = C_buff.get_access<sycl::access::mode::write>(cgh);
    cgh.parallel_for(sycl::range<1>{vec_size}, [=](sycl::id<1> idx) {
      C_acc[idx] = A_acc[idx] + B_acc[idx];
    });
  }).wait();

  sycl::host_accessor C_acc(C_buff);
  return C_acc[0] == 1;
}

int main() {
  sycl::queue Q;

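  // Get the native CUstream behind Q and the CUcontext it was created in,
  // then wrap that context in a sycl context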
  CUcontext Q_cu_ctx;
  auto native_queue = sycl::get_native<sycl::backend::ext_oneapi_cuda>(Q);
  check_type<CUstream>(native_queue);
  CUDA_CHECK(cuStreamGetCtx(native_queue, &Q_cu_ctx));
  auto Q_sycl_ctx =
      sycl::make_context<sycl::backend::ext_oneapi_cuda>(Q_cu_ctx);

  // Create a sycl queue with the context constructed from Q's native CUDA
  // context and submit some work
  {
    sycl::queue new_Q(Q_sycl_ctx, sycl::default_selector());
    assert(check_queue(new_Q));
  }

  // Check Q still works
  assert(check_queue(Q));

  // Get native cuda device
  CUdevice cu_dev;
  CUDA_CHECK(cuDeviceGet(&cu_dev, 0));
  auto sycl_dev = sycl::make_device<sycl::backend::ext_oneapi_cuda>(cu_dev);
  auto native_dev = sycl::get_native<sycl::backend::ext_oneapi_cuda>(sycl_dev);

  check_type<sycl::device>(sycl_dev);
  check_type<CUdevice>(native_dev);
  assert(native_dev == cu_dev);

  // Create sycl queue with new device and submit some work
  {
    sycl::queue new_Q(sycl_dev);
    assert(check_queue(new_Q));
  }

  // Create a new native context; cuCtxCreate makes it current, so save and
  // restore the previously current context around it
  CUcontext curr_ctx, cu_ctx;
  CUDA_CHECK(cuCtxGetCurrent(&curr_ctx));
  CUDA_CHECK(cuCtxCreate(&cu_ctx, CU_CTX_MAP_HOST, cu_dev));
  CUDA_CHECK(cuCtxSetCurrent(curr_ctx));

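  // Wrap the new native context in a sycl context and check the interop types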
  auto sycl_ctx = sycl::make_context<sycl::backend::ext_oneapi_cuda>(cu_ctx);
  auto native_ctx = sycl::get_native<sycl::backend::ext_oneapi_cuda>(sycl_ctx);

  check_type<sycl::context>(sycl_ctx);
  check_type<std::vector<CUcontext>>(native_ctx);

  // Create sycl queue with the new context and submit some work
  {
    sycl::queue new_Q(sycl_ctx, sycl::default_selector());
    assert(check_queue(new_Q));
  }
}