Skip to content

Commit b903854

Browse files
authored
[SYCL][CUDA] Updated documentation for CUDA backend (#2042)
Minor udpates to the CUDA backend documentation and starting guide. Signed-off-by: Ruyman Reyes <[email protected]>
1 parent f3b8cdf commit b903854

File tree

2 files changed

+35
-10
lines changed

2 files changed

+35
-10
lines changed

sycl/doc/GetStartedGuide.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,8 @@ the system, refer to
129129

130130
Currently, the only combination tested is Ubuntu 18.04 with CUDA 10.2 using
131131
a Titan RTX GPU (SM 71), but it should work on any GPU compatible with SM 50 or
132-
above.
132+
above. The default SM for the NVIDIA CUDA backend is 5.0. Users can specify
133+
lower values, but some features may not be supported.
133134

134135
### Deployment
135136

@@ -513,11 +514,10 @@ class CUDASelector : public cl::sycl::device_selector {
513514
public:
514515
int operator()(const cl::sycl::device &Device) const override {
515516
using namespace cl::sycl::info;
517+
const std::string DriverVersion = Device.get_info<device::driver_version>();
516518
517-
const std::string DeviceName = Device.get_info<device::name>();
518-
const std::string DeviceVendor = Device.get_info<device::vendor>();
519-
520-
if (Device.is_gpu() && (DeviceName.find("NVIDIA") != std::string::npos)) {
519+
if (Device.is_gpu() && (DriverVersion.find("CUDA") != std::string::npos)) {
520+
std::cout << " CUDA device found " << std::endl;
521521
return 1;
522522
};
523523
return -1;

sycl/plugins/cuda/pi_cuda.hpp

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -176,28 +176,47 @@ struct _pi_context {
176176
std::vector<deleter_data> extended_deleters_;
177177
};
178178

179-
/// PI Mem mapping to a CUDA memory allocation
180-
///
179+
/// PI Mem mapping to CUDA memory allocations, both data and texture/surface.
180+
/// \brief Represents non-SVM allocations on the CUDA backend.
181+
/// Keeps tracks of all mapped regions used for Map/Unmap calls.
182+
/// Only one region can be active at the same time per allocation.
181183
struct _pi_mem {
182184

183185
// TODO: Move as much shared data up as possible
184186
using pi_context = _pi_context *;
185187

188+
// Context where the memory object is accessibles
186189
pi_context context_;
190+
191+
/// Reference counting of the handler
187192
std::atomic_uint32_t refCount_;
188193
enum class mem_type { buffer, surface } mem_type_;
189194

195+
/// A PI Memory object represents either plain memory allocations ("Buffers"
196+
/// in OpenCL) or typed allocations ("Images" in OpenCL).
197+
/// In CUDA their API handlers are different. Whereas "Buffers" are allocated
198+
/// as pointer-like structs, "Images" are stored in Textures or Surfaces
199+
/// This union allows implementation to use either from the same handler.
190200
union mem_ {
201+
// Handler for plain, pointer-based CUDA allocations
191202
struct buffer_mem_ {
192203
using native_type = CUdeviceptr;
193204

205+
// If this allocation is a sub-buffer (i.e., a view on an existing
206+
// allocation), this is the pointer to the parent handler structure
194207
pi_mem parent_;
208+
// CUDA handler for the pointer
195209
native_type ptr_;
210+
211+
/// Pointer associated with this device on the host
196212
void *hostPtr_;
213+
/// Size of the allocation in bytes
197214
size_t size_;
198-
215+
/// Offset of the active mapped region.
199216
size_t mapOffset_;
217+
/// Pointer to the active mapped region, if any
200218
void *mapPtr_;
219+
/// Original flags for the mapped region
201220
cl_map_flags mapFlags_;
202221

203222
/** alloc_mode
@@ -222,6 +241,10 @@ struct _pi_mem {
222241

223242
size_t get_map_offset(void *ptr) const noexcept { return mapOffset_; }
224243

244+
/// Returns a pointer to data visible on the host that contains
245+
/// the data on the device associated with this allocation.
246+
/// The offset is used to index into the CUDA allocation.
247+
///
225248
void *map_to_ptr(size_t offset, cl_map_flags flags) noexcept {
226249
assert(mapPtr_ == nullptr);
227250
mapOffset_ = offset;
@@ -235,6 +258,7 @@ struct _pi_mem {
235258
return mapPtr_;
236259
}
237260

261+
/// Detach the allocation from the host memory.
238262
void unmap(void *ptr) noexcept {
239263
assert(mapPtr_ != nullptr);
240264

@@ -251,6 +275,7 @@ struct _pi_mem {
251275
}
252276
} buffer_mem_;
253277

278+
// Handler data for surface object (i.e. Images)
254279
struct surface_mem_ {
255280
CUarray array_;
256281
CUsurfObject surfObj_;
@@ -264,7 +289,7 @@ struct _pi_mem {
264289
} surface_mem_;
265290
} mem_;
266291

267-
// Buffer constructor
292+
/// Constructs the PI MEM handler for a non-typed allocation ("buffer")
268293
_pi_mem(pi_context ctxt, pi_mem parent, mem_::buffer_mem_::alloc_mode mode,
269294
CUdeviceptr ptr, void *host_ptr, size_t size)
270295
: context_{ctxt}, refCount_{1}, mem_type_{mem_type::buffer} {
@@ -283,7 +308,7 @@ struct _pi_mem {
283308
}
284309
};
285310

286-
// Surface constructor
311+
/// Constructs the PI allocation for an Image object (surface in CUDA)
287312
_pi_mem(pi_context ctxt, CUarray array, CUsurfObject surf,
288313
pi_mem_type image_type, void *host_ptr)
289314
: context_{ctxt}, refCount_{1}, mem_type_{mem_type::surface} {

0 commit comments

Comments
 (0)