@@ -176,28 +176,47 @@ struct _pi_context {
176
176
std::vector<deleter_data> extended_deleters_;
177
177
};
178
178
179
- // / PI Mem mapping to a CUDA memory allocation
180
- // /
179
+ // / PI Mem mapping to CUDA memory allocations, both data and texture/surface.
180
+ // / \brief Represents non-SVM allocations on the CUDA backend.
181
+ // / Keeps track of all mapped regions used for Map/Unmap calls.
182
+ // / Only one region can be active at the same time per allocation.
181
183
struct _pi_mem {
182
184
183
185
// TODO: Move as much shared data up as possible
184
186
using pi_context = _pi_context *;
185
187
188
+ // Context where the memory object is accessible
186
189
pi_context context_;
190
+
191
+ // / Reference counting of the handler
187
192
std::atomic_uint32_t refCount_;
188
193
enum class mem_type { buffer, surface } mem_type_;
189
194
195
+ // / A PI Memory object represents either plain memory allocations ("Buffers"
196
+ // / in OpenCL) or typed allocations ("Images" in OpenCL).
197
+ // / In CUDA their API handlers are different. Whereas "Buffers" are allocated
198
+ // / as pointer-like structs, "Images" are stored in Textures or Surfaces
199
+ // / This union allows implementation to use either from the same handler.
190
200
union mem_ {
201
+ // Handler for plain, pointer-based CUDA allocations
191
202
struct buffer_mem_ {
192
203
using native_type = CUdeviceptr;
193
204
205
+ // If this allocation is a sub-buffer (i.e., a view on an existing
206
+ // allocation), this is the pointer to the parent handler structure
194
207
pi_mem parent_;
208
+ // CUDA handler for the pointer
195
209
native_type ptr_;
210
+
211
+ // / Pointer associated with this device on the host
196
212
void *hostPtr_;
213
+ // / Size of the allocation in bytes
197
214
size_t size_;
198
-
215
+ // / Offset of the active mapped region.
199
216
size_t mapOffset_;
217
+ // / Pointer to the active mapped region, if any
200
218
void *mapPtr_;
219
+ // / Original flags for the mapped region
201
220
cl_map_flags mapFlags_;
202
221
203
222
/* * alloc_mode
@@ -222,6 +241,10 @@ struct _pi_mem {
222
241
223
242
size_t get_map_offset (void *ptr) const noexcept { return mapOffset_; }
224
243
244
+ // / Returns a pointer to data visible on the host that contains
245
+ // / the data on the device associated with this allocation.
246
+ // / The offset is used to index into the CUDA allocation.
247
+ // /
225
248
void *map_to_ptr (size_t offset, cl_map_flags flags) noexcept {
226
249
assert (mapPtr_ == nullptr );
227
250
mapOffset_ = offset;
@@ -235,6 +258,7 @@ struct _pi_mem {
235
258
return mapPtr_;
236
259
}
237
260
261
+ // / Detach the allocation from the host memory.
238
262
void unmap (void *ptr) noexcept {
239
263
assert (mapPtr_ != nullptr );
240
264
@@ -251,6 +275,7 @@ struct _pi_mem {
251
275
}
252
276
} buffer_mem_;
253
277
278
+ // Handler data for surface object (i.e. Images)
254
279
struct surface_mem_ {
255
280
CUarray array_;
256
281
CUsurfObject surfObj_;
@@ -264,7 +289,7 @@ struct _pi_mem {
264
289
} surface_mem_;
265
290
} mem_;
266
291
267
- // Buffer constructor
292
+ // / Constructs the PI MEM handler for a non-typed allocation ("buffer")
268
293
_pi_mem (pi_context ctxt, pi_mem parent, mem_::buffer_mem_::alloc_mode mode,
269
294
CUdeviceptr ptr, void *host_ptr, size_t size)
270
295
: context_{ctxt}, refCount_{1 }, mem_type_{mem_type::buffer} {
@@ -283,7 +308,7 @@ struct _pi_mem {
283
308
}
284
309
};
285
310
286
- // Surface constructor
311
+ // / Constructs the PI allocation for an Image object (surface in CUDA)
287
312
_pi_mem (pi_context ctxt, CUarray array, CUsurfObject surf,
288
313
pi_mem_type image_type, void *host_ptr)
289
314
: context_{ctxt}, refCount_{1 }, mem_type_{mem_type::surface} {
0 commit comments