Skip to content

Commit 49b6223

Browse files
authored
[SYCL][CUDA] Implement COPY_HOST_PTR (#1863)
When a PI Mem object is created with the PI_MEM_FLAGS_HOST_PTR_COPY flag, the CUDA backend no longer uses the input host pointer for Map/Unmap; instead, it allocates internal memory for the mapping. Fixes #1462 Signed-off-by: Ruyman Reyes <[email protected]>
1 parent 62e2f3b commit 49b6223

File tree

2 files changed

+12
-2
lines changed

2 files changed

+12
-2
lines changed

sycl/plugins/cuda/pi_cuda.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1513,6 +1513,9 @@ pi_result cuda_piMemBufferCreate(pi_context context, pi_mem_flags flags,
15131513
allocMode = _pi_mem::alloc_mode::use_host_ptr;
15141514
} else {
15151515
retErr = PI_CHECK_ERROR(cuMemAlloc(&ptr, size));
1516+
if (flags & PI_MEM_FLAGS_HOST_PTR_COPY) {
1517+
allocMode = _pi_mem::alloc_mode::copy_in;
1518+
}
15161519
}
15171520

15181521
if (retErr == PI_SUCCESS) {
@@ -1572,6 +1575,7 @@ pi_result cuda_piMemRelease(pi_mem memObj) {
15721575
ScopedContext active(uniqueMemObj->get_context());
15731576

15741577
switch (uniqueMemObj->allocMode_) {
1578+
case _pi_mem::alloc_mode::copy_in:
15751579
case _pi_mem::alloc_mode::classic:
15761580
ret = PI_CHECK_ERROR(cuMemFree(uniqueMemObj->ptr_));
15771581
break;

sycl/plugins/cuda/pi_cuda.hpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,13 @@ struct _pi_mem {
192192
void *mapPtr_;
193193
cl_map_flags mapFlags_;
194194
std::atomic_uint32_t refCount_;
195-
enum class alloc_mode { classic, use_host_ptr } allocMode_;
195+
/** alloc_mode
196+
* classic: Just a normal buffer allocated on the device via cuda malloc
197+
* use_host_ptr: Use an address on the host for the device
198+
* copy_in: The data for the device comes from the host but the host pointer
199+
is not available later for re-use
200+
*/
201+
enum class alloc_mode { classic, use_host_ptr, copy_in } allocMode_;
196202

197203
_pi_mem(pi_context ctxt, pi_mem parent, alloc_mode mode, CUdeviceptr ptr, void *host_ptr,
198204
size_t size)
@@ -240,7 +246,7 @@ struct _pi_mem {
240246
assert(mapPtr_ == nullptr);
241247
mapOffset_ = offset;
242248
mapFlags_ = flags;
243-
if (hostPtr_) {
249+
if (hostPtr_ && (allocMode_ != alloc_mode::copy_in)) {
244250
mapPtr_ = static_cast<char *>(hostPtr_) + offset;
245251
} else {
246252
// TODO: Allocate only what is needed based on the offset

0 commit comments

Comments
 (0)