Skip to content

[SYCL] Copy to/from host-allocated buffer can use host memcpy. #2679

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 63 additions & 27 deletions sycl/plugins/level_zero/pi_level_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ static pi_result mapError(ze_result_t ZeResult) {
static pi_result
enqueueMemCopyHelper(pi_command_type CommandType, pi_queue Queue, void *Dst,
pi_bool BlockingWrite, size_t Size, const void *Src,
pi_uint32 NumEventsInWaitList,
bool HostCopy, pi_uint32 NumEventsInWaitList,
const pi_event *EventWaitList, pi_event *Event);

static pi_result enqueueMemCopyRectHelper(
Expand Down Expand Up @@ -3701,6 +3701,21 @@ pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue,
return PI_SUCCESS;
}

// Reports whether the memory at Ptr may be copied with a plain host memcpy.
//
// Queries Level Zero for the allocation properties of Ptr in the context that
// Queue belongs to. Any allocation type other than ZE_MEMORY_TYPE_DEVICE is
// treated as host-copyable.
//
// Returns false when the property query itself fails, so a pointer of unknown
// kind is never reported as safe for a host-side copy.
static bool piHostCopyablePtr(pi_queue Queue, const void *Ptr) {
  ze_device_handle_t ZeDeviceHandle = nullptr;
  ze_memory_allocation_properties_t ZeMemoryAllocationProperties = {};

  // ZE_CALL is kept so this query goes through the same wrapper as the other
  // Level Zero calls, but it is confined to a lambda that returns pi_result.
  // NOTE(review): ZE_CALL presumably early-returns a pi_result on failure;
  // used directly in this bool function, a non-zero error code would be
  // implicitly converted to `true` -- confirm against the macro definition.
  const auto QueryAllocProperties = [&]() -> pi_result {
    ZE_CALL(zeMemGetAllocProperties(Queue->Context->ZeContext, Ptr,
                                    &ZeMemoryAllocationProperties,
                                    &ZeDeviceHandle));
    return PI_SUCCESS;
  };
  if (QueryAllocProperties() != PI_SUCCESS)
    return false;

  return ZeMemoryAllocationProperties.type != ZE_MEMORY_TYPE_DEVICE;
}

// Reports whether the Level Zero handle behind Mem may be copied with a plain
// host memcpy. Forwards the handle, viewed as a const pointer, to
// piHostCopyablePtr for the given Queue.
static bool piHostCopyableMem(pi_queue Queue, pi_mem Mem) {
  const void *ZeHandlePtr = pi_cast<const void *>(Mem->getZeHandle());
  return piHostCopyablePtr(Queue, ZeHandlePtr);
}

pi_result piEnqueueMemBufferRead(pi_queue Queue, pi_mem Src,
pi_bool BlockingRead, size_t Offset,
size_t Size, void *Dst,
Expand All @@ -3710,13 +3725,12 @@ pi_result piEnqueueMemBufferRead(pi_queue Queue, pi_mem Src,
assert(Src);
assert(Queue);

// Lock automatically releases when this goes out of scope.
std::lock_guard<std::mutex> lock(Queue->PiQueueMutex);

return enqueueMemCopyHelper(PI_COMMAND_TYPE_MEM_BUFFER_READ, Queue, Dst,
BlockingRead, Size,
pi_cast<char *>(Src->getZeHandle()) + Offset,
NumEventsInWaitList, EventWaitList, Event);
return enqueueMemCopyHelper(
PI_COMMAND_TYPE_MEM_BUFFER_READ, Queue, Dst, BlockingRead, Size,
pi_cast<char *>(Src->getZeHandle()) + Offset,
piHostCopyableMem(Queue, Src) &&
piHostCopyablePtr(Queue, Dst), // Whether memcpy on host can be used
NumEventsInWaitList, EventWaitList, Event);
}

pi_result piEnqueueMemBufferReadRect(
Expand Down Expand Up @@ -3747,15 +3761,12 @@ pi_result piEnqueueMemBufferReadRect(
static pi_result
enqueueMemCopyHelper(pi_command_type CommandType, pi_queue Queue, void *Dst,
pi_bool BlockingWrite, size_t Size, const void *Src,
pi_uint32 NumEventsInWaitList,
bool HostCopy, pi_uint32 NumEventsInWaitList,
const pi_event *EventWaitList, pi_event *Event) {

// Get a new command list to be used on this call
ze_command_list_handle_t ZeCommandList = nullptr;
ze_fence_handle_t ZeFence = nullptr;
if (auto Res = Queue->Device->getAvailableCommandList(Queue, &ZeCommandList,
&ZeFence))
return Res;

ze_event_handle_t ZeEvent = nullptr;
if (Event) {
Expand All @@ -3770,6 +3781,31 @@ enqueueMemCopyHelper(pi_command_type CommandType, pi_queue Queue, void *Dst,
ZeEvent = (*Event)->ZeEvent;
}

// On integrated devices the buffer has been allocated in host memory.
if (HostCopy) {
// Wait on incoming events before doing the copy
piEventsWait(NumEventsInWaitList, EventWaitList);
memcpy(Dst, Src, Size);

// Signal this event, if it is requested
if (Event) {
ZE_CALL(zeEventHostSignal(ZeEvent));

return PI_SUCCESS;
}
}

// Lock automatically releases when this goes out of scope.
std::lock_guard<std::mutex> lock(Queue->PiQueueMutex);

if (auto Res = Queue->Device->getAvailableCommandList(Queue, &ZeCommandList,
&ZeFence))
return Res;

if (Event) {
(*Event)->ZeCommandList = ZeCommandList;
}

ze_event_handle_t *ZeEventWaitList =
_pi_event::createZeEventList(NumEventsInWaitList, EventWaitList);
if (!ZeEventWaitList)
Expand Down Expand Up @@ -3911,15 +3947,14 @@ pi_result piEnqueueMemBufferWrite(pi_queue Queue, pi_mem Buffer,
assert(Buffer);
assert(Queue);

// Lock automatically releases when this goes out of scope.
std::lock_guard<std::mutex> lock(Queue->PiQueueMutex);

return enqueueMemCopyHelper(PI_COMMAND_TYPE_MEM_BUFFER_WRITE, Queue,
pi_cast<char *>(Buffer->getZeHandle()) +
Offset, // dst
BlockingWrite, Size,
Ptr, // src
NumEventsInWaitList, EventWaitList, Event);
return enqueueMemCopyHelper(
PI_COMMAND_TYPE_MEM_BUFFER_WRITE, Queue,
pi_cast<char *>(Buffer->getZeHandle()) + Offset, // dst
BlockingWrite, Size,
Ptr, // src
piHostCopyableMem(Queue, Buffer) &&
piHostCopyablePtr(Queue, Ptr), // Whether memcpy on host can be used
NumEventsInWaitList, EventWaitList, Event);
}

pi_result piEnqueueMemBufferWriteRect(
Expand Down Expand Up @@ -3954,14 +3989,14 @@ pi_result piEnqueueMemBufferCopy(pi_queue Queue, pi_mem SrcBuffer,
assert(DstBuffer);
assert(Queue);

// Lock automatically releases when this goes out of scope.
std::lock_guard<std::mutex> lock(Queue->PiQueueMutex);

return enqueueMemCopyHelper(
PI_COMMAND_TYPE_MEM_BUFFER_COPY, Queue,
pi_cast<char *>(DstBuffer->getZeHandle()) + DstOffset,
false, // blocking
Size, pi_cast<char *>(SrcBuffer->getZeHandle()) + SrcOffset,
piHostCopyableMem(Queue, SrcBuffer) &&
piHostCopyableMem(Queue,
DstBuffer), // Whether memcpy on host can be used
NumEventsInWaitList, EventWaitList, Event);
}

Expand Down Expand Up @@ -4856,12 +4891,13 @@ pi_result piextUSMEnqueueMemcpy(pi_queue Queue, pi_bool Blocking, void *DstPtr,

assert(Queue);

// Lock automatically releases when this goes out of scope.
std::lock_guard<std::mutex> lock(Queue->PiQueueMutex);

return enqueueMemCopyHelper(
// TODO: do we need a new command type for this?
// Currently we use host memcpy so probably not.
PI_COMMAND_TYPE_MEM_BUFFER_COPY, Queue, DstPtr, Blocking, Size, SrcPtr,
piHostCopyablePtr(Queue, DstPtr) &&
piHostCopyablePtr(Queue,
SrcPtr), // Use host memcpy
NumEventsInWaitlist, EventsWaitlist, Event);
}

Expand Down