Skip to content

Commit d40405d

Browse files
authored
Merge pull request #2682 from Xewar313/add-append-usm-memory-v2-commandbuffer
Add USMMemcpy to v2 command buffer
2 parents (9ffb5ff + a828309); commit d40405d

File tree

5 files changed, with 59 additions and 26 deletions.

source/adapters/level_zero/v2/api.cpp

Lines changed: 0 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -239,17 +239,6 @@ ur_result_t urBindlessImagesReleaseExternalSemaphoreExp(
239239
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
240240
}
241241

242-
ur_result_t urCommandBufferAppendUSMMemcpyExp(
243-
ur_exp_command_buffer_handle_t hCommandBuffer, void *pDst, const void *pSrc,
244-
size_t size, uint32_t numSyncPointsInWaitList,
245-
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
246-
uint32_t NumEventsInWaitList, const ur_event_handle_t *phEventWaitList,
247-
ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent,
248-
ur_exp_command_buffer_command_handle_t *phCommand) {
249-
logger::error("{} function not implemented!", __FUNCTION__);
250-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
251-
}
252-
253242
ur_result_t urCommandBufferAppendUSMFillExp(
254243
ur_exp_command_buffer_handle_t hCommandBuffer, void *pMemory,
255244
const void *pPattern, size_t patternSize, size_t size,

source/adapters/level_zero/v2/command_buffer.cpp

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -138,6 +138,33 @@ ur_result_t urCommandBufferAppendKernelLaunchExp(
138138
return exceptionToResult(std::current_exception());
139139
}
140140

141+
ur_result_t urCommandBufferAppendUSMMemcpyExp(
142+
ur_exp_command_buffer_handle_t hCommandBuffer, void *pDst, const void *pSrc,
143+
size_t size, uint32_t numSyncPointsInWaitList,
144+
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
145+
uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
146+
ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent,
147+
ur_exp_command_buffer_command_handle_t *phCommand) try {
148+
149+
// the same issue as in urCommandBufferAppendKernelLaunchExp
150+
std::ignore = numEventsInWaitList;
151+
std::ignore = phEventWaitList;
152+
std::ignore = phEvent;
153+
// sync mechanic can be ignored, because all lists are in-order
154+
std::ignore = numSyncPointsInWaitList;
155+
std::ignore = pSyncPointWaitList;
156+
std::ignore = pSyncPoint;
157+
158+
std::ignore = phCommand;
159+
// Responsibility of UMD to offload to copy engine
160+
UR_CALL(hCommandBuffer->commandListManager.appendUSMMemcpy(
161+
false, pDst, pSrc, size, 0, nullptr, nullptr));
162+
163+
return UR_RESULT_SUCCESS;
164+
} catch (...) {
165+
return exceptionToResult(std::current_exception());
166+
}
167+
141168
ur_result_t
142169
urCommandBufferGetInfoExp(ur_exp_command_buffer_handle_t hCommandBuffer,
143170
ur_exp_command_buffer_info_t propName,

source/adapters/level_zero/v2/command_list_manager.cpp

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -102,6 +102,30 @@ ur_result_t ur_command_list_manager::appendKernelLaunch(
102102
return UR_RESULT_SUCCESS;
103103
}
104104

105+
ur_result_t ur_command_list_manager::appendUSMMemcpy(
106+
bool blocking, void *pDst, const void *pSrc, size_t size,
107+
uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
108+
ur_event_handle_t *phEvent) {
109+
TRACK_SCOPE_LATENCY("ur_command_list_manager::appendUSMMemcpy");
110+
111+
std::scoped_lock<ur_shared_mutex> lock(this->Mutex);
112+
113+
auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_MEMCPY);
114+
115+
auto [pWaitEvents, numWaitEvents] =
116+
getWaitListView(phEventWaitList, numEventsInWaitList);
117+
118+
ZE2UR_CALL(zeCommandListAppendMemoryCopy,
119+
(zeCommandList.get(), pDst, pSrc, size, zeSignalEvent,
120+
numWaitEvents, pWaitEvents));
121+
122+
if (blocking) {
123+
ZE2UR_CALL(zeCommandListHostSynchronize, (zeCommandList.get(), UINT64_MAX));
124+
}
125+
126+
return UR_RESULT_SUCCESS;
127+
}
128+
105129
ze_command_list_handle_t ur_command_list_manager::getZeCommandList() {
106130
return zeCommandList.get();
107131
}

source/adapters/level_zero/v2/command_list_manager.hpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,11 @@ struct ur_command_list_manager : public _ur_object {
4747
const ur_event_handle_t *phEventWaitList,
4848
ur_event_handle_t *phEvent);
4949

50+
ur_result_t appendUSMMemcpy(bool blocking, void *pDst, const void *pSrc,
51+
size_t size, uint32_t numEventsInWaitList,
52+
const ur_event_handle_t *phEventWaitList,
53+
ur_event_handle_t *phEvent);
54+
5055
ze_command_list_handle_t getZeCommandList();
5156

5257
wait_list_view getWaitListView(const ur_event_handle_t *phWaitEvents,

source/adapters/level_zero/v2/queue_immediate_in_order.cpp

Lines changed: 3 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -698,21 +698,9 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueUSMMemcpy(
698698
// TODO: parametrize latency tracking with 'blocking'
699699
TRACK_SCOPE_LATENCY("ur_queue_immediate_in_order_t::enqueueUSMMemcpy");
700700

701-
std::scoped_lock<ur_shared_mutex> lock(this->Mutex);
702-
703-
auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_MEMCPY);
704-
705-
auto [pWaitEvents, numWaitEvents] =
706-
getWaitListView(phEventWaitList, numEventsInWaitList);
707-
708-
ZE2UR_CALL(zeCommandListAppendMemoryCopy,
709-
(commandListManager.getZeCommandList(), pDst, pSrc, size,
710-
zeSignalEvent, numWaitEvents, pWaitEvents));
711-
712-
if (blocking) {
713-
ZE2UR_CALL(zeCommandListHostSynchronize,
714-
(commandListManager.getZeCommandList(), UINT64_MAX));
715-
}
701+
UR_CALL(commandListManager.appendUSMMemcpy(blocking, pDst, pSrc, size,
702+
numEventsInWaitList,
703+
phEventWaitList, phEvent));
716704

717705
return UR_RESULT_SUCCESS;
718706
}

0 commit comments

Comments
 (0)