Skip to content

Commit bd25d68

Browse files
mfrancepillois authored and kbenzie committed
Changes USMShared memory allocation for host only allocation
1 parent e8b7840 commit bd25d68

File tree

2 files changed

+23
-32
lines changed

2 files changed

+23
-32
lines changed

source/adapters/level_zero/command_buffer.cpp

Lines changed: 12 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -939,51 +939,42 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
939939
// Create a command-list to signal RetEvent on completion
940940
ur_command_list_ptr_t SignalCommandList{};
941941
if (Event) {
942+
ur_event_handle_t SyncEvent = CommandBuffer->SignalEvent;
942943
UR_CALL(Queue->Context->getAvailableCommandList(Queue, SignalCommandList,
943944
false, false));
944945

945946
UR_CALL(createEventAndAssociateQueue(Queue, &RetEvent,
946947
UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP,
947948
SignalCommandList, false));
948949

949-
ZE2UR_CALL(zeCommandListAppendBarrier,
950-
(SignalCommandList->first, RetEvent->ZeEvent, 1,
951-
&(CommandBuffer->SignalEvent->ZeEvent)));
952-
953950
if ((Queue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE)) {
954951
// Multiple submissions of a command buffer implies that we need to save
955952
// the event timestamps before resubmitting the command buffer. We
956953
// therefore copy these timestamps in a dedicated USM memory section
957954
// before completing the command buffer execution, and then attach this
958955
// memory to the event returned to users to allow the profiling
959956
// engine to recover these timestamps.
960-
ur_usm_desc_t USMDesc{};
961-
ur_usm_device_desc_t UsmDeviceDesc{};
962-
UsmDeviceDesc.stype = UR_STRUCTURE_TYPE_USM_DEVICE_DESC;
963-
ur_usm_host_desc_t UsmHostDesc{};
964-
UsmHostDesc.stype = UR_STRUCTURE_TYPE_USM_HOST_DESC;
965-
UsmDeviceDesc.pNext = &UsmHostDesc;
966-
USMDesc.pNext = &UsmDeviceDesc;
967-
USMDesc.align = 4; // 4byte-aligned
968-
969-
size_t Size = WaitEventList.size() * sizeof(ze_kernel_timestamp_result_t);
957+
UR_CALL(createEventAndAssociateQueue(
958+
Queue, &SyncEvent, UR_COMMAND_USM_MEMCPY, SignalCommandList, false));
970959

971-
struct command_buffer_profiling_t *Profiling =
972-
new command_buffer_profiling_t();
960+
command_buffer_profiling_t *Profiling = new command_buffer_profiling_t();
973961

974962
Profiling->NumEvents = WaitEventList.size();
975-
976-
urUSMSharedAlloc(RetEvent->Context, CommandBuffer->Device, &USMDesc,
977-
nullptr, Size, (void **)&Profiling->Timestamps);
963+
Profiling->Timestamps =
964+
new ze_kernel_timestamp_result_t[Profiling->NumEvents];
978965

979966
ZE2UR_CALL(zeCommandListAppendQueryKernelTimestamps,
980967
(SignalCommandList->first, WaitEventList.size(),
981-
WaitEventList.data(), Profiling->Timestamps, 0,
982-
RetEvent->ZeEvent, 1,
968+
WaitEventList.data(), (void *)Profiling->Timestamps, 0,
969+
SyncEvent->ZeEvent, 1,
983970
&(CommandBuffer->SignalEvent->ZeEvent)));
984971

985972
RetEvent->CommandData = static_cast<void *>(Profiling);
986973
}
974+
975+
ZE2UR_CALL(zeCommandListAppendBarrier,
976+
(SignalCommandList->first, RetEvent->ZeEvent, 1,
977+
&(SyncEvent->ZeEvent)));
987978
}
988979

989980
// Execute our command-lists asynchronously

source/adapters/level_zero/event.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -480,14 +480,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo(
480480
// from this memory.
481481
if (Event->CommandType == UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP) {
482482
if (Event->CommandData) {
483-
struct command_buffer_profiling_t *ProfilingsPtr;
483+
command_buffer_profiling_t *ProfilingsPtr;
484484
switch (PropName) {
485485
case UR_PROFILING_INFO_COMMAND_START: {
486-
ProfilingsPtr = static_cast<struct command_buffer_profiling_t *>(
487-
Event->CommandData);
486+
ProfilingsPtr =
487+
static_cast<command_buffer_profiling_t *>(Event->CommandData);
488488
// Sync-point order does not necessarily match to the order of
489489
// execution. We therefore look for the first command executed.
490-
uint64_t MinStart = ProfilingsPtr->Timestamps->global.kernelStart;
490+
uint64_t MinStart = ProfilingsPtr->Timestamps[0].global.kernelStart;
491491
for (uint64_t i = 1; i < ProfilingsPtr->NumEvents; i++) {
492492
uint64_t Timestamp = ProfilingsPtr->Timestamps[i].global.kernelStart;
493493
if (Timestamp < MinStart) {
@@ -499,12 +499,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo(
499499
return ReturnValue(ContextStartTime);
500500
}
501501
case UR_PROFILING_INFO_COMMAND_END: {
502-
ProfilingsPtr = static_cast<struct command_buffer_profiling_t *>(
503-
Event->CommandData);
502+
ProfilingsPtr =
503+
static_cast<command_buffer_profiling_t *>(Event->CommandData);
504504
// Sync-point order does not necessarily match to the order of
505505
// execution. We therefore look for the last command executed.
506-
uint64_t MaxEnd = ProfilingsPtr->Timestamps->global.kernelEnd;
507-
uint64_t LastStart = ProfilingsPtr->Timestamps->global.kernelStart;
506+
uint64_t MaxEnd = ProfilingsPtr->Timestamps[0].global.kernelEnd;
507+
uint64_t LastStart = ProfilingsPtr->Timestamps[0].global.kernelStart;
508508
for (uint64_t i = 1; i < ProfilingsPtr->NumEvents; i++) {
509509
uint64_t Timestamp = ProfilingsPtr->Timestamps[i].global.kernelEnd;
510510
if (Timestamp > MaxEnd) {
@@ -832,9 +832,9 @@ ur_result_t urEventReleaseInternal(ur_event_handle_t Event) {
832832
if (Event->CommandType == UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP &&
833833
Event->CommandData) {
834834
// Free the extra memory allocated to this event for profiling purposes.
835-
struct command_buffer_profiling_t *ProfilingPtr =
836-
static_cast<struct command_buffer_profiling_t *>(Event->CommandData);
837-
urUSMFree(Event->Context, (void *)ProfilingPtr->Timestamps);
835+
command_buffer_profiling_t *ProfilingPtr =
836+
static_cast<command_buffer_profiling_t *>(Event->CommandData);
837+
delete[] ProfilingPtr->Timestamps;
838838
delete ProfilingPtr;
839839
Event->CommandData = nullptr;
840840
}

0 commit comments

Comments
 (0)