@@ -939,51 +939,42 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
939
939
// Create a command-list to signal RetEvent on completion
940
940
ur_command_list_ptr_t SignalCommandList{};
941
941
if (Event) {
942
+ ur_event_handle_t SyncEvent = CommandBuffer->SignalEvent ;
942
943
UR_CALL (Queue->Context ->getAvailableCommandList (Queue, SignalCommandList,
943
944
false , false ));
944
945
945
946
UR_CALL (createEventAndAssociateQueue (Queue, &RetEvent,
946
947
UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP,
947
948
SignalCommandList, false ));
948
949
949
- ZE2UR_CALL (zeCommandListAppendBarrier,
950
- (SignalCommandList->first , RetEvent->ZeEvent , 1 ,
951
- &(CommandBuffer->SignalEvent ->ZeEvent )));
952
-
953
950
if ((Queue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE)) {
954
951
// Multiple submissions of a command buffer implies that we need to save
955
952
// the event timestamps before resubmitting the command buffer. We
956
953
// therefore copy these timestamps in a dedicated USM memory section
957
954
// before completing the command buffer execution, and then attach this
958
955
// memory to the event returned to users to allow the profiling
959
956
// engine to recover these timestamps.
960
- ur_usm_desc_t USMDesc{};
961
- ur_usm_device_desc_t UsmDeviceDesc{};
962
- UsmDeviceDesc.stype = UR_STRUCTURE_TYPE_USM_DEVICE_DESC;
963
- ur_usm_host_desc_t UsmHostDesc{};
964
- UsmHostDesc.stype = UR_STRUCTURE_TYPE_USM_HOST_DESC;
965
- UsmDeviceDesc.pNext = &UsmHostDesc;
966
- USMDesc.pNext = &UsmDeviceDesc;
967
- USMDesc.align = 4 ; // 4byte-aligned
968
-
969
- size_t Size = WaitEventList.size () * sizeof (ze_kernel_timestamp_result_t );
957
+ UR_CALL (createEventAndAssociateQueue (
958
+ Queue, &SyncEvent, UR_COMMAND_USM_MEMCPY, SignalCommandList, false ));
970
959
971
- struct command_buffer_profiling_t *Profiling =
972
- new command_buffer_profiling_t ();
960
+ command_buffer_profiling_t *Profiling = new command_buffer_profiling_t ();
973
961
974
962
Profiling->NumEvents = WaitEventList.size ();
975
-
976
- urUSMSharedAlloc (RetEvent->Context , CommandBuffer->Device , &USMDesc,
977
- nullptr , Size, (void **)&Profiling->Timestamps );
963
+ Profiling->Timestamps =
964
+ new ze_kernel_timestamp_result_t [Profiling->NumEvents ];
978
965
979
966
ZE2UR_CALL (zeCommandListAppendQueryKernelTimestamps,
980
967
(SignalCommandList->first , WaitEventList.size (),
981
- WaitEventList.data (), Profiling->Timestamps , 0 ,
982
- RetEvent ->ZeEvent , 1 ,
968
+ WaitEventList.data (), ( void *) Profiling->Timestamps , 0 ,
969
+ SyncEvent ->ZeEvent , 1 ,
983
970
&(CommandBuffer->SignalEvent ->ZeEvent )));
984
971
985
972
RetEvent->CommandData = static_cast <void *>(Profiling);
986
973
}
974
+
975
+ ZE2UR_CALL (zeCommandListAppendBarrier,
976
+ (SignalCommandList->first , RetEvent->ZeEvent , 1 ,
977
+ &(SyncEvent->ZeEvent )));
987
978
}
988
979
989
980
// Execute our command-lists asynchronously
0 commit comments