@@ -46,21 +46,21 @@ static const bool UseMultipleCmdlistBarriers = [] {
46
46
}();
47
47
48
48
bool WaitListEmptyOrAllEventsFromSameQueue (
49
- ur_queue_handle_t Queue, uint32_t NumEventsInWaitList,
49
+ ur_queue_handle_legacy_t Queue, uint32_t NumEventsInWaitList,
50
50
const ur_event_handle_t *EventWaitList) {
51
51
if (!NumEventsInWaitList)
52
52
return true ;
53
53
54
54
for (uint32_t i = 0 ; i < NumEventsInWaitList; ++i) {
55
- if (Queue != EventWaitList[i]->UrQueue )
55
+ if (Queue != Legacy ( EventWaitList[i]->UrQueue ) )
56
56
return false ;
57
57
}
58
58
59
59
return true ;
60
60
}
61
61
62
62
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait (
63
- ur_queue_handle_t Queue, // /< [in] handle of the queue object
63
+ ur_queue_handle_t UrQueue, // /< [in] handle of the queue object
64
64
uint32_t NumEventsInWaitList, // /< [in] size of the event wait list
65
65
const ur_event_handle_t
66
66
*EventWaitList, // /< [in][optional][range(0, numEventsInWaitList)]
@@ -72,6 +72,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait(
72
72
*OutEvent // /< [in,out][optional] return an event object that identifies
73
73
// /< this particular command instance.
74
74
) {
75
+ auto Queue = Legacy (UrQueue);
75
76
if (EventWaitList) {
76
77
bool UseCopyEngine = false ;
77
78
@@ -152,7 +153,7 @@ static const bool InOrderBarrierBySignal = [] {
152
153
}();
153
154
154
155
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier (
155
- ur_queue_handle_t Queue, // /< [in] handle of the queue object
156
+ ur_queue_handle_t UrQueue, // /< [in] handle of the queue object
156
157
uint32_t NumEventsInWaitList, // /< [in] size of the event wait list
157
158
const ur_event_handle_t
158
159
*EventWaitList, // /< [in][optional][range(0, numEventsInWaitList)]
@@ -164,6 +165,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier(
164
165
*OutEvent // /< [in,out][optional] return an event object that identifies
165
166
// /< this particular command instance.
166
167
) {
168
+ auto Queue = Legacy (UrQueue);
167
169
168
170
// Lock automatically releases when this goes out of scope.
169
171
std::scoped_lock<ur_shared_mutex> lock (Queue->Mutex );
@@ -299,8 +301,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier(
299
301
for (auto &QueueMap :
300
302
{Queue->ComputeQueueGroupsByTID , Queue->CopyQueueGroupsByTID })
301
303
for (auto &QueueGroup : QueueMap) {
302
- bool UseCopyEngine =
303
- QueueGroup. second . Type != ur_queue_handle_t_ ::queue_type::Compute;
304
+ bool UseCopyEngine = QueueGroup. second . Type !=
305
+ ur_queue_handle_legacy_t_ ::queue_type::Compute;
304
306
if (Queue->UsingImmCmdLists ) {
305
307
// If immediate command lists are being used, each will act as their own
306
308
// queue, so we must insert a barrier into each.
@@ -369,8 +371,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier(
369
371
370
372
// Execute each command list so the barriers can be encountered.
371
373
for (ur_command_list_ptr_t &CmdList : CmdLists) {
372
- bool IsCopy =
373
- CmdList-> second . isCopy ( reinterpret_cast <ur_queue_handle_t >(Queue));
374
+ bool IsCopy = CmdList-> second . isCopy (
375
+ reinterpret_cast <ur_queue_handle_legacy_t >(Queue));
374
376
const auto &CommandBatch =
375
377
(IsCopy) ? Queue->CopyCommandBatch : Queue->ComputeCommandBatch ;
376
378
// Only batch if the matching CmdList is already open.
@@ -414,7 +416,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(
414
416
// possible that this is trying to query some event's status that
415
417
// is part of the batch. This isn't strictly required, but it seems
416
418
// like a reasonable thing to do.
417
- auto UrQueue = Event->UrQueue ;
419
+ auto UrQueue = Legacy ( Event->UrQueue ) ;
418
420
if (UrQueue) {
419
421
// Lock automatically releases when this goes out of scope.
420
422
std::unique_lock<ur_shared_mutex> Lock (UrQueue->Mutex , std::try_to_lock);
@@ -486,8 +488,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo(
486
488
return UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE;
487
489
}
488
490
489
- ur_device_handle_t Device =
490
- Event->UrQueue ? Event->UrQueue ->Device : Event->Context ->Devices [0 ];
491
+ ur_device_handle_t Device = Legacy (Event->UrQueue )
492
+ ? Legacy (Event->UrQueue )->Device
493
+ : Event->Context ->Devices [0 ];
491
494
492
495
uint64_t ZeTimerResolution = Device->ZeDeviceProperties ->timerResolution ;
493
496
const uint64_t TimestampMaxValue =
@@ -512,10 +515,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo(
512
515
return ReturnValue (Event->RecordEventEndTimestamp );
513
516
514
517
// Otherwise we need to collect it from the queue.
515
- auto Entry = Event->UrQueue ->EndTimeRecordings .find (Event);
518
+ auto Entry = Legacy ( Event->UrQueue ) ->EndTimeRecordings .find (Event);
516
519
517
520
// Unexpected state if there is no end-time record.
518
- if (Entry == Event->UrQueue ->EndTimeRecordings .end ())
521
+ if (Entry == Legacy ( Event->UrQueue ) ->EndTimeRecordings .end ())
519
522
return UR_RESULT_ERROR_UNKNOWN;
520
523
auto &EndTimeRecording = Entry->second ;
521
524
@@ -540,7 +543,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo(
540
543
// anymore, so we cache it on the event and evict the record from the
541
544
// queue.
542
545
Event->RecordEventEndTimestamp = ContextEndTime;
543
- Event->UrQueue ->EndTimeRecordings .erase (Entry);
546
+ Legacy ( Event->UrQueue ) ->EndTimeRecordings .erase (Entry);
544
547
545
548
return ReturnValue (ContextEndTime);
546
549
}
@@ -659,7 +662,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo(
659
662
}
660
663
661
664
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueTimestampRecordingExp (
662
- ur_queue_handle_t Queue, // /< [in] handle of the queue object
665
+ ur_queue_handle_t UrQueue, // /< [in] handle of the queue object
663
666
bool Blocking, // /< [in] blocking or non-blocking enqueue
664
667
uint32_t NumEventsInWaitList, // /< [in] size of the event wait list
665
668
const ur_event_handle_t
@@ -673,6 +676,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueTimestampRecordingExp(
673
676
*OutEvent // /< [in,out] return an event object that identifies
674
677
// /< this particular command instance.
675
678
) {
679
+ auto Queue = Legacy (UrQueue);
676
680
// Lock automatically releases when this goes out of scope.
677
681
std::scoped_lock<ur_shared_mutex> lock (Queue->Mutex );
678
682
@@ -701,7 +705,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueTimestampRecordingExp(
701
705
702
706
// Create a new entry in the queue's recordings.
703
707
Queue->EndTimeRecordings [*OutEvent] =
704
- ur_queue_handle_t_ ::end_time_recording{};
708
+ ur_queue_handle_legacy_t_ ::end_time_recording{};
705
709
706
710
ZE2UR_CALL (zeCommandListAppendWriteGlobalTimestamp,
707
711
(CommandList->first ,
@@ -717,6 +721,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueTimestampRecordingExp(
717
721
718
722
ur_result_t ur_event_handle_t_::getOrCreateHostVisibleEvent (
719
723
ze_event_handle_t &ZeHostVisibleEvent) {
724
+ auto UrQueue = Legacy (this ->UrQueue );
720
725
721
726
std::scoped_lock<ur_shared_mutex, ur_shared_mutex> Lock (UrQueue->Mutex ,
722
727
this ->Mutex );
@@ -771,7 +776,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait(
771
776
) {
772
777
for (uint32_t I = 0 ; I < NumEvents; I++) {
773
778
auto e = EventWaitList[I];
774
- if (e->UrQueue && e->UrQueue ->ZeEventsScope == OnDemandHostVisibleProxy) {
779
+ auto UrQueue = Legacy (e->UrQueue );
780
+ if (UrQueue && UrQueue->ZeEventsScope == OnDemandHostVisibleProxy) {
775
781
// Make sure to add all host-visible "proxy" event signals if needed.
776
782
// This ensures that all signalling commands are submitted below and
777
783
// thus proxy events can be waited without a deadlock.
@@ -788,15 +794,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait(
788
794
// Submit dependent open command lists for execution, if any
789
795
for (uint32_t I = 0 ; I < NumEvents; I++) {
790
796
ur_event_handle_t_ *Event = ur_cast<ur_event_handle_t_ *>(EventWaitList[I]);
791
- auto UrQueue = Event->UrQueue ;
797
+ auto UrQueue = Legacy ( Event->UrQueue ) ;
792
798
if (UrQueue) {
793
799
// Lock automatically releases when this goes out of scope.
794
800
std::scoped_lock<ur_shared_mutex> lock (UrQueue->Mutex );
795
801
796
802
UR_CALL (UrQueue->executeAllOpenCommandLists ());
797
803
}
798
804
}
799
- std::unordered_set<ur_queue_handle_t > Queues;
805
+ std::unordered_set<ur_queue_handle_legacy_t > Queues;
800
806
for (uint32_t I = 0 ; I < NumEvents; I++) {
801
807
{
802
808
ur_event_handle_t_ *Event =
@@ -823,12 +829,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait(
823
829
Event->Completed = true ;
824
830
}
825
831
}
826
- if (auto Q = Event->UrQueue ) {
832
+ if (auto Q = Legacy ( Event->UrQueue ) ) {
827
833
if (Q->UsingImmCmdLists && Q->isInOrderQueue ())
828
834
// Use information about waited event to cleanup completed events in
829
835
// the in-order queue.
830
836
CleanupEventsInImmCmdLists (
831
- Event->UrQueue , false /* QueueLocked */ , false /* QueueSynced */ ,
837
+ Legacy (Event->UrQueue ), false /* QueueLocked */ ,
838
+ false /* QueueSynced */ ,
832
839
reinterpret_cast <ur_event_handle_t >(Event));
833
840
else {
834
841
// NOTE: we are cleaning up after the event here to free resources
@@ -884,7 +891,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle(
884
891
// Event can potentially be in an open command-list, make sure that
885
892
// it is submitted for execution to avoid potential deadlock if
886
893
// interop app is going to wait for it.
887
- auto Queue = Event->UrQueue ;
894
+ auto Queue = Legacy ( Event->UrQueue ) ;
888
895
if (Queue) {
889
896
std::scoped_lock<ur_shared_mutex> lock (Queue->Mutex );
890
897
const auto &OpenCommandList = Queue->eventOpenCommandList (Event);
@@ -1014,7 +1021,7 @@ ur_result_t urEventReleaseInternal(ur_event_handle_t Event) {
1014
1021
}
1015
1022
1016
1023
// Save pointer to the queue before deleting/resetting event.
1017
- auto Queue = Event->UrQueue ;
1024
+ auto Queue = Legacy ( Event->UrQueue ) ;
1018
1025
1019
1026
// If the event was a timestamp recording, we try to evict its entry in the
1020
1027
// queue.
@@ -1028,7 +1035,7 @@ ur_result_t urEventReleaseInternal(ur_event_handle_t Event) {
1028
1035
EndTimeRecording.EventHasDied = true ;
1029
1036
} else {
1030
1037
// Otherwise we evict the entry.
1031
- Event->UrQueue ->EndTimeRecordings .erase (Entry);
1038
+ Legacy ( Event->UrQueue ) ->EndTimeRecordings .erase (Entry);
1032
1039
}
1033
1040
}
1034
1041
}
@@ -1046,8 +1053,8 @@ ur_result_t urEventReleaseInternal(ur_event_handle_t Event) {
1046
1053
// created so that we can avoid ur_queue_handle_t is released before the
1047
1054
// associated ur_event_handle_t is released. Here we have to decrement it so
1048
1055
// ur_queue_handle_t can be released successfully.
1049
- if (Queue ) {
1050
- UR_CALL (urQueueReleaseInternal (Queue ));
1056
+ if (Event-> UrQueue ) {
1057
+ UR_CALL (urQueueReleaseInternal (Event-> UrQueue ));
1051
1058
}
1052
1059
1053
1060
return UR_RESULT_SUCCESS;
@@ -1091,7 +1098,7 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked,
1091
1098
ur_kernel_handle_t AssociatedKernel = nullptr ;
1092
1099
// List of dependent events.
1093
1100
std::list<ur_event_handle_t > EventsToBeReleased;
1094
- ur_queue_handle_t AssociatedQueue = nullptr ;
1101
+ ur_queue_handle_legacy_t AssociatedQueue = nullptr ;
1095
1102
{
1096
1103
// If the Event is already locked, then continue with the cleanup, otherwise
1097
1104
// block on locking the event.
@@ -1105,7 +1112,7 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked,
1105
1112
if (Event->CleanedUp )
1106
1113
return UR_RESULT_SUCCESS;
1107
1114
1108
- AssociatedQueue = Event->UrQueue ;
1115
+ AssociatedQueue = Legacy ( Event->UrQueue ) ;
1109
1116
1110
1117
// Remember the kernel associated with this event if there is one. We are
1111
1118
// going to release it later.
@@ -1222,9 +1229,9 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked,
1222
1229
// The "HostVisible" argument specifies if event needs to be allocated from
1223
1230
// a host-visible pool.
1224
1231
//
1225
- ur_result_t EventCreate (ur_context_handle_t Context, ur_queue_handle_t Queue,
1226
- bool IsMultiDevice , bool HostVisible ,
1227
- ur_event_handle_t *RetEvent,
1232
+ ur_result_t EventCreate (ur_context_handle_t Context,
1233
+ ur_queue_handle_legacy_t Queue , bool IsMultiDevice ,
1234
+ bool HostVisible, ur_event_handle_t *RetEvent,
1228
1235
bool CounterBasedEventEnabled,
1229
1236
bool ForceDisableProfiling) {
1230
1237
bool ProfilingEnabled =
@@ -1311,7 +1318,7 @@ ur_result_t ur_event_handle_t_::reset() {
1311
1318
1312
1319
ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList (
1313
1320
uint32_t EventListLength, const ur_event_handle_t *EventList,
1314
- ur_queue_handle_t CurQueue, bool UseCopyEngine) {
1321
+ ur_queue_handle_legacy_t CurQueue, bool UseCopyEngine) {
1315
1322
this ->Length = 0 ;
1316
1323
this ->ZeEventList = nullptr ;
1317
1324
this ->UrEventList = nullptr ;
@@ -1427,7 +1434,7 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(
1427
1434
}
1428
1435
}
1429
1436
1430
- auto Queue = EventList[I]->UrQueue ;
1437
+ auto Queue = Legacy ( EventList[I]->UrQueue ) ;
1431
1438
1432
1439
auto CurQueueDevice = CurQueue->Device ;
1433
1440
std::optional<std::unique_lock<ur_shared_mutex>> QueueLock =
@@ -1628,7 +1635,7 @@ ur_result_t _ur_ze_event_list_t::collectEventsForReleaseAndDestroyUrZeEventList(
1628
1635
// Tells if this event is with profiling capabilities.
1629
1636
bool ur_event_handle_t_::isProfilingEnabled () const {
1630
1637
return !UrQueue || // tentatively assume user events are profiling enabled
1631
- (UrQueue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE) != 0 ;
1638
+ (Legacy ( UrQueue) ->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE) != 0 ;
1632
1639
}
1633
1640
1634
1641
// Tells if this event was created as a timestamp event, allowing profiling
0 commit comments