Skip to content

Commit 2a31795

Browse files
authored
Merge pull request #1623 from igchor/legacy_path_queue
[L0] Create initial structure for supporting queue dispatcher
2 parents 06cb1b9 + ec73afa commit 2a31795

File tree

12 files changed

+344
-247
lines changed

12 files changed

+344
-247
lines changed

source/adapters/level_zero/command_buffer.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1175,9 +1175,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp(
11751175
}
11761176

11771177
UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
1178-
ur_exp_command_buffer_handle_t CommandBuffer, ur_queue_handle_t Queue,
1178+
ur_exp_command_buffer_handle_t CommandBuffer, ur_queue_handle_t UrQueue,
11791179
uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList,
11801180
ur_event_handle_t *Event) {
1181+
auto Queue = Legacy(UrQueue);
11811182
std::scoped_lock<ur_shared_mutex> lock(Queue->Mutex);
11821183
// Use compute engine rather than copy engine
11831184
const auto UseCopyEngine = false;

source/adapters/level_zero/context.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -576,8 +576,8 @@ void ur_context_handle_t_::addEventToContextCache(ur_event_handle_t Event) {
576576
std::scoped_lock<ur_mutex> Lock(EventCacheMutex);
577577
ur_device_handle_t Device = nullptr;
578578

579-
if (!Event->IsMultiDevice && Event->UrQueue) {
580-
Device = Event->UrQueue->Device;
579+
if (!Event->IsMultiDevice && Legacy(Event->UrQueue)) {
580+
Device = Legacy(Event->UrQueue)->Device;
581581
}
582582

583583
auto Cache = getEventCache(Event->isHostVisible(),
@@ -598,10 +598,10 @@ ur_context_handle_t_::decrementUnreleasedEventsInPool(ur_event_handle_t Event) {
598598

599599
ze_device_handle_t ZeDevice = nullptr;
600600
bool UsingImmediateCommandlists =
601-
!Event->UrQueue || Event->UrQueue->UsingImmCmdLists;
601+
!Legacy(Event->UrQueue) || Legacy(Event->UrQueue)->UsingImmCmdLists;
602602

603-
if (!Event->IsMultiDevice && Event->UrQueue) {
604-
ZeDevice = Event->UrQueue->Device->ZeDevice;
603+
if (!Event->IsMultiDevice && Legacy(Event->UrQueue)) {
604+
ZeDevice = Legacy(Event->UrQueue)->Device->ZeDevice;
605605
}
606606

607607
std::list<ze_event_pool_handle_t> *ZePoolCache = getZeEventPoolCache(
@@ -644,7 +644,7 @@ static const size_t CmdListsCleanupThreshold = [] {
644644

645645
// Retrieve an available command list to be used in a PI call.
646646
ur_result_t ur_context_handle_t_::getAvailableCommandList(
647-
ur_queue_handle_t Queue, ur_command_list_ptr_t &CommandList,
647+
ur_queue_handle_legacy_t Queue, ur_command_list_ptr_t &CommandList,
648648
bool UseCopyEngine, uint32_t NumEventsInWaitList,
649649
const ur_event_handle_t *EventWaitList, bool AllowBatching,
650650
ze_command_queue_handle_t *ForcedCmdQueue) {

source/adapters/level_zero/context.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,7 @@ struct ur_context_handle_t_ : _ur_object {
293293
// for executing on this device. Immediate commandlists are created only
294294
// once for each SYCL Queue and after that they are reused.
295295
ur_result_t getAvailableCommandList(
296-
ur_queue_handle_t Queue, ur_command_list_ptr_t &CommandList,
296+
ur_queue_handle_legacy_t Queue, ur_command_list_ptr_t &CommandList,
297297
bool UseCopyEngine, uint32_t NumEventsInWaitList,
298298
const ur_event_handle_t *EventWaitList, bool AllowBatching = false,
299299
ze_command_queue_handle_t *ForcedCmdQueue = nullptr);

source/adapters/level_zero/event.cpp

Lines changed: 41 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -46,21 +46,21 @@ static const bool UseMultipleCmdlistBarriers = [] {
4646
}();
4747

4848
bool WaitListEmptyOrAllEventsFromSameQueue(
49-
ur_queue_handle_t Queue, uint32_t NumEventsInWaitList,
49+
ur_queue_handle_legacy_t Queue, uint32_t NumEventsInWaitList,
5050
const ur_event_handle_t *EventWaitList) {
5151
if (!NumEventsInWaitList)
5252
return true;
5353

5454
for (uint32_t i = 0; i < NumEventsInWaitList; ++i) {
55-
if (Queue != EventWaitList[i]->UrQueue)
55+
if (Queue != Legacy(EventWaitList[i]->UrQueue))
5656
return false;
5757
}
5858

5959
return true;
6060
}
6161

6262
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait(
63-
ur_queue_handle_t Queue, ///< [in] handle of the queue object
63+
ur_queue_handle_t UrQueue, ///< [in] handle of the queue object
6464
uint32_t NumEventsInWaitList, ///< [in] size of the event wait list
6565
const ur_event_handle_t
6666
*EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)]
@@ -72,6 +72,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait(
7272
*OutEvent ///< [in,out][optional] return an event object that identifies
7373
///< this particular command instance.
7474
) {
75+
auto Queue = Legacy(UrQueue);
7576
if (EventWaitList) {
7677
bool UseCopyEngine = false;
7778

@@ -152,7 +153,7 @@ static const bool InOrderBarrierBySignal = [] {
152153
}();
153154

154155
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier(
155-
ur_queue_handle_t Queue, ///< [in] handle of the queue object
156+
ur_queue_handle_t UrQueue, ///< [in] handle of the queue object
156157
uint32_t NumEventsInWaitList, ///< [in] size of the event wait list
157158
const ur_event_handle_t
158159
*EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)]
@@ -164,6 +165,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier(
164165
*OutEvent ///< [in,out][optional] return an event object that identifies
165166
///< this particular command instance.
166167
) {
168+
auto Queue = Legacy(UrQueue);
167169

168170
// Lock automatically releases when this goes out of scope.
169171
std::scoped_lock<ur_shared_mutex> lock(Queue->Mutex);
@@ -299,8 +301,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier(
299301
for (auto &QueueMap :
300302
{Queue->ComputeQueueGroupsByTID, Queue->CopyQueueGroupsByTID})
301303
for (auto &QueueGroup : QueueMap) {
302-
bool UseCopyEngine =
303-
QueueGroup.second.Type != ur_queue_handle_t_::queue_type::Compute;
304+
bool UseCopyEngine = QueueGroup.second.Type !=
305+
ur_queue_handle_legacy_t_::queue_type::Compute;
304306
if (Queue->UsingImmCmdLists) {
305307
// If immediate command lists are being used, each will act as their own
306308
// queue, so we must insert a barrier into each.
@@ -369,8 +371,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier(
369371

370372
// Execute each command list so the barriers can be encountered.
371373
for (ur_command_list_ptr_t &CmdList : CmdLists) {
372-
bool IsCopy =
373-
CmdList->second.isCopy(reinterpret_cast<ur_queue_handle_t>(Queue));
374+
bool IsCopy = CmdList->second.isCopy(
375+
reinterpret_cast<ur_queue_handle_legacy_t>(Queue));
374376
const auto &CommandBatch =
375377
(IsCopy) ? Queue->CopyCommandBatch : Queue->ComputeCommandBatch;
376378
// Only batch if the matching CmdList is already open.
@@ -414,7 +416,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(
414416
// possible that this is trying to query some event's status that
415417
// is part of the batch. This isn't strictly required, but it seems
416418
// like a reasonable thing to do.
417-
auto UrQueue = Event->UrQueue;
419+
auto UrQueue = Legacy(Event->UrQueue);
418420
if (UrQueue) {
419421
// Lock automatically releases when this goes out of scope.
420422
std::unique_lock<ur_shared_mutex> Lock(UrQueue->Mutex, std::try_to_lock);
@@ -486,8 +488,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo(
486488
return UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE;
487489
}
488490

489-
ur_device_handle_t Device =
490-
Event->UrQueue ? Event->UrQueue->Device : Event->Context->Devices[0];
491+
ur_device_handle_t Device = Legacy(Event->UrQueue)
492+
? Legacy(Event->UrQueue)->Device
493+
: Event->Context->Devices[0];
491494

492495
uint64_t ZeTimerResolution = Device->ZeDeviceProperties->timerResolution;
493496
const uint64_t TimestampMaxValue =
@@ -512,10 +515,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo(
512515
return ReturnValue(Event->RecordEventEndTimestamp);
513516

514517
// Otherwise we need to collect it from the queue.
515-
auto Entry = Event->UrQueue->EndTimeRecordings.find(Event);
518+
auto Entry = Legacy(Event->UrQueue)->EndTimeRecordings.find(Event);
516519

517520
// Unexpected state if there is no end-time record.
518-
if (Entry == Event->UrQueue->EndTimeRecordings.end())
521+
if (Entry == Legacy(Event->UrQueue)->EndTimeRecordings.end())
519522
return UR_RESULT_ERROR_UNKNOWN;
520523
auto &EndTimeRecording = Entry->second;
521524

@@ -540,7 +543,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo(
540543
// anymore, so we cache it on the event and evict the record from the
541544
// queue.
542545
Event->RecordEventEndTimestamp = ContextEndTime;
543-
Event->UrQueue->EndTimeRecordings.erase(Entry);
546+
Legacy(Event->UrQueue)->EndTimeRecordings.erase(Entry);
544547

545548
return ReturnValue(ContextEndTime);
546549
}
@@ -659,7 +662,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo(
659662
}
660663

661664
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueTimestampRecordingExp(
662-
ur_queue_handle_t Queue, ///< [in] handle of the queue object
665+
ur_queue_handle_t UrQueue, ///< [in] handle of the queue object
663666
bool Blocking, ///< [in] blocking or non-blocking enqueue
664667
uint32_t NumEventsInWaitList, ///< [in] size of the event wait list
665668
const ur_event_handle_t
@@ -673,6 +676,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueTimestampRecordingExp(
673676
*OutEvent ///< [in,out] return an event object that identifies
674677
///< this particular command instance.
675678
) {
679+
auto Queue = Legacy(UrQueue);
676680
// Lock automatically releases when this goes out of scope.
677681
std::scoped_lock<ur_shared_mutex> lock(Queue->Mutex);
678682

@@ -701,7 +705,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueTimestampRecordingExp(
701705

702706
// Create a new entry in the queue's recordings.
703707
Queue->EndTimeRecordings[*OutEvent] =
704-
ur_queue_handle_t_::end_time_recording{};
708+
ur_queue_handle_legacy_t_::end_time_recording{};
705709

706710
ZE2UR_CALL(zeCommandListAppendWriteGlobalTimestamp,
707711
(CommandList->first,
@@ -717,6 +721,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueTimestampRecordingExp(
717721

718722
ur_result_t ur_event_handle_t_::getOrCreateHostVisibleEvent(
719723
ze_event_handle_t &ZeHostVisibleEvent) {
724+
auto UrQueue = Legacy(this->UrQueue);
720725

721726
std::scoped_lock<ur_shared_mutex, ur_shared_mutex> Lock(UrQueue->Mutex,
722727
this->Mutex);
@@ -771,7 +776,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait(
771776
) {
772777
for (uint32_t I = 0; I < NumEvents; I++) {
773778
auto e = EventWaitList[I];
774-
if (e->UrQueue && e->UrQueue->ZeEventsScope == OnDemandHostVisibleProxy) {
779+
auto UrQueue = Legacy(e->UrQueue);
780+
if (UrQueue && UrQueue->ZeEventsScope == OnDemandHostVisibleProxy) {
775781
// Make sure to add all host-visible "proxy" event signals if needed.
776782
// This ensures that all signalling commands are submitted below and
777783
// thus proxy events can be waited without a deadlock.
@@ -788,15 +794,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait(
788794
// Submit dependent open command lists for execution, if any
789795
for (uint32_t I = 0; I < NumEvents; I++) {
790796
ur_event_handle_t_ *Event = ur_cast<ur_event_handle_t_ *>(EventWaitList[I]);
791-
auto UrQueue = Event->UrQueue;
797+
auto UrQueue = Legacy(Event->UrQueue);
792798
if (UrQueue) {
793799
// Lock automatically releases when this goes out of scope.
794800
std::scoped_lock<ur_shared_mutex> lock(UrQueue->Mutex);
795801

796802
UR_CALL(UrQueue->executeAllOpenCommandLists());
797803
}
798804
}
799-
std::unordered_set<ur_queue_handle_t> Queues;
805+
std::unordered_set<ur_queue_handle_legacy_t> Queues;
800806
for (uint32_t I = 0; I < NumEvents; I++) {
801807
{
802808
ur_event_handle_t_ *Event =
@@ -823,12 +829,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait(
823829
Event->Completed = true;
824830
}
825831
}
826-
if (auto Q = Event->UrQueue) {
832+
if (auto Q = Legacy(Event->UrQueue)) {
827833
if (Q->UsingImmCmdLists && Q->isInOrderQueue())
828834
// Use information about waited event to cleanup completed events in
829835
// the in-order queue.
830836
CleanupEventsInImmCmdLists(
831-
Event->UrQueue, false /* QueueLocked */, false /* QueueSynced */,
837+
Legacy(Event->UrQueue), false /* QueueLocked */,
838+
false /* QueueSynced */,
832839
reinterpret_cast<ur_event_handle_t>(Event));
833840
else {
834841
// NOTE: we are cleaning up after the event here to free resources
@@ -884,7 +891,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle(
884891
// Event can potentially be in an open command-list, make sure that
885892
// it is submitted for execution to avoid potential deadlock if
886893
// interop app is going to wait for it.
887-
auto Queue = Event->UrQueue;
894+
auto Queue = Legacy(Event->UrQueue);
888895
if (Queue) {
889896
std::scoped_lock<ur_shared_mutex> lock(Queue->Mutex);
890897
const auto &OpenCommandList = Queue->eventOpenCommandList(Event);
@@ -1014,7 +1021,7 @@ ur_result_t urEventReleaseInternal(ur_event_handle_t Event) {
10141021
}
10151022

10161023
// Save pointer to the queue before deleting/resetting event.
1017-
auto Queue = Event->UrQueue;
1024+
auto Queue = Legacy(Event->UrQueue);
10181025

10191026
// If the event was a timestamp recording, we try to evict its entry in the
10201027
// queue.
@@ -1028,7 +1035,7 @@ ur_result_t urEventReleaseInternal(ur_event_handle_t Event) {
10281035
EndTimeRecording.EventHasDied = true;
10291036
} else {
10301037
// Otherwise we evict the entry.
1031-
Event->UrQueue->EndTimeRecordings.erase(Entry);
1038+
Legacy(Event->UrQueue)->EndTimeRecordings.erase(Entry);
10321039
}
10331040
}
10341041
}
@@ -1046,8 +1053,8 @@ ur_result_t urEventReleaseInternal(ur_event_handle_t Event) {
10461053
// created so that we can avoid ur_queue_handle_t is released before the
10471054
// associated ur_event_handle_t is released. Here we have to decrement it so
10481055
// ur_queue_handle_t can be released successfully.
1049-
if (Queue) {
1050-
UR_CALL(urQueueReleaseInternal(Queue));
1056+
if (Event->UrQueue) {
1057+
UR_CALL(urQueueReleaseInternal(Event->UrQueue));
10511058
}
10521059

10531060
return UR_RESULT_SUCCESS;
@@ -1091,7 +1098,7 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked,
10911098
ur_kernel_handle_t AssociatedKernel = nullptr;
10921099
// List of dependent events.
10931100
std::list<ur_event_handle_t> EventsToBeReleased;
1094-
ur_queue_handle_t AssociatedQueue = nullptr;
1101+
ur_queue_handle_legacy_t AssociatedQueue = nullptr;
10951102
{
10961103
// If the Event is already locked, then continue with the cleanup, otherwise
10971104
// block on locking the event.
@@ -1105,7 +1112,7 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked,
11051112
if (Event->CleanedUp)
11061113
return UR_RESULT_SUCCESS;
11071114

1108-
AssociatedQueue = Event->UrQueue;
1115+
AssociatedQueue = Legacy(Event->UrQueue);
11091116

11101117
// Remember the kernel associated with this event if there is one. We are
11111118
// going to release it later.
@@ -1222,9 +1229,9 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked,
12221229
// The "HostVisible" argument specifies if event needs to be allocated from
12231230
// a host-visible pool.
12241231
//
1225-
ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
1226-
bool IsMultiDevice, bool HostVisible,
1227-
ur_event_handle_t *RetEvent,
1232+
ur_result_t EventCreate(ur_context_handle_t Context,
1233+
ur_queue_handle_legacy_t Queue, bool IsMultiDevice,
1234+
bool HostVisible, ur_event_handle_t *RetEvent,
12281235
bool CounterBasedEventEnabled,
12291236
bool ForceDisableProfiling) {
12301237
bool ProfilingEnabled =
@@ -1311,7 +1318,7 @@ ur_result_t ur_event_handle_t_::reset() {
13111318

13121319
ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(
13131320
uint32_t EventListLength, const ur_event_handle_t *EventList,
1314-
ur_queue_handle_t CurQueue, bool UseCopyEngine) {
1321+
ur_queue_handle_legacy_t CurQueue, bool UseCopyEngine) {
13151322
this->Length = 0;
13161323
this->ZeEventList = nullptr;
13171324
this->UrEventList = nullptr;
@@ -1427,7 +1434,7 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(
14271434
}
14281435
}
14291436

1430-
auto Queue = EventList[I]->UrQueue;
1437+
auto Queue = Legacy(EventList[I]->UrQueue);
14311438

14321439
auto CurQueueDevice = CurQueue->Device;
14331440
std::optional<std::unique_lock<ur_shared_mutex>> QueueLock =
@@ -1628,7 +1635,7 @@ ur_result_t _ur_ze_event_list_t::collectEventsForReleaseAndDestroyUrZeEventList(
16281635
// Tells if this event is with profiling capabilities.
16291636
bool ur_event_handle_t_::isProfilingEnabled() const {
16301637
return !UrQueue || // tentatively assume user events are profiling enabled
1631-
(UrQueue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE) != 0;
1638+
(Legacy(UrQueue)->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE) != 0;
16321639
}
16331640

16341641
// Tells if this event was created as a timestamp event, allowing profiling

source/adapters/level_zero/event.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,9 @@
2929

3030
extern "C" {
3131
ur_result_t urEventReleaseInternal(ur_event_handle_t Event);
32-
ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
33-
bool IsMultiDevice, bool HostVisible,
34-
ur_event_handle_t *RetEvent,
32+
ur_result_t EventCreate(ur_context_handle_t Context,
33+
ur_queue_handle_legacy_t Queue, bool IsMultiDevice,
34+
bool HostVisible, ur_event_handle_t *RetEvent,
3535
bool CounterBasedEventEnabled = false,
3636
bool ForceDisableProfiling = false);
3737
} // extern "C"
@@ -89,7 +89,7 @@ struct _ur_ze_event_list_t {
8989
// command-lists.
9090
ur_result_t createAndRetainUrZeEventList(uint32_t EventListLength,
9191
const ur_event_handle_t *EventList,
92-
ur_queue_handle_t CurQueue,
92+
ur_queue_handle_legacy_t CurQueue,
9393
bool UseCopyEngine);
9494

9595
// Add all the events in this object's UrEventList to the end

source/adapters/level_zero/image.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -753,12 +753,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp(
753753
}
754754

755755
UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
756-
ur_queue_handle_t hQueue, void *pDst, void *pSrc,
756+
ur_queue_handle_t hUrQueue, void *pDst, void *pSrc,
757757
const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc,
758758
ur_exp_image_copy_flags_t imageCopyFlags, ur_rect_offset_t srcOffset,
759759
ur_rect_offset_t dstOffset, ur_rect_region_t copyExtent,
760760
ur_rect_region_t hostExtent, uint32_t numEventsInWaitList,
761761
const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
762+
auto hQueue = Legacy(hUrQueue);
762763
std::scoped_lock<ur_shared_mutex> Lock(hQueue->Mutex);
763764

764765
UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE);

0 commit comments

Comments
 (0)