Skip to content

Commit a856a01

Browse files
[SYCL][L0] Rework active barriers tracking to achieve correct reference counting (#7893)
Signed-off-by: Sergey V Maslov <[email protected]>
1 parent: 2c7ed98 · commit: a856a01

File tree

3 files changed

+56
-37
lines changed

3 files changed

+56
-37
lines changed

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 45 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -2004,27 +2004,38 @@ pi_result _pi_queue::insertActiveBarriers(pi_command_list_ptr_t &CmdList,
20042004
if (ActiveBarriers.empty())
20052005
return PI_SUCCESS;
20062006

2007-
// Create a wait-list and retain events. This will filter out finished events.
2007+
// Create a wait-list and retain events.
20082008
_pi_ze_event_list_t ActiveBarriersWaitList;
20092009
if (auto Res = ActiveBarriersWaitList.createAndRetainPiZeEventList(
2010-
ActiveBarriers.size(), ActiveBarriers.data(), this, UseCopyEngine))
2010+
ActiveBarriers.vector().size(), ActiveBarriers.vector().data(), this,
2011+
UseCopyEngine))
20112012
return Res;
20122013

2013-
// We can now release all the active barriers and replace them with the ones
2014-
// in the wait list.
2015-
for (pi_event &BarrierEvent : ActiveBarriers)
2016-
PI_CALL(piEventReleaseInternal(BarrierEvent));
2014+
// We can now replace active barriers with the ones in the wait list.
20172015
ActiveBarriers.clear();
2018-
ActiveBarriers.insert(
2019-
ActiveBarriers.end(), ActiveBarriersWaitList.PiEventList,
2020-
ActiveBarriersWaitList.PiEventList + ActiveBarriersWaitList.Length);
2016+
2017+
if (ActiveBarriersWaitList.Length == 0) {
2018+
return PI_SUCCESS;
2019+
}
2020+
2021+
for (pi_uint32 I = 0; I < ActiveBarriersWaitList.Length; ++I) {
2022+
auto &Event = ActiveBarriersWaitList.PiEventList[I];
2023+
ActiveBarriers.add(Event);
2024+
}
2025+
2026+
pi_event Event = nullptr;
2027+
if (auto Res = createEventAndAssociateQueue(
2028+
this, &Event, PI_COMMAND_TYPE_USER, CmdList, /*IsInternal*/ true))
2029+
return Res;
2030+
2031+
Event->WaitList = ActiveBarriersWaitList;
2032+
Event->OwnZeEvent = true;
20212033

20222034
// If there are more active barriers, insert a barrier on the command-list. We
20232035
// do not need an event for finishing so we pass nullptr.
2024-
if (!ActiveBarriers.empty())
2025-
ZE_CALL(zeCommandListAppendBarrier,
2026-
(CmdList->first, nullptr, ActiveBarriersWaitList.Length,
2027-
ActiveBarriersWaitList.ZeEventList));
2036+
ZE_CALL(zeCommandListAppendBarrier,
2037+
(CmdList->first, nullptr, ActiveBarriersWaitList.Length,
2038+
ActiveBarriersWaitList.ZeEventList));
20282039
return PI_SUCCESS;
20292040
}
20302041

@@ -6191,6 +6202,17 @@ pi_result piEventRelease(pi_event Event) {
61916202
return PI_SUCCESS;
61926203
}
61936204

6205+
void _pi_queue::active_barriers::add(pi_event &Event) {
6206+
Event->RefCount.increment();
6207+
Events.push_back(Event);
6208+
}
6209+
6210+
void _pi_queue::active_barriers::clear() {
6211+
for (const auto &Event : Events)
6212+
piEventReleaseInternal(Event);
6213+
Events.clear();
6214+
}
6215+
61946216
static pi_result piEventReleaseInternal(pi_event Event) {
61956217
PI_ASSERT(Event, PI_ERROR_INVALID_EVENT);
61966218

@@ -6561,6 +6583,7 @@ pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue,
65616583
if (auto Res = createEventAndAssociateQueue(
65626584
Queue, &Event, PI_COMMAND_TYPE_USER, CmdList, IsInternal))
65636585
return Res;
6586+
65646587
Event->WaitList = EventWaitList;
65656588
ZE_CALL(zeCommandListAppendBarrier,
65666589
(CmdList->first, Event->ZeEvent, EventWaitList.Length,
@@ -6611,21 +6634,13 @@ pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue,
66116634
// Because of the dependency between commands in the in-order queue we don't
66126635
// need to keep track of any active barriers if we have in-order queue.
66136636
if (UseMultipleCmdlistBarriers && !Queue->isInOrderQueue()) {
6614-
// Retain and save the resulting event for future commands.
6615-
(*Event)->RefCount.increment();
6616-
Queue->ActiveBarriers.push_back(*Event);
6637+
Queue->ActiveBarriers.add(*Event);
66176638
}
66186639
return PI_SUCCESS;
66196640
}
66206641

66216642
// Since there are no events to explicitly create a barrier for, we are
6622-
// inserting a queue-wide barrier. As such, the barrier will also encapsulate
6623-
// the active barriers, so we can release and clear the active barriers list.
6624-
// Doing it early prevents potential additional barriers from implicitly being
6625-
// appended.
6626-
for (pi_event &E : Queue->ActiveBarriers)
6627-
PI_CALL(piEventReleaseInternal(E));
6628-
Queue->ActiveBarriers.clear();
6643+
// inserting a queue-wide barrier.
66296644

66306645
// Command list(s) for putting barriers.
66316646
std::vector<pi_command_list_ptr_t> CmdLists;
@@ -6688,11 +6703,12 @@ pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue,
66886703
// Insert a barrier into each unique command queue using the available
66896704
// command-lists.
66906705
std::vector<pi_event> EventWaitVector(CmdLists.size());
6691-
for (size_t I = 0; I < CmdLists.size(); ++I)
6692-
if (auto Res = insertBarrierIntoCmdList(
6693-
CmdLists[I], _pi_ze_event_list_t{}, EventWaitVector[I], false))
6706+
for (size_t I = 0; I < CmdLists.size(); ++I) {
6707+
if (auto Res =
6708+
insertBarrierIntoCmdList(CmdLists[I], _pi_ze_event_list_t{},
6709+
EventWaitVector[I], /*IsInternal*/ true))
66946710
return Res;
6695-
6711+
}
66966712
// If there were multiple queues we need to create a "convergence" event to
66976713
// be our active barrier. This convergence event is signalled by a barrier
66986714
// on all the events from the barriers we have inserted into each queue.
@@ -6706,8 +6722,6 @@ pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue,
67066722
EventWaitVector.size(), EventWaitVector.data(), Queue,
67076723
ConvergenceCmdList->second.isCopy(Queue)))
67086724
return Res;
6709-
for (pi_event &E : EventWaitVector)
6710-
PI_CALL(piEventReleaseInternal(E));
67116725

67126726
// Insert a barrier with the events from each command-queue into the
67136727
// convergence command list. The resulting event signals the convergence of
@@ -6729,9 +6743,8 @@ pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue,
67296743
if (auto Res = Queue->executeCommandList(CmdList, false, OkToBatch))
67306744
return Res;
67316745

6732-
// We must keep the event internally to use if new command lists are created.
6733-
(*Event)->RefCount.increment();
6734-
Queue->ActiveBarriers.push_back(*Event);
6746+
Queue->ActiveBarriers.clear();
6747+
Queue->ActiveBarriers.add(*Event);
67356748
return PI_SUCCESS;
67366749
}
67376750

@@ -6838,10 +6851,7 @@ pi_result _pi_queue::synchronize() {
68386851

68396852
// With the entire queue synchronized, the active barriers must be done so we
68406853
// can remove them.
6841-
for (pi_event &BarrierEvent : ActiveBarriers)
6842-
PI_CALL(piEventReleaseInternal(BarrierEvent));
68436854
ActiveBarriers.clear();
6844-
68456855
return PI_SUCCESS;
68466856
}
68476857

sycl/plugins/level_zero/pi_level_zero.hpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -797,10 +797,19 @@ struct _pi_queue : _pi_object {
797797
pi_result insertActiveBarriers(pi_command_list_ptr_t &CmdList,
798798
bool UseCopyEngine);
799799

800+
// A helper structure to keep active barriers of the queue.
801+
// It additionally manages ref-count of events in this list.
802+
struct active_barriers {
803+
std::vector<pi_event> Events;
804+
void add(pi_event &Event);
805+
void clear();
806+
bool empty() { return Events.empty(); }
807+
std::vector<pi_event> &vector() { return Events; }
808+
};
800809
// A collection of currently active barriers.
801810
// These should be inserted into a command list whenever an available command
802811
// list is needed for a command.
803-
std::vector<pi_event> ActiveBarriers;
812+
active_barriers ActiveBarriers;
804813

805814
// Besides each PI object keeping a total reference count in
806815
// _pi_object::RefCount we keep special track of the queue *external*

sycl/plugins/unified_runtime/ur/ur.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ template <class T> struct ZeCache : private T {
121121

122122
// Access to the fields of the original T data structure.
123123
T *operator->() {
124-
std::call_once(Computed, Compute, static_cast<T&>(*this));
124+
std::call_once(Computed, Compute, static_cast<T &>(*this));
125125
return this;
126126
}
127127
};

0 commit comments

Comments
 (0)