Skip to content

Commit 7181b4c

Browse files
[SYCL] Drop the old post-wait graph cleanup mechanism (#7727)
This patch updates the remaining cases that still used the old post-wait graph cleanup: - Kernels with streams are now cleaned up after enqueue. The lifetime of internal buffers allocated for them is now handled like any other buffer with deferred release. - Kernels with auxiliary resources are also cleaned up after enqueue. Since their resources are not limited to buffers, we now regularly check the status of such kernels alongside regular graph cleanup, and release the resources when we're able to. - Host tasks are still cleaned up after completion, but they are now sent to the new cleanup mechanism instead of relying on the old one.
1 parent 65c0e98 commit 7181b4c

23 files changed

+373
-667
lines changed

sycl/source/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,6 @@ set(SYCL_SOURCES
156156
"detail/scheduler/commands.cpp"
157157
"detail/scheduler/leaves_collection.cpp"
158158
"detail/scheduler/scheduler.cpp"
159-
"detail/scheduler/scheduler_helpers.cpp"
160159
"detail/scheduler/graph_processor.cpp"
161160
"detail/scheduler/graph_builder.cpp"
162161
"detail/spec_constant_impl.cpp"

sycl/source/detail/event_impl.cpp

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,6 @@ void event_impl::wait(std::shared_ptr<sycl::detail::event_impl> Self) {
231231
waitInternal();
232232
else if (MCommand)
233233
detail::Scheduler::getInstance().waitForEvent(Self);
234-
cleanupCommand(std::move(Self));
235234

236235
#ifdef XPTI_ENABLE_INSTRUMENTATION
237236
instrumentationEpilog(TelemetryEvent, Name, StreamID, IId);
@@ -246,12 +245,6 @@ void event_impl::wait_and_throw(
246245
SubmittedQueue->throw_asynchronous();
247246
}
248247

249-
void event_impl::cleanupCommand(
250-
std::shared_ptr<sycl::detail::event_impl> Self) const {
251-
if (MCommand && !SYCLConfig<SYCL_DISABLE_EXECUTION_GRAPH_CLEANUP>::get())
252-
detail::Scheduler::getInstance().cleanupFinishedCommands(std::move(Self));
253-
}
254-
255248
void event_impl::checkProfilingPreconditions() const {
256249
std::weak_ptr<queue_impl> EmptyPtr;
257250

sycl/source/detail/event_impl.hpp

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -85,12 +85,6 @@ class event_impl {
8585
/// \param Self is a pointer to this event.
8686
void wait_and_throw(std::shared_ptr<sycl::detail::event_impl> Self);
8787

88-
/// Clean up the command associated with the event. Assumes that the task this
89-
/// event is associated with has been completed.
90-
///
91-
/// \param Self is a pointer to this event.
92-
void cleanupCommand(std::shared_ptr<sycl::detail::event_impl> Self) const;
93-
9488
/// Queries this event for profiling information.
9589
///
9690
/// If the requested info is not available when this member function is
@@ -207,11 +201,6 @@ class event_impl {
207201
/// \return true if this event is discarded.
208202
bool isDiscarded() const { return MState == HES_Discarded; }
209203

210-
void setNeedsCleanupAfterWait(bool NeedsCleanupAfterWait) {
211-
MNeedsCleanupAfterWait = NeedsCleanupAfterWait;
212-
}
213-
bool needsCleanupAfterWait() { return MNeedsCleanupAfterWait; }
214-
215204
/// Returns worker queue for command.
216205
///
217206
/// @return shared_ptr to MWorkerQueue, please be aware it can be empty
@@ -293,12 +282,6 @@ class event_impl {
293282
// HostEventState enum.
294283
std::atomic<int> MState;
295284

296-
// A temporary workaround for the current limitations of post enqueue graph
297-
// cleanup. Indicates that the command associated with this event isn't
298-
// handled by post enqueue cleanup yet and has to be deleted by cleanup after
299-
// wait.
300-
bool MNeedsCleanupAfterWait = false;
301-
302285
std::mutex MMutex;
303286
std::condition_variable cv;
304287

sycl/source/detail/queue_impl.cpp

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -199,13 +199,8 @@ void queue_impl::addEvent(const event &Event) {
199199
addSharedEvent(Event);
200200
}
201201
// As long as the queue supports piQueueFinish we only need to store events
202-
// with command nodes in the following cases:
203-
// 1. Unenqueued commands, since they aren't covered by piQueueFinish.
204-
// 2. Kernels with streams, since they are not supported by post enqueue
205-
// cleanup.
206-
// 3. Host tasks, for both reasons.
207-
else if (is_host() || !MSupportOOO || EImpl->getHandleRef() == nullptr ||
208-
EImpl->needsCleanupAfterWait()) {
202+
// for unenqueued commands and host tasks.
203+
else if (is_host() || !MSupportOOO || EImpl->getHandleRef() == nullptr) {
209204
std::weak_ptr<event_impl> EventWeakPtr{EImpl};
210205
std::lock_guard<std::mutex> Lock{MMutex};
211206
MEventsWeak.push_back(std::move(EventWeakPtr));
@@ -366,11 +361,6 @@ void queue_impl::wait(const detail::code_location &CodeLoc) {
366361
if (SupportsPiFinish) {
367362
const detail::plugin &Plugin = getPlugin();
368363
Plugin.call<detail::PiApiKind::piQueueFinish>(getHandleRef());
369-
for (std::weak_ptr<event_impl> &EventImplWeakPtr : WeakEvents)
370-
if (std::shared_ptr<event_impl> EventImplSharedPtr =
371-
EventImplWeakPtr.lock())
372-
if (EventImplSharedPtr->needsCleanupAfterWait())
373-
EventImplSharedPtr->cleanupCommand(EventImplSharedPtr);
374364
assert(SharedEvents.empty() && "Queues that support calling piQueueFinish "
375365
"shouldn't have shared events");
376366
} else {

sycl/source/detail/scheduler/commands.cpp

Lines changed: 20 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -619,6 +619,11 @@ bool Command::producesPiEvent() const { return true; }
619619

620620
bool Command::supportsPostEnqueueCleanup() const { return true; }
621621

622+
bool Command::readyForCleanup() const {
623+
return MLeafCounter == 0 &&
624+
MEnqueueStatus == EnqueueResultT::SyclEnqueueSuccess;
625+
}
626+
622627
Command *Command::addDep(DepDesc NewDep, std::vector<Command *> &ToCleanUp) {
623628
Command *ConnectionCmd = nullptr;
624629

@@ -748,8 +753,8 @@ bool Command::enqueue(EnqueueResultT &EnqueueResult, BlockingT Blocking,
748753
MEnqueueStatus = EnqueueResultT::SyclEnqueueSuccess;
749754
if (MLeafCounter == 0 && supportsPostEnqueueCleanup() &&
750755
!SYCLConfig<SYCL_DISABLE_POST_ENQUEUE_CLEANUP>::get()) {
751-
assert(!MPostEnqueueCleanup);
752-
MPostEnqueueCleanup = true;
756+
assert(!MMarkedForCleanup);
757+
MMarkedForCleanup = true;
753758
ToCleanUp.push_back(this);
754759
}
755760
}
@@ -851,6 +856,8 @@ bool AllocaCommandBase::producesPiEvent() const { return false; }
851856

852857
bool AllocaCommandBase::supportsPostEnqueueCleanup() const { return false; }
853858

859+
bool AllocaCommandBase::readyForCleanup() const { return false; }
860+
854861
AllocaCommand::AllocaCommand(QueueImplPtr Queue, Requirement Req,
855862
bool InitFromUserData,
856863
AllocaCommandBase *LinkedAllocaCmd, bool IsConst)
@@ -1127,6 +1134,8 @@ bool ReleaseCommand::producesPiEvent() const { return false; }
11271134

11281135
bool ReleaseCommand::supportsPostEnqueueCleanup() const { return false; }
11291136

1137+
bool ReleaseCommand::readyForCleanup() const { return false; }
1138+
11301139
MapMemObject::MapMemObject(AllocaCommandBase *SrcAllocaCmd, Requirement Req,
11311140
void **DstPtr, QueueImplPtr Queue,
11321141
access::mode MapMode)
@@ -1393,24 +1402,13 @@ AllocaCommandBase *ExecCGCommand::getAllocaForReq(Requirement *Req) {
13931402
PI_ERROR_INVALID_OPERATION);
13941403
}
13951404

1396-
std::vector<StreamImplPtr> ExecCGCommand::getStreams() const {
1397-
if (MCommandGroup->getType() == CG::Kernel)
1398-
return ((CGExecKernel *)MCommandGroup.get())->getStreams();
1399-
return {};
1400-
}
1401-
14021405
std::vector<std::shared_ptr<const void>>
14031406
ExecCGCommand::getAuxiliaryResources() const {
14041407
if (MCommandGroup->getType() == CG::Kernel)
14051408
return ((CGExecKernel *)MCommandGroup.get())->getAuxiliaryResources();
14061409
return {};
14071410
}
14081411

1409-
void ExecCGCommand::clearStreams() {
1410-
if (MCommandGroup->getType() == CG::Kernel)
1411-
((CGExecKernel *)MCommandGroup.get())->clearStreams();
1412-
}
1413-
14141412
void ExecCGCommand::clearAuxiliaryResources() {
14151413
if (MCommandGroup->getType() == CG::Kernel)
14161414
((CGExecKernel *)MCommandGroup.get())->clearAuxiliaryResources();
@@ -1714,12 +1712,7 @@ ExecCGCommand::ExecCGCommand(std::unique_ptr<detail::CG> CommandGroup,
17141712
if (MCommandGroup->getType() == detail::CG::CodeplayHostTask) {
17151713
MEvent->setSubmittedQueue(
17161714
static_cast<detail::CGHostTask *>(MCommandGroup.get())->MQueue);
1717-
MEvent->setNeedsCleanupAfterWait(true);
1718-
} else if (MCommandGroup->getType() == CG::CGTYPE::Kernel &&
1719-
(static_cast<CGExecKernel *>(MCommandGroup.get())->hasStreams() ||
1720-
static_cast<CGExecKernel *>(MCommandGroup.get())
1721-
->hasAuxiliaryResources()))
1722-
MEvent->setNeedsCleanupAfterWait(true);
1715+
}
17231716

17241717
emitInstrumentationDataProxy();
17251718
}
@@ -2583,14 +2576,15 @@ bool ExecCGCommand::producesPiEvent() const {
25832576
}
25842577

25852578
bool ExecCGCommand::supportsPostEnqueueCleanup() const {
2586-
// TODO enable cleaning up host task commands and kernels with streams after
2587-
// enqueue
2579+
// Host tasks are cleaned up upon completion instead.
25882580
return Command::supportsPostEnqueueCleanup() &&
2589-
(MCommandGroup->getType() != CG::CGTYPE::CodeplayHostTask) &&
2590-
(MCommandGroup->getType() != CG::CGTYPE::Kernel ||
2591-
(!static_cast<CGExecKernel *>(MCommandGroup.get())->hasStreams() &&
2592-
!static_cast<CGExecKernel *>(MCommandGroup.get())
2593-
->hasAuxiliaryResources()));
2581+
(MCommandGroup->getType() != CG::CGTYPE::CodeplayHostTask);
2582+
}
2583+
2584+
bool ExecCGCommand::readyForCleanup() const {
2585+
if (MCommandGroup->getType() == CG::CGTYPE::CodeplayHostTask)
2586+
return MLeafCounter == 0 && MEvent->isCompleted();
2587+
return Command::readyForCleanup();
25942588
}
25952589
} // namespace detail
25962590
} // __SYCL_INLINE_VER_NAMESPACE(_V1)

sycl/source/detail/scheduler/commands.hpp

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,9 @@ class Command {
217217
/// Returns true iff this command can be freed by post enqueue cleanup.
218218
virtual bool supportsPostEnqueueCleanup() const;
219219

220+
/// Returns true iff this command is ready to be submitted for cleanup.
221+
virtual bool readyForCleanup() const;
222+
220223
/// Collect PI events from EventImpls and filter out some of them in case of
221224
/// in order queue
222225
std::vector<RT::PiEvent>
@@ -334,9 +337,10 @@ class Command {
334337
// synchronous. The only asynchronous operation currently is host-task.
335338
bool MShouldCompleteEventIfPossible = true;
336339

337-
/// Indicates that the node will be freed by cleanup after enqueue. Such nodes
338-
/// should be ignored by other cleanup mechanisms.
339-
bool MPostEnqueueCleanup = false;
340+
/// Indicates that the node will be freed by graph cleanup. Such nodes should
341+
/// be ignored by other cleanup mechanisms (e.g. during memory object
342+
/// removal).
343+
bool MMarkedForCleanup = false;
340344

341345
/// Contains list of commands that depends on the host command explicitly (by
342346
/// depends_on). Not involved in the cleanup process since it is one-way link
@@ -381,6 +385,7 @@ class ReleaseCommand : public Command {
381385
void emitInstrumentationData() override;
382386
bool producesPiEvent() const final;
383387
bool supportsPostEnqueueCleanup() const final;
388+
bool readyForCleanup() const final;
384389

385390
private:
386391
pi_int32 enqueueImp() final;
@@ -409,6 +414,8 @@ class AllocaCommandBase : public Command {
409414

410415
bool supportsPostEnqueueCleanup() const final;
411416

417+
bool readyForCleanup() const final;
418+
412419
void *MMemAllocation = nullptr;
413420

414421
/// Alloca command linked with current command.
@@ -570,10 +577,8 @@ class ExecCGCommand : public Command {
570577
public:
571578
ExecCGCommand(std::unique_ptr<detail::CG> CommandGroup, QueueImplPtr Queue);
572579

573-
std::vector<StreamImplPtr> getStreams() const;
574580
std::vector<std::shared_ptr<const void>> getAuxiliaryResources() const;
575581

576-
void clearStreams();
577582
void clearAuxiliaryResources();
578583

579584
void printDot(std::ostream &Stream) const final;
@@ -585,6 +590,8 @@ class ExecCGCommand : public Command {
585590

586591
bool supportsPostEnqueueCleanup() const final;
587592

593+
bool readyForCleanup() const final;
594+
588595
private:
589596
pi_int32 enqueueImp() final;
590597

0 commit comments

Comments
 (0)