Skip to content

[SYCL] Drop the old post-wait graph cleanup mechanism #7727

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Dec 15, 2022
1 change: 0 additions & 1 deletion sycl/source/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,6 @@ set(SYCL_SOURCES
"detail/scheduler/commands.cpp"
"detail/scheduler/leaves_collection.cpp"
"detail/scheduler/scheduler.cpp"
"detail/scheduler/scheduler_helpers.cpp"
"detail/scheduler/graph_processor.cpp"
"detail/scheduler/graph_builder.cpp"
"detail/spec_constant_impl.cpp"
Expand Down
7 changes: 0 additions & 7 deletions sycl/source/detail/event_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,6 @@ void event_impl::wait(std::shared_ptr<sycl::detail::event_impl> Self) {
waitInternal();
else if (MCommand)
detail::Scheduler::getInstance().waitForEvent(Self);
cleanupCommand(std::move(Self));

#ifdef XPTI_ENABLE_INSTRUMENTATION
instrumentationEpilog(TelemetryEvent, Name, StreamID, IId);
Expand All @@ -246,12 +245,6 @@ void event_impl::wait_and_throw(
SubmittedQueue->throw_asynchronous();
}

void event_impl::cleanupCommand(
std::shared_ptr<sycl::detail::event_impl> Self) const {
if (MCommand && !SYCLConfig<SYCL_DISABLE_EXECUTION_GRAPH_CLEANUP>::get())
detail::Scheduler::getInstance().cleanupFinishedCommands(std::move(Self));
}

void event_impl::checkProfilingPreconditions() const {
std::weak_ptr<queue_impl> EmptyPtr;

Expand Down
17 changes: 0 additions & 17 deletions sycl/source/detail/event_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,12 +85,6 @@ class event_impl {
/// \param Self is a pointer to this event.
void wait_and_throw(std::shared_ptr<sycl::detail::event_impl> Self);

/// Clean up the command associated with the event. Assumes that the task this
/// event is associated with has been completed.
///
/// \param Self is a pointer to this event.
void cleanupCommand(std::shared_ptr<sycl::detail::event_impl> Self) const;

/// Queries this event for profiling information.
///
/// If the requested info is not available when this member function is
Expand Down Expand Up @@ -207,11 +201,6 @@ class event_impl {
/// \return true if this event is discarded.
bool isDiscarded() const { return MState == HES_Discarded; }

void setNeedsCleanupAfterWait(bool NeedsCleanupAfterWait) {
MNeedsCleanupAfterWait = NeedsCleanupAfterWait;
}
bool needsCleanupAfterWait() { return MNeedsCleanupAfterWait; }

/// Returns worker queue for command.
///
/// @return shared_ptr to MWorkerQueue, please be aware it can be empty
Expand Down Expand Up @@ -293,12 +282,6 @@ class event_impl {
// HostEventState enum.
std::atomic<int> MState;

// A temporary workaround for the current limitations of post enqueue graph
// cleanup. Indicates that the command associated with this event isn't
// handled by post enqueue cleanup yet and has to be deleted by cleanup after
// wait.
bool MNeedsCleanupAfterWait = false;

std::mutex MMutex;
std::condition_variable cv;

Expand Down
14 changes: 2 additions & 12 deletions sycl/source/detail/queue_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,13 +199,8 @@ void queue_impl::addEvent(const event &Event) {
addSharedEvent(Event);
}
// As long as the queue supports piQueueFinish we only need to store events
// with command nodes in the following cases:
// 1. Unenqueued commands, since they aren't covered by piQueueFinish.
// 2. Kernels with streams, since they are not supported by post enqueue
// cleanup.
// 3. Host tasks, for both reasons.
else if (is_host() || !MSupportOOO || EImpl->getHandleRef() == nullptr ||
EImpl->needsCleanupAfterWait()) {
// for unenqueued commands and host tasks.
else if (is_host() || !MSupportOOO || EImpl->getHandleRef() == nullptr) {
std::weak_ptr<event_impl> EventWeakPtr{EImpl};
std::lock_guard<std::mutex> Lock{MMutex};
MEventsWeak.push_back(std::move(EventWeakPtr));
Expand Down Expand Up @@ -366,11 +361,6 @@ void queue_impl::wait(const detail::code_location &CodeLoc) {
if (SupportsPiFinish) {
const detail::plugin &Plugin = getPlugin();
Plugin.call<detail::PiApiKind::piQueueFinish>(getHandleRef());
for (std::weak_ptr<event_impl> &EventImplWeakPtr : WeakEvents)
if (std::shared_ptr<event_impl> EventImplSharedPtr =
EventImplWeakPtr.lock())
if (EventImplSharedPtr->needsCleanupAfterWait())
EventImplSharedPtr->cleanupCommand(EventImplSharedPtr);
assert(SharedEvents.empty() && "Queues that support calling piQueueFinish "
"shouldn't have shared events");
} else {
Expand Down
46 changes: 20 additions & 26 deletions sycl/source/detail/scheduler/commands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -619,6 +619,11 @@ bool Command::producesPiEvent() const { return true; }

bool Command::supportsPostEnqueueCleanup() const { return true; }

bool Command::readyForCleanup() const {
return MLeafCounter == 0 &&
MEnqueueStatus == EnqueueResultT::SyclEnqueueSuccess;
}

Command *Command::addDep(DepDesc NewDep, std::vector<Command *> &ToCleanUp) {
Command *ConnectionCmd = nullptr;

Expand Down Expand Up @@ -748,8 +753,8 @@ bool Command::enqueue(EnqueueResultT &EnqueueResult, BlockingT Blocking,
MEnqueueStatus = EnqueueResultT::SyclEnqueueSuccess;
if (MLeafCounter == 0 && supportsPostEnqueueCleanup() &&
!SYCLConfig<SYCL_DISABLE_POST_ENQUEUE_CLEANUP>::get()) {
assert(!MPostEnqueueCleanup);
MPostEnqueueCleanup = true;
assert(!MMarkedForCleanup);
MMarkedForCleanup = true;
ToCleanUp.push_back(this);
}
}
Expand Down Expand Up @@ -851,6 +856,8 @@ bool AllocaCommandBase::producesPiEvent() const { return false; }

bool AllocaCommandBase::supportsPostEnqueueCleanup() const { return false; }

bool AllocaCommandBase::readyForCleanup() const { return false; }

AllocaCommand::AllocaCommand(QueueImplPtr Queue, Requirement Req,
bool InitFromUserData,
AllocaCommandBase *LinkedAllocaCmd, bool IsConst)
Expand Down Expand Up @@ -1127,6 +1134,8 @@ bool ReleaseCommand::producesPiEvent() const { return false; }

bool ReleaseCommand::supportsPostEnqueueCleanup() const { return false; }

bool ReleaseCommand::readyForCleanup() const { return false; }

MapMemObject::MapMemObject(AllocaCommandBase *SrcAllocaCmd, Requirement Req,
void **DstPtr, QueueImplPtr Queue,
access::mode MapMode)
Expand Down Expand Up @@ -1393,24 +1402,13 @@ AllocaCommandBase *ExecCGCommand::getAllocaForReq(Requirement *Req) {
PI_ERROR_INVALID_OPERATION);
}

std::vector<StreamImplPtr> ExecCGCommand::getStreams() const {
if (MCommandGroup->getType() == CG::Kernel)
return ((CGExecKernel *)MCommandGroup.get())->getStreams();
return {};
}

std::vector<std::shared_ptr<const void>>
ExecCGCommand::getAuxiliaryResources() const {
if (MCommandGroup->getType() == CG::Kernel)
return ((CGExecKernel *)MCommandGroup.get())->getAuxiliaryResources();
return {};
}

void ExecCGCommand::clearStreams() {
if (MCommandGroup->getType() == CG::Kernel)
((CGExecKernel *)MCommandGroup.get())->clearStreams();
}

void ExecCGCommand::clearAuxiliaryResources() {
if (MCommandGroup->getType() == CG::Kernel)
((CGExecKernel *)MCommandGroup.get())->clearAuxiliaryResources();
Expand Down Expand Up @@ -1714,12 +1712,7 @@ ExecCGCommand::ExecCGCommand(std::unique_ptr<detail::CG> CommandGroup,
if (MCommandGroup->getType() == detail::CG::CodeplayHostTask) {
MEvent->setSubmittedQueue(
static_cast<detail::CGHostTask *>(MCommandGroup.get())->MQueue);
MEvent->setNeedsCleanupAfterWait(true);
} else if (MCommandGroup->getType() == CG::CGTYPE::Kernel &&
(static_cast<CGExecKernel *>(MCommandGroup.get())->hasStreams() ||
static_cast<CGExecKernel *>(MCommandGroup.get())
->hasAuxiliaryResources()))
MEvent->setNeedsCleanupAfterWait(true);
}

emitInstrumentationDataProxy();
}
Expand Down Expand Up @@ -2583,14 +2576,15 @@ bool ExecCGCommand::producesPiEvent() const {
}

bool ExecCGCommand::supportsPostEnqueueCleanup() const {
// TODO enable cleaning up host task commands and kernels with streams after
// enqueue
// Host tasks are cleaned up upon completion instead.
return Command::supportsPostEnqueueCleanup() &&
(MCommandGroup->getType() != CG::CGTYPE::CodeplayHostTask) &&
(MCommandGroup->getType() != CG::CGTYPE::Kernel ||
(!static_cast<CGExecKernel *>(MCommandGroup.get())->hasStreams() &&
!static_cast<CGExecKernel *>(MCommandGroup.get())
->hasAuxiliaryResources()));
(MCommandGroup->getType() != CG::CGTYPE::CodeplayHostTask);
}

bool ExecCGCommand::readyForCleanup() const {
if (MCommandGroup->getType() == CG::CGTYPE::CodeplayHostTask)
return MLeafCounter == 0 && MEvent->isCompleted();
return Command::readyForCleanup();
}
} // namespace detail
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
Expand Down
17 changes: 12 additions & 5 deletions sycl/source/detail/scheduler/commands.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,9 @@ class Command {
/// Returns true iff this command can be freed by post enqueue cleanup.
virtual bool supportsPostEnqueueCleanup() const;

/// Returns true iff this command is ready to be submitted for cleanup.
virtual bool readyForCleanup() const;

/// Collect PI events from EventImpls and filter out some of them in case of
/// in order queue
std::vector<RT::PiEvent>
Expand Down Expand Up @@ -334,9 +337,10 @@ class Command {
// synchronous. The only asynchronous operation currently is host-task.
bool MShouldCompleteEventIfPossible = true;

/// Indicates that the node will be freed by cleanup after enqueue. Such nodes
/// should be ignored by other cleanup mechanisms.
bool MPostEnqueueCleanup = false;
/// Indicates that the node will be freed by graph cleanup. Such nodes should
/// be ignored by other cleanup mechanisms (e.g. during memory object
/// removal).
bool MMarkedForCleanup = false;

/// Contains list of commands that depends on the host command explicitly (by
/// depends_on). Not involved in the cleanup process since it is one-way link
Expand Down Expand Up @@ -381,6 +385,7 @@ class ReleaseCommand : public Command {
void emitInstrumentationData() override;
bool producesPiEvent() const final;
bool supportsPostEnqueueCleanup() const final;
bool readyForCleanup() const final;

private:
pi_int32 enqueueImp() final;
Expand Down Expand Up @@ -409,6 +414,8 @@ class AllocaCommandBase : public Command {

bool supportsPostEnqueueCleanup() const final;

bool readyForCleanup() const final;

void *MMemAllocation = nullptr;

/// Alloca command linked with current command.
Expand Down Expand Up @@ -570,10 +577,8 @@ class ExecCGCommand : public Command {
public:
ExecCGCommand(std::unique_ptr<detail::CG> CommandGroup, QueueImplPtr Queue);

std::vector<StreamImplPtr> getStreams() const;
std::vector<std::shared_ptr<const void>> getAuxiliaryResources() const;

void clearStreams();
void clearAuxiliaryResources();

void printDot(std::ostream &Stream) const final;
Expand All @@ -585,6 +590,8 @@ class ExecCGCommand : public Command {

bool supportsPostEnqueueCleanup() const final;

bool readyForCleanup() const final;

private:
pi_int32 enqueueImp() final;

Expand Down
Loading