Skip to content

Commit de35323

Browse files
[DO NOT MERGE] Graph cleanup experiments
A very rough implementation of cleaning up command nodes after they're enqueued and stop being leaves, alloca commands excluded. Handles only a subset of cases.
1 parent 86c4c15 commit de35323

File tree

9 files changed

+182
-59
lines changed

9 files changed

+182
-59
lines changed

sycl/source/detail/event_impl.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -235,8 +235,6 @@ void event_impl::wait_and_throw(
235235

236236
void event_impl::cleanupCommand(
237237
std::shared_ptr<cl::sycl::detail::event_impl> Self) const {
238-
if (MCommand && !SYCLConfig<SYCL_DISABLE_EXECUTION_GRAPH_CLEANUP>::get())
239-
detail::Scheduler::getInstance().cleanupFinishedCommands(std::move(Self));
240238
}
241239

242240
template <>

sycl/source/detail/scheduler/commands.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -260,8 +260,9 @@ class DispatchHostTask {
260260
// of empty command.
261261
// Also, it's possible to have record deallocated prior to enqueue process.
262262
// Thus we employ read-lock of graph.
263+
std::vector<Command *> EnqueuedCmds;
264+
Scheduler &Sched = Scheduler::getInstance();
263265
{
264-
Scheduler &Sched = Scheduler::getInstance();
265266
Scheduler::ReadLockT Lock(Sched.MGraphLock);
266267

267268
std::vector<DepDesc> Deps = MThisCmd->MDeps;
@@ -272,8 +273,10 @@ class DispatchHostTask {
272273
EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady;
273274

274275
for (const DepDesc &Dep : Deps)
275-
Scheduler::enqueueLeavesOfReqUnlocked(Dep.MDepRequirement);
276+
Scheduler::enqueueLeavesOfReqUnlocked(Dep.MDepRequirement,
277+
EnqueuedCmds);
276278
}
279+
Sched.cleanupCommands(EnqueuedCmds);
277280
}
278281
};
279282

@@ -614,7 +617,7 @@ void Command::emitInstrumentation(uint16_t Type, const char *Txt) {
614617
#endif
615618
}
616619

617-
bool Command::enqueue(EnqueueResultT &EnqueueResult, BlockingT Blocking) {
620+
bool Command::enqueue(EnqueueResultT &EnqueueResult, BlockingT Blocking, std::vector<Command *> &EnqueuedCommands) {
618621
// Exit if already enqueued
619622
if (MEnqueueStatus == EnqueueResultT::SyclEnqueueSuccess)
620623
return true;
@@ -683,6 +686,8 @@ bool Command::enqueue(EnqueueResultT &EnqueueResult, BlockingT Blocking) {
683686
// Consider the command is successfully enqueued if return code is
684687
// CL_SUCCESS
685688
MEnqueueStatus = EnqueueResultT::SyclEnqueueSuccess;
689+
if (MLeafCounter == 0 && (!MDeps.empty() || !MUsers.empty()))
690+
EnqueuedCommands.push_back(this);
686691
}
687692

688693
// Emit this correlation signal before the task end

sycl/source/detail/scheduler/commands.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ class Command {
124124
/// \param Blocking if this argument is true, function will wait for the
125125
/// command to be unblocked before calling enqueueImp.
126126
/// \return true if the command is enqueued.
127-
virtual bool enqueue(EnqueueResultT &EnqueueResult, BlockingT Blocking);
127+
virtual bool enqueue(EnqueueResultT &EnqueueResult, BlockingT Blocking, std::vector<Command *> &EnqueuedCommands);
128128

129129
bool isFinished();
130130

sycl/source/detail/scheduler/graph_builder.cpp

Lines changed: 73 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,8 @@ MemObjRecord *Scheduler::GraphBuilder::getOrInsertMemObjRecord(
194194
ToEnqueue.push_back(ConnectionCmd);
195195
Dependency->addUser(Dependant);
196196
--(Dependency->MLeafCounter);
197+
if (Dependency->MLeafCounter == 0 && Dependency->isSuccessfullyEnqueued())
198+
cleanupCommand(Dependency);
197199
};
198200

199201
const ContextImplPtr &InteropCtxPtr = Req->MSYCLMemObj->getInteropContext();
@@ -225,17 +227,25 @@ MemObjRecord *Scheduler::GraphBuilder::getOrInsertMemObjRecord(
225227
return MemObject->MRecord.get();
226228
}
227229

228-
void Scheduler::GraphBuilder::updateLeaves(const std::set<Command *> &Cmds,
229-
MemObjRecord *Record,
230-
access::mode AccessMode) {
230+
void Scheduler::GraphBuilder::updateLeaves(
231+
const std::set<Command *> &Cmds, MemObjRecord *Record,
232+
access::mode AccessMode, std::vector<Command *> *CommandsToCleanUp) {
231233

232234
const bool ReadOnlyReq = AccessMode == access::mode::read;
233235
if (ReadOnlyReq)
234236
return;
235237

236238
for (Command *Cmd : Cmds) {
239+
bool WasLeaf = Cmd->MLeafCounter > 0;
237240
Cmd->MLeafCounter -= Record->MReadLeaves.remove(Cmd);
238241
Cmd->MLeafCounter -= Record->MWriteLeaves.remove(Cmd);
242+
if (Cmd->MLeafCounter == 0 && Cmd->isSuccessfullyEnqueued()) {
243+
if (CommandsToCleanUp) {
244+
if (WasLeaf)
245+
CommandsToCleanUp->push_back(Cmd);
246+
} else
247+
cleanupCommand(Cmd);
248+
}
239249
}
240250
}
241251

@@ -963,14 +973,23 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr<detail::CG> CommandGroup,
963973
// Node dependencies can be modified further when adding the node to leaves,
964974
// iterate over their copy.
965975
// FIXME employ a reference here to eliminate copying of a vector
976+
// Updating leaves might also clean up some of the dep commands, so update
977+
// their users first.
978+
// FIXME there's probably a better way of handling cleanup & leaf/dep update
979+
// here considering that some of the updated might be destroyed by cleanup
980+
// immediately after.
966981
std::vector<DepDesc> Deps = NewCmd->MDeps;
982+
std::vector<Command *> CommandsToCleanUp;
967983
for (DepDesc &Dep : Deps) {
968984
Dep.MDepCommand->addUser(NewCmd.get());
969985
const Requirement *Req = Dep.MDepRequirement;
970986
MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj);
971-
updateLeaves({Dep.MDepCommand}, Record, Req->MAccessMode);
987+
updateLeaves({Dep.MDepCommand}, Record, Req->MAccessMode,
988+
&CommandsToCleanUp);
972989
addNodeToLeaves(Record, NewCmd.get(), Req->MAccessMode, ToEnqueue);
973990
}
991+
for (Command *Cmd : CommandsToCleanUp)
992+
cleanupCommand(Cmd);
974993

975994
// Register all the events as dependencies
976995
for (detail::EventImplPtr e : Events) {
@@ -993,9 +1012,13 @@ void Scheduler::GraphBuilder::decrementLeafCountersForRecord(
9931012
MemObjRecord *Record) {
9941013
for (Command *Cmd : Record->MReadLeaves) {
9951014
--(Cmd->MLeafCounter);
1015+
if (Cmd->MLeafCounter == 0 && Cmd->isSuccessfullyEnqueued())
1016+
cleanupCommand(Cmd);
9961017
}
9971018
for (Command *Cmd : Record->MWriteLeaves) {
9981019
--(Cmd->MLeafCounter);
1020+
if (Cmd->MLeafCounter == 0 && Cmd->isSuccessfullyEnqueued())
1021+
cleanupCommand(Cmd);
9991022
}
10001023
}
10011024

@@ -1096,6 +1119,52 @@ void Scheduler::GraphBuilder::cleanupCommandsForRecord(
10961119
handleVisitedNodes(MVisitedCmds);
10971120
}
10981121

1122+
1123+
void Scheduler::GraphBuilder::cleanupCommand(Command *Cmd) {
1124+
if (SYCLConfig<SYCL_DISABLE_EXECUTION_GRAPH_CLEANUP>::get())
1125+
return;
1126+
assert(Cmd->MLeafCounter == 0 && Cmd->isSuccessfullyEnqueued());
1127+
// Isolated command nodes are cleaned up by scheduler instead.
1128+
assert(Cmd->MDeps.size() != 0 || Cmd->MUsers.size() != 0);
1129+
Command::CommandType CmdT = Cmd->getType();
1130+
// Allocas have to be kept alive until memory objects are released.
1131+
if (CmdT == Command::ALLOCA || CmdT == Command::ALLOCA_SUB_BUF)
1132+
return;
1133+
1134+
// FIXME handle host tasks
1135+
if (CmdT == Command::RUN_CG) {
1136+
auto *ExecCGCmd = static_cast<ExecCGCommand *>(Cmd);
1137+
if (ExecCGCmd->getCG().getType() == CG::CGTYPE::CodeplayHostTask) {
1138+
return;
1139+
}
1140+
}
1141+
assert(CmdT != Command::ALLOCA && CmdT != Command::ALLOCA_SUB_BUF);
1142+
1143+
for (Command *UserCmd : Cmd->MUsers) {
1144+
for (DepDesc &Dep : UserCmd->MDeps) {
1145+
// Link the users of the command to the alloca command(s) instead
1146+
if (Dep.MDepCommand == Cmd) {
1147+
// ... unless the user is the alloca itself.
1148+
if (Dep.MAllocaCmd == UserCmd) {
1149+
Dep.MDepCommand = nullptr;
1150+
}
1151+
else {
1152+
Dep.MDepCommand = Dep.MAllocaCmd;
1153+
Dep.MDepCommand->MUsers.insert(UserCmd);
1154+
}
1155+
}
1156+
}
1157+
}
1158+
// Update dependency users
1159+
for (DepDesc &Dep : Cmd->MDeps) {
1160+
Command *DepCmd = Dep.MDepCommand;
1161+
DepCmd->MUsers.erase(Cmd);
1162+
}
1163+
Cmd->getEvent()->setCommand(nullptr);
1164+
Cmd->getEvent()->cleanupDependencyEvents();
1165+
delete Cmd;
1166+
}
1167+
10991168
void Scheduler::GraphBuilder::cleanupFinishedCommands(
11001169
Command *FinishedCmd,
11011170
std::vector<std::shared_ptr<stream_impl>> &StreamsToDeallocate) {

sycl/source/detail/scheduler/graph_processor.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,15 @@ static Command *getCommand(const EventImplPtr &Event) {
2323

2424
void Scheduler::GraphProcessor::waitForEvent(EventImplPtr Event,
2525
ReadLockT &GraphReadLock,
26-
bool LockTheLock) {
26+
std::vector<Command *> &EnqueuedCmds, bool LockTheLock) {
2727
Command *Cmd = getCommand(Event);
2828
// Command can be nullptr if user creates cl::sycl::event explicitly or the
2929
// event has been waited on by another thread
3030
if (!Cmd)
3131
return;
3232

3333
EnqueueResultT Res;
34-
bool Enqueued = enqueueCommand(Cmd, Res, BLOCKING);
34+
bool Enqueued = enqueueCommand(Cmd, Res, EnqueuedCmds, BLOCKING);
3535
if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
3636
// TODO: Reschedule commands.
3737
throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION);
@@ -47,7 +47,7 @@ void Scheduler::GraphProcessor::waitForEvent(EventImplPtr Event,
4747

4848
bool Scheduler::GraphProcessor::enqueueCommand(Command *Cmd,
4949
EnqueueResultT &EnqueueResult,
50-
BlockingT Blocking) {
50+
std::vector<Command *> &EnqueuedCommands, BlockingT Blocking) {
5151
if (!Cmd || Cmd->isSuccessfullyEnqueued())
5252
return true;
5353

@@ -60,7 +60,7 @@ bool Scheduler::GraphProcessor::enqueueCommand(Command *Cmd,
6060
// Recursively enqueue all the dependencies first and
6161
// exit immediately if any of the commands cannot be enqueued.
6262
for (DepDesc &Dep : Cmd->MDeps) {
63-
if (!enqueueCommand(Dep.MDepCommand, EnqueueResult, Blocking))
63+
if (!enqueueCommand(Dep.MDepCommand, EnqueueResult, EnqueuedCommands, Blocking))
6464
return false;
6565
}
6666

@@ -76,7 +76,7 @@ bool Scheduler::GraphProcessor::enqueueCommand(Command *Cmd,
7676
// implemented.
7777
for (const EventImplPtr &Event : Cmd->getPreparedHostDepsEvents()) {
7878
if (Command *DepCmd = static_cast<Command *>(Event->getCommand()))
79-
if (!enqueueCommand(DepCmd, EnqueueResult, Blocking))
79+
if (!enqueueCommand(DepCmd, EnqueueResult, EnqueuedCommands, Blocking))
8080
return false;
8181
}
8282

@@ -93,7 +93,7 @@ bool Scheduler::GraphProcessor::enqueueCommand(Command *Cmd,
9393
// on completion of C and starts cleanup process. This thread is still in the
9494
// middle of enqueue of B. The other thread modifies dependency list of A by
9595
// removing C out of it. Iterators become invalid.
96-
return Cmd->enqueue(EnqueueResult, Blocking);
96+
return Cmd->enqueue(EnqueueResult, Blocking, EnqueuedCommands);
9797
}
9898

9999
} // namespace detail

0 commit comments

Comments
 (0)