
Commit 6725863

[SYCL] Small refactoring in scheduler (#7140)
1. Change some functions to take std::shared_ptr by const reference rather than by value.
2. Modify the helper for acquiring the graph lock to return a lock rather than take it as an out parameter.
3. Move the host task post-processing code to a new scheduler API so the graph lock is never taken outside of the scheduler.
1 parent 1b79491 commit 6725863
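A note on change 1: copying a std::shared_ptr atomically increments the reference count on entry and decrements it on exit, so pass-by-value pays for shared ownership the callee never uses. When the caller's pointer is guaranteed to outlive the call, a const reference avoids that traffic. A minimal sketch of the before/after signature shape (queue_impl and the submit* names are illustrative stand-ins, not the actual DPC++ declarations):

#include <memory>

struct queue_impl {};
using QueueImplPtr = std::shared_ptr<queue_impl>;

// Before: pass by value. Every call copies the shared_ptr, paying an atomic
// refcount increment on entry and a decrement on exit.
void submitByValue(QueueImplPtr Queue);

// After: pass by const reference. No refcount traffic; safe as long as the
// caller's pointer outlives the call, which holds for these scheduler paths.
void submitByConstRef(const QueueImplPtr &Queue);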

5 files changed, +80 -80 lines changed

sycl/source/detail/scheduler/commands.cpp

Lines changed: 1 addition & 22 deletions
@@ -325,28 +325,7 @@ class DispatchHostTask {
     EmptyCommand *EmptyCmd = MThisCmd->MEmptyCmd;
     assert(EmptyCmd && "No empty command found");
 
-    // Completing command's event along with unblocking enqueue readiness of
-    // empty command may lead to quick deallocation of MThisCmd by some cleanup
-    // process. Thus we'll copy deps prior to completing of event and unblocking
-    // of empty command.
-    // Also, it's possible to have record deallocated prior to enqueue process.
-    // Thus we employ read-lock of graph.
-    std::vector<Command *> ToCleanUp;
-    Scheduler &Sched = Scheduler::getInstance();
-    {
-      Scheduler::ReadLockT Lock(Sched.MGraphLock);
-
-      std::vector<DepDesc> Deps = MThisCmd->MDeps;
-
-      // update self-event status
-      MThisCmd->MEvent->setComplete();
-
-      EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady;
-
-      for (const DepDesc &Dep : Deps)
-        Scheduler::enqueueLeavesOfReqUnlocked(Dep.MDepRequirement, ToCleanUp);
-    }
-    Sched.cleanupCommands(ToCleanUp);
+    Scheduler::getInstance().NotifyHostTaskCompletion(MThisCmd, EmptyCmd);
   }
 };

sycl/source/detail/scheduler/graph_builder.cpp

Lines changed: 4 additions & 4 deletions
@@ -539,7 +539,7 @@ Scheduler::GraphBuilder::addHostAccessor(Requirement *Req,
 }
 
 Command *Scheduler::GraphBuilder::addCGUpdateHost(
-    std::unique_ptr<detail::CG> CommandGroup, QueueImplPtr HostQueue,
+    std::unique_ptr<detail::CG> CommandGroup, const QueueImplPtr &HostQueue,
     std::vector<Command *> &ToEnqueue) {
 
   auto UpdateHost = static_cast<CGUpdateHost *>(CommandGroup.get());
@@ -668,7 +668,7 @@ static bool checkHostUnifiedMemory(const ContextImplPtr &Ctx) {
 // Note, creation of new allocation command can lead to the current context
 // (Record->MCurContext) change.
 AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq(
-    MemObjRecord *Record, const Requirement *Req, QueueImplPtr Queue,
+    MemObjRecord *Record, const Requirement *Req, const QueueImplPtr &Queue,
     std::vector<Command *> &ToEnqueue) {
 
   AllocaCommandBase *AllocaCmd = findAllocaForReq(
@@ -919,7 +919,7 @@ static void combineAccessModesOfReqs(std::vector<Requirement *> &Reqs) {
 
 Command *
 Scheduler::GraphBuilder::addCG(std::unique_ptr<detail::CG> CommandGroup,
-                               QueueImplPtr Queue,
+                               const QueueImplPtr &Queue,
                                std::vector<Command *> &ToEnqueue) {
   std::vector<Requirement *> &Reqs = CommandGroup->MRequirements;
   const std::vector<detail::EventImplPtr> &Events = CommandGroup->MEvents;
@@ -1302,7 +1302,7 @@ void Scheduler::GraphBuilder::removeRecordForMemObj(SYCLMemObjI *MemObject) {
 // requirement.
 // Optionality of Dep is set by Dep.MDepCommand equal to nullptr.
 Command *Scheduler::GraphBuilder::connectDepEvent(
-    Command *const Cmd, EventImplPtr DepEvent, const DepDesc &Dep,
+    Command *const Cmd, const EventImplPtr &DepEvent, const DepDesc &Dep,
     std::vector<Command *> &ToCleanUp) {
   assert(Cmd->getWorkerContext() != DepEvent->getContextImpl());

sycl/source/detail/scheduler/graph_processor.cpp

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@ static Command *getCommand(const EventImplPtr &Event) {
   return (Command *)Event->getCommand();
 }
 
-void Scheduler::GraphProcessor::waitForEvent(EventImplPtr Event,
+void Scheduler::GraphProcessor::waitForEvent(const EventImplPtr &Event,
                                              ReadLockT &GraphReadLock,
                                              std::vector<Command *> &ToCleanUp,
                                              bool LockTheLock) {

sycl/source/detail/scheduler/scheduler.cpp

Lines changed: 38 additions & 38 deletions
@@ -74,7 +74,7 @@ void Scheduler::waitForRecordToFinish(MemObjRecord *Record,
 }
 
 EventImplPtr Scheduler::addCG(std::unique_ptr<detail::CG> CommandGroup,
-                              QueueImplPtr Queue) {
+                              const QueueImplPtr &Queue) {
   EventImplPtr NewEvent = nullptr;
   const CG::CGTYPE Type = CommandGroup->getType();
   std::vector<Command *> AuxiliaryCmds;
@@ -93,8 +93,7 @@ EventImplPtr Scheduler::addCG(std::unique_ptr<detail::CG> CommandGroup,
   }
 
   {
-    WriteLockT Lock(MGraphLock, std::defer_lock);
-    acquireWriteLock(Lock);
+    WriteLockT Lock = acquireWriteLock();
 
     Command *NewCmd = nullptr;
     switch (Type) {
@@ -115,7 +114,7 @@ EventImplPtr Scheduler::addCG(std::unique_ptr<detail::CG> CommandGroup,
 
   std::vector<Command *> ToCleanUp;
   {
-    ReadLockT Lock(MGraphLock);
+    ReadLockT Lock = acquireReadLock();
 
     Command *NewCmd = static_cast<Command *>(NewEvent->getCommand());
 
@@ -172,8 +171,7 @@ EventImplPtr Scheduler::addCopyBack(Requirement *Req) {
   std::vector<Command *> AuxiliaryCmds;
   Command *NewCmd = nullptr;
   {
-    WriteLockT Lock(MGraphLock, std::defer_lock);
-    acquireWriteLock(Lock);
+    WriteLockT Lock = acquireWriteLock();
     NewCmd = MGraphBuilder.addCopyBack(Req, AuxiliaryCmds);
     // Command was not creted because there were no operations with
     // buffer.
@@ -183,7 +181,7 @@ EventImplPtr Scheduler::addCopyBack(Requirement *Req) {
 
   std::vector<Command *> ToCleanUp;
   try {
-    ReadLockT Lock(MGraphLock);
+    ReadLockT Lock = acquireReadLock();
     EnqueueResultT Res;
     bool Enqueued;
 
@@ -210,8 +208,8 @@ Scheduler &Scheduler::getInstance() {
   return GlobalHandler::instance().getScheduler();
 }
 
-void Scheduler::waitForEvent(EventImplPtr Event) {
-  ReadLockT Lock(MGraphLock);
+void Scheduler::waitForEvent(const EventImplPtr &Event) {
+  ReadLockT Lock = acquireReadLock();
   // It's fine to leave the lock unlocked upon return from waitForEvent as
   // there's no more actions to do here with graph
   std::vector<Command *> ToCleanUp;
@@ -230,7 +228,7 @@ static void deallocateStreams(
         StreamImplPtr->get());
 }
 
-void Scheduler::cleanupFinishedCommands(EventImplPtr FinishedEvent) {
+void Scheduler::cleanupFinishedCommands(const EventImplPtr &FinishedEvent) {
   // We are going to traverse a graph of finished commands. Gather stream
   // objects from these commands if any and deallocate buffers for these stream
   // objects, this is needed to guarantee that streamed data is printed and
@@ -276,7 +274,7 @@ void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) {
   {
     // This only needs a shared mutex as it only involves enqueueing and
    // awaiting for events
-    ReadLockT Lock(MGraphLock);
+    ReadLockT Lock = acquireReadLock();
 
     Record = MGraphBuilder.getMemObjRecord(MemObj);
     if (!Record)
@@ -287,8 +285,7 @@ void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) {
   }
 
   {
-    WriteLockT Lock(MGraphLock, std::defer_lock);
-    acquireWriteLock(Lock);
+    WriteLockT Lock = acquireWriteLock();
     MGraphBuilder.decrementLeafCountersForRecord(Record);
     MGraphBuilder.cleanupCommandsForRecord(Record, StreamsToDeallocate,
                                            AuxResourcesToDeallocate);
@@ -303,8 +300,7 @@ EventImplPtr Scheduler::addHostAccessor(Requirement *Req) {
   EventImplPtr NewCmdEvent = nullptr;
 
   {
-    WriteLockT Lock(MGraphLock, std::defer_lock);
-    acquireWriteLock(Lock);
+    WriteLockT Lock = acquireWriteLock();
 
     Command *NewCmd = MGraphBuilder.addHostAccessor(Req, AuxiliaryCmds);
     if (!NewCmd)
@@ -314,7 +310,7 @@ EventImplPtr Scheduler::addHostAccessor(Requirement *Req) {
 
   std::vector<Command *> ToCleanUp;
   {
-    ReadLockT ReadLock(MGraphLock);
+    ReadLockT Lock = acquireReadLock();
     EnqueueResultT Res;
     bool Enqueued;
 
@@ -342,7 +338,7 @@ void Scheduler::releaseHostAccessor(Requirement *Req) {
 
   std::vector<Command *> ToCleanUp;
   {
-    ReadLockT Lock(MGraphLock);
+    ReadLockT Lock = acquireReadLock();
 
     assert(BlockedCmd && "Can't find appropriate command to unblock");
 
@@ -416,27 +412,6 @@ Scheduler::~Scheduler() {
   cleanupCommands({});
 }
 
-void Scheduler::acquireWriteLock(WriteLockT &Lock) {
-#ifdef _WIN32
-  // Avoiding deadlock situation for MSVC. std::shared_timed_mutex specification
-  // does not specify a priority for shared and exclusive accesses. It will be a
-  // deadlock in MSVC's std::shared_timed_mutex implementation, if exclusive
-  // access occurs after shared access.
-  // TODO: after switching to C++17, change std::shared_timed_mutex to
-  // std::shared_mutex and use std::lock_guard here both for Windows and Linux.
-  while (!Lock.try_lock_for(std::chrono::milliseconds(10))) {
-    // Without yield while loop acts like endless while loop and occupies the
-    // whole CPU when multiple command groups are created in multiple host
-    // threads
-    std::this_thread::yield();
-  }
-#else
-  // It is a deadlock on UNIX in implementation of lock and lock_shared, if
-  // try_lock in the loop above will be executed, so using a single lock here
-  Lock.lock();
-#endif // _WIN32
-}
-
 MemObjRecord *Scheduler::getMemObjRecord(const Requirement *const Req) {
   return Req->MSYCLMemObj->MRecord.get();
 }
@@ -472,6 +447,31 @@ void Scheduler::cleanupCommands(const std::vector<Command *> &Cmds) {
   }
 }
 
+void Scheduler::NotifyHostTaskCompletion(Command *Cmd, Command *BlockingCmd) {
+  // Completing command's event along with unblocking enqueue readiness of
+  // empty command may lead to quick deallocation of MThisCmd by some cleanup
+  // process. Thus we'll copy deps prior to completing of event and unblocking
+  // of empty command.
+  // Also, it's possible to have record deallocated prior to enqueue process.
+  // Thus we employ read-lock of graph.
+
+  std::vector<Command *> ToCleanUp;
+  {
+    ReadLockT Lock = acquireReadLock();
+
+    std::vector<DepDesc> Deps = Cmd->MDeps;
+
+    // update self-event status
+    Cmd->getEvent()->setComplete();
+
+    BlockingCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady;
+
+    for (const DepDesc &Dep : Deps)
+      Scheduler::enqueueLeavesOfReqUnlocked(Dep.MDepRequirement, ToCleanUp);
+  }
+  cleanupCommands(ToCleanUp);
+}
+
 } // namespace detail
 } // __SYCL_INLINE_VER_NAMESPACE(_V1)
 } // namespace sycl
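The comment block in NotifyHostTaskCompletion describes a use-after-free hazard: completing the event may let a concurrent cleanup pass deallocate the command while its dependency list is still needed, which is why the deps are copied first under the graph read lock. A condensed, self-contained sketch of that snapshot-then-act pattern (Node, GraphMutex, and finishNode are illustrative names, not SYCL runtime APIs):

#include <shared_mutex>
#include <vector>

struct Node {
  std::vector<Node *> Deps;
  bool Ready = false;
  void complete() { Ready = true; } // in the runtime this can trigger cleanup
};

std::shared_timed_mutex GraphMutex;

void finishNode(Node *N) {
  std::shared_lock<std::shared_timed_mutex> Lock(GraphMutex);
  // Snapshot the dependency list BEFORE completing: once complete() runs, a
  // concurrent cleanup pass may free *N, so its members must not be touched.
  std::vector<Node *> Deps = N->Deps;
  N->complete();
  for (Node *Dep : Deps) {
    (void)Dep; // the real scheduler enqueues the leaves of each dependency
  }
}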

sycl/source/detail/scheduler/scheduler.hpp

Lines changed: 36 additions & 15 deletions
@@ -364,7 +364,7 @@ class Scheduler {
   /// \param CommandGroup is a unique_ptr to a command group to be added.
   /// \return an event object to wait on for command group completion.
   EventImplPtr addCG(std::unique_ptr<detail::CG> CommandGroup,
-                     QueueImplPtr Queue);
+                     const QueueImplPtr &Queue);
 
   /// Registers a command group, that copies most recent memory to the memory
   /// pointed by the requirement.
@@ -380,7 +380,7 @@
   /// corresponding function of device API.
   ///
   /// \param Event is a pointer to event to wait on.
-  void waitForEvent(EventImplPtr Event);
+  void waitForEvent(const EventImplPtr &Event);
 
   /// Removes buffer from the graph.
   ///
@@ -404,7 +404,7 @@
   /// \sa GraphBuilder::cleanupFinishedCommands
   ///
   /// \param FinishedEvent is a cleanup candidate event.
-  void cleanupFinishedCommands(EventImplPtr FinishedEvent);
+  void cleanupFinishedCommands(const EventImplPtr &FinishedEvent);
 
   /// Adds nodes to the graph, that update the requirement with the pointer
   /// to the host memory.
@@ -441,26 +441,45 @@
 
   QueueImplPtr getDefaultHostQueue() { return DefaultHostQueue; }
 
+  const QueueImplPtr &getDefaultHostQueue() const { return DefaultHostQueue; }
+
   static MemObjRecord *getMemObjRecord(const Requirement *const Req);
 
   Scheduler();
   ~Scheduler();
 
 protected:
-  // TODO: after switching to C++17, change std::shared_timed_mutex to
-  // std::shared_mutex
   using RWLockT = std::shared_timed_mutex;
   using ReadLockT = std::shared_lock<RWLockT>;
   using WriteLockT = std::unique_lock<RWLockT>;
 
   /// Provides exclusive access to std::shared_timed_mutex object with deadlock
   /// avoidance
-  ///
-  /// \param Lock is an instance of WriteLockT, created with \c std::defer_lock
-  void acquireWriteLock(WriteLockT &Lock);
+  WriteLockT acquireWriteLock() {
+#ifdef _WIN32
+    WriteLockT Lock(MGraphLock, std::defer_lock);
+    while (!Lock.try_lock_for(std::chrono::milliseconds(10))) {
+      // Without yield while loop acts like endless while loop and occupies the
+      // whole CPU when multiple command groups are created in multiple host
+      // threads
+      std::this_thread::yield();
+    }
+#else
+    WriteLockT Lock(MGraphLock);
+    // It is a deadlock on UNIX in implementation of lock and lock_shared, if
+    // try_lock in the loop above will be executed, so using a single lock here
+#endif // _WIN32
+    return std::move(Lock);
+  }
+
+  /// Provides shared access to std::shared_timed_mutex object with deadlock
+  /// avoidance
+  ReadLockT acquireReadLock() { return ReadLockT{MGraphLock}; }
 
   void cleanupCommands(const std::vector<Command *> &Cmds);
 
+  void NotifyHostTaskCompletion(Command *Cmd, Command *BlockingCmd);
+
   static void enqueueLeavesOfReqUnlocked(const Requirement *const Req,
                                          std::vector<Command *> &ToCleanUp);
 
@@ -479,15 +498,16 @@
   /// \sa queue::submit, Scheduler::addCG
   ///
   /// \return a command that represents command group execution.
-  Command *addCG(std::unique_ptr<detail::CG> CommandGroup, QueueImplPtr Queue,
+  Command *addCG(std::unique_ptr<detail::CG> CommandGroup,
+                 const QueueImplPtr &Queue,
                  std::vector<Command *> &ToEnqueue);
 
   /// Registers a \ref CG "command group" that updates host memory to the
   /// latest state.
   ///
   /// \return a command that represents command group execution.
   Command *addCGUpdateHost(std::unique_ptr<detail::CG> CommandGroup,
-                           QueueImplPtr HostQueue,
+                           const QueueImplPtr &HostQueue,
                            std::vector<Command *> &ToEnqueue);
 
   /// Enqueues a command to update memory to the latest state.
@@ -506,7 +526,7 @@
 
   /// [Provisional] Optimizes subgraph that consists of command associated
   /// with Event passed and its dependencies.
-  void optimize(EventImplPtr Event);
+  void optimize(const EventImplPtr &Event);
 
   void cleanupCommand(Command *Cmd);
 
@@ -523,7 +543,7 @@
   /// used when the user provides a "secondary" queue to the submit method
   /// which may be used when the command fails to enqueue/execute in the
   /// primary queue.
-  void rescheduleCommand(Command *Cmd, QueueImplPtr Queue);
+  void rescheduleCommand(Command *Cmd, const QueueImplPtr &Queue);
 
   /// \return a pointer to the corresponding memory object record for the
   /// SYCL memory object provided, or nullptr if it does not exist.
@@ -566,7 +586,7 @@
   /// \returns the connecting command which is to be enqueued
   ///
   /// Optionality of Dep is set by Dep.MDepCommand equal to nullptr.
-  Command *connectDepEvent(Command *const Cmd, EventImplPtr DepEvent,
+  Command *connectDepEvent(Command *const Cmd, const EventImplPtr &DepEvent,
                            const DepDesc &Dep,
                            std::vector<Command *> &ToCleanUp);
 
@@ -631,7 +651,7 @@
   /// If none found, creates new one.
   AllocaCommandBase *
   getOrCreateAllocaForReq(MemObjRecord *Record, const Requirement *Req,
-                          QueueImplPtr Queue,
+                          const QueueImplPtr &Queue,
                           std::vector<Command *> &ToEnqueue);
 
   void markModifiedIfWrite(MemObjRecord *Record, Requirement *Req);
@@ -738,7 +758,8 @@
   ///
   /// The function may unlock and lock GraphReadLock as needed. Upon return
   /// the lock is left in locked state if and only if LockTheLock is true.
-  static void waitForEvent(EventImplPtr Event, ReadLockT &GraphReadLock,
+  static void waitForEvent(const EventImplPtr &Event,
+                           ReadLockT &GraphReadLock,
                            std::vector<Command *> &ToCleanUp,
                            bool LockTheLock = true);
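A note on change 2: std::unique_lock and std::shared_lock are movable but not copyable, so a helper can construct a lock and hand ownership to the caller by value instead of filling in a deferred out parameter, as acquireWriteLock now does. A standalone illustration of the shape (M, takeWriteLock, and mutateShared are illustrative names):

#include <mutex>
#include <shared_mutex>

std::shared_timed_mutex M;

std::unique_lock<std::shared_timed_mutex> takeWriteLock() {
  std::unique_lock<std::shared_timed_mutex> Lock(M);
  // Returning a local by value moves it out (or elides the move via NRVO);
  // a named `return Lock;` needs no explicit std::move.
  return Lock;
}

void mutateShared() {
  auto Lock = takeWriteLock(); // mutex stays held until Lock leaves scope
  // ... exclusive access to shared state ...
}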

0 commit comments
