@@ -74,7 +74,7 @@ void Scheduler::waitForRecordToFinish(MemObjRecord *Record,
 }
 
 EventImplPtr Scheduler::addCG(std::unique_ptr<detail::CG> CommandGroup,
-                              QueueImplPtr Queue) {
+                              const QueueImplPtr &Queue) {
   EventImplPtr NewEvent = nullptr;
   const CG::CGTYPE Type = CommandGroup->getType();
   std::vector<Command *> AuxiliaryCmds;
@@ -93,8 +93,7 @@ EventImplPtr Scheduler::addCG(std::unique_ptr<detail::CG> CommandGroup,
   }
 
   {
-    WriteLockT Lock(MGraphLock, std::defer_lock);
-    acquireWriteLock(Lock);
+    WriteLockT Lock = acquireWriteLock();
 
     Command *NewCmd = nullptr;
     switch (Type) {
@@ -115,7 +114,7 @@ EventImplPtr Scheduler::addCG(std::unique_ptr<detail::CG> CommandGroup,
 
   std::vector<Command *> ToCleanUp;
   {
-    ReadLockT Lock(MGraphLock);
+    ReadLockT Lock = acquireReadLock();
 
     Command *NewCmd = static_cast<Command *>(NewEvent->getCommand());
 
@@ -172,8 +171,7 @@ EventImplPtr Scheduler::addCopyBack(Requirement *Req) {
   std::vector<Command *> AuxiliaryCmds;
   Command *NewCmd = nullptr;
   {
-    WriteLockT Lock(MGraphLock, std::defer_lock);
-    acquireWriteLock(Lock);
+    WriteLockT Lock = acquireWriteLock();
     NewCmd = MGraphBuilder.addCopyBack(Req, AuxiliaryCmds);
     // Command was not creted because there were no operations with
     // buffer.
@@ -183,7 +181,7 @@ EventImplPtr Scheduler::addCopyBack(Requirement *Req) {
 
   std::vector<Command *> ToCleanUp;
   try {
-    ReadLockT Lock(MGraphLock);
+    ReadLockT Lock = acquireReadLock();
     EnqueueResultT Res;
     bool Enqueued;
 
@@ -210,8 +208,8 @@ Scheduler &Scheduler::getInstance() {
   return GlobalHandler::instance().getScheduler();
 }
 
-void Scheduler::waitForEvent(EventImplPtr Event) {
-  ReadLockT Lock(MGraphLock);
+void Scheduler::waitForEvent(const EventImplPtr &Event) {
+  ReadLockT Lock = acquireReadLock();
   // It's fine to leave the lock unlocked upon return from waitForEvent as
   // there's no more actions to do here with graph
   std::vector<Command *> ToCleanUp;
@@ -230,7 +228,7 @@ static void deallocateStreams(
         StreamImplPtr->get());
 }
 
-void Scheduler::cleanupFinishedCommands(EventImplPtr FinishedEvent) {
+void Scheduler::cleanupFinishedCommands(const EventImplPtr &FinishedEvent) {
   // We are going to traverse a graph of finished commands. Gather stream
   // objects from these commands if any and deallocate buffers for these stream
   // objects, this is needed to guarantee that streamed data is printed and
@@ -276,7 +274,7 @@ void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) {
   {
     // This only needs a shared mutex as it only involves enqueueing and
     // awaiting for events
-    ReadLockT Lock(MGraphLock);
+    ReadLockT Lock = acquireReadLock();
 
     Record = MGraphBuilder.getMemObjRecord(MemObj);
     if (!Record)
@@ -287,8 +285,7 @@ void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) {
   }
 
   {
-    WriteLockT Lock(MGraphLock, std::defer_lock);
-    acquireWriteLock(Lock);
+    WriteLockT Lock = acquireWriteLock();
     MGraphBuilder.decrementLeafCountersForRecord(Record);
     MGraphBuilder.cleanupCommandsForRecord(Record, StreamsToDeallocate,
                                            AuxResourcesToDeallocate);
@@ -303,8 +300,7 @@ EventImplPtr Scheduler::addHostAccessor(Requirement *Req) {
   EventImplPtr NewCmdEvent = nullptr;
 
   {
-    WriteLockT Lock(MGraphLock, std::defer_lock);
-    acquireWriteLock(Lock);
+    WriteLockT Lock = acquireWriteLock();
 
     Command *NewCmd = MGraphBuilder.addHostAccessor(Req, AuxiliaryCmds);
     if (!NewCmd)
@@ -314,7 +310,7 @@ EventImplPtr Scheduler::addHostAccessor(Requirement *Req) {
 
   std::vector<Command *> ToCleanUp;
   {
-    ReadLockT ReadLock(MGraphLock);
+    ReadLockT Lock = acquireReadLock();
     EnqueueResultT Res;
     bool Enqueued;
 
@@ -342,7 +338,7 @@ void Scheduler::releaseHostAccessor(Requirement *Req) {
 
   std::vector<Command *> ToCleanUp;
   {
-    ReadLockT Lock(MGraphLock);
+    ReadLockT Lock = acquireReadLock();
 
     assert(BlockedCmd && "Can't find appropriate command to unblock");
 
@@ -416,27 +412,6 @@ Scheduler::~Scheduler() {
   cleanupCommands({});
 }
 
-void Scheduler::acquireWriteLock(WriteLockT &Lock) {
-#ifdef _WIN32
-  // Avoiding deadlock situation for MSVC. std::shared_timed_mutex specification
-  // does not specify a priority for shared and exclusive accesses. It will be a
-  // deadlock in MSVC's std::shared_timed_mutex implementation, if exclusive
-  // access occurs after shared access.
-  // TODO: after switching to C++17, change std::shared_timed_mutex to
-  // std::shared_mutex and use std::lock_guard here both for Windows and Linux.
-  while (!Lock.try_lock_for(std::chrono::milliseconds(10))) {
-    // Without yield while loop acts like endless while loop and occupies the
-    // whole CPU when multiple command groups are created in multiple host
-    // threads
-    std::this_thread::yield();
-  }
-#else
-  // It is a deadlock on UNIX in implementation of lock and lock_shared, if
-  // try_lock in the loop above will be executed, so using a single lock here
-  Lock.lock();
-#endif // _WIN32
-}
-
 MemObjRecord *Scheduler::getMemObjRecord(const Requirement *const Req) {
   return Req->MSYCLMemObj->MRecord.get();
 }
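Note: the acquireWriteLock()/acquireReadLock() helpers that the updated call sites use are not defined in this file's hunks; they presumably move to scheduler.hpp. A minimal sketch of what they might look like, assuming WriteLockT/ReadLockT are std::unique_lock/std::shared_lock over the std::shared_timed_mutex MGraphLock and the removed MSVC workaround is simply folded into the write-lock helper:

// Hypothetical sketch, not part of this diff: lock helpers returning the
// acquired lock by value so callers keep RAII unlocking in their own scope.
WriteLockT Scheduler::acquireWriteLock() {
#ifdef _WIN32
  WriteLockT Lock(MGraphLock, std::defer_lock);
  // Same MSVC workaround as the removed member function: spin with a timed
  // try_lock and yield so the waiting thread does not burn a whole CPU and the
  // shared_timed_mutex deadlock described above is avoided.
  while (!Lock.try_lock_for(std::chrono::milliseconds(10)))
    std::this_thread::yield();
#else
  // On other platforms a plain blocking lock is safe.
  WriteLockT Lock(MGraphLock);
#endif // _WIN32
  return Lock;
}

ReadLockT Scheduler::acquireReadLock() { return ReadLockT(MGraphLock); }

Returning the lock by value relies on std::unique_lock/std::shared_lock being movable, which is what lets the call sites collapse the defer_lock-plus-acquire pair into a single initialization.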
@@ -472,6 +447,31 @@ void Scheduler::cleanupCommands(const std::vector<Command *> &Cmds) {
   }
 }
 
+void Scheduler::NotifyHostTaskCompletion(Command *Cmd, Command *BlockingCmd) {
+  // Completing command's event along with unblocking enqueue readiness of
+  // empty command may lead to quick deallocation of MThisCmd by some cleanup
+  // process. Thus we'll copy deps prior to completing of event and unblocking
+  // of empty command.
+  // Also, it's possible to have record deallocated prior to enqueue process.
+  // Thus we employ read-lock of graph.
+
+  std::vector<Command *> ToCleanUp;
+  {
+    ReadLockT Lock = acquireReadLock();
+
+    std::vector<DepDesc> Deps = Cmd->MDeps;
+
+    // update self-event status
+    Cmd->getEvent()->setComplete();
+
+    BlockingCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady;
+
+    for (const DepDesc &Dep : Deps)
+      Scheduler::enqueueLeavesOfReqUnlocked(Dep.MDepRequirement, ToCleanUp);
+  }
+  cleanupCommands(ToCleanUp);
+}
+
 } // namespace detail
 } // __SYCL_INLINE_VER_NAMESPACE(_V1)
 } // namespace sycl
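For context on how the new NotifyHostTaskCompletion() might be used: a host-task dispatch functor (in commands.cpp) could delegate its post-run unblocking and cleanup to this helper instead of taking the graph lock itself. The struct name and members below are illustrative assumptions, not part of this diff:

// Hypothetical call site, for illustration only.
struct DispatchHostTask {
  Command *MThisCmd;  // command that executed the host task (assumed member)
  Command *MEmptyCmd; // empty command blocking dependent users (assumed member)

  void operator()() const {
    // ... run the user-provided host task ...
    // Then let the scheduler complete the event, mark the empty command as
    // ready to enqueue, and enqueue leaves of the affected requirements.
    Scheduler::getInstance().NotifyHostTaskCompletion(MThisCmd, MEmptyCmd);
  }
};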