@@ -235,7 +235,7 @@ class DispatchHostTask {
235
235
// Thus we take a read lock on the graph.
236
236
{
237
237
Scheduler &Sched = Scheduler::getInstance ();
238
- std::shared_lock<std::shared_timed_mutex> Lock (Sched.MGraphLock );
238
+ Scheduler::ReadLockT Lock (Sched.MGraphLock );
239
239
240
240
std::vector<DepDesc> Deps = MThisCmd->MDeps ;
241
241
@@ -481,7 +481,7 @@ void Command::makeTraceEventEpilog() {
481
481
#endif
482
482
}
483
483
484
- void Command::processDepEvent (EventImplPtr DepEvent, const DepDesc &Dep) {
484
+ Command * Command::processDepEvent (EventImplPtr DepEvent, const DepDesc &Dep) {
485
485
const QueueImplPtr &WorkerQueue = getWorkerQueue ();
486
486
const ContextImplPtr &WorkerContext = WorkerQueue->getContextImplPtr ();
487
487
@@ -493,21 +493,25 @@ void Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep) {
493
493
// call to waitInternal() is in waitForPreparedHostEvents() as it's called
494
494
// from enqueue process functions
495
495
MPreparedHostDepsEvents.push_back (DepEvent);
496
- return ;
496
+ return nullptr ;
497
497
}
498
498
499
+ Command *ConnectionCmd = nullptr ;
500
+
499
501
// Do not add redundant event dependencies for in-order queues.
500
502
if (Dep.MDepCommand && Dep.MDepCommand ->getWorkerQueue () == WorkerQueue &&
501
503
WorkerQueue->has_property <property::queue::in_order>())
502
- return ;
504
+ return nullptr ;
503
505
504
506
ContextImplPtr DepEventContext = DepEvent->getContextImpl ();
505
507
// If contexts don't match we'll connect them using host task
506
508
if (DepEventContext != WorkerContext && !WorkerContext->is_host ()) {
507
509
Scheduler::GraphBuilder &GB = Scheduler::getInstance ().MGraphBuilder ;
508
- GB.connectDepEvent (this , DepEvent, Dep);
510
+ ConnectionCmd = GB.connectDepEvent (this , DepEvent, Dep);
509
511
} else
510
512
MPreparedDepsEvents.push_back (std::move (DepEvent));
513
+
514
+ return ConnectionCmd;
511
515
}
512
516
513
517
const ContextImplPtr &Command::getWorkerContext () const {
@@ -516,19 +520,23 @@ const ContextImplPtr &Command::getWorkerContext() const {
516
520
517
521
const QueueImplPtr &Command::getWorkerQueue () const { return MQueue; }
518
522
519
- void Command::addDep (DepDesc NewDep) {
523
+ Command *Command::addDep (DepDesc NewDep) {
524
+ Command *ConnectionCmd = nullptr ;
525
+
520
526
if (NewDep.MDepCommand ) {
521
- processDepEvent (NewDep.MDepCommand ->getEvent (), NewDep);
527
+ ConnectionCmd = processDepEvent (NewDep.MDepCommand ->getEvent (), NewDep);
522
528
}
523
529
MDeps.push_back (NewDep);
524
530
#ifdef XPTI_ENABLE_INSTRUMENTATION
525
531
emitEdgeEventForCommandDependence (
526
532
NewDep.MDepCommand , (void *)NewDep.MDepRequirement ->MSYCLMemObj ,
527
533
accessModeToString (NewDep.MDepRequirement ->MAccessMode ), true );
528
534
#endif
535
+
536
+ return ConnectionCmd;
529
537
}
530
538
531
- void Command::addDep (EventImplPtr Event) {
539
+ Command * Command::addDep (EventImplPtr Event) {
532
540
#ifdef XPTI_ENABLE_INSTRUMENTATION
533
541
// We need this for just the instrumentation, so guarding it will prevent
534
542
// unused variable warnings when instrumentation is turned off
@@ -538,7 +546,7 @@ void Command::addDep(EventImplPtr Event) {
538
546
emitEdgeEventForEventDependence (Cmd, PiEventAddr);
539
547
#endif
540
548
541
- processDepEvent (std::move (Event), DepDesc{nullptr , nullptr , nullptr });
549
+ return processDepEvent (std::move (Event), DepDesc{nullptr , nullptr , nullptr });
542
550
}
543
551
544
552
void Command::emitEnqueuedEventSignal (RT::PiEvent &PiEventAddr) {
@@ -732,7 +740,10 @@ AllocaCommand::AllocaCommand(QueueImplPtr Queue, Requirement Req,
732
740
// Node event must be created before the dependent edge is added to this node,
733
741
// so this call must be before the addDep() call.
734
742
emitInstrumentationDataProxy ();
735
- addDep (DepDesc (nullptr , getRequirement (), this ));
743
+ // The DepDesc has no dependency command, so addDep returns no connection command.
744
+ Command *ConnectionCmd = addDep (DepDesc (nullptr , getRequirement (), this ));
745
+ assert (ConnectionCmd == nullptr );
746
+ (void )ConnectionCmd;
736
747
}
737
748
738
749
void AllocaCommand::emitInstrumentationData () {
@@ -795,7 +806,8 @@ void AllocaCommand::printDot(std::ostream &Stream) const {
795
806
}
796
807
797
808
AllocaSubBufCommand::AllocaSubBufCommand (QueueImplPtr Queue, Requirement Req,
798
- AllocaCommandBase *ParentAlloca)
809
+ AllocaCommandBase *ParentAlloca,
810
+ std::vector<Command *> &ToEnqueue)
799
811
: AllocaCommandBase(CommandType::ALLOCA_SUB_BUF, std::move(Queue),
800
812
std::move(Req),
801
813
/* LinkedAllocaCmd*/ nullptr),
@@ -804,7 +816,10 @@ AllocaSubBufCommand::AllocaSubBufCommand(QueueImplPtr Queue, Requirement Req,
804
816
// is added to this node, so this call must be before
805
817
// the addDep() call.
806
818
emitInstrumentationDataProxy ();
807
- addDep (DepDesc (MParentAlloca, getRequirement (), MParentAlloca));
819
+ Command *ConnectionCmd =
820
+ addDep (DepDesc (MParentAlloca, getRequirement (), MParentAlloca));
821
+ if (ConnectionCmd)
822
+ ToEnqueue.push_back (ConnectionCmd);
808
823
}
809
824
810
825
void AllocaSubBufCommand::emitInstrumentationData () {
@@ -1329,7 +1344,10 @@ void EmptyCommand::addRequirement(Command *DepCmd, AllocaCommandBase *AllocaCmd,
1329
1344
MRequirements.emplace_back (ReqRef);
1330
1345
const Requirement *const StoredReq = &MRequirements.back ();
1331
1346
1332
- addDep (DepDesc{DepCmd, StoredReq, AllocaCmd});
1347
+ // EmptyCommand is always a host command, so addDep is expected to return nullptr here.
1348
+ Command *Cmd = addDep (DepDesc{DepCmd, StoredReq, AllocaCmd});
1349
+ assert (Cmd == nullptr && " Conection command should be null for EmptyCommand" );
1350
+ (void )Cmd;
1333
1351
}
1334
1352
1335
1353
void EmptyCommand::emitInstrumentationData () {
0 commit comments