Skip to content

Commit eaabc1f

Browse files
Further reduce the number of wait calls for blocked commands
Previously, any blocked command was added to MPreparedHostDepsEvents, which led to waiting for the dependency event before enqueueing the dependent command rather than passing the dependency to PI.
1 parent dc32a66 commit eaabc1f

File tree

2 files changed

+30
-4
lines changed

2 files changed

+30
-4
lines changed

sycl/source/detail/scheduler/commands.cpp

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -486,10 +486,14 @@ Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep) {
486486
const ContextImplPtr &WorkerContext = WorkerQueue->getContextImplPtr();
487487

488488
// 1. Async work is not supported for host device.
489-
// 2. The event handle can be null in case of, for example, alloca command,
490-
// which is currently synchronous, so don't generate OpenCL event.
491-
// Though, this event isn't host one as it's context isn't host one.
492-
if (DepEvent->is_host() || DepEvent->getHandleRef() == nullptr) {
489+
// 2. Some types of commands do not produce PI events after they are enqueued
490+
// (e.g. alloca). Note that we can't check the pi event to make that
491+
// distinction since the command might still be unenqueued at this point.
492+
bool PiEventExpected = !DepEvent->is_host();
493+
if (auto *DepCmd = static_cast<Command *>(DepEvent->getCommand()))
494+
PiEventExpected &= DepCmd->producesPiEvent();
495+
496+
if (!PiEventExpected) {
493497
// call to waitInternal() is in waitForPreparedHostEvents() as it's called
494498
// from enqueue process functions
495499
MPreparedHostDepsEvents.push_back(DepEvent);
@@ -520,6 +524,8 @@ const ContextImplPtr &Command::getWorkerContext() const {
520524

521525
const QueueImplPtr &Command::getWorkerQueue() const { return MQueue; }
522526

527+
/// Base-class answer to "does this command produce a PI event on non-host
/// devices?". Always reports true; command types that do not produce one
/// override this.
bool Command::producesPiEvent() const {
  return true;
}
528+
523529
Command *Command::addDep(DepDesc NewDep) {
524530
Command *ConnectionCmd = nullptr;
525531

@@ -731,6 +737,8 @@ void AllocaCommandBase::emitInstrumentationData() {
731737
#endif
732738
}
733739

740+
/// Alloca commands report that they do not produce a PI event, so dependants
/// must not expect one from them.
bool AllocaCommandBase::producesPiEvent() const {
  return false;
}
741+
734742
AllocaCommand::AllocaCommand(QueueImplPtr Queue, Requirement Req,
735743
bool InitFromUserData,
736744
AllocaCommandBase *LinkedAllocaCmd)
@@ -998,6 +1006,8 @@ void ReleaseCommand::printDot(std::ostream &Stream) const {
9981006
}
9991007
}
10001008

1009+
/// Release commands report that they do not produce a PI event.
bool ReleaseCommand::producesPiEvent() const {
  return false;
}
1010+
10011011
MapMemObject::MapMemObject(AllocaCommandBase *SrcAllocaCmd, Requirement Req,
10021012
void **DstPtr, QueueImplPtr Queue,
10031013
access::mode MapMode)
@@ -1392,6 +1402,8 @@ void EmptyCommand::printDot(std::ostream &Stream) const {
13921402
}
13931403
}
13941404

1405+
/// Empty commands report that they do not produce a PI event.
bool EmptyCommand::producesPiEvent() const {
  return false;
}
1406+
13951407
void MemCpyCommandHost::printDot(std::ostream &Stream) const {
13961408
Stream << "\"" << this << "\" [style=filled, fillcolor=\"#B6A2EB\", label=\"";
13971409

@@ -2193,6 +2205,10 @@ cl_int ExecCGCommand::enqueueImp() {
21932205
return PI_INVALID_OPERATION;
21942206
}
21952207

2208+
bool ExecCGCommand::producesPiEvent() const {
2209+
return MCommandGroup->getType() != CG::CGTYPE::CODEPLAY_HOST_TASK;
2210+
}
2211+
21962212
} // namespace detail
21972213
} // namespace sycl
21982214
} // __SYCL_INLINE_NAMESPACE(cl)

sycl/source/detail/scheduler/commands.hpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,9 @@ class Command {
189189
/// for memory copy commands.
190190
virtual const QueueImplPtr &getWorkerQueue() const;
191191

192+
/// Returns true iff the command produces a PI event on non-host devices.
193+
virtual bool producesPiEvent() const;
194+
192195
protected:
193196
EventImplPtr MEvent;
194197
QueueImplPtr MQueue;
@@ -306,6 +309,8 @@ class EmptyCommand : public Command {
306309

307310
void emitInstrumentationData() override;
308311

312+
bool producesPiEvent() const final;
313+
309314
private:
310315
cl_int enqueueImp() final;
311316

@@ -323,6 +328,7 @@ class ReleaseCommand : public Command {
323328

324329
void printDot(std::ostream &Stream) const final;
325330
void emitInstrumentationData() override;
331+
bool producesPiEvent() const final;
326332

327333
private:
328334
cl_int enqueueImp() final;
@@ -347,6 +353,8 @@ class AllocaCommandBase : public Command {
347353

348354
void emitInstrumentationData() override;
349355

356+
bool producesPiEvent() const final;
357+
350358
void *MMemAllocation = nullptr;
351359

352360
/// Alloca command linked with current command.
@@ -518,6 +526,8 @@ class ExecCGCommand : public Command {
518526
MCommandGroup.release();
519527
}
520528

529+
bool producesPiEvent() const final;
530+
521531
private:
522532
cl_int enqueueImp() final;
523533

0 commit comments

Comments
 (0)