Skip to content

Commit b935d6b

Browse files
committed
Merge remote-tracking branch 'intel/sycl' into esimd_bfloat16
2 parents ae82662 + 346a6c5 commit b935d6b

14 files changed

+567
-95
lines changed

sycl/include/sycl/reduction.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2272,7 +2272,7 @@ void reduCGFunc(handler &CGH, KernelType KernelFunc,
22722272

22732273
namespace reduction {
22742274
namespace aux_krn {
2275-
template <class KernelName, class Accessor> struct Multi;
2275+
template <class KernelName, class Predicate> struct Multi;
22762276
} // namespace aux_krn
22772277
} // namespace reduction
22782278
template <typename KernelName, typename KernelType, typename... Reductions,
@@ -2312,7 +2312,7 @@ size_t reduAuxCGFunc(handler &CGH, size_t NWorkItems, size_t MaxWGSize,
23122312
auto AccReduIndices = filterSequence<Reductions...>(Predicate, ReduIndices);
23132313
associateReduAccsWithHandler(CGH, ReduTuple, AccReduIndices);
23142314
using Name = __sycl_reduction_kernel<reduction::aux_krn::Multi, KernelName,
2315-
decltype(OutAccsTuple)>;
2315+
decltype(Predicate)>;
23162316
// TODO: Opportunity to parallelize across number of elements
23172317
range<1> GlobalRange = {HasUniformWG ? NWorkItems : NWorkGroups * WGSize};
23182318
nd_range<1> Range{GlobalRange, range<1>(WGSize)};

sycl/source/detail/event_impl.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -342,10 +342,16 @@ event_impl::get_info<info::event::command_execution_status>() {
342342
if (MState == HES_Discarded)
343343
return info::event_command_status::ext_oneapi_unknown;
344344

345-
if (!MHostEvent && MEvent) {
346-
return get_event_info<info::event::command_execution_status>(
347-
this->getHandleRef(), this->getPlugin());
345+
if (!MHostEvent) {
346+
// Command is enqueued and PiEvent is ready
347+
if (MEvent)
348+
return get_event_info<info::event::command_execution_status>(
349+
this->getHandleRef(), this->getPlugin());
350+
// Command is blocked and not enqueued, PiEvent is not assigned yet
351+
else if (MCommand)
352+
return sycl::info::event_command_status::submitted;
348353
}
354+
349355
return MHostEvent && MState.load() != HES_Complete
350356
? sycl::info::event_command_status::submitted
351357
: info::event_command_status::complete;

sycl/source/detail/event_impl.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,11 @@ class event_impl {
212212
}
213213
bool needsCleanupAfterWait() { return MNeedsCleanupAfterWait; }
214214

215+
/// Returns worker queue for command.
216+
///
217+
/// @return a reference to MWorkerQueue.
218+
QueueImplPtr &getWorkerQueue() { return MWorkerQueue; };
219+
215220
/// Checks if an event is in a fully intialized state. Default-constructed
216221
/// events will return true only after having initialized its native event,
217222
/// while other events will assume that they are fully initialized at
@@ -243,6 +248,8 @@ class event_impl {
243248
std::weak_ptr<queue_impl> MQueue;
244249
const bool MIsProfilingEnabled = false;
245250

251+
QueueImplPtr MWorkerQueue;
252+
246253
/// Dependency events prepared for waiting by backend.
247254
std::vector<EventImplPtr> MPreparedDepsEvents;
248255
std::vector<EventImplPtr> MPreparedHostDepsEvents;

sycl/source/detail/queue_impl.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -120,9 +120,7 @@ class queue_impl {
120120
}
121121
if (!MHostQueue) {
122122
const QueueOrder QOrder =
123-
MPropList.has_property<property::queue::in_order>()
124-
? QueueOrder::Ordered
125-
: QueueOrder::OOO;
123+
MIsInorder ? QueueOrder::Ordered : QueueOrder::OOO;
126124
MQueues.push_back(createQueue(QOrder));
127125
}
128126
}
@@ -202,6 +200,8 @@ class queue_impl {
202200
/// \return true if this queue has discard_events support.
203201
bool has_discard_events_support() const { return MHasDiscardEventsSupport; }
204202

203+
bool isInOrder() const { return MIsInorder; }
204+
205205
/// Queries SYCL queue for information.
206206
///
207207
/// The return type depends on information being queried.

sycl/source/detail/scheduler/commands.cpp

Lines changed: 35 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -203,17 +203,34 @@ static std::string commandToName(Command::CommandType Type) {
203203
}
204204
#endif
205205

206-
static std::vector<RT::PiEvent>
207-
getPiEvents(const std::vector<EventImplPtr> &EventImpls) {
206+
std::vector<RT::PiEvent>
207+
Command::getPiEvents(const std::vector<EventImplPtr> &EventImpls) const {
208208
std::vector<RT::PiEvent> RetPiEvents;
209209
for (auto &EventImpl : EventImpls) {
210-
if (EventImpl->getHandleRef() != nullptr)
211-
RetPiEvents.push_back(EventImpl->getHandleRef());
210+
if (EventImpl->getHandleRef() == nullptr)
211+
continue;
212+
213+
// Do not add redundant event dependencies for in-order queues.
214+
// At this stage dependency is definitely pi task and need to check if
215+
// current one is a host task. In this case we should not skip pi event due
216+
// to different sync mechanisms for different task types on in-order queue.
217+
const QueueImplPtr &WorkerQueue = getWorkerQueue();
218+
if (EventImpl->getWorkerQueue() == WorkerQueue &&
219+
WorkerQueue->isInOrder() && !isHostTask())
220+
continue;
221+
222+
RetPiEvents.push_back(EventImpl->getHandleRef());
212223
}
213224

214225
return RetPiEvents;
215226
}
216227

228+
bool Command::isHostTask() const {
229+
return (MType == CommandType::RUN_CG) /* host task has this type also */ &&
230+
((static_cast<const ExecCGCommand *>(this))->getCG().getType() ==
231+
CG::CGTYPE::CodeplayHostTask);
232+
}
233+
217234
static void flushCrossQueueDeps(const std::vector<EventImplPtr> &EventImpls,
218235
const QueueImplPtr &Queue) {
219236
for (auto &EventImpl : EventImpls) {
@@ -240,7 +257,8 @@ class DispatchHostTask {
240257
// sophisticated waiting mechanism to allow to utilize this thread for any
241258
// other available job and resume once all required events are ready.
242259
for (auto &PluginWithEvents : RequiredEventsPerPlugin) {
243-
std::vector<RT::PiEvent> RawEvents = getPiEvents(PluginWithEvents.second);
260+
std::vector<RT::PiEvent> RawEvents =
261+
MThisCmd->getPiEvents(PluginWithEvents.second);
244262
try {
245263
PluginWithEvents.first->call<PiApiKind::piEventsWait>(RawEvents.size(),
246264
RawEvents.data());
@@ -393,10 +411,12 @@ void Command::waitForEvents(QueueImplPtr Queue,
393411
Command::Command(CommandType Type, QueueImplPtr Queue)
394412
: MQueue(std::move(Queue)),
395413
MEvent(std::make_shared<detail::event_impl>(MQueue)),
414+
MWorkerQueue(MEvent->getWorkerQueue()),
396415
MPreparedDepsEvents(MEvent->getPreparedDepsEvents()),
397416
MPreparedHostDepsEvents(MEvent->getPreparedHostDepsEvents()),
398417
MType(Type) {
399418
MSubmittedQueue = MQueue;
419+
MWorkerQueue = MQueue;
400420
MEvent->setCommand(this);
401421
MEvent->setContextImpl(MQueue->getContextImplPtr());
402422
MEvent->setStateIncomplete();
@@ -600,12 +620,6 @@ Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep,
600620

601621
Command *ConnectionCmd = nullptr;
602622

603-
// Do not add redundant event dependencies for in-order queues.
604-
if (Dep.MDepCommand && Dep.MDepCommand->getWorkerQueue() == WorkerQueue &&
605-
WorkerQueue->has_property<property::queue::in_order>() &&
606-
getType() != CommandType::HOST_TASK)
607-
return nullptr;
608-
609623
ContextImplPtr DepEventContext = DepEvent->getContextImpl();
610624
// If contexts don't match we'll connect them using host task
611625
if (DepEventContext != WorkerContext && !WorkerContext->is_host()) {
@@ -621,14 +635,14 @@ const ContextImplPtr &Command::getWorkerContext() const {
621635
return MQueue->getContextImplPtr();
622636
}
623637

624-
const QueueImplPtr &Command::getWorkerQueue() const { return MQueue; }
638+
const QueueImplPtr &Command::getWorkerQueue() const {
639+
assert(MWorkerQueue && "MWorkerQueue must not be nullptr");
640+
return MWorkerQueue;
641+
}
625642

626643
bool Command::producesPiEvent() const { return true; }
627644

628-
bool Command::supportsPostEnqueueCleanup() const {
629-
// Isolated commands are cleaned up separately
630-
return !MUsers.empty() || !MDeps.empty();
631-
}
645+
bool Command::supportsPostEnqueueCleanup() const { return true; }
632646

633647
Command *Command::addDep(DepDesc NewDep, std::vector<Command *> &ToCleanUp) {
634648
Command *ConnectionCmd = nullptr;
@@ -1298,6 +1312,9 @@ MemCpyCommand::MemCpyCommand(Requirement SrcReq,
12981312
if (!MSrcQueue->is_host()) {
12991313
MEvent->setContextImpl(MSrcQueue->getContextImplPtr());
13001314
}
1315+
1316+
MWorkerQueue = MQueue->is_host() ? MSrcQueue : MQueue;
1317+
13011318
emitInstrumentationDataProxy();
13021319
}
13031320

@@ -1335,10 +1352,6 @@ const ContextImplPtr &MemCpyCommand::getWorkerContext() const {
13351352
return getWorkerQueue()->getContextImplPtr();
13361353
}
13371354

1338-
const QueueImplPtr &MemCpyCommand::getWorkerQueue() const {
1339-
return MQueue->is_host() ? MSrcQueue : MQueue;
1340-
}
1341-
13421355
bool MemCpyCommand::producesPiEvent() const {
13431356
// TODO remove this workaround once the batching issue is addressed in Level
13441357
// Zero plugin.
@@ -1481,6 +1494,8 @@ MemCpyCommandHost::MemCpyCommandHost(Requirement SrcReq,
14811494
MEvent->setContextImpl(MSrcQueue->getContextImplPtr());
14821495
}
14831496

1497+
MWorkerQueue = MQueue->is_host() ? MSrcQueue : MQueue;
1498+
14841499
emitInstrumentationDataProxy();
14851500
}
14861501

@@ -1518,10 +1533,6 @@ const ContextImplPtr &MemCpyCommandHost::getWorkerContext() const {
15181533
return getWorkerQueue()->getContextImplPtr();
15191534
}
15201535

1521-
const QueueImplPtr &MemCpyCommandHost::getWorkerQueue() const {
1522-
return MQueue->is_host() ? MSrcQueue : MQueue;
1523-
}
1524-
15251536
pi_int32 MemCpyCommandHost::enqueueImp() {
15261537
const QueueImplPtr &Queue = getWorkerQueue();
15271538
waitForPreparedHostEvents();

sycl/source/detail/scheduler/commands.hpp

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -199,19 +199,28 @@ class Command {
199199

200200
/// Get the queue this command will be submitted to. Could differ from MQueue
201201
/// for memory copy commands.
202-
virtual const QueueImplPtr &getWorkerQueue() const;
202+
const QueueImplPtr &getWorkerQueue() const;
203203

204204
/// Returns true iff the command produces a PI event on non-host devices.
205205
virtual bool producesPiEvent() const;
206206

207207
/// Returns true iff this command can be freed by post enqueue cleanup.
208208
virtual bool supportsPostEnqueueCleanup() const;
209209

210+
/// Collect PI events from EventImpls and filter out some of them in case of
211+
/// in order queue
212+
std::vector<RT::PiEvent>
213+
getPiEvents(const std::vector<EventImplPtr> &EventImpls) const;
214+
215+
bool isHostTask() const;
216+
210217
protected:
211218
QueueImplPtr MQueue;
212219
QueueImplPtr MSubmittedQueue;
213220
EventImplPtr MEvent;
214221

222+
QueueImplPtr &MWorkerQueue;
223+
215224
/// Dependency events prepared for waiting by backend.
216225
/// See processDepEvent for details.
217226
std::vector<EventImplPtr> &MPreparedDepsEvents;
@@ -252,6 +261,10 @@ class Command {
252261
return MPreparedHostDepsEvents;
253262
}
254263

264+
const std::vector<EventImplPtr> &getPreparedDepsEvents() const {
265+
return MPreparedDepsEvents;
266+
}
267+
255268
/// Contains list of dependencies(edges)
256269
std::vector<DepDesc> MDeps;
257270
/// Contains list of commands that depend on the command.
@@ -492,7 +505,6 @@ class MemCpyCommand : public Command {
492505
const Requirement *getRequirement() const final { return &MDstReq; }
493506
void emitInstrumentationData() final;
494507
const ContextImplPtr &getWorkerContext() const final;
495-
const QueueImplPtr &getWorkerQueue() const final;
496508
bool producesPiEvent() const final;
497509

498510
private:
@@ -517,7 +529,6 @@ class MemCpyCommandHost : public Command {
517529
const Requirement *getRequirement() const final { return &MDstReq; }
518530
void emitInstrumentationData() final;
519531
const ContextImplPtr &getWorkerContext() const final;
520-
const QueueImplPtr &getWorkerQueue() const final;
521532

522533
private:
523534
pi_int32 enqueueImp() final;

sycl/source/detail/scheduler/graph_processor.cpp

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -58,23 +58,20 @@ bool Scheduler::GraphProcessor::enqueueCommand(
5858
return false;
5959
}
6060

61-
// Recursively enqueue all the dependencies first and
62-
// exit immediately if any of the commands cannot be enqueued.
63-
for (DepDesc &Dep : Cmd->MDeps) {
64-
if (!enqueueCommand(Dep.MDepCommand, EnqueueResult, ToCleanUp, Blocking))
65-
return false;
61+
// Recursively enqueue all the implicit + explicit backend level dependencies
62+
// first and exit immediately if any of the commands cannot be enqueued.
63+
for (const EventImplPtr &Event : Cmd->getPreparedDepsEvents()) {
64+
if (Command *DepCmd = static_cast<Command *>(Event->getCommand()))
65+
if (!enqueueCommand(DepCmd, EnqueueResult, ToCleanUp, Blocking))
66+
return false;
6667
}
6768

68-
// Asynchronous host operations (amongst dependencies of an arbitrary command)
69-
// are not supported (see Command::processDepEvent method). This impacts
70-
// operation of host-task feature a lot with hangs and long-runs. Hence we
71-
// have this workaround here.
72-
// This workaround is safe as long as the only asynchronous host operation we
73-
// have is a host task.
74-
// This may iterate over some of dependencies in Cmd->MDeps. Though, the
75-
// enqueue operation is idempotent and the second call will result in no-op.
76-
// TODO remove the workaround when proper fix for host-task dispatching is
77-
// implemented.
69+
// Recursively enqueue all the implicit + explicit host dependencies and
70+
// exit immediately if any of the commands cannot be enqueued.
71+
// Host task execution is asynchronous. In current implementation enqueue for
72+
// this command will wait till host task completion by waitInternal call on
73+
// MHostDepsEvents. TO FIX: implement enqueue of blocked commands on host task
74+
// completion stage and eliminate this event waiting in enqueue.
7875
for (const EventImplPtr &Event : Cmd->getPreparedHostDepsEvents()) {
7976
if (Command *DepCmd = static_cast<Command *>(Event->getCommand()))
8077
if (!enqueueCommand(DepCmd, EnqueueResult, ToCleanUp, Blocking))

sycl/source/detail/scheduler/scheduler.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -157,12 +157,6 @@ EventImplPtr Scheduler::addCG(std::unique_ptr<detail::CG> CommandGroup,
157157
CleanUp();
158158
std::rethrow_exception(std::current_exception());
159159
}
160-
161-
// If there are no memory dependencies decouple and free the command.
162-
// Though, dismiss ownership of native kernel command group as it's
163-
// resources may be in use by backend and synchronization point here is
164-
// at native kernel execution finish.
165-
CleanUp();
166160
}
167161
}
168162
cleanupCommands(ToCleanUp);

sycl/unittests/scheduler/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,5 @@ add_sycl_unittest(SchedulerTests OBJECT
2222
LeafLimitDiffContexts.cpp
2323
InOrderQueueSyncCheck.cpp
2424
RunOnHostIntelCG.cpp
25+
EnqueueWithDependsOnDeps.cpp
2526
)

0 commit comments

Comments
 (0)