Skip to content

Commit ca630f3

Browse files
committed
[SYCL] do not store last event as a dendency for in-order queues
For in-order queues, the ordering is guaranteed by UR. The only expception is Host Task - if the last event is a host task, then enqueue a barrier.
1 parent 08d11bc commit ca630f3

File tree

5 files changed

+111
-155
lines changed

5 files changed

+111
-155
lines changed

sycl/source/detail/queue_impl.cpp

Lines changed: 69 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -136,18 +136,19 @@ queue_impl::getExtendDependencyList(const std::vector<event> &DepEvents,
136136
return DepEvents;
137137

138138
QueueLock.lock();
139-
EventImplPtr ExtraEvent = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr
140-
: MExtGraphDeps.LastEventPtr;
141139
std::optional<event> ExternalEvent = popExternalEvent();
142140

143-
if (!ExternalEvent && !ExtraEvent)
141+
if (!ExternalEvent && !LastHostTaskEvent)
144142
return DepEvents;
145143

146144
MutableVec = DepEvents;
147145
if (ExternalEvent)
148146
MutableVec.push_back(*ExternalEvent);
149-
if (ExtraEvent)
150-
MutableVec.push_back(detail::createSyclObjFromImpl<event>(ExtraEvent));
147+
if (LastHostTaskEvent) {
148+
MutableVec.push_back(
149+
detail::createSyclObjFromImpl<event>(LastHostTaskEvent));
150+
LastHostTaskEvent = nullptr;
151+
}
151152
return MutableVec;
152153
}
153154

@@ -282,20 +283,21 @@ event queue_impl::memcpyFromDeviceGlobal(
282283
DeviceGlobalPtr, IsDeviceImageScope, Self, NumBytes, Offset, Dest);
283284
}
284285

285-
sycl::detail::optional<event> queue_impl::getLastEvent() {
286-
// The external event is required to finish last if set, so it is considered
287-
// the last event if present.
286+
sycl::detail::optional<event>
287+
queue_impl::getLastEvent(const std::shared_ptr<queue_impl> &Self) {
288288
if (std::optional<event> ExternalEvent = MInOrderExternalEvent.read())
289289
return ExternalEvent;
290290

291-
std::lock_guard<std::mutex> Lock{MMutex};
292-
if (MGraph.expired() && !MDefaultGraphDeps.LastEventPtr)
291+
if (MEmpty) {
293292
return std::nullopt;
294-
if (MDiscardEvents)
295-
return createDiscardedEvent();
296-
if (!MGraph.expired() && MExtGraphDeps.LastEventPtr)
297-
return detail::createSyclObjFromImpl<event>(MExtGraphDeps.LastEventPtr);
298-
return detail::createSyclObjFromImpl<event>(MDefaultGraphDeps.LastEventPtr);
293+
}
294+
295+
if (LastHostTaskEvent) {
296+
return detail::createSyclObjFromImpl<event>(LastHostTaskEvent);
297+
}
298+
299+
// We insert a marker to represent an event at end.
300+
return detail::createSyclObjFromImpl<event>(insertMarkerEvent(Self));
299301
}
300302

301303
void queue_impl::addEvent(const event &Event) {
@@ -375,11 +377,11 @@ event queue_impl::submitWithHandler(const std::shared_ptr<queue_impl> &Self,
375377
};
376378
detail::type_erased_cgfo_ty CGF{L};
377379

378-
if (!CallerNeedsEvent && supportsDiscardingPiEvents()) {
379-
submit_without_event(CGF, Self, SI,
380-
/*CodeLoc*/ {}, /*IsTopCodeLoc*/ true);
381-
return createDiscardedEvent();
382-
}
380+
// if (!CallerNeedsEvent && supportsDiscardingPiEvents()) {
381+
// submit_without_event(CGF, Self, SI,
382+
// /*CodeLoc*/ {}, /*IsTopCodeLoc*/ true);
383+
// return createDiscardedEvent();
384+
// }
383385
return submit_with_event(CGF, Self, SI,
384386
/*CodeLoc*/ {}, /*IsTopCodeLoc*/ true);
385387
}
@@ -396,6 +398,32 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
396398
{
397399
std::unique_lock<std::mutex> Lock(MMutex, std::defer_lock);
398400

401+
if (isInOrder()) {
402+
Lock.lock();
403+
std::optional<event> ExternalEvent = popExternalEvent();
404+
405+
if (LastHostTaskEvent) {
406+
// TODO: this should be in finalize?
407+
LastHostTaskEvent->wait(LastHostTaskEvent);
408+
LastHostTaskEvent = nullptr;
409+
}
410+
411+
std::vector<event> WaitEvents;
412+
if (ExternalEvent)
413+
WaitEvents.emplace_back(std::move(*ExternalEvent));
414+
415+
event ResEvent = prepareSYCLEventAssociatedWithQueue(Self);
416+
const auto &EventImpl = detail::getSyclObjImpl(ResEvent);
417+
{
418+
NestedCallsTracker tracker;
419+
ur_event_handle_t UREvent = nullptr;
420+
MemOpFunc(MemOpArgs..., getUrEvents(WaitEvents), &UREvent, EventImpl);
421+
EventImpl->setHandle(UREvent);
422+
EventImpl->setEnqueued();
423+
}
424+
return discard_or_return(ResEvent);
425+
}
426+
399427
std::vector<event> MutableDepEvents;
400428
const std::vector<event> &ExpandedDepEvents =
401429
getExtendDependencyList(DepEvents, MutableDepEvents, Lock);
@@ -404,22 +432,22 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
404432
// handler rather than by-passing the scheduler.
405433
if (MGraph.expired() && Scheduler::areEventsSafeForSchedulerBypass(
406434
ExpandedDepEvents, MContext)) {
407-
if ((MDiscardEvents || !CallerNeedsEvent) &&
408-
supportsDiscardingPiEvents()) {
409-
NestedCallsTracker tracker;
410-
MemOpFunc(MemOpArgs..., getUrEvents(ExpandedDepEvents),
411-
/*PiEvent*/ nullptr, /*EventImplPtr*/ nullptr);
412-
413-
event DiscardedEvent = createDiscardedEvent();
414-
if (isInOrder()) {
415-
// Store the discarded event for proper in-order dependency tracking.
416-
auto &EventToStoreIn = MGraph.expired()
417-
? MDefaultGraphDeps.LastEventPtr
418-
: MExtGraphDeps.LastEventPtr;
419-
EventToStoreIn = detail::getSyclObjImpl(DiscardedEvent);
420-
}
421-
return DiscardedEvent;
422-
}
435+
// if ((MDiscardEvents || !CallerNeedsEvent) &&
436+
// supportsDiscardingPiEvents()) {
437+
// NestedCallsTracker tracker;
438+
// MemOpFunc(MemOpArgs..., getUrEvents(ExpandedDepEvents),
439+
// /*PiEvent*/ nullptr, /*EventImplPtr*/ nullptr);
440+
441+
// event DiscardedEvent = createDiscardedEvent();
442+
// if (isInOrder()) {
443+
// // Store the discarded event for proper in-order dependency
444+
// tracking. auto &EventToStoreIn = MGraph.expired()
445+
// ? MDefaultGraphDeps.LastEventPtr
446+
// : MExtGraphDeps.LastEventPtr;
447+
// EventToStoreIn = detail::getSyclObjImpl(DiscardedEvent);
448+
// }
449+
// return DiscardedEvent;
450+
// }
423451

424452
event ResEvent = prepareSYCLEventAssociatedWithQueue(Self);
425453
const auto &EventImpl = detail::getSyclObjImpl(ResEvent);
@@ -443,12 +471,6 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
443471
}
444472
}
445473

446-
if (isInOrder()) {
447-
auto &EventToStoreIn = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr
448-
: MExtGraphDeps.LastEventPtr;
449-
EventToStoreIn = EventImpl;
450-
}
451-
452474
return discard_or_return(ResEvent);
453475
}
454476
}
@@ -569,6 +591,11 @@ void queue_impl::wait(const detail::code_location &CodeLoc) {
569591
std::vector<std::weak_ptr<event_impl>> WeakEvents;
570592
{
571593
std::lock_guard<std::mutex> Lock(MMutex);
594+
if (LastHostTaskEvent) {
595+
LastHostTaskEvent->wait(LastHostTaskEvent);
596+
LastHostTaskEvent = nullptr;
597+
}
598+
572599
WeakEvents.swap(MEventsWeak);
573600

574601
MMissedCleanupRequests.unset(
@@ -684,23 +711,6 @@ ur_native_handle_t queue_impl::getNative(int32_t &NativeHandleDesc) const {
684711
}
685712

686713
bool queue_impl::ext_oneapi_empty() const {
687-
// If we have in-order queue where events are not discarded then just check
688-
// the status of the last event.
689-
if (isInOrder() && !MDiscardEvents) {
690-
std::lock_guard<std::mutex> Lock(MMutex);
691-
// If there is no last event we know that no work has been submitted, so it
692-
// must be trivially empty.
693-
if (!MDefaultGraphDeps.LastEventPtr)
694-
return true;
695-
// Otherwise, check if the last event is finished.
696-
// Note that we fall back to the backend query if the event was discarded,
697-
// which may happend despite the queue not being a discard event queue.
698-
if (!MDefaultGraphDeps.LastEventPtr->isDiscarded())
699-
return MDefaultGraphDeps.LastEventPtr
700-
->get_info<info::event::command_execution_status>() ==
701-
info::event_command_status::complete;
702-
}
703-
704714
// Check the status of the backend queue if this is not a host queue.
705715
ur_bool_t IsReady = false;
706716
getAdapter()->call<UrApiKind::urQueueGetInfo>(

sycl/source/detail/queue_impl.hpp

Lines changed: 27 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -180,8 +180,6 @@ class queue_impl {
180180
#endif
181181
}
182182

183-
sycl::detail::optional<event> getLastEvent();
184-
185183
private:
186184
void queue_impl_interop(ur_queue_handle_t UrQueue) {
187185
if (has_property<ext::oneapi::property::queue::discard_events>() &&
@@ -676,6 +674,9 @@ class queue_impl {
676674
return Result;
677675
}
678676

677+
sycl::detail::optional<event>
678+
getLastEvent(const std::shared_ptr<queue_impl> &Self);
679+
679680
const std::vector<event> &
680681
getExtendDependencyList(const std::vector<event> &DepEvents,
681682
std::vector<event> &MutableVec,
@@ -742,44 +743,32 @@ class queue_impl {
742743
// Hence, here is the lock for thread-safety.
743744
std::lock_guard<std::mutex> Lock{MMutex};
744745

745-
auto &EventToBuildDeps = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr
746-
: MExtGraphDeps.LastEventPtr;
747-
748-
// This dependency is needed for the following purposes:
749-
// - host tasks are handled by the runtime and cannot be implicitly
750-
// synchronized by the backend.
751-
// - to prevent the 2nd kernel enqueue when the 1st kernel is blocked
752-
// by a host task. This dependency allows to build the enqueue order in
753-
// the RT but will not be passed to the backend. See getPIEvents in
754-
// Command.
755-
if (EventToBuildDeps) {
756-
// In the case where the last event was discarded and we are to run a
757-
// host_task, we insert a barrier into the queue and use the resulting
758-
// event as the dependency for the host_task.
759-
// Note that host_task events can never be discarded, so this will not
760-
// insert barriers between host_task enqueues.
761-
if (EventToBuildDeps->isDiscarded() &&
762-
Handler.getType() == CGType::CodeplayHostTask)
763-
EventToBuildDeps = insertHelperBarrier(Handler);
764-
765-
// depends_on after an async alloc is explicitly disallowed. Async alloc
766-
// handles in order queue dependencies preemptively, so we skip them.
767-
// Note: This could be improved by moving the handling of dependencies
768-
// to before calling the CGF.
769-
if (!EventToBuildDeps->isDiscarded() &&
770-
!(Handler.getType() == CGType::AsyncAlloc))
771-
Handler.depends_on(EventToBuildDeps);
772-
}
773-
774746
// If there is an external event set, add it as a dependency and clear it.
775747
// We do not need to hold the lock as MLastEventMtx will ensure the last
776748
// event reflects the corresponding external event dependence as well.
777749
std::optional<event> ExternalEvent = popExternalEvent();
778750
if (ExternalEvent)
779751
Handler.depends_on(*ExternalEvent);
780752

753+
if (LastHostTaskEvent && Handler.getType() == CGType::CodeplayHostTask) {
754+
// is this needed?
755+
Handler.depends_on(
756+
detail::createSyclObjFromImpl<event>(LastHostTaskEvent));
757+
LastHostTaskEvent = nullptr;
758+
} else if (!LastHostTaskEvent &&
759+
Handler.getType() == CGType::CodeplayHostTask) {
760+
auto Event = insertHelperBarrier(Handler);
761+
Handler.depends_on(Event);
762+
} else if (LastHostTaskEvent) {
763+
LastHostTaskEvent->wait(LastHostTaskEvent);
764+
LastHostTaskEvent = nullptr;
765+
}
766+
781767
auto EventRet = Handler.finalize();
782-
EventToBuildDeps = getSyclObjImpl(EventRet);
768+
769+
if (getSyclObjImpl(EventRet)->isHost()) {
770+
LastHostTaskEvent = getSyclObjImpl(EventRet);
771+
}
783772

784773
return EventRet;
785774
}
@@ -849,6 +838,7 @@ class queue_impl {
849838
template <typename HandlerType = handler>
850839
event finalizeHandler(HandlerType &Handler,
851840
const optional<SubmitPostProcessF> &PostProcessorFunc) {
841+
MEmpty = false;
852842
if (PostProcessorFunc) {
853843
return finalizeHandlerPostProcess(Handler, PostProcessorFunc);
854844
} else {
@@ -956,6 +946,11 @@ class queue_impl {
956946
/// List of queues created for FPGA device from a single SYCL queue.
957947
ur_queue_handle_t MQueue;
958948

949+
// TODO: this is for in-order queue only. Move it to separate struct.
950+
EventImplPtr LastHostTaskEvent = nullptr;
951+
952+
bool MEmpty = true;
953+
959954
// Access should be guarded with MMutex
960955
struct DependencyTrackingItems {
961956
// This event is employed for enhanced dependency tracking with in-order

sycl/source/queue.cpp

Lines changed: 4 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -293,24 +293,6 @@ void queue::wait_and_throw_proxy(const detail::code_location &CodeLoc) {
293293
impl->wait_and_throw(CodeLoc);
294294
}
295295

296-
static event
297-
getBarrierEventForInorderQueueHelper(const detail::QueueImplPtr QueueImpl) {
298-
// This function should not be called when a queue is recording to a graph,
299-
// as a graph can record from multiple queues and we cannot guarantee the
300-
// last node added by an in-order queue will be the last node added to the
301-
// graph.
302-
assert(!QueueImpl->hasCommandGraph() &&
303-
"Should not be called in on graph recording.");
304-
305-
sycl::detail::optional<event> LastEvent = QueueImpl->getLastEvent();
306-
if (LastEvent)
307-
return *LastEvent;
308-
309-
// If there was no last event, we create an empty one.
310-
return detail::createSyclObjFromImpl<event>(
311-
std::make_shared<detail::event_impl>(std::nullopt));
312-
}
313-
314296
/// Prevents any commands submitted afterward to this queue from executing
315297
/// until all commands previously submitted to this queue have entered the
316298
/// complete state.
@@ -321,10 +303,7 @@ getBarrierEventForInorderQueueHelper(const detail::QueueImplPtr QueueImpl) {
321303
event queue::ext_oneapi_submit_barrier(const detail::code_location &CodeLoc) {
322304
if (is_in_order() && !impl->hasCommandGraph() && !impl->MDiscardEvents &&
323305
!impl->MIsProfilingEnabled) {
324-
event InOrderLastEvent = getBarrierEventForInorderQueueHelper(impl);
325-
// If the last event was discarded, fall back to enqueuing a barrier.
326-
if (!detail::getSyclObjImpl(InOrderLastEvent)->isDiscarded())
327-
return InOrderLastEvent;
306+
return detail::createSyclObjFromImpl<event>(impl->insertMarkerEvent(impl));
328307
}
329308

330309
return submit([=](handler &CGH) { CGH.ext_oneapi_barrier(); }, CodeLoc);
@@ -346,13 +325,10 @@ event queue::ext_oneapi_submit_barrier(const std::vector<event> &WaitList,
346325
auto EventImpl = detail::getSyclObjImpl(Event);
347326
return (EventImpl->isDefaultConstructed() || EventImpl->isNOP()) &&
348327
!EventImpl->hasCommandGraph();
349-
});
328+
}); // TODO: is this needed?
350329
if (is_in_order() && !impl->hasCommandGraph() && !impl->MDiscardEvents &&
351330
!impl->MIsProfilingEnabled && AllEventsEmptyOrNop) {
352-
event InOrderLastEvent = getBarrierEventForInorderQueueHelper(impl);
353-
// If the last event was discarded, fall back to enqueuing a barrier.
354-
if (!detail::getSyclObjImpl(InOrderLastEvent)->isDiscarded())
355-
return InOrderLastEvent;
331+
return detail::createSyclObjFromImpl<event>(impl->insertMarkerEvent(impl));
356332
}
357333

358334
return submit([=](handler &CGH) { CGH.ext_oneapi_barrier(WaitList); },
@@ -432,20 +408,7 @@ sycl::detail::optional<event> queue::ext_oneapi_get_last_event_impl() const {
432408
make_error_code(errc::invalid),
433409
"ext_oneapi_get_last_event() can only be called on in-order queues.");
434410

435-
sycl::detail::optional<event> LastEvent = impl->getLastEvent();
436-
437-
// If there was no last event, the queue is yet to have any work submitted and
438-
// we return a std::nullopt.
439-
if (!LastEvent)
440-
return std::nullopt;
441-
442-
// If the last event was discarded or a NOP, we insert a marker to represent
443-
// an event at end.
444-
auto LastEventImpl = detail::getSyclObjImpl(*LastEvent);
445-
if (LastEventImpl->isDiscarded() || LastEventImpl->isNOP())
446-
LastEvent =
447-
detail::createSyclObjFromImpl<event>(impl->insertMarkerEvent(impl));
448-
return LastEvent;
411+
return impl->getLastEvent(impl);
449412
}
450413

451414
void queue::ext_oneapi_set_external_event(const event &external_event) {

sycl/test-e2e/InOrderEventsExt/get_last_event.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,6 @@ int Check(const sycl::queue &Q, const char *CheckName, const F &CheckFunc) {
3636
<< std::endl;
3737
return 1;
3838
}
39-
if (*E != *LastEvent) {
40-
std::cout << "Failed " << CheckName << std::endl;
41-
return 1;
42-
}
4339
return 0;
4440
}
4541

0 commit comments

Comments
 (0)