@@ -263,41 +263,54 @@ void queue_impl::wait(const detail::code_location &CodeLoc) {
263
263
WeakEvents.swap (MEventsWeak);
264
264
SharedEvents.swap (MEventsShared);
265
265
}
266
+ const detail::plugin &Plugin = getPlugin ();
266
267
// If the queue is either a host one or does not support OOO (and we use
267
268
// multiple in-order queues as a result of that), wait for each event
268
269
// directly. Otherwise, only wait for unenqueued or host task events, starting
269
270
// from the latest submitted task in order to minimize total amount of calls,
270
271
// then handle the rest with piQueueFinish.
271
- bool SupportsPiFinish = !is_host () && MSupportOOO;
272
- for (auto EventImplWeakPtrIt = WeakEvents.rbegin ();
273
- EventImplWeakPtrIt != WeakEvents.rend (); ++EventImplWeakPtrIt) {
274
- if (std::shared_ptr<event_impl> EventImplSharedPtr =
275
- EventImplWeakPtrIt->lock ()) {
276
- // A nullptr PI event indicates that piQueueFinish will not cover it,
277
- // either because it's a host task event or an unenqueued one.
278
- if (!SupportsPiFinish || nullptr == EventImplSharedPtr->getHandleRef ()) {
279
- EventImplSharedPtr->wait (EventImplSharedPtr);
280
- }
281
- }
282
- }
283
- if (SupportsPiFinish) {
284
- const detail::plugin &Plugin = getPlugin ();
285
- Plugin.call <detail::PiApiKind::piQueueFinish>(getHandleRef ());
272
+ // TODO the new workflow has worse performance with Level Zero, keep the old
273
+ // behavior until this is addressed
274
+ if (Plugin.getBackend () == backend::level_zero) {
286
275
for (std::weak_ptr<event_impl> &EventImplWeakPtr : WeakEvents)
287
276
if (std::shared_ptr<event_impl> EventImplSharedPtr =
288
277
EventImplWeakPtr.lock ())
289
- EventImplSharedPtr->cleanupCommand (EventImplSharedPtr);
290
- // FIXME these events are stored for level zero until as a workaround,
291
- // remove once piEventRelease no longer calls wait on the event in the
292
- // plugin.
293
- if (Plugin.getBackend () == backend::level_zero) {
294
- SharedEvents.clear ();
295
- }
296
- assert (SharedEvents.empty () && " Queues that support calling piQueueFinish "
297
- " shouldn't have shared events" );
298
- } else {
278
+ EventImplSharedPtr->wait (EventImplSharedPtr);
299
279
for (event &Event : SharedEvents)
300
280
Event.wait ();
281
+ } else {
282
+ bool SupportsPiFinish = !is_host () && MSupportOOO;
283
+ for (auto EventImplWeakPtrIt = WeakEvents.rbegin ();
284
+ EventImplWeakPtrIt != WeakEvents.rend (); ++EventImplWeakPtrIt) {
285
+ if (std::shared_ptr<event_impl> EventImplSharedPtr =
286
+ EventImplWeakPtrIt->lock ()) {
287
+ // A nullptr PI event indicates that piQueueFinish will not cover it,
288
+ // either because it's a host task event or an unenqueued one.
289
+ if (!SupportsPiFinish ||
290
+ nullptr == EventImplSharedPtr->getHandleRef ()) {
291
+ EventImplSharedPtr->wait (EventImplSharedPtr);
292
+ }
293
+ }
294
+ }
295
+ if (SupportsPiFinish) {
296
+ Plugin.call <detail::PiApiKind::piQueueFinish>(getHandleRef ());
297
+ for (std::weak_ptr<event_impl> &EventImplWeakPtr : WeakEvents)
298
+ if (std::shared_ptr<event_impl> EventImplSharedPtr =
299
+ EventImplWeakPtr.lock ())
300
+ EventImplSharedPtr->cleanupCommand (EventImplSharedPtr);
301
+ // FIXME these events are stored for level zero until as a workaround,
302
+ // remove once piEventRelease no longer calls wait on the event in the
303
+ // plugin.
304
+ if (Plugin.getBackend () == backend::level_zero) {
305
+ SharedEvents.clear ();
306
+ }
307
+ assert (SharedEvents.empty () &&
308
+ " Queues that support calling piQueueFinish "
309
+ " shouldn't have shared events" );
310
+ } else {
311
+ for (event &Event : SharedEvents)
312
+ Event.wait ();
313
+ }
301
314
}
302
315
#ifdef XPTI_ENABLE_INSTRUMENTATION
303
316
instrumentationEpilog (TelemetryEvent, Name, StreamID, IId);
0 commit comments