@@ -268,36 +268,49 @@ void queue_impl::wait(const detail::code_location &CodeLoc) {
268
268
// directly. Otherwise, only wait for unenqueued or host task events, starting
269
269
// from the latest submitted task in order to minimize total amount of calls,
270
270
// then handle the rest with piQueueFinish.
271
- bool SupportsPiFinish = !is_host () && MSupportOOO;
272
- for (auto EventImplWeakPtrIt = WeakEvents.rbegin ();
273
- EventImplWeakPtrIt != WeakEvents.rend (); ++EventImplWeakPtrIt) {
274
- if (std::shared_ptr<event_impl> EventImplSharedPtr =
275
- EventImplWeakPtrIt->lock ()) {
276
- // A nullptr PI event indicates that piQueueFinish will not cover it,
277
- // either because it's a host task event or an unenqueued one.
278
- if (!SupportsPiFinish || nullptr == EventImplSharedPtr->getHandleRef ()) {
279
- EventImplSharedPtr->wait (EventImplSharedPtr);
280
- }
281
- }
282
- }
283
- if (SupportsPiFinish) {
284
- const detail::plugin &Plugin = getPlugin ();
285
- Plugin.call <detail::PiApiKind::piQueueFinish>(getHandleRef ());
271
+ // TODO the new workflow has worse performance with Level Zero, keep the old
272
+ // behavior until this is addressed
273
+ if (!is_host () && getPlugin ().getBackend () == backend::level_zero) {
286
274
for (std::weak_ptr<event_impl> &EventImplWeakPtr : WeakEvents)
287
275
if (std::shared_ptr<event_impl> EventImplSharedPtr =
288
276
EventImplWeakPtr.lock ())
289
- EventImplSharedPtr->cleanupCommand (EventImplSharedPtr);
290
- // FIXME these events are stored for level zero until as a workaround,
291
- // remove once piEventRelease no longer calls wait on the event in the
292
- // plugin.
293
- if (Plugin.getBackend () == backend::level_zero) {
294
- SharedEvents.clear ();
295
- }
296
- assert (SharedEvents.empty () && " Queues that support calling piQueueFinish "
297
- " shouldn't have shared events" );
298
- } else {
277
+ EventImplSharedPtr->wait (EventImplSharedPtr);
299
278
for (event &Event : SharedEvents)
300
279
Event.wait ();
280
+ } else {
281
+ bool SupportsPiFinish = !is_host () && MSupportOOO;
282
+ for (auto EventImplWeakPtrIt = WeakEvents.rbegin ();
283
+ EventImplWeakPtrIt != WeakEvents.rend (); ++EventImplWeakPtrIt) {
284
+ if (std::shared_ptr<event_impl> EventImplSharedPtr =
285
+ EventImplWeakPtrIt->lock ()) {
286
+ // A nullptr PI event indicates that piQueueFinish will not cover it,
287
+ // either because it's a host task event or an unenqueued one.
288
+ if (!SupportsPiFinish ||
289
+ nullptr == EventImplSharedPtr->getHandleRef ()) {
290
+ EventImplSharedPtr->wait (EventImplSharedPtr);
291
+ }
292
+ }
293
+ }
294
+ if (SupportsPiFinish) {
295
+ const detail::plugin &Plugin = getPlugin ();
296
+ Plugin.call <detail::PiApiKind::piQueueFinish>(getHandleRef ());
297
+ for (std::weak_ptr<event_impl> &EventImplWeakPtr : WeakEvents)
298
+ if (std::shared_ptr<event_impl> EventImplSharedPtr =
299
+ EventImplWeakPtr.lock ())
300
+ EventImplSharedPtr->cleanupCommand (EventImplSharedPtr);
301
+ // FIXME these events are stored for level zero as a workaround,
302
+ // remove once piEventRelease no longer calls wait on the event in the
303
+ // plugin.
304
+ if (Plugin.getBackend () == backend::level_zero) {
305
+ SharedEvents.clear ();
306
+ }
307
+ assert (SharedEvents.empty () &&
308
+ " Queues that support calling piQueueFinish "
309
+ " shouldn't have shared events" );
310
+ } else {
311
+ for (event &Event : SharedEvents)
312
+ Event.wait ();
313
+ }
301
314
}
302
315
#ifdef XPTI_ENABLE_INSTRUMENTATION
303
316
instrumentationEpilog (TelemetryEvent, Name, StreamID, IId);
0 commit comments