Skip to content

Commit 028e141

Browse files
[SYCL] Revert queue::wait() to its old behaviour with Level Zero (#4153)
Revert the behaviour of queue::wait() to calling event::wait() on all events associated with it for Level Zero backend due to significant performance regressions introduced with that change.
1 parent f264925 commit 028e141

File tree

4 files changed

+51
-30
lines changed

4 files changed

+51
-30
lines changed

sycl/source/detail/queue_impl.cpp

Lines changed: 38 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -268,36 +268,49 @@ void queue_impl::wait(const detail::code_location &CodeLoc) {
268268
// directly. Otherwise, only wait for unenqueued or host task events, starting
269269
// from the latest submitted task in order to minimize total amount of calls,
270270
// then handle the rest with piQueueFinish.
271-
bool SupportsPiFinish = !is_host() && MSupportOOO;
272-
for (auto EventImplWeakPtrIt = WeakEvents.rbegin();
273-
EventImplWeakPtrIt != WeakEvents.rend(); ++EventImplWeakPtrIt) {
274-
if (std::shared_ptr<event_impl> EventImplSharedPtr =
275-
EventImplWeakPtrIt->lock()) {
276-
// A nullptr PI event indicates that piQueueFinish will not cover it,
277-
// either because it's a host task event or an unenqueued one.
278-
if (!SupportsPiFinish || nullptr == EventImplSharedPtr->getHandleRef()) {
279-
EventImplSharedPtr->wait(EventImplSharedPtr);
280-
}
281-
}
282-
}
283-
if (SupportsPiFinish) {
284-
const detail::plugin &Plugin = getPlugin();
285-
Plugin.call<detail::PiApiKind::piQueueFinish>(getHandleRef());
271+
// TODO the new workflow has worse performance with Level Zero, keep the old
272+
// behavior until this is addressed
273+
if (!is_host() && getPlugin().getBackend() == backend::level_zero) {
286274
for (std::weak_ptr<event_impl> &EventImplWeakPtr : WeakEvents)
287275
if (std::shared_ptr<event_impl> EventImplSharedPtr =
288276
EventImplWeakPtr.lock())
289-
EventImplSharedPtr->cleanupCommand(EventImplSharedPtr);
290-
// FIXME these events are stored for level zero until as a workaround,
291-
// remove once piEventRelease no longer calls wait on the event in the
292-
// plugin.
293-
if (Plugin.getBackend() == backend::level_zero) {
294-
SharedEvents.clear();
295-
}
296-
assert(SharedEvents.empty() && "Queues that support calling piQueueFinish "
297-
"shouldn't have shared events");
298-
} else {
277+
EventImplSharedPtr->wait(EventImplSharedPtr);
299278
for (event &Event : SharedEvents)
300279
Event.wait();
280+
} else {
281+
bool SupportsPiFinish = !is_host() && MSupportOOO;
282+
for (auto EventImplWeakPtrIt = WeakEvents.rbegin();
283+
EventImplWeakPtrIt != WeakEvents.rend(); ++EventImplWeakPtrIt) {
284+
if (std::shared_ptr<event_impl> EventImplSharedPtr =
285+
EventImplWeakPtrIt->lock()) {
286+
// A nullptr PI event indicates that piQueueFinish will not cover it,
287+
// either because it's a host task event or an unenqueued one.
288+
if (!SupportsPiFinish ||
289+
nullptr == EventImplSharedPtr->getHandleRef()) {
290+
EventImplSharedPtr->wait(EventImplSharedPtr);
291+
}
292+
}
293+
}
294+
if (SupportsPiFinish) {
295+
const detail::plugin &Plugin = getPlugin();
296+
Plugin.call<detail::PiApiKind::piQueueFinish>(getHandleRef());
297+
for (std::weak_ptr<event_impl> &EventImplWeakPtr : WeakEvents)
298+
if (std::shared_ptr<event_impl> EventImplSharedPtr =
299+
EventImplWeakPtr.lock())
300+
EventImplSharedPtr->cleanupCommand(EventImplSharedPtr);
301+
// FIXME these events are stored for level zero until as a workaround,
302+
// remove once piEventRelease no longer calls wait on the event in the
303+
// plugin.
304+
if (Plugin.getBackend() == backend::level_zero) {
305+
SharedEvents.clear();
306+
}
307+
assert(SharedEvents.empty() &&
308+
"Queues that support calling piQueueFinish "
309+
"shouldn't have shared events");
310+
} else {
311+
for (event &Event : SharedEvents)
312+
Event.wait();
313+
}
301314
}
302315
#ifdef XPTI_ENABLE_INSTRUMENTATION
303316
instrumentationEpilog(TelemetryEvent, Name, StreamID, IId);

sycl/test/on-device/plugins/level_zero_batch_event_status.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,10 @@
2525
// CHECK: ZE ---> zeCommandListClose
2626
// CHECK: ZE ---> zeCommandQueueExecuteCommandLists
2727
// CHECK: ---> piEventGetInfo
28-
// CHECK-NOT: piQueueFinish
28+
// CHECK-NOT: piEventsWait
2929
// CHECK: ---> piEnqueueKernelLaunch
3030
// CHECK: ZE ---> zeCommandListAppendLaunchKernel
31-
// CHECK: ---> piQueueFinish
31+
// CHECK: ---> piEventsWait
3232
// Look for close and Execute after piEventsWait
3333
// CHECK: ZE ---> zeCommandListClose
3434
// CHECK: ZE ---> zeCommandQueueExecuteCommandLists

sycl/test/on-device/plugins/level_zero_batch_test.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@
8686
// CKB4: ZE ---> zeCommandQueueExecuteCommandLists(
8787
// CKB8: ZE ---> zeCommandListClose(
8888
// CKB8: ZE ---> zeCommandQueueExecuteCommandLists(
89-
// CKALL: ---> piQueueFinish(
89+
// CKALL: ---> piEventsWait(
9090
// CKB3: ZE ---> zeCommandListClose(
9191
// CKB3: ZE ---> zeCommandQueueExecuteCommandLists(
9292
// CKB5: ZE ---> zeCommandListClose(
@@ -142,7 +142,7 @@
142142
// CKB4: ZE ---> zeCommandQueueExecuteCommandLists(
143143
// CKB8: ZE ---> zeCommandListClose(
144144
// CKB8: ZE ---> zeCommandQueueExecuteCommandLists(
145-
// CKALL: ---> piQueueFinish(
145+
// CKALL: ---> piEventsWait(
146146
// CKB3: ZE ---> zeCommandListClose(
147147
// CKB3: ZE ---> zeCommandQueueExecuteCommandLists(
148148
// CKB5: ZE ---> zeCommandListClose(
@@ -198,7 +198,7 @@
198198
// CKB4: ZE ---> zeCommandQueueExecuteCommandLists(
199199
// CKB8: ZE ---> zeCommandListClose(
200200
// CKB8: ZE ---> zeCommandQueueExecuteCommandLists(
201-
// CKALL: ---> piQueueFinish(
201+
// CKALL: ---> piEventsWait(
202202
// CKB3: ZE ---> zeCommandListClose(
203203
// CKB3: ZE ---> zeCommandQueueExecuteCommandLists(
204204
// CKB5: ZE ---> zeCommandListClose(

sycl/unittests/queue/Wait.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,14 @@ bool preparePiMock(platform &Plt) {
9292
<< std::endl;
9393
return false;
9494
}
95+
// TODO remove once queue:wait() is lowered to PiQueueFinish with level zero
96+
// as well.
97+
if (detail::getSyclObjImpl(Plt)->getPlugin().getBackend() ==
98+
backend::level_zero) {
99+
std::cout << "Not run on Level Zero, old behavior is kept there temporarily"
100+
<< std::endl;
101+
return false;
102+
}
95103

96104
unittest::PiMock Mock{Plt};
97105
Mock.redefine<detail::PiApiKind::piQueueCreate>(redefinedQueueCreate);

0 commit comments

Comments
 (0)