Skip to content

[SYCL] Restore queue::wait() changes for Level Zero #4325

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 24 additions & 60 deletions sycl/source/detail/queue_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,7 @@ event queue_impl::memset(const std::shared_ptr<detail::queue_impl> &Self,

event ResEvent = prepareUSMEvent(Self, NativeEvent);
// Track only if we won't be able to handle it with piQueueFinish.
// FIXME these events are stored for level zero until as a workaround, remove
// once piEventRelease no longer calls wait on the event in the plugin.
if (!MSupportOOO ||
getPlugin().getBackend() == backend::ext_oneapi_level_zero)
if (!MSupportOOO)
addSharedEvent(ResEvent);
return ResEvent;
}
Expand All @@ -83,10 +80,7 @@ event queue_impl::memcpy(const std::shared_ptr<detail::queue_impl> &Self,

event ResEvent = prepareUSMEvent(Self, NativeEvent);
// Track only if we won't be able to handle it with piQueueFinish.
// FIXME these events are stored for level zero until as a workaround, remove
// once piEventRelease no longer calls wait on the event in the plugin.
if (!MSupportOOO ||
getPlugin().getBackend() == backend::ext_oneapi_level_zero)
if (!MSupportOOO)
addSharedEvent(ResEvent);
return ResEvent;
}
Expand All @@ -104,10 +98,7 @@ event queue_impl::mem_advise(const std::shared_ptr<detail::queue_impl> &Self,

event ResEvent = prepareUSMEvent(Self, NativeEvent);
// Track only if we won't be able to handle it with piQueueFinish.
// FIXME these events are stored for level zero until as a workaround, remove
// once piEventRelease no longer calls wait on the event in the plugin.
if (!MSupportOOO ||
getPlugin().getBackend() == backend::ext_oneapi_level_zero)
if (!MSupportOOO)
addSharedEvent(ResEvent);
return ResEvent;
}
Expand All @@ -119,11 +110,7 @@ void queue_impl::addEvent(const event &Event) {
// if there is no command on the event, we cannot track it with MEventsWeak
// as that will leave it with no owner. Track in MEventsShared only if we're
// unable to call piQueueFinish during wait.
// FIXME these events are stored for level zero until as a workaround,
// remove once piEventRelease no longer calls wait on the event in the
// plugin.
if (is_host() || !MSupportOOO ||
getPlugin().getBackend() == backend::ext_oneapi_level_zero)
if (is_host() || !MSupportOOO)
addSharedEvent(Event);
} else {
std::weak_ptr<event_impl> EventWeakPtr{Eimpl};
Expand All @@ -136,10 +123,7 @@ void queue_impl::addEvent(const event &Event) {
/// but some events have no other owner. In this case,
/// addSharedEvent will have the queue track the events via a shared pointer.
void queue_impl::addSharedEvent(const event &Event) {
// FIXME The assertion should be corrected once the Level Zero workaround is
// removed.
assert(is_host() || !MSupportOOO ||
getPlugin().getBackend() == backend::ext_oneapi_level_zero);
assert(is_host() || !MSupportOOO);
std::lock_guard<std::mutex> Lock(MMutex);
// Events stored in MEventsShared are not released anywhere else aside from
// calls to queue::wait/wait_and_throw, which a user application might not
Expand Down Expand Up @@ -272,50 +256,30 @@ void queue_impl::wait(const detail::code_location &CodeLoc) {
// directly. Otherwise, only wait for unenqueued or host task events, starting
// from the latest submitted task in order to minimize total amount of calls,
// then handle the rest with piQueueFinish.
// TODO the new workflow has worse performance with Level Zero, keep the old
// behavior until this is addressed
if (!is_host() &&
getPlugin().getBackend() == backend::ext_oneapi_level_zero) {
bool SupportsPiFinish = !is_host() && MSupportOOO;
for (auto EventImplWeakPtrIt = WeakEvents.rbegin();
EventImplWeakPtrIt != WeakEvents.rend(); ++EventImplWeakPtrIt) {
if (std::shared_ptr<event_impl> EventImplSharedPtr =
EventImplWeakPtrIt->lock()) {
// A nullptr PI event indicates that piQueueFinish will not cover it,
// either because it's a host task event or an unenqueued one.
if (!SupportsPiFinish || nullptr == EventImplSharedPtr->getHandleRef()) {
EventImplSharedPtr->wait(EventImplSharedPtr);
}
}
}
if (SupportsPiFinish) {
const detail::plugin &Plugin = getPlugin();
Plugin.call<detail::PiApiKind::piQueueFinish>(getHandleRef());
for (std::weak_ptr<event_impl> &EventImplWeakPtr : WeakEvents)
if (std::shared_ptr<event_impl> EventImplSharedPtr =
EventImplWeakPtr.lock())
EventImplSharedPtr->wait(EventImplSharedPtr);
EventImplSharedPtr->cleanupCommand(EventImplSharedPtr);
assert(SharedEvents.empty() && "Queues that support calling piQueueFinish "
"shouldn't have shared events");
} else {
for (event &Event : SharedEvents)
Event.wait();
} else {
bool SupportsPiFinish = !is_host() && MSupportOOO;
for (auto EventImplWeakPtrIt = WeakEvents.rbegin();
EventImplWeakPtrIt != WeakEvents.rend(); ++EventImplWeakPtrIt) {
if (std::shared_ptr<event_impl> EventImplSharedPtr =
EventImplWeakPtrIt->lock()) {
// A nullptr PI event indicates that piQueueFinish will not cover it,
// either because it's a host task event or an unenqueued one.
if (!SupportsPiFinish ||
nullptr == EventImplSharedPtr->getHandleRef()) {
EventImplSharedPtr->wait(EventImplSharedPtr);
}
}
}
if (SupportsPiFinish) {
const detail::plugin &Plugin = getPlugin();
Plugin.call<detail::PiApiKind::piQueueFinish>(getHandleRef());
for (std::weak_ptr<event_impl> &EventImplWeakPtr : WeakEvents)
if (std::shared_ptr<event_impl> EventImplSharedPtr =
EventImplWeakPtr.lock())
EventImplSharedPtr->cleanupCommand(EventImplSharedPtr);
// FIXME these events are stored for level zero until as a workaround,
// remove once piEventRelease no longer calls wait on the event in the
// plugin.
if (Plugin.getBackend() == backend::ext_oneapi_level_zero) {
SharedEvents.clear();
}
assert(SharedEvents.empty() &&
"Queues that support calling piQueueFinish "
"shouldn't have shared events");
} else {
for (event &Event : SharedEvents)
Event.wait();
}
}
#ifdef XPTI_ENABLE_INSTRUMENTATION
instrumentationEpilog(TelemetryEvent, Name, StreamID, IId);
Expand Down
16 changes: 1 addition & 15 deletions sycl/unittests/queue/Wait.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

#include <CL/sycl.hpp>
#include <detail/event_impl.hpp>
#include <detail/platform_impl.hpp>
#include <detail/scheduler/commands.hpp>
#include <gtest/gtest.h>
#include <helpers/PiMock.hpp>
Expand Down Expand Up @@ -92,14 +91,6 @@ bool preparePiMock(platform &Plt) {
<< std::endl;
return false;
}
// TODO remove once queue:wait() is lowered to PiQueueFinish with level zero
// as well.
if (detail::getSyclObjImpl(Plt)->getPlugin().getBackend() ==
backend::ext_oneapi_level_zero) {
std::cout << "Not run on Level Zero, old behavior is kept there temporarily"
<< std::endl;
return false;
}

unittest::PiMock Mock{Plt};
Mock.redefine<detail::PiApiKind::piQueueCreate>(redefinedQueueCreate);
Expand Down Expand Up @@ -129,12 +120,7 @@ TEST(QueueWait, QueueWaitTest) {
TestContext = {};
Q.memset(HostAlloc, 42, 1);
// No need to keep the event since we'll use piQueueFinish.
// FIXME ... unless the plugin is Level Zero, where there's a workaround that
// releases events later.
if (detail::getSyclObjImpl(Plt)->getPlugin().getBackend() !=
backend::ext_oneapi_level_zero) {
ASSERT_EQ(TestContext.EventReferenceCount, 0);
}
ASSERT_EQ(TestContext.EventReferenceCount, 0);
Q.wait();
ASSERT_EQ(TestContext.NEventsWaitedFor, 0);
ASSERT_TRUE(TestContext.PiQueueFinishCalled);
Expand Down