Skip to content

[SYCL] Fixes a performance issue due to https://github.com/intel/llvm/… #3797

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions sycl/plugins/level_zero/pi_level_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4111,8 +4111,13 @@ pi_result piEventGetInfo(pi_event Event, pi_event_info ParamName,
// Lock automatically releases when this goes out of scope.
std::lock_guard<std::mutex> lock(Event->Queue->PiQueueMutex);

if (auto Res = Event->Queue->executeOpenCommandList())
return Res;
// Only execute the open command list if the event being
// queried is an event that will be signalled by something
// currently in that open command list.
if (Event->Queue->ZeOpenCommandList == Event->ZeCommandList) {
if (auto Res = Event->Queue->executeOpenCommandList())
return Res;
}
}

ze_result_t ZeResult;
Expand Down
19 changes: 13 additions & 6 deletions sycl/source/detail/queue_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,14 +122,21 @@ void queue_impl::addSharedEvent(const event &Event) {
// of them can be released.
const size_t EventThreshold = 128;
if (MEventsShared.size() >= EventThreshold) {
// Generally, the vector is ordered so that the oldest events are in the
// front and the newer events are in the end. So, search to find the first
// event that isn't yet complete. All the events prior to that can be
erased. This could leave a few completed events further on that are
not yet erased, but that is OK. This cleanup doesn't have to be perfect.
// This also keeps the algorithm linear rather than quadratic because it
// doesn't continually recheck things towards the back of the list that
// really haven't had time to complete.
MEventsShared.erase(
std::remove_if(
MEventsShared.begin(), MEventsShared.end(),
[](const event &E) {
return E.get_info<info::event::command_execution_status>() ==
MEventsShared.begin(),
std::find_if(
MEventsShared.begin(), MEventsShared.end(), [](const event &E) {
return E.get_info<info::event::command_execution_status>() !=
info::event_command_status::complete;
}),
MEventsShared.end());
}));
}
MEventsShared.push_back(Event);
}
Expand Down
13 changes: 10 additions & 3 deletions sycl/unittests/queue/EventClear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ struct TestCtx {

std::unique_ptr<TestCtx> TestContext;

const int ExpectedEventThreshold = 128;

pi_result redefinedUSMEnqueueMemset(pi_queue queue, void *ptr, pi_int32 value,
size_t count,
pi_uint32 num_events_in_waitlist,
Expand All @@ -44,10 +46,16 @@ pi_result redefinedEventGetInfo(pi_event event, pi_event_info param_name,
size_t *param_value_size_ret) {
EXPECT_EQ(param_name, PI_EVENT_INFO_COMMAND_EXECUTION_STATUS)
<< "Unexpected event info requested";
// Report half of events as complete
// Report first half of events as complete.
// Report second half of events as running.
// This is important, because the removal algorithm assumes that
// events are likely to complete oldest-first, and stops removing
// at the first non-completed event.
static int Counter = 0;
auto *Result = reinterpret_cast<pi_event_status *>(param_value);
*Result = (++Counter % 2 == 0) ? PI_EVENT_COMPLETE : PI_EVENT_RUNNING;
*Result = (Counter < (ExpectedEventThreshold / 2)) ? PI_EVENT_COMPLETE
: PI_EVENT_RUNNING;
Counter++;
return PI_SUCCESS;
}

Expand Down Expand Up @@ -117,7 +125,6 @@ TEST(QueueEventClear, CleanupOnThreshold) {
queue Q{Ctx, default_selector()};

unsigned char *HostAlloc = (unsigned char *)malloc_host(1, Ctx);
const int ExpectedEventThreshold = 128;
TestContext->EventReferenceCount = ExpectedEventThreshold;
for (size_t I = 0; I < ExpectedEventThreshold; ++I) {
Q.memset(HostAlloc, 42, 1).wait();
Expand Down