Skip to content

[SYCL][Graph] Fix barrier transitive queue recording #15601

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion sycl/source/queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,8 @@ event queue::ext_oneapi_submit_barrier(const std::vector<event> &WaitList,
bool AllEventsEmptyOrNop = std::all_of(
begin(WaitList), end(WaitList), [&](const event &Event) -> bool {
auto EventImpl = detail::getSyclObjImpl(Event);
return EventImpl->isDefaultConstructed() || EventImpl->isNOP();
return (EventImpl->isDefaultConstructed() || EventImpl->isNOP()) &&
!EventImpl->getCommandGraph();
});
if (is_in_order() && !impl->getCommandGraph() && !impl->MDiscardEvents &&
!impl->MIsProfilingEnabled && AllEventsEmptyOrNop) {
Expand Down
87 changes: 87 additions & 0 deletions sycl/test-e2e/Graph/RecordReplay/transitive_queue_barrier.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
// Extra run to check for immediate-command-list in Level Zero
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}

// Regression test for using transitive queue recording when a graph
// event is passed as a dependency to a barrier operation in a different
// in-order queue.

#include "../graph_common.hpp"
#include <sycl/properties/all_properties.hpp>

int main() {
using T = int;

device Dev;
context Ctx{Dev};

property_list InOrderProp = {property::queue::in_order{}};
queue Q1{Ctx, Dev, InOrderProp};
queue Q2{Ctx, Dev, InOrderProp};

const exp_ext::queue_state Recording = exp_ext::queue_state::recording;
const exp_ext::queue_state Executing = exp_ext::queue_state::executing;

auto assertQueueState = [&](exp_ext::queue_state ExpectedQ1,
exp_ext::queue_state ExpectedQ2) {
assert(Q1.ext_oneapi_get_state() == ExpectedQ1);
assert(Q2.ext_oneapi_get_state() == ExpectedQ2);
};

T *PtrA = malloc_device<T>(Size, Q1);
T *PtrB = malloc_device<T>(Size, Q1);
T *PtrC = malloc_device<T>(Size, Q1);

exp_ext::command_graph Graph{Q1.get_context(), Q1.get_device()};

Graph.begin_recording(Q1);
assertQueueState(Recording, Executing);

T PatternA = 42;
auto EventA =
Q1.submit([&](handler &CGH) { CGH.fill(PtrA, PatternA, Size); });
assertQueueState(Recording, Executing);

T PatternB = 0xA;
auto EventB = Q1.fill(PtrB, PatternB, Size);
assertQueueState(Recording, Executing);

auto Barrier1 = Q1.ext_oneapi_submit_barrier();
assertQueueState(Recording, Executing);

// Depends on Q1 barrier, should put Q2 in recording state
auto Barrier = Q2.ext_oneapi_submit_barrier({Barrier1});
assertQueueState(Recording, Recording);

// Q2 is now in recording state
auto EventC = Q2.submit([&](handler &CGH) {
CGH.parallel_for(range<1>(Size),
[=](item<1> Id) { PtrC[Id] = PtrA[Id] + PtrB[Id]; });
});
assertQueueState(Recording, Recording);

Graph.end_recording();
assertQueueState(Executing, Executing);

auto GraphExec = Graph.finalize();

Q1.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); });
Q1.wait_and_throw();

std::vector<T> DataA(Size), DataB(Size), DataC(Size);
Q1.copy(PtrA, DataA.data(), Size);
Q1.copy(PtrB, DataB.data(), Size);
Q1.copy(PtrC, DataC.data(), Size);
Q1.wait_and_throw();

for (size_t i = 0; i < Size; i++) {
assert(check_value(i, PatternA, DataA[i], "DataA"));
assert(check_value(i, PatternB, DataB[i], "DataB"));
assert(check_value(i, (PatternA + PatternB), DataC[i], "DataC"));
}

return 0;
}
Loading