Skip to content

Commit 7f59dea

Browse files
authored
[SYCL][Graph] Fix barrier transitive queue recording (#15601)
It has been discovered that when a graph event is passed as a dependency to a barrier operation on another in-order queue, that queue is not triggered to record the operation as it should be. Fix this and add a regression test.
1 parent 87cce87 commit 7f59dea

File tree

2 files changed

+89
-1
lines changed

2 files changed

+89
-1
lines changed

sycl/source/queue.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,8 @@ event queue::ext_oneapi_submit_barrier(const std::vector<event> &WaitList,
274274
bool AllEventsEmptyOrNop = std::all_of(
275275
begin(WaitList), end(WaitList), [&](const event &Event) -> bool {
276276
auto EventImpl = detail::getSyclObjImpl(Event);
277-
return EventImpl->isDefaultConstructed() || EventImpl->isNOP();
277+
return (EventImpl->isDefaultConstructed() || EventImpl->isNOP()) &&
278+
!EventImpl->getCommandGraph();
278279
});
279280
if (is_in_order() && !impl->getCommandGraph() && !impl->MDiscardEvents &&
280281
!impl->MIsProfilingEnabled && AllEventsEmptyOrNop) {
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
4+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
5+
// Extra run to check for immediate-command-list in Level Zero
6+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7+
8+
// Regression test for using transitive queue recording when a graph
9+
// event is passed as a dependency to a barrier operation in a different
10+
// in-order queue.
11+
12+
#include "../graph_common.hpp"
13+
#include <sycl/properties/all_properties.hpp>
14+
15+
int main() {
16+
using T = int;
17+
18+
device Dev;
19+
context Ctx{Dev};
20+
21+
property_list InOrderProp = {property::queue::in_order{}};
22+
queue Q1{Ctx, Dev, InOrderProp};
23+
queue Q2{Ctx, Dev, InOrderProp};
24+
25+
const exp_ext::queue_state Recording = exp_ext::queue_state::recording;
26+
const exp_ext::queue_state Executing = exp_ext::queue_state::executing;
27+
28+
auto assertQueueState = [&](exp_ext::queue_state ExpectedQ1,
29+
exp_ext::queue_state ExpectedQ2) {
30+
assert(Q1.ext_oneapi_get_state() == ExpectedQ1);
31+
assert(Q2.ext_oneapi_get_state() == ExpectedQ2);
32+
};
33+
34+
T *PtrA = malloc_device<T>(Size, Q1);
35+
T *PtrB = malloc_device<T>(Size, Q1);
36+
T *PtrC = malloc_device<T>(Size, Q1);
37+
38+
exp_ext::command_graph Graph{Q1.get_context(), Q1.get_device()};
39+
40+
Graph.begin_recording(Q1);
41+
assertQueueState(Recording, Executing);
42+
43+
T PatternA = 42;
44+
auto EventA =
45+
Q1.submit([&](handler &CGH) { CGH.fill(PtrA, PatternA, Size); });
46+
assertQueueState(Recording, Executing);
47+
48+
T PatternB = 0xA;
49+
auto EventB = Q1.fill(PtrB, PatternB, Size);
50+
assertQueueState(Recording, Executing);
51+
52+
auto Barrier1 = Q1.ext_oneapi_submit_barrier();
53+
assertQueueState(Recording, Executing);
54+
55+
// Depends on Q1 barrier, should put Q2 in recording state
56+
auto Barrier = Q2.ext_oneapi_submit_barrier({Barrier1});
57+
assertQueueState(Recording, Recording);
58+
59+
// Q2 is now in recording state
60+
auto EventC = Q2.submit([&](handler &CGH) {
61+
CGH.parallel_for(range<1>(Size),
62+
[=](item<1> Id) { PtrC[Id] = PtrA[Id] + PtrB[Id]; });
63+
});
64+
assertQueueState(Recording, Recording);
65+
66+
Graph.end_recording();
67+
assertQueueState(Executing, Executing);
68+
69+
auto GraphExec = Graph.finalize();
70+
71+
Q1.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); });
72+
Q1.wait_and_throw();
73+
74+
std::vector<T> DataA(Size), DataB(Size), DataC(Size);
75+
Q1.copy(PtrA, DataA.data(), Size);
76+
Q1.copy(PtrB, DataB.data(), Size);
77+
Q1.copy(PtrC, DataC.data(), Size);
78+
Q1.wait_and_throw();
79+
80+
for (size_t i = 0; i < Size; i++) {
81+
assert(check_value(i, PatternA, DataA[i], "DataA"));
82+
assert(check_value(i, PatternB, DataB[i], "DataB"));
83+
assert(check_value(i, (PatternA + PatternB), DataC[i], "DataC"));
84+
}
85+
86+
return 0;
87+
}

0 commit comments

Comments
 (0)