Skip to content

Commit 8e7995d

Browse files
[SYCL] Fix in-order dependency filtering for isolated kernels (#12386)
Isolated kernels (i.e. those that don't modify the graph) bypass scheduler. Due to a bug, the worker queue for events associated with such kernels wasn't being set, so the queue check during in-order dependency filtering always failed. With this issue fixed, such dependencies are now properly filtered out from the event list passed to the same in-order queue.
1 parent b742e03 commit 8e7995d

File tree

4 files changed

+140
-184
lines changed

4 files changed

+140
-184
lines changed

sycl/source/handler.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,7 @@ event handler::finalize() {
306306
PI_ERROR_INVALID_OPERATION);
307307
} else {
308308
NewEvent = std::make_shared<detail::event_impl>(MQueue);
309+
NewEvent->setWorkerQueue(MQueue);
309310
NewEvent->setContextImpl(MQueue->getContextImplPtr());
310311
NewEvent->setStateIncomplete();
311312
NewEvent->setSubmissionTime();

sycl/test-e2e/Plugin/level_zero_barrier_optimization.cpp

Lines changed: 72 additions & 140 deletions
Original file line numberDiff line numberDiff line change
@@ -7,151 +7,83 @@
77

88
#include <sycl/sycl.hpp>
99

10+
class TestKernel;
11+
sycl::event submitKernel(sycl::queue &Q) {
12+
return Q.submit(
13+
[&](sycl::handler &cgh) { cgh.single_task<TestKernel>([]() {}); });
14+
}
15+
16+
void verifyEvent(sycl::event &E) {
17+
assert(E.get_info<sycl::info::event::command_execution_status>() ==
18+
sycl::info::event_command_status::complete);
19+
}
20+
1021
int main() {
1122
sycl::queue Q1({sycl::property::queue::in_order{}});
1223
sycl::queue Q2({sycl::property::queue::in_order{}});
1324
sycl::queue Q3({sycl::property::queue::in_order{},
1425
sycl::property::queue::enable_profiling{}});
1526

16-
// Test case 1 - events in the barrier's waitlist are from different queues.
17-
std::cout << "Test1" << std::endl;
18-
auto Event1 = Q1.submit(
19-
[&](sycl::handler &cgh) { cgh.single_task<class kernel1>([]() {}); });
20-
auto Event2 = Q2.submit(
21-
[&](sycl::handler &cgh) { cgh.single_task<class kernel2>([]() {}); });
22-
23-
// CHECK: Test1
24-
// CHECK: ---> piEnqueueEventsWaitWithBarrier(
25-
// CHECK: ZE ---> zeEventCreate
26-
// CHECK: ZE ---> zeCommandListAppendWaitOnEvents
27-
// CHECK: ZE ---> zeCommandListAppendSignalEvent
28-
// CHECK: ) ---> pi_result : PI_SUCCESS
29-
auto BarrierEvent1 = Q1.ext_oneapi_submit_barrier({Event1, Event2});
30-
BarrierEvent1.wait();
31-
32-
// Check that kernel events are completed after waiting for barrier event.
33-
assert(Event1.get_info<sycl::info::event::command_execution_status>() ==
34-
sycl::info::event_command_status::complete);
35-
assert(Event2.get_info<sycl::info::event::command_execution_status>() ==
36-
sycl::info::event_command_status::complete);
37-
38-
// Test case 2 - events in the barrier's waitlist are from the same queue.
39-
std::cout << "Test2" << std::endl;
40-
auto Event3 = Q1.submit(
41-
[&](sycl::handler &cgh) { cgh.single_task<class kernel3>([]() {}); });
42-
auto Event4 = Q1.submit(
43-
[&](sycl::handler &cgh) { cgh.single_task<class kernel4>([]() {}); });
44-
45-
// CHECK: Test2
46-
// CHECK: ---> piEnqueueEventsWaitWithBarrier(
47-
// CHECK-NOT: ZE ---> zeCommandListAppendWaitOnEvents
48-
// CHECK-NOT: ZE ---> zeCommandListAppendSignalEvent
49-
// CHECK-NOT: ZE ---> zeCommandListAppendBarrier
50-
// CHECK: ) ---> pi_result : PI_SUCCESS
51-
auto BarrierEvent2 = Q1.ext_oneapi_submit_barrier({Event3, Event4});
52-
BarrierEvent2.wait();
53-
54-
// Check that kernel events are completed after waiting for barrier event.
55-
assert(Event3.get_info<sycl::info::event::command_execution_status>() ==
56-
sycl::info::event_command_status::complete);
57-
assert(Event4.get_info<sycl::info::event::command_execution_status>() ==
58-
sycl::info::event_command_status::complete);
59-
60-
// Test case 3 - submit barrier after queue sync, i.e. last event = nullptr.
61-
std::cout << "Test3" << std::endl;
62-
auto Event5 = Q2.submit(
63-
[&](sycl::handler &cgh) { cgh.single_task<class kernel5>([]() {}); });
64-
auto Event6 = Q2.submit(
65-
[&](sycl::handler &cgh) { cgh.single_task<class kernel6>([]() {}); });
66-
Q2.wait();
67-
68-
// CHECK: Test3
69-
// CHECK: ---> piEnqueueEventsWaitWithBarrier(
70-
// CHECK: ZE ---> zeEventCreate
71-
// CHECK-NOT: ZE ---> zeCommandListAppendWaitOnEvents
72-
// CHECK: ZE ---> zeCommandListAppendSignalEvent
73-
// CHECK: ) ---> pi_result : PI_SUCCESS
74-
auto BarrierEvent3 = Q2.ext_oneapi_submit_barrier({Event5, Event6});
75-
BarrierEvent3.wait();
76-
77-
// Check that kernel events are completed after waiting for barrier event.
78-
assert(Event5.get_info<sycl::info::event::command_execution_status>() ==
79-
sycl::info::event_command_status::complete);
80-
assert(Event6.get_info<sycl::info::event::command_execution_status>() ==
81-
sycl::info::event_command_status::complete);
82-
83-
// Test case 4 - last command event is not in the waitlist.
84-
std::cout << "Test4" << std::endl;
85-
auto Event7 = Q2.submit(
86-
[&](sycl::handler &cgh) { cgh.single_task<class kernel7>([]() {}); });
87-
auto Event8 = Q2.submit(
88-
[&](sycl::handler &cgh) { cgh.single_task<class kernel8>([]() {}); });
89-
auto Event9 = Q2.submit(
90-
[&](sycl::handler &cgh) { cgh.single_task<class kernel9>([]() {}); });
91-
92-
// CHECK: Test4
93-
// CHECK: ---> piEnqueueEventsWaitWithBarrier(
94-
// CHECK-NOT: ZE ---> zeEventCreate
95-
// CHECK-NOT: ZE ---> zeCommandListAppendWaitOnEvents
96-
// CHECK-NOT: ZE ---> zeCommandListAppendSignalEvent
97-
// CHECK: ) ---> pi_result : PI_SUCCESS
98-
auto BarrierEvent4 = Q2.ext_oneapi_submit_barrier({Event7, Event8});
99-
BarrierEvent4.wait();
100-
101-
// Check that kernel events are completed after waiting for barrier event.
102-
assert(Event7.get_info<sycl::info::event::command_execution_status>() ==
103-
sycl::info::event_command_status::complete);
104-
assert(Event8.get_info<sycl::info::event::command_execution_status>() ==
105-
sycl::info::event_command_status::complete);
106-
107-
// Test case 5 - events in the barrier's waitlist are from the same queue Q2,
108-
// but submission to the different queue Q1 which is synced.
109-
std::cout << "Test5" << std::endl;
110-
Q1.wait();
111-
auto Event10 = Q2.submit(
112-
[&](sycl::handler &cgh) { cgh.single_task<class kernel10>([]() {}); });
113-
auto Event11 = Q2.submit(
114-
[&](sycl::handler &cgh) { cgh.single_task<class kernel11>([]() {}); });
115-
116-
// CHECK: Test5
117-
// CHECK: ---> piEnqueueEventsWaitWithBarrier(
118-
// CHECK: ZE ---> zeEventCreate
119-
// CHECK: ZE ---> zeCommandListAppendWaitOnEvents
120-
// CHECK: ZE ---> zeCommandListAppendSignalEvent
121-
// CHECK: ) ---> pi_result : PI_SUCCESS
122-
auto BarrierEvent5 = Q1.ext_oneapi_submit_barrier({Event10, Event11});
123-
BarrierEvent5.wait();
124-
125-
// Check that kernel events are completed after waiting for barrier event.
126-
assert(Event10.get_info<sycl::info::event::command_execution_status>() ==
127-
sycl::info::event_command_status::complete);
128-
assert(Event11.get_info<sycl::info::event::command_execution_status>() ==
129-
sycl::info::event_command_status::complete);
130-
131-
// Test case 6 - submit barrier after queue sync with profiling enabled, i.e.
132-
// last event = nullptr.
133-
std::cout << "Test3" << std::endl;
134-
auto Event12 = Q3.submit(
135-
[&](sycl::handler &cgh) { cgh.single_task<class kernel12>([]() {}); });
136-
auto Event13 = Q3.submit(
137-
[&](sycl::handler &cgh) { cgh.single_task<class kernel13>([]() {}); });
138-
Q3.wait();
139-
140-
// CHECK: Test3
141-
// CHECK: ---> piEnqueueEventsWaitWithBarrier(
142-
// CHECK: ZE ---> zeEventCreate
143-
// CHECK-NOT: ZE ---> zeCommandListAppendWaitOnEvents
144-
// CHECK-NOT: ZE ---> zeCommandListAppendSignalEvent
145-
// CHECK: ZE ---> zeCommandListAppendBarrier
146-
// CHECK: ) ---> pi_result : PI_SUCCESS
147-
auto BarrierEvent6 = Q3.ext_oneapi_submit_barrier({Event12, Event13});
148-
BarrierEvent6.wait();
149-
150-
// Check that kernel events are completed after waiting for barrier event.
151-
assert(Event12.get_info<sycl::info::event::command_execution_status>() ==
152-
sycl::info::event_command_status::complete);
153-
assert(Event13.get_info<sycl::info::event::command_execution_status>() ==
154-
sycl::info::event_command_status::complete);
155-
27+
// Any dependencies from the same queue are filtered out on the SYCL runtime
28+
// level, only cases with cross-queue events need to be checked here.
29+
{
30+
// Test case 1 - events in the barrier's waitlist are from different queues.
31+
std::cout << "Test1" << std::endl;
32+
auto EventA = submitKernel(Q1);
33+
auto EventB = submitKernel(Q2);
34+
35+
// CHECK: Test1
36+
// CHECK: ---> piEnqueueEventsWaitWithBarrier(
37+
// CHECK: ZE ---> zeEventCreate
38+
// CHECK: ZE ---> zeCommandListAppendWaitOnEvents
39+
// CHECK: ZE ---> zeCommandListAppendSignalEvent
40+
// CHECK: ) ---> pi_result : PI_SUCCESS
41+
auto BarrierEvent = Q2.ext_oneapi_submit_barrier({EventA, EventB});
42+
BarrierEvent.wait();
43+
44+
verifyEvent(EventA);
45+
verifyEvent(EventB);
46+
}
47+
{
48+
// Test case 2 - events in the barrier's waitlist are from the same queue
49+
// Q2, but submission to the different queue Q1 which is synced.
50+
std::cout << "Test2" << std::endl;
51+
Q1.wait();
52+
auto EventA = submitKernel(Q2);
53+
auto EventB = submitKernel(Q2);
54+
55+
// CHECK: Test2
56+
// CHECK: ---> piEnqueueEventsWaitWithBarrier(
57+
// CHECK: ZE ---> zeEventCreate
58+
// CHECK: ZE ---> zeCommandListAppendWaitOnEvents
59+
// CHECK: ZE ---> zeCommandListAppendSignalEvent
60+
// CHECK: ) ---> pi_result : PI_SUCCESS
61+
auto BarrierEvent = Q1.ext_oneapi_submit_barrier({EventA, EventB});
62+
BarrierEvent.wait();
63+
64+
verifyEvent(EventA);
65+
verifyEvent(EventB);
66+
}
67+
{
68+
// Test case 3 - submit barrier after queue sync with profiling enabled,
69+
// i.e. last event = nullptr.
70+
std::cout << "Test3" << std::endl;
71+
auto EventA = submitKernel(Q2);
72+
auto EventB = submitKernel(Q3);
73+
Q2.wait();
74+
Q3.wait();
75+
// CHECK: Test3
76+
// CHECK: ---> piEnqueueEventsWaitWithBarrier(
77+
// CHECK: ZE ---> zeEventCreate
78+
// CHECK-NOT: ZE ---> zeCommandListAppendWaitOnEvents
79+
// CHECK-NOT: ZE ---> zeCommandListAppendSignalEvent
80+
// CHECK: ZE ---> zeCommandListAppendBarrier
81+
// CHECK: ) ---> pi_result : PI_SUCCESS
82+
auto BarrierEvent = Q3.ext_oneapi_submit_barrier({EventA, EventB});
83+
BarrierEvent.wait();
84+
85+
verifyEvent(EventA);
86+
verifyEvent(EventB);
87+
}
15688
return 0;
15789
}

sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
#include <vector>
1919

20+
namespace {
2021
using namespace sycl;
2122
using EventImplPtr = std::shared_ptr<detail::event_impl>;
2223

@@ -291,20 +292,20 @@ TEST_F(DependsOnTests, EnqueueNoMemObjDoubleKernelDepHost) {
291292
}
292293

293294
std::vector<pi_event> EventsInWaitList;
294-
inline pi_result redefinedextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking,
295-
void *dst_ptr,
296-
const void *src_ptr, size_t size,
297-
pi_uint32 num_events_in_waitlist,
298-
const pi_event *events_waitlist,
299-
pi_event *event) {
295+
pi_result redefinedextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking,
296+
void *dst_ptr, const void *src_ptr,
297+
size_t size,
298+
pi_uint32 num_events_in_waitlist,
299+
const pi_event *events_waitlist,
300+
pi_event *event) {
300301
*event = createDummyHandle<pi_event>();
301302
for (auto i = 0u; i < num_events_in_waitlist; i++) {
302303
EventsInWaitList.push_back(events_waitlist[i]);
303304
}
304305
return PI_SUCCESS;
305306
}
306307

307-
inline pi_result redefinedEnqueueEventsWaitWithBarrier(
308+
pi_result redefinedEnqueueEventsWaitWithBarrier(
308309
pi_queue command_queue, pi_uint32 num_events_in_wait_list,
309310
const pi_event *event_wait_list, pi_event *event) {
310311
*event = createDummyHandle<pi_event>();
@@ -388,3 +389,4 @@ TEST_F(DependsOnTests, BarrierWithWaitList) {
388389
EXPECT_EQ(EventsInWaitList[0], SingleTaskEventImpl->getHandleRef());
389390
Queue.wait();
390391
}
392+
} // anonymous namespace

0 commit comments

Comments
 (0)