Skip to content

Commit 499b4bf

Browse files
authored
[SYCL] Optimize urEnqueueEventsWaitWithBarrier for in-order queues (#10995)
Don't insert zeCommandListAppendWaitOnEvents/zeCommandListAppendSignalEvent for queue::ext_oneapi_submit_barrier() if we have in-order queue and all events in the waitlist are from the same queue.
1 parent a8ab2bb commit 499b4bf

File tree

2 files changed

+151
-9
lines changed

2 files changed

+151
-9
lines changed

sycl/plugins/unified_runtime/ur/adapters/level_zero/event.cpp

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -152,15 +152,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier(
152152
[&Queue](ur_command_list_ptr_t CmdList,
153153
const _ur_ze_event_list_t &EventWaitList,
154154
ur_event_handle_t &Event, bool IsInternal) {
155-
// For in-order queue and empty wait-list just use the last command
156-
// event as the barrier event.
157-
if (Queue->isInOrderQueue() && !EventWaitList.Length &&
158-
Queue->LastCommandEvent && !Queue->LastCommandEvent->IsDiscarded) {
159-
UR_CALL(urEventRetain(Queue->LastCommandEvent));
160-
Event = Queue->LastCommandEvent;
161-
return UR_RESULT_SUCCESS;
162-
}
163-
164155
UR_CALL(createEventAndAssociateQueue(
165156
Queue, &Event, UR_COMMAND_EVENTS_WAIT_WITH_BARRIER, CmdList,
166157
IsInternal));
@@ -204,6 +195,27 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier(
204195
bool IsInternal = OutEvent == nullptr;
205196
ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent;
206197

198+
auto WaitListEmptyOrAllEventsFromSameQueue = [Queue, NumEventsInWaitList,
199+
EventWaitList]() {
200+
if (!NumEventsInWaitList)
201+
return true;
202+
203+
for (uint32_t I = 0; I < NumEventsInWaitList; ++I)
204+
if (Queue != EventWaitList[I]->UrQueue)
205+
return false;
206+
207+
return true;
208+
};
209+
210+
// For in-order queue and wait-list which is empty or has events from
211+
// the same queue just use the last command event as the barrier event.
212+
if (Queue->isInOrderQueue() && WaitListEmptyOrAllEventsFromSameQueue() &&
213+
Queue->LastCommandEvent && !Queue->LastCommandEvent->IsDiscarded) {
214+
UR_CALL(urEventRetain(Queue->LastCommandEvent));
215+
*Event = Queue->LastCommandEvent;
216+
return UR_RESULT_SUCCESS;
217+
}
218+
207219
// Indicator for whether batching is allowed. This may be changed later in
208220
// this function, but allow it by default.
209221
bool OkToBatch = true;
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
// REQUIRES: level_zero
2+
// RUN: %{build} -o %t.out
3+
// RUN: env SYCL_PI_TRACE=2 ZE_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s
4+
5+
// Test to check that we don't insert unnecessary L0 commands for
6+
// queue::ext_oneapi_submit_barrier() when we have in-order queue.
7+
8+
#include <sycl/sycl.hpp>
9+
10+
int main() {
11+
sycl::queue Q1({sycl::property::queue::in_order{}});
12+
sycl::queue Q2({sycl::property::queue::in_order{}});
13+
14+
// Test case 1 - events in the barrier's waitlist are from different queues.
15+
std::cout << "Test1" << std::endl;
16+
auto Event1 = Q1.submit(
17+
[&](sycl::handler &cgh) { cgh.single_task<class kernel1>([]() {}); });
18+
auto Event2 = Q2.submit(
19+
[&](sycl::handler &cgh) { cgh.single_task<class kernel2>([]() {}); });
20+
21+
// CHECK: Test1
22+
// CHECK: ---> piEnqueueEventsWaitWithBarrier(
23+
// CHECK: ZE ---> zeEventCreate
24+
// CHECK: ZE ---> zeCommandListAppendWaitOnEvents
25+
// CHECK: ZE ---> zeCommandListAppendSignalEvent
26+
// CHECK: ) ---> pi_result : PI_SUCCESS
27+
auto BarrierEvent1 = Q1.ext_oneapi_submit_barrier({Event1, Event2});
28+
BarrierEvent1.wait();
29+
30+
// Check that kernel events are completed after waiting for barrier event.
31+
assert(Event1.get_info<sycl::info::event::command_execution_status>() ==
32+
sycl::info::event_command_status::complete);
33+
assert(Event2.get_info<sycl::info::event::command_execution_status>() ==
34+
sycl::info::event_command_status::complete);
35+
36+
// Test case 2 - events in the barrier's waitlist are from the same queue.
37+
std::cout << "Test2" << std::endl;
38+
auto Event3 = Q1.submit(
39+
[&](sycl::handler &cgh) { cgh.single_task<class kernel3>([]() {}); });
40+
auto Event4 = Q1.submit(
41+
[&](sycl::handler &cgh) { cgh.single_task<class kernel4>([]() {}); });
42+
43+
// CHECK: Test2
44+
// CHECK: ---> piEnqueueEventsWaitWithBarrier(
45+
// CHECK-NOT: ZE ---> zeCommandListAppendWaitOnEvents
46+
// CHECK-NOT: ZE ---> zeCommandListAppendSignalEvent
47+
// CHECK-NOT: ZE ---> zeCommandListAppendBarrier
48+
// CHECK: ) ---> pi_result : PI_SUCCESS
49+
auto BarrierEvent2 = Q1.ext_oneapi_submit_barrier({Event3, Event4});
50+
BarrierEvent2.wait();
51+
52+
// Check that kernel events are completed after waiting for barrier event.
53+
assert(Event3.get_info<sycl::info::event::command_execution_status>() ==
54+
sycl::info::event_command_status::complete);
55+
assert(Event4.get_info<sycl::info::event::command_execution_status>() ==
56+
sycl::info::event_command_status::complete);
57+
58+
// Test case 3 - submit barrier after queue sync, i.e. last event = nullptr.
59+
std::cout << "Test3" << std::endl;
60+
auto Event5 = Q2.submit(
61+
[&](sycl::handler &cgh) { cgh.single_task<class kernel5>([]() {}); });
62+
auto Event6 = Q2.submit(
63+
[&](sycl::handler &cgh) { cgh.single_task<class kernel6>([]() {}); });
64+
Q2.wait();
65+
66+
// CHECK: Test3
67+
// CHECK: ---> piEnqueueEventsWaitWithBarrier(
68+
// CHECK: ZE ---> zeEventCreate
69+
// CHECK-NOT: ZE ---> zeCommandListAppendWaitOnEvents
70+
// CHECK: ZE ---> zeCommandListAppendSignalEvent
71+
// CHECK: ) ---> pi_result : PI_SUCCESS
72+
auto BarrierEvent3 = Q2.ext_oneapi_submit_barrier({Event5, Event6});
73+
BarrierEvent3.wait();
74+
75+
// Check that kernel events are completed after waiting for barrier event.
76+
assert(Event5.get_info<sycl::info::event::command_execution_status>() ==
77+
sycl::info::event_command_status::complete);
78+
assert(Event6.get_info<sycl::info::event::command_execution_status>() ==
79+
sycl::info::event_command_status::complete);
80+
81+
// Test case 4 - last command event is not in the waitlist.
82+
std::cout << "Test4" << std::endl;
83+
auto Event7 = Q2.submit(
84+
[&](sycl::handler &cgh) { cgh.single_task<class kernel7>([]() {}); });
85+
auto Event8 = Q2.submit(
86+
[&](sycl::handler &cgh) { cgh.single_task<class kernel8>([]() {}); });
87+
auto Event9 = Q2.submit(
88+
[&](sycl::handler &cgh) { cgh.single_task<class kernel9>([]() {}); });
89+
90+
// CHECK: Test4
91+
// CHECK: ---> piEnqueueEventsWaitWithBarrier(
92+
// CHECK-NOT: ZE ---> zeEventCreate
93+
// CHECK-NOT: ZE ---> zeCommandListAppendWaitOnEvents
94+
// CHECK-NOT: ZE ---> zeCommandListAppendSignalEvent
95+
// CHECK: ) ---> pi_result : PI_SUCCESS
96+
auto BarrierEvent4 = Q2.ext_oneapi_submit_barrier({Event7, Event8});
97+
BarrierEvent4.wait();
98+
99+
// Check that kernel events are completed after waiting for barrier event.
100+
assert(Event7.get_info<sycl::info::event::command_execution_status>() ==
101+
sycl::info::event_command_status::complete);
102+
assert(Event8.get_info<sycl::info::event::command_execution_status>() ==
103+
sycl::info::event_command_status::complete);
104+
105+
// Test case 5 - events in the barrier's waitlist are from the same queue Q2,
106+
// but submission to the different queue Q1 which is synced.
107+
std::cout << "Test5" << std::endl;
108+
Q1.wait();
109+
auto Event10 = Q2.submit(
110+
[&](sycl::handler &cgh) { cgh.single_task<class kernel10>([]() {}); });
111+
auto Event11 = Q2.submit(
112+
[&](sycl::handler &cgh) { cgh.single_task<class kernel11>([]() {}); });
113+
114+
// CHECK: Test5
115+
// CHECK: ---> piEnqueueEventsWaitWithBarrier(
116+
// CHECK: ZE ---> zeEventCreate
117+
// CHECK: ZE ---> zeCommandListAppendWaitOnEvents
118+
// CHECK: ZE ---> zeCommandListAppendSignalEvent
119+
// CHECK: ) ---> pi_result : PI_SUCCESS
120+
auto BarrierEvent5 = Q1.ext_oneapi_submit_barrier({Event10, Event11});
121+
BarrierEvent5.wait();
122+
123+
// Check that kernel events are completed after waiting for barrier event.
124+
assert(Event10.get_info<sycl::info::event::command_execution_status>() ==
125+
sycl::info::event_command_status::complete);
126+
assert(Event11.get_info<sycl::info::event::command_execution_status>() ==
127+
sycl::info::event_command_status::complete);
128+
129+
return 0;
130+
}

0 commit comments

Comments
 (0)