Skip to content

Commit 51dcb29

Browse files
[SYCL] Fix in-order queue dependencies for no scheduler path (#15412)
Runtime could submit kernel directly to scheduler if no buffers/streams are used and if event dependencies are already handled by queue (in case if it is in-order one). Although check if dependencies are submitted to the same queue was missed. Now we add events submitted to another queue but on the same context to event list in kernel launching. --------- Signed-off-by: Tikhomirova, Kseniya <[email protected]>
1 parent 7989104 commit 51dcb29

File tree

4 files changed

+79
-5
lines changed

4 files changed

+79
-5
lines changed

sycl/source/detail/scheduler/commands.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,8 @@ static std::string commandToName(Command::CommandType Type) {
229229
#endif
230230

231231
std::vector<ur_event_handle_t>
232-
Command::getUrEvents(const std::vector<EventImplPtr> &EventImpls) const {
232+
Command::getUrEvents(const std::vector<EventImplPtr> &EventImpls,
233+
const QueueImplPtr &CommandQueue, bool IsHostTaskCommand) {
233234
std::vector<ur_event_handle_t> RetUrEvents;
234235
for (auto &EventImpl : EventImpls) {
235236
auto Handle = EventImpl->getHandle();
@@ -240,8 +241,8 @@ Command::getUrEvents(const std::vector<EventImplPtr> &EventImpls) const {
240241
// At this stage dependency is definitely ur task and need to check if
241242
// current one is a host task. In this case we should not skip ur event due
242243
// to different sync mechanisms for different task types on in-order queue.
243-
if (MWorkerQueue && EventImpl->getWorkerQueue() == MWorkerQueue &&
244-
MWorkerQueue->isInOrder() && !isHostTask())
244+
if (CommandQueue && EventImpl->getWorkerQueue() == CommandQueue &&
245+
CommandQueue->isInOrder() && !IsHostTaskCommand)
245246
continue;
246247

247248
RetUrEvents.push_back(Handle);
@@ -250,6 +251,11 @@ Command::getUrEvents(const std::vector<EventImplPtr> &EventImpls) const {
250251
return RetUrEvents;
251252
}
252253

254+
std::vector<ur_event_handle_t>
255+
Command::getUrEvents(const std::vector<EventImplPtr> &EventImpls) const {
256+
return getUrEvents(EventImpls, MWorkerQueue, isHostTask());
257+
}
258+
253259
// This function is implemented (duplicating getUrEvents a lot) as short term
254260
// solution for the issue that barrier with wait list could not
255261
// handle empty ur event handles when kernel is enqueued on host task

sycl/source/detail/scheduler/commands.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,10 @@ class Command {
240240
/// in order queue
241241
std::vector<ur_event_handle_t>
242242
getUrEvents(const std::vector<EventImplPtr> &EventImpls) const;
243+
244+
static std::vector<ur_event_handle_t>
245+
getUrEvents(const std::vector<EventImplPtr> &EventImpls,
246+
const QueueImplPtr &CommandQueue, bool IsHostTaskCommand);
243247
/// Collect UR events from EventImpls and filter out some of them in case of
244248
/// in order queue. Does blocking enqueue if event is expected to produce ur
245249
/// event but has empty native handle.

sycl/source/handler.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -258,8 +258,8 @@ event handler::finalize() {
258258
// the graph is not changed, then this faster path is used to submit
259259
// kernel bypassing scheduler and avoiding CommandGroup, Command objects
260260
// creation.
261-
262-
std::vector<ur_event_handle_t> RawEvents;
261+
std::vector<ur_event_handle_t> RawEvents =
262+
detail::Command::getUrEvents(impl->CGData.MEvents, MQueue, false);
263263
detail::EventImplPtr NewEvent;
264264

265265
#ifdef XPTI_ENABLE_INSTRUMENTATION

sycl/unittests/scheduler/InOrderQueueDeps.cpp

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,4 +125,68 @@ TEST_F(SchedulerTest, InOrderQueueIsolatedDeps) {
125125
EXPECT_TRUE(BarrierCalled);
126126
}
127127
}
128+
129+
std::vector<size_t> KernelEventListSize;
130+
131+
inline ur_result_t customEnqueueKernelLaunch(void *pParams) {
132+
auto params = *static_cast<ur_enqueue_kernel_launch_params_t *>(pParams);
133+
KernelEventListSize.push_back(*params.pnumEventsInWaitList);
134+
return UR_RESULT_SUCCESS;
135+
}
136+
137+
TEST_F(SchedulerTest, TwoInOrderQueuesOnSameContext) {
138+
KernelEventListSize.clear();
139+
sycl::unittest::UrMock<> Mock;
140+
mock::getCallbacks().set_before_callback("urEnqueueKernelLaunch",
141+
&customEnqueueKernelLaunch);
142+
143+
sycl::platform Plt = sycl::platform();
144+
145+
context Ctx{Plt};
146+
queue InOrderQueueFirst{Ctx, default_selector_v, property::queue::in_order()};
147+
queue InOrderQueueSecond{Ctx, default_selector_v,
148+
property::queue::in_order()};
149+
150+
event EvFirst = InOrderQueueFirst.submit(
151+
[&](sycl::handler &CGH) { CGH.single_task<TestKernel<>>([] {}); });
152+
std::ignore = InOrderQueueSecond.submit([&](sycl::handler &CGH) {
153+
CGH.depends_on(EvFirst);
154+
CGH.single_task<TestKernel<>>([] {});
155+
});
156+
157+
InOrderQueueFirst.wait();
158+
InOrderQueueSecond.wait();
159+
160+
ASSERT_EQ(KernelEventListSize.size(), 2u);
161+
EXPECT_EQ(KernelEventListSize[0] /*EventsCount*/, 0u);
162+
EXPECT_EQ(KernelEventListSize[1] /*EventsCount*/, 1u);
163+
}
164+
165+
TEST_F(SchedulerTest, InOrderQueueNoSchedulerPath) {
166+
KernelEventListSize.clear();
167+
sycl::unittest::UrMock<> Mock;
168+
mock::getCallbacks().set_before_callback("urEnqueueKernelLaunch",
169+
&customEnqueueKernelLaunch);
170+
171+
sycl::platform Plt = sycl::platform();
172+
173+
context Ctx{Plt};
174+
queue InOrderQueue{Ctx, default_selector_v, property::queue::in_order()};
175+
176+
event EvFirst = InOrderQueue.submit(
177+
[&](sycl::handler &CGH) { CGH.single_task<TestKernel<>>([] {}); });
178+
std::ignore = InOrderQueue.submit([&](sycl::handler &CGH) {
179+
CGH.depends_on(EvFirst);
180+
CGH.single_task<TestKernel<>>([] {});
181+
});
182+
183+
InOrderQueue.wait();
184+
185+
ASSERT_EQ(KernelEventListSize.size(), 2u);
186+
EXPECT_EQ(KernelEventListSize[0] /*EventsCount*/, 0u);
187+
// native device events for device kernel submitted to the same in-order queue
188+
// don't need to be explicitly passed as dependencies
189+
EXPECT_EQ(KernelEventListSize[1] /*EventsCount*/, 0u);
190+
}
191+
128192
} // anonymous namespace

0 commit comments

Comments
 (0)