Skip to content

Commit 6327221

Browse files
[SYCL] Remove redundant event dependencies for in-order queues (#2929)
Avoid passing explicit dependencies on events from the same in-order queue since the order of execution is already guaranteed in that case.
1 parent 2f1f316 commit 6327221

File tree

4 files changed

+155
-4
lines changed

4 files changed

+155
-4
lines changed

sycl/source/detail/scheduler/commands.cpp

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -494,6 +494,12 @@ void Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep) {
494494
return;
495495
}
496496

497+
// Do not add redundant event dependencies for in-order queues.
498+
const QueueImplPtr &WorkerQueue = getWorkerQueue();
499+
if (Dep.MDepCommand && Dep.MDepCommand->getWorkerQueue() == WorkerQueue &&
500+
WorkerQueue->has_property<property::queue::in_order>())
501+
return;
502+
497503
ContextImplPtr DepEventContext = DepEvent->getContextImpl();
498504
// If contexts don't match we'll connect them using host task
499505
if (DepEventContext != Context && !Context->is_host()) {
@@ -507,6 +513,8 @@ ContextImplPtr Command::getContext() const {
507513
return detail::getSyclObjImpl(MQueue->get_context());
508514
}
509515

516+
QueueImplPtr Command::getWorkerQueue() const { return MQueue; }
517+
510518
void Command::addDep(DepDesc NewDep) {
511519
if (NewDep.MDepCommand) {
512520
processDepEvent(NewDep.MDepCommand->getEvent(), NewDep);
@@ -1128,12 +1136,15 @@ void MemCpyCommand::emitInstrumentationData() {
11281136
}
11291137

11301138
ContextImplPtr MemCpyCommand::getContext() const {
1131-
const QueueImplPtr &Queue = MQueue->is_host() ? MSrcQueue : MQueue;
1139+
const QueueImplPtr &Queue = getWorkerQueue();
11321140
return detail::getSyclObjImpl(Queue->get_context());
11331141
}
11341142

1143+
QueueImplPtr MemCpyCommand::getWorkerQueue() const {
1144+
return MQueue->is_host() ? MSrcQueue : MQueue;
1145+
}
1146+
11351147
cl_int MemCpyCommand::enqueueImp() {
1136-
QueueImplPtr Queue = MQueue->is_host() ? MSrcQueue : MQueue;
11371148
waitForPreparedHostEvents();
11381149
std::vector<EventImplPtr> EventImpls = MPreparedDepsEvents;
11391150

@@ -1266,12 +1277,16 @@ void MemCpyCommandHost::emitInstrumentationData() {
12661277
}
12671278

12681279
ContextImplPtr MemCpyCommandHost::getContext() const {
1269-
const QueueImplPtr &Queue = MQueue->is_host() ? MSrcQueue : MQueue;
1280+
const QueueImplPtr &Queue = getWorkerQueue();
12701281
return detail::getSyclObjImpl(Queue->get_context());
12711282
}
12721283

1284+
QueueImplPtr MemCpyCommandHost::getWorkerQueue() const {
1285+
return MQueue->is_host() ? MSrcQueue : MQueue;
1286+
}
1287+
12731288
cl_int MemCpyCommandHost::enqueueImp() {
1274-
QueueImplPtr Queue = MQueue->is_host() ? MSrcQueue : MQueue;
1289+
const QueueImplPtr &Queue = getWorkerQueue();
12751290
waitForPreparedHostEvents();
12761291
std::vector<EventImplPtr> EventImpls = MPreparedDepsEvents;
12771292
std::vector<RT::PiEvent> RawEvents = getPiEvents(EventImpls);

sycl/source/detail/scheduler/commands.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,10 @@ class Command {
181181

182182
virtual ContextImplPtr getContext() const;
183183

184+
/// Get the queue this command will be submitted to. Could differ from MQueue
185+
/// for memory copy commands.
186+
virtual QueueImplPtr getWorkerQueue() const;
187+
184188
protected:
185189
EventImplPtr MEvent;
186190
QueueImplPtr MQueue;
@@ -443,6 +447,7 @@ class MemCpyCommand : public Command {
443447
const Requirement *getRequirement() const final override { return &MDstReq; }
444448
void emitInstrumentationData() final override;
445449
ContextImplPtr getContext() const final override;
450+
QueueImplPtr getWorkerQueue() const final override;
446451

447452
private:
448453
cl_int enqueueImp() final override;
@@ -466,6 +471,7 @@ class MemCpyCommandHost : public Command {
466471
const Requirement *getRequirement() const final override { return &MDstReq; }
467472
void emitInstrumentationData() final override;
468473
ContextImplPtr getContext() const final override;
474+
QueueImplPtr getWorkerQueue() const final override;
469475

470476
private:
471477
cl_int enqueueImp() final override;

sycl/unittests/scheduler/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,6 @@ add_sycl_unittest(SchedulerTests OBJECT
1010
LeavesCollection.cpp
1111
NoUnifiedHostMemory.cpp
1212
StreamInitDependencyOnHost.cpp
13+
InOrderQueueDeps.cpp
1314
utils.cpp
1415
)
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
//==------------ InOrderQueueueueDeps.cpp --- Scheduler unit tests ---------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "SchedulerTest.hpp"
10+
#include "SchedulerTestUtils.hpp"
11+
12+
#include <helpers/PiMock.hpp>
13+
14+
#include <iostream>
15+
#include <memory>
16+
17+
using namespace cl::sycl;
18+
19+
static pi_result
20+
redefinedMemBufferCreate(pi_context context, pi_mem_flags flags, size_t size,
21+
void *host_ptr, pi_mem *ret_mem,
22+
const pi_mem_properties *properties = nullptr) {
23+
return PI_SUCCESS;
24+
}
25+
26+
static pi_result redefinedMemRelease(pi_mem mem) { return PI_SUCCESS; }
27+
28+
static pi_result redefinedEnqueueMemBufferReadRect(
29+
pi_queue command_queue, pi_mem buffer, pi_bool blocking_read,
30+
pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset,
31+
pi_buff_rect_region region, size_t buffer_row_pitch,
32+
size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch,
33+
void *ptr, pi_uint32 num_events_in_wait_list,
34+
const pi_event *event_wait_list, pi_event *event) {
35+
EXPECT_EQ(num_events_in_wait_list, 0u);
36+
*event = reinterpret_cast<pi_event>(1);
37+
return PI_SUCCESS;
38+
}
39+
40+
static pi_result redefinedEnqueueMemBufferWriteRect(
41+
pi_queue command_queue, pi_mem buffer, pi_bool blocking_write,
42+
pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset,
43+
pi_buff_rect_region region, size_t buffer_row_pitch,
44+
size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch,
45+
const void *ptr, pi_uint32 num_events_in_wait_list,
46+
const pi_event *event_wait_list, pi_event *event) {
47+
EXPECT_EQ(num_events_in_wait_list, 0u);
48+
*event = reinterpret_cast<pi_event>(1);
49+
return PI_SUCCESS;
50+
}
51+
52+
static pi_result redefinedEnqueueMemBufferMap(
53+
pi_queue command_queue, pi_mem buffer, pi_bool blocking_map,
54+
pi_map_flags map_flags, size_t offset, size_t size,
55+
pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list,
56+
pi_event *event, void **ret_map) {
57+
EXPECT_EQ(num_events_in_wait_list, 0u);
58+
*event = reinterpret_cast<pi_event>(1);
59+
return PI_SUCCESS;
60+
}
61+
62+
static pi_result redefinedEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj,
63+
void *mapped_ptr,
64+
pi_uint32 num_events_in_wait_list,
65+
const pi_event *event_wait_list,
66+
pi_event *event) {
67+
EXPECT_EQ(num_events_in_wait_list, 0u);
68+
*event = reinterpret_cast<pi_event>(1);
69+
return PI_SUCCESS;
70+
}
71+
72+
static pi_result redefinedEventsWait(pi_uint32 num_events,
73+
const pi_event *event_list) {
74+
return PI_SUCCESS;
75+
}
76+
77+
pi_result redefinedEventRelease(pi_event event) { return PI_SUCCESS; }
78+
79+
TEST_F(SchedulerTest, InOrderQueueDeps) {
80+
default_selector Selector;
81+
platform Plt{default_selector()};
82+
if (Plt.is_host()) {
83+
std::cout << "Not run due to host-only environment\n";
84+
return;
85+
}
86+
87+
unittest::PiMock Mock{Plt};
88+
Mock.redefine<detail::PiApiKind::piMemBufferCreate>(redefinedMemBufferCreate);
89+
Mock.redefine<detail::PiApiKind::piMemRelease>(redefinedMemRelease);
90+
Mock.redefine<detail::PiApiKind::piEnqueueMemBufferReadRect>(
91+
redefinedEnqueueMemBufferReadRect);
92+
Mock.redefine<detail::PiApiKind::piEnqueueMemBufferWriteRect>(
93+
redefinedEnqueueMemBufferWriteRect);
94+
Mock.redefine<detail::PiApiKind::piEnqueueMemBufferMap>(
95+
redefinedEnqueueMemBufferMap);
96+
Mock.redefine<detail::PiApiKind::piEnqueueMemUnmap>(redefinedEnqueueMemUnmap);
97+
Mock.redefine<detail::PiApiKind::piEventsWait>(redefinedEventsWait);
98+
Mock.redefine<detail::PiApiKind::piEventRelease>(redefinedEventRelease);
99+
100+
context Ctx{Plt};
101+
queue InOrderQueue{Ctx, Selector, property::queue::in_order()};
102+
cl::sycl::detail::QueueImplPtr InOrderQueueImpl =
103+
detail::getSyclObjImpl(InOrderQueue);
104+
105+
device HostDevice;
106+
std::shared_ptr<detail::queue_impl> DefaultHostQueue{
107+
new detail::queue_impl(detail::getSyclObjImpl(HostDevice), {}, {})};
108+
109+
MockScheduler MS;
110+
111+
int val;
112+
buffer<int, 1> Buf(&val, range<1>(1));
113+
detail::Requirement Req = getMockRequirement(Buf);
114+
115+
detail::MemObjRecord *Record =
116+
MS.getOrInsertMemObjRecord(InOrderQueueImpl, &Req);
117+
MS.getOrCreateAllocaForReq(Record, &Req, InOrderQueueImpl);
118+
MS.getOrCreateAllocaForReq(Record, &Req, DefaultHostQueue);
119+
120+
// Check that sequential memory movements submitted to the same in-order
121+
// queue do not depend on each other.
122+
detail::Command *Cmd = MS.insertMemoryMove(Record, &Req, DefaultHostQueue);
123+
detail::EnqueueResultT Res;
124+
MockScheduler::enqueueCommand(Cmd, Res, detail::NON_BLOCKING);
125+
Cmd = MS.insertMemoryMove(Record, &Req, InOrderQueueImpl);
126+
MockScheduler::enqueueCommand(Cmd, Res, detail::NON_BLOCKING);
127+
Cmd = MS.insertMemoryMove(Record, &Req, DefaultHostQueue);
128+
MockScheduler::enqueueCommand(Cmd, Res, detail::NON_BLOCKING);
129+
}

0 commit comments

Comments
 (0)