Skip to content

Commit 079fc97

Browse files
[SYCL][Graph] Fixes enqueue barrier slowdown (#11933)
The implementation of ext_oneapi_submit_barrier suffered from an exponential slowdown caused by unnecessary extra dependencies on barrier nodes. This PR solves the issue by: 1) improving the function that searches for graph leaves (exit nodes); 2) removing unnecessary dependencies on previous barriers when adding new nodes. Addresses issue #11915.
1 parent fdfaadb commit 079fc97

File tree

4 files changed

+33
-9
lines changed

4 files changed

+33
-9
lines changed

sycl/source/detail/graph_impl.cpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -416,15 +416,13 @@ void graph_impl::makeEdge(std::shared_ptr<node_impl> Src,
416416

417417
std::vector<sycl::detail::EventImplPtr> graph_impl::getExitNodesEvents() {
418418
std::vector<sycl::detail::EventImplPtr> Events;
419-
auto EnqueueExitNodesEvents = [&](std::shared_ptr<node_impl> &Node,
420-
std::deque<std::shared_ptr<node_impl>> &) {
419+
420+
for (auto Node : MNodeStorage) {
421421
if (Node->MSuccessors.empty()) {
422422
Events.push_back(getEventForNode(Node));
423423
}
424-
return false;
425-
};
424+
}
426425

427-
searchDepthFirst(EnqueueExitNodesEvents);
428426
return Events;
429427
}
430428

sycl/source/detail/graph_impl.hpp

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -784,6 +784,24 @@ class graph_impl {
784784
/// @return vector of events associated to exit nodes.
785785
std::vector<sycl::detail::EventImplPtr> getExitNodesEvents();
786786

787+
/// Removes all Barrier nodes from the list of extra dependencies
788+
/// MExtraDependencies.
789+
/// @return vector of events associated to previous barrier nodes.
790+
std::vector<sycl::detail::EventImplPtr>
791+
removeBarriersFromExtraDependencies() {
792+
std::vector<sycl::detail::EventImplPtr> Events;
793+
for (auto It = MExtraDependencies.begin();
794+
It != MExtraDependencies.end();) {
795+
if ((*It)->MCGType == sycl::detail::CG::Barrier) {
796+
Events.push_back(getEventForNode(*It));
797+
It = MExtraDependencies.erase(It);
798+
} else {
799+
++It;
800+
}
801+
}
802+
return Events;
803+
}
804+
787805
private:
788806
/// Iterate over the graph depth-first and run \p NodeFunc on each node.
789807
/// @param NodeFunc A function which receives as input a node in the graph to
@@ -861,7 +879,7 @@ class graph_impl {
861879
/// added to this graph.
862880
/// This list is mainly used by barrier nodes which must be considered
863881
/// as predecessors for all nodes subsequently added to the graph.
864-
std::vector<std::shared_ptr<node_impl>> MExtraDependencies;
882+
std::list<std::shared_ptr<node_impl>> MExtraDependencies;
865883
};
866884

867885
/// Class representing the implementation of command_graph<executable>.

sycl/source/handler.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,14 @@ event handler::finalize() {
389389
// nodes/events of the graph
390390
if (MEventsWaitWithBarrier.size() == 0) {
391391
MEventsWaitWithBarrier = GraphImpl->getExitNodesEvents();
392+
// A graph-wide barrier takes precedence over previous ones.
393+
// We therefore remove the previous ones from the ExtraDependencies list.
394+
// The current barrier is then added to this list in the graph_impl.
395+
std::vector<detail::EventImplPtr> EventsBarriers =
396+
GraphImpl->removeBarriersFromExtraDependencies();
397+
MEventsWaitWithBarrier.insert(std::end(MEventsWaitWithBarrier),
398+
std::begin(EventsBarriers),
399+
std::end(EventsBarriers));
392400
}
393401
CGData.MEvents.insert(std::end(CGData.MEvents),
394402
std::begin(MEventsWaitWithBarrier),

sycl/unittests/Extensions/CommandGraph.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1458,7 +1458,7 @@ TEST_F(CommandGraphTest, EnqueueMultipleBarrier) {
14581458
// (B2)
14591459
// /|\
14601460
// / | \
1461-
// (6) (7) (8) (those nodes also have B1 as a predecessor)
1461+
// (6) (7) (8)
14621462
ASSERT_EQ(GraphImpl->MRoots.size(), 3lu);
14631463
for (auto Root : GraphImpl->MRoots) {
14641464
auto Node = Root.lock();
@@ -1468,7 +1468,7 @@ TEST_F(CommandGraphTest, EnqueueMultipleBarrier) {
14681468
ASSERT_EQ(GraphImpl->getEventForNode(SuccNode),
14691469
sycl::detail::getSyclObjImpl(Barrier1));
14701470
ASSERT_EQ(SuccNode->MPredecessors.size(), 2lu);
1471-
ASSERT_EQ(SuccNode->MSuccessors.size(), 6lu);
1471+
ASSERT_EQ(SuccNode->MSuccessors.size(), 3lu);
14721472
for (auto Succ1 : SuccNode->MSuccessors) {
14731473
auto SuccBarrier1 = Succ1.lock();
14741474
if (SuccBarrier1->MCGType == sycl::detail::CG::Barrier) {
@@ -1479,7 +1479,7 @@ TEST_F(CommandGraphTest, EnqueueMultipleBarrier) {
14791479
for (auto Succ2 : SuccBarrier1->MSuccessors) {
14801480
auto SuccBarrier2 = Succ2.lock();
14811481
// Nodes 6, 7, 8
1482-
ASSERT_EQ(SuccBarrier2->MPredecessors.size(), 2lu);
1482+
ASSERT_EQ(SuccBarrier2->MPredecessors.size(), 1lu);
14831483
ASSERT_EQ(SuccBarrier2->MSuccessors.size(), 0lu);
14841484
}
14851485
} else {

0 commit comments

Comments
 (0)