Skip to content

Commit 92ddf8d

Browse files
EwanCmfrancepilloisBensuo
authored
[SYCL][Graph] Fix issues with sycl_ext_oneapi_graph subgraphs (#10822)
This PR addresses some issues with the [sycl_ext_oneapi_graph](https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/proposed/sycl_ext_oneapi_graph.asciidoc) extension regarding sub-graphs. A sub-graph is when an executable `command_graph` is added as a node in another graph. Currently, adding an executable graph object as a subgraph permanently connects the child graph's root nodes to the parent graph. This affects subsequent independent submissions of the executable child graph object and later additions as a subgraph node. This is incorrect behaviour, as the previous uses as a sub-graph should have no effect on the state of the executable graph object. Fixed in this PR by duplicating the nodes of a subgraph when added to a parent, enabling multiple parents. As a result, nodes of the initial subgraph and the one of the main graph are no longer the exact same (shared_pointer) but two different nodes with similar content. ## Authors Co-authored-by: Maxime France-Pillois <[email protected]> --------- Co-authored-by: Maxime France-Pillois <[email protected]> Co-authored-by: Ben Tracy <[email protected]>
1 parent 5c30815 commit 92ddf8d

File tree

10 files changed

+299
-53
lines changed

10 files changed

+299
-53
lines changed

sycl/source/detail/graph_impl.cpp

Lines changed: 50 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,17 @@ bool checkForRequirement(sycl::detail::AccessorImplHost *Req,
7171
}
7272
return SuccessorAddedDep;
7373
}
74+
75+
void duplicateNode(const std::shared_ptr<node_impl> Node,
76+
std::shared_ptr<node_impl> &NodeCopy) {
77+
if (Node->MCGType == sycl::detail::CG::None) {
78+
NodeCopy = std::make_shared<node_impl>();
79+
NodeCopy->MCGType = sycl::detail::CG::None;
80+
} else {
81+
NodeCopy = std::make_shared<node_impl>(Node->MCGType, Node->getCGCopy());
82+
}
83+
}
84+
7485
} // anonymous namespace
7586

7687
void exec_graph_impl::schedule() {
@@ -81,7 +92,7 @@ void exec_graph_impl::schedule() {
8192
}
8293
}
8394

84-
std::shared_ptr<node_impl> graph_impl::addSubgraphNodes(
95+
std::shared_ptr<node_impl> graph_impl::addNodesToExits(
8596
const std::list<std::shared_ptr<node_impl>> &NodeList) {
8697
// Find all input and output nodes from the node list
8798
std::vector<std::shared_ptr<node_impl>> Inputs;
@@ -104,6 +115,36 @@ std::shared_ptr<node_impl> graph_impl::addSubgraphNodes(
104115
return this->add(Outputs);
105116
}
106117

118+
std::shared_ptr<node_impl> graph_impl::addSubgraphNodes(
119+
const std::shared_ptr<exec_graph_impl> &SubGraphExec) {
120+
std::map<std::shared_ptr<node_impl>, std::shared_ptr<node_impl>> NodesMap;
121+
122+
std::list<std::shared_ptr<node_impl>> NodesList = SubGraphExec->getSchedule();
123+
std::list<std::shared_ptr<node_impl>> NewNodesList{NodesList.size()};
124+
125+
// Duplication of nodes
126+
for (auto NodeIt = NodesList.end(), NewNodesIt = NewNodesList.end();
127+
NodeIt != NodesList.begin();) {
128+
--NodeIt;
129+
--NewNodesIt;
130+
auto Node = *NodeIt;
131+
std::shared_ptr<node_impl> NodeCopy;
132+
duplicateNode(Node, NodeCopy);
133+
*NewNodesIt = NodeCopy;
134+
NodesMap.insert({Node, NodeCopy});
135+
for (auto &NextNode : Node->MSuccessors) {
136+
if (NodesMap.find(NextNode) != NodesMap.end()) {
137+
auto Successor = NodesMap[NextNode];
138+
NodeCopy->registerSuccessor(Successor, NodeCopy);
139+
} else {
140+
assert("Node duplication failed. A duplicated node is missing.");
141+
}
142+
}
143+
}
144+
145+
return addNodesToExits(NewNodesList);
146+
}
147+
107148
void graph_impl::addRoot(const std::shared_ptr<node_impl> &Root) {
108149
MRoots.insert(Root);
109150
}
@@ -313,6 +354,11 @@ void exec_graph_impl::createCommandBuffers(sycl::device Device) {
313354

314355
// TODO extract kernel bundle logic from enqueueImpKernel
315356
for (const auto &Node : MSchedule) {
357+
// Empty nodes are not processed as other nodes, but only their
358+
// dependencies are propagated in findRealDeps
359+
if (Node->isEmpty())
360+
continue;
361+
316362
sycl::detail::CG::CGTYPE type = Node->MCGType;
317363
// If the node is a kernel with no special requirements we can enqueue it
318364
// directly.
@@ -453,8 +499,9 @@ exec_graph_impl::enqueue(const std::shared_ptr<sycl::detail::queue_impl> &Queue,
453499
"Error during emulated graph command group submission.");
454500
}
455501
ScheduledEvents.push_back(NewEvent);
456-
} else {
457-
502+
} else if (!NodeImpl->isEmpty()) {
503+
// Empty nodes are node processed as other nodes, but only their
504+
// dependencies are propagated in findRealDeps
458505
sycl::detail::EventImplPtr EventImpl =
459506
sycl::detail::Scheduler::getInstance().addCG(NodeImpl->getCGCopy(),
460507
Queue);

sycl/source/detail/graph_impl.hpp

Lines changed: 64 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,51 @@ class node_impl {
7373
std::unique_ptr<sycl::detail::CG> &&CommandGroup)
7474
: MCGType(CGType), MCommandGroup(std::move(CommandGroup)) {}
7575

76+
/// Tests if two nodes have the same content,
77+
/// i.e. same command group
78+
/// This function should only be used for internal purposes.
79+
/// A true return from this operator is not a guarantee that the nodes are
80+
/// equals according to the Common reference semantics. But this function is
81+
/// an helper to verify that two nodes contain equivalent Command Groups.
82+
/// @param Node node to compare with
83+
/// @return true if two nodes have equivament command groups. false otherwise.
84+
bool operator==(const node_impl &Node) {
85+
if (MCGType != Node.MCGType)
86+
return false;
87+
88+
switch (MCGType) {
89+
case sycl::detail::CG::CGTYPE::Kernel: {
90+
sycl::detail::CGExecKernel *ExecKernelA =
91+
static_cast<sycl::detail::CGExecKernel *>(MCommandGroup.get());
92+
sycl::detail::CGExecKernel *ExecKernelB =
93+
static_cast<sycl::detail::CGExecKernel *>(Node.MCommandGroup.get());
94+
return ExecKernelA->MKernelName.compare(ExecKernelB->MKernelName) == 0;
95+
}
96+
case sycl::detail::CG::CGTYPE::CopyUSM: {
97+
sycl::detail::CGCopyUSM *CopyA =
98+
static_cast<sycl::detail::CGCopyUSM *>(MCommandGroup.get());
99+
sycl::detail::CGCopyUSM *CopyB =
100+
static_cast<sycl::detail::CGCopyUSM *>(MCommandGroup.get());
101+
return (CopyA->getSrc() == CopyB->getSrc()) &&
102+
(CopyA->getDst() == CopyB->getDst()) &&
103+
(CopyA->getLength() == CopyB->getLength());
104+
}
105+
case sycl::detail::CG::CGTYPE::CopyAccToAcc:
106+
case sycl::detail::CG::CGTYPE::CopyAccToPtr:
107+
case sycl::detail::CG::CGTYPE::CopyPtrToAcc: {
108+
sycl::detail::CGCopy *CopyA =
109+
static_cast<sycl::detail::CGCopy *>(MCommandGroup.get());
110+
sycl::detail::CGCopy *CopyB =
111+
static_cast<sycl::detail::CGCopy *>(MCommandGroup.get());
112+
return (CopyA->getSrc() == CopyB->getSrc()) &&
113+
(CopyA->getDst() == CopyB->getDst());
114+
}
115+
default:
116+
assert(false && "Unexpected command group type!");
117+
return false;
118+
}
119+
}
120+
76121
/// Recursively add nodes to execution stack.
77122
/// @param NodeImpl Node to schedule.
78123
/// @param Schedule Execution ordering to add node to.
@@ -83,10 +128,8 @@ class node_impl {
83128
if (std::find(Schedule.begin(), Schedule.end(), Next) == Schedule.end())
84129
Next->sortTopological(Next, Schedule);
85130
}
86-
// We don't need to schedule empty nodes as they are only used when
87-
// calculating dependencies
88-
if (!NodeImpl->isEmpty())
89-
Schedule.push_front(NodeImpl);
131+
132+
Schedule.push_front(NodeImpl);
90133
}
91134

92135
/// Checks if this node has a given requirement.
@@ -171,26 +214,16 @@ class node_impl {
171214
/// Tests is the caller is similar to Node
172215
/// @return True if the two nodes are similar
173216
bool isSimilar(std::shared_ptr<node_impl> Node) {
174-
if (MCGType != Node->MCGType)
175-
return false;
176-
177217
if (MSuccessors.size() != Node->MSuccessors.size())
178218
return false;
179219

180220
if (MPredecessors.size() != Node->MPredecessors.size())
181221
return false;
182222

183-
if ((MCGType == sycl::detail::CG::CGTYPE::Kernel) &&
184-
(Node->MCGType == sycl::detail::CG::CGTYPE::Kernel)) {
185-
sycl::detail::CGExecKernel *ExecKernelA =
186-
static_cast<sycl::detail::CGExecKernel *>(MCommandGroup.get());
187-
sycl::detail::CGExecKernel *ExecKernelB =
188-
static_cast<sycl::detail::CGExecKernel *>(Node->MCommandGroup.get());
223+
if (*this == *Node.get())
224+
return true;
189225

190-
if (ExecKernelA->MKernelName.compare(ExecKernelB->MKernelName) != 0)
191-
return false;
192-
}
193-
return true;
226+
return false;
194227
}
195228

196229
/// Recursive traversal of successor nodes checking for
@@ -336,11 +369,12 @@ class graph_impl {
336369
"No event has been recorded for the specified graph node");
337370
}
338371

339-
/// Adds sub-graph nodes from an executable graph to this graph.
340-
/// @param NodeList List of nodes from sub-graph in schedule order.
372+
/// Duplicates and Adds sub-graph nodes from an executable graph to this
373+
/// graph.
374+
/// @param SubGraphExec sub-graph to add to the parent.
341375
/// @return An empty node is used to schedule dependencies on this sub-graph.
342376
std::shared_ptr<node_impl>
343-
addSubgraphNodes(const std::list<std::shared_ptr<node_impl>> &NodeList);
377+
addSubgraphNodes(const std::shared_ptr<exec_graph_impl> &SubGraphExec);
344378

345379
/// Query for the context tied to this graph.
346380
/// @return Context associated with graph.
@@ -486,6 +520,12 @@ class graph_impl {
486520
/// Insert node into list of root nodes.
487521
/// @param Root Node to add to list of root nodes.
488522
void addRoot(const std::shared_ptr<node_impl> &Root);
523+
524+
/// Adds nodes to the exit nodes of this graph.
525+
/// @param NodeList List of nodes from sub-graph in schedule order.
526+
/// @return An empty node is used to schedule dependencies on this sub-graph.
527+
std::shared_ptr<node_impl>
528+
addNodesToExits(const std::list<std::shared_ptr<node_impl>> &NodeList);
489529
};
490530

491531
/// Class representing the implementation of command_graph<executable>.
@@ -537,6 +577,10 @@ class exec_graph_impl {
537577
return MSchedule;
538578
}
539579

580+
/// Query the graph_impl.
581+
/// @return pointer to the graph_impl MGraphImpl
582+
const std::shared_ptr<graph_impl> &getGraphImpl() const { return MGraphImpl; }
583+
540584
private:
541585
/// Create a command-group for the node and add it to command-buffer by going
542586
/// through the scheduler.

sycl/source/handler.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1361,7 +1361,9 @@ void handler::ext_oneapi_graph(
13611361
}
13621362
// Store the node representing the subgraph in the handler so that we can
13631363
// return it to the user later.
1364-
MSubgraphNode = ParentGraph->addSubgraphNodes(GraphImpl->getSchedule());
1364+
// The nodes of the subgraph are duplicated when added to its parents.
1365+
// This avoids changing properties of the graph added as a subgraph.
1366+
MSubgraphNode = ParentGraph->addSubgraphNodes(GraphImpl);
13651367

13661368
// If we are recording an in-order queue remember the subgraph node, so it
13671369
// can be used as a dependency for any more nodes recorded from this queue.

sycl/test-e2e/Graph/Explicit/sub_graph_multiple_submission.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,6 @@
66
//
77
// CHECK-NOT: LEAK
88

9-
// XFAIL:*
10-
// Submit a graph as a subgraph more than once doesn't yet work.
11-
129
#define GRAPH_E2E_EXPLICIT
1310

1411
#include "../Inputs/sub_graph_multiple_submission.cpp"

sycl/test-e2e/Graph/Explicit/sub_graph_two_parent_graphs.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,6 @@
66
//
77
// CHECK-NOT: LEAK
88

9-
// XFAIL: *
10-
// Subgraph doesn't work properly in second parent graph
11-
129
#define GRAPH_E2E_EXPLICIT
1310

1411
#include "../Inputs/sub_graph_two_parent_graphs.cpp"

sycl/test-e2e/Graph/Inputs/sub_graph_multiple_submission.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ int main() {
6262
Queue.memcpy(Output.data(), X, N * sizeof(float), E).wait();
6363

6464
for (size_t i = 0; i < N; i++) {
65-
assert(Output[i] == -6.25f);
65+
assert(Output[i] == -4.5f);
6666
}
6767

6868
sycl::free(X, Queue);

sycl/test-e2e/Graph/Inputs/sub_graph_two_parent_graphs.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ int main() {
5050
auto ExecGraphA = GraphA.finalize();
5151

5252
auto B1 = add_node(GraphB, Queue, [&](handler &CGH) {
53-
CGH.parallel_for(N, [=](id<1> it) { X[it] = static_cast<float>(i); });
53+
CGH.parallel_for(N, [=](id<1> it) { X[it] = static_cast<float>(it); });
5454
});
5555

5656
auto B2 = add_node(

sycl/test-e2e/Graph/RecordReplay/sub_graph_multiple_submission.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,6 @@
66
//
77
// CHECK-NOT: LEAK
88

9-
// XFAIL:*
10-
// Submit a graph as a subgraph more than once doesn't yet work.
11-
129
#define GRAPH_E2E_RECORD_REPLAY
1310

1411
#include "../Inputs/sub_graph_multiple_submission.cpp"

sycl/test-e2e/Graph/RecordReplay/sub_graph_two_parent_graphs.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,6 @@
66
//
77
// CHECK-NOT: LEAK
88

9-
// XFAIL: *
10-
// Subgraph doesn't work properly in second parent graph
11-
129
#define GRAPH_E2E_RECORD_REPLAY
1310

1411
#include "../Inputs/sub_graph_two_parent_graphs.cpp"

0 commit comments

Comments
 (0)