Skip to content

[SYCL][Graph] Fix issues with sycl_ext_oneapi_graph subgraphs #10822

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Aug 25, 2023
53 changes: 50 additions & 3 deletions sycl/source/detail/graph_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,17 @@ bool checkForRequirement(sycl::detail::AccessorImplHost *Req,
}
return SuccessorAddedDep;
}

void duplicateNode(const std::shared_ptr<node_impl> Node,
std::shared_ptr<node_impl> &NodeCopy) {
if (Node->MCGType == sycl::detail::CG::None) {
NodeCopy = std::make_shared<node_impl>();
NodeCopy->MCGType = sycl::detail::CG::None;
} else {
NodeCopy = std::make_shared<node_impl>(Node->MCGType, Node->getCGCopy());
}
}

} // anonymous namespace

void exec_graph_impl::schedule() {
Expand All @@ -81,7 +92,7 @@ void exec_graph_impl::schedule() {
}
}

std::shared_ptr<node_impl> graph_impl::addSubgraphNodes(
std::shared_ptr<node_impl> graph_impl::addNodesToExits(
const std::list<std::shared_ptr<node_impl>> &NodeList) {
// Find all input and output nodes from the node list
std::vector<std::shared_ptr<node_impl>> Inputs;
Expand All @@ -104,6 +115,36 @@ std::shared_ptr<node_impl> graph_impl::addSubgraphNodes(
return this->add(Outputs);
}

std::shared_ptr<node_impl> graph_impl::addSubgraphNodes(
const std::shared_ptr<exec_graph_impl> &SubGraphExec) {
std::map<std::shared_ptr<node_impl>, std::shared_ptr<node_impl>> NodesMap;

std::list<std::shared_ptr<node_impl>> NodesList = SubGraphExec->getSchedule();
std::list<std::shared_ptr<node_impl>> NewNodesList{NodesList.size()};

// Duplication of nodes
for (auto NodeIt = NodesList.end(), NewNodesIt = NewNodesList.end();
NodeIt != NodesList.begin();) {
--NodeIt;
--NewNodesIt;
auto Node = *NodeIt;
std::shared_ptr<node_impl> NodeCopy;
duplicateNode(Node, NodeCopy);
*NewNodesIt = NodeCopy;
NodesMap.insert({Node, NodeCopy});
for (auto &NextNode : Node->MSuccessors) {
if (NodesMap.find(NextNode) != NodesMap.end()) {
auto Successor = NodesMap[NextNode];
NodeCopy->registerSuccessor(Successor, NodeCopy);
} else {
assert("Node duplication failed. A duplicated node is missing.");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This caused MacOS build failure in post commit. Fixing in #10977.

}
}
}

return addNodesToExits(NewNodesList);
}

void graph_impl::addRoot(const std::shared_ptr<node_impl> &Root) {
MRoots.insert(Root);
}
Expand Down Expand Up @@ -313,6 +354,11 @@ void exec_graph_impl::createCommandBuffers(sycl::device Device) {

// TODO extract kernel bundle logic from enqueueImpKernel
for (const auto &Node : MSchedule) {
// Empty nodes are not processed as other nodes, but only their
// dependencies are propagated in findRealDeps
if (Node->isEmpty())
continue;

sycl::detail::CG::CGTYPE type = Node->MCGType;
// If the node is a kernel with no special requirements we can enqueue it
// directly.
Expand Down Expand Up @@ -453,8 +499,9 @@ exec_graph_impl::enqueue(const std::shared_ptr<sycl::detail::queue_impl> &Queue,
"Error during emulated graph command group submission.");
}
ScheduledEvents.push_back(NewEvent);
} else {

} else if (!NodeImpl->isEmpty()) {
// Empty nodes are node processed as other nodes, but only their
// dependencies are propagated in findRealDeps
sycl::detail::EventImplPtr EventImpl =
sycl::detail::Scheduler::getInstance().addCG(NodeImpl->getCGCopy(),
Queue);
Expand Down
84 changes: 64 additions & 20 deletions sycl/source/detail/graph_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,51 @@ class node_impl {
std::unique_ptr<sycl::detail::CG> &&CommandGroup)
: MCGType(CGType), MCommandGroup(std::move(CommandGroup)) {}

/// Tests if two nodes have the same content,
/// i.e. same command group
/// This function should only be used for internal purposes.
/// A true return from this operator is not a guarantee that the nodes are
/// equals according to the Common reference semantics. But this function is
/// an helper to verify that two nodes contain equivalent Command Groups.
/// @param Node node to compare with
/// @return true if two nodes have equivament command groups. false otherwise.
bool operator==(const node_impl &Node) {
if (MCGType != Node.MCGType)
return false;

switch (MCGType) {
case sycl::detail::CG::CGTYPE::Kernel: {
sycl::detail::CGExecKernel *ExecKernelA =
static_cast<sycl::detail::CGExecKernel *>(MCommandGroup.get());
sycl::detail::CGExecKernel *ExecKernelB =
static_cast<sycl::detail::CGExecKernel *>(Node.MCommandGroup.get());
return ExecKernelA->MKernelName.compare(ExecKernelB->MKernelName) == 0;
}
case sycl::detail::CG::CGTYPE::CopyUSM: {
sycl::detail::CGCopyUSM *CopyA =
static_cast<sycl::detail::CGCopyUSM *>(MCommandGroup.get());
sycl::detail::CGCopyUSM *CopyB =
static_cast<sycl::detail::CGCopyUSM *>(MCommandGroup.get());
return (CopyA->getSrc() == CopyB->getSrc()) &&
(CopyA->getDst() == CopyB->getDst()) &&
(CopyA->getLength() == CopyB->getLength());
}
case sycl::detail::CG::CGTYPE::CopyAccToAcc:
case sycl::detail::CG::CGTYPE::CopyAccToPtr:
case sycl::detail::CG::CGTYPE::CopyPtrToAcc: {
sycl::detail::CGCopy *CopyA =
static_cast<sycl::detail::CGCopy *>(MCommandGroup.get());
sycl::detail::CGCopy *CopyB =
static_cast<sycl::detail::CGCopy *>(MCommandGroup.get());
return (CopyA->getSrc() == CopyB->getSrc()) &&
(CopyA->getDst() == CopyB->getDst());
}
default:
assert(false && "Unexpected command group type!");
return false;
}
}

/// Recursively add nodes to execution stack.
/// @param NodeImpl Node to schedule.
/// @param Schedule Execution ordering to add node to.
Expand All @@ -83,10 +128,8 @@ class node_impl {
if (std::find(Schedule.begin(), Schedule.end(), Next) == Schedule.end())
Next->sortTopological(Next, Schedule);
}
// We don't need to schedule empty nodes as they are only used when
// calculating dependencies
if (!NodeImpl->isEmpty())
Schedule.push_front(NodeImpl);

Schedule.push_front(NodeImpl);
}

/// Checks if this node has a given requirement.
Expand Down Expand Up @@ -171,26 +214,16 @@ class node_impl {
/// Tests is the caller is similar to Node
/// @return True if the two nodes are similar
bool isSimilar(std::shared_ptr<node_impl> Node) {
if (MCGType != Node->MCGType)
return false;

if (MSuccessors.size() != Node->MSuccessors.size())
return false;

if (MPredecessors.size() != Node->MPredecessors.size())
return false;

if ((MCGType == sycl::detail::CG::CGTYPE::Kernel) &&
(Node->MCGType == sycl::detail::CG::CGTYPE::Kernel)) {
sycl::detail::CGExecKernel *ExecKernelA =
static_cast<sycl::detail::CGExecKernel *>(MCommandGroup.get());
sycl::detail::CGExecKernel *ExecKernelB =
static_cast<sycl::detail::CGExecKernel *>(Node->MCommandGroup.get());
if (*this == *Node.get())
return true;

if (ExecKernelA->MKernelName.compare(ExecKernelB->MKernelName) != 0)
return false;
}
return true;
return false;
}

/// Recursive traversal of successor nodes checking for
Expand Down Expand Up @@ -336,11 +369,12 @@ class graph_impl {
"No event has been recorded for the specified graph node");
}

/// Adds sub-graph nodes from an executable graph to this graph.
/// @param NodeList List of nodes from sub-graph in schedule order.
/// Duplicates and Adds sub-graph nodes from an executable graph to this
/// graph.
/// @param SubGraphExec sub-graph to add to the parent.
/// @return An empty node is used to schedule dependencies on this sub-graph.
std::shared_ptr<node_impl>
addSubgraphNodes(const std::list<std::shared_ptr<node_impl>> &NodeList);
addSubgraphNodes(const std::shared_ptr<exec_graph_impl> &SubGraphExec);

/// Query for the context tied to this graph.
/// @return Context associated with graph.
Expand Down Expand Up @@ -486,6 +520,12 @@ class graph_impl {
/// Insert node into list of root nodes.
/// @param Root Node to add to list of root nodes.
void addRoot(const std::shared_ptr<node_impl> &Root);

/// Adds nodes to the exit nodes of this graph.
/// @param NodeList List of nodes from sub-graph in schedule order.
/// @return An empty node is used to schedule dependencies on this sub-graph.
std::shared_ptr<node_impl>
addNodesToExits(const std::list<std::shared_ptr<node_impl>> &NodeList);
};

/// Class representing the implementation of command_graph<executable>.
Expand Down Expand Up @@ -537,6 +577,10 @@ class exec_graph_impl {
return MSchedule;
}

/// Query the graph_impl.
/// @return pointer to the graph_impl MGraphImpl
const std::shared_ptr<graph_impl> &getGraphImpl() const { return MGraphImpl; }

private:
/// Create a command-group for the node and add it to command-buffer by going
/// through the scheduler.
Expand Down
4 changes: 3 additions & 1 deletion sycl/source/handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1361,7 +1361,9 @@ void handler::ext_oneapi_graph(
}
// Store the node representing the subgraph in the handler so that we can
// return it to the user later.
MSubgraphNode = ParentGraph->addSubgraphNodes(GraphImpl->getSchedule());
// The nodes of the subgraph are duplicated when added to its parents.
// This avoids changing properties of the graph added as a subgraph.
MSubgraphNode = ParentGraph->addSubgraphNodes(GraphImpl);

// If we are recording an in-order queue remember the subgraph node, so it
// can be used as a dependency for any more nodes recorded from this queue.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@
//
// CHECK-NOT: LEAK

// XFAIL:*
// Submit a graph as a subgraph more than once doesn't yet work.

#define GRAPH_E2E_EXPLICIT

#include "../Inputs/sub_graph_multiple_submission.cpp"
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@
//
// CHECK-NOT: LEAK

// XFAIL: *
// Subgraph doesn't work properly in second parent graph

#define GRAPH_E2E_EXPLICIT

#include "../Inputs/sub_graph_two_parent_graphs.cpp"
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ int main() {
Queue.memcpy(Output.data(), X, N * sizeof(float), E).wait();

for (size_t i = 0; i < N; i++) {
assert(Output[i] == -6.25f);
assert(Output[i] == -4.5f);
}

sycl::free(X, Queue);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ int main() {
auto ExecGraphA = GraphA.finalize();

auto B1 = add_node(GraphB, Queue, [&](handler &CGH) {
CGH.parallel_for(N, [=](id<1> it) { X[it] = static_cast<float>(i); });
CGH.parallel_for(N, [=](id<1> it) { X[it] = static_cast<float>(it); });
});

auto B2 = add_node(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@
//
// CHECK-NOT: LEAK

// XFAIL:*
// Submit a graph as a subgraph more than once doesn't yet work.

#define GRAPH_E2E_RECORD_REPLAY

#include "../Inputs/sub_graph_multiple_submission.cpp"
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@
//
// CHECK-NOT: LEAK

// XFAIL: *
// Subgraph doesn't work properly in second parent graph

#define GRAPH_E2E_RECORD_REPLAY

#include "../Inputs/sub_graph_two_parent_graphs.cpp"
Loading