Skip to content

Commit 2dd82a1

Browse files
committed
[DDG] Data Dependence Graph - Topological Sort (Memory Leak Fix)
Summary: This fixes the memory leak in bec37c3 and re-delivers the reverted patch. In this patch the DDG DAG is sorted topologically to put the nodes in the graph in the order that would satisfy all dependencies. This helps transformations that would like to generate code based on the DDG. Since the DDG is a DAG a reverse-post-order traversal would give us the topological ordering. This patch also sorts the basic blocks passed to the builder based on program order to ensure that the dependencies are computed in the correct direction. Authored By: bmahjour Reviewer: Meinersbur, fhahn, myhsu, xtian, dmgreen, kbarton, jdoerfert Reviewed By: Meinersbur Subscribers: ychen, arphaman, simoll, a.elovikov, mgorny, hiraditya, jfb, wuzish, llvm-commits, jsji, Whitney, etiotto, ppc-slack Tags: #llvm Differential Revision: https://reviews.llvm.org/D70609
1 parent 970d971 commit 2dd82a1

File tree

8 files changed

+417
-355
lines changed

8 files changed

+417
-355
lines changed

llvm/include/llvm/Analysis/DDG.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,7 @@ using DDGInfo = DependenceGraphInfo<DDGNode>;
300300

301301
/// Data Dependency Graph
302302
class DataDependenceGraph : public DDGBase, public DDGInfo {
303+
friend AbstractDependenceGraphBuilder<DataDependenceGraph>;
303304
friend class DDGBuilder;
304305

305306
public:
@@ -311,7 +312,7 @@ class DataDependenceGraph : public DDGBase, public DDGInfo {
311312
DataDependenceGraph(DataDependenceGraph &&G)
312313
: DDGBase(std::move(G)), DDGInfo(std::move(G)) {}
313314
DataDependenceGraph(Function &F, DependenceInfo &DI);
314-
DataDependenceGraph(const Loop &L, DependenceInfo &DI);
315+
DataDependenceGraph(Loop &L, LoopInfo &LI, DependenceInfo &DI);
315316
~DataDependenceGraph();
316317

317318
/// If node \p N belongs to a pi-block return a pointer to the pi-block,
@@ -381,6 +382,12 @@ class DDGBuilder : public AbstractDependenceGraphBuilder<DataDependenceGraph> {
381382
return *E;
382383
}
383384

385+
const NodeListType &getNodesInPiBlock(const DDGNode &N) final override {
386+
auto *PiNode = dyn_cast<const PiBlockDDGNode>(&N);
387+
assert(PiNode && "Expected a pi-block node.");
388+
return PiNode->getNodes();
389+
}
390+
384391
bool shouldCreatePiBlocks() const final override;
385392
};
386393

llvm/include/llvm/Analysis/DependenceGraphBuilder.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ template <class GraphType> class AbstractDependenceGraphBuilder {
5959
createMemoryDependencyEdges();
6060
createAndConnectRootNode();
6161
createPiBlocks();
62+
sortNodesTopologically();
6263
}
6364

6465
/// Create fine grained nodes. These are typically atomic nodes that
@@ -84,6 +85,9 @@ template <class GraphType> class AbstractDependenceGraphBuilder {
8485
/// the dependence graph into an acyclic graph.
8586
void createPiBlocks();
8687

88+
/// Topologically sort the graph nodes.
89+
void sortNodesTopologically();
90+
8791
protected:
8892
/// Create the root node of the graph.
8993
virtual NodeType &createRootNode() = 0;
@@ -104,6 +108,10 @@ template <class GraphType> class AbstractDependenceGraphBuilder {
104108
/// Create a rooted edge going from \p Src to \p Tgt .
105109
virtual EdgeType &createRootedEdge(NodeType &Src, NodeType &Tgt) = 0;
106110

111+
/// Given a pi-block node, return a vector of all the nodes contained within
112+
/// it.
113+
virtual const NodeListType &getNodesInPiBlock(const NodeType &N) = 0;
114+
107115
/// Deallocate memory of edge \p E.
108116
virtual void destroyEdge(EdgeType &E) { delete &E; }
109117

llvm/lib/Analysis/DDG.cpp

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@
99
// The implementation for the data dependence graph.
1010
//===----------------------------------------------------------------------===//
1111
#include "llvm/Analysis/DDG.h"
12+
#include "llvm/ADT/SCCIterator.h"
1213
#include "llvm/Analysis/LoopInfo.h"
14+
#include "llvm/Analysis/LoopIterator.h"
1315
#include "llvm/Support/CommandLine.h"
1416

1517
using namespace llvm;
@@ -179,19 +181,28 @@ using BasicBlockListType = SmallVector<BasicBlock *, 8>;
179181

180182
DataDependenceGraph::DataDependenceGraph(Function &F, DependenceInfo &D)
181183
: DependenceGraphInfo(F.getName().str(), D) {
184+
// Put the basic blocks in program order for correct dependence
185+
// directions.
182186
BasicBlockListType BBList;
183-
for (auto &BB : F.getBasicBlockList())
184-
BBList.push_back(&BB);
187+
for (auto &SCC : make_range(scc_begin(&F), scc_end(&F)))
188+
for (BasicBlock * BB : SCC)
189+
BBList.push_back(BB);
190+
std::reverse(BBList.begin(), BBList.end());
185191
DDGBuilder(*this, D, BBList).populate();
186192
}
187193

188-
DataDependenceGraph::DataDependenceGraph(const Loop &L, DependenceInfo &D)
194+
DataDependenceGraph::DataDependenceGraph(Loop &L, LoopInfo &LI,
195+
DependenceInfo &D)
189196
: DependenceGraphInfo(Twine(L.getHeader()->getParent()->getName() + "." +
190197
L.getHeader()->getName())
191198
.str(),
192199
D) {
200+
// Put the basic blocks in program order for correct dependence
201+
// directions.
202+
LoopBlocksDFS DFS(&L);
203+
DFS.perform(&LI);
193204
BasicBlockListType BBList;
194-
for (BasicBlock *BB : L.blocks())
205+
for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
195206
BBList.push_back(BB);
196207
DDGBuilder(*this, D, BBList).populate();
197208
}
@@ -259,7 +270,7 @@ DDGAnalysis::Result DDGAnalysis::run(Loop &L, LoopAnalysisManager &AM,
259270
LoopStandardAnalysisResults &AR) {
260271
Function *F = L.getHeader()->getParent();
261272
DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI);
262-
return std::make_unique<DataDependenceGraph>(L, DI);
273+
return std::make_unique<DataDependenceGraph>(L, AR.LI, DI);
263274
}
264275
AnalysisKey DDGAnalysis::Key;
265276

llvm/lib/Analysis/DependenceGraphBuilder.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,5 +353,34 @@ void AbstractDependenceGraphBuilder<G>::createMemoryDependencyEdges() {
353353
}
354354
}
355355

356+
template <class G>
357+
void AbstractDependenceGraphBuilder<G>::sortNodesTopologically() {
358+
359+
// If we don't create pi-blocks, then we may not have a DAG.
360+
if (!shouldCreatePiBlocks())
361+
return;
362+
363+
SmallVector<NodeType *, 64> NodesInPO;
364+
using NodeKind = typename NodeType::NodeKind;
365+
for (NodeType *N : post_order(&Graph)) {
366+
if (N->getKind() == NodeKind::PiBlock) {
367+
// Put members of the pi-block right after the pi-block itself, for
368+
// convenience.
369+
const NodeListType &PiBlockMembers = getNodesInPiBlock(*N);
370+
NodesInPO.insert(NodesInPO.end(), PiBlockMembers.begin(),
371+
PiBlockMembers.end());
372+
}
373+
NodesInPO.push_back(N);
374+
}
375+
376+
size_t OldSize = Graph.Nodes.size();
377+
Graph.Nodes.clear();
378+
for (NodeType *N : reverse(NodesInPO))
379+
Graph.Nodes.push_back(N);
380+
if (Graph.Nodes.size() != OldSize)
381+
assert(false &&
382+
"Expected the number of nodes to stay the same after the sort");
383+
}
384+
356385
template class llvm::AbstractDependenceGraphBuilder<DataDependenceGraph>;
357386
template class llvm::DependenceGraphInfo<DDGNode>;

llvm/test/Analysis/DDG/basic-a.ll

Lines changed: 84 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,44 @@
11
; RUN: opt < %s -disable-output "-passes=print<ddg>" 2>&1 | FileCheck %s
22

33
; CHECK-LABEL: 'DDG' for loop 'test1.for.body':
4-
; CHECK: Node Address:[[N1:0x[0-9a-f]*]]:single-instruction
4+
5+
; CHECK: Node Address:[[PI:0x[0-9a-f]*]]:pi-block
6+
; CHECK-NEXT: --- start of nodes in pi-block ---
7+
; CHECK-NEXT: Node Address:[[N10:0x[0-9a-f]*]]:single-instruction
8+
; CHECK-NEXT: Instructions:
9+
; CHECK-NEXT: %inc = add i64 %i.02, 1
10+
; CHECK-NEXT: Edges:
11+
; CHECK-NEXT: [def-use] to [[N11:0x[0-9a-f]*]]
12+
13+
; CHECK: Node Address:[[N11]]:single-instruction
14+
; CHECK-NEXT: Instructions:
15+
; CHECK-NEXT: %i.02 = phi i64 [ %inc, %test1.for.body ], [ 0, %test1.for.body.preheader ]
16+
; CHECK-NEXT: Edges:
17+
; CHECK-NEXT: [def-use] to [[N10]]
18+
; CHECK-NEXT: --- end of nodes in pi-block ---
19+
; CHECK-NEXT: Edges:
20+
; CHECK-NEXT: [def-use] to [[N1:0x[0-9a-f]*]]
21+
; CHECK-NEXT: [def-use] to [[N6:0x[0-9a-f]*]]
22+
; CHECK-NEXT: [def-use] to [[N7:0x[0-9a-f]*]]
23+
24+
; CHECK: Node Address:[[N7]]:single-instruction
25+
; CHECK-NEXT: Instructions:
26+
; CHECK-NEXT: %exitcond = icmp ne i64 %inc, %n
27+
; CHECK-NEXT: Edges:
28+
; CHECK-NEXT: [def-use] to [[N8:0x[0-9a-f]*]]
29+
30+
; CHECK: Node Address:[[N8]]:single-instruction
31+
; CHECK-NEXT: Instructions:
32+
; CHECK-NEXT: br i1 %exitcond, label %test1.for.body, label %for.end.loopexit
33+
; CHECK-NEXT: Edges:none!
34+
35+
; CHECK: Node Address:[[N6]]:single-instruction
36+
; CHECK-NEXT: Instructions:
37+
; CHECK-NEXT: %arrayidx1 = getelementptr inbounds float, float* %a, i64 %i.02
38+
; CHECK-NEXT: Edges:
39+
; CHECK-NEXT: [def-use] to [[N5:0x[0-9a-f]*]]
40+
41+
; CHECK: Node Address:[[N1]]:single-instruction
542
; CHECK-NEXT: Instructions:
643
; CHECK-NEXT: %arrayidx = getelementptr inbounds float, float* %b, i64 %i.02
744
; CHECK-NEXT: Edges:
@@ -23,49 +60,13 @@
2360
; CHECK-NEXT: Instructions:
2461
; CHECK-NEXT: %add = fadd float %0, %conv
2562
; CHECK-NEXT: Edges:
26-
; CHECK-NEXT: [def-use] to [[N5:0x[0-9a-f]*]]
27-
28-
; CHECK: Node Address:[[N6:0x[0-9a-f]*]]:single-instruction
29-
; CHECK-NEXT: Instructions:
30-
; CHECK-NEXT: %arrayidx1 = getelementptr inbounds float, float* %a, i64 %i.02
31-
; CHECK-NEXT: Edges:
3263
; CHECK-NEXT: [def-use] to [[N5]]
3364

3465
; CHECK: Node Address:[[N5]]:single-instruction
3566
; CHECK-NEXT: Instructions:
3667
; CHECK-NEXT: store float %add, float* %arrayidx1, align 4
3768
; CHECK-NEXT: Edges:none!
3869

39-
; CHECK: Node Address:[[N7:0x[0-9a-f]*]]:single-instruction
40-
; CHECK-NEXT: Instructions:
41-
; CHECK-NEXT: %exitcond = icmp ne i64 %inc, %n
42-
; CHECK-NEXT: Edges:
43-
; CHECK-NEXT: [def-use] to [[N8:0x[0-9a-f]*]]
44-
45-
; CHECK: Node Address:[[N8]]:single-instruction
46-
; CHECK-NEXT: Instructions:
47-
; CHECK-NEXT: br i1 %exitcond, label %test1.for.body, label %for.end.loopexit
48-
; CHECK-NEXT: Edges:none!
49-
50-
; CHECK: Node Address:[[N9:0x[0-9a-f]*]]:pi-block
51-
; CHECK-NEXT: --- start of nodes in pi-block ---
52-
; CHECK-NEXT: Node Address:[[N10:0x[0-9a-f]*]]:single-instruction
53-
; CHECK-NEXT: Instructions:
54-
; CHECK-NEXT: %inc = add i64 %i.02, 1
55-
; CHECK-NEXT: Edges:
56-
; CHECK-NEXT: [def-use] to [[N11:0x[0-9a-f]*]]
57-
58-
; CHECK: Node Address:[[N11]]:single-instruction
59-
; CHECK-NEXT: Instructions:
60-
; CHECK-NEXT: %i.02 = phi i64 [ %inc, %test1.for.body ], [ 0, %test1.for.body.preheader ]
61-
; CHECK-NEXT: Edges:
62-
; CHECK-NEXT: [def-use] to [[N10]]
63-
; CHECK-NEXT: --- end of nodes in pi-block ---
64-
; CHECK-NEXT: Edges:
65-
; CHECK-NEXT: [def-use] to [[N1]]
66-
; CHECK-NEXT: [def-use] to [[N6]]
67-
; CHECK-NEXT: [def-use] to [[N7]]
68-
6970

7071
;; No memory dependencies.
7172
;; void test1(unsigned long n, float * restrict a, float * restrict b) {
@@ -96,78 +97,80 @@ for.end: ; preds = %test1.for.body, %en
9697

9798

9899
; CHECK-LABEL: 'DDG' for loop 'test2.for.body':
99-
; CHECK: Node Address:[[N1:0x[0-9a-f]*]]:single-instruction
100-
; CHECK-NEXT: Instructions:
101-
; CHECK-NEXT: %arrayidx = getelementptr inbounds float, float* %b, i64 %i.02
102-
; CHECK-NEXT: Edges:
103-
; CHECK-NEXT: [def-use] to [[N2:0x[0-9a-f]*]]
104100

105-
; CHECK: Node Address:[[N2]]:single-instruction
101+
; CHECK: Node Address:[[PI:0x[0-9a-f]*]]:pi-block
102+
; CHECK-NEXT: --- start of nodes in pi-block ---
103+
; CHECK: Node Address:[[N11:0x[0-9a-f]*]]:single-instruction
106104
; CHECK-NEXT: Instructions:
107-
; CHECK-NEXT: %0 = load float, float* %arrayidx, align 4
105+
; CHECK-NEXT: %inc = add i64 %i.02, 1
108106
; CHECK-NEXT: Edges:
109-
; CHECK-NEXT: [def-use] to [[N3:0x[0-9a-f]*]]
107+
; CHECK-NEXT: [def-use] to [[N12:0x[0-9a-f]*]]
110108

111-
; CHECK: Node Address:[[N4:0x[0-9a-f]*]]:single-instruction
109+
; CHECK: Node Address:[[N12]]:single-instruction
112110
; CHECK-NEXT: Instructions:
113-
; CHECK-NEXT: %arrayidx1 = getelementptr inbounds float, float* %a, i64 %i.02
111+
; CHECK-NEXT: %i.02 = phi i64 [ %inc, %test2.for.body ], [ 0, %test2.for.body.preheader ]
114112
; CHECK-NEXT: Edges:
115-
; CHECK-NEXT: [def-use] to [[N5:0x[0-9a-f]*]]
113+
; CHECK-NEXT: [def-use] to [[N11]]
114+
; CHECK-NEXT: --- end of nodes in pi-block ---
115+
; CHECK-NEXT: Edges:
116+
; CHECK-NEXT: [def-use] to [[N1:0x[0-9a-f]*]]
117+
; CHECK-NEXT: [def-use] to [[N4:0x[0-9a-f]*]]
118+
; CHECK-NEXT: [def-use] to [[N7:0x[0-9a-f]*]]
119+
; CHECK-NEXT: [def-use] to [[N8:0x[0-9a-f]*]]
116120

117-
; CHECK: Node Address:[[N5]]:single-instruction
121+
; CHECK: Node Address:[[N8]]:single-instruction
118122
; CHECK-NEXT: Instructions:
119-
; CHECK-NEXT: %1 = load float, float* %arrayidx1, align 4
123+
; CHECK-NEXT: %exitcond = icmp ne i64 %inc, %n
120124
; CHECK-NEXT: Edges:
121-
; CHECK-NEXT: [def-use] to [[N3]]
122-
; CHECK-NEXT: [memory] to [[N6:0x[0-9a-f]*]]
125+
; CHECK-NEXT: [def-use] to [[N9:0x[0-9a-f]*]]
123126

124-
; CHECK: Node Address:[[N3]]:single-instruction
127+
; CHECK: Node Address:[[N9]]:single-instruction
125128
; CHECK-NEXT: Instructions:
126-
; CHECK-NEXT: %add = fadd float %0, %1
127-
; CHECK-NEXT: Edges:
128-
; CHECK-NEXT: [def-use] to [[N6]]
129+
; CHECK-NEXT: br i1 %exitcond, label %test2.for.body, label %for.end.loopexit
130+
; CHECK-NEXT: Edges:none!
129131

130-
; CHECK: Node Address:[[N7:0x[0-9a-f]*]]:single-instruction
132+
; CHECK: Node Address:[[N7]]:single-instruction
131133
; CHECK-NEXT: Instructions:
132134
; CHECK-NEXT: %arrayidx2 = getelementptr inbounds float, float* %a, i64 %i.02
133135
; CHECK-NEXT: Edges:
134-
; CHECK-NEXT: [def-use] to [[N6]]
136+
; CHECK-NEXT: [def-use] to [[N6:0x[0-9a-f]*]]
135137

136-
; CHECK: Node Address:[[N6]]:single-instruction
138+
; CHECK: Node Address:[[N4]]:single-instruction
137139
; CHECK-NEXT: Instructions:
138-
; CHECK-NEXT: store float %add, float* %arrayidx2, align 4
139-
; CHECK-NEXT: Edges:none!
140+
; CHECK-NEXT: %arrayidx1 = getelementptr inbounds float, float* %a, i64 %i.02
141+
; CHECK-NEXT: Edges:
142+
; CHECK-NEXT: [def-use] to [[N5:0x[0-9a-f]*]]
140143

141-
; CHECK: Node Address:[[N8:0x[0-9a-f]*]]:single-instruction
144+
; CHECK: Node Address:[[N5]]:single-instruction
142145
; CHECK-NEXT: Instructions:
143-
; CHECK-NEXT: %exitcond = icmp ne i64 %inc, %n
146+
; CHECK-NEXT: %1 = load float, float* %arrayidx1, align 4
144147
; CHECK-NEXT: Edges:
145-
; CHECK-NEXT: [def-use] to [[N9:0x[0-9a-f]*]]
148+
; CHECK-NEXT: [def-use] to [[N3:0x[0-9a-f]*]]
149+
; CHECK-NEXT: [memory] to [[N6]]
146150

147-
; CHECK: Node Address:[[N9]]:single-instruction
151+
; CHECK: Node Address:[[N1]]:single-instruction
148152
; CHECK-NEXT: Instructions:
149-
; CHECK-NEXT: br i1 %exitcond, label %test2.for.body, label %for.end.loopexit
150-
; CHECK-NEXT: Edges:none!
153+
; CHECK-NEXT: %arrayidx = getelementptr inbounds float, float* %b, i64 %i.02
154+
; CHECK-NEXT: Edges:
155+
; CHECK-NEXT: [def-use] to [[N2:0x[0-9a-f]*]]
151156

152-
; CHECK: Node Address:[[N10:0x[0-9a-f]*]]:pi-block
153-
; CHECK-NEXT: --- start of nodes in pi-block ---
154-
; CHECK: Node Address:[[N11:0x[0-9a-f]*]]:single-instruction
157+
; CHECK: Node Address:[[N2]]:single-instruction
155158
; CHECK-NEXT: Instructions:
156-
; CHECK-NEXT: %inc = add i64 %i.02, 1
159+
; CHECK-NEXT: %0 = load float, float* %arrayidx, align 4
157160
; CHECK-NEXT: Edges:
158-
; CHECK-NEXT: [def-use] to [[N12:0x[0-9a-f]*]]
161+
; CHECK-NEXT: [def-use] to [[N3]]
159162

160-
; CHECK: Node Address:[[N12]]:single-instruction
163+
; CHECK: Node Address:[[N3]]:single-instruction
161164
; CHECK-NEXT: Instructions:
162-
; CHECK-NEXT: %i.02 = phi i64 [ %inc, %test2.for.body ], [ 0, %test2.for.body.preheader ]
163-
; CHECK-NEXT: Edges:
164-
; CHECK-NEXT: [def-use] to [[N11]]
165-
; CHECK-NEXT: --- end of nodes in pi-block ---
165+
; CHECK-NEXT: %add = fadd float %0, %1
166166
; CHECK-NEXT: Edges:
167-
; CHECK-NEXT: [def-use] to [[N1]]
168-
; CHECK-NEXT: [def-use] to [[N4]]
169-
; CHECK-NEXT: [def-use] to [[N7]]
170-
; CHECK-NEXT: [def-use] to [[N8]]
167+
; CHECK-NEXT: [def-use] to [[N6]]
168+
169+
; CHECK: Node Address:[[N6]]:single-instruction
170+
; CHECK-NEXT: Instructions:
171+
; CHECK-NEXT: store float %add, float* %arrayidx2, align 4
172+
; CHECK-NEXT: Edges:none!
173+
171174

172175

173176
;; Loop-independent memory dependencies.

0 commit comments

Comments
 (0)