Skip to content

Commit 36bdc3d

Browse files
author
Whitney Tsang
committed
[LoopFusion] Move instructions from FC0.Latch to FC1.Latch.
Summary: This PR moves instructions from FC0.Latch bottom up to the beginning of FC1.Latch as long as they are proven safe. To illustrate why this is beneficial, let's consider the following example: Before Fusion: header1: br header2 header2: br header2, latch1 latch1: br header1, preheader3 preheader3: br header3 header3: br header4 header4: br header4, latch3 latch3: br header3, exit3 After Fusion (before this PR): header1: br header2 header2: br header2, latch1 latch1: br header3 header3: br header4 header4: br header4, latch3 latch3: br header1, exit3 Note that preheader3 is removed during fusion before this PR. Notice that we cannot fuse loop2 with loop4 as there exists block latch1 in between. This PR moves instructions from latch1 to the beginning of latch3, and removes block latch1. LoopFusion is now able to fuse loop nests recursively. After Fusion (after this PR): header1: br header2 header2: br header3 header3: br header4 header4: br header2, latch3 latch3: br header1, exit3 Reviewer: kbarton, jdoerfert, Meinersbur, dmgreen, fhahn, hfinkel, bmahjour, etiotto Reviewed By: kbarton, Meinersbur Subscribers: hiraditya, llvm-commits Tag: LLVM Differential Revision: https://reviews.llvm.org/D71165
1 parent 84161f1 commit 36bdc3d

File tree

7 files changed

+139
-47
lines changed

7 files changed

+139
-47
lines changed

llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,12 @@ bool isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
4545
const DominatorTree &DT, const PostDominatorTree &PDT,
4646
DependenceInfo &DI);
4747

48+
/// Move instructions from \p FromBB bottom up to the beginning of \p ToBB
49+
/// when proven safe.
50+
void moveInstsBottomUp(BasicBlock &FromBB, BasicBlock &ToBB,
51+
const DominatorTree &DT, const PostDominatorTree &PDT,
52+
DependenceInfo &DI);
53+
4854
} // end namespace llvm
4955

5056
#endif // LLVM_TRANSFORMS_UTILS_CODEMOVERUTILS_H

llvm/lib/Transforms/Scalar/LoopFuse.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1114,6 +1114,29 @@ struct LoopFuser {
11141114
return FC.ExitBlock->size() == 1;
11151115
}
11161116

1117+
/// Simplify the condition of the latch branch of \p FC to true, when both of
1118+
/// its successors are the same.
///
/// Only the branch condition is rewritten; the branch itself stays a
/// conditional branch with its two (identical) successors. If the latch
/// terminator is not a BranchInst, nothing is changed.
1119+
void simplifyLatchBranch(const FusionCandidate &FC) const {
1120+
BranchInst *FCLatchBranch = dyn_cast<BranchInst>(FC.Latch->getTerminator());
1121+
if (FCLatchBranch) {
1122+
assert(FCLatchBranch->isConditional() &&
1123+
FCLatchBranch->getSuccessor(0) == FCLatchBranch->getSuccessor(1) &&
1124+
"Expecting the two successors of FCLatchBranch to be the same");
// Replace the condition with a constant true of the same type as the
// existing condition value.
1125+
FCLatchBranch->setCondition(
1126+
llvm::ConstantInt::getTrue(FCLatchBranch->getCondition()->getType()));
1127+
}
1128+
}
1129+
1130+
/// Move instructions from FC0.Latch to FC1.Latch. If FC0.Latch has a unique
1131+
/// successor, then merge FC0.Latch with its unique successor.
///
/// The move is delegated to moveInstsBottomUp, so only instructions it
/// considers safe to move leave FC0.Latch.
1132+
void mergeLatch(const FusionCandidate &FC0, const FusionCandidate &FC1) {
1133+
moveInstsBottomUp(*FC0.Latch, *FC1.Latch, DT, PDT, DI);
1134+
if (BasicBlock *Succ = FC0.Latch->getUniqueSuccessor()) {
1135+
MergeBlockIntoPredecessor(Succ, &DTU, &LI);
// Flush the updater right away; presumably so that later users of the
// dominator trees see the merged CFG -- TODO confirm.
1136+
DTU.flush();
1137+
}
1138+
}
1139+
11171140
/// Fuse two fusion candidates, creating a new fused loop.
11181141
///
11191142
/// This method contains the mechanics of fusing two loops, represented by \p
@@ -1247,6 +1270,10 @@ struct LoopFuser {
12471270
FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header);
12481271
FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header);
12491272

1273+
// Change the condition of FC0 latch branch to true, as both successors of
1274+
// the branch are the same.
1275+
simplifyLatchBranch(FC0);
1276+
12501277
// If FC0.Latch and FC0.ExitingBlock are the same then we have already
12511278
// performed the updates above.
12521279
if (FC0.Latch != FC0.ExitingBlock)
@@ -1269,9 +1296,15 @@ struct LoopFuser {
12691296

12701297
// Is there a way to keep SE up-to-date so we don't need to forget the loops
12711298
// and rebuild the information in subsequent passes of fusion?
1299+
// Note: Need to forget the loops before merging the loop latches, as
1300+
// mergeLatch may remove the only block in FC1.
12721301
SE.forgetLoop(FC1.L);
12731302
SE.forgetLoop(FC0.L);
12741303

1304+
// Move instructions from FC0.Latch to FC1.Latch.
1305+
// Note: mergeLatch requires an updated DT.
1306+
mergeLatch(FC0, FC1);
1307+
12751308
// Merge the loops.
12761309
SmallVector<BasicBlock *, 8> Blocks(FC1.L->block_begin(),
12771310
FC1.L->block_end());
@@ -1491,6 +1524,10 @@ struct LoopFuser {
14911524
FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header);
14921525
FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header);
14931526

1527+
// Change the condition of FC0 latch branch to true, as both successors of
1528+
// the branch are the same.
1529+
simplifyLatchBranch(FC0);
1530+
14941531
// If FC0.Latch and FC0.ExitingBlock are the same then we have already
14951532
// performed the updates above.
14961533
if (FC0.Latch != FC0.ExitingBlock)
@@ -1522,9 +1559,15 @@ struct LoopFuser {
15221559

15231560
// Is there a way to keep SE up-to-date so we don't need to forget the loops
15241561
// and rebuild the information in subsequent passes of fusion?
1562+
// Note: Need to forget the loops before merging the loop latches, as
1563+
// mergeLatch may remove the only block in FC1.
15251564
SE.forgetLoop(FC1.L);
15261565
SE.forgetLoop(FC0.L);
15271566

1567+
// Move instructions from FC0.Latch to FC1.Latch.
1568+
// Note: mergeLatch requires an updated DT.
1569+
mergeLatch(FC0, FC1);
1570+
15281571
// Merge the loops.
15291572
SmallVector<BasicBlock *, 8> Blocks(FC1.L->block_begin(),
15301573
FC1.L->block_end());

llvm/lib/Transforms/Utils/CodeMoverUtils.cpp

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -117,9 +117,9 @@ bool llvm::isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
117117
if (MoveForward) {
118118
// When I is being moved forward, we need to make sure the InsertPoint
119119
// dominates every user. Otherwise, a user may be using an undefined I.
120-
for (const Value *User : I.users())
121-
if (auto *UserInst = dyn_cast<Instruction>(User))
122-
if (!DT.dominates(&InsertPoint, UserInst))
120+
for (const Use &U : I.uses())
121+
if (auto *UserInst = dyn_cast<Instruction>(U.getUser()))
122+
if (UserInst != &InsertPoint && !DT.dominates(&InsertPoint, U))
123123
return false;
124124
} else {
125125
// When I is being moved backward, we need to make sure all its operands
@@ -173,3 +173,17 @@ bool llvm::isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
173173

174174
return true;
175175
}
176+
177+
/// Move instructions from \p FromBB to the beginning of \p ToBB, visiting
/// \p FromBB from the bottom up. An instruction is moved only when
/// isSafeToMoveBefore accepts it for the current insertion point; otherwise it
/// stays in \p FromBB.
///
/// The traversal starts at the second-to-last instruction, so the last
/// instruction of \p FromBB (presumably its terminator) is never considered.
void llvm::moveInstsBottomUp(BasicBlock &FromBB, BasicBlock &ToBB,
178+
const DominatorTree &DT,
179+
const PostDominatorTree &PDT, DependenceInfo &DI) {
180+
for (auto It = ++FromBB.rbegin(); It != FromBB.rend();) {
// Recompute the insertion point on every iteration: after a move it is the
// instruction that was just moved, so instructions visited later (which
// come earlier in FromBB) land in front of it and keep their relative
// order.
181+
Instruction *MovePos = ToBB.getFirstNonPHIOrDbg();
182+
Instruction &I = *It;
183+
// Increment the iterator before modifying FromBB.
184+
++It;
185+
186+
if (isSafeToMoveBefore(I, *MovePos, DT, PDT, DI))
187+
I.moveBefore(MovePos);
188+
}
189+
}

llvm/test/Transforms/LoopFusion/four_loops.ll

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,20 +9,14 @@
99
; CHECK-NEXT: bb:
1010
; CHECK-NEXT: br label %[[LOOP1HEADER:bb[0-9]+]]
1111
; CHECK: [[LOOP1HEADER]]
12-
; CHECK: br label %[[LOOP1LATCH:bb[0-9]+]]
13-
; CHECK: [[LOOP1LATCH]]
14-
; CHECK: br i1 %{{.*}}, label %[[LOOP2BODY:bb[0-9]+]], label %[[LOOP2BODY]]
12+
; CHECK: br label %[[LOOP2BODY:bb[0-9]+]]
1513
; CHECK: [[LOOP2BODY]]
16-
; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]]
17-
; CHECK: [[LOOP2LATCH]]
18-
; CHECK: br i1 %{{.*}}, label %[[LOOP3BODY:bb[0-9]+]], label %[[LOOP3BODY]]
14+
; CHECK: br label %[[LOOP3BODY:bb[0-9]+]]
1915
; CHECK: [[LOOP3BODY]]
20-
; CHECK: br label %[[LOOP3LATCH:bb[0-9]+]]
21-
; CHECK: [[LOOP3LATCH]]
22-
; CHECK: br i1 %{{.*}}, label %[[LOOP4BODY:bb[0-9]+]], label %[[LOOP4BODY]]
16+
; CHECK: br label %[[LOOP4BODY:bb[0-9]+]]
2317
; CHECK: [[LOOP4BODY]]
24-
; CHECK: br label %[[LOOP4LATCH:bb[0-9]+]]
25-
; CHECK: [[LOOP4LATCH]]
18+
; CHECK: br label %[[LOOP1LATCH:bb[0-9]+]]
19+
; CHECK: [[LOOP1LATCH]]
2620
; CHECK: br i1 %{{.*}}, label %[[LOOP1HEADER]], label %[[LOOPEXIT:bb[0-9]+]]
2721
; CHECK: ret void
2822
define void @dep_free() {

llvm/test/Transforms/LoopFusion/guarded.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@
88
; CHECK: [[LOOP1PREHEADER]]
99
; CHECK-NEXT: br label %[[LOOP1BODY:bb[0-9]*]]
1010
; CHECK: [[LOOP1BODY]]
11-
; CHECK: br i1 %{{.*}}, label %[[LOOP2BODY:bb[0-9]*]], label %[[LOOP2BODY]]
12-
; CHECK: [[LOOP2BODY]]
1311
; CHECK: br i1 %{{.*}}, label %[[LOOP1BODY]], label %[[LOOP2EXIT:bb[0-9]+]]
1412
; CHECK: [[LOOP2EXIT]]
1513
; CHECK: br label %[[LOOP1SUCC]]

llvm/test/Transforms/LoopFusion/loop_nest.ll

Lines changed: 14 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -25,19 +25,16 @@
2525
; CHECK: [[LOOP1HEADER]]
2626
; CHECK: br label %[[LOOP3HEADER:bb[0-9]+]]
2727
; CHECK: [[LOOP3HEADER]]
28-
; CHECK: br label %[[LOOP3LATCH:bb[0-9]+]]
29-
; CHECK: [[LOOP3LATCH]]
30-
; CHECK: br i1 %{{.*}}, label %[[LOOP3HEADER]], label %[[LOOP1LATCH:bb[0-9]+]]
31-
; CHECK: [[LOOP1LATCH]]
32-
; CHECK: br i1 %{{.*}}, label %[[LOOP2PREHEADER:bb[0-9]+]], label %[[LOOP2PREHEADER]]
33-
; CHECK: [[LOOP2PREHEADER]]
28+
; CHECK: br label %[[LOOP2HEADER:bb[0-9]+]]
29+
; CHECK: [[LOOP2HEADER]]
3430
; CHECK: br label %[[LOOP4HEADER:bb[0-9]+]]
3531
; CHECK: [[LOOP4HEADER]]
36-
; CHECK: br label %[[LOOP4LATCH:bb[0-9]+]]
37-
; CHECK: [[LOOP4LATCH]]
38-
; CHECK: br i1 %{{.*}}, label %[[LOOP4HEADER]], label %[[LOOP2LATCH:bb[0-9]+]]
39-
; CHECK: [[LOOP2LATCH]]
40-
; CHECK: br i1 %{{.*}}, label %[[LOOP1HEADER]], label %[[LOOP1EXIT:bb[0-9]*]]
32+
; CHECK: br i1 %{{.*}}, label %[[LOOP3HEADER]], label %[[LOOP1LATCH:bb[0-9]+]]
33+
; CHECK: [[LOOP1LATCH]]
34+
; CHECK-NEXT: %inc.outer.fc0 = add nuw nsw i64 %indvars.iv105, 1
35+
; CHECK-NEXT: %add.outer.fc0 = add nuw nsw i32 %.06, 1
36+
; CHECK-NEXT: %cmp.outer.fc0 = icmp ne i64 %inc.outer.fc0, 100
37+
; CHECK: br i1 %{{.*}}, label %[[LOOP1HEADER]], label %[[LOOP1EXIT:bb[0-9]*]]
4138
; CHECK: ret void
4239

4340
; TODO: The current version of loop fusion does not allow the inner loops to be
@@ -48,8 +45,8 @@ bb:
4845
br label %bb16
4946

5047
bb16: ; preds = %bb, %bb27
51-
%.06 = phi i32 [ 0, %bb ], [ %tmp28, %bb27 ]
52-
%indvars.iv105 = phi i64 [ 0, %bb ], [ %indvars.iv.next11, %bb27 ]
48+
%.06 = phi i32 [ 0, %bb ], [ %add.outer.fc0, %bb27 ]
49+
%indvars.iv105 = phi i64 [ 0, %bb ], [ %inc.outer.fc0, %bb27 ]
5350
br label %bb18
5451

5552
bb30: ; preds = %bb27
@@ -73,10 +70,10 @@ bb25: ; preds = %bb18
7370
br i1 %exitcond9, label %bb18, label %bb27
7471

7572
bb27: ; preds = %bb25
76-
%indvars.iv.next11 = add nuw nsw i64 %indvars.iv105, 1
77-
%tmp28 = add nuw nsw i32 %.06, 1
78-
%exitcond12 = icmp ne i64 %indvars.iv.next11, 100
79-
br i1 %exitcond12, label %bb16, label %bb30
73+
%inc.outer.fc0 = add nuw nsw i64 %indvars.iv105, 1
74+
%add.outer.fc0 = add nuw nsw i32 %.06, 1
75+
%cmp.outer.fc0 = icmp ne i64 %inc.outer.fc0, 100
76+
br i1 %cmp.outer.fc0, label %bb16, label %bb30
8077

8178
bb33: ; preds = %bb30, %bb45
8279
%.023 = phi i32 [ 0, %bb30 ], [ %tmp46, %bb45 ]

llvm/test/Transforms/LoopFusion/simple.ll

Lines changed: 54 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,7 @@
66
; CHECK-NEXT: bb:
77
; CHECK-NEXT: br label %[[LOOP1HEADER:bb[0-9]*]]
88
; CHECK: [[LOOP1HEADER]]
9-
; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]]
10-
; CHECK: [[LOOP1LATCH]]
11-
; CHECK: br i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]+]], label %[[LOOP2HEADER]]
9+
; CHECK: br label %[[LOOP2HEADER:bb[0-9]*]]
1210
; CHECK: [[LOOP2HEADER]]
1311
; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]]
1412
; CHECK: [[LOOP2LATCH]]
@@ -72,9 +70,7 @@ bb29: ; preds = %bb18
7270
; CHECK: [[LOOP1PREHEADER]]
7371
; CHECK: br label %[[LOOP1HEADER:bb[0-9]*]]
7472
; CHECK: [[LOOP1HEADER]]
75-
; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]]
76-
; CHECK: [[LOOP1LATCH]]
77-
; CHECK: br i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]*]], label %[[LOOP2HEADER]]
73+
; CHECK: br label %[[LOOP2HEADER:bb[0-9]*]]
7874
; CHECK: [[LOOP2HEADER]]
7975
; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]]
8076
; CHECK: [[LOOP2LATCH]]
@@ -129,9 +125,7 @@ bb27: ; preds = %bb17
129125
; CHECK-NEXT: bb:
130126
; CHECK-NEXT: br label %[[LOOP1HEADER:bb[0-9]*]]
131127
; CHECK: [[LOOP1HEADER]]
132-
; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]]
133-
; CHECK: [[LOOP1LATCH]]
134-
; CHECK: br i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]+]], label %[[LOOP2HEADER]]
128+
; CHECK: br label %[[LOOP2HEADER:bb[0-9]*]]
135129
; CHECK: [[LOOP2HEADER]]
136130
; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]]
137131
; CHECK: [[LOOP2LATCH]]
@@ -179,8 +173,6 @@ bb19: ; preds = %bb18
179173
; CHECK: [[LOOP1PREHEADER]]
180174
; CHECK: br label %[[LOOP1HEADER:bb[0-9]*]]
181175
; CHECK: [[LOOP1HEADER]]
182-
; CHECK: br i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]*]], label %[[LOOP2HEADER]]
183-
; CHECK: [[LOOP2HEADER]]
184176
; CHECK: br i1 %{{.*}}, label %[[LOOP1HEADER]], label %[[EXITBLOCK]]
185177
; CHECK: ret void
186178
define void @raw_only_parametric(i32* noalias %arg, i32 %arg4) {
@@ -217,9 +209,7 @@ bb23: ; preds = %bb17, %bb
217209
; CHECK-NEXT: bb:
218210
; CHECK: br label %[[LOOP1HEADER:bb[0-9]*]]
219211
; CHECK: [[LOOP1HEADER]]
220-
; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]]
221-
; CHECK: [[LOOP1LATCH]]
222-
; CHECK: br i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]+]], label %[[LOOP2HEADER]]
212+
; CHECK: br label %[[LOOP2HEADER:bb[0-9]*]]
223213
; CHECK: [[LOOP2HEADER]]
224214
; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]]
225215
; CHECK: [[LOOP2LATCH]]
@@ -266,3 +256,53 @@ bb25: ; preds = %bb19
266256
bb26: ; preds = %bb25
267257
ret void
268258
}
259+
260+
; Test that instructions in loop 1 latch are moved to the beginning of loop 2
261+
; latch iff it is proven safe. %inc.first and %cmp.first are moved, but
262+
; `store i32 0, i32* %Ai.first` is not.
263+
264+
; CHECK: void @flow_dep
265+
; CHECK-LABEL: entry:
266+
; CHECK-NEXT: br label %for.first
267+
; CHECK-LABEL: for.first:
268+
; CHECK: store i32 0, i32* %Ai.first
269+
; CHECK: %Ai.second =
270+
; CHECK: br label %for.second.latch
271+
; CHECK-LABEL: for.second.latch:
272+
; CHECK-NEXT: %inc.first = add nsw i64 %i.first, 1
273+
; CHECK-NEXT: %cmp.first = icmp slt i64 %inc.first, 100
274+
; CHECK: br i1 %cmp.second, label %for.first, label %for.end
275+
; CHECK-LABEL: for.end:
276+
; CHECK-NEXT: ret void
277+
278+
define void @flow_dep(i32* noalias %A, i32* noalias %B) {
279+
entry:
280+
br label %for.first
281+
282+
for.first:
283+
%i.first = phi i64 [ 0, %entry ], [ %inc.first, %for.first ]
284+
%Ai.first = getelementptr inbounds i32, i32* %A, i64 %i.first
285+
store i32 0, i32* %Ai.first, align 4
286+
%inc.first = add nsw i64 %i.first, 1
287+
%cmp.first = icmp slt i64 %inc.first, 100
288+
br i1 %cmp.first, label %for.first, label %for.second.preheader
289+
290+
for.second.preheader:
291+
br label %for.second
292+
293+
for.second:
294+
%i.second = phi i64 [ %inc.second, %for.second.latch ], [ 0, %for.second.preheader ]
295+
%Ai.second = getelementptr inbounds i32, i32* %A, i64 %i.second
296+
%0 = load i32, i32* %Ai.second, align 4
297+
%Bi = getelementptr inbounds i32, i32* %B, i64 %i.second
298+
store i32 %0, i32* %Bi, align 4
299+
br label %for.second.latch
300+
301+
for.second.latch:
302+
%inc.second = add nsw i64 %i.second, 1
303+
%cmp.second = icmp slt i64 %inc.second, 100
304+
br i1 %cmp.second, label %for.second, label %for.end
305+
306+
for.end:
307+
ret void
308+
}

0 commit comments

Comments
 (0)