Skip to content

Commit 1006ac3

Browse files
author
Whitney Tsang
committed
[LoopNest] Consider loop nest with inner loop guard using outer loop
induction variable to be perfect. This patch allows more conditional branches to be considered loop guards, and so more loop nests can be considered perfect. Reviewed By: bmahjour, sidbav Differential Revision: https://reviews.llvm.org/D94717
1 parent f744723 commit 1006ac3

File tree

6 files changed

+220
-81
lines changed

6 files changed

+220
-81
lines changed

llvm/include/llvm/Analysis/LoopNestAnalysis.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,12 @@ class LoopNest {
6161
static unsigned getMaxPerfectDepth(const Loop &Root, ScalarEvolution &SE);
6262

6363
/// Recursively traverse all empty 'single successor' basic blocks of \p From
64-
/// (if there are any). Return the last basic block found or \p End if it was
65-
/// reached during the search.
64+
/// (if there are any). When \p CheckUniquePred is set to true, check if
65+
/// each of the empty single successors has a unique predecessor. Return
66+
/// the last basic block found or \p End if it was reached during the search.
6667
static const BasicBlock &skipEmptyBlockUntil(const BasicBlock *From,
67-
const BasicBlock *End);
68+
const BasicBlock *End,
69+
bool CheckUniquePred = false);
6870

6971
/// Return the outermost loop in the loop nest.
7072
Loop &getOutermostLoop() const { return *Loops.front(); }

llvm/lib/Analysis/LoopInfo.cpp

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "llvm/Analysis/IVDescriptors.h"
2121
#include "llvm/Analysis/LoopInfoImpl.h"
2222
#include "llvm/Analysis/LoopIterator.h"
23+
#include "llvm/Analysis/LoopNestAnalysis.h"
2324
#include "llvm/Analysis/MemorySSA.h"
2425
#include "llvm/Analysis/MemorySSAUpdater.h"
2526
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -380,10 +381,6 @@ BranchInst *Loop::getLoopGuardBranch() const {
380381
if (!ExitFromLatch)
381382
return nullptr;
382383

383-
BasicBlock *ExitFromLatchSucc = ExitFromLatch->getUniqueSuccessor();
384-
if (!ExitFromLatchSucc)
385-
return nullptr;
386-
387384
BasicBlock *GuardBB = Preheader->getUniquePredecessor();
388385
if (!GuardBB)
389386
return nullptr;
@@ -397,7 +394,17 @@ BranchInst *Loop::getLoopGuardBranch() const {
397394
BasicBlock *GuardOtherSucc = (GuardBI->getSuccessor(0) == Preheader)
398395
? GuardBI->getSuccessor(1)
399396
: GuardBI->getSuccessor(0);
400-
return (GuardOtherSucc == ExitFromLatchSucc) ? GuardBI : nullptr;
397+
398+
// Check if ExitFromLatch (or any BasicBlock which is an empty unique
399+
// successor of ExitFromLatch) is equal to GuardOtherSucc. If
400+
// skipEmptyBlockUntil returns GuardOtherSucc, then the guard branch for the
401+
// loop is GuardBI (return GuardBI), otherwise return nullptr.
402+
if (&LoopNest::skipEmptyBlockUntil(ExitFromLatch, GuardOtherSucc,
403+
/*CheckUniquePred=*/true) ==
404+
GuardOtherSucc)
405+
return GuardBI;
406+
else
407+
return nullptr;
401408
}
402409

403410
bool Loop::isCanonical(ScalarEvolution &SE) const {

llvm/lib/Analysis/LoopNestAnalysis.cpp

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,8 @@ unsigned LoopNest::getMaxPerfectDepth(const Loop &Root, ScalarEvolution &SE) {
206206
}
207207

208208
const BasicBlock &LoopNest::skipEmptyBlockUntil(const BasicBlock *From,
209-
const BasicBlock *End) {
209+
const BasicBlock *End,
210+
bool CheckUniquePred) {
210211
assert(From && "Expecting valid From");
211212
assert(End && "Expecting valid End");
212213

@@ -220,8 +221,9 @@ const BasicBlock &LoopNest::skipEmptyBlockUntil(const BasicBlock *From,
220221
// Visited is used to avoid running into an infinite loop.
221222
SmallPtrSet<const BasicBlock *, 4> Visited;
222223
const BasicBlock *BB = From->getUniqueSuccessor();
223-
const BasicBlock *PredBB = BB;
224-
while (BB && BB != End && IsEmpty(BB) && !Visited.count(BB)) {
224+
const BasicBlock *PredBB = From;
225+
while (BB && BB != End && IsEmpty(BB) && !Visited.count(BB) &&
226+
(!CheckUniquePred || BB->getUniquePredecessor())) {
225227
Visited.insert(BB);
226228
PredBB = BB;
227229
BB = BB->getUniqueSuccessor();
@@ -335,9 +337,11 @@ static bool checkLoopsStructure(const Loop &OuterLoop, const Loop &InnerLoop,
335337

336338
// Ensure the inner loop exit block leads to the outer loop latch possibly
337339
// through empty blocks.
338-
const BasicBlock &SuccInner =
339-
LoopNest::skipEmptyBlockUntil(InnerLoop.getExitBlock(), OuterLoopLatch);
340-
if (&SuccInner != OuterLoopLatch && &SuccInner != ExtraPhiBlock) {
340+
if ((!ExtraPhiBlock ||
341+
&LoopNest::skipEmptyBlockUntil(InnerLoop.getExitBlock(),
342+
ExtraPhiBlock) != ExtraPhiBlock) &&
343+
(&LoopNest::skipEmptyBlockUntil(InnerLoop.getExitBlock(),
344+
OuterLoopLatch) != OuterLoopLatch)) {
341345
DEBUG_WITH_TYPE(
342346
VerboseDebug,
343347
dbgs() << "Inner loop exit block " << *InnerLoopExit

llvm/test/Analysis/LoopNestAnalysis/imperfectnest.ll

Lines changed: 0 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -424,70 +424,3 @@ for.cond.for.end13_crit_edge:
424424
for.end13:
425425
ret void
426426
}
427-
428-
; Test an imperfect loop nest of the form:
429-
; for (int i = 0; i < nx; ++i)
430-
; if (i > 5) { // user branch
431-
; for (int j = 1; j <= 5; j+=2)
432-
; y[j][i] = x[i][j] + j;
433-
; }
434-
435-
define void @imperf_nest_6(i32** %y, i32** %x, i32 signext %nx, i32 signext %ny) {
436-
; CHECK-LABEL: IsPerfect=false, Depth=2, OutermostLoop: imperf_nest_6_loop_i, Loops: ( imperf_nest_6_loop_i imperf_nest_6_loop_j )
437-
entry:
438-
%cmp2 = icmp slt i32 0, %nx
439-
br i1 %cmp2, label %imperf_nest_6_loop_i.lr.ph, label %for.end13
440-
441-
imperf_nest_6_loop_i.lr.ph:
442-
br label %imperf_nest_6_loop_i
443-
444-
imperf_nest_6_loop_i:
445-
%i.0 = phi i32 [ 0, %imperf_nest_6_loop_i.lr.ph ], [ %inc12, %for.inc11 ]
446-
%cmp1 = icmp sgt i32 %i.0, 5
447-
br i1 %cmp1, label %imperf_nest_6_loop_j.lr.ph, label %if.end
448-
449-
imperf_nest_6_loop_j.lr.ph:
450-
br label %imperf_nest_6_loop_j
451-
452-
imperf_nest_6_loop_j:
453-
%j.0 = phi i32 [ 1, %imperf_nest_6_loop_j.lr.ph ], [ %inc, %for.inc ]
454-
%idxprom = sext i32 %i.0 to i64
455-
%arrayidx = getelementptr inbounds i32*, i32** %x, i64 %idxprom
456-
%0 = load i32*, i32** %arrayidx, align 8
457-
%idxprom5 = sext i32 %j.0 to i64
458-
%arrayidx6 = getelementptr inbounds i32, i32* %0, i64 %idxprom5
459-
%1 = load i32, i32* %arrayidx6, align 4
460-
%add = add nsw i32 %1, %j.0
461-
%idxprom7 = sext i32 %j.0 to i64
462-
%arrayidx8 = getelementptr inbounds i32*, i32** %y, i64 %idxprom7
463-
%2 = load i32*, i32** %arrayidx8, align 8
464-
%idxprom9 = sext i32 %i.0 to i64
465-
%arrayidx10 = getelementptr inbounds i32, i32* %2, i64 %idxprom9
466-
store i32 %add, i32* %arrayidx10, align 4
467-
br label %for.inc
468-
469-
for.inc:
470-
%inc = add nsw i32 %j.0, 2
471-
%cmp3 = icmp sle i32 %inc, 5
472-
br i1 %cmp3, label %imperf_nest_6_loop_j, label %for.cond2.for.end_crit_edge
473-
474-
for.cond2.for.end_crit_edge:
475-
br label %for.end
476-
477-
for.end:
478-
br label %if.end
479-
480-
if.end:
481-
br label %for.inc11
482-
483-
for.inc11:
484-
%inc12 = add nsw i32 %i.0, 1
485-
%cmp = icmp slt i32 %inc12, %nx
486-
br i1 %cmp, label %imperf_nest_6_loop_i, label %for.cond.for.end13_crit_edge
487-
488-
for.cond.for.end13_crit_edge:
489-
br label %for.end13
490-
491-
for.end13:
492-
ret void
493-
}

llvm/test/Analysis/LoopNestAnalysis/perfectnest.ll

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,3 +322,148 @@ for.end7:
322322
%x.addr.0.lcssa = phi i32 [ %split7, %for.cond.for.end7_crit_edge ], [ %x, %entry ]
323323
ret i32 %x.addr.0.lcssa
324324
}
325+
326+
; Test a perfect loop nest of the form:
327+
; for (int i = 0; i < nx; ++i)
328+
; if (i < ny) { // guard branch for the j-loop
329+
; for (int j=i; j < ny; j+=1)
330+
; y[j][i] = x[i][j] + j;
331+
; }
332+
define double @perf_nest_guard_branch(i32** %y, i32** %x, i32 signext %nx, i32 signext %ny) {
333+
; CHECK-LABEL: IsPerfect=true, Depth=1, OutermostLoop: test6Loop2, Loops: ( test6Loop2 )
334+
; CHECK-LABEL: IsPerfect=true, Depth=2, OutermostLoop: test6Loop1, Loops: ( test6Loop1 test6Loop2 )
335+
entry:
336+
%cmp2 = icmp slt i32 0, %nx
337+
br i1 %cmp2, label %test6Loop1.lr.ph, label %for.end13
338+
339+
test6Loop1.lr.ph: ; preds = %entry
340+
br label %test6Loop1
341+
342+
test6Loop1: ; preds = %test6Loop1.lr.ph, %for.inc11
343+
%i.0 = phi i32 [ 0, %test6Loop1.lr.ph ], [ %inc12, %for.inc11 ]
344+
%cmp1 = icmp slt i32 %i.0, %ny
345+
br i1 %cmp1, label %test6Loop2.lr.ph, label %if.end
346+
347+
test6Loop2.lr.ph: ; preds = %test6Loop1
348+
br label %test6Loop2
349+
350+
test6Loop2: ; preds = %test6Loop2.lr.ph, %for.inc
351+
%j.0 = phi i32 [ %i.0, %test6Loop2.lr.ph ], [ %inc, %for.inc ]
352+
%idxprom = sext i32 %i.0 to i64
353+
%arrayidx = getelementptr inbounds i32*, i32** %x, i64 %idxprom
354+
%0 = load i32*, i32** %arrayidx, align 8
355+
%idxprom5 = sext i32 %j.0 to i64
356+
%arrayidx6 = getelementptr inbounds i32, i32* %0, i64 %idxprom5
357+
%1 = load i32, i32* %arrayidx6, align 4
358+
%add = add nsw i32 %1, %j.0
359+
%idxprom7 = sext i32 %j.0 to i64
360+
%arrayidx8 = getelementptr inbounds i32*, i32** %y, i64 %idxprom7
361+
%2 = load i32*, i32** %arrayidx8, align 8
362+
%idxprom9 = sext i32 %i.0 to i64
363+
%arrayidx10 = getelementptr inbounds i32, i32* %2, i64 %idxprom9
364+
store i32 %add, i32* %arrayidx10, align 4
365+
br label %for.inc
366+
367+
for.inc: ; preds = %test6Loop2
368+
%inc = add nsw i32 %j.0, 1
369+
%cmp3 = icmp slt i32 %inc, %ny
370+
br i1 %cmp3, label %test6Loop2, label %for.cond2.for.end_crit_edge
371+
372+
for.cond2.for.end_crit_edge: ; preds = %for.inc
373+
br label %for.end
374+
375+
for.end: ; preds = %for.cond2.for.end_crit_edge
376+
br label %if.end
377+
378+
if.end: ; preds = %for.end, %test6Loop1
379+
br label %for.inc11
380+
381+
for.inc11: ; preds = %if.end
382+
%inc12 = add nsw i32 %i.0, 1
383+
%cmp = icmp slt i32 %inc12, %nx
384+
br i1 %cmp, label %test6Loop1, label %for.cond.for.end13_crit_edge
385+
386+
for.cond.for.end13_crit_edge: ; preds = %for.inc11
387+
br label %for.end13
388+
389+
for.end13: ; preds = %for.cond.for.end13_crit_edge, %entry
390+
%arrayidx14 = getelementptr inbounds i32*, i32** %y, i64 0
391+
%3 = load i32*, i32** %arrayidx14, align 8
392+
%arrayidx15 = getelementptr inbounds i32, i32* %3, i64 0
393+
%4 = load i32, i32* %arrayidx15, align 4
394+
%conv = sitofp i32 %4 to double
395+
ret double %conv
396+
}
397+
398+
; Test a perfect loop nest of the form:
399+
; for (int i = 0; i < nx; ++i)
400+
; if (i < ny) { // guard branch for the j-loop
401+
; for (int j=i; j < ny; j+=1)
402+
; y[j][i] = x[i][j] + j;
403+
; }
404+
405+
define double @test6(i32** %y, i32** %x, i32 signext %nx, i32 signext %ny) {
406+
; CHECK-LABEL: IsPerfect=true, Depth=1, OutermostLoop: test6Loop2, Loops: ( test6Loop2 )
407+
; CHECK-LABEL: IsPerfect=true, Depth=2, OutermostLoop: test6Loop1, Loops: ( test6Loop1 test6Loop2 )
408+
entry:
409+
%cmp2 = icmp slt i32 0, %nx
410+
br i1 %cmp2, label %test6Loop1.lr.ph, label %for.end13
411+
412+
test6Loop1.lr.ph: ; preds = %entry
413+
br label %test6Loop1
414+
415+
test6Loop1: ; preds = %test6Loop1.lr.ph, %for.inc11
416+
%i.0 = phi i32 [ 0, %test6Loop1.lr.ph ], [ %inc12, %for.inc11 ]
417+
%cmp1 = icmp slt i32 %i.0, %ny
418+
br i1 %cmp1, label %test6Loop2.lr.ph, label %if.end
419+
420+
test6Loop2.lr.ph: ; preds = %test6Loop1
421+
br label %test6Loop2
422+
423+
test6Loop2: ; preds = %test6Loop2.lr.ph, %for.inc
424+
%j.0 = phi i32 [ %i.0, %test6Loop2.lr.ph ], [ %inc, %for.inc ]
425+
%idxprom = sext i32 %i.0 to i64
426+
%arrayidx = getelementptr inbounds i32*, i32** %x, i64 %idxprom
427+
%0 = load i32*, i32** %arrayidx, align 8
428+
%idxprom5 = sext i32 %j.0 to i64
429+
%arrayidx6 = getelementptr inbounds i32, i32* %0, i64 %idxprom5
430+
%1 = load i32, i32* %arrayidx6, align 4
431+
%add = add nsw i32 %1, %j.0
432+
%idxprom7 = sext i32 %j.0 to i64
433+
%arrayidx8 = getelementptr inbounds i32*, i32** %y, i64 %idxprom7
434+
%2 = load i32*, i32** %arrayidx8, align 8
435+
%idxprom9 = sext i32 %i.0 to i64
436+
%arrayidx10 = getelementptr inbounds i32, i32* %2, i64 %idxprom9
437+
store i32 %add, i32* %arrayidx10, align 4
438+
br label %for.inc
439+
440+
for.inc: ; preds = %test6Loop2
441+
%inc = add nsw i32 %j.0, 1
442+
%cmp3 = icmp slt i32 %inc, %ny
443+
br i1 %cmp3, label %test6Loop2, label %for.cond2.for.end_crit_edge
444+
445+
for.cond2.for.end_crit_edge: ; preds = %for.inc
446+
br label %for.end
447+
448+
for.end: ; preds = %for.cond2.for.end_crit_edge
449+
br label %if.end
450+
451+
if.end: ; preds = %for.end, %test6Loop1
452+
br label %for.inc11
453+
454+
for.inc11: ; preds = %if.end
455+
%inc12 = add nsw i32 %i.0, 1
456+
%cmp = icmp slt i32 %inc12, %nx
457+
br i1 %cmp, label %test6Loop1, label %for.cond.for.end13_crit_edge
458+
459+
for.cond.for.end13_crit_edge: ; preds = %for.inc11
460+
br label %for.end13
461+
462+
for.end13: ; preds = %for.cond.for.end13_crit_edge, %entry
463+
%arrayidx14 = getelementptr inbounds i32*, i32** %y, i64 0
464+
%3 = load i32*, i32** %arrayidx14, align 8
465+
%arrayidx15 = getelementptr inbounds i32, i32* %3, i64 0
466+
%4 = load i32, i32* %arrayidx15, align 4
467+
%conv = sitofp i32 %4 to double
468+
ret double %conv
469+
}

llvm/unittests/Analysis/LoopInfoTest.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1500,3 +1500,51 @@ TEST(LoopInfoTest, LoopNotRotated) {
15001500
EXPECT_FALSE(L->isRotatedForm());
15011501
});
15021502
}
1503+
1504+
TEST(LoopInfoTest, LoopUserBranch) {
1505+
const char *ModuleStr =
1506+
"target datalayout = \"e-m:o-i64:64-f80:128-n8:16:32:64-S128\"\n"
1507+
"define void @foo(i32* %B, i64 signext %nx, i1 %cond) {\n"
1508+
"entry:\n"
1509+
" br i1 %cond, label %bb, label %guard\n"
1510+
"guard:\n"
1511+
" %cmp.guard = icmp slt i64 0, %nx\n"
1512+
" br i1 %cmp.guard, label %for.i.preheader, label %for.end\n"
1513+
"for.i.preheader:\n"
1514+
" br label %for.i\n"
1515+
"for.i:\n"
1516+
" %i = phi i64 [ 0, %for.i.preheader ], [ %inc13, %for.i ]\n"
1517+
" %Bi = getelementptr inbounds i32, i32* %B, i64 %i\n"
1518+
" store i32 0, i32* %Bi, align 4\n"
1519+
" %inc13 = add nsw i64 %i, 1\n"
1520+
" %cmp = icmp slt i64 %inc13, %nx\n"
1521+
" br i1 %cmp, label %for.i, label %for.i.exit\n"
1522+
"for.i.exit:\n"
1523+
" br label %bb\n"
1524+
"bb:\n"
1525+
" br label %for.end\n"
1526+
"for.end:\n"
1527+
" ret void\n"
1528+
"}\n";
1529+
1530+
// Parse the module.
1531+
LLVMContext Context;
1532+
std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
1533+
1534+
runWithLoopInfo(*M, "foo", [&](Function &F, LoopInfo &LI) {
1535+
Function::iterator FI = F.begin();
1536+
FI = ++FI;
1537+
BasicBlock *Guard = &*FI;
1538+
assert(Guard->getName() == "guard");
1539+
1540+
FI = ++FI;
1541+
BasicBlock *Header = &*(++FI);
1542+
assert(Header->getName() == "for.i");
1543+
1544+
Loop *L = LI.getLoopFor(Header);
1545+
EXPECT_NE(L, nullptr);
1546+
1547+
// L should not have a guard branch
1548+
EXPECT_EQ(L->getLoopGuardBranch(), nullptr);
1549+
});
1550+
}

0 commit comments

Comments
 (0)