Skip to content

Commit 6e045e3

Browse files
committed
[LoopUnroll] Add support for loops with exiting headers and uncond latches.
This patch generalizes the UnrollLoop utility to support loops that exit from the header instead of the latch. Usually, LoopRotate would take care of most of those cases, but in some cases (e.g. -Oz), LoopRotate does not kick in. Codesize impact looks relatively neutral on ARM64 with -Oz + LTO. Program master patch diff External/S.../CFP2006/447.dealII/447.dealII 629060.00 627676.00 -0.2% External/SPEC/CINT2000/176.gcc/176.gcc 1245916.00 1244932.00 -0.1% MultiSourc...Prolangs-C/simulator/simulator 86100.00 86156.00 0.1% MultiSourc...arks/Rodinia/backprop/backprop 66212.00 66252.00 0.1% MultiSourc...chmarks/Prolangs-C++/life/life 67276.00 67312.00 0.1% MultiSourc...s/Prolangs-C/compiler/compiler 69824.00 69788.00 -0.1% MultiSourc...Prolangs-C/assembler/assembler 86672.00 86696.00 0.0% Reviewers: efriedma, vsk, paquette Reviewed By: paquette Differential Revision: https://reviews.llvm.org/D61962 llvm-svn: 364398
1 parent 956a787 commit 6e045e3

File tree

6 files changed

+523
-64
lines changed

6 files changed

+523
-64
lines changed

llvm/lib/Transforms/Utils/LoopUnroll.cpp

Lines changed: 170 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ using namespace llvm;
4444
// TODO: Should these be here or in LoopUnroll?
4545
STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
4646
STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
47+
STATISTIC(NumUnrolledWithHeader, "Number of loops unrolled without a "
48+
"conditional latch (completely or otherwise)");
4749

4850
static cl::opt<bool>
4951
UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden,
@@ -295,28 +297,46 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
295297
return LoopUnrollResult::Unmodified;
296298
}
297299

298-
// The current loop unroll pass can only unroll loops with a single latch
300+
// The current loop unroll pass can unroll loops with a single latch or header
299301
// that's a conditional branch exiting the loop.
300302
// FIXME: The implementation can be extended to work with more complicated
301303
// cases, e.g. loops with multiple latches.
302304
BasicBlock *Header = L->getHeader();
305+
BranchInst *HeaderBI = dyn_cast<BranchInst>(Header->getTerminator());
303306
BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
304307

305-
if (!BI || BI->isUnconditional()) {
306-
// The loop-rotate pass can be helpful to avoid this in many cases.
308+
// FIXME: Support loops without conditional latch and multiple exiting blocks.
309+
if (!BI ||
310+
(BI->isUnconditional() && (!HeaderBI || HeaderBI->isUnconditional() ||
311+
L->getExitingBlock() != Header))) {
312+
LLVM_DEBUG(dbgs() << " Can't unroll; loop not terminated by a conditional "
313+
"branch in the latch or header.\n");
314+
return LoopUnrollResult::Unmodified;
315+
}
316+
317+
auto CheckLatchSuccessors = [&](unsigned S1, unsigned S2) {
318+
return BI->isConditional() && BI->getSuccessor(S1) == Header &&
319+
!L->contains(BI->getSuccessor(S2));
320+
};
321+
322+
// If we have a conditional latch, it must exit the loop.
323+
if (BI && BI->isConditional() && !CheckLatchSuccessors(0, 1) &&
324+
!CheckLatchSuccessors(1, 0)) {
307325
LLVM_DEBUG(
308-
dbgs()
309-
<< " Can't unroll; loop not terminated by a conditional branch.\n");
326+
dbgs() << "Can't unroll; a conditional latch must exit the loop");
310327
return LoopUnrollResult::Unmodified;
311328
}
312329

313-
auto CheckSuccessors = [&](unsigned S1, unsigned S2) {
314-
return BI->getSuccessor(S1) == Header && !L->contains(BI->getSuccessor(S2));
330+
auto CheckHeaderSuccessors = [&](unsigned S1, unsigned S2) {
331+
return HeaderBI && HeaderBI->isConditional() &&
332+
L->contains(HeaderBI->getSuccessor(S1)) &&
333+
!L->contains(HeaderBI->getSuccessor(S2));
315334
};
316335

317-
if (!CheckSuccessors(0, 1) && !CheckSuccessors(1, 0)) {
318-
LLVM_DEBUG(dbgs() << "Can't unroll; only loops with one conditional latch"
319-
" exiting the loop can be unrolled\n");
336+
// If we do not have a conditional latch, the header must exit the loop.
337+
if (BI && !BI->isConditional() && HeaderBI && HeaderBI->isConditional() &&
338+
!CheckHeaderSuccessors(0, 1) && !CheckHeaderSuccessors(1, 0)) {
339+
LLVM_DEBUG(dbgs() << "Can't unroll; conditional header must exit the loop");
320340
return LoopUnrollResult::Unmodified;
321341
}
322342

@@ -503,8 +523,17 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
503523
SE->forgetTopmostLoop(L);
504524
}
505525

506-
bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
507-
BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
526+
bool ContinueOnTrue;
527+
bool LatchIsExiting = BI->isConditional();
528+
BasicBlock *LoopExit = nullptr;
529+
if (LatchIsExiting) {
530+
ContinueOnTrue = L->contains(BI->getSuccessor(0));
531+
LoopExit = BI->getSuccessor(ContinueOnTrue);
532+
} else {
533+
NumUnrolledWithHeader++;
534+
ContinueOnTrue = L->contains(HeaderBI->getSuccessor(0));
535+
LoopExit = HeaderBI->getSuccessor(ContinueOnTrue);
536+
}
508537

509538
// For the first iteration of the loop, we should use the precloned values for
510539
// PHI nodes. Insert associations now.
@@ -514,11 +543,23 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
514543
OrigPHINode.push_back(cast<PHINode>(I));
515544
}
516545

517-
std::vector<BasicBlock*> Headers;
518-
std::vector<BasicBlock*> Latches;
546+
std::vector<BasicBlock *> Headers;
547+
std::vector<BasicBlock *> HeaderSucc;
548+
std::vector<BasicBlock *> Latches;
519549
Headers.push_back(Header);
520550
Latches.push_back(LatchBlock);
521551

552+
if (!LatchIsExiting) {
553+
auto *Term = cast<BranchInst>(Header->getTerminator());
554+
if (Term->isUnconditional() || L->contains(Term->getSuccessor(0))) {
555+
assert(L->contains(Term->getSuccessor(0)));
556+
HeaderSucc.push_back(Term->getSuccessor(0));
557+
} else {
558+
assert(L->contains(Term->getSuccessor(1)));
559+
HeaderSucc.push_back(Term->getSuccessor(1));
560+
}
561+
}
562+
522563
// The current on-the-fly SSA update requires blocks to be processed in
523564
// reverse postorder so that LastValueMap contains the correct value at each
524565
// exit.
@@ -608,6 +649,13 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
608649
if (*BB == LatchBlock)
609650
Latches.push_back(New);
610651

652+
// Keep track of the successor of the new header in the current iteration.
653+
for (auto *Pred : predecessors(*BB))
654+
if (Pred == Header) {
655+
HeaderSucc.push_back(New);
656+
break;
657+
}
658+
611659
NewBlocks.push_back(New);
612660
UnrolledLoopBlocks.push_back(New);
613661

@@ -657,51 +705,21 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
657705
}
658706
}
659707

660-
// Now that all the basic blocks for the unrolled iterations are in place,
661-
// set up the branches to connect them.
662-
for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
663-
// The original branch was replicated in each unrolled iteration.
664-
BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
665-
666-
// The branch destination.
667-
unsigned j = (i + 1) % e;
668-
BasicBlock *Dest = Headers[j];
669-
bool NeedConditional = true;
670-
671-
if (RuntimeTripCount && j != 0) {
672-
NeedConditional = false;
673-
}
674-
675-
// For a complete unroll, make the last iteration end with a branch
676-
// to the exit block.
677-
if (CompletelyUnroll) {
678-
if (j == 0)
679-
Dest = LoopExit;
680-
// If using trip count upper bound to completely unroll, we need to keep
681-
// the conditional branch except the last one because the loop may exit
682-
// after any iteration.
683-
assert(NeedConditional &&
684-
"NeedCondition cannot be modified by both complete "
685-
"unrolling and runtime unrolling");
686-
NeedConditional =
687-
(ULO.PreserveCondBr && j && !(ULO.PreserveOnlyFirst && i != 0));
688-
} else if (j != BreakoutTrip &&
689-
(ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0)) {
690-
// If we know the trip count or a multiple of it, we can safely use an
691-
// unconditional branch for some iterations.
692-
NeedConditional = false;
693-
}
694-
708+
auto setDest = [LoopExit, ContinueOnTrue](BasicBlock *Src, BasicBlock *Dest,
709+
ArrayRef<BasicBlock *> NextBlocks,
710+
BasicBlock *CurrentHeader,
711+
bool NeedConditional) {
712+
auto *Term = cast<BranchInst>(Src->getTerminator());
695713
if (NeedConditional) {
696714
// Update the conditional branch's successor for the following
697715
// iteration.
698716
Term->setSuccessor(!ContinueOnTrue, Dest);
699717
} else {
700718
// Remove phi operands at this loop exit
701719
if (Dest != LoopExit) {
702-
BasicBlock *BB = Latches[i];
703-
for (BasicBlock *Succ: successors(BB)) {
704-
if (Succ == Headers[i])
720+
BasicBlock *BB = Src;
721+
for (BasicBlock *Succ : successors(BB)) {
722+
if (Succ == CurrentHeader)
705723
continue;
706724
for (PHINode &Phi : Succ->phis())
707725
Phi.removeIncomingValue(BB, false);
@@ -711,6 +729,90 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
711729
BranchInst::Create(Dest, Term);
712730
Term->eraseFromParent();
713731
}
732+
};
733+
734+
// Now that all the basic blocks for the unrolled iterations are in place,
735+
// set up the branches to connect them.
736+
if (LatchIsExiting) {
737+
// Set up latches to branch to the new header in the unrolled iterations or
738+
// the loop exit for the last latch in a fully unrolled loop.
739+
for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
740+
// The branch destination.
741+
unsigned j = (i + 1) % e;
742+
BasicBlock *Dest = Headers[j];
743+
bool NeedConditional = true;
744+
745+
if (RuntimeTripCount && j != 0) {
746+
NeedConditional = false;
747+
}
748+
749+
// For a complete unroll, make the last iteration end with a branch
750+
// to the exit block.
751+
if (CompletelyUnroll) {
752+
if (j == 0)
753+
Dest = LoopExit;
754+
// If using trip count upper bound to completely unroll, we need to keep
755+
// the conditional branch except the last one because the loop may exit
756+
// after any iteration.
757+
assert(NeedConditional &&
758+
"NeedCondition cannot be modified by both complete "
759+
"unrolling and runtime unrolling");
760+
NeedConditional =
761+
(ULO.PreserveCondBr && j && !(ULO.PreserveOnlyFirst && i != 0));
762+
} else if (j != BreakoutTrip &&
763+
(ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0)) {
764+
// If we know the trip count or a multiple of it, we can safely use an
765+
// unconditional branch for some iterations.
766+
NeedConditional = false;
767+
}
768+
769+
setDest(Latches[i], Dest, Headers, Headers[i], NeedConditional);
770+
}
771+
} else {
772+
// Setup headers to branch to their new successors in the unrolled
773+
// iterations.
774+
for (unsigned i = 0, e = Headers.size(); i != e; ++i) {
775+
// The branch destination.
776+
unsigned j = (i + 1) % e;
777+
BasicBlock *Dest = HeaderSucc[i];
778+
bool NeedConditional = true;
779+
780+
if (RuntimeTripCount && j != 0)
781+
NeedConditional = false;
782+
783+
if (CompletelyUnroll)
784+
// We cannot drop the conditional branch for the last condition, as we
785+
// may have to execute the loop body depending on the condition.
786+
NeedConditional = j == 0 || ULO.PreserveCondBr;
787+
else if (j != BreakoutTrip &&
788+
(ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0))
789+
// If we know the trip count or a multiple of it, we can safely use an
790+
// unconditional branch for some iterations.
791+
NeedConditional = false;
792+
793+
setDest(Headers[i], Dest, Headers, Headers[i], NeedConditional);
794+
}
795+
796+
// Set up latches to branch to the new header in the unrolled iterations or
797+
// the loop exit for the last latch in a fully unrolled loop.
798+
799+
for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
800+
// The original branch was replicated in each unrolled iteration.
801+
BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
802+
803+
// The branch destination.
804+
unsigned j = (i + 1) % e;
805+
BasicBlock *Dest = Headers[j];
806+
807+
// When completely unrolling, the last latch becomes unreachable.
808+
if (CompletelyUnroll && j == 0)
809+
new UnreachableInst(Term->getContext(), Term);
810+
else
811+
// Replace the conditional branch with an unconditional one.
812+
BranchInst::Create(Dest, Term);
813+
814+
Term->eraseFromParent();
815+
}
714816
}
715817

716818
// Update dominators of blocks we might reach through exits.
@@ -727,19 +829,23 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
727829
ChildrenToUpdate.push_back(ChildBB);
728830
}
729831
BasicBlock *NewIDom;
730-
if (BB == LatchBlock) {
832+
BasicBlock *&TermBlock = LatchIsExiting ? LatchBlock : Header;
833+
auto &TermBlocks = LatchIsExiting ? Latches : Headers;
834+
if (BB == TermBlock) {
731835
// The latch is special because we emit unconditional branches in
732836
// some cases where the original loop contained a conditional branch.
733837
// Since the latch is always at the bottom of the loop, if the latch
734838
// dominated an exit before unrolling, the new dominator of that exit
735839
// must also be a latch. Specifically, the dominator is the first
736840
// latch which ends in a conditional branch, or the last latch if
737841
// there is no such latch.
738-
NewIDom = Latches.back();
739-
for (BasicBlock *IterLatch : Latches) {
740-
Instruction *Term = IterLatch->getTerminator();
842+
// For loops exiting from the header, we limit the supported loops
843+
// to have a single exiting block.
844+
NewIDom = TermBlocks.back();
845+
for (BasicBlock *Iter : TermBlocks) {
846+
Instruction *Term = Iter->getTerminator();
741847
if (isa<BranchInst>(Term) && cast<BranchInst>(Term)->isConditional()) {
742-
NewIDom = IterLatch;
848+
NewIDom = Iter;
743849
break;
744850
}
745851
}
@@ -756,13 +862,17 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
756862
}
757863

758864
assert(!DT || !UnrollVerifyDomtree ||
759-
DT->verify(DominatorTree::VerificationLevel::Fast));
865+
DT->verify(DominatorTree::VerificationLevel::Fast));
760866

761867
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
762868
// Merge adjacent basic blocks, if possible.
763869
for (BasicBlock *Latch : Latches) {
764-
BranchInst *Term = cast<BranchInst>(Latch->getTerminator());
765-
if (Term->isUnconditional()) {
870+
BranchInst *Term = dyn_cast<BranchInst>(Latch->getTerminator());
871+
assert((Term ||
872+
(CompletelyUnroll && !LatchIsExiting && Latch == Latches.back())) &&
873+
"Need a branch as terminator, except when fully unrolling with "
874+
"unconditional latch");
875+
if (Term && Term->isUnconditional()) {
766876
BasicBlock *Dest = Term->getSuccessor(0);
767877
BasicBlock *Fold = Dest->getUniquePredecessor();
768878
if (MergeBlockIntoPredecessor(Dest, &DTU, LI)) {

llvm/test/Analysis/ScalarEvolution/scev-expander-reuse-unroll.ll

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
; RUN: opt < %s -loop-unroll -unroll-runtime -unroll-count=2 -verify-scev-maps -S | FileCheck %s
22

33
; Check SCEV expansion uses existing value when unrolling an inner loop with runtime trip count in a loop nest.
4+
; The outer loop gets unrolled twice, so we see 2 selects in the outer loop blocks.
45
; CHECK-LABEL: @foo(
6+
; CHECK-LABEL: for.body.loopexit:
7+
; CHECK: select
8+
; CHECK-LABEL: for.body:
59
; CHECK: select
610
; CHECK-NOT: select
711
; CHECK: ret
@@ -14,7 +18,7 @@ for.body: ; preds = %for.body5, %for.bod
1418
%xfL.addr.033 = phi i32 [ %xfL, %entry ], [ %add, %for.body5 ]
1519
%add = add nsw i32 %xfL.addr.033, %scaleL
1620
%shr = ashr i32 %add, 16
17-
%cmp.i = icmp slt i32 0, %shr
21+
%cmp.i = icmp slt i32 10, %shr
1822
%.sroa.speculated = select i1 %cmp.i, i32 0, i32 %shr
1923
%cmp425 = icmp slt i32 0, %.sroa.speculated
2024
br i1 %cmp425, label %for.body5.preheader, label %for.end

0 commit comments

Comments
 (0)