Skip to content

Commit b30c9c9

Browse files
committed
LoopUnrollRuntime: Add weights to all branches
Make sure every conditional branch constructed by `LoopUnrollRuntime` code sets branch weights.

- Add new 1:127 weights for the conditional jumps checking whether the whole (unrolled) loop should be skipped in the generated prolog or epilog code.
- Remove the `updateLatchBranchWeightsForRemainderLoop` function and instead add weights immediately when constructing the relevant branches. This leads to simpler code and makes the code more obvious, as every call to `CreateCondBr` now has a `BranchWeights` parameter.
- Rework the formula for the epilogue latch weights to assume an equal distribution of remainders, and remove the `assert` (as I was able to reach this code when forcing small unroll factors on the command line).

Differential Revision: https://reviews.llvm.org/D158642
1 parent b8ede3b commit b30c9c9

File tree

5 files changed

+119
-75
lines changed

5 files changed

+119
-75
lines changed

llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp

Lines changed: 60 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,17 @@ static cl::opt<bool> UnrollRuntimeOtherExitPredictable(
5656
"unroll-runtime-other-exit-predictable", cl::init(false), cl::Hidden,
5757
cl::desc("Assume the non latch exit block to be predictable"));
5858

59+
// Probability that the loop trip count is so small that after the prolog
60+
// we do not enter the unrolled loop at all.
61+
// It is unlikely that the loop trip count is smaller than the unroll factor;
62+
// other than that, the choice of constant is not tuned yet.
63+
static const uint32_t UnrolledLoopHeaderWeights[] = {1, 127};
64+
// Probability that the loop trip count is so small that we skip the unrolled
65+
// loop completely and immediately enter the epilogue loop.
66+
// It is unlikely that the loop trip count is smaller than the unroll factor;
67+
// other than that, the choice of constant is not tuned yet.
68+
static const uint32_t EpilogHeaderWeights[] = {1, 127};
69+
5970
/// Connect the unrolling prolog code to the original loop.
6071
/// The unrolling prolog code contains code to execute the
6172
/// 'extra' iterations if the run-time trip count modulo the
@@ -169,7 +180,14 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
169180
SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI,
170181
nullptr, PreserveLCSSA);
171182
// Add the branch to the exit block (around the unrolled loop)
172-
B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader);
183+
MDNode *BranchWeights = nullptr;
184+
if (hasBranchWeightMD(*Latch->getTerminator())) {
185+
// Assume loop is nearly always entered.
186+
MDBuilder MDB(B.getContext());
187+
BranchWeights = MDB.createBranchWeights(UnrolledLoopHeaderWeights);
188+
}
189+
B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader,
190+
BranchWeights);
173191
InsertPt->eraseFromParent();
174192
if (DT) {
175193
auto *NewDom = DT->findNearestCommonDominator(OriginalLoopLatchExit,
@@ -194,8 +212,8 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
194212
BasicBlock *Exit, BasicBlock *PreHeader,
195213
BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
196214
ValueToValueMapTy &VMap, DominatorTree *DT,
197-
LoopInfo *LI, bool PreserveLCSSA,
198-
ScalarEvolution &SE) {
215+
LoopInfo *LI, bool PreserveLCSSA, ScalarEvolution &SE,
216+
unsigned Count) {
199217
BasicBlock *Latch = L->getLoopLatch();
200218
assert(Latch && "Loop must have a latch");
201219
BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
@@ -292,7 +310,13 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
292310
SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, nullptr,
293311
PreserveLCSSA);
294312
// Add the branch to the exit block (around the unrolling loop)
295-
B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
313+
MDNode *BranchWeights = nullptr;
314+
if (hasBranchWeightMD(*Latch->getTerminator())) {
315+
// Assume equal distribution in interval [0, Count).
316+
MDBuilder MDB(B.getContext());
317+
BranchWeights = MDB.createBranchWeights(1, Count - 1);
318+
}
319+
B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit, BranchWeights);
296320
InsertPt->eraseFromParent();
297321
if (DT) {
298322
auto *NewDom = DT->findNearestCommonDominator(Exit, NewExit);
@@ -316,8 +340,9 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
316340
const bool UnrollRemainder,
317341
BasicBlock *InsertTop,
318342
BasicBlock *InsertBot, BasicBlock *Preheader,
319-
std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
320-
ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) {
343+
std::vector<BasicBlock *> &NewBlocks,
344+
LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
345+
DominatorTree *DT, LoopInfo *LI, unsigned Count) {
321346
StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
322347
BasicBlock *Header = L->getHeader();
323348
BasicBlock *Latch = L->getLoopLatch();
@@ -371,7 +396,26 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
371396
Value *IdxNext =
372397
Builder.CreateAdd(NewIdx, One, NewIdx->getName() + ".next");
373398
Value *IdxCmp = Builder.CreateICmpNE(IdxNext, NewIter, NewIdx->getName() + ".cmp");
374-
Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
399+
MDNode *BranchWeights = nullptr;
400+
if (hasBranchWeightMD(*LatchBR)) {
401+
uint32_t ExitWeight;
402+
uint32_t BackEdgeWeight;
403+
if (Count >= 3) {
404+
// Note: We do not enter this loop for zero-remainders. The check
405+
// is at the end of the loop. We assume equal distribution between
406+
// possible remainders in [1, Count).
407+
ExitWeight = 1;
408+
BackEdgeWeight = (Count - 2) / 2;
409+
} else {
410+
// Unnecessary backedge, should never be taken. The conditional
411+
// jump should be optimized away later.
412+
ExitWeight = 1;
413+
BackEdgeWeight = 0;
414+
}
415+
MDBuilder MDB(Builder.getContext());
416+
BranchWeights = MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
417+
}
418+
Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot, BranchWeights);
375419
NewIdx->addIncoming(Zero, InsertTop);
376420
NewIdx->addIncoming(IdxNext, NewBB);
377421
LatchBR->eraseFromParent();
@@ -465,32 +509,6 @@ static bool canProfitablyUnrollMultiExitLoop(
465509
// know of kinds of multiexit loops that would benefit from unrolling.
466510
}
467511

468-
// Assign the maximum possible trip count as the back edge weight for the
469-
// remainder loop if the original loop comes with a branch weight.
470-
static void updateLatchBranchWeightsForRemainderLoop(Loop *OrigLoop,
471-
Loop *RemainderLoop,
472-
uint64_t UnrollFactor) {
473-
uint64_t TrueWeight, FalseWeight;
474-
BranchInst *LatchBR =
475-
cast<BranchInst>(OrigLoop->getLoopLatch()->getTerminator());
476-
if (!extractBranchWeights(*LatchBR, TrueWeight, FalseWeight))
477-
return;
478-
uint64_t ExitWeight = LatchBR->getSuccessor(0) == OrigLoop->getHeader()
479-
? FalseWeight
480-
: TrueWeight;
481-
assert(UnrollFactor > 1);
482-
uint64_t BackEdgeWeight = (UnrollFactor - 1) * ExitWeight;
483-
BasicBlock *Header = RemainderLoop->getHeader();
484-
BasicBlock *Latch = RemainderLoop->getLoopLatch();
485-
auto *RemainderLatchBR = cast<BranchInst>(Latch->getTerminator());
486-
unsigned HeaderIdx = (RemainderLatchBR->getSuccessor(0) == Header ? 0 : 1);
487-
MDBuilder MDB(RemainderLatchBR->getContext());
488-
MDNode *WeightNode =
489-
HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
490-
: MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
491-
RemainderLatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
492-
}
493-
494512
/// Calculate ModVal = (BECount + 1) % Count on the abstract integer domain
495513
/// accounting for the possibility of unsigned overflow in the 2s complement
496514
/// domain. Preconditions:
@@ -776,7 +794,13 @@ bool llvm::UnrollRuntimeLoopRemainder(
776794
BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
777795
BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
778796
// Branch to either remainder (extra iterations) loop or unrolling loop.
779-
B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
797+
MDNode *BranchWeights = nullptr;
798+
if (hasBranchWeightMD(*Latch->getTerminator())) {
799+
// Assume loop is nearly always entered.
800+
MDBuilder MDB(B.getContext());
801+
BranchWeights = MDB.createBranchWeights(EpilogHeaderWeights);
802+
}
803+
B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop, BranchWeights);
780804
PreHeaderBR->eraseFromParent();
781805
if (DT) {
782806
if (UseEpilogRemainder)
@@ -805,12 +829,7 @@ bool llvm::UnrollRuntimeLoopRemainder(
805829
BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
806830
Loop *remainderLoop = CloneLoopBlocks(
807831
L, ModVal, UseEpilogRemainder, UnrollRemainder, InsertTop, InsertBot,
808-
NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
809-
810-
// Assign the maximum possible trip count as the back edge weight for the
811-
// remainder loop if the original loop comes with a branch weight.
812-
if (remainderLoop && !UnrollRemainder)
813-
updateLatchBranchWeightsForRemainderLoop(L, remainderLoop, Count);
832+
NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI, Count);
814833

815834
// Insert the cloned blocks into the function.
816835
F->splice(InsertBot->getIterator(), F, NewBlocks[0]->getIterator(), F->end());
@@ -904,7 +923,7 @@ bool llvm::UnrollRuntimeLoopRemainder(
904923
// Connect the epilog code to the original loop and update the
905924
// PHI functions.
906925
ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader, EpilogPreHeader,
907-
NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE);
926+
NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE, Count);
908927

909928
// Update counter in loop for unrolling.
910929
// Use an incrementing IV. Pre-incr/post-incr is backedge/trip count.

llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ define void @pr56286(i64 %x, ptr %src, ptr %dst, ptr %ptr.src) !prof !0 {
165165
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], -1
166166
; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP2]], 7
167167
; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
168-
; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[INNER_1_HEADER_PROL_PREHEADER:%.*]], label [[INNER_1_HEADER_PROL_LOOPEXIT:%.*]]
168+
; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[INNER_1_HEADER_PROL_PREHEADER:%.*]], label [[INNER_1_HEADER_PROL_LOOPEXIT:%.*]], !prof [[PROF3:![0-9]+]]
169169
; CHECK: inner.1.header.prol.preheader:
170170
; CHECK-NEXT: br label [[INNER_1_HEADER_PROL:%.*]]
171171
; CHECK: inner.1.header.prol:
@@ -180,7 +180,7 @@ define void @pr56286(i64 %x, ptr %src, ptr %dst, ptr %ptr.src) !prof !0 {
180180
; CHECK-NEXT: [[CMP_2_PROL:%.*]] = icmp sgt i64 [[INNER_1_IV_PROL]], 0
181181
; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
182182
; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
183-
; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[INNER_1_HEADER_PROL]], label [[INNER_1_HEADER_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]]
183+
; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[INNER_1_HEADER_PROL]], label [[INNER_1_HEADER_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !prof [[PROF4:![0-9]+]], !llvm.loop [[LOOP5:![0-9]+]]
184184
; CHECK: inner.1.header.prol.loopexit.unr-lcssa:
185185
; CHECK-NEXT: [[L_1_LCSSA_UNR_PH:%.*]] = phi i32 [ [[L_1_PROL]], [[INNER_1_LATCH_PROL]] ]
186186
; CHECK-NEXT: [[INNER_1_IV_UNR_PH:%.*]] = phi i64 [ [[INNER_1_IV_NEXT_PROL]], [[INNER_1_LATCH_PROL]] ]
@@ -189,7 +189,7 @@ define void @pr56286(i64 %x, ptr %src, ptr %dst, ptr %ptr.src) !prof !0 {
189189
; CHECK-NEXT: [[L_1_LCSSA_UNR:%.*]] = phi i32 [ undef, [[OUTER_HEADER]] ], [ [[L_1_LCSSA_UNR_PH]], [[INNER_1_HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ]
190190
; CHECK-NEXT: [[INNER_1_IV_UNR:%.*]] = phi i64 [ [[X]], [[OUTER_HEADER]] ], [ [[INNER_1_IV_UNR_PH]], [[INNER_1_HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ]
191191
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 7
192-
; CHECK-NEXT: br i1 [[TMP4]], label [[OUTER_MIDDLE:%.*]], label [[OUTER_HEADER_NEW:%.*]]
192+
; CHECK-NEXT: br i1 [[TMP4]], label [[OUTER_MIDDLE:%.*]], label [[OUTER_HEADER_NEW:%.*]], !prof [[PROF3]]
193193
; CHECK: outer.header.new:
194194
; CHECK-NEXT: br label [[INNER_1_HEADER:%.*]]
195195
; CHECK: inner.1.header:
@@ -233,7 +233,7 @@ define void @pr56286(i64 %x, ptr %src, ptr %dst, ptr %ptr.src) !prof !0 {
233233
; CHECK-NEXT: store i32 [[L_1_7]], ptr [[DST]], align 8
234234
; CHECK-NEXT: [[INNER_1_IV_NEXT_7]] = add i64 [[INNER_1_IV]], 8
235235
; CHECK-NEXT: [[CMP_2_7:%.*]] = icmp sgt i64 [[INNER_1_IV_NEXT_6]], 0
236-
; CHECK-NEXT: br i1 [[CMP_2_7]], label [[OUTER_MIDDLE_UNR_LCSSA:%.*]], label [[INNER_1_HEADER]], !prof [[PROF5:![0-9]+]]
236+
; CHECK-NEXT: br i1 [[CMP_2_7]], label [[OUTER_MIDDLE_UNR_LCSSA:%.*]], label [[INNER_1_HEADER]], !prof [[PROF6:![0-9]+]]
237237
; CHECK: outer.middle.unr-lcssa:
238238
; CHECK-NEXT: [[L_1_LCSSA_PH:%.*]] = phi i32 [ [[L_1_7]], [[INNER_1_LATCH_7]] ]
239239
; CHECK-NEXT: br label [[OUTER_MIDDLE]]

llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
; CHECK-LABEL: for.body.epil:
88
; CHECK: br i1 [[COND2:%.*]], label %for.body.epil, label %for.end.loopexit.epilog-lcssa, !prof ![[#PROF2:]], !llvm.loop ![[#LOOP2:]]
99
; CHECK: ![[#PROF]] = !{!"branch_weights", i32 1, i32 2499}
10-
; CHECK: ![[#PROF2]] = !{!"branch_weights", i32 3, i32 1}
10+
; CHECK: ![[#PROF2]] = !{!"branch_weights", i32 1, i32 1}
1111

1212
define i3 @test(ptr %a, i3 %n) {
1313
entry:

llvm/test/Transforms/LoopUnroll/runtime-loop.ll

Lines changed: 46 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -18,41 +18,54 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
1818

1919
; COMMON-LABEL: @test(
2020

21-
; EPILOG: %xtraiter = and i32 %n
22-
; EPILOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
23-
; EPILOG: br i1 %lcmp.mod, label %for.body.epil.preheader, label %for.end.loopexit
21+
; EPILOG: entry:
22+
; EPILOG: br i1 %cmp1, label %for.end, label %for.body.preheader, !prof [[EPILOG_PROF_0:![0-9]+]]
23+
; EPILOG: for.body.preheader:
24+
; EPILOG: %xtraiter = and i32 %n
25+
; EPILOG: br i1 %1, label %for.end.loopexit.unr-lcssa, label %for.body.preheader.new, !prof [[EPILOG_PROF_1:![0-9]+]]
26+
27+
; EPILOG: for.end.loopexit.unr-lcssa:
28+
; EPILOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
29+
; EPILOG: br i1 %lcmp.mod, label %for.body.epil.preheader, label %for.end.loopexit, !prof [[EPILOG_PROF_2:![0-9]+]]
2430

2531
; NOEPILOG-NOT: %xtraiter = and i32 %n
2632

27-
; PROLOG: %xtraiter = and i32 %n
28-
; PROLOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
29-
; PROLOG: br i1 %lcmp.mod, label %for.body.prol.preheader, label %for.body.prol.loopexit
33+
; PROLOG: entry:
34+
; PROLOG: br i1 %cmp1, label %for.end, label %for.body.preheader, !prof [[PROLOG_PROF_0:![0-9]+]]
35+
36+
; PROLOG: for.body.preheader:
37+
; PROLOG: %xtraiter = and i32 %n
38+
; PROLOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
39+
; PROLOG: br i1 %lcmp.mod, label %for.body.prol.preheader, label %for.body.prol.loopexit, !prof [[PROLOG_PROF_1:![0-9]+]]
3040

3141
; NOPROLOG-NOT: %xtraiter = and i32 %n
3242

3343
; EPILOG: for.body.epil:
34-
; EPILOG: %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %for.body.epil ], [ %indvars.iv.unr, %for.body.epil.preheader ]
35-
; EPILOG: %epil.iter.next = add i32 %epil.iter, 1
36-
; EPILOG: %epil.iter.cmp = icmp ne i32 %epil.iter.next, %xtraiter
37-
; EPILOG: br i1 %epil.iter.cmp, label %for.body.epil, label %for.end.loopexit.epilog-lcssa, !llvm.loop !0
44+
; EPILOG: %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %for.body.epil ], [ %indvars.iv.unr, %for.body.epil.preheader ]
45+
; EPILOG: %epil.iter.next = add i32 %epil.iter, 1
46+
; EPILOG: %epil.iter.cmp = icmp ne i32 %epil.iter.next, %xtraiter
47+
; EPILOG: br i1 %epil.iter.cmp, label %for.body.epil, label %for.end.loopexit.epilog-lcssa, !prof [[EPILOG_PROF_3:![0-9]+]], !llvm.loop [[EPILOG_LOOP:![0-9]+]]
3848

3949
; NOEPILOG: for.body:
4050
; NOEPILOG-NOT: for.body.epil:
4151

4252
; PROLOG: for.body.prol:
43-
; PROLOG: %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %for.body.prol ], [ 0, %for.body.prol.preheader ]
44-
; PROLOG: %prol.iter.next = add i32 %prol.iter, 1
45-
; PROLOG: %prol.iter.cmp = icmp ne i32 %prol.iter.next, %xtraiter
46-
; PROLOG: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.prol.loopexit.unr-lcssa, !llvm.loop !0
53+
; PROLOG: %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %for.body.prol ], [ 0, %for.body.prol.preheader ]
54+
; PROLOG: %prol.iter.next = add i32 %prol.iter, 1
55+
; PROLOG: %prol.iter.cmp = icmp ne i32 %prol.iter.next, %xtraiter
56+
; PROLOG: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.prol.loopexit.unr-lcssa, !prof [[PROLOG_PROF_2:![0-9]+]], !llvm.loop [[PROLOG_LOOP:![0-9]+]]
57+
58+
; PROLOG: for.body.prol.loopexit:
59+
; PROLOG: br i1 %2, label %for.end.loopexit, label %for.body.preheader.new, !prof [[PROLOG_PROF_1:![0-9]+]]
4760

4861
; NOPROLOG: for.body:
4962
; NOPROLOG-NOT: for.body.prol:
5063

5164

52-
define i32 @test(ptr nocapture %a, i32 %n) nounwind uwtable readonly {
65+
define i32 @test(ptr nocapture %a, i32 %n) nounwind uwtable readonly !prof !2 {
5366
entry:
5467
%cmp1 = icmp eq i32 %n, 0
55-
br i1 %cmp1, label %for.end, label %for.body
68+
br i1 %cmp1, label %for.end, label %for.body, !prof !3
5669

5770
for.body: ; preds = %for.body, %entry
5871
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
@@ -63,7 +76,7 @@ for.body: ; preds = %for.body, %entry
6376
%indvars.iv.next = add i64 %indvars.iv, 1
6477
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
6578
%exitcond = icmp eq i32 %lftr.wideiv, %n
66-
br i1 %exitcond, label %for.end, label %for.body
79+
br i1 %exitcond, label %for.end, label %for.body, !prof !4
6780

6881
for.end: ; preds = %for.body, %entry
6982
%sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
@@ -274,12 +287,24 @@ exit2.loopexit:
274287

275288
!0 = distinct !{!0, !1}
276289
!1 = !{!"llvm.loop.unroll.runtime.disable"}
290+
!2 = !{!"function_entry_count", i64 1}
291+
!3 = !{!"branch_weights", i32 1, i32 11}
292+
!4 = !{!"branch_weights", i32 1, i32 42}
277293

278294
; need to use LABEL here to separate function IR matching from metadata matching
279295
; COMMON-LABEL: {{^}}!0 =
280296

281-
; EPILOG-SAME: distinct !{!0, !1}
282-
; EPILOG: !1 = !{!"llvm.loop.unroll.disable"}
297+
; EPILOG: [[EPILOG_PROF_0]] = !{!"branch_weights", i32 1, i32 11}
298+
; EPILOG: [[EPILOG_PROF_1]] = !{!"branch_weights", i32 1, i32 127}
299+
; EPILOG: [[EPILOG_PROF_2]] = !{!"branch_weights", i32 1, i32 7}
300+
; EPILOG: [[EPILOG_PROF_3]] = !{!"branch_weights", i32 3, i32 1}
301+
302+
; EPILOG: [[EPILOG_LOOP]] = distinct !{[[EPILOG_LOOP]], [[EPILOG_LOOP_1:![0-9]+]]}
303+
; EPILOG: [[EPILOG_LOOP_1]] = !{!"llvm.loop.unroll.disable"}
304+
305+
; PROLOG: [[PROLOG_PROF_0]] = !{!"branch_weights", i32 1, i32 11}
306+
; PROLOG: [[PROLOG_PROF_1]] = !{!"branch_weights", i32 1, i32 127}
307+
; PROLOG: [[PROLOG_PROF_2]] = !{!"branch_weights", i32 3, i32 1}
283308

284-
; PROLOG-SAME: distinct !{!0, !1}
285-
; PROLOG: !1 = !{!"llvm.loop.unroll.disable"}
309+
; PROLOG: distinct !{[[PROLOG_LOOP]], [[PROLOG_LOOP_1:![0-9]+]]}
310+
; PROLOG: [[PROLOG_LOOP_1]] = !{!"llvm.loop.unroll.disable"}

llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@
44

55
; CHECK-LABEL: @bar_prof
66
; CHECK: loop:
7-
; CHECK: %mul = mul
8-
; CHECK: %mul.1 = mul
9-
; CHECK: %mul.2 = mul
10-
; CHECK: %mul.3 = mul
11-
; CHECK: br i1 %niter.ncmp.7, label %loop.end.unr-lcssa.loopexit, label %loop, !prof !1
7+
; CHECK: %mul = mul
8+
; CHECK: %mul.1 = mul
9+
; CHECK: %mul.2 = mul
10+
; CHECK: %mul.3 = mul
11+
; CHECK: br i1 %niter.ncmp.7, label %loop.end.unr-lcssa.loopexit, label %loop, !prof [[PROF0:![0-9]+]]
1212
; CHECK: loop.epil:
13-
; CHECK: br i1 %epil.iter.cmp, label %loop.epil, label %loop.end.epilog-lcssa, !prof !2, !llvm.loop !3
13+
; CHECK: br i1 %epil.iter.cmp, label %loop.epil, label %loop.end.epilog-lcssa, !prof [[PROF1:![0-9]+]], !llvm.loop {{![0-9]+}}
1414
define i32 @bar_prof(ptr noalias nocapture readonly %src, i64 %c) !prof !1 {
1515
entry:
1616
br label %loop
@@ -60,5 +60,5 @@ loop.end:
6060
!1 = !{!"function_entry_count", i64 1}
6161
!2 = !{!"branch_weights", i32 1, i32 1000}
6262

63-
; CHECK: !1 = !{!"branch_weights", i32 1, i32 124}
64-
; CHECK: !2 = !{!"branch_weights", i32 7, i32 1}
63+
; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 124}
64+
; CHECK: [[PROF1]] = !{!"branch_weights", i32 3, i32 1}

0 commit comments

Comments
 (0)