@@ -56,6 +56,17 @@ static cl::opt<bool> UnrollRuntimeOtherExitPredictable(
56
56
" unroll-runtime-other-exit-predictable" , cl::init(false ), cl::Hidden,
57
57
cl::desc(" Assume the non latch exit block to be predictable" ));
58
58
59
+ // Branch weights, in successor order {skip, enter}, for the guard placed after
60
+ // the prolog: the case where the trip count is so small that we do not enter
61
+ // the unrolled loop at all is assumed unlikely, because the loop trip count is
62
+ // rarely smaller than the unroll factor; other than that, the 1:127 ratio is not tuned yet.
63
+ static const uint32_t UnrolledLoopHeaderWeights[] = {1 , 127 };
64
+ // Branch weights, in successor order {remainder loop, unrolled loop}, for the
65
+ // guard in front of the unrolled loop: skipping the unrolled loop completely
66
+ // and immediately entering the epilogue loop is assumed unlikely, because the
67
+ // loop trip count is rarely smaller than the unroll factor; the 1:127 ratio is not tuned yet.
68
+ static const uint32_t EpilogHeaderWeights[] = {1 , 127 }; // NOTE(review): the branch using these is built for prolog remainders too - confirm intended.
69
+
59
70
// / Connect the unrolling prolog code to the original loop.
60
71
// / The unrolling prolog code contains code to execute the
61
72
// / 'extra' iterations if the run-time trip count modulo the
@@ -169,7 +180,14 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
169
180
SplitBlockPredecessors (OriginalLoopLatchExit, Preds, " .unr-lcssa" , DT, LI,
170
181
nullptr , PreserveLCSSA);
171
182
// Add the branch to the exit block (around the unrolled loop)
172
- B.CreateCondBr (BrLoopExit, OriginalLoopLatchExit, NewPreHeader);
183
+ MDNode *BranchWeights = nullptr ;
184
+ if (hasBranchWeightMD (*Latch->getTerminator ())) {
185
+ // Assume loop is nearly always entered.
186
+ MDBuilder MDB (B.getContext ());
187
+ BranchWeights = MDB.createBranchWeights (UnrolledLoopHeaderWeights);
188
+ }
189
+ B.CreateCondBr (BrLoopExit, OriginalLoopLatchExit, NewPreHeader,
190
+ BranchWeights);
173
191
InsertPt->eraseFromParent ();
174
192
if (DT) {
175
193
auto *NewDom = DT->findNearestCommonDominator (OriginalLoopLatchExit,
@@ -194,8 +212,8 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
194
212
BasicBlock *Exit, BasicBlock *PreHeader,
195
213
BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
196
214
ValueToValueMapTy &VMap, DominatorTree *DT,
197
- LoopInfo *LI, bool PreserveLCSSA,
198
- ScalarEvolution &SE ) {
215
+ LoopInfo *LI, bool PreserveLCSSA, ScalarEvolution &SE,
216
+ unsigned Count ) {
199
217
BasicBlock *Latch = L->getLoopLatch ();
200
218
assert (Latch && " Loop must have a latch" );
201
219
BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
@@ -292,7 +310,13 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
292
310
SplitBlockPredecessors (Exit, Preds, " .epilog-lcssa" , DT, LI, nullptr ,
293
311
PreserveLCSSA);
294
312
// Add the branch to the exit block (around the unrolling loop)
295
- B.CreateCondBr (BrLoopExit, EpilogPreHeader, Exit);
313
+ MDNode *BranchWeights = nullptr ;
314
+ if (hasBranchWeightMD (*Latch->getTerminator ())) {
315
+ // Assume equal distribution in interval [0, Count).
316
+ MDBuilder MDB (B.getContext ());
317
+ BranchWeights = MDB.createBranchWeights (1 , Count - 1 );
318
+ }
319
+ B.CreateCondBr (BrLoopExit, EpilogPreHeader, Exit, BranchWeights);
296
320
InsertPt->eraseFromParent ();
297
321
if (DT) {
298
322
auto *NewDom = DT->findNearestCommonDominator (Exit, NewExit);
@@ -316,8 +340,9 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
316
340
const bool UnrollRemainder,
317
341
BasicBlock *InsertTop,
318
342
BasicBlock *InsertBot, BasicBlock *Preheader,
319
- std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
320
- ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) {
343
+ std::vector<BasicBlock *> &NewBlocks,
344
+ LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
345
+ DominatorTree *DT, LoopInfo *LI, unsigned Count) {
321
346
StringRef suffix = UseEpilogRemainder ? " epil" : " prol" ;
322
347
BasicBlock *Header = L->getHeader ();
323
348
BasicBlock *Latch = L->getLoopLatch ();
@@ -371,7 +396,26 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
371
396
Value *IdxNext =
372
397
Builder.CreateAdd (NewIdx, One, NewIdx->getName () + " .next" );
373
398
Value *IdxCmp = Builder.CreateICmpNE (IdxNext, NewIter, NewIdx->getName () + " .cmp" );
374
- Builder.CreateCondBr (IdxCmp, FirstLoopBB, InsertBot);
399
+ MDNode *BranchWeights = nullptr ;
400
+ if (hasBranchWeightMD (*LatchBR)) {
401
+ uint32_t ExitWeight;
402
+ uint32_t BackEdgeWeight;
403
+ if (Count >= 3 ) {
404
+ // Note: We do not enter this loop for zero-remainders. The check
405
+ // is at the end of the loop. We assume equal distribution between
406
+ // possible remainders in [1, Count).
407
+ ExitWeight = 1 ;
408
+ BackEdgeWeight = (Count - 2 ) / 2 ;
409
+ } else {
410
+ // Unnecessary backedge, should never be taken. The conditional
411
+ // jump should be optimized away later.
412
+ ExitWeight = 1 ;
413
+ BackEdgeWeight = 0 ;
414
+ }
415
+ MDBuilder MDB (Builder.getContext ());
416
+ BranchWeights = MDB.createBranchWeights (BackEdgeWeight, ExitWeight);
417
+ }
418
+ Builder.CreateCondBr (IdxCmp, FirstLoopBB, InsertBot, BranchWeights);
375
419
NewIdx->addIncoming (Zero, InsertTop);
376
420
NewIdx->addIncoming (IdxNext, NewBB);
377
421
LatchBR->eraseFromParent ();
@@ -465,32 +509,6 @@ static bool canProfitablyUnrollMultiExitLoop(
465
509
// know of kinds of multiexit loops that would benefit from unrolling.
466
510
}
467
511
468
- // Assign the maximum possible trip count as the back edge weight for the
469
- // remainder loop if the original loop comes with a branch weight; if the
470
- static void updateLatchBranchWeightsForRemainderLoop (Loop *OrigLoop, // original latch has none, RemainderLoop is left untouched.
471
- Loop *RemainderLoop,
472
- uint64_t UnrollFactor) {
473
- uint64_t TrueWeight, FalseWeight; // filled in by extractBranchWeights below
474
- BranchInst *LatchBR =
475
- cast<BranchInst>(OrigLoop->getLoopLatch ()->getTerminator ());
476
- if (!extractBranchWeights (*LatchBR, TrueWeight, FalseWeight))
477
- return ; // no weights could be extracted from the original latch: nothing to propagate
478
- uint64_t ExitWeight = LatchBR->getSuccessor (0 ) == OrigLoop->getHeader ()
479
- ? FalseWeight
480
- : TrueWeight; // weight of the latch successor that is not the loop header
481
- assert (UnrollFactor > 1 );
482
- uint64_t BackEdgeWeight = (UnrollFactor - 1 ) * ExitWeight; // scale by UnrollFactor - 1. NOTE(review): 64-bit product; confirm createBranchWeights does not truncate it.
483
- BasicBlock *Header = RemainderLoop->getHeader ();
484
- BasicBlock *Latch = RemainderLoop->getLoopLatch ();
485
- auto *RemainderLatchBR = cast<BranchInst>(Latch->getTerminator ());
486
- unsigned HeaderIdx = (RemainderLatchBR->getSuccessor (0 ) == Header ? 0 : 1 ); // which successor of the remainder latch is the back edge
487
- MDBuilder MDB (RemainderLatchBR->getContext ());
488
- MDNode *WeightNode =
489
- HeaderIdx ? MDB.createBranchWeights (ExitWeight, BackEdgeWeight) // back edge is successor 1
490
- : MDB.createBranchWeights (BackEdgeWeight, ExitWeight); // back edge is successor 0
491
- RemainderLatchBR->setMetadata (LLVMContext::MD_prof, WeightNode); // replaces any existing !prof metadata on the remainder latch
492
- }
493
-
494
512
// / Calculate ModVal = (BECount + 1) % Count on the abstract integer domain
495
513
// / accounting for the possibility of unsigned overflow in the 2s complement
496
514
// / domain. Preconditions:
@@ -776,7 +794,13 @@ bool llvm::UnrollRuntimeLoopRemainder(
776
794
BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
777
795
BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
778
796
// Branch to either remainder (extra iterations) loop or unrolling loop.
779
- B.CreateCondBr (BranchVal, RemainderLoop, UnrollingLoop);
797
+ MDNode *BranchWeights = nullptr ;
798
+ if (hasBranchWeightMD (*Latch->getTerminator ())) {
799
+ // Assume loop is nearly always entered.
800
+ MDBuilder MDB (B.getContext ());
801
+ BranchWeights = MDB.createBranchWeights (EpilogHeaderWeights);
802
+ }
803
+ B.CreateCondBr (BranchVal, RemainderLoop, UnrollingLoop, BranchWeights);
780
804
PreHeaderBR->eraseFromParent ();
781
805
if (DT) {
782
806
if (UseEpilogRemainder)
@@ -805,12 +829,7 @@ bool llvm::UnrollRuntimeLoopRemainder(
805
829
BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
806
830
Loop *remainderLoop = CloneLoopBlocks (
807
831
L, ModVal, UseEpilogRemainder, UnrollRemainder, InsertTop, InsertBot,
808
- NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
809
-
810
- // Assign the maximum possible trip count as the back edge weight for the
811
- // remainder loop if the original loop comes with a branch weight.
812
- if (remainderLoop && !UnrollRemainder)
813
- updateLatchBranchWeightsForRemainderLoop (L, remainderLoop, Count);
832
+ NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI, Count);
814
833
815
834
// Insert the cloned blocks into the function.
816
835
F->splice (InsertBot->getIterator (), F, NewBlocks[0 ]->getIterator (), F->end ());
@@ -904,7 +923,7 @@ bool llvm::UnrollRuntimeLoopRemainder(
904
923
// Connect the epilog code to the original loop and update the
905
924
// PHI functions.
906
925
ConnectEpilog (L, ModVal, NewExit, LatchExit, PreHeader, EpilogPreHeader,
907
- NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE);
926
+ NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE, Count );
908
927
909
928
// Update counter in loop for unrolling.
910
929
// Use an incrementing IV. Pre-incr/post-incr is backedge/trip count.
0 commit comments