@@ -852,7 +852,7 @@ class InnerLoopVectorizer {
852
852
/// Middle Block between the vector and the scalar.
853
853
BasicBlock *LoopMiddleBlock;
854
854
855
- // / The unique ExitBlock of the scalar loop if one exists . Note that
855
+ /// The (unique) ExitBlock of the scalar loop. Note that
856
856
/// there can be multiple exiting edges reaching this block.
857
857
BasicBlock *LoopExitBlock;
858
858
@@ -3147,13 +3147,9 @@ void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L,
3147
3147
DT->getNode (Bypass)->getIDom ()) &&
3148
3148
" TC check is expected to dominate Bypass" );
3149
3149
3150
- // Update dominator for Bypass & LoopExit (if needed) .
3150
+ // Update dominator for Bypass & LoopExit.
3151
3151
DT->changeImmediateDominator (Bypass, TCCheckBlock);
3152
- if (!Cost->requiresScalarEpilogue ())
3153
- // If there is an epilogue which must run, there's no edge from the
3154
- // middle block to exit blocks and thus no need to update the immediate
3155
- // dominator of the exit blocks.
3156
- DT->changeImmediateDominator (LoopExitBlock, TCCheckBlock);
3152
+ DT->changeImmediateDominator (LoopExitBlock, TCCheckBlock);
3157
3153
3158
3154
ReplaceInstWithInst (
3159
3155
TCCheckBlock->getTerminator (),
@@ -3192,11 +3188,7 @@ void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) {
3192
3188
// Update dominator only if this is first RT check.
3193
3189
if (LoopBypassBlocks.empty ()) {
3194
3190
DT->changeImmediateDominator (Bypass, SCEVCheckBlock);
3195
- if (!Cost->requiresScalarEpilogue ())
3196
- // If there is an epilogue which must run, there's no edge from the
3197
- // middle block to exit blocks and thus no need to update the immediate
3198
- // dominator of the exit blocks.
3199
- DT->changeImmediateDominator (LoopExitBlock, SCEVCheckBlock);
3191
+ DT->changeImmediateDominator (LoopExitBlock, SCEVCheckBlock);
3200
3192
}
3201
3193
3202
3194
ReplaceInstWithInst (
@@ -3252,11 +3244,7 @@ void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) {
3252
3244
// Update dominator only if this is first RT check.
3253
3245
if (LoopBypassBlocks.empty ()) {
3254
3246
DT->changeImmediateDominator (Bypass, MemCheckBlock);
3255
- if (!Cost->requiresScalarEpilogue ())
3256
- // If there is an epilogue which must run, there's no edge from the
3257
- // middle block to exit blocks and thus no need to update the immediate
3258
- // dominator of the exit blocks.
3259
- DT->changeImmediateDominator (LoopExitBlock, MemCheckBlock);
3247
+ DT->changeImmediateDominator (LoopExitBlock, MemCheckBlock);
3260
3248
}
3261
3249
3262
3250
Instruction *FirstCheckInst;
@@ -3381,10 +3369,9 @@ Value *InnerLoopVectorizer::emitTransformedIndex(
3381
3369
Loop *InnerLoopVectorizer::createVectorLoopSkeleton (StringRef Prefix) {
3382
3370
LoopScalarBody = OrigLoop->getHeader ();
3383
3371
LoopVectorPreHeader = OrigLoop->getLoopPreheader ();
3372
+ LoopExitBlock = OrigLoop->getUniqueExitBlock ();
3373
+ assert (LoopExitBlock && " Must have an exit block" );
3384
3374
assert (LoopVectorPreHeader && " Invalid loop structure" );
3385
- LoopExitBlock = OrigLoop->getUniqueExitBlock (); // may be nullptr
3386
- assert ((LoopExitBlock || Cost->requiresScalarEpilogue ()) &&
3387
- " multiple exit loop without required epilogue?" );
3388
3375
3389
3376
LoopMiddleBlock =
3390
3377
SplitBlock (LoopVectorPreHeader, LoopVectorPreHeader->getTerminator (), DT,
@@ -3393,20 +3380,12 @@ Loop *InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
3393
3380
SplitBlock (LoopMiddleBlock, LoopMiddleBlock->getTerminator (), DT, LI,
3394
3381
nullptr , Twine (Prefix) + " scalar.ph" );
3395
3382
3383
+ // Set up branch from middle block to the exit and scalar preheader blocks.
3384
+ // completeLoopSkeleton will update the condition to use an iteration check,
3385
+ // if required to decide whether to execute the remainder.
3386
+ BranchInst *BrInst =
3387
+ BranchInst::Create (LoopExitBlock, LoopScalarPreHeader, Builder.getTrue ());
3396
3388
auto *ScalarLatchTerm = OrigLoop->getLoopLatch ()->getTerminator ();
3397
-
3398
- // Set up the middle block terminator. Two cases:
3399
- // 1) If we know that we must execute the scalar epilogue, emit an
3400
- // unconditional branch.
3401
- // 2) Otherwise, we must have a single unique exit block (due to how we
3402
- // implement the multiple exit case). In this case, set up a conditonal
3403
- // branch from the middle block to the loop scalar preheader, and the
3404
- // exit block. completeLoopSkeleton will update the condition to use an
3405
- // iteration check, if required to decide whether to execute the remainder.
3406
- BranchInst *BrInst = Cost->requiresScalarEpilogue () ?
3407
- BranchInst::Create (LoopScalarPreHeader) :
3408
- BranchInst::Create (LoopExitBlock, LoopScalarPreHeader,
3409
- Builder.getTrue ());
3410
3389
BrInst->setDebugLoc (ScalarLatchTerm->getDebugLoc ());
3411
3390
ReplaceInstWithInst (LoopMiddleBlock->getTerminator (), BrInst);
3412
3391
@@ -3418,11 +3397,7 @@ Loop *InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
3418
3397
nullptr , nullptr , Twine (Prefix) + " vector.body" );
3419
3398
3420
3399
// Update dominator for loop exit.
3421
- if (!Cost->requiresScalarEpilogue ())
3422
- // If there is an epilogue which must run, there's no edge from the
3423
- // middle block to exit blocks and thus no need to update the immediate
3424
- // dominator of the exit blocks.
3425
- DT->changeImmediateDominator (LoopExitBlock, LoopMiddleBlock);
3400
+ DT->changeImmediateDominator (LoopExitBlock, LoopMiddleBlock);
3426
3401
3427
3402
// Create and register the new vector loop.
3428
3403
Loop *Lp = LI->AllocateLoop ();
@@ -3519,14 +3494,10 @@ BasicBlock *InnerLoopVectorizer::completeLoopSkeleton(Loop *L,
3519
3494
auto *ScalarLatchTerm = OrigLoop->getLoopLatch ()->getTerminator ();
3520
3495
3521
3496
// Add a check in the middle block to see if we have completed
3522
- // all of the iterations in the first vector loop. Three cases:
3523
- // 1) If we require a scalar epilogue, there is no conditional branch as
3524
- // we unconditionally branch to the scalar preheader. Do nothing.
3525
- // 2) If (N - N%VF) == N, then we *don't* need to run the remainder.
3526
- // Thus if tail is to be folded, we know we don't need to run the
3527
- // remainder and we can use the previous value for the condition (true).
3528
- // 3) Otherwise, construct a runtime check.
3529
- if (!Cost->requiresScalarEpilogue () && !Cost->foldTailByMasking ()) {
3497
+ // all of the iterations in the first vector loop.
3498
+ // If (N - N%VF) == N, then we *don't* need to run the remainder.
3499
+ // If tail is to be folded, we know we don't need to run the remainder.
3500
+ if (!Cost->foldTailByMasking ()) {
3530
3501
Instruction *CmpN = CmpInst::Create (Instruction::ICmp, CmpInst::ICMP_EQ,
3531
3502
Count, VectorTripCount, " cmp.n" ,
3532
3503
LoopMiddleBlock->getTerminator ());
@@ -3590,17 +3561,17 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
3590
3561
| [ ]_| <-- vector loop.
3591
3562
| |
3592
3563
| v
3593
- \ -[ ] <--- middle-block.
3594
- \/ |
3595
- /\ v
3596
- | - >[ ] <--- new preheader.
3564
+ | -[ ] <--- middle-block.
3565
+ | / |
3566
+ | / v
3567
+ -|- >[ ] <--- new preheader.
3597
3568
| |
3598
- (opt) v <-- edge from middle to exit iff epilogue is not required.
3569
+ | v
3599
3570
| [ ] \
3600
- | [ ]_| <-- old scalar loop to handle remainder (scalar epilogue) .
3571
+ | [ ]_| <-- old scalar loop to handle remainder.
3601
3572
\ |
3602
3573
\ v
3603
- >[ ] <-- exit block(s) .
3574
+ >[ ] <-- exit block.
3604
3575
...
3605
3576
*/
3606
3577
@@ -4021,18 +3992,13 @@ void InnerLoopVectorizer::fixVectorizedLoop() {
4021
3992
// Forget the original basic block.
4022
3993
PSE.getSE ()->forgetLoop (OrigLoop);
4023
3994
4024
- // If we inserted an edge from the middle block to the unique exit block,
4025
- // update uses outside the loop (phis) to account for the newly inserted
4026
- // edge.
4027
- if (!Cost->requiresScalarEpilogue ()) {
4028
- // Fix-up external users of the induction variables.
4029
- for (auto &Entry : Legal->getInductionVars ())
4030
- fixupIVUsers (Entry.first , Entry.second ,
4031
- getOrCreateVectorTripCount (LI->getLoopFor (LoopVectorBody)),
4032
- IVEndValues[Entry.first ], LoopMiddleBlock);
3995
+ // Fix-up external users of the induction variables.
3996
+ for (auto &Entry : Legal->getInductionVars ())
3997
+ fixupIVUsers (Entry.first , Entry.second ,
3998
+ getOrCreateVectorTripCount (LI->getLoopFor (LoopVectorBody)),
3999
+ IVEndValues[Entry.first ], LoopMiddleBlock);
4033
4000
4034
- fixLCSSAPHIs ();
4035
- }
4001
+ fixLCSSAPHIs ();
4036
4002
for (Instruction *PI : PredicatedInstructions)
4037
4003
sinkScalarOperands (&*PI);
4038
4004
@@ -4250,13 +4216,12 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
4250
4216
// recurrence in the exit block, and then add an edge for the middle block.
4251
4217
// Note that LCSSA does not imply single entry when the original scalar loop
4252
4218
// had multiple exiting edges (as we always run the last iteration in the
4253
- // scalar epilogue); in that case, there is no edge from middle to exit and
4254
- // and thus no phis which needed updated.
4255
- if (!Cost->requiresScalarEpilogue ())
4256
- for (PHINode &LCSSAPhi : LoopExitBlock->phis ())
4257
- if (any_of (LCSSAPhi.incoming_values (),
4258
- [Phi](Value *V) { return V == Phi; }))
4259
- LCSSAPhi.addIncoming (ExtractForPhiUsedOutsideLoop, LoopMiddleBlock);
4219
+ // scalar epilogue); in that case, the exiting path through middle will be
4220
+ // dynamically dead and the value picked for the phi doesn't matter.
4221
+ for (PHINode &LCSSAPhi : LoopExitBlock->phis ())
4222
+ if (any_of (LCSSAPhi.incoming_values (),
4223
+ [Phi](Value *V) { return V == Phi; }))
4224
+ LCSSAPhi.addIncoming (ExtractForPhiUsedOutsideLoop, LoopMiddleBlock);
4260
4225
}
4261
4226
4262
4227
void InnerLoopVectorizer::fixReduction (PHINode *Phi) {
@@ -4421,11 +4386,10 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
4421
4386
// We know that the loop is in LCSSA form. We need to update the PHI nodes
4422
4387
// in the exit blocks. See comment on analogous loop in
4423
4388
// fixFirstOrderRecurrence for a more complete explanation of the logic.
4424
- if (!Cost->requiresScalarEpilogue ())
4425
- for (PHINode &LCSSAPhi : LoopExitBlock->phis ())
4426
- if (any_of (LCSSAPhi.incoming_values (),
4427
- [LoopExitInst](Value *V) { return V == LoopExitInst; }))
4428
- LCSSAPhi.addIncoming (ReducedPartRdx, LoopMiddleBlock);
4389
+ for (PHINode &LCSSAPhi : LoopExitBlock->phis ())
4390
+ if (any_of (LCSSAPhi.incoming_values (),
4391
+ [LoopExitInst](Value *V) { return V == LoopExitInst; }))
4392
+ LCSSAPhi.addIncoming (ReducedPartRdx, LoopMiddleBlock);
4429
4393
4430
4394
// Fix the scalar loop reduction variable with the incoming reduction sum
4431
4395
// from the vector body and from the backedge value.
@@ -8074,11 +8038,7 @@ BasicBlock *EpilogueVectorizerMainLoop::emitMinimumIterationCountCheck(
8074
8038
8075
8039
// Update dominator for Bypass & LoopExit.
8076
8040
DT->changeImmediateDominator (Bypass, TCCheckBlock);
8077
- if (!Cost->requiresScalarEpilogue ())
8078
- // For loops with multiple exits, there's no edge from the middle block
8079
- // to exit blocks (as the epilogue must run) and thus no need to update
8080
- // the immediate dominator of the exit blocks.
8081
- DT->changeImmediateDominator (LoopExitBlock, TCCheckBlock);
8041
+ DT->changeImmediateDominator (LoopExitBlock, TCCheckBlock);
8082
8042
8083
8043
LoopBypassBlocks.push_back (TCCheckBlock);
8084
8044
@@ -8142,12 +8102,7 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() {
8142
8102
8143
8103
DT->changeImmediateDominator (LoopScalarPreHeader,
8144
8104
EPI.EpilogueIterationCountCheck );
8145
- if (!Cost->requiresScalarEpilogue ())
8146
- // If there is an epilogue which must run, there's no edge from the
8147
- // middle block to exit blocks and thus no need to update the immediate
8148
- // dominator of the exit blocks.
8149
- DT->changeImmediateDominator (LoopExitBlock,
8150
- EPI.EpilogueIterationCountCheck );
8105
+ DT->changeImmediateDominator (LoopExitBlock, EPI.EpilogueIterationCountCheck );
8151
8106
8152
8107
// Keep track of bypass blocks, as they feed start values to the induction
8153
8108
// phis in the scalar loop preheader.
0 commit comments