@@ -467,11 +467,12 @@ class InnerLoopVectorizer {
467
467
ElementCount MinProfitableTripCount,
468
468
unsigned UnrollFactor, LoopVectorizationLegality *LVL,
469
469
LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI,
470
- ProfileSummaryInfo *PSI, GeneratedRTChecks &RTChecks)
470
+ ProfileSummaryInfo *PSI, GeneratedRTChecks &RTChecks,
471
+ VPlan &Plan)
471
472
: OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI),
472
473
AC (AC), ORE(ORE), VF(VecWidth), UF(UnrollFactor),
473
474
Builder(PSE.getSE()->getContext()), Legal(LVL), Cost(CM), BFI(BFI),
474
- PSI(PSI), RTChecks(RTChecks) {
475
+ PSI(PSI), RTChecks(RTChecks), Plan(Plan) {
475
476
// Query this against the original loop and save it here because the profile
476
477
// of the original loop header may change as the transformation happens.
477
478
OptForSizeBasedOnProfile = llvm::shouldOptimizeForSize (
@@ -498,7 +499,7 @@ class InnerLoopVectorizer {
498
499
createVectorizedLoopSkeleton (const SCEV2ValueTy &ExpandedSCEVs);
499
500
500
501
/// Fix the vectorized code, taking care of header phis, live-outs, and more.
501
- void fixVectorizedLoop (VPTransformState &State, VPlan &Plan );
502
+ void fixVectorizedLoop (VPTransformState &State);
502
503
503
504
// Return true if any runtime check is added.
504
505
bool areSafetyChecksAdded () { return AddedSafetyChecks; }
@@ -513,7 +514,7 @@ class InnerLoopVectorizer {
513
514
VPTransformState &State);
514
515
515
516
/// Fix the non-induction PHIs in the VPlan referenced by the Plan member.
516
- void fixNonInductionPHIs (VPlan &Plan, VPTransformState &State);
517
+ void fixNonInductionPHIs (VPTransformState &State);
517
518
518
519
/// Create a new phi node for the induction variable \p OrigPhi to resume
519
520
/// iteration count in the scalar epilogue, from where the vectorized loop
@@ -541,8 +542,7 @@ class InnerLoopVectorizer {
541
542
/// Set up the values of the IVs correctly when exiting the vector loop.
542
543
virtual void fixupIVUsers (PHINode *OrigPhi, const InductionDescriptor &II,
543
544
Value *VectorTripCount, Value *EndValue,
544
- BasicBlock *MiddleBlock, VPlan &Plan,
545
- VPTransformState &State);
545
+ BasicBlock *MiddleBlock, VPTransformState &State);
546
546
547
547
/// Iteratively sink the scalarized operands of a predicated instruction into
548
548
/// the block that was created for it.
@@ -674,6 +674,8 @@ class InnerLoopVectorizer {
674
674
/// Structure to hold information about generated runtime checks, responsible
675
675
/// for cleaning the checks, if vectorization turns out unprofitable.
676
676
GeneratedRTChecks &RTChecks;
677
+
678
+ VPlan &Plan;
677
679
};
678
680
679
681
/// Encapsulate information regarding vectorization of a loop and its epilogue.
@@ -715,10 +717,10 @@ class InnerLoopAndEpilogueVectorizer : public InnerLoopVectorizer {
715
717
OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI,
716
718
LoopVectorizationLegality *LVL, llvm::LoopVectorizationCostModel *CM,
717
719
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
718
- GeneratedRTChecks &Checks)
720
+ GeneratedRTChecks &Checks, VPlan &Plan )
719
721
: InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
720
722
EPI.MainLoopVF, EPI.MainLoopVF, EPI.MainLoopUF, LVL,
721
- CM, BFI, PSI, Checks),
723
+ CM, BFI, PSI, Checks, Plan ),
722
724
EPI (EPI) {}
723
725
724
726
// Override this function to handle the more complex control flow around the
@@ -755,9 +757,9 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
755
757
OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI,
756
758
LoopVectorizationLegality *LVL, llvm::LoopVectorizationCostModel *CM,
757
759
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
758
- GeneratedRTChecks &Check)
760
+ GeneratedRTChecks &Check, VPlan &Plan )
759
761
: InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
760
- EPI, LVL, CM, BFI, PSI, Check) {}
762
+ EPI, LVL, CM, BFI, PSI, Check, Plan ) {}
761
763
/// Implements the interface for creating a vectorized skeleton using the
762
764
/// *main loop* strategy (i.e., the first pass of VPlan execution).
763
765
std::pair<BasicBlock *, Value *>
@@ -773,7 +775,7 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
773
775
774
776
void fixupIVUsers (PHINode *OrigPhi, const InductionDescriptor &II,
775
777
Value *VectorTripCount, Value *EndValue,
776
- BasicBlock *MiddleBlock, VPlan &Plan,
778
+ BasicBlock *MiddleBlock,
777
779
VPTransformState &State) override {};
778
780
};
779
781
@@ -789,9 +791,9 @@ class EpilogueVectorizerEpilogueLoop : public InnerLoopAndEpilogueVectorizer {
789
791
OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI,
790
792
LoopVectorizationLegality *LVL, llvm::LoopVectorizationCostModel *CM,
791
793
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
792
- GeneratedRTChecks &Checks)
794
+ GeneratedRTChecks &Checks, VPlan &Plan )
793
795
: InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
794
- EPI, LVL, CM, BFI, PSI, Checks) {
796
+ EPI, LVL, CM, BFI, PSI, Checks, Plan ) {
795
797
TripCount = EPI.TripCount ;
796
798
}
797
799
/// Implements the interface for creating a vectorized skeleton using the
@@ -2751,7 +2753,7 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton(
2751
2753
void InnerLoopVectorizer::fixupIVUsers (PHINode *OrigPhi,
2752
2754
const InductionDescriptor &II,
2753
2755
Value *VectorTripCount, Value *EndValue,
2754
- BasicBlock *MiddleBlock, VPlan &Plan,
2756
+ BasicBlock *MiddleBlock,
2755
2757
VPTransformState &State) {
2756
2758
// There are two kinds of external IV usages - those that use the value
2757
2759
// computed in the last iteration (the PHI) and those that use the penultimate
@@ -2931,11 +2933,10 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
2931
2933
TargetTransformInfo::TCK_RecipThroughput);
2932
2934
}
2933
2935
2934
- void InnerLoopVectorizer::fixVectorizedLoop (VPTransformState &State,
2935
- VPlan &Plan) {
2936
+ void InnerLoopVectorizer::fixVectorizedLoop (VPTransformState &State) {
2936
2937
// Fix widened non-induction PHIs by setting up the PHI operands.
2937
2938
if (EnableVPlanNativePath)
2938
- fixNonInductionPHIs (Plan, State);
2939
+ fixNonInductionPHIs (State);
2939
2940
2940
2941
// Forget the original basic block.
2941
2942
PSE.getSE ()->forgetLoop (OrigLoop);
@@ -2966,7 +2967,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
2966
2967
for (const auto &Entry : Legal->getInductionVars ())
2967
2968
fixupIVUsers (Entry.first , Entry.second ,
2968
2969
getOrCreateVectorTripCount (nullptr ),
2969
- IVEndValues[Entry.first ], LoopMiddleBlock, Plan, State);
2970
+ IVEndValues[Entry.first ], LoopMiddleBlock, State);
2970
2971
}
2971
2972
2972
2973
// Fix live-out phis not already fixed earlier.
@@ -3077,8 +3078,7 @@ void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
3077
3078
} while (Changed);
3078
3079
}
3079
3080
3080
- void InnerLoopVectorizer::fixNonInductionPHIs (VPlan &Plan,
3081
- VPTransformState &State) {
3081
+ void InnerLoopVectorizer::fixNonInductionPHIs (VPTransformState &State) {
3082
3082
auto Iter = vp_depth_first_deep (Plan.getEntry ());
3083
3083
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
3084
3084
for (VPRecipeBase &P : VPBB->phis ()) {
@@ -7744,7 +7744,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7744
7744
7745
7745
// 3. Fix the vectorized code: take care of header phi's, live-outs,
7746
7746
// predication, updating analyses.
7747
- ILV.fixVectorizedLoop (State, BestVPlan );
7747
+ ILV.fixVectorizedLoop (State);
7748
7748
7749
7749
ILV.printDebugTracesAtEnd ();
7750
7750
@@ -9727,7 +9727,7 @@ static bool processLoopInVPlanNativePath(
9727
9727
GeneratedRTChecks Checks (PSE, DT, LI, TTI, F->getDataLayout (),
9728
9728
AddBranchWeights);
9729
9729
InnerLoopVectorizer LB (L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width ,
9730
- VF.Width , 1 , LVL, &CM, BFI, PSI, Checks);
9730
+ VF.Width , 1 , LVL, &CM, BFI, PSI, Checks, BestPlan );
9731
9731
LLVM_DEBUG (dbgs () << " Vectorizing outer loop in \" "
9732
9732
<< L->getHeader ()->getParent ()->getName () << " \"\n " );
9733
9733
LVP.executePlan (VF.Width , 1 , BestPlan, LB, DT, false );
@@ -10215,11 +10215,11 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10215
10215
assert (IC > 1 && " interleave count should not be 1 or 0" );
10216
10216
// If we decided that it is not legal to vectorize the loop, then
10217
10217
// interleave it.
10218
+ VPlan &BestPlan = LVP.getPlanFor (VF.Width );
10218
10219
InnerLoopVectorizer Unroller (
10219
10220
L, PSE, LI, DT, TLI, TTI, AC, ORE, ElementCount::getFixed (1 ),
10220
- ElementCount::getFixed (1 ), IC, &LVL, &CM, BFI, PSI, Checks);
10221
+ ElementCount::getFixed (1 ), IC, &LVL, &CM, BFI, PSI, Checks, BestPlan );
10221
10222
10222
- VPlan &BestPlan = LVP.getPlanFor (VF.Width );
10223
10223
LVP.executePlan (VF.Width , IC, BestPlan, Unroller, DT, false );
10224
10224
10225
10225
ORE->emit ([&]() {
@@ -10236,15 +10236,16 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10236
10236
VectorizationFactor EpilogueVF =
10237
10237
LVP.selectEpilogueVectorizationFactor (VF.Width , IC);
10238
10238
if (EpilogueVF.Width .isVector ()) {
10239
+ std::unique_ptr<VPlan> BestMainPlan (BestPlan.duplicate ());
10239
10240
10240
10241
// The first pass vectorizes the main loop and creates a scalar epilogue
10241
10242
// to be vectorized by executing the plan (potentially with a different
10242
10243
// factor) again shortly afterwards.
10243
10244
EpilogueLoopVectorizationInfo EPI (VF.Width , IC, EpilogueVF.Width , 1 );
10244
10245
EpilogueVectorizerMainLoop MainILV (L, PSE, LI, DT, TLI, TTI, AC, ORE,
10245
- EPI, &LVL, &CM, BFI, PSI, Checks);
10246
+ EPI, &LVL, &CM, BFI, PSI, Checks,
10247
+ *BestMainPlan);
10246
10248
10247
- std::unique_ptr<VPlan> BestMainPlan (BestPlan.duplicate ());
10248
10249
auto ExpandedSCEVs = LVP.executePlan (EPI.MainLoopVF , EPI.MainLoopUF ,
10249
10250
*BestMainPlan, MainILV, DT, false );
10250
10251
++LoopsVectorized;
@@ -10253,11 +10254,11 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10253
10254
// edges from the first pass.
10254
10255
EPI.MainLoopVF = EPI.EpilogueVF ;
10255
10256
EPI.MainLoopUF = EPI.EpilogueUF ;
10257
+ VPlan &BestEpiPlan = LVP.getPlanFor (EPI.EpilogueVF );
10256
10258
EpilogueVectorizerEpilogueLoop EpilogILV (L, PSE, LI, DT, TLI, TTI, AC,
10257
10259
ORE, EPI, &LVL, &CM, BFI, PSI,
10258
- Checks);
10260
+ Checks, BestEpiPlan );
10259
10261
10260
- VPlan &BestEpiPlan = LVP.getPlanFor (EPI.EpilogueVF );
10261
10262
VPRegionBlock *VectorLoop = BestEpiPlan.getVectorLoopRegion ();
10262
10263
VPBasicBlock *Header = VectorLoop->getEntryBasicBlock ();
10263
10264
Header->setName (" vec.epilog.vector.body" );
@@ -10340,7 +10341,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10340
10341
} else {
10341
10342
InnerLoopVectorizer LB (L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width ,
10342
10343
VF.MinProfitableTripCount , IC, &LVL, &CM, BFI,
10343
- PSI, Checks);
10344
+ PSI, Checks, BestPlan );
10344
10345
LVP.executePlan (VF.Width , IC, BestPlan, LB, DT, false );
10345
10346
++LoopsVectorized;
10346
10347
0 commit comments