@@ -494,7 +494,7 @@ class InnerLoopVectorizer {
494
494
bool InvariantCond, VPTransformState &State);
495
495
496
496
/// Fix the vectorized code, taking care of header phis, live-outs, and more.
497
- void fixVectorizedLoop ();
497
+ void fixVectorizedLoop (VPTransformState &State );
498
498
499
499
// Return true if any runtime check is added.
500
500
bool areSafetyChecksAdded () { return AddedSafetyChecks; }
@@ -559,6 +559,10 @@ class InnerLoopVectorizer {
559
559
VectorLoopValueMap.setVectorValue (Scalar, Part, Vector);
560
560
}
561
561
562
+ void resetVectorValue (Value *Scalar, unsigned Part, Value *Vector) {
563
+ VectorLoopValueMap.resetVectorValue (Scalar, Part, Vector);
564
+ }
565
+
562
566
void setScalarValue (Value *Scalar, const VPIteration &Instance, Value *V) {
563
567
VectorLoopValueMap.setScalarValue (Scalar, Instance, V);
564
568
}
@@ -598,7 +602,7 @@ class InnerLoopVectorizer {
598
602
void setDebugLocFromInst (IRBuilder<> &B, const Value *Ptr);
599
603
600
604
/// Fix the non-induction PHIs in the OrigPHIsToFix vector.
601
- void fixNonInductionPHIs (void );
605
+ void fixNonInductionPHIs (VPTransformState &State );
602
606
603
607
/// Create a broadcast instruction. This method generates a broadcast
604
608
/// instruction (shuffle) for loop invariant values and for the induction
@@ -629,15 +633,15 @@ class InnerLoopVectorizer {
629
633
Value *Step, Instruction *DL);
630
634
631
635
/// Handle all cross-iteration phis in the header.
632
- void fixCrossIterationPHIs ();
636
+ void fixCrossIterationPHIs (VPTransformState &State );
633
637
634
638
/// Fix a first-order recurrence. This is the second phase of vectorizing
635
639
/// this phi node.
636
- void fixFirstOrderRecurrence (PHINode *Phi);
640
+ void fixFirstOrderRecurrence (PHINode *Phi, VPTransformState &State );
637
641
638
642
/// Fix a reduction cross-iteration phi. This is the second phase of
639
643
/// vectorizing this phi node.
640
- void fixReduction (PHINode *Phi);
644
+ void fixReduction (PHINode *Phi, VPTransformState &State );
641
645
642
646
/// Clear NSW/NUW flags from reduction instructions if necessary.
643
647
void clearReductionWrapFlags (RecurrenceDescriptor &RdxDesc);
@@ -647,7 +651,7 @@ class InnerLoopVectorizer {
647
651
/// block as exiting edges from the scalar epilogue loop (if present) are
648
652
/// already in place, and we exit the vector loop exclusively to the middle
649
653
/// block.
650
- void fixLCSSAPHIs ();
654
+ void fixLCSSAPHIs (VPTransformState &State );
651
655
652
656
/// Iteratively sink the scalarized operands of a predicated instruction into
653
657
/// the block that was created for it.
@@ -3970,7 +3974,7 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
3970
3974
}
3971
3975
}
3972
3976
3973
- void InnerLoopVectorizer::fixVectorizedLoop () {
3977
+ void InnerLoopVectorizer::fixVectorizedLoop (VPTransformState &State ) {
3974
3978
// Insert truncates and extends for any truncated instructions as hints to
3975
3979
// InstCombine.
3976
3980
if (VF.isVector ())
@@ -3980,14 +3984,14 @@ void InnerLoopVectorizer::fixVectorizedLoop() {
3980
3984
if (OrigPHIsToFix.size ()) {
3981
3985
assert (EnableVPlanNativePath &&
3982
3986
" Unexpected non-induction PHIs for fixup in non VPlan-native path" );
3983
- fixNonInductionPHIs ();
3987
+ fixNonInductionPHIs (State );
3984
3988
}
3985
3989
3986
3990
// At this point every instruction in the original loop is widened to a
3987
3991
// vector form. Now we need to fix the recurrences in the loop. These PHI
3988
3992
// nodes are currently empty because we did not want to introduce cycles.
3989
3993
// This is the second stage of vectorizing recurrences.
3990
- fixCrossIterationPHIs ();
3994
+ fixCrossIterationPHIs (State );
3991
3995
3992
3996
// Forget the original basic block.
3993
3997
PSE.getSE ()->forgetLoop (OrigLoop);
@@ -3998,7 +4002,7 @@ void InnerLoopVectorizer::fixVectorizedLoop() {
3998
4002
getOrCreateVectorTripCount (LI->getLoopFor (LoopVectorBody)),
3999
4003
IVEndValues[Entry.first ], LoopMiddleBlock);
4000
4004
4001
- fixLCSSAPHIs ();
4005
+ fixLCSSAPHIs (State );
4002
4006
for (Instruction *PI : PredicatedInstructions)
4003
4007
sinkScalarOperands (&*PI);
4004
4008
@@ -4023,7 +4027,7 @@ void InnerLoopVectorizer::fixVectorizedLoop() {
4023
4027
LI->getLoopFor (LoopScalarBody), VF.getKnownMinValue () * UF);
4024
4028
}
4025
4029
4026
- void InnerLoopVectorizer::fixCrossIterationPHIs () {
4030
+ void InnerLoopVectorizer::fixCrossIterationPHIs (VPTransformState &State ) {
4027
4031
// In order to support recurrences we need to be able to vectorize Phi nodes.
4028
4032
// Phi nodes have cycles, so we need to vectorize them in two stages. This is
4029
4033
// stage #2: We now need to fix the recurrences by adding incoming edges to
@@ -4033,13 +4037,14 @@ void InnerLoopVectorizer::fixCrossIterationPHIs() {
4033
4037
for (PHINode &Phi : OrigLoop->getHeader ()->phis ()) {
4034
4038
// Handle first-order recurrences and reductions that need to be fixed.
4035
4039
if (Legal->isFirstOrderRecurrence (&Phi))
4036
- fixFirstOrderRecurrence (&Phi);
4040
+ fixFirstOrderRecurrence (&Phi, State );
4037
4041
else if (Legal->isReductionVariable (&Phi))
4038
- fixReduction (&Phi);
4042
+ fixReduction (&Phi, State );
4039
4043
}
4040
4044
}
4041
4045
4042
- void InnerLoopVectorizer::fixFirstOrderRecurrence (PHINode *Phi) {
4046
+ void InnerLoopVectorizer::fixFirstOrderRecurrence (PHINode *Phi,
4047
+ VPTransformState &State) {
4043
4048
// This is the second phase of vectorizing first-order recurrences. An
4044
4049
// overview of the transformation is described below. Suppose we have the
4045
4050
// following loop.
@@ -4107,10 +4112,11 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
4107
4112
Builder.getInt32 (VF.getKnownMinValue () - 1 ), " vector.recur.init" );
4108
4113
}
4109
4114
4115
+ VPValue *PhiDef = State.Plan ->getVPValue (Phi);
4116
+ VPValue *PreviousDef = State.Plan ->getVPValue (Previous);
4110
4117
// We constructed a temporary phi node in the first phase of vectorization.
4111
4118
// This phi node will eventually be deleted.
4112
- Builder.SetInsertPoint (
4113
- cast<Instruction>(VectorLoopValueMap.getVectorValue (Phi, 0 )));
4119
+ Builder.SetInsertPoint (cast<Instruction>(State.get (PhiDef, 0 )));
4114
4120
4115
4121
// Create a phi node for the new recurrence. The current value will either be
4116
4122
// the initial value inserted into a vector or loop-varying vector value.
@@ -4119,7 +4125,7 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
4119
4125
4120
4126
// Get the vectorized previous value of the last part UF - 1. It appears last
4121
4127
// among all unrolled iterations, due to the order of their construction.
4122
- Value *PreviousLastPart = getOrCreateVectorValue (Previous , UF - 1 );
4128
+ Value *PreviousLastPart = State. get (PreviousDef , UF - 1 );
4123
4129
4124
4130
// Find and set the insertion point after the previous value if it is an
4125
4131
// instruction.
@@ -4157,15 +4163,15 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
4157
4163
4158
4164
// Shuffle the current and previous vector and update the vector parts.
4159
4165
for (unsigned Part = 0 ; Part < UF; ++Part) {
4160
- Value *PreviousPart = getOrCreateVectorValue (Previous , Part);
4161
- Value *PhiPart = VectorLoopValueMap. getVectorValue (Phi , Part);
4166
+ Value *PreviousPart = State. get (PreviousDef , Part);
4167
+ Value *PhiPart = State. get (PhiDef , Part);
4162
4168
auto *Shuffle =
4163
4169
VF.isVector ()
4164
4170
? Builder.CreateShuffleVector (Incoming, PreviousPart, ShuffleMask)
4165
4171
: Incoming;
4166
4172
PhiPart->replaceAllUsesWith (Shuffle);
4167
4173
cast<Instruction>(PhiPart)->eraseFromParent ();
4168
- VectorLoopValueMap. resetVectorValue (Phi, Part , Shuffle);
4174
+ State. reset (PhiDef, Phi , Shuffle, Part );
4169
4175
Incoming = PreviousPart;
4170
4176
}
4171
4177
@@ -4196,7 +4202,7 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
4196
4202
// `Incoming`. This is analogous to the vectorized case above: extracting the
4197
4203
// second last element when VF > 1.
4198
4204
else if (UF > 1 )
4199
- ExtractForPhiUsedOutsideLoop = getOrCreateVectorValue (Previous , UF - 2 );
4205
+ ExtractForPhiUsedOutsideLoop = State. get (PreviousDef , UF - 2 );
4200
4206
4201
4207
// Fix the initial value of the original recurrence in the scalar loop.
4202
4208
Builder.SetInsertPoint (&*LoopScalarPreHeader->begin ());
@@ -4224,7 +4230,7 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
4224
4230
LCSSAPhi.addIncoming (ExtractForPhiUsedOutsideLoop, LoopMiddleBlock);
4225
4231
}
4226
4232
4227
- void InnerLoopVectorizer::fixReduction (PHINode *Phi) {
4233
+ void InnerLoopVectorizer::fixReduction (PHINode *Phi, VPTransformState &State ) {
4228
4234
// Get its reduction variable descriptor.
4229
4235
assert (Legal->isReductionVariable (Phi) &&
4230
4236
" Unable to find the reduction variable" );
@@ -4236,8 +4242,9 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
4236
4242
setDebugLocFromInst (Builder, ReductionStartValue);
4237
4243
bool IsInLoopReductionPhi = Cost->isInLoopReduction (Phi);
4238
4244
4245
+ VPValue *LoopExitInstDef = State.Plan ->getVPValue (LoopExitInst);
4239
4246
// This is the vector-clone of the value that leaves the loop.
4240
- Type *VecTy = getOrCreateVectorValue (LoopExitInst , 0 )->getType ();
4247
+ Type *VecTy = State. get (LoopExitInstDef , 0 )->getType ();
4241
4248
4242
4249
// Wrap flags are in general invalid after vectorization, clear them.
4243
4250
clearReductionWrapFlags (RdxDesc);
@@ -4250,8 +4257,8 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
4250
4257
Value *LoopVal = Phi->getIncomingValueForBlock (Latch);
4251
4258
4252
4259
for (unsigned Part = 0 ; Part < UF; ++Part) {
4253
- Value *VecRdxPhi = getOrCreateVectorValue ( Phi, Part);
4254
- Value *Val = getOrCreateVectorValue ( LoopVal, Part);
4260
+ Value *VecRdxPhi = State. get (State. Plan -> getVPValue ( Phi) , Part);
4261
+ Value *Val = State. get (State. Plan -> getVPValue ( LoopVal) , Part);
4255
4262
cast<PHINode>(VecRdxPhi)
4256
4263
->addIncoming (Val, LI->getLoopFor (LoopVectorBody)->getLoopLatch ());
4257
4264
}
@@ -4270,8 +4277,7 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
4270
4277
// be predicated, and does not need to be handled here.
4271
4278
if (Cost->foldTailByMasking () && !IsInLoopReductionPhi) {
4272
4279
for (unsigned Part = 0 ; Part < UF; ++Part) {
4273
- Value *VecLoopExitInst =
4274
- VectorLoopValueMap.getVectorValue (LoopExitInst, Part);
4280
+ Value *VecLoopExitInst = State.get (LoopExitInstDef, Part);
4275
4281
Value *Sel = nullptr ;
4276
4282
for (User *U : VecLoopExitInst->users ()) {
4277
4283
if (isa<SelectInst>(U)) {
@@ -4281,7 +4287,7 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
4281
4287
assert (isa<PHINode>(U) && " Reduction exit must feed Phi's or select" );
4282
4288
}
4283
4289
assert (Sel && " Reduction exit feeds no select" );
4284
- VectorLoopValueMap. resetVectorValue (LoopExitInst, Part , Sel);
4290
+ State. reset (LoopExitInstDef, LoopExitInst , Sel, Part );
4285
4291
4286
4292
// If the target can create a predicated operator for the reduction at no
4287
4293
// extra cost in the loop (for example a predicated vadd), it can be
@@ -4293,7 +4299,8 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
4293
4299
TTI->preferPredicatedReductionSelect (
4294
4300
RdxDesc.getOpcode (), Phi->getType (),
4295
4301
TargetTransformInfo::ReductionFlags ())) {
4296
- auto *VecRdxPhi = cast<PHINode>(getOrCreateVectorValue (Phi, Part));
4302
+ auto *VecRdxPhi =
4303
+ cast<PHINode>(State.get (State.Plan ->getVPValue (Phi), Part));
4297
4304
VecRdxPhi->setIncomingValueForBlock (
4298
4305
LI->getLoopFor (LoopVectorBody)->getLoopLatch (), Sel);
4299
4306
}
@@ -4311,7 +4318,7 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
4311
4318
LI->getLoopFor (LoopVectorBody)->getLoopLatch ()->getTerminator ());
4312
4319
VectorParts RdxParts (UF);
4313
4320
for (unsigned Part = 0 ; Part < UF; ++Part) {
4314
- RdxParts[Part] = VectorLoopValueMap. getVectorValue (LoopExitInst , Part);
4321
+ RdxParts[Part] = State. get (LoopExitInstDef , Part);
4315
4322
Value *Trunc = Builder.CreateTrunc (RdxParts[Part], RdxVecTy);
4316
4323
Value *Extnd = RdxDesc.isSigned () ? Builder.CreateSExt (Trunc, VecTy)
4317
4324
: Builder.CreateZExt (Trunc, VecTy);
@@ -4327,12 +4334,12 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
4327
4334
Builder.SetInsertPoint (&*LoopMiddleBlock->getFirstInsertionPt ());
4328
4335
for (unsigned Part = 0 ; Part < UF; ++Part) {
4329
4336
RdxParts[Part] = Builder.CreateTrunc (RdxParts[Part], RdxVecTy);
4330
- VectorLoopValueMap. resetVectorValue (LoopExitInst, Part , RdxParts[Part]);
4337
+ State. reset (LoopExitInstDef, LoopExitInst , RdxParts[Part], Part );
4331
4338
}
4332
4339
}
4333
4340
4334
4341
// Reduce all of the unrolled parts into a single vector.
4335
- Value *ReducedPartRdx = VectorLoopValueMap. getVectorValue (LoopExitInst , 0 );
4342
+ Value *ReducedPartRdx = State. get (LoopExitInstDef , 0 );
4336
4343
unsigned Op = RecurrenceDescriptor::getOpcode (RK);
4337
4344
4338
4345
// The middle block terminator has already been assigned a DebugLoc here (the
@@ -4348,7 +4355,7 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
4348
4355
IRBuilderBase::FastMathFlagGuard FMFG (Builder);
4349
4356
Builder.setFastMathFlags (RdxDesc.getFastMathFlags ());
4350
4357
for (unsigned Part = 1 ; Part < UF; ++Part) {
4351
- Value *RdxPart = VectorLoopValueMap. getVectorValue (LoopExitInst , Part);
4358
+ Value *RdxPart = State. get (LoopExitInstDef , Part);
4352
4359
if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
4353
4360
ReducedPartRdx = Builder.CreateBinOp (
4354
4361
(Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, " bin.rdx" );
@@ -4432,7 +4439,7 @@ void InnerLoopVectorizer::clearReductionWrapFlags(
4432
4439
}
4433
4440
}
4434
4441
4435
- void InnerLoopVectorizer::fixLCSSAPHIs () {
4442
+ void InnerLoopVectorizer::fixLCSSAPHIs (VPTransformState &State ) {
4436
4443
for (PHINode &LCSSAPhi : LoopExitBlock->phis ()) {
4437
4444
if (LCSSAPhi.getBasicBlockIndex (LoopMiddleBlock) != -1 )
4438
4445
// Some phis were already hand updated by the reduction and recurrence
@@ -4453,7 +4460,10 @@ void InnerLoopVectorizer::fixLCSSAPHIs() {
4453
4460
// extracted from the vectorized loop.
4454
4461
Builder.SetInsertPoint (LoopMiddleBlock->getTerminator ());
4455
4462
Value *lastIncomingValue =
4456
- getOrCreateScalarValue (IncomingValue, VPIteration (UF - 1 , LastLane));
4463
+ OrigLoop->isLoopInvariant (IncomingValue)
4464
+ ? IncomingValue
4465
+ : State.get (State.Plan ->getVPValue (IncomingValue),
4466
+ VPIteration (UF - 1 , LastLane));
4457
4467
LCSSAPhi.addIncoming (lastIncomingValue, LoopMiddleBlock);
4458
4468
}
4459
4469
}
@@ -4522,10 +4532,10 @@ void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
4522
4532
} while (Changed);
4523
4533
}
4524
4534
4525
- void InnerLoopVectorizer::fixNonInductionPHIs () {
4535
+ void InnerLoopVectorizer::fixNonInductionPHIs (VPTransformState &State ) {
4526
4536
for (PHINode *OrigPhi : OrigPHIsToFix) {
4527
4537
PHINode *NewPhi =
4528
- cast<PHINode>(VectorLoopValueMap. getVectorValue ( OrigPhi, 0 ));
4538
+ cast<PHINode>(State. get (State. Plan -> getVPValue ( OrigPhi) , 0 ));
4529
4539
unsigned NumIncomingValues = OrigPhi->getNumIncomingValues ();
4530
4540
4531
4541
SmallVector<BasicBlock *, 2 > ScalarBBPredecessors (
@@ -7777,14 +7787,12 @@ void LoopVectorizationPlanner::executePlan(InnerLoopVectorizer &ILV,
7777
7787
VPCallbackILV CallbackILV (ILV);
7778
7788
7779
7789
assert (BestVF.hasValue () && " Vectorization Factor is missing" );
7790
+ assert (VPlans.size () == 1 && " Not a single VPlan to execute." );
7780
7791
7781
- VPTransformState State{*BestVF,
7782
- BestUF,
7783
- LI,
7784
- DT,
7785
- ILV.Builder ,
7786
- ILV.VectorLoopValueMap ,
7787
- &ILV,
7792
+ VPTransformState State{*BestVF, BestUF,
7793
+ LI, DT,
7794
+ ILV.Builder , ILV.VectorLoopValueMap ,
7795
+ &ILV, VPlans.front ().get (),
7788
7796
CallbackILV};
7789
7797
State.CFG .PrevBB = ILV.createVectorizedLoopSkeleton ();
7790
7798
State.TripCount = ILV.getOrCreateTripCount (nullptr );
@@ -7801,12 +7809,11 @@ void LoopVectorizationPlanner::executePlan(InnerLoopVectorizer &ILV,
7801
7809
//===------------------------------------------------===//
7802
7810
7803
7811
// 2. Copy and widen instructions from the old loop into the new loop.
7804
- assert (VPlans.size () == 1 && " Not a single VPlan to execute." );
7805
7812
VPlans.front ()->execute (&State);
7806
7813
7807
7814
// 3. Fix the vectorized code: take care of header phis, live-outs,
7808
7815
// predication, updating analyses.
7809
- ILV.fixVectorizedLoop ();
7816
+ ILV.fixVectorizedLoop (State );
7810
7817
7811
7818
ILV.printDebugTracesAtEnd ();
7812
7819
}
@@ -9288,6 +9295,12 @@ void VPTransformState::set(VPValue *Def, Value *IRDef, Value *V,
9288
9295
ILV->setVectorValue (IRDef, Part, V);
9289
9296
}
9290
9297
9298
+ void VPTransformState::reset (VPValue *Def, Value *IRDef, Value *V,
9299
+ unsigned Part) {
9300
+ set (Def, V, Part);
9301
+ ILV->resetVectorValue (IRDef, Part, V);
9302
+ }
9303
+
9291
9304
Value *VPTransformState::get (VPValue *Def, unsigned Part) {
9292
9305
// If Values have been set for this Def return the one relevant for \p Part.
9293
9306
if (hasVectorValue (Def, Part))
0 commit comments