Skip to content

Commit 3bb6dc0

Browse files
committed
[LV] Replace some uses of VectorLoopValueMap with VPTransformState (NFC)
This patch updates some places where VectorLoopValueMap is accessed directly so that they go through VPTransformState instead. As we move towards managing created values exclusively in VPTransformState, this ensures that each use can always fetch the correct value. This is in preparation for D92285, which switches to managing scalarized values through VPValues. In the future, the various fix* functions should be moved directly into the VPlan codegen stage. Reviewed By: gilr Differential Revision: https://reviews.llvm.org/D95757
1 parent a14a59f commit 3bb6dc0

File tree

2 files changed

+65
-48
lines changed

2 files changed

+65
-48
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 59 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -494,7 +494,7 @@ class InnerLoopVectorizer {
494494
bool InvariantCond, VPTransformState &State);
495495

496496
/// Fix the vectorized code, taking care of header phi's, live-outs, and more.
497-
void fixVectorizedLoop();
497+
void fixVectorizedLoop(VPTransformState &State);
498498

499499
// Return true if any runtime check is added.
500500
bool areSafetyChecksAdded() { return AddedSafetyChecks; }
@@ -559,6 +559,10 @@ class InnerLoopVectorizer {
559559
VectorLoopValueMap.setVectorValue(Scalar, Part, Vector);
560560
}
561561

562+
void resetVectorValue(Value *Scalar, unsigned Part, Value *Vector) {
563+
VectorLoopValueMap.resetVectorValue(Scalar, Part, Vector);
564+
}
565+
562566
void setScalarValue(Value *Scalar, const VPIteration &Instance, Value *V) {
563567
VectorLoopValueMap.setScalarValue(Scalar, Instance, V);
564568
}
@@ -598,7 +602,7 @@ class InnerLoopVectorizer {
598602
void setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr);
599603

600604
/// Fix the non-induction PHIs in the OrigPHIsToFix vector.
601-
void fixNonInductionPHIs(void);
605+
void fixNonInductionPHIs(VPTransformState &State);
602606

603607
/// Create a broadcast instruction. This method generates a broadcast
604608
/// instruction (shuffle) for loop invariant values and for the induction
@@ -629,15 +633,15 @@ class InnerLoopVectorizer {
629633
Value *Step, Instruction *DL);
630634

631635
/// Handle all cross-iteration phis in the header.
632-
void fixCrossIterationPHIs();
636+
void fixCrossIterationPHIs(VPTransformState &State);
633637

634638
/// Fix a first-order recurrence. This is the second phase of vectorizing
635639
/// this phi node.
636-
void fixFirstOrderRecurrence(PHINode *Phi);
640+
void fixFirstOrderRecurrence(PHINode *Phi, VPTransformState &State);
637641

638642
/// Fix a reduction cross-iteration phi. This is the second phase of
639643
/// vectorizing this phi node.
640-
void fixReduction(PHINode *Phi);
644+
void fixReduction(PHINode *Phi, VPTransformState &State);
641645

642646
/// Clear NSW/NUW flags from reduction instructions if necessary.
643647
void clearReductionWrapFlags(RecurrenceDescriptor &RdxDesc);
@@ -647,7 +651,7 @@ class InnerLoopVectorizer {
647651
/// block as exiting edges from the scalar epilogue loop (if present) are
648652
/// already in place, and we exit the vector loop exclusively to the middle
649653
/// block.
650-
void fixLCSSAPHIs();
654+
void fixLCSSAPHIs(VPTransformState &State);
651655

652656
/// Iteratively sink the scalarized operands of a predicated instruction into
653657
/// the block that was created for it.
@@ -3970,7 +3974,7 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
39703974
}
39713975
}
39723976

3973-
void InnerLoopVectorizer::fixVectorizedLoop() {
3977+
void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
39743978
// Insert truncates and extends for any truncated instructions as hints to
39753979
// InstCombine.
39763980
if (VF.isVector())
@@ -3980,14 +3984,14 @@ void InnerLoopVectorizer::fixVectorizedLoop() {
39803984
if (OrigPHIsToFix.size()) {
39813985
assert(EnableVPlanNativePath &&
39823986
"Unexpected non-induction PHIs for fixup in non VPlan-native path");
3983-
fixNonInductionPHIs();
3987+
fixNonInductionPHIs(State);
39843988
}
39853989

39863990
// At this point every instruction in the original loop is widened to a
39873991
// vector form. Now we need to fix the recurrences in the loop. These PHI
39883992
// nodes are currently empty because we did not want to introduce cycles.
39893993
// This is the second stage of vectorizing recurrences.
3990-
fixCrossIterationPHIs();
3994+
fixCrossIterationPHIs(State);
39913995

39923996
// Forget the original basic block.
39933997
PSE.getSE()->forgetLoop(OrigLoop);
@@ -3998,7 +4002,7 @@ void InnerLoopVectorizer::fixVectorizedLoop() {
39984002
getOrCreateVectorTripCount(LI->getLoopFor(LoopVectorBody)),
39994003
IVEndValues[Entry.first], LoopMiddleBlock);
40004004

4001-
fixLCSSAPHIs();
4005+
fixLCSSAPHIs(State);
40024006
for (Instruction *PI : PredicatedInstructions)
40034007
sinkScalarOperands(&*PI);
40044008

@@ -4023,7 +4027,7 @@ void InnerLoopVectorizer::fixVectorizedLoop() {
40234027
LI->getLoopFor(LoopScalarBody), VF.getKnownMinValue() * UF);
40244028
}
40254029

4026-
void InnerLoopVectorizer::fixCrossIterationPHIs() {
4030+
void InnerLoopVectorizer::fixCrossIterationPHIs(VPTransformState &State) {
40274031
// In order to support recurrences we need to be able to vectorize Phi nodes.
40284032
// Phi nodes have cycles, so we need to vectorize them in two stages. This is
40294033
// stage #2: We now need to fix the recurrences by adding incoming edges to
@@ -4033,13 +4037,14 @@ void InnerLoopVectorizer::fixCrossIterationPHIs() {
40334037
for (PHINode &Phi : OrigLoop->getHeader()->phis()) {
40344038
// Handle first-order recurrences and reductions that need to be fixed.
40354039
if (Legal->isFirstOrderRecurrence(&Phi))
4036-
fixFirstOrderRecurrence(&Phi);
4040+
fixFirstOrderRecurrence(&Phi, State);
40374041
else if (Legal->isReductionVariable(&Phi))
4038-
fixReduction(&Phi);
4042+
fixReduction(&Phi, State);
40394043
}
40404044
}
40414045

4042-
void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
4046+
void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi,
4047+
VPTransformState &State) {
40434048
// This is the second phase of vectorizing first-order recurrences. An
40444049
// overview of the transformation is described below. Suppose we have the
40454050
// following loop.
@@ -4107,10 +4112,11 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
41074112
Builder.getInt32(VF.getKnownMinValue() - 1), "vector.recur.init");
41084113
}
41094114

4115+
VPValue *PhiDef = State.Plan->getVPValue(Phi);
4116+
VPValue *PreviousDef = State.Plan->getVPValue(Previous);
41104117
// We constructed a temporary phi node in the first phase of vectorization.
41114118
// This phi node will eventually be deleted.
4112-
Builder.SetInsertPoint(
4113-
cast<Instruction>(VectorLoopValueMap.getVectorValue(Phi, 0)));
4119+
Builder.SetInsertPoint(cast<Instruction>(State.get(PhiDef, 0)));
41144120

41154121
// Create a phi node for the new recurrence. The current value will either be
41164122
// the initial value inserted into a vector or loop-varying vector value.
@@ -4119,7 +4125,7 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
41194125

41204126
// Get the vectorized previous value of the last part UF - 1. It appears last
41214127
// among all unrolled iterations, due to the order of their construction.
4122-
Value *PreviousLastPart = getOrCreateVectorValue(Previous, UF - 1);
4128+
Value *PreviousLastPart = State.get(PreviousDef, UF - 1);
41234129

41244130
// Find and set the insertion point after the previous value if it is an
41254131
// instruction.
@@ -4157,15 +4163,15 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
41574163

41584164
// Shuffle the current and previous vector and update the vector parts.
41594165
for (unsigned Part = 0; Part < UF; ++Part) {
4160-
Value *PreviousPart = getOrCreateVectorValue(Previous, Part);
4161-
Value *PhiPart = VectorLoopValueMap.getVectorValue(Phi, Part);
4166+
Value *PreviousPart = State.get(PreviousDef, Part);
4167+
Value *PhiPart = State.get(PhiDef, Part);
41624168
auto *Shuffle =
41634169
VF.isVector()
41644170
? Builder.CreateShuffleVector(Incoming, PreviousPart, ShuffleMask)
41654171
: Incoming;
41664172
PhiPart->replaceAllUsesWith(Shuffle);
41674173
cast<Instruction>(PhiPart)->eraseFromParent();
4168-
VectorLoopValueMap.resetVectorValue(Phi, Part, Shuffle);
4174+
State.reset(PhiDef, Phi, Shuffle, Part);
41694175
Incoming = PreviousPart;
41704176
}
41714177

@@ -4196,7 +4202,7 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
41964202
// `Incoming`. This is analogous to the vectorized case above: extracting the
41974203
// second last element when VF > 1.
41984204
else if (UF > 1)
4199-
ExtractForPhiUsedOutsideLoop = getOrCreateVectorValue(Previous, UF - 2);
4205+
ExtractForPhiUsedOutsideLoop = State.get(PreviousDef, UF - 2);
42004206

42014207
// Fix the initial value of the original recurrence in the scalar loop.
42024208
Builder.SetInsertPoint(&*LoopScalarPreHeader->begin());
@@ -4224,7 +4230,7 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
42244230
LCSSAPhi.addIncoming(ExtractForPhiUsedOutsideLoop, LoopMiddleBlock);
42254231
}
42264232

4227-
void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
4233+
void InnerLoopVectorizer::fixReduction(PHINode *Phi, VPTransformState &State) {
42284234
// Get its reduction variable descriptor.
42294235
assert(Legal->isReductionVariable(Phi) &&
42304236
"Unable to find the reduction variable");
@@ -4236,8 +4242,9 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
42364242
setDebugLocFromInst(Builder, ReductionStartValue);
42374243
bool IsInLoopReductionPhi = Cost->isInLoopReduction(Phi);
42384244

4245+
VPValue *LoopExitInstDef = State.Plan->getVPValue(LoopExitInst);
42394246
// This is the vector-clone of the value that leaves the loop.
4240-
Type *VecTy = getOrCreateVectorValue(LoopExitInst, 0)->getType();
4247+
Type *VecTy = State.get(LoopExitInstDef, 0)->getType();
42414248

42424249
// Wrap flags are in general invalid after vectorization, clear them.
42434250
clearReductionWrapFlags(RdxDesc);
@@ -4250,8 +4257,8 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
42504257
Value *LoopVal = Phi->getIncomingValueForBlock(Latch);
42514258

42524259
for (unsigned Part = 0; Part < UF; ++Part) {
4253-
Value *VecRdxPhi = getOrCreateVectorValue(Phi, Part);
4254-
Value *Val = getOrCreateVectorValue(LoopVal, Part);
4260+
Value *VecRdxPhi = State.get(State.Plan->getVPValue(Phi), Part);
4261+
Value *Val = State.get(State.Plan->getVPValue(LoopVal), Part);
42554262
cast<PHINode>(VecRdxPhi)
42564263
->addIncoming(Val, LI->getLoopFor(LoopVectorBody)->getLoopLatch());
42574264
}
@@ -4270,8 +4277,7 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
42704277
// be predicated, and does not need to be handled here.
42714278
if (Cost->foldTailByMasking() && !IsInLoopReductionPhi) {
42724279
for (unsigned Part = 0; Part < UF; ++Part) {
4273-
Value *VecLoopExitInst =
4274-
VectorLoopValueMap.getVectorValue(LoopExitInst, Part);
4280+
Value *VecLoopExitInst = State.get(LoopExitInstDef, Part);
42754281
Value *Sel = nullptr;
42764282
for (User *U : VecLoopExitInst->users()) {
42774283
if (isa<SelectInst>(U)) {
@@ -4281,7 +4287,7 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
42814287
assert(isa<PHINode>(U) && "Reduction exit must feed Phi's or select");
42824288
}
42834289
assert(Sel && "Reduction exit feeds no select");
4284-
VectorLoopValueMap.resetVectorValue(LoopExitInst, Part, Sel);
4290+
State.reset(LoopExitInstDef, LoopExitInst, Sel, Part);
42854291

42864292
// If the target can create a predicated operator for the reduction at no
42874293
// extra cost in the loop (for example a predicated vadd), it can be
@@ -4293,7 +4299,8 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
42934299
TTI->preferPredicatedReductionSelect(
42944300
RdxDesc.getOpcode(), Phi->getType(),
42954301
TargetTransformInfo::ReductionFlags())) {
4296-
auto *VecRdxPhi = cast<PHINode>(getOrCreateVectorValue(Phi, Part));
4302+
auto *VecRdxPhi =
4303+
cast<PHINode>(State.get(State.Plan->getVPValue(Phi), Part));
42974304
VecRdxPhi->setIncomingValueForBlock(
42984305
LI->getLoopFor(LoopVectorBody)->getLoopLatch(), Sel);
42994306
}
@@ -4311,7 +4318,7 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
43114318
LI->getLoopFor(LoopVectorBody)->getLoopLatch()->getTerminator());
43124319
VectorParts RdxParts(UF);
43134320
for (unsigned Part = 0; Part < UF; ++Part) {
4314-
RdxParts[Part] = VectorLoopValueMap.getVectorValue(LoopExitInst, Part);
4321+
RdxParts[Part] = State.get(LoopExitInstDef, Part);
43154322
Value *Trunc = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
43164323
Value *Extnd = RdxDesc.isSigned() ? Builder.CreateSExt(Trunc, VecTy)
43174324
: Builder.CreateZExt(Trunc, VecTy);
@@ -4327,12 +4334,12 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
43274334
Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt());
43284335
for (unsigned Part = 0; Part < UF; ++Part) {
43294336
RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
4330-
VectorLoopValueMap.resetVectorValue(LoopExitInst, Part, RdxParts[Part]);
4337+
State.reset(LoopExitInstDef, LoopExitInst, RdxParts[Part], Part);
43314338
}
43324339
}
43334340

43344341
// Reduce all of the unrolled parts into a single vector.
4335-
Value *ReducedPartRdx = VectorLoopValueMap.getVectorValue(LoopExitInst, 0);
4342+
Value *ReducedPartRdx = State.get(LoopExitInstDef, 0);
43364343
unsigned Op = RecurrenceDescriptor::getOpcode(RK);
43374344

43384345
// The middle block terminator has already been assigned a DebugLoc here (the
@@ -4348,7 +4355,7 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
43484355
IRBuilderBase::FastMathFlagGuard FMFG(Builder);
43494356
Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
43504357
for (unsigned Part = 1; Part < UF; ++Part) {
4351-
Value *RdxPart = VectorLoopValueMap.getVectorValue(LoopExitInst, Part);
4358+
Value *RdxPart = State.get(LoopExitInstDef, Part);
43524359
if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
43534360
ReducedPartRdx = Builder.CreateBinOp(
43544361
(Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
@@ -4432,7 +4439,7 @@ void InnerLoopVectorizer::clearReductionWrapFlags(
44324439
}
44334440
}
44344441

4435-
void InnerLoopVectorizer::fixLCSSAPHIs() {
4442+
void InnerLoopVectorizer::fixLCSSAPHIs(VPTransformState &State) {
44364443
for (PHINode &LCSSAPhi : LoopExitBlock->phis()) {
44374444
if (LCSSAPhi.getBasicBlockIndex(LoopMiddleBlock) != -1)
44384445
// Some phis were already hand updated by the reduction and recurrence
@@ -4453,7 +4460,10 @@ void InnerLoopVectorizer::fixLCSSAPHIs() {
44534460
// extracted from the vectorized loop.
44544461
Builder.SetInsertPoint(LoopMiddleBlock->getTerminator());
44554462
Value *lastIncomingValue =
4456-
getOrCreateScalarValue(IncomingValue, VPIteration(UF - 1, LastLane));
4463+
OrigLoop->isLoopInvariant(IncomingValue)
4464+
? IncomingValue
4465+
: State.get(State.Plan->getVPValue(IncomingValue),
4466+
VPIteration(UF - 1, LastLane));
44574467
LCSSAPhi.addIncoming(lastIncomingValue, LoopMiddleBlock);
44584468
}
44594469
}
@@ -4522,10 +4532,10 @@ void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
45224532
} while (Changed);
45234533
}
45244534

4525-
void InnerLoopVectorizer::fixNonInductionPHIs() {
4535+
void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) {
45264536
for (PHINode *OrigPhi : OrigPHIsToFix) {
45274537
PHINode *NewPhi =
4528-
cast<PHINode>(VectorLoopValueMap.getVectorValue(OrigPhi, 0));
4538+
cast<PHINode>(State.get(State.Plan->getVPValue(OrigPhi), 0));
45294539
unsigned NumIncomingValues = OrigPhi->getNumIncomingValues();
45304540

45314541
SmallVector<BasicBlock *, 2> ScalarBBPredecessors(
@@ -7777,14 +7787,12 @@ void LoopVectorizationPlanner::executePlan(InnerLoopVectorizer &ILV,
77777787
VPCallbackILV CallbackILV(ILV);
77787788

77797789
assert(BestVF.hasValue() && "Vectorization Factor is missing");
7790+
assert(VPlans.size() == 1 && "Not a single VPlan to execute.");
77807791

7781-
VPTransformState State{*BestVF,
7782-
BestUF,
7783-
LI,
7784-
DT,
7785-
ILV.Builder,
7786-
ILV.VectorLoopValueMap,
7787-
&ILV,
7792+
VPTransformState State{*BestVF, BestUF,
7793+
LI, DT,
7794+
ILV.Builder, ILV.VectorLoopValueMap,
7795+
&ILV, VPlans.front().get(),
77887796
CallbackILV};
77897797
State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton();
77907798
State.TripCount = ILV.getOrCreateTripCount(nullptr);
@@ -7801,12 +7809,11 @@ void LoopVectorizationPlanner::executePlan(InnerLoopVectorizer &ILV,
78017809
//===------------------------------------------------===//
78027810

78037811
// 2. Copy and widen instructions from the old loop into the new loop.
7804-
assert(VPlans.size() == 1 && "Not a single VPlan to execute.");
78057812
VPlans.front()->execute(&State);
78067813

78077814
// 3. Fix the vectorized code: take care of header phi's, live-outs,
78087815
// predication, updating analyses.
7809-
ILV.fixVectorizedLoop();
7816+
ILV.fixVectorizedLoop(State);
78107817

78117818
ILV.printDebugTracesAtEnd();
78127819
}
@@ -9288,6 +9295,12 @@ void VPTransformState::set(VPValue *Def, Value *IRDef, Value *V,
92889295
ILV->setVectorValue(IRDef, Part, V);
92899296
}
92909297

9298+
void VPTransformState::reset(VPValue *Def, Value *IRDef, Value *V,
9299+
unsigned Part) {
9300+
set(Def, V, Part);
9301+
ILV->resetVectorValue(IRDef, Part, V);
9302+
}
9303+
92919304
Value *VPTransformState::get(VPValue *Def, unsigned Part) {
92929305
// If Values have been set for this Def return the one relevant for \p Part.
92939306
if (hasVectorValue(Def, Part))

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -253,9 +253,9 @@ struct VPTransformState {
253253
VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
254254
DominatorTree *DT, IRBuilder<> &Builder,
255255
VectorizerValueMap &ValueMap, InnerLoopVectorizer *ILV,
256-
VPCallback &Callback)
256+
VPlan *Plan, VPCallback &Callback)
257257
: VF(VF), UF(UF), Instance(), LI(LI), DT(DT), Builder(Builder),
258-
ValueMap(ValueMap), ILV(ILV), Callback(Callback) {}
258+
ValueMap(ValueMap), ILV(ILV), Plan(Plan), Callback(Callback) {}
259259

260260
/// The chosen Vectorization and Unroll Factors of the loop being vectorized.
261261
ElementCount VF;
@@ -312,6 +312,7 @@ struct VPTransformState {
312312
Data.PerPartOutput[Def][Part] = V;
313313
}
314314
void set(VPValue *Def, Value *IRDef, Value *V, unsigned Part);
315+
void reset(VPValue *Def, Value *IRDef, Value *V, unsigned Part);
315316
void set(VPValue *Def, Value *IRDef, Value *V, const VPIteration &Instance);
316317

317318
void set(VPValue *Def, Value *V, const VPIteration &Instance) {
@@ -376,6 +377,9 @@ struct VPTransformState {
376377
/// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
377378
InnerLoopVectorizer *ILV;
378379

380+
/// Pointer to the VPlan that code is generated for.
381+
VPlan *Plan;
382+
379383
VPCallback &Callback;
380384
};
381385

0 commit comments

Comments
 (0)