Skip to content

Commit d0e7dae

Browse files
committed
[VPlan] Introduce scalar loop header in plan, remove VPLiveOut.
Update VPlan to include the scalar loop header. This allows retiring VPLiveOut, as the remaining live-outs can now be handled by adding operands to the wrapped phis in the scalar loop header. Note that the current version only includes the scalar loop header; it does not include any other loop blocks, and it does not wrap the header in a region block. Either of these can be added in this PR or in follow-ups as needed.
1 parent fbec1c2 commit d0e7dae

File tree

8 files changed

+53
-132
lines changed

8 files changed

+53
-132
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2952,10 +2952,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
29522952
IVEndValues[Entry.first], LoopMiddleBlock, Plan, State);
29532953
}
29542954

2955-
// Fix live-out phis not already fixed earlier.
2956-
for (const auto &KV : Plan.getLiveOuts())
2957-
KV.second->fixPhi(Plan, State);
2958-
29592955
for (Instruction *PI : PredicatedInstructions)
29602956
sinkScalarOperands(&*PI);
29612957

@@ -8818,7 +8814,14 @@ static void addLiveOutsForFirstOrderRecurrences(
88188814
VPInstruction::ResumePhi, {Resume, FOR->getStartValue()}, {},
88198815
"scalar.recur.init");
88208816
auto *FORPhi = cast<PHINode>(FOR->getUnderlyingInstr());
8821-
Plan.addLiveOut(FORPhi, ResumePhiRecipe);
8817+
for (VPRecipeBase &R :
8818+
*cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor())) {
8819+
auto *IRI = cast<VPIRInstruction>(&R);
8820+
if (&IRI->getInstruction() == FORPhi) {
8821+
IRI->addOperand(ResumePhiRecipe);
8822+
break;
8823+
}
8824+
}
88228825

88238826
// Now update VPIRInstructions modeling LCSSA phis in the exit block.
88248827
// Extract the penultimate value of the recurrence and use it as operand for

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 15 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -456,10 +456,17 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
456456
State->Builder.SetInsertPoint(getIRBasicBlock()->getTerminator());
457457
executeRecipes(State, getIRBasicBlock());
458458
if (getSingleSuccessor()) {
459-
assert(isa<UnreachableInst>(getIRBasicBlock()->getTerminator()));
460-
auto *Br = State->Builder.CreateBr(getIRBasicBlock());
461-
Br->setOperand(0, nullptr);
462-
getIRBasicBlock()->getTerminator()->eraseFromParent();
459+
auto *SuccVPIRBB = dyn_cast<VPIRBasicBlock>(getSingleSuccessor());
460+
if (SuccVPIRBB && SuccVPIRBB->getIRBasicBlock() ==
461+
getIRBasicBlock()->getSingleSuccessor()) {
462+
cast<BranchInst>(getIRBasicBlock()->getTerminator())
463+
->setOperand(0, nullptr);
464+
} else {
465+
assert(isa<UnreachableInst>(getIRBasicBlock()->getTerminator()));
466+
auto *Br = State->Builder.CreateBr(getIRBasicBlock());
467+
Br->setOperand(0, nullptr);
468+
getIRBasicBlock()->getTerminator()->eraseFromParent();
469+
}
463470
}
464471

465472
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
@@ -843,10 +850,6 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
843850
#endif
844851

845852
VPlan::~VPlan() {
846-
for (auto &KV : LiveOuts)
847-
delete KV.second;
848-
LiveOuts.clear();
849-
850853
if (Entry) {
851854
VPValue DummyValue;
852855
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
@@ -901,6 +904,8 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
901904
VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);
902905

903906
VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph");
907+
VPBasicBlock *ScalarHeader = createVPIRBasicBlockFor(TheLoop->getHeader());
908+
VPBlockUtils::connectBlocks(ScalarPH, ScalarHeader);
904909
if (!RequiresScalarEpilogueCheck) {
905910
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
906911
return Plan;
@@ -1050,6 +1055,8 @@ void VPlan::execute(VPTransformState *State) {
10501055
BrInst->insertBefore(MiddleBB->getTerminator());
10511056
MiddleBB->getTerminator()->eraseFromParent();
10521057
State->CFG.DTU.applyUpdates({{DominatorTree::Delete, MiddleBB, ScalarPh}});
1058+
State->CFG.DTU.applyUpdates(
1059+
{{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
10531060

10541061
// Generate code in the loop pre-header and body.
10551062
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
@@ -1168,12 +1175,6 @@ void VPlan::print(raw_ostream &O) const {
11681175
Block->print(O, "", SlotTracker);
11691176
}
11701177

1171-
if (!LiveOuts.empty())
1172-
O << "\n";
1173-
for (const auto &KV : LiveOuts) {
1174-
KV.second->print(O, SlotTracker);
1175-
}
1176-
11771178
O << "}\n";
11781179
}
11791180

@@ -1210,11 +1211,6 @@ LLVM_DUMP_METHOD
12101211
void VPlan::dump() const { print(dbgs()); }
12111212
#endif
12121213

1213-
void VPlan::addLiveOut(PHINode *PN, VPValue *V) {
1214-
assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists");
1215-
LiveOuts.insert({PN, new VPLiveOut(PN, V)});
1216-
}
1217-
12181214
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
12191215
DenseMap<VPValue *, VPValue *> &Old2NewVPValues) {
12201216
// Update the operands of all cloned recipes starting at NewEntry. This
@@ -1282,10 +1278,6 @@ VPlan *VPlan::duplicate() {
12821278
remapOperands(Preheader, NewPreheader, Old2NewVPValues);
12831279
remapOperands(Entry, NewEntry, Old2NewVPValues);
12841280

1285-
// Clone live-outs.
1286-
for (const auto &[_, LO] : LiveOuts)
1287-
NewPlan->addLiveOut(LO->getPhi(), Old2NewVPValues[LO->getOperand(0)]);
1288-
12891281
// Initialize remaining fields of cloned VPlan.
12901282
NewPlan->VFs = VFs;
12911283
NewPlan->UFs = UFs;

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 0 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -655,48 +655,6 @@ class VPBlockBase {
655655
virtual VPBlockBase *clone() = 0;
656656
};
657657

658-
/// A value that is used outside the VPlan. The operand of the user needs to be
659-
/// added to the associated phi node. The incoming block from VPlan is
660-
/// determined by where the VPValue is defined: if it is defined by a recipe
661-
/// outside a region, its parent block is used, otherwise the middle block is
662-
/// used.
663-
class VPLiveOut : public VPUser {
664-
PHINode *Phi;
665-
666-
public:
667-
VPLiveOut(PHINode *Phi, VPValue *Op)
668-
: VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}
669-
670-
static inline bool classof(const VPUser *U) {
671-
return U->getVPUserID() == VPUser::VPUserID::LiveOut;
672-
}
673-
674-
/// Fix the wrapped phi node. This means adding an incoming value to exit
675-
/// block phi's from the vector loop via middle block (values from scalar loop
676-
/// already reach these phi's), and updating the value to scalar header phi's
677-
/// from the scalar preheader.
678-
void fixPhi(VPlan &Plan, VPTransformState &State);
679-
680-
/// Returns true if the VPLiveOut uses scalars of operand \p Op.
681-
bool usesScalars(const VPValue *Op) const override {
682-
assert(is_contained(operands(), Op) &&
683-
"Op must be an operand of the recipe");
684-
return true;
685-
}
686-
687-
PHINode *getPhi() const { return Phi; }
688-
689-
/// Live-outs are marked as only using the first part during the transition
690-
/// to unrolling directly on VPlan.
691-
/// TODO: Remove after unroller transition.
692-
bool onlyFirstPartUsed(const VPValue *Op) const override { return true; }
693-
694-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
695-
/// Print the VPLiveOut to \p O.
696-
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
697-
#endif
698-
};
699-
700658
/// Struct to hold various analysis needed for cost computations.
701659
struct VPCostContext {
702660
const TargetTransformInfo &TTI;
@@ -3446,11 +3404,6 @@ class VPlan {
34463404
/// definitions are VPValues that hold a pointer to their underlying IR.
34473405
SmallVector<VPValue *, 16> VPLiveInsToFree;
34483406

3449-
/// Values used outside the plan. It contains live-outs that need fixing. Any
3450-
/// live-out that is fixed outside VPlan needs to be removed. The remaining
3451-
/// live-outs are fixed via VPLiveOut::fixPhi.
3452-
MapVector<PHINode *, VPLiveOut *> LiveOuts;
3453-
34543407
/// Mapping from SCEVs to the VPValues representing their expansions.
34553408
/// NOTE: This mapping is temporary and will be removed once all users have
34563409
/// been modeled in VPlan directly.
@@ -3630,12 +3583,6 @@ class VPlan {
36303583
return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
36313584
}
36323585

3633-
void addLiveOut(PHINode *PN, VPValue *V);
3634-
3635-
const MapVector<PHINode *, VPLiveOut *> &getLiveOuts() const {
3636-
return LiveOuts;
3637-
}
3638-
36393586
VPValue *getSCEVExpansion(const SCEV *S) const {
36403587
return SCEVToExpansion.lookup(S);
36413588
}

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 4 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -197,35 +197,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
197197
}
198198
}
199199

200-
void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
201-
VPValue *ExitValue = getOperand(0);
202-
VPBasicBlock *MiddleVPBB =
203-
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
204-
VPRecipeBase *ExitingRecipe = ExitValue->getDefiningRecipe();
205-
auto *ExitingVPBB = ExitingRecipe ? ExitingRecipe->getParent() : nullptr;
206-
// Values leaving the vector loop reach live out phi's in the exiting block
207-
// via middle block.
208-
auto *PredVPBB = !ExitingVPBB || ExitingVPBB->getEnclosingLoopRegion()
209-
? MiddleVPBB
210-
: ExitingVPBB;
211-
BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
212-
Value *V = State.get(ExitValue, VPLane(0));
213-
if (Phi->getBasicBlockIndex(PredBB) != -1)
214-
Phi->setIncomingValueForBlock(PredBB, V);
215-
else
216-
Phi->addIncoming(V, PredBB);
217-
}
218-
219-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
220-
void VPLiveOut::print(raw_ostream &O, VPSlotTracker &SlotTracker) const {
221-
O << "Live-out ";
222-
getPhi()->printAsOperand(O);
223-
O << " = ";
224-
getOperand(0)->printAsOperand(O, SlotTracker);
225-
O << "\n";
226-
}
227-
#endif
228-
229200
void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
230201
assert(!Parent && "Recipe already in some VPBasicBlock");
231202
assert(InsertPos->getParent() &&
@@ -858,7 +829,10 @@ void VPIRInstruction::execute(VPTransformState &State) {
858829
State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
859830
Value *V = State.get(ExitValue, VPLane(Lane));
860831
auto *Phi = cast<PHINode>(&I);
861-
Phi->addIncoming(V, PredBB);
832+
if (Phi->getBasicBlockIndex(PredBB) == -1)
833+
Phi->addIncoming(V, PredBB);
834+
else
835+
Phi->setIncomingValueForBlock(PredBB, V);
862836
}
863837

864838
// Advance the insert point after the wrapped IR instruction. This allows

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,7 @@ static bool mergeBlocksIntoPredecessors(VPlan &Plan) {
378378
// Don't fold the exit block of the Plan into its single predecessor for
379379
// now.
380380
// TODO: Remove restriction once more of the skeleton is modeled in VPlan.
381-
if (VPBB->getNumSuccessors() == 0 && !VPBB->getParent())
381+
if (!VPBB->getParent())
382382
continue;
383383
auto *PredVPBB =
384384
dyn_cast_or_null<VPBasicBlock>(VPBB->getSinglePredecessor());

llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,13 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
264264
return;
265265

266266
if (auto *VPI = dyn_cast<VPInstruction>(&R)) {
267+
VPValue *Op0, *Op1;
268+
if (match(VPI, m_VPInstruction<VPInstruction::ResumePhi>(m_VPValue(Op0),
269+
m_VPValue(Op1)))) {
270+
addUniformForAllParts(VPI);
271+
return;
272+
}
273+
267274
if (vputils::onlyFirstPartUsed(VPI)) {
268275
addUniformForAllParts(VPI);
269276
return;
@@ -449,11 +456,5 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) {
449456
Part++;
450457
}
451458

452-
// Remap the operand of live-outs to the last part.
453-
for (const auto &[_, LO] : Plan.getLiveOuts()) {
454-
VPValue *In = Unroller.getValueForPart(LO->getOperand(0), UF - 1);
455-
LO->setOperand(0, In);
456-
}
457-
458459
VPlanTransforms::removeDeadRecipes(Plan);
459460
}

llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -244,14 +244,6 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
244244
return false;
245245
}
246246

247-
VPBlockBase *MiddleBB =
248-
IRBB->getPlan()->getVectorLoopRegion()->getSingleSuccessor();
249-
if (IRBB != IRBB->getPlan()->getPreheader() &&
250-
IRBB->getSinglePredecessor() != MiddleBB) {
251-
errs() << "VPIRBasicBlock can only be used as pre-header or a successor of "
252-
"middle-block at the moment!\n";
253-
return false;
254-
}
255247
return true;
256248
}
257249

@@ -416,12 +408,6 @@ bool VPlanVerifier::verify(const VPlan &Plan) {
416408
return false;
417409
}
418410

419-
for (const auto &KV : Plan.getLiveOuts())
420-
if (KV.second->getNumOperands() != 1) {
421-
errs() << "live outs must have a single operand\n";
422-
return false;
423-
}
424-
425411
return true;
426412
}
427413

llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1077,6 +1077,17 @@ define void @merge_with_dead_gep_between_regions(i32 %n, ptr noalias %src, ptr n
10771077
; CHECK-NEXT: No successors
10781078
; CHECK-EMPTY:
10791079
; CHECK-NEXT: scalar.ph
1080+
; CHECK-NEXT: Successor(s): ir-bb<loop>
1081+
; CHECK-EMPTY:
1082+
; CHECK-NEXT: ir-bb<loop>:
1083+
; CHECK-NEXT: IR %iv = phi i32 [ %n, %entry ], [ %iv.next, %loop ]
1084+
; CHECK-NEXT: IR %iv.next = add nsw i32 %iv, -1
1085+
; CHECK-NEXT: IR %gep.src = getelementptr inbounds i32, ptr %src, i32 %iv
1086+
; CHECK-NEXT: IR %l = load i32, ptr %gep.src, align 16
1087+
; CHECK-NEXT: IR %dead_gep = getelementptr inbounds i32, ptr %dst, i64 1
1088+
; CHECK-NEXT: IR %gep.dst = getelementptr inbounds i32, ptr %dst, i32 %iv
1089+
; CHECK-NEXT: IR store i32 %l, ptr %gep.dst, align 16
1090+
; CHECK-NEXT: IR %ec = icmp eq i32 %iv.next, 0
10801091
; CHECK-NEXT: No successors
10811092
; CHECK-NEXT: }
10821093
;
@@ -1156,6 +1167,13 @@ define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) {
11561167
; CHECK-NEXT: No successors
11571168
; CHECK-EMPTY:
11581169
; CHECK-NEXT: scalar.ph:
1170+
; CHECK-NEXT: Successor(s): ir-bb<loop.header>
1171+
; CHECK-EMPTY:
1172+
; CHECK-NEXT: ir-bb<loop.header>:
1173+
; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
1174+
; CHECK-NEXT: IR %ptr.iv.next = getelementptr inbounds i8, ptr %ptr.iv, i64 -1
1175+
; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv.next, align 1
1176+
; CHECK-NEXT: IR %c.1 = icmp eq i8 %l, 0
11591177
; CHECK-NEXT: No successors
11601178
; CHECK-NEXT: }
11611179
;

0 commit comments

Comments
 (0)