Skip to content

Commit ec402a2

Browse files
authored
[VPlan] Implement cloning of VPlans. (#73158)
This patch implements cloning for VPlans and recipes. Cloning is used in the epilogue vectorization path, to clone the VPlan for the main vector loop. This means we won't re-use a VPlan when executing the VPlan for the epilogue vector loop, which in turn will enable us to perform optimizations based on UF & VF.
1 parent 3e29e52 commit ec402a2

File tree

4 files changed

+316
-6
lines changed

4 files changed

+316
-6
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10141,7 +10141,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1014110141
EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE,
1014210142
EPI, &LVL, &CM, BFI, PSI, Checks);
1014310143

10144-
VPlan &BestMainPlan = LVP.getBestPlanFor(EPI.MainLoopVF);
10144+
VPlan &BestMainPlan = *LVP.getBestPlanFor(EPI.MainLoopVF).duplicate();
1014510145
const auto &[ExpandedSCEVs, ReductionResumeValues] = LVP.executePlan(
1014610146
EPI.MainLoopVF, EPI.MainLoopUF, BestMainPlan, MainILV, DT, true);
1014710147
++LoopsVectorized;

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,54 @@ void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
615615
}
616616
#endif
617617

618+
static std::pair<VPBlockBase *, VPBlockBase *> cloneSESE(VPBlockBase *Entry);
619+
620+
// Clone the CFG for all nodes in the single-entry-single-exit region reachable
621+
// from \p Entry, this includes cloning the blocks and their recipes. Operands
622+
// of cloned recipes will NOT be updated. Remapping of operands must be done
623+
// separately. Returns a pair with the new entry and exiting blocks of the
624+
// cloned region.
625+
static std::pair<VPBlockBase *, VPBlockBase *> cloneSESE(VPBlockBase *Entry) {
626+
// Maps every original block visited so far to its clone.
DenseMap<VPBlockBase *, VPBlockBase *> Old2NewVPBlocks;
627+
// Shallow traversal in reverse post-order, starting at Entry.
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
628+
Entry);
629+
for (VPBlockBase *BB : RPOT) {
630+
VPBlockBase *NewBB = BB->clone();
631+
// Connect the clone to the clones of BB's predecessors, iterating the
// predecessors in their original order. The predecessor clones are looked
// up in the map, so they are expected to have been visited earlier in the
// traversal.
for (VPBlockBase *Pred : BB->getPredecessors())
632+
VPBlockUtils::connectBlocks(Old2NewVPBlocks[Pred], NewBB);
633+
634+
// Record the mapping only after the predecessor edges have been added.
Old2NewVPBlocks[BB] = NewBB;
635+
}
636+
637+
#if !defined(NDEBUG)
638+
// Verify that the order of predecessors and successors matches in the cloned
639+
// version.
640+
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
641+
NewRPOT(Old2NewVPBlocks[Entry]);
642+
// Walk the old and new traversals in lock-step and compare each edge against
// the old-to-new block map.
for (const auto &[OldBB, NewBB] : zip(RPOT, NewRPOT)) {
643+
for (const auto &[OldPred, NewPred] :
644+
zip(OldBB->getPredecessors(), NewBB->getPredecessors()))
645+
assert(NewPred == Old2NewVPBlocks[OldPred] && "Different predecessors");
646+
647+
for (const auto &[OldSucc, NewSucc] :
648+
zip(OldBB->successors(), NewBB->successors()))
649+
assert(NewSucc == Old2NewVPBlocks[OldSucc] && "Different successors");
650+
}
651+
#endif
652+
653+
// The first element is the clone of Entry; the second is the clone of the
// last block in the reverse post-order traversal, which is returned as the
// exiting block of the cloned region.
return std::make_pair(Old2NewVPBlocks[Entry],
654+
Old2NewVPBlocks[*reverse(RPOT).begin()]);
655+
}
656+
657+
// Create a copy of this region. The blocks reachable from the region's entry
// are cloned with cloneSESE and wrapped in a newly allocated VPRegionBlock
// that reuses this region's name and replicator flag. Returns the new region
// as a raw pointer.
VPRegionBlock *VPRegionBlock::clone() {
658+
const auto &[NewEntry, NewExiting] = cloneSESE(getEntry());
659+
auto *NewRegion =
660+
new VPRegionBlock(NewEntry, NewExiting, getName(), isReplicator());
661+
// Make the new region the parent of every cloned block reachable from the
// new entry (shallow depth-first walk).
for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry))
662+
Block->setParent(NewRegion);
663+
return NewRegion;
664+
}
665+
618666
void VPRegionBlock::dropAllReferences(VPValue *NewValue) {
619667
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
620668
// Drop all references in VPBasicBlocks and replace all uses with
@@ -983,6 +1031,87 @@ void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopHeaderBB,
9831031
assert(DT->verify(DominatorTree::VerificationLevel::Fast));
9841032
}
9851033

1034+
// Rewrite the operands of all recipes reachable from \p NewEntry so that they
// refer to the cloned VPValues instead of the original ones.
// \p Entry is the entry of the original block structure and \p NewEntry the
// entry of its clone; both are traversed in lock-step, so the two structures
// must have matching shapes. \p Old2NewVPValues is extended with a mapping for
// every value defined by a recipe in the traversed blocks and is then used to
// translate the operands.
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
1035+
DenseMap<VPValue *, VPValue *> &Old2NewVPValues) {
1036+
// Update the operands of all cloned recipes starting at NewEntry. This
1037+
// traverses all reachable blocks. This is done in two steps, to handle cycles
1038+
// in PHI recipes.
1039+
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>>
1040+
OldDeepRPOT(Entry);
1041+
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>>
1042+
NewDeepRPOT(NewEntry);
1043+
// First, collect all mappings from old to new VPValues defined by cloned
1044+
// recipes.
1045+
for (const auto &[OldBB, NewBB] :
1046+
zip(VPBlockUtils::blocksOnly<VPBasicBlock>(OldDeepRPOT),
1047+
VPBlockUtils::blocksOnly<VPBasicBlock>(NewDeepRPOT))) {
1048+
assert(OldBB->getRecipeList().size() == NewBB->getRecipeList().size() &&
1049+
"blocks must have the same number of recipes");
1050+
// Pair up the recipes of the old and new block positionally.
for (const auto &[OldR, NewR] : zip(*OldBB, *NewBB)) {
1051+
assert(OldR.getNumOperands() == NewR.getNumOperands() &&
1052+
"recipes must have the same number of operands");
1053+
// NOTE(review): this check compares the number of defined values, although
// the message text says "operands".
assert(OldR.getNumDefinedValues() == NewR.getNumDefinedValues() &&
1054+
"recipes must define the same number of operands");
1055+
for (const auto &[OldV, NewV] :
1056+
zip(OldR.definedValues(), NewR.definedValues()))
1057+
Old2NewVPValues[OldV] = NewV;
1058+
}
1059+
}
1060+
1061+
// Update all operands to use cloned VPValues.
1062+
for (VPBasicBlock *NewBB :
1063+
VPBlockUtils::blocksOnly<VPBasicBlock>(NewDeepRPOT)) {
1064+
for (VPRecipeBase &NewR : *NewBB)
1065+
for (unsigned I = 0, E = NewR.getNumOperands(); I != E; ++I) {
1066+
// NOTE(review): the result of lookup() is installed without a check, so
// every operand is presumably expected to have a mapping by now (defined
// by a cloned recipe or pre-populated by the caller) -- confirm.
VPValue *NewOp = Old2NewVPValues.lookup(NewR.getOperand(I));
1067+
NewR.setOperand(I, NewOp);
1068+
}
1069+
}
1070+
}
1071+
1072+
// Create a copy of this VPlan: the preheader and the blocks reachable from
// Entry are cloned, fresh VPValues are created for the live-ins and the
// plan-level values, operands of the cloned recipes are remapped to the new
// values, and live-outs, VFs, UFs, name and trip count are carried over.
// Returns the newly allocated plan as a raw pointer.
// NOTE(review): ownership of the returned plan presumably passes to the
// caller -- confirm who deletes it.
VPlan *VPlan::duplicate() {
1073+
// Clone blocks.
1074+
VPBasicBlock *NewPreheader = Preheader->clone();
1075+
// Only the cloned entry is needed here; the exiting block is ignored.
const auto &[NewEntry, __] = cloneSESE(Entry);
1076+
1077+
// Create VPlan, clone live-ins and remap operands in the cloned blocks.
1078+
auto *NewPlan = new VPlan(NewPreheader, cast<VPBasicBlock>(NewEntry));
1079+
// Mapping from values of this plan to their counterparts in NewPlan. It is
// seeded below with values not defined by recipes and is extended by
// remapOperands with the values defined by cloned recipes.
DenseMap<VPValue *, VPValue *> Old2NewVPValues;
1080+
for (VPValue *OldLiveIn : VPLiveInsToFree) {
1081+
VPValue *NewLiveIn = new VPValue(OldLiveIn->getLiveInIRValue());
1082+
NewPlan->VPLiveInsToFree.push_back(NewLiveIn);
1083+
Old2NewVPValues[OldLiveIn] = NewLiveIn;
1084+
}
1085+
// VectorTripCount and VFxUF are members of the plan object, so map the
// addresses of this plan's members to those of the new plan.
Old2NewVPValues[&VectorTripCount] = &NewPlan->VectorTripCount;
1086+
Old2NewVPValues[&VFxUF] = &NewPlan->VFxUF;
1087+
// The backedge-taken count is only created for the new plan if this plan
// has one.
if (BackedgeTakenCount) {
1088+
NewPlan->BackedgeTakenCount = new VPValue();
1089+
Old2NewVPValues[BackedgeTakenCount] = NewPlan->BackedgeTakenCount;
1090+
}
1091+
assert(TripCount && "trip count must be set");
1092+
if (TripCount->isLiveIn())
1093+
Old2NewVPValues[TripCount] = new VPValue(TripCount->getLiveInIRValue());
1094+
// else NewTripCount will be created and inserted into Old2NewVPValues when
1095+
// TripCount is cloned. In any case NewPlan->TripCount is updated below.
1096+
1097+
// Remap the preheader first and then the blocks reachable from Entry; both
// calls add to and read from the same value map.
remapOperands(Preheader, NewPreheader, Old2NewVPValues);
1098+
remapOperands(Entry, NewEntry, Old2NewVPValues);
1099+
1100+
// Clone live-outs.
1101+
// Each new live-out keeps the original phi and uses the mapped counterpart
// of the original live-out's first operand.
for (const auto &[_, LO] : LiveOuts)
1102+
NewPlan->addLiveOut(LO->getPhi(), Old2NewVPValues[LO->getOperand(0)]);
1103+
1104+
// Initialize remaining fields of cloned VPlan.
1105+
NewPlan->VFs = VFs;
1106+
NewPlan->UFs = UFs;
1107+
// TODO: Adjust names.
1108+
NewPlan->Name = Name;
1109+
assert(Old2NewVPValues.contains(TripCount) &&
1110+
"TripCount must have been added to Old2NewVPValues");
1111+
NewPlan->TripCount = Old2NewVPValues[TripCount];
1112+
return NewPlan;
1113+
}
1114+
9861115
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
9871116

9881117
Twine VPlanPrinter::getUID(const VPBlockBase *Block) {

0 commit comments

Comments
 (0)