Skip to content

Commit 13a26e8

Browse files
committed
[VPlan] Implement cloning of VPlans.
This patch implements cloning for VPlans and recipes. Cloning is used in the epilogue vectorization path, to clone the VPlan for the main vector loop. This means we won't re-use a VPlan when executing the VPlan for the epilogue vector loop, which in turn will enable us to perform optimizations based on UF & VF.
1 parent 2eb7a82 commit 13a26e8

File tree

4 files changed

+309
-1
lines changed

4 files changed

+309
-1
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10078,7 +10078,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1007810078
EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE,
1007910079
EPI, &LVL, &CM, BFI, PSI, Checks);
1008010080

10081-
VPlan &BestMainPlan = LVP.getBestPlanFor(EPI.MainLoopVF);
10081+
VPlan &BestMainPlan = *LVP.getBestPlanFor(EPI.MainLoopVF).clone();
1008210082
const auto &[ExpandedSCEVs, ReductionResumeValues] = LVP.executePlan(
1008310083
EPI.MainLoopVF, EPI.MainLoopUF, BestMainPlan, MainILV, DT, true);
1008410084
++LoopsVectorized;

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,18 @@ void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
615615
}
616616
#endif
617617

618+
/// Create a copy of this region. The blocks reachable from Entry are
/// duplicated through VPBlockUtils::cloneCFG, wrapped in a freshly allocated
/// VPRegionBlock that carries the same name and replicator flag, and then
/// re-parented to that new region. The new region is returned as a raw
/// pointer.
VPBlockBase *VPRegionBlock::clone() {
  // Scratch maps used only while cloning this region; they are discarded when
  // the function returns.
  DenseMap<VPBlockBase *, VPBlockBase *> BlockMap;
  DenseMap<VPValue *, VPValue *> ValueMap;

  // cloneCFG is invoked without its trailing remapping flag, i.e. with
  // whatever default its declaration provides.
  VPBlockBase *ClonedEntry = VPBlockUtils::cloneCFG(Entry, BlockMap, ValueMap);
  VPBlockBase *ClonedExiting = BlockMap[Exiting];

  auto *ClonedRegion = new VPRegionBlock(ClonedEntry, ClonedExiting, getName(),
                                         isReplicator());

  // Every block of the cloned (shallow) CFG belongs to the new region.
  for (VPBlockBase *Member : vp_depth_first_shallow(ClonedEntry))
    Member->setParent(ClonedRegion);
  return ClonedRegion;
}
629+
618630
void VPRegionBlock::dropAllReferences(VPValue *NewValue) {
619631
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
620632
// Drop all references in VPBasicBlocks and replace all uses with
@@ -982,6 +994,65 @@ void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopHeaderBB,
982994
assert(DT->verify(DominatorTree::VerificationLevel::Fast));
983995
}
984996

997+
/// Walk the recipes of \p OldBB and of its clone \p NewBB in lockstep and
/// record, for every value defined by an original recipe, the matching value
/// defined by the cloned recipe in \p Old2NewVPValues.
///
/// If \p Full is set, each operand of a cloned recipe is additionally replaced
/// by the value \p Old2NewVPValues holds for the corresponding operand of the
/// original recipe. Operands of a recipe are rewritten before that recipe's
/// own definitions are recorded, so a recipe can only pick up mappings that
/// were already present or were added by earlier recipes of the block.
///
/// \param OldBB            block that was cloned.
/// \param NewBB            clone of \p OldBB; recipes are paired by position.
/// \param Old2NewVPValues  in/out map from original to cloned VPValues.
/// \param Full             when true, also rewrite operands of cloned recipes.
static void remapVPValues(VPBasicBlock *OldBB, VPBasicBlock *NewBB,
                          DenseMap<VPValue *, VPValue *> &Old2NewVPValues,
                          bool Full = false) {
  for (const auto &[OldR, NewR] : zip(*OldBB, *NewBB)) {
    // The flag does not change per operand; test it once per recipe so that no
    // lookups are performed when operands are left untouched.
    if (Full) {
      assert(OldR.getNumOperands() == NewR.getNumOperands() &&
             "cloned recipe must have as many operands as the original");
      for (unsigned I = 0, E = NewR.getNumOperands(); I != E; ++I) {
        VPValue *NewOp = Old2NewVPValues.lookup(OldR.getOperand(I));
        // lookup() yields null for a missing key; catch that here instead of
        // installing a null operand on the cloned recipe.
        assert(NewOp && "operand of cloned recipe has no mapped VPValue");
        NewR.setOperand(I, NewOp);
      }
    }

    for (const auto &[OldV, NewV] :
         zip(OldR.definedValues(), NewR.definedValues()))
      Old2NewVPValues[OldV] = NewV;
  }
}
1012+
1013+
/// Build a copy of this plan and return it as a newly allocated VPlan (raw
/// pointer).
///
/// The copy is assembled in this order:
///  1. a fresh VPValue is created for every entry of VPLiveInsToFree and
///     stored in the new plan's VPLiveInsToFree;
///  2. VectorTripCount, VFxUF and (if present) BackedgeTakenCount are mapped
///     to their counterparts in the new plan;
///  3. the preheader is cloned and its recipes are remapped;
///  4. the trip count is resolved through the value map;
///  5. the CFG starting at getEntry() is cloned with full operand remapping;
///  6. VFs, UFs, Name and the live-outs are carried over.
VPlan *VPlan::clone() {
  DenseMap<VPBlockBase *, VPBlockBase *> Old2New;
  DenseMap<VPValue *, VPValue *> Old2NewVPValues;

  auto *NewPlan = new VPlan();

  // Give the new plan its own VPValue for each live-in wrapping the same
  // underlying IR value.
  for (VPValue *LI : VPLiveInsToFree) {
    VPValue *NewLI = new VPValue(LI->getLiveInIRValue());
    NewPlan->VPLiveInsToFree.push_back(NewLI);
    Old2NewVPValues[LI] = NewLI;
  }

  Old2NewVPValues[&VectorTripCount] = &NewPlan->VectorTripCount;
  Old2NewVPValues[&VFxUF] = &NewPlan->VFxUF;
  if (BackedgeTakenCount) {
    VPValue *NewBTC = new VPValue();
    Old2NewVPValues[BackedgeTakenCount] = NewBTC;
    NewPlan->BackedgeTakenCount = NewBTC;
  }

  // Clone the preheader first: values it defines must be in the map before
  // the trip count is resolved and before the main CFG is remapped.
  auto *NewPH = cast<VPBasicBlock>(Preheader->clone());
  remapVPValues(cast<VPBasicBlock>(Preheader), NewPH, Old2NewVPValues,
                /*Full*/ true);

  // Reuse the mapped trip count if one exists; otherwise wrap the same IR
  // value in a new VPValue.
  // NOTE(review): unlike the live-ins above, this fallback value is not
  // recorded in NewPlan->VPLiveInsToFree - confirm who is expected to free it.
  VPValue *NewTC = Old2NewVPValues.lookup(TripCount);
  if (!NewTC) {
    NewTC = new VPValue(TripCount->getLiveInIRValue());
    Old2NewVPValues[TripCount] = NewTC;
  }
  NewPlan->TripCount = NewTC;

  auto *NewEntry = cast<VPBasicBlock>(VPBlockUtils::cloneCFG(
      getEntry(), Old2New, Old2NewVPValues, /*FullRemapping*/ true));

  NewPlan->Entry = NewEntry;
  NewPlan->Preheader = NewPH;
  NewEntry->setPlan(NewPlan);
  NewPH->setPlan(NewPlan);
  NewPlan->VFs = VFs;
  NewPlan->UFs = UFs;
  NewPlan->Name = Name;

  // Re-create each live-out on the new plan, feeding it the cloned value that
  // corresponds to the original live-out's first operand.
  for (const auto &[_, LO] : LiveOuts)
    NewPlan->addLiveOut(LO->getPhi(),
                        Old2NewVPValues.lookup(LO->getOperand(0)));
  return NewPlan;
}
1055+
9851056
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
9861057

9871058
Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
@@ -1200,6 +1271,59 @@ void VPUser::printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const {
12001271
}
12011272
#endif
12021273

1274+
/// Clone the blocks reachable from \p Entry (shallow traversal, reverse
/// post-order), connect the clones the same way the originals are connected
/// to their predecessors, and return the clone of \p Entry.
///
/// \param Entry            first block of the CFG to clone.
/// \param Old2New          in/out map; receives original block -> clone.
/// \param Old2NewVPValues  in/out map; receives, for every recipe found by a
///                         deep traversal from \p Entry, original defined
///                         value -> value defined by the cloned recipe.
/// \param FullRemapping    when true, every operand of every cloned recipe is
///                         replaced by its entry in \p Old2NewVPValues; when
///                         false, operands are left as produced by clone().
/// \returns the clone of \p Entry.
VPBlockBase *VPBlockUtils::cloneCFG(
    VPBlockBase *Entry, DenseMap<VPBlockBase *, VPBlockBase *> &Old2New,
    DenseMap<VPValue *, VPValue *> &Old2NewVPValues, bool FullRemapping) {
  ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
      Entry);
  VPBlockBase *NewEntry = nullptr;
  for (VPBlockBase *BB : RPOT) {
    VPBlockBase *NewBB = BB->clone();
    // The first block visited is Entry, so its clone is the new entry.
    if (!NewEntry)
      NewEntry = NewBB;

    for (VPBlockBase *Pred : BB->getPredecessors()) {
      VPBlockBase *NewPred = Old2New.lookup(Pred);
      // A predecessor without a clone would otherwise be handed on as a null
      // block.
      assert(NewPred && "predecessor must be cloned before its successor");
      connectBlocks(NewPred, NewBB);
    }

    Old2New[BB] = NewBB;
  }

  // Update the operands of all cloned recipes starting at NewEntry. This
  // traverses all reachable blocks. This is done in two steps, to handle cycles
  // in PHI recipes.
  ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>>
      OldDeepRPOT(Entry);
  ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>>
      NewDeepRPOT(NewEntry);

  // First, collect all mappings from old to new VPValues defined by cloned
  // recipes. This happens regardless of FullRemapping.
  for (const auto &[OldBB, NewBB] :
       zip(VPBlockUtils::blocksOnly<VPBasicBlock>(OldDeepRPOT),
           VPBlockUtils::blocksOnly<VPBasicBlock>(NewDeepRPOT))) {
    for (const auto &[OldR, NewR] : zip(*OldBB, *NewBB))
      for (const auto &[OldV, NewV] :
           zip(OldR.definedValues(), NewR.definedValues()))
        Old2NewVPValues[OldV] = NewV;
  }

  // Without full remapping the operands stay untouched, so there is nothing
  // left to do.
  if (!FullRemapping)
    return NewEntry;

  // Update all operands to use cloned VPValues.
  for (VPBasicBlock *NewBB :
       VPBlockUtils::blocksOnly<VPBasicBlock>(NewDeepRPOT)) {
    for (VPRecipeBase &NewR : *NewBB)
      for (unsigned I = 0, E = NewR.getNumOperands(); I != E; ++I) {
        VPValue *NewOp = Old2NewVPValues.lookup(NewR.getOperand(I));
        // lookup() yields null for a missing key; catch that here instead of
        // installing a null operand on the cloned recipe.
        assert(NewOp && "operand of cloned recipe has no mapped VPValue");
        NewR.setOperand(I, NewOp);
      }
  }

  return NewEntry;
}
1326+
12031327
void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,
12041328
Old2NewTy &Old2New,
12051329
InterleavedAccessInfo &IAI) {

0 commit comments

Comments
 (0)