Skip to content

Commit 945e4fe

Browse files
committed
!fixup address latest comments, update on top of current main.
1 parent de00ebd commit 945e4fe

File tree

2 files changed

+134
-133
lines changed

2 files changed

+134
-133
lines changed

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 114 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -614,17 +614,60 @@ void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
614614
printSuccessors(O, Indent);
615615
}
616616
#endif
617+
static void cloneCFG(VPBlockBase *Entry,
618+
DenseMap<VPBlockBase *, VPBlockBase *> &Old2NewVPBlocks);
619+
620+
static VPBlockBase *cloneVPB(VPBlockBase *BB) {
621+
if (auto *VPBB = dyn_cast<VPBasicBlock>(BB)) {
622+
auto *NewBlock = new VPBasicBlock(VPBB->getName());
623+
for (VPRecipeBase &R : *VPBB)
624+
NewBlock->appendRecipe(R.clone());
625+
return NewBlock;
626+
}
617627

618-
VPBlockBase *VPRegionBlock::clone() {
619-
DenseMap<VPBlockBase *, VPBlockBase *> Old2New;
628+
auto *VPR = cast<VPRegionBlock>(BB);
629+
DenseMap<VPBlockBase *, VPBlockBase *> Old2NewVPBlocks;
620630
DenseMap<VPValue *, VPValue *> Old2NewVPValues;
621-
VPBlockBase *NewEntry =
622-
VPBlockUtils::cloneCFG(Entry, Old2New, Old2NewVPValues);
623-
auto *NewR =
624-
new VPRegionBlock(NewEntry, Old2New[Exiting], getName(), isReplicator());
631+
cloneCFG(VPR->getEntry(), Old2NewVPBlocks);
632+
VPBlockBase *NewEntry = Old2NewVPBlocks[VPR->getEntry()];
633+
auto *NewRegion =
634+
new VPRegionBlock(NewEntry, Old2NewVPBlocks[VPR->getExiting()],
635+
VPR->getName(), VPR->isReplicator());
625636
for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry))
626-
Block->setParent(NewR);
627-
return NewR;
637+
Block->setParent(NewRegion);
638+
return NewRegion;
639+
}
640+
641+
// Clone the CFG for all nodes reachable from \p Entry; this includes cloning
642+
// the blocks and their recipes. Operands of cloned recipes will NOT be updated.
643+
// Remapping of operands must be done separately.
644+
static void cloneCFG(VPBlockBase *Entry,
645+
DenseMap<VPBlockBase *, VPBlockBase *> &Old2NewVPBlocks) {
646+
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
647+
Entry);
648+
for (VPBlockBase *BB : RPOT) {
649+
VPBlockBase *NewBB = cloneVPB(BB);
650+
for (VPBlockBase *Pred : BB->getPredecessors())
651+
VPBlockUtils::connectBlocks(Old2NewVPBlocks[Pred], NewBB);
652+
653+
Old2NewVPBlocks[BB] = NewBB;
654+
}
655+
656+
#if !defined(NDEBUG)
657+
// Verify that the order of predecessors and successors matches in the cloned
658+
// version.
659+
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
660+
NewRPOT(Old2NewVPBlocks[Entry]);
661+
for (const auto &[OldBB, NewBB] : zip(RPOT, NewRPOT)) {
662+
for (const auto &[OldPred, NewPred] :
663+
zip(OldBB->getPredecessors(), NewBB->getPredecessors()))
664+
assert(NewPred == Old2NewVPBlocks[OldPred] && "Different predecessors");
665+
666+
for (const auto &[OldSucc, NewSucc] :
667+
zip(OldBB->successors(), NewBB->successors()))
668+
assert(NewSucc == Old2NewVPBlocks[OldSucc] && "Different successors");
669+
}
670+
#endif
628671
}
629672

630673
void VPRegionBlock::dropAllReferences(VPValue *NewValue) {
@@ -994,62 +1037,91 @@ void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopHeaderBB,
9941037
assert(DT->verify(DominatorTree::VerificationLevel::Fast));
9951038
}
9961039

997-
static void remapVPValues(VPBasicBlock *OldBB, VPBasicBlock *NewBB,
998-
DenseMap<VPValue *, VPValue *> &Old2NewVPValues,
999-
bool Full = false) {
1000-
for (const auto &[OldR, NewR] : zip(*OldBB, *NewBB)) {
1001-
for (unsigned I = 0, E = NewR.getNumOperands(); I != E; ++I) {
1002-
VPValue *NewOp = Old2NewVPValues.lookup(OldR.getOperand(I));
1003-
if (!Full)
1004-
continue;
1005-
NewR.setOperand(I, NewOp);
1040+
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
1041+
DenseMap<VPValue *, VPValue *> &Old2NewVPValues) {
1042+
// Update the operands of all cloned recipes starting at NewEntry. This
1043+
// traverses all reachable blocks. This is done in two steps, to handle cycles
1044+
// in PHI recipes.
1045+
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>>
1046+
OldDeepRPOT(Entry);
1047+
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>>
1048+
NewDeepRPOT(NewEntry);
1049+
// First, collect all mappings from old to new VPValues defined by cloned
1050+
// recipes.
1051+
for (const auto &[OldBB, NewBB] :
1052+
zip(VPBlockUtils::blocksOnly<VPBasicBlock>(OldDeepRPOT),
1053+
VPBlockUtils::blocksOnly<VPBasicBlock>(NewDeepRPOT))) {
1054+
assert(OldBB->getRecipeList().size() == NewBB->getRecipeList().size() &&
1055+
"blocks must have the same number of recipes");
1056+
1057+
for (const auto &[OldR, NewR] : zip(*OldBB, *NewBB)) {
1058+
assert(OldR.getNumOperands() == NewR.getNumOperands() &&
1059+
"recipes must have the same number of operands");
1060+
assert(OldR.getNumDefinedValues() == NewR.getNumDefinedValues() &&
1061+
"recipes must define the same number of operands");
1062+
for (const auto &[OldV, NewV] :
1063+
zip(OldR.definedValues(), NewR.definedValues()))
1064+
Old2NewVPValues[OldV] = NewV;
10061065
}
1007-
for (const auto &[OldV, NewV] :
1008-
zip(OldR.definedValues(), NewR.definedValues()))
1009-
Old2NewVPValues[OldV] = NewV;
1066+
}
1067+
1068+
// Update all operands to use cloned VPValues.
1069+
for (VPBasicBlock *NewBB :
1070+
VPBlockUtils::blocksOnly<VPBasicBlock>(NewDeepRPOT)) {
1071+
for (VPRecipeBase &NewR : *NewBB)
1072+
for (unsigned I = 0, E = NewR.getNumOperands(); I != E; ++I) {
1073+
VPValue *NewOp = Old2NewVPValues.lookup(NewR.getOperand(I));
1074+
NewR.setOperand(I, NewOp);
1075+
}
10101076
}
10111077
}
10121078

10131079
VPlan *VPlan::clone() {
1014-
DenseMap<VPBlockBase *, VPBlockBase *> Old2New;
1080+
DenseMap<VPBlockBase *, VPBlockBase *> Old2NewVPBlocks;
10151081
DenseMap<VPValue *, VPValue *> Old2NewVPValues;
10161082

10171083
auto *NewPlan = new VPlan();
1084+
1085+
// Clone live-ins.
10181086
SmallVector<VPValue *, 16> NewLiveIns;
1019-
for (VPValue *LI : VPLiveInsToFree) {
1020-
VPValue *NewLI = new VPValue(LI->getLiveInIRValue());
1021-
NewPlan->VPLiveInsToFree.push_back(NewLI);
1022-
Old2NewVPValues[LI] = NewLI;
1087+
for (VPValue *OldLiveIn : VPLiveInsToFree) {
1088+
VPValue *NewLiveIn = new VPValue(OldLiveIn->getLiveInIRValue());
1089+
NewPlan->VPLiveInsToFree.push_back(NewLiveIn);
1090+
Old2NewVPValues[OldLiveIn] = NewLiveIn;
10231091
}
1024-
10251092
Old2NewVPValues[&VectorTripCount] = &NewPlan->VectorTripCount;
10261093
Old2NewVPValues[&VFxUF] = &NewPlan->VFxUF;
10271094
if (BackedgeTakenCount) {
1028-
Old2NewVPValues[BackedgeTakenCount] = new VPValue();
1029-
NewPlan->BackedgeTakenCount = Old2NewVPValues[BackedgeTakenCount];
1095+
NewPlan->BackedgeTakenCount = new VPValue();
1096+
Old2NewVPValues[BackedgeTakenCount] = NewPlan->BackedgeTakenCount;
10301097
}
1031-
1032-
auto NewPH = cast<VPBasicBlock>(Preheader->clone());
1033-
remapVPValues(cast<VPBasicBlock>(Preheader), cast<VPBasicBlock>(NewPH),
1034-
Old2NewVPValues, /*Full*/ true);
1035-
VPValue *NewTC = Old2NewVPValues.lookup(TripCount);
1036-
if (!NewTC)
1098+
assert(TripCount && "trip count must be set");
1099+
if (TripCount->isLiveIn())
10371100
Old2NewVPValues[TripCount] = new VPValue(TripCount->getLiveInIRValue());
1038-
NewPlan->TripCount = Old2NewVPValues[TripCount];
10391101

1040-
auto *NewEntry = cast<VPBasicBlock>(VPBlockUtils::cloneCFG(
1041-
getEntry(), Old2New, Old2NewVPValues, /*FullRemapping*/ true));
1102+
// Clone blocks.
1103+
cloneCFG(Preheader, Old2NewVPBlocks);
1104+
cloneCFG(getEntry(), Old2NewVPBlocks);
1105+
1106+
auto *NewPreheader = cast<VPBasicBlock>(Old2NewVPBlocks[Preheader]);
1107+
remapOperands(Preheader, NewPreheader, Old2NewVPValues);
1108+
auto *NewEntry = cast<VPBasicBlock>(Old2NewVPBlocks[Entry]);
1109+
remapOperands(Entry, NewEntry, Old2NewVPValues);
1110+
1111+
// Clone live-outs.
1112+
for (const auto &[_, LO] : LiveOuts)
1113+
NewPlan->addLiveOut(LO->getPhi(), Old2NewVPValues[LO->getOperand(0)]);
10421114

1115+
// Initialize fields of cloned VPlan.
10431116
NewPlan->Entry = NewEntry;
1044-
NewPlan->Preheader = NewPH;
1117+
NewPlan->Preheader = NewPreheader;
10451118
NewEntry->setPlan(NewPlan);
1046-
NewPH->setPlan(NewPlan);
1119+
NewPreheader->setPlan(NewPlan);
10471120
NewPlan->VFs = VFs;
10481121
NewPlan->UFs = UFs;
1122+
// TODO: Adjust names.
10491123
NewPlan->Name = Name;
1050-
1051-
for (const auto &[_, LO] : LiveOuts)
1052-
NewPlan->addLiveOut(LO->getPhi(), Old2NewVPValues[LO->getOperand(0)]);
1124+
NewPlan->TripCount = Old2NewVPValues[TripCount];
10531125
return NewPlan;
10541126
}
10551127

@@ -1271,59 +1343,6 @@ void VPUser::printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const {
12711343
}
12721344
#endif
12731345

1274-
VPBlockBase *VPBlockUtils::cloneCFG(
1275-
VPBlockBase *Entry, DenseMap<VPBlockBase *, VPBlockBase *> &Old2New,
1276-
DenseMap<VPValue *, VPValue *> &Old2NewVPValues, bool FullRemapping) {
1277-
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
1278-
Entry);
1279-
VPBlockBase *NewEntry = nullptr;
1280-
for (VPBlockBase *BB : RPOT) {
1281-
VPBlockBase *NewBB = BB->clone();
1282-
if (!NewEntry)
1283-
NewEntry = NewBB;
1284-
1285-
for (VPBlockBase *Pred : BB->getPredecessors())
1286-
connectBlocks(Old2New[Pred], NewBB);
1287-
1288-
Old2New[BB] = NewBB;
1289-
1290-
if (!isa<VPBasicBlock>(BB))
1291-
continue;
1292-
}
1293-
1294-
// Update the operands of all cloned recipes starting at NewEntry. This
1295-
// traverses all reachable blocks. This is done in two steps, to handle cycles
1296-
// in PHI recipes.
1297-
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>>
1298-
OldDeepRPOT(Entry);
1299-
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>>
1300-
NewDeepRPOT(NewEntry);
1301-
// First, collect all mappings from old to new VPValues defined by cloned
1302-
// recipes.
1303-
for (const auto &[OldBB, NewBB] :
1304-
zip(VPBlockUtils::blocksOnly<VPBasicBlock>(OldDeepRPOT),
1305-
VPBlockUtils::blocksOnly<VPBasicBlock>(NewDeepRPOT))) {
1306-
for (const auto &[OldR, NewR] : zip(*OldBB, *NewBB))
1307-
for (const auto &[OldV, NewV] :
1308-
zip(OldR.definedValues(), NewR.definedValues()))
1309-
Old2NewVPValues[OldV] = NewV;
1310-
}
1311-
1312-
// Update all operands to use cloned VPValues.
1313-
for (VPBasicBlock *NewBB :
1314-
VPBlockUtils::blocksOnly<VPBasicBlock>(NewDeepRPOT)) {
1315-
for (VPRecipeBase &NewR : *NewBB)
1316-
for (unsigned I = 0, E = NewR.getNumOperands(); I != E; ++I) {
1317-
VPValue *NewOp = Old2NewVPValues.lookup(NewR.getOperand(I));
1318-
if (!FullRemapping)
1319-
continue;
1320-
NewR.setOperand(I, NewOp);
1321-
}
1322-
}
1323-
1324-
return NewEntry;
1325-
}
1326-
13271346
void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,
13281347
Old2NewTy &Old2New,
13291348
InterleavedAccessInfo &IAI) {

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 20 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -480,8 +480,6 @@ class VPBlockBase {
480480

481481
using VPBlocksTy = SmallVectorImpl<VPBlockBase *>;
482482

483-
virtual VPBlockBase *clone() = 0;
484-
485483
virtual ~VPBlockBase() = default;
486484

487485
const std::string &getName() const { return Name; }
@@ -1393,7 +1391,8 @@ class VPWidenCallRecipe : public VPSingleDefRecipe {
13931391

13941392
VPRecipeBase *clone() override {
13951393
return new VPWidenCallRecipe(*cast<CallInst>(getUnderlyingInstr()),
1396-
operands(), VectorIntrinsicID, Variant);
1394+
operands(), VectorIntrinsicID, getDebugLoc(),
1395+
Variant);
13971396
}
13981397

13991398
VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
@@ -1508,7 +1507,7 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags {
15081507

15091508
VPRecipeBase *clone() override {
15101509
return new VPVectorPointerRecipe(getOperand(0), IndexedTy, IsReverse,
1511-
getDebugLoc());
1510+
isInBounds(), getDebugLoc());
15121511
}
15131512

15141513
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -2468,28 +2467,28 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
24682467
/// for floating point inductions.
24692468
const FPMathOperator *FPBinOp;
24702469

2471-
public:
2472-
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start,
2470+
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind,
2471+
const FPMathOperator *FPBinOp, VPValue *Start,
24732472
VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
24742473
Type *TruncResultTy)
24752474
: VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}),
2476-
TruncResultTy(TruncResultTy), Kind(IndDesc.getKind()),
2477-
FPBinOp(dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())) {
2478-
}
2479-
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind,
2480-
const FPMathOperator *FPBinOp, VPValue *Start,
2475+
TruncResultTy(TruncResultTy), Kind(Kind), FPBinOp(FPBinOp) {}
2476+
2477+
public:
2478+
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start,
24812479
VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
24822480
Type *TruncResultTy)
2483-
: VPRecipeBase(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}),
2484-
VPValue(this), TruncResultTy(TruncResultTy), Kind(Kind),
2485-
FPBinOp(FPBinOp) {}
2481+
: VPDerivedIVRecipe(
2482+
IndDesc.getKind(),
2483+
dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
2484+
Start, CanonicalIV, Step, TruncResultTy) {}
24862485

24872486
~VPDerivedIVRecipe() override = default;
24882487

24892488
VPRecipeBase *clone() override {
2490-
return new VPDerivedIVRecipe(Kind, FPBinOp, getOperand(0),
2491-
cast<VPCanonicalIVPHIRecipe>(getOperand(1)),
2492-
getOperand(2), TruncResultTy);
2489+
return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(),
2490+
getCanonicalIV(), getStepValue(),
2491+
TruncResultTy);
24932492
}
24942493

24952494
VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
@@ -2510,7 +2509,9 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
25102509
}
25112510

25122511
VPValue *getStartValue() const { return getOperand(0); }
2513-
VPValue *getCanonicalIV() const { return getOperand(1); }
2512+
VPCanonicalIVPHIRecipe *getCanonicalIV() const {
2513+
return cast<VPCanonicalIVPHIRecipe>(getOperand(1));
2514+
}
25142515
VPValue *getStepValue() const { return getOperand(2); }
25152516

25162517
/// Returns true if the recipe only uses the first lane of operand \p Op.
@@ -2593,13 +2594,6 @@ class VPBasicBlock : public VPBlockBase {
25932594
Recipes.pop_back();
25942595
}
25952596

2596-
VPBlockBase *clone() override {
2597-
auto *NewBlock = new VPBasicBlock(getName());
2598-
for (VPRecipeBase &R : *this)
2599-
NewBlock->appendRecipe(R.clone());
2600-
return NewBlock;
2601-
}
2602-
26032597
/// Instruction iterators...
26042598
using iterator = RecipeListTy::iterator;
26052599
using const_iterator = RecipeListTy::const_iterator;
@@ -2738,8 +2732,6 @@ class VPRegionBlock : public VPBlockBase {
27382732
}
27392733
}
27402734

2741-
VPBlockBase *clone() override;
2742-
27432735
/// Method to support type inquiry through isa, cast, and dyn_cast.
27442736
static inline bool classof(const VPBlockBase *V) {
27452737
return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
@@ -3052,6 +3044,7 @@ class VPlan {
30523044
VPBasicBlock *getPreheader() { return Preheader; }
30533045
const VPBasicBlock *getPreheader() const { return Preheader; }
30543046

3047+
/// Clone the current VPlan and return it.
30553048
VPlan *clone();
30563049

30573050
private:
@@ -3216,17 +3209,6 @@ class VPBlockUtils {
32163209
return cast<BlockTy>(&Block);
32173210
});
32183211
}
3219-
3220-
/// Clone the CFG for all nodes reachable from \p Entry, this includes cloning
3221-
/// the blocks and their recipes. Operands of cloned recipes will be updated
3222-
/// to use new VPValues from \p Old2NewValues. If \p FullRemapping is set to
3223-
/// true, then all old VPValues from outside the cloned nodes must be mapped
3224-
/// in \p Old2NewValues.
3225-
static VPBlockBase *
3226-
cloneCFG(VPBlockBase *Entry,
3227-
DenseMap<VPBlockBase *, VPBlockBase *> &Old2NewBBs,
3228-
DenseMap<VPValue *, VPValue *> &Old2NewValues,
3229-
bool FullRemapping = false);
32303212
};
32313213

32323214
class VPInterleavedAccessInfo {

0 commit comments

Comments
 (0)