Skip to content

Commit 51aac5b

Browse files
committed
[SLP][NFCI]Improve compile time for phis with large number of incoming values.
Added a limit of at most 128 incoming values for PHI nodes to be vectorized, and improved performance by using a logarithmic search instead of a linear one when the number of incoming values is greater than 4.
1 parent 19f4d68 commit 51aac5b

File tree

1 file changed

+93
-20
lines changed

1 file changed

+93
-20
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 93 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,9 @@ static const unsigned MaxMemDepDistance = 160;
216216
/// regions to be handled.
217217
static const int MinScheduleRegionSize = 16;
218218

219+
/// Maximum allowed number of operands in the PHI nodes; a PHI bundle whose leading node has more operands than this is gathered (TreeEntry::NeedToGather) instead of being vectorized.
220+
static const unsigned MaxPHINumOperands = 128;
221+
219222
/// Predicate for the element types that the SLP vectorizer supports.
220223
///
221224
/// The most important thing to filter here are types which are invalid in LLVM
@@ -6001,6 +6004,9 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
60016004
auto *VL0 = cast<Instruction>(S.OpValue);
60026005
switch (ShuffleOrOp) {
60036006
case Instruction::PHI: {
6007+
// Too many operands - gather, most probably won't be vectorized.
6008+
if (VL0->getNumOperands() > MaxPHINumOperands)
6009+
return TreeEntry::NeedToGather;
60046010
// Check for terminator values (e.g. invoke).
60056011
for (Value *V : VL)
60066012
for (Value *Incoming : cast<PHINode>(V)->incoming_values()) {
@@ -6307,6 +6313,85 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
63076313
}
63086314
}
63096315

6316+
namespace {
6317+
/// Allows to correctly handle operands of the phi nodes based on the \p Main
6318+
/// PHINode order of incoming basic blocks/values.
6319+
class PHIHandler {
6320+
DominatorTree &DT;
6321+
PHINode *Main = nullptr;
6322+
SmallVector<Value *> Phis;
6323+
SmallVector<SmallVector<Value *>> Operands;
6324+
6325+
public:
6326+
PHIHandler() = delete;
6327+
PHIHandler(DominatorTree &DT, PHINode *Main, ArrayRef<Value *> Phis)
6328+
: DT(DT), Main(Main), Phis(Phis),
6329+
Operands(Main->getNumIncomingValues(),
6330+
SmallVector<Value *>(Phis.size(), nullptr)) {}
6331+
void buildOperands() {
6332+
constexpr unsigned FastLimit = 4;
6333+
if (Main->getNumIncomingValues() <= FastLimit) {
6334+
for (unsigned I : seq<unsigned>(0, Main->getNumIncomingValues())) {
6335+
BasicBlock *InBB = Main->getIncomingBlock(I);
6336+
if (!DT.isReachableFromEntry(InBB)) {
6337+
Operands[I].assign(Phis.size(), PoisonValue::get(Main->getType()));
6338+
continue;
6339+
}
6340+
// Prepare the operand vector.
6341+
for (auto [Idx, V] : enumerate(Phis)) {
6342+
auto *P = cast<PHINode>(V);
6343+
if (P->getIncomingBlock(I) == InBB)
6344+
Operands[I][Idx] = P->getIncomingValue(I);
6345+
else
6346+
Operands[I][Idx] = P->getIncomingValueForBlock(InBB);
6347+
}
6348+
}
6349+
return;
6350+
}
6351+
SmallDenseMap<BasicBlock *, SmallVector<unsigned>, 4> Blocks;
6352+
for (unsigned I : seq<unsigned>(0, Main->getNumIncomingValues())) {
6353+
BasicBlock *InBB = Main->getIncomingBlock(I);
6354+
if (!DT.isReachableFromEntry(InBB)) {
6355+
Operands[I].assign(Phis.size(), PoisonValue::get(Main->getType()));
6356+
continue;
6357+
}
6358+
Blocks.try_emplace(InBB).first->second.push_back(I);
6359+
}
6360+
for (auto [Idx, V] : enumerate(Phis)) {
6361+
auto *P = cast<PHINode>(V);
6362+
for (unsigned I : seq<unsigned>(0, P->getNumIncomingValues())) {
6363+
BasicBlock *InBB = P->getIncomingBlock(I);
6364+
if (InBB == Main->getIncomingBlock(I)) {
6365+
if (isa_and_nonnull<PoisonValue>(Operands[I][Idx]))
6366+
continue;
6367+
Operands[I][Idx] = P->getIncomingValue(I);
6368+
continue;
6369+
}
6370+
auto It = Blocks.find(InBB);
6371+
if (It == Blocks.end())
6372+
continue;
6373+
Operands[It->second.front()][Idx] = P->getIncomingValue(I);
6374+
}
6375+
}
6376+
for (const auto &P : Blocks) {
6377+
if (P.getSecond().size() <= 1)
6378+
continue;
6379+
unsigned BasicI = P.getSecond().front();
6380+
for (unsigned I : ArrayRef(P.getSecond()).drop_front()) {
6381+
assert(all_of(enumerate(Operands[I]),
6382+
[&](const auto &Data) {
6383+
return !Data.value() ||
6384+
Data.value() == Operands[BasicI][Data.index()];
6385+
}) &&
6386+
"Expected empty operands list.");
6387+
Operands[I] = Operands[BasicI];
6388+
}
6389+
}
6390+
}
6391+
ArrayRef<Value *> getOperands(unsigned I) const { return Operands[I]; }
6392+
};
6393+
} // namespace
6394+
63106395
void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
63116396
const EdgeInfo &UserTreeIdx) {
63126397
assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
@@ -6675,24 +6760,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
66756760
LLVM_DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n");
66766761

66776762
// Keeps the reordered operands to avoid code duplication.
6678-
SmallVector<ValueList, 2> OperandsVec;
6679-
for (unsigned I = 0, E = PH->getNumIncomingValues(); I < E; ++I) {
6680-
if (!DT->isReachableFromEntry(PH->getIncomingBlock(I))) {
6681-
ValueList Operands(VL.size(), PoisonValue::get(PH->getType()));
6682-
TE->setOperand(I, Operands);
6683-
OperandsVec.push_back(Operands);
6684-
continue;
6685-
}
6686-
ValueList Operands;
6687-
// Prepare the operand vector.
6688-
for (Value *V : VL)
6689-
Operands.push_back(cast<PHINode>(V)->getIncomingValueForBlock(
6690-
PH->getIncomingBlock(I)));
6691-
TE->setOperand(I, Operands);
6692-
OperandsVec.push_back(Operands);
6693-
}
6694-
for (unsigned OpIdx = 0, OpE = OperandsVec.size(); OpIdx != OpE; ++OpIdx)
6695-
buildTree_rec(OperandsVec[OpIdx], Depth + 1, {TE, OpIdx});
6763+
PHIHandler Handler(*DT, PH, VL);
6764+
Handler.buildOperands();
6765+
for (unsigned I : seq<unsigned>(0, PH->getNumOperands()))
6766+
TE->setOperand(I, Handler.getOperands(I));
6767+
for (unsigned I : seq<unsigned>(0, PH->getNumOperands()))
6768+
buildTree_rec(Handler.getOperands(I), Depth + 1, {TE, I});
66966769
return;
66976770
}
66986771
case Instruction::ExtractValue:
@@ -18166,8 +18239,8 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
1816618239
// Collect the incoming values from the PHIs.
1816718240
Incoming.clear();
1816818241
for (Instruction &I : *BB) {
18169-
PHINode *P = dyn_cast<PHINode>(&I);
18170-
if (!P)
18242+
auto *P = dyn_cast<PHINode>(&I);
18243+
if (!P || P->getNumIncomingValues() > MaxPHINumOperands)
1817118244
break;
1817218245

1817318246
// No need to analyze deleted, vectorized and non-vectorizable

0 commit comments

Comments
 (0)