@@ -216,6 +216,9 @@ static const unsigned MaxMemDepDistance = 160;
216
216
/// regions to be handled.
217
217
static const int MinScheduleRegionSize = 16;
218
218
219
+ /// Maximum number of operands in a PHI node; PHIs above this limit are gathered.
220
+ static const unsigned MaxPHINumOperands = 128;
221
+
219
222
/// Predicate for the element types that the SLP vectorizer supports.
220
223
///
221
224
/// The most important thing to filter here are types which are invalid in LLVM
@@ -6001,6 +6004,9 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
6001
6004
auto *VL0 = cast<Instruction>(S.OpValue);
6002
6005
switch (ShuffleOrOp) {
6003
6006
case Instruction::PHI: {
6007
+ // Too many operands - gather, most probably won't be vectorized.
6008
+ if (VL0->getNumOperands() > MaxPHINumOperands)
6009
+ return TreeEntry::NeedToGather;
6004
6010
// Check for terminator values (e.g. invoke).
6005
6011
for (Value *V : VL)
6006
6012
for (Value *Incoming : cast<PHINode>(V)->incoming_values()) {
@@ -6307,6 +6313,85 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
6307
6313
}
6308
6314
}
6309
6315
6316
+ namespace {
6317
+ /// Allows to correctly handle operands of the phi nodes based on the \p Main
6318
+ /// PHINode order of incoming basic blocks/values.
6319
+ class PHIHandler {
6320
+ DominatorTree &DT;
6321
+ PHINode *Main = nullptr;
6322
+ SmallVector<Value *> Phis;
6323
+ SmallVector<SmallVector<Value *>> Operands;
6324
+
6325
+ public:
6326
+ PHIHandler() = delete;
6327
+ PHIHandler(DominatorTree &DT, PHINode *Main, ArrayRef<Value *> Phis)
6328
+ : DT(DT), Main(Main), Phis(Phis),
6329
+ Operands(Main->getNumIncomingValues(),
6330
+ SmallVector<Value *>(Phis.size(), nullptr)) {}
6331
+ void buildOperands() {
6332
+ constexpr unsigned FastLimit = 4;
6333
+ if (Main->getNumIncomingValues() <= FastLimit) {
6334
+ for (unsigned I : seq<unsigned>(0, Main->getNumIncomingValues())) {
6335
+ BasicBlock *InBB = Main->getIncomingBlock(I);
6336
+ if (!DT.isReachableFromEntry(InBB)) {
6337
+ Operands[I].assign(Phis.size(), PoisonValue::get(Main->getType()));
6338
+ continue;
6339
+ }
6340
+ // Prepare the operand vector.
6341
+ for (auto [Idx, V] : enumerate(Phis)) {
6342
+ auto *P = cast<PHINode>(V);
6343
+ if (P->getIncomingBlock(I) == InBB)
6344
+ Operands[I][Idx] = P->getIncomingValue(I);
6345
+ else
6346
+ Operands[I][Idx] = P->getIncomingValueForBlock(InBB);
6347
+ }
6348
+ }
6349
+ return;
6350
+ }
6351
+ SmallDenseMap<BasicBlock *, SmallVector<unsigned>, 4> Blocks;
6352
+ for (unsigned I : seq<unsigned>(0, Main->getNumIncomingValues())) {
6353
+ BasicBlock *InBB = Main->getIncomingBlock(I);
6354
+ if (!DT.isReachableFromEntry(InBB)) {
6355
+ Operands[I].assign(Phis.size(), PoisonValue::get(Main->getType()));
6356
+ continue;
6357
+ }
6358
+ Blocks.try_emplace(InBB).first->second.push_back(I);
6359
+ }
6360
+ for (auto [Idx, V] : enumerate(Phis)) {
6361
+ auto *P = cast<PHINode>(V);
6362
+ for (unsigned I : seq<unsigned>(0, P->getNumIncomingValues())) {
6363
+ BasicBlock *InBB = P->getIncomingBlock(I);
6364
+ if (InBB == Main->getIncomingBlock(I)) {
6365
+ if (isa_and_nonnull<PoisonValue>(Operands[I][Idx]))
6366
+ continue;
6367
+ Operands[I][Idx] = P->getIncomingValue(I);
6368
+ continue;
6369
+ }
6370
+ auto It = Blocks.find(InBB);
6371
+ if (It == Blocks.end())
6372
+ continue;
6373
+ Operands[It->second.front()][Idx] = P->getIncomingValue(I);
6374
+ }
6375
+ }
6376
+ for (const auto &P : Blocks) {
6377
+ if (P.getSecond().size() <= 1)
6378
+ continue;
6379
+ unsigned BasicI = P.getSecond().front();
6380
+ for (unsigned I : ArrayRef(P.getSecond()).drop_front()) {
6381
+ assert(all_of(enumerate(Operands[I]),
6382
+ [&](const auto &Data) {
6383
+ return !Data.value() ||
6384
+ Data.value() == Operands[BasicI][Data.index()];
6385
+ }) &&
6386
+ "Expected empty operands list.");
6387
+ Operands[I] = Operands[BasicI];
6388
+ }
6389
+ }
6390
+ }
6391
+ ArrayRef<Value *> getOperands(unsigned I) const { return Operands[I]; }
6392
+ };
6393
+ } // namespace
6394
+
6310
6395
void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
6311
6396
const EdgeInfo &UserTreeIdx) {
6312
6397
assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
@@ -6675,24 +6760,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
6675
6760
LLVM_DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n");
6676
6761
6677
6762
// Keeps the reordered operands to avoid code duplication.
6678
- SmallVector<ValueList, 2> OperandsVec;
6679
- for (unsigned I = 0, E = PH->getNumIncomingValues(); I < E; ++I) {
6680
- if (!DT->isReachableFromEntry(PH->getIncomingBlock(I))) {
6681
- ValueList Operands(VL.size(), PoisonValue::get(PH->getType()));
6682
- TE->setOperand(I, Operands);
6683
- OperandsVec.push_back(Operands);
6684
- continue;
6685
- }
6686
- ValueList Operands;
6687
- // Prepare the operand vector.
6688
- for (Value *V : VL)
6689
- Operands.push_back(cast<PHINode>(V)->getIncomingValueForBlock(
6690
- PH->getIncomingBlock(I)));
6691
- TE->setOperand(I, Operands);
6692
- OperandsVec.push_back(Operands);
6693
- }
6694
- for (unsigned OpIdx = 0, OpE = OperandsVec.size(); OpIdx != OpE; ++OpIdx)
6695
- buildTree_rec(OperandsVec[OpIdx], Depth + 1, {TE, OpIdx});
6763
+ PHIHandler Handler(*DT, PH, VL);
6764
+ Handler.buildOperands();
6765
+ for (unsigned I : seq<unsigned>(0, PH->getNumOperands()))
6766
+ TE->setOperand(I, Handler.getOperands(I));
6767
+ for (unsigned I : seq<unsigned>(0, PH->getNumOperands()))
6768
+ buildTree_rec(Handler.getOperands(I), Depth + 1, {TE, I});
6696
6769
return;
6697
6770
}
6698
6771
case Instruction::ExtractValue:
@@ -18166,8 +18239,8 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
18166
18239
// Collect the incoming values from the PHIs.
18167
18240
Incoming.clear();
18168
18241
for (Instruction &I : *BB) {
18169
- PHINode *P = dyn_cast<PHINode>(&I);
18170
- if (!P)
18242
+ auto *P = dyn_cast<PHINode>(&I);
18243
+ if (!P || P->getNumIncomingValues() > MaxPHINumOperands )
18171
18244
break;
18172
18245
18173
18246
// No need to analyze deleted, vectorized and non-vectorizable
0 commit comments