@@ -1186,6 +1186,12 @@ class BoUpSLP {
1186
1186
return VectorizableTree.front()->Scalars;
1187
1187
}
1188
1188
1189
+ /// Returns the signedness flag stored in MinBWs for the root graph node, i.e.
1190
+ /// the `.second` member of the MinBWs entry keyed by VectorizableTree.front().
1191
+ bool isSignedMinBitwidthRootNode() const {
1192
+ return MinBWs.at(VectorizableTree.front().get()).second; // NOTE(review): no presence check before .at() — presumably callers only ask when the root node has a MinBWs entry; confirm.
1193
+ }
1194
+
1189
1195
/// Builds external uses of the vectorized scalars, i.e. the list of
1190
1196
/// vectorized scalars to be extracted, their lanes and their scalar users. \p
1191
1197
/// ExternallyUsedValues contains additional list of external uses to handle
@@ -2453,6 +2459,90 @@ class BoUpSLP {
2453
2459
DeletedInstructions.insert(I);
2454
2460
}
2455
2461
2462
+ /// Remove the \p DeadVals instructions from their parent blocks, record them in DeletedInstructions and clear their operand references; operands
2463
+ /// that are left trivially dead are queued and removed the same way. Every element of \p DeadVals is cast<> to Instruction.
2464
+ template <typename T>
2465
+ void removeInstructionsAndOperands(ArrayRef<T *> DeadVals) {
2466
+ SmallVector<WeakTrackingVH> DeadInsts; // Worklist of operands found dead below.
2467
+ for (T *V : DeadVals) { // Pass 1: record every value in DeletedInstructions before touching operands.
2468
+ auto *I = cast<Instruction>(V); // NOTE(review): no null check here, unlike the `!V` guard in the next loop — confirm whether DeadVals can hold null entries.
2469
+ DeletedInstructions.insert(I);
2470
+ }
2471
+ for (T *V : DeadVals) { // Pass 2: salvage debug info, queue dead operands, then drop operand references.
2472
+ if (!V)
2473
+ continue;
2474
+ auto *I = cast<Instruction>(V);
2475
+ salvageDebugInfo(*I);
2476
+ SmallVector<const TreeEntry *> Entries; // Tree entries found for I; their VectorizedValue must not be queued as dead.
2477
+ if (const TreeEntry *Entry = getTreeEntry(I)) {
2478
+ Entries.push_back(Entry);
2479
+ auto It = MultiNodeScalars.find(I);
2480
+ if (It != MultiNodeScalars.end())
2481
+ Entries.append(It->second.begin(), It->second.end());
2482
+ }
2483
+ for (Use &U : I->operands()) { // Queue instruction operands that have a single user, would be trivially dead, are not already marked deleted and are not an entry's VectorizedValue.
2484
+ if (auto *OpI = dyn_cast_if_present<Instruction>(U.get());
2485
+ OpI && !DeletedInstructions.contains(OpI) && OpI->hasOneUser() &&
2486
+ wouldInstructionBeTriviallyDead(OpI, TLI) &&
2487
+ (Entries.empty() || none_of(Entries, [&](const TreeEntry *Entry) {
2488
+ return Entry->VectorizedValue == OpI;
2489
+ })))
2490
+ DeadInsts.push_back(OpI);
2491
+ }
2492
+ I->dropAllReferences(); // Clear I's operands only after they have been inspected above.
2493
+ }
2494
+ for (T *V : DeadVals) { // Pass 3: unlink each instruction that still has a parent block.
2495
+ auto *I = cast<Instruction>(V); // NOTE(review): same unguarded cast as in pass 1.
2496
+ if (!I->getParent())
2497
+ continue;
2498
+ assert((I->use_empty() || all_of(I->uses(),
2499
+ [&](Use &U) {
2500
+ return isDeleted(
2501
+ cast<Instruction>(U.getUser()));
2502
+ })) &&
2503
+ "trying to erase instruction with users.");
2504
+ I->removeFromParent(); // Unlinks I from its block; I is not destroyed here.
2505
+ SE->forgetValue(I);
2506
+ }
2507
+ // Process the dead instruction list until empty.
2508
+ while (!DeadInsts.empty()) {
2509
+ Value *V = DeadInsts.pop_back_val();
2510
+ Instruction *VI = cast_or_null<Instruction>(V); // The popped handle may yield null, hence cast_or_null.
2511
+ if (!VI || !VI->getParent())
2512
+ continue;
2513
+ assert(isInstructionTriviallyDead(VI, TLI) &&
2514
+ "Live instruction found in dead worklist!");
2515
+ assert(VI->use_empty() && "Instructions with uses are not dead.");
2516
+
2517
+ // Don't lose the debug info while deleting the instructions.
2518
+ salvageDebugInfo(*VI);
2519
+
2520
+ // Null out all of the instruction's operands to see if any operand
2521
+ // becomes dead as we go.
2522
+ for (Use &OpU : VI->operands()) {
2523
+ Value *OpV = OpU.get();
2524
+ if (!OpV)
2525
+ continue;
2526
+ OpU.set(nullptr);
2527
+
2528
+ if (!OpV->use_empty())
2529
+ continue;
2530
+
2531
+ // If the operand is an instruction that became dead as we nulled out
2532
+ // the operand, and if it is 'trivially' dead, delete it in a future
2533
+ // loop iteration.
2534
+ if (auto *OpI = dyn_cast<Instruction>(OpV))
2535
+ if (!DeletedInstructions.contains(OpI) &&
2536
+ isInstructionTriviallyDead(OpI, TLI))
2537
+ DeadInsts.push_back(OpI);
2538
+ }
2539
+
2540
+ VI->removeFromParent();
2541
+ DeletedInstructions.insert(VI); // Record the extra dead operand as deleted as well.
2542
+ SE->forgetValue(VI);
2543
+ }
2544
+ }
2545
+
2456
2546
/// Checks if the instruction was already analyzed for being possible
2457
2547
/// reduction root.
2458
2548
bool isAnalyzedReductionRoot(Instruction *I) const {
@@ -3987,6 +4077,10 @@ template <> struct DOTGraphTraits<BoUpSLP *> : public DefaultDOTGraphTraits {
3987
4077
BoUpSLP::~BoUpSLP() {
3988
4078
SmallVector<WeakTrackingVH> DeadInsts;
3989
4079
for (auto *I : DeletedInstructions) {
4080
+ if (!I->getParent()) {
4081
+ I->insertBefore(F->getEntryBlock().getTerminator());
4082
+ continue;
4083
+ }
3990
4084
for (Use &U : I->operands()) {
3991
4085
auto *Op = dyn_cast<Instruction>(U.get());
3992
4086
if (Op && !DeletedInstructions.count(Op) && Op->hasOneUser() &&
@@ -14075,11 +14169,8 @@ Value *BoUpSLP::vectorizeTree(
14075
14169
}
14076
14170
#endif
14077
14171
LLVM_DEBUG(dbgs() << "SLP: \tErasing scalar:" << *Scalar << ".\n");
14078
- eraseInstruction(cast<Instruction>(Scalar));
14079
- // Retain to-be-deleted instructions for some debug-info
14080
- // bookkeeping. NOTE: eraseInstruction only marks the instruction for
14081
- // deletion - instructions are not deleted until later.
14082
- RemovedInsts.push_back(cast<Instruction>(Scalar));
14172
+ auto *I = cast<Instruction>(Scalar);
14173
+ RemovedInsts.push_back(I);
14083
14174
}
14084
14175
}
14085
14176
@@ -14088,6 +14179,22 @@ Value *BoUpSLP::vectorizeTree(
14088
14179
if (auto *V = dyn_cast<Instruction>(VectorizableTree[0]->VectorizedValue))
14089
14180
V->mergeDIAssignID(RemovedInsts);
14090
14181
14182
+ // Clear up reduction references, if any.
14183
+ if (UserIgnoreList) {
14184
+ for (Instruction *I : RemovedInsts) {
14185
+ if (getTreeEntry(I)->Idx != 0)
14186
+ continue;
14187
+ I->replaceUsesWithIf(PoisonValue::get(I->getType()), [&](Use &U) {
14188
+ return UserIgnoreList->contains(U.getUser());
14189
+ });
14190
+ }
14191
+ }
14192
+ // Retain to-be-deleted instructions for some debug-info bookkeeping and alias
14193
+ // cache correctness.
14194
+ // NOTE: removeInstructionsAndOperands only marks the instruction for deletion
14195
+ // - instructions are not deleted until later.
14196
+ removeInstructionsAndOperands(ArrayRef(RemovedInsts));
14197
+
14091
14198
Builder.ClearInsertionPoint();
14092
14199
InstrElementSize.clear();
14093
14200
@@ -16137,15 +16244,18 @@ bool SLPVectorizerPass::vectorizeStores(
16137
16244
Res.first = Idx;
16138
16245
Res.second.emplace(Idx, 0);
16139
16246
};
16140
- StoreInst *PrevStore = Stores.front() ;
16247
+ Type *PrevValTy = nullptr ;
16141
16248
for (auto [I, SI] : enumerate(Stores)) {
16249
+ if (R.isDeleted(SI))
16250
+ continue;
16251
+ if (!PrevValTy)
16252
+ PrevValTy = SI->getValueOperand()->getType();
16142
16253
// Check that we do not try to vectorize stores of different types.
16143
- if (PrevStore->getValueOperand()->getType() !=
16144
- SI->getValueOperand()->getType()) {
16254
+ if (PrevValTy != SI->getValueOperand()->getType()) {
16145
16255
for (auto &Set : SortedStores)
16146
16256
TryToVectorize(Set.second);
16147
16257
SortedStores.clear();
16148
- PrevStore = SI;
16258
+ PrevValTy = SI->getValueOperand()->getType() ;
16149
16259
}
16150
16260
FillStoresSet(I, SI);
16151
16261
}
@@ -17028,9 +17138,12 @@ class HorizontalReduction {
17028
17138
Value *VectorizedTree = nullptr;
17029
17139
bool CheckForReusedReductionOps = false;
17030
17140
// Try to vectorize elements based on their type.
17141
+ SmallVector<InstructionsState> States;
17142
+ for (ArrayRef<Value *> RV : ReducedVals)
17143
+ States.push_back(getSameOpcode(RV, TLI));
17031
17144
for (unsigned I = 0, E = ReducedVals.size(); I < E; ++I) {
17032
17145
ArrayRef<Value *> OrigReducedVals = ReducedVals[I];
17033
- InstructionsState S = getSameOpcode(OrigReducedVals, TLI) ;
17146
+ InstructionsState S = States[I] ;
17034
17147
SmallVector<Value *> Candidates;
17035
17148
Candidates.reserve(2 * OrigReducedVals.size());
17036
17149
DenseMap<Value *, Value *> TrackedToOrig(2 * OrigReducedVals.size());
@@ -17355,14 +17468,11 @@ class HorizontalReduction {
17355
17468
Value *ReducedSubTree =
17356
17469
emitReduction(VectorizedRoot, Builder, ReduxWidth, TTI);
17357
17470
if (ReducedSubTree->getType() != VL.front()->getType()) {
17358
- ReducedSubTree = Builder.CreateIntCast(
17359
- ReducedSubTree, VL.front()->getType(), any_of(VL, [&](Value *R) {
17360
- KnownBits Known = computeKnownBits(
17361
- R, cast<Instruction>(ReductionOps.front().front())
17362
- ->getModule()
17363
- ->getDataLayout());
17364
- return !Known.isNonNegative();
17365
- }));
17471
+ assert(ReducedSubTree->getType() != VL.front()->getType() &&
17472
+ "Expected different reduction type.");
17473
+ ReducedSubTree =
17474
+ Builder.CreateIntCast(ReducedSubTree, VL.front()->getType(),
17475
+ V.isSignedMinBitwidthRootNode());
17366
17476
}
17367
17477
17368
17478
// Improved analysis for add/fadd/xor reductions with same scale factor
@@ -17524,11 +17634,11 @@ class HorizontalReduction {
17524
17634
}
17525
17635
#endif
17526
17636
if (!Ignore->use_empty()) {
17527
- Value *Undef = UndefValue ::get(Ignore->getType());
17528
- Ignore->replaceAllUsesWith(Undef );
17637
+ Value *P = PoisonValue ::get(Ignore->getType());
17638
+ Ignore->replaceAllUsesWith(P );
17529
17639
}
17530
- V.eraseInstruction(cast<Instruction>(Ignore));
17531
17640
}
17641
+ V.removeInstructionsAndOperands(RdxOps);
17532
17642
}
17533
17643
} else if (!CheckForReusedReductionOps) {
17534
17644
for (ReductionOpsType &RdxOps : ReductionOps)
@@ -18076,6 +18186,8 @@ bool SLPVectorizerPass::vectorizeHorReduction(
18076
18186
Stack.emplace(I, Level);
18077
18187
continue;
18078
18188
}
18189
+ if (R.isDeleted(Inst))
18190
+ continue;
18079
18191
} else {
18080
18192
// We could not vectorize `Inst` so try to use it as a future seed.
18081
18193
if (!TryAppendToPostponedInsts(Inst)) {
@@ -18161,15 +18273,28 @@ static bool tryToVectorizeSequence(
18161
18273
18162
18274
// Try to vectorize elements based on their type.
18163
18275
SmallVector<T *> Candidates;
18164
- for (auto *IncIt = Incoming.begin(), *E = Incoming.end(); IncIt != E;) {
18276
+ SmallVector<T *> VL;
18277
+ for (auto *IncIt = Incoming.begin(), *E = Incoming.end(); IncIt != E;
18278
+ VL.clear()) {
18165
18279
// Look for the next elements with the same type, parent and operand
18166
18280
// kinds.
18281
+ auto *I = dyn_cast<Instruction>(*IncIt);
18282
+ if (!I || R.isDeleted(I)) {
18283
+ ++IncIt;
18284
+ continue;
18285
+ }
18167
18286
auto *SameTypeIt = IncIt;
18168
- while (SameTypeIt != E && AreCompatible(*SameTypeIt, *IncIt))
18287
+ while (SameTypeIt != E && (!isa<Instruction>(*SameTypeIt) ||
18288
+ R.isDeleted(cast<Instruction>(*SameTypeIt)) ||
18289
+ AreCompatible(*SameTypeIt, *IncIt))) {
18290
+ auto *I = dyn_cast<Instruction>(*SameTypeIt);
18169
18291
++SameTypeIt;
18292
+ if (I && !R.isDeleted(I))
18293
+ VL.push_back(cast<T>(I));
18294
+ }
18170
18295
18171
18296
// Try to vectorize them.
18172
- unsigned NumElts = (SameTypeIt - IncIt );
18297
+ unsigned NumElts = VL.size( );
18173
18298
LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize starting at nodes ("
18174
18299
<< NumElts << ")\n");
18175
18300
// The vectorization is a 3-state attempt:
@@ -18181,10 +18306,15 @@ static bool tryToVectorizeSequence(
18181
18306
// 3. Final attempt to try to vectorize all instructions with the
18182
18307
// same/alternate ops only, this may result in some extra final
18183
18308
// vectorization.
18184
- if (NumElts > 1 &&
18185
- TryToVectorizeHelper(ArrayRef(IncIt, NumElts), MaxVFOnly)) {
18309
+ if (NumElts > 1 && TryToVectorizeHelper(ArrayRef(VL), MaxVFOnly)) {
18186
18310
// Success start over because instructions might have been changed.
18187
18311
Changed = true;
18312
+ VL.swap(Candidates);
18313
+ Candidates.clear();
18314
+ for (T *V : VL) {
18315
+ if (auto *I = dyn_cast<Instruction>(V); I && !R.isDeleted(I))
18316
+ Candidates.push_back(V);
18317
+ }
18188
18318
} else {
18189
18319
/// \returns the minimum number of elements that we will attempt to
18190
18320
/// vectorize.
@@ -18195,7 +18325,10 @@ static bool tryToVectorizeSequence(
18195
18325
if (NumElts < GetMinNumElements(*IncIt) &&
18196
18326
(Candidates.empty() ||
18197
18327
Candidates.front()->getType() == (*IncIt)->getType())) {
18198
- Candidates.append(IncIt, std::next(IncIt, NumElts));
18328
+ for (T *V : VL) {
18329
+ if (auto *I = dyn_cast<Instruction>(V); I && !R.isDeleted(I))
18330
+ Candidates.push_back(V);
18331
+ }
18199
18332
}
18200
18333
}
18201
18334
// Final attempt to vectorize instructions with the same types.
@@ -18206,13 +18339,26 @@ static bool tryToVectorizeSequence(
18206
18339
Changed = true;
18207
18340
} else if (MaxVFOnly) {
18208
18341
// Try to vectorize using small vectors.
18209
- for (auto *It = Candidates.begin(), *End = Candidates.end();
18210
- It != End;) {
18342
+ SmallVector<T *> VL;
18343
+ for (auto *It = Candidates.begin(), *End = Candidates.end(); It != End;
18344
+ VL.clear()) {
18345
+ auto *I = dyn_cast<Instruction>(*It);
18346
+ if (!I || R.isDeleted(I)) {
18347
+ ++It;
18348
+ continue;
18349
+ }
18211
18350
auto *SameTypeIt = It;
18212
- while (SameTypeIt != End && AreCompatible(*SameTypeIt, *It))
18351
+ while (SameTypeIt != End &&
18352
+ (!isa<Instruction>(*SameTypeIt) ||
18353
+ R.isDeleted(cast<Instruction>(*SameTypeIt)) ||
18354
+ AreCompatible(*SameTypeIt, *It))) {
18355
+ auto *I = dyn_cast<Instruction>(*SameTypeIt);
18213
18356
++SameTypeIt;
18214
- unsigned NumElts = (SameTypeIt - It);
18215
- if (NumElts > 1 && TryToVectorizeHelper(ArrayRef(It, NumElts),
18357
+ if (I && !R.isDeleted(I))
18358
+ VL.push_back(cast<T>(I));
18359
+ }
18360
+ unsigned NumElts = VL.size();
18361
+ if (NumElts > 1 && TryToVectorizeHelper(ArrayRef(VL),
18216
18362
/*MaxVFOnly=*/false))
18217
18363
Changed = true;
18218
18364
It = SameTypeIt;
@@ -18486,7 +18632,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
18486
18632
}
18487
18633
return false;
18488
18634
};
18489
- auto AreCompatiblePHIs = [&PHIToOpcodes, this](Value *V1, Value *V2) {
18635
+ auto AreCompatiblePHIs = [&PHIToOpcodes, this, &R ](Value *V1, Value *V2) {
18490
18636
if (V1 == V2)
18491
18637
return true;
18492
18638
if (V1->getType() != V2->getType())
@@ -18501,6 +18647,8 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
18501
18647
continue;
18502
18648
if (auto *I1 = dyn_cast<Instruction>(Opcodes1[I]))
18503
18649
if (auto *I2 = dyn_cast<Instruction>(Opcodes2[I])) {
18650
+ if (R.isDeleted(I1) || R.isDeleted(I2))
18651
+ return false;
18504
18652
if (I1->getParent() != I2->getParent())
18505
18653
return false;
18506
18654
InstructionsState S = getSameOpcode({I1, I2}, *TLI);
@@ -18721,8 +18869,13 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) {
18721
18869
// are trying to vectorize the index computations, so the maximum number of
18722
18870
// elements is based on the size of the index expression, rather than the
18723
18871
// size of the GEP itself (the target's pointer size).
18872
+ auto *It = find_if(Entry.second, [&](GetElementPtrInst *GEP) {
18873
+ return !R.isDeleted(GEP);
18874
+ });
18875
+ if (It == Entry.second.end())
18876
+ continue;
18724
18877
unsigned MaxVecRegSize = R.getMaxVecRegSize();
18725
- unsigned EltSize = R.getVectorElementSize(*Entry.second[0] ->idx_begin());
18878
+ unsigned EltSize = R.getVectorElementSize(*(*It) ->idx_begin());
18726
18879
if (MaxVecRegSize < EltSize)
18727
18880
continue;
18728
18881
0 commit comments