@@ -1283,11 +1283,8 @@ class BoUpSLP {
1283
1283
/// Vectorize the tree but with the list of externally used values \p
1284
1284
/// ExternallyUsedValues. Values in this MapVector can be replaced by the
1285
1285
/// generated extractelement instructions.
1286
- /// \param ReplacedExternals containd list of replaced external values
1287
- /// {scalar, replace} after emitting extractelement for external uses.
1288
1286
Value *
1289
1287
vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues,
1290
- SmallVectorImpl<std::pair<Value *, Value *>> &ReplacedExternals,
1291
1288
Instruction *ReductionRoot = nullptr);
1292
1289
1293
1290
/// \returns the cost incurred by unwanted spills and fills, caused by
@@ -14222,14 +14219,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
14222
14219
14223
14220
/// Convenience overload: default-constructs an ExternallyUsedValues map and
/// forwards it to the vectorizeTree overload that takes the map explicitly,
/// returning whatever that overload returns.
Value *BoUpSLP::vectorizeTree() {
14224
14221
ExtraValueToDebugLocsMap ExternallyUsedValues;
14225
- SmallVector<std::pair<Value *, Value *>> ReplacedExternals;
14226
- return vectorizeTree(ExternallyUsedValues, ReplacedExternals);
14222
+ return vectorizeTree(ExternallyUsedValues);
14227
14223
}
14228
14224
14229
- Value *BoUpSLP::vectorizeTree(
14230
- const ExtraValueToDebugLocsMap &ExternallyUsedValues,
14231
- SmallVectorImpl<std::pair<Value *, Value *>> &ReplacedExternals,
14232
- Instruction *ReductionRoot) {
14225
+ Value *
14226
+ BoUpSLP::vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues,
14227
+ Instruction *ReductionRoot) {
14233
14228
// All blocks must be scheduled before any instructions are inserted.
14234
14229
for (auto &BSIter : BlocksSchedules) {
14235
14230
scheduleBlock(BSIter.second.get());
@@ -14373,6 +14368,7 @@ Value *BoUpSLP::vectorizeTree(
14373
14368
SmallDenseSet<Value *, 4> UsedInserts;
14374
14369
DenseMap<std::pair<Value *, Type *>, Value *> VectorCasts;
14375
14370
SmallDenseSet<Value *, 4> ScalarsWithNullptrUser;
14371
+ SmallDenseSet<ExtractElementInst *, 4> IgnoredExtracts;
14376
14372
// Extract all of the elements with the external uses.
14377
14373
for (const auto &ExternalUse : ExternalUses) {
14378
14374
Value *Scalar = ExternalUse.Scalar;
@@ -14426,11 +14422,16 @@ Value *BoUpSLP::vectorizeTree(
14426
14422
if (ReplaceInst) {
14427
14423
// Leave the instruction as is, if it is cheaper than extracts and all
14428
14424
// operands are scalar.
14429
- auto *CloneInst = Inst->clone();
14430
- CloneInst->insertBefore(Inst);
14431
- if (Inst->hasName())
14432
- CloneInst->takeName(Inst);
14433
- Ex = CloneInst;
14425
+ if (auto *EE = dyn_cast<ExtractElementInst>(Inst)) {
14426
+ IgnoredExtracts.insert(EE);
14427
+ Ex = EE;
14428
+ } else {
14429
+ auto *CloneInst = Inst->clone();
14430
+ CloneInst->insertBefore(Inst);
14431
+ if (Inst->hasName())
14432
+ CloneInst->takeName(Inst);
14433
+ Ex = CloneInst;
14434
+ }
14434
14435
} else if (auto *ES = dyn_cast<ExtractElementInst>(Scalar);
14435
14436
ES && isa<Instruction>(Vec)) {
14436
14437
Value *V = ES->getVectorOperand();
@@ -14530,8 +14531,12 @@ Value *BoUpSLP::vectorizeTree(
14530
14531
}
14531
14532
Value *NewInst = ExtractAndExtendIfNeeded(Vec);
14532
14533
// Required to update internally referenced instructions.
14533
- Scalar->replaceAllUsesWith(NewInst);
14534
- ReplacedExternals.emplace_back(Scalar, NewInst);
14534
+ if (Scalar != NewInst) {
14535
+ assert((!isa<ExtractElementInst>(Scalar) ||
14536
+ !IgnoredExtracts.contains(cast<ExtractElementInst>(Scalar))) &&
14537
+ "Extractelements should not be replaced.");
14538
+ Scalar->replaceAllUsesWith(NewInst);
14539
+ }
14535
14540
continue;
14536
14541
}
14537
14542
@@ -14757,6 +14762,9 @@ Value *BoUpSLP::vectorizeTree(
14757
14762
if (Entry->getOpcode() == Instruction::GetElementPtr &&
14758
14763
!isa<GetElementPtrInst>(Scalar))
14759
14764
continue;
14765
+ if (auto *EE = dyn_cast<ExtractElementInst>(Scalar);
14766
+ EE && IgnoredExtracts.contains(EE))
14767
+ continue;
14760
14768
#ifndef NDEBUG
14761
14769
Type *Ty = Scalar->getType();
14762
14770
if (!Ty->isVoidTy()) {
@@ -17660,7 +17668,6 @@ class HorizontalReduction {
17660
17668
// because of the vectorization.
17661
17669
DenseMap<Value *, WeakTrackingVH> TrackedVals(ReducedVals.size() *
17662
17670
ReducedVals.front().size());
17663
- SmallVector<std::pair<Value *, Value *>> ReplacedExternals;
17664
17671
17665
17672
// The compare instruction of a min/max is the insertion point for new
17666
17673
// instructions and may be replaced with a new compare instruction.
@@ -17956,6 +17963,8 @@ class HorizontalReduction {
17956
17963
if (Cnt >= Pos && Cnt < Pos + ReduxWidth)
17957
17964
continue;
17958
17965
Value *RdxVal = Candidates[Cnt];
17966
+ if (auto It = TrackedVals.find(RdxVal); It != TrackedVals.end())
17967
+ RdxVal = It->second;
17959
17968
if (!Visited.insert(RdxVal).second)
17960
17969
continue;
17961
17970
// Check if the scalar was vectorized as part of the vectorization
@@ -18024,8 +18033,8 @@ class HorizontalReduction {
18024
18033
InsertPt = GetCmpForMinMaxReduction(RdxRootInst);
18025
18034
18026
18035
// Vectorize a tree.
18027
- Value *VectorizedRoot = V.vectorizeTree(LocalExternallyUsedValues,
18028
- ReplacedExternals , InsertPt);
18036
+ Value *VectorizedRoot =
18037
+ V.vectorizeTree(LocalExternallyUsedValues , InsertPt);
18029
18038
18030
18039
Builder.SetInsertPoint(InsertPt);
18031
18040
0 commit comments