@@ -10653,6 +10653,17 @@ static T *performExtractsShuffleAction(
10653
10653
return Prev;
10654
10654
}
10655
10655
10656
+ namespace {
10657
+ /// Data type for handling buildvector sequences with the reused scalars from
10658
+ /// other tree entries. \p T is the key type that identifies a parent vector; the users visible in this file instantiate it with `const TreeEntry *` (BoUpSLP::getTreeCost) and with `Value *` (BoUpSLP::vectorizeTree).
10659
+ template <typename T> struct ShuffledInsertData {
10660
+ /// List of insertelements to be replaced by shuffles. Users read front() as the representative insert when checking whether another insertelement belongs to the same buildvector, and overwrite front() when an earlier insert of that buildvector is found.
10661
+ SmallVector<InsertElementInst *> InsertElements;
10662
+ /// The parent vectors and shuffle mask for the given list of inserts. Each key of type \p T owns one mask; users size an empty mask to the vector's element count filled with PoisonMaskElem and then set the entry at the insert index.
10663
+ MapVector<T, SmallVector<int>> ValueMasks;
10664
+ };
10665
+ } // namespace
10666
+
10656
10667
InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
10657
10668
InstructionCost Cost = 0;
10658
10669
LLVM_DEBUG(dbgs() << "SLP: Calculating cost for tree of size "
@@ -10694,8 +10705,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
10694
10705
10695
10706
SmallPtrSet<Value *, 16> ExtractCostCalculated;
10696
10707
InstructionCost ExtractCost = 0;
10697
- SmallVector<MapVector<const TreeEntry *, SmallVector<int>>> ShuffleMasks;
10698
- SmallVector<std::pair<Value *, const TreeEntry *>> FirstUsers;
10708
+ SmallVector<ShuffledInsertData<const TreeEntry *>> ShuffledInserts;
10699
10709
SmallVector<APInt> DemandedElts;
10700
10710
SmallDenseSet<Value *, 4> UsedInserts;
10701
10711
DenseSet<std::pair<const TreeEntry *, Type *>> VectorCasts;
@@ -10732,48 +10742,24 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
10732
10742
if (InsertIdx) {
10733
10743
const TreeEntry *ScalarTE = getTreeEntry(EU.Scalar);
10734
10744
auto *It = find_if(
10735
- FirstUsers,
10736
- [this, VU](const std::pair<Value *, const TreeEntry *> &Pair) {
10745
+ ShuffledInserts,
10746
+ [this, VU](const ShuffledInsertData<const TreeEntry *> &Data) {
10747
+ // Checks if 2 insertelements are from the same buildvector.
10748
+ InsertElementInst *VecInsert = Data.InsertElements.front();
10737
10749
return areTwoInsertFromSameBuildVector(
10738
- VU, cast<InsertElementInst>(Pair.first),
10739
- [this](InsertElementInst *II) -> Value * {
10750
+ VU, VecInsert, [this](InsertElementInst *II) -> Value * {
10740
10751
Value *Op0 = II->getOperand(0);
10741
10752
if (getTreeEntry(II) && !getTreeEntry(Op0))
10742
10753
return nullptr;
10743
10754
return Op0;
10744
10755
});
10745
10756
});
10746
10757
int VecId = -1;
10747
- if (It == FirstUsers.end()) {
10748
- (void)ShuffleMasks.emplace_back();
10749
- SmallVectorImpl<int> &Mask = ShuffleMasks.back()[ScalarTE];
10750
- if (Mask.empty())
10751
- Mask.assign(FTy->getNumElements(), PoisonMaskElem);
10752
- // Find the insertvector, vectorized in tree, if any.
10753
- Value *Base = VU;
10754
- while (auto *IEBase = dyn_cast<InsertElementInst>(Base)) {
10755
- if (IEBase != EU.User &&
10756
- (!IEBase->hasOneUse() ||
10757
- getElementIndex(IEBase).value_or(*InsertIdx) == *InsertIdx))
10758
- break;
10759
- // Build the mask for the vectorized insertelement instructions.
10760
- if (const TreeEntry *E = getTreeEntry(IEBase)) {
10761
- VU = IEBase;
10762
- do {
10763
- IEBase = cast<InsertElementInst>(Base);
10764
- int Idx = *getElementIndex(IEBase);
10765
- assert(Mask[Idx] == PoisonMaskElem &&
10766
- "InsertElementInstruction used already.");
10767
- Mask[Idx] = Idx;
10768
- Base = IEBase->getOperand(0);
10769
- } while (E == getTreeEntry(Base));
10770
- break;
10771
- }
10772
- Base = cast<InsertElementInst>(Base)->getOperand(0);
10773
- }
10774
- FirstUsers.emplace_back(VU, ScalarTE);
10758
+ if (It == ShuffledInserts.end()) {
10759
+ auto &Data = ShuffledInserts.emplace_back();
10760
+ Data.InsertElements.emplace_back(VU);
10775
10761
DemandedElts.push_back(APInt::getZero(FTy->getNumElements()));
10776
- VecId = FirstUsers .size() - 1;
10762
+ VecId = ShuffledInserts .size() - 1;
10777
10763
auto It = MinBWs.find(ScalarTE);
10778
10764
if (It != MinBWs.end() &&
10779
10765
VectorCasts
@@ -10799,12 +10785,13 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
10799
10785
Cost += C;
10800
10786
}
10801
10787
} else {
10802
- if (isFirstInsertElement(VU, cast<InsertElementInst>( It->first )))
10803
- It->first = VU;
10804
- VecId = std::distance(FirstUsers .begin(), It);
10788
+ if (isFirstInsertElement(VU, It->InsertElements.front( )))
10789
+ It->InsertElements.front() = VU;
10790
+ VecId = std::distance(ShuffledInserts .begin(), It);
10805
10791
}
10806
10792
int InIdx = *InsertIdx;
10807
- SmallVectorImpl<int> &Mask = ShuffleMasks[VecId][ScalarTE];
10793
+ SmallVectorImpl<int> &Mask =
10794
+ ShuffledInserts[VecId].ValueMasks[ScalarTE];
10808
10795
if (Mask.empty())
10809
10796
Mask.assign(FTy->getNumElements(), PoisonMaskElem);
10810
10797
Mask[InIdx] = EU.Lane;
@@ -10978,9 +10965,9 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
10978
10965
return std::make_pair(TE, false);
10979
10966
};
10980
10967
// Calculate the cost of the reshuffled vectors, if any.
10981
- for (int I = 0, E = FirstUsers .size(); I < E; ++I) {
10982
- Value *Base = cast<Instruction>(FirstUsers [I].first )->getOperand(0);
10983
- auto Vector = ShuffleMasks [I].takeVector();
10968
+ for (int I = 0, E = ShuffledInserts .size(); I < E; ++I) {
10969
+ Value *Base = ShuffledInserts [I].InsertElements.front( )->getOperand(0);
10970
+ auto Vector = ShuffledInserts [I].ValueMasks .takeVector();
10984
10971
unsigned VF = 0;
10985
10972
auto EstimateShufflesCost = [&](ArrayRef<int> Mask,
10986
10973
ArrayRef<const TreeEntry *> TEs) {
@@ -11031,7 +11018,9 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
11031
11018
[](const TreeEntry *E) { return E->getVectorFactor(); }, ResizeToVF,
11032
11019
EstimateShufflesCost);
11033
11020
InstructionCost InsertCost = TTI->getScalarizationOverhead(
11034
- cast<FixedVectorType>(FirstUsers[I].first->getType()), DemandedElts[I],
11021
+ cast<FixedVectorType>(
11022
+ ShuffledInserts[I].InsertElements.front()->getType()),
11023
+ DemandedElts[I],
11035
11024
/*Insert*/ true, /*Extract*/ false, TTI::TCK_RecipThroughput);
11036
11025
Cost -= InsertCost;
11037
11026
}
@@ -14131,17 +14120,6 @@ Value *BoUpSLP::vectorizeTree() {
14131
14120
return vectorizeTree(ExternallyUsedValues, ReplacedExternals);
14132
14121
}
14133
14122
14134
- namespace {
14135
- /// Data type for handling buildvector sequences with the reused scalars from
14136
- /// other tree entries.
14137
- struct ShuffledInsertData {
14138
- /// List of insertelements to be replaced by shuffles.
14139
- SmallVector<InsertElementInst *> InsertElements;
14140
- /// The parent vectors and shuffle mask for the given list of inserts.
14141
- MapVector<Value *, SmallVector<int>> ValueMasks;
14142
- };
14143
- } // namespace
14144
-
14145
14123
Value *BoUpSLP::vectorizeTree(
14146
14124
const ExtraValueToDebugLocsMap &ExternallyUsedValues,
14147
14125
SmallVectorImpl<std::pair<Value *, Value *>> &ReplacedExternals,
@@ -14279,7 +14257,7 @@ Value *BoUpSLP::vectorizeTree(
14279
14257
LLVM_DEBUG(dbgs() << "SLP: Extracting " << ExternalUses.size()
14280
14258
<< " values .\n");
14281
14259
14282
- SmallVector<ShuffledInsertData> ShuffledInserts;
14260
+ SmallVector<ShuffledInsertData<Value *> > ShuffledInserts;
14283
14261
// Maps vector instruction to original insertelement instruction
14284
14262
DenseMap<Value *, InsertElementInst *> VectorToInsertElement;
14285
14263
// Maps extract Scalar to the corresponding extractelement instruction in the
@@ -14492,8 +14470,8 @@ Value *BoUpSLP::vectorizeTree(
14492
14470
14493
14471
std::optional<unsigned> InsertIdx = getElementIndex(VU);
14494
14472
if (InsertIdx) {
14495
- auto *It =
14496
- find_if( ShuffledInserts, [VU](const ShuffledInsertData &Data) {
14473
+ auto *It = find_if(
14474
+ ShuffledInserts, [VU](const ShuffledInsertData<Value *> &Data) {
14497
14475
// Checks if 2 insertelements are from the same buildvector.
14498
14476
InsertElementInst *VecInsert = Data.InsertElements.front();
14499
14477
return areTwoInsertFromSameBuildVector(
@@ -14505,36 +14483,6 @@ Value *BoUpSLP::vectorizeTree(
14505
14483
(void)ShuffledInserts.emplace_back();
14506
14484
It = std::next(ShuffledInserts.begin(),
14507
14485
ShuffledInserts.size() - 1);
14508
- SmallVectorImpl<int> &Mask = It->ValueMasks[Vec];
14509
- if (Mask.empty())
14510
- Mask.assign(FTy->getNumElements(), PoisonMaskElem);
14511
- // Find the insertvector, vectorized in tree, if any.
14512
- Value *Base = VU;
14513
- while (auto *IEBase = dyn_cast<InsertElementInst>(Base)) {
14514
- if (IEBase != User &&
14515
- (!IEBase->hasOneUse() ||
14516
- getElementIndex(IEBase).value_or(Idx) == Idx))
14517
- break;
14518
- // Build the mask for the vectorized insertelement instructions.
14519
- if (const TreeEntry *E = getTreeEntry(IEBase)) {
14520
- do {
14521
- IEBase = cast<InsertElementInst>(Base);
14522
- int IEIdx = *getElementIndex(IEBase);
14523
- assert(Mask[IEIdx] == PoisonMaskElem &&
14524
- "InsertElementInstruction used already.");
14525
- Mask[IEIdx] = IEIdx;
14526
- Base = IEBase->getOperand(0);
14527
- } while (E == getTreeEntry(Base));
14528
- break;
14529
- }
14530
- Base = cast<InsertElementInst>(Base)->getOperand(0);
14531
- // After the vectorization the def-use chain has changed, need
14532
- // to look through original insertelement instructions, if they
14533
- // get replaced by vector instructions.
14534
- auto It = VectorToInsertElement.find(Base);
14535
- if (It != VectorToInsertElement.end())
14536
- Base = It->second;
14537
- }
14538
14486
}
14539
14487
SmallVectorImpl<int> &Mask = It->ValueMasks[Vec];
14540
14488
if (Mask.empty())
0 commit comments