@@ -19945,6 +19945,41 @@ static bool checkTreeSizes(ArrayRef<std::pair<unsigned, unsigned>> Sizes,
19945
19945
return Dev * 96 / (Mean * Mean) == 0;
19946
19946
}
19947
19947
19948
namespace {

/// A group of instructions that we'll try to bundle together using vector ops.
/// They are ordered using the signed distance of their address operand to the
/// address of this group's BaseInstr.
///
/// Instructions are referred to by index only; this struct stores and looks up
/// those indices but never dereferences them.
struct RelatedStoreInsts {
  /// Creates a group containing only \p BaseInstrIdx, recorded at distance 0.
  /// Explicit so that a bare index is never silently converted into a group.
  explicit RelatedStoreInsts(unsigned BaseInstrIdx) { reset(BaseInstrIdx); }

  /// Drops every recorded instruction and restarts the group around
  /// \p NewBaseInstr, which becomes the new distance-0 entry.
  void reset(unsigned NewBaseInstr) {
    BaseInstrIdx = NewBaseInstr;
    Instrs.clear();
    insert(NewBaseInstr, 0);
  }

  /// Records \p InstrIdx at pointer distance \p PtrDist from the base
  /// instruction. If an instruction is already recorded at that distance, the
  /// existing entry is kept and this call has no effect.
  void insert(unsigned InstrIdx, int PtrDist) {
    Instrs.emplace(PtrDist, InstrIdx);
  }

  /// Returns the index of the instruction recorded at a pointer distance of
  /// \p PtrDist, or std::nullopt if no instruction has that distance.
  std::optional<unsigned> getInstIdx(int PtrDist) const {
    auto It = Instrs.find(PtrDist);
    if (It != Instrs.end())
      return It->second;
    return std::nullopt;
  }

  /// The index of the Base instruction, i.e. the one with a 0 pointer distance.
  unsigned BaseInstrIdx;

  /// Maps a pointer distance from \p BaseInstrIdx to an instruction index.
  /// Being a std::map, iteration visits entries in increasing distance order.
  using DistToInstMap = std::map<int, unsigned>;
  DistToInstMap Instrs;
};

} // end anonymous namespace
19982
+
19948
19983
bool SLPVectorizerPass::vectorizeStores(
19949
19984
ArrayRef<StoreInst *> Stores, BoUpSLP &R,
19950
19985
DenseSet<std::tuple<Value *, Value *, Value *, Value *, unsigned>>
@@ -19954,31 +19989,22 @@ bool SLPVectorizerPass::vectorizeStores(
19954
19989
BoUpSLP::ValueSet VectorizedStores;
19955
19990
bool Changed = false;
19956
19991
19957
- struct StoreDistCompare {
19958
- bool operator()(const std::pair<unsigned, int> &Op1,
19959
- const std::pair<unsigned, int> &Op2) const {
19960
- return Op1.second < Op2.second;
19961
- }
19962
- };
19963
- // A set of pairs (index of store in Stores array ref, Distance of the store
19964
- // address relative to base store address in units).
19965
- using StoreIndexToDistSet =
19966
- std::set<std::pair<unsigned, int>, StoreDistCompare>;
19967
- auto TryToVectorize = [&](const StoreIndexToDistSet &Set) {
19992
+ auto TryToVectorize = [&](const RelatedStoreInsts::DistToInstMap &StoreSeq) {
19968
19993
int PrevDist = -1;
19969
19994
BoUpSLP::ValueList Operands;
19970
19995
// Collect the chain into a list.
19971
- for (auto [Idx, Data] : enumerate(Set)) {
19972
- if (Operands.empty() || Data.second - PrevDist == 1) {
19973
- Operands.push_back(Stores[Data.first]);
19974
- PrevDist = Data.second;
19975
- if (Idx != Set.size() - 1)
19996
+ for (auto [Idx, Data] : enumerate(StoreSeq)) {
19997
+ auto &[Dist, InstIdx] = Data;
19998
+ if (Operands.empty() || Dist - PrevDist == 1) {
19999
+ Operands.push_back(Stores[InstIdx]);
20000
+ PrevDist = Dist;
20001
+ if (Idx != StoreSeq.size() - 1)
19976
20002
continue;
19977
20003
}
19978
- auto E = make_scope_exit([&, &DataVar = Data ]() {
20004
+ auto E = make_scope_exit([&, &Dist = Dist, &InstIdx = InstIdx ]() {
19979
20005
Operands.clear();
19980
- Operands.push_back(Stores[DataVar.first ]);
19981
- PrevDist = DataVar.second ;
20006
+ Operands.push_back(Stores[InstIdx ]);
20007
+ PrevDist = Dist ;
19982
20008
});
19983
20009
19984
20010
if (Operands.size() <= 1 ||
@@ -20245,7 +20271,8 @@ bool SLPVectorizerPass::vectorizeStores(
20245
20271
// Need to store the index of the very first store separately, since the set
20246
20272
// may be reordered after the insertion and the first store may be moved. This
20247
20273
// container reduces the number of calls to getPointersDiff().
20248
- SmallVector<std::pair<unsigned, StoreIndexToDistSet>> SortedStores;
20274
+ SmallVector<RelatedStoreInsts> SortedStores;
20275
+
20249
20276
// Inserts the specified store SI with the given index Idx to the set of the
20250
20277
// stores. If the store with the same distance is found already - stop
20251
20278
// insertion, try to vectorize already found stores. If some stores from this
@@ -20279,56 +20306,51 @@ bool SLPVectorizerPass::vectorizeStores(
20279
20306
// dependencies and no need to waste compile time to try to vectorize them.
20280
20307
// - Try to vectorize the sequence {1, {1, 0}, {3, 2}}.
20281
20308
auto FillStoresSet = [&](unsigned Idx, StoreInst *SI) {
20282
- for (std::pair<unsigned, StoreIndexToDistSet> &Set : SortedStores) {
20309
+ for (RelatedStoreInsts &StoreSeq : SortedStores) {
20283
20310
std::optional<int> Diff = getPointersDiff(
20284
- Stores[Set.first ]->getValueOperand()->getType(),
20285
- Stores[Set.first ]->getPointerOperand(),
20311
+ Stores[StoreSeq.BaseInstrIdx ]->getValueOperand()->getType(),
20312
+ Stores[StoreSeq.BaseInstrIdx ]->getPointerOperand(),
20286
20313
SI->getValueOperand()->getType(), SI->getPointerOperand(), *DL, *SE,
20287
20314
/*StrictCheck=*/true);
20288
20315
if (!Diff)
20289
20316
continue;
20290
- auto It = Set.second.find(std::make_pair(Idx, * Diff) );
20291
- if (It == Set.second.end() ) {
20292
- Set.second.emplace (Idx, *Diff);
20317
+ std::optional<unsigned> PrevInst = StoreSeq.getInstIdx(/*PtrDist=*/* Diff);
20318
+ if (!PrevInst ) {
20319
+ StoreSeq.insert (Idx, *Diff);
20293
20320
return;
20294
20321
}
20295
20322
// Try to vectorize the first found set to avoid duplicate analysis.
20296
- TryToVectorize(Set.second);
20297
- unsigned ItIdx = It->first;
20298
- int ItDist = It->second;
20299
- StoreIndexToDistSet PrevSet;
20300
- copy_if(Set.second, std::inserter(PrevSet, PrevSet.end()),
20301
- [&](const std::pair<unsigned, int> &Pair) {
20302
- return Pair.first > ItIdx;
20323
+ TryToVectorize(StoreSeq.Instrs);
20324
+ RelatedStoreInsts::DistToInstMap PrevSet;
20325
+ copy_if(StoreSeq.Instrs, std::inserter(PrevSet, PrevSet.end()),
20326
+ [&](const std::pair<int, unsigned> &DistAndIdx) {
20327
+ return DistAndIdx.second > *PrevInst;
20303
20328
});
20304
- Set.second.clear();
20305
- Set.first = Idx;
20306
- Set.second.emplace(Idx, 0);
20329
+ StoreSeq.reset(Idx);
20307
20330
// Insert stores that followed previous match to try to vectorize them
20308
20331
// with this store.
20309
- unsigned StartIdx = ItIdx + 1;
20332
+ unsigned StartIdx = *PrevInst + 1;
20310
20333
SmallBitVector UsedStores(Idx - StartIdx);
20311
20334
// Distances to previously found dup store (or this store, since they
20312
20335
// store to the same addresses).
20313
20336
SmallVector<int> Dists(Idx - StartIdx, 0);
20314
- for (const std::pair<unsigned, int> &Pair : reverse(PrevSet)) {
20337
+ for (auto [PtrDist, InstIdx] : reverse(PrevSet)) {
20315
20338
// Do not try to vectorize sequences, we already tried.
20316
- if (VectorizedStores.contains(Stores[Pair.first ]))
20339
+ if (VectorizedStores.contains(Stores[InstIdx ]))
20317
20340
break;
20318
- unsigned BI = Pair.first - StartIdx;
20341
+ unsigned BI = InstIdx - StartIdx;
20319
20342
UsedStores.set(BI);
20320
- Dists[BI] = Pair.second - ItDist ;
20343
+ Dists[BI] = PtrDist - *Diff ;
20321
20344
}
20322
20345
for (unsigned I = StartIdx; I < Idx; ++I) {
20323
20346
unsigned BI = I - StartIdx;
20324
20347
if (UsedStores.test(BI))
20325
- Set.second.emplace (I, Dists[BI]);
20348
+ StoreSeq.insert (I, Dists[BI]);
20326
20349
}
20327
20350
return;
20328
20351
}
20329
- auto &Res = SortedStores.emplace_back();
20330
- Res.first = Idx;
20331
- Res.second.emplace(Idx, 0);
20352
+ // We did not find a comparable store, start a new sequence.
20353
+ SortedStores.emplace_back(Idx);
20332
20354
};
20333
20355
Type *PrevValTy = nullptr;
20334
20356
for (auto [I, SI] : enumerate(Stores)) {
@@ -20338,17 +20360,17 @@ bool SLPVectorizerPass::vectorizeStores(
20338
20360
PrevValTy = SI->getValueOperand()->getType();
20339
20361
// Check that we do not try to vectorize stores of different types.
20340
20362
if (PrevValTy != SI->getValueOperand()->getType()) {
20341
- for (auto &Set : SortedStores)
20342
- TryToVectorize(Set.second );
20363
+ for (RelatedStoreInsts &StoreSeq : SortedStores)
20364
+ TryToVectorize(StoreSeq.Instrs );
20343
20365
SortedStores.clear();
20344
20366
PrevValTy = SI->getValueOperand()->getType();
20345
20367
}
20346
20368
FillStoresSet(I, SI);
20347
20369
}
20348
20370
20349
20371
// Final vectorization attempt.
20350
- for (auto &Set : SortedStores)
20351
- TryToVectorize(Set.second );
20372
+ for (RelatedStoreInsts &StoreSeq : SortedStores)
20373
+ TryToVectorize(StoreSeq.Instrs );
20352
20374
20353
20375
return Changed;
20354
20376
}
0 commit comments