@@ -19923,31 +19923,51 @@ bool SLPVectorizerPass::vectorizeStores(
19923
19923
BoUpSLP::ValueSet VectorizedStores;
19924
19924
bool Changed = false;
19925
19925
19926
+ /// A store instruction (identified by its index in Stores) and the signed distance of its address to a base pointer.
19927
+ struct CandidateInstr {
19928
+ CandidateInstr(unsigned InstrIdx, int DistToBasePtr)
19929
+ : InstrIdx(InstrIdx), DistToBasePtr(DistToBasePtr) {}
19930
+ unsigned InstrIdx; // Index of the store in the Stores array.
19931
+ int DistToBasePtr; // Signed distance to the base store's address; 0 for the base store itself.
19932
+ };
19926
19933
struct StoreDistCompare { // Orders entries by distance only; the store index does not take part in the comparison.
19927
- bool operator()(const std::pair<unsigned, int> &Op1,
19928
- const std::pair<unsigned, int> &Op2) const {
19929
- return Op1.second < Op2.second ;
19934
+ bool operator()(const CandidateInstr &Op1,
19935
+ const CandidateInstr &Op2) const {
19936
+ return Op1.DistToBasePtr < Op2.DistToBasePtr ;
19930
19937
}
19931
19938
};
19932
- // A set of pairs (index of store in Stores array ref, Distance of the store
19933
- // address relative to base store address in units).
19934
- using StoreIndexToDistSet =
19935
- std::set<std::pair<unsigned, int>, StoreDistCompare>;
19936
- auto TryToVectorize = [&](const StoreIndexToDistSet &Set) {
19939
+
19940
+ /// A group of store instructions that we'll try to bundle together.
19941
+ /// They are ordered by their signed distance to the address of this
19942
+ /// bundle's base store (BaseInstrIdx).
19943
+ struct CandidateBundle {
19944
+ CandidateBundle(unsigned BaseInstrIdx) { reset(BaseInstrIdx); }
19945
+ void reset(unsigned NewBaseInstr) { // Restart the bundle with NewBaseInstr as its only member, at distance 0.
19946
+ BaseInstrIdx = NewBaseInstr;
19947
+ Instrs.clear();
19948
+ Instrs.emplace(NewBaseInstr, 0);
19949
+ }
19950
+ // TODO: This should probably just be a std::map (presumably keyed by distance, since the comparator only looks at the distance).
19951
+ using CandidateSet = std::set<CandidateInstr, StoreDistCompare>;
19952
+ unsigned BaseInstrIdx; // Index in Stores of the store that distances are measured from.
19953
+ CandidateSet Instrs; // Members ordered by distance; holds at most one entry per distinct distance.
19954
+ };
19955
+
19956
+ auto TryToVectorize = [&](const CandidateBundle::CandidateSet &StoreSeq) {
19937
19957
int PrevDist = -1;
19938
19958
BoUpSLP::ValueList Operands;
19939
19959
// Collect the chain into a list.
19940
- for (auto [Idx, Data] : enumerate(Set )) {
19941
- if (Operands.empty() || Data.second - PrevDist == 1) {
19942
- Operands.push_back(Stores[Data.first ]);
19943
- PrevDist = Data.second ;
19944
- if (Idx != Set .size() - 1)
19960
+ for (auto [Idx, Data] : enumerate(StoreSeq )) {
19961
+ if (Operands.empty() || Data.DistToBasePtr - PrevDist == 1) {
19962
+ Operands.push_back(Stores[Data.InstrIdx ]);
19963
+ PrevDist = Data.DistToBasePtr ;
19964
+ if (Idx != StoreSeq .size() - 1)
19945
19965
continue;
19946
19966
}
19947
19967
auto E = make_scope_exit([&, &DataVar = Data]() {
19948
19968
Operands.clear();
19949
- Operands.push_back(Stores[DataVar.first ]);
19950
- PrevDist = DataVar.second ;
19969
+ Operands.push_back(Stores[DataVar.InstrIdx ]);
19970
+ PrevDist = DataVar.DistToBasePtr ;
19951
19971
});
19952
19972
19953
19973
if (Operands.size() <= 1 ||
@@ -20214,7 +20234,8 @@ bool SLPVectorizerPass::vectorizeStores(
20214
20234
// The index of the very first store is kept separately, since the set may be
20215
20235
// reordered after an insertion and the first store may be moved. This
20216
20236
// container reduces the number of calls to getPointersDiff().
20217
- SmallVector<std::pair<unsigned, StoreIndexToDistSet>> SortedStores;
20237
+ SmallVector<CandidateBundle> SortedStores;
20238
+
20218
20239
// Inserts the specified store SI with the given index Idx into the set of
20219
20240
// stores. If a store with the same distance has already been found, stop
20220
20241
// inserting and try to vectorize those stores. If some stores from this
@@ -20248,56 +20269,51 @@ bool SLPVectorizerPass::vectorizeStores(
20248
20269
// dependencies and no need to waste compile time to try to vectorize them.
20249
20270
// - Try to vectorize the sequence {1, {1, 0}, {3, 2}}.
20250
20271
auto FillStoresSet = [&](unsigned Idx, StoreInst *SI) { // Adds store SI (index Idx) to the first sequence whose base store it is comparable with.
20251
- for (std::pair<unsigned, StoreIndexToDistSet> &Set : SortedStores) {
20272
+ for (CandidateBundle &StoreSeq : SortedStores) {
20252
20273
std::optional<int> Diff = getPointersDiff(
20253
- Stores[Set.first ]->getValueOperand()->getType(),
20254
- Stores[Set.first ]->getPointerOperand(),
20274
+ Stores[StoreSeq.BaseInstrIdx ]->getValueOperand()->getType(),
20275
+ Stores[StoreSeq.BaseInstrIdx ]->getPointerOperand(),
20255
20276
SI->getValueOperand()->getType(), SI->getPointerOperand(), *DL, *SE,
20256
20277
/*StrictCheck=*/true);
20257
20278
if (!Diff) // getPointersDiff() produced no distance to this base store; try the next sequence.
20258
20279
continue;
20259
- auto It = Set.second .find(std::make_pair( Idx, *Diff) );
20260
- if (It == Set.second .end()) {
20261
- Set.second .emplace(Idx, *Diff);
20280
+ auto It = StoreSeq.Instrs .find({ Idx, *Diff} );
20281
+ if (It == StoreSeq.Instrs .end()) {
20282
+ StoreSeq.Instrs .emplace(Idx, *Diff);
20262
20283
return; // No store with this distance yet: recorded, nothing else to do.
20263
20284
}
20264
20285
// A store with the same distance is already recorded. Try to vectorize the set collected so far first, to avoid duplicate analysis.
20265
- TryToVectorize(Set.second);
20266
- unsigned ItIdx = It->first;
20267
- int ItDist = It->second;
20268
- StoreIndexToDistSet PrevSet;
20269
- copy_if(Set.second, std::inserter(PrevSet, PrevSet.end()),
20270
- [&](const std::pair<unsigned, int> &Pair) {
20271
- return Pair.first > ItIdx;
20272
- });
20273
- Set.second.clear();
20274
- Set.first = Idx;
20275
- Set.second.emplace(Idx, 0);
20286
+ TryToVectorize(StoreSeq.Instrs);
20287
+ unsigned ItIdx = It->InstrIdx;
20288
+ int ItDist = It->DistToBasePtr;
20289
+ CandidateBundle::CandidateSet PrevSet;
20290
+ copy_if(StoreSeq.Instrs, std::inserter(PrevSet, PrevSet.end()),
20291
+ [&](const CandidateInstr &I) { return I.InstrIdx > ItIdx; });
20292
+ StoreSeq.reset(Idx);
20276
20293
// Re-insert the stores that followed the previous match, to try to vectorize
20277
20294
// them with this store.
20278
20295
unsigned StartIdx = ItIdx + 1;
20279
20296
SmallBitVector UsedStores(Idx - StartIdx);
20280
20297
// Distances to the previously found duplicate store (or to this store, since
20281
20298
// both store to the same address).
20282
20299
SmallVector<int> Dists(Idx - StartIdx, 0);
20283
- for (const std::pair<unsigned, int> &Pair : reverse(PrevSet)) {
20300
+ for (const CandidateInstr &Store : reverse(PrevSet)) {
20284
20301
// Stop here: do not retry sequences that we have already tried.
20285
- if (VectorizedStores.contains(Stores[Pair.first ]))
20302
+ if (VectorizedStores.contains(Stores[Store.InstrIdx ]))
20286
20303
break;
20287
- unsigned BI = Pair.first - StartIdx;
20304
+ unsigned BI = Store.InstrIdx - StartIdx;
20288
20305
UsedStores.set(BI);
20289
- Dists[BI] = Pair.second - ItDist;
20306
+ Dists[BI] = Store.DistToBasePtr - ItDist;
20290
20307
}
20291
20308
for (unsigned I = StartIdx; I < Idx; ++I) {
20292
20309
unsigned BI = I - StartIdx;
20293
20310
if (UsedStores.test(BI))
20294
- Set.second .emplace(I, Dists[BI]);
20311
+ StoreSeq.Instrs .emplace(I, Dists[BI]);
20295
20312
}
20296
20313
return;
20297
20314
}
20298
- auto &Res = SortedStores.emplace_back();
20299
- Res.first = Idx;
20300
- Res.second.emplace(Idx, 0);
20315
+ // No existing sequence has a comparable base store; start a new one.
20316
+ SortedStores.emplace_back(Idx);
20301
20317
};
20302
20318
Type *PrevValTy = nullptr;
20303
20319
for (auto [I, SI] : enumerate(Stores)) {
@@ -20307,17 +20323,17 @@ bool SLPVectorizerPass::vectorizeStores(
20307
20323
PrevValTy = SI->getValueOperand()->getType();
20308
20324
// Check that we do not try to vectorize stores of different types.
20309
20325
if (PrevValTy != SI->getValueOperand()->getType()) {
20310
- for (auto &Set : SortedStores)
20311
- TryToVectorize(Set.second );
20326
+ for (CandidateBundle &StoreSeq : SortedStores)
20327
+ TryToVectorize(StoreSeq.Instrs );
20312
20328
SortedStores.clear();
20313
20329
PrevValTy = SI->getValueOperand()->getType();
20314
20330
}
20315
20331
FillStoresSet(I, SI);
20316
20332
}
20317
20333
20318
20334
// Final vectorization attempt.
20319
- for (auto &Set : SortedStores)
20320
- TryToVectorize(Set.second );
20335
+ for (CandidateBundle &StoreSeq : SortedStores)
20336
+ TryToVectorize(StoreSeq.Instrs );
20321
20337
20322
20338
return Changed;
20323
20339
}
0 commit comments