@@ -20859,9 +20859,16 @@ namespace {
20859
20859
/// A group of stores that we'll try to bundle together using vector ops.
20860
20860
/// They are ordered using the signed distance of their address operand to the
20861
20861
/// address of this group's BaseInstr.
20862
- struct RelatedStoreInsts {
20863
- RelatedStoreInsts(unsigned BaseInstrIdx) { reset(BaseInstrIdx); }
20862
+ class RelatedStoreInsts {
20863
+ public:
20864
+ RelatedStoreInsts(unsigned BaseInstrIdx, ArrayRef<StoreInst *> AllStores)
20865
+ : AllStores(AllStores) {
20866
+ reset(BaseInstrIdx);
20867
+ }
20868
+
20864
20869
void reset(unsigned NewBaseInstr) {
20870
+ assert(NewBaseInstr < AllStores.size() &&
20871
+ "Instruction index out of bounds");
20865
20872
BaseInstrIdx = NewBaseInstr;
20866
20873
Instrs.clear();
20867
20874
insertOrLookup(NewBaseInstr, 0);
@@ -20876,12 +20883,58 @@ struct RelatedStoreInsts {
20876
20883
return Inserted ? std::nullopt : std::optional<unsigned>(It->second);
20877
20884
}
20878
20885
20886
+ using DistToInstMap = std::map<int, unsigned>; // Pointer distance -> store index.
20887
+ const DistToInstMap &getStores() const { return Instrs; } // Read-only view of the group.
20888
+
20889
+ /// If \p SI is related to this group of stores, return the distance of its
20890
+ /// pointer operand to that of the group's BaseInstr; otherwise the optional is empty.
20891
+ std::optional<int> getPointerDiff(StoreInst &SI, const DataLayout &DL,
20892
+ ScalarEvolution &SE) const {
20893
+ StoreInst &BaseStore = *AllStores[BaseInstrIdx];
20894
+ return getPointersDiff(
20895
+ BaseStore.getValueOperand()->getType(), BaseStore.getPointerOperand(),
20896
+ SI.getValueOperand()->getType(), SI.getPointerOperand(), DL, SE,
20897
+ /*StrictCheck=*/true);
20898
+ }
20899
+
20900
+ /// Recompute the pointer distances to be based on \p NewBaseInstIdx, which is at
20901
+ /// \p DistFromCurBase from the current base. Stores indexed below \p MinSafeIdx are dropped.
20902
+ void rebase(unsigned MinSafeIdx, unsigned NewBaseInstIdx,
20903
+ int DistFromCurBase) {
20904
+ DistToInstMap PrevSet = std::move(Instrs);
20905
+ reset(NewBaseInstIdx);
20906
+
20907
+ // Re-insert stores whose index is at least MinSafeIdx to try and vectorize
20908
+ // them again. Their distance will be "rebased" to use NewBaseInstIdx as
20909
+ // reference.
20910
+ for (auto [Dist, InstIdx] : PrevSet) {
20911
+ if (InstIdx >= MinSafeIdx)
20912
+ insertOrLookup(InstIdx, Dist - DistFromCurBase);
20913
+ }
20914
+ }
20915
+
20916
+ /// Drop every store up to and including the vectorized one with the largest distance.
20917
+ void clearVectorizedStores(const BoUpSLP::ValueSet &VectorizedStores) {
20918
+ DistToInstMap::reverse_iterator LastVectorizedStore = find_if(
20919
+ reverse(Instrs), [&](const std::pair<int, unsigned> &DistAndIdx) {
20920
+ return VectorizedStores.contains(AllStores[DistAndIdx.second]);
20921
+ });
20922
+
20923
+ // base() yields the forward iterator just past the last vectorized store (or
20924
+ // begin() if there is none); erase everything before it so we don't retry it.
20925
+ DistToInstMap::iterator VectorizedStoresEnd = LastVectorizedStore.base();
20926
+ Instrs.erase(Instrs.begin(), VectorizedStoresEnd);
20927
+ }
20928
+
20929
+ private:
20879
20930
/// The index of the Base instruction, i.e. the one with a 0 pointer distance.
20880
20931
unsigned BaseInstrIdx;
20881
20932
20882
20933
/// Maps a pointer distance from \p BaseInstrIdx to an instruction index.
20883
- using DistToInstMap = std::map<int, unsigned>;
20884
20934
DistToInstMap Instrs;
20935
+
20936
+ /// Reference to all the stores in the BB being analyzed.
20937
+ ArrayRef<StoreInst *> AllStores;
20885
20938
};
20886
20939
20887
20940
} // end anonymous namespace
@@ -21165,14 +21218,7 @@ bool SLPVectorizerPass::vectorizeStores(
21165
21218
}
21166
21219
};
21167
21220
21168
- // Stores pair (first: index of the store into Stores array ref, address of
21169
- // which taken as base, second: sorted set of pairs {index, dist}, which are
21170
- // indices of stores in the set and their store location distances relative to
21171
- // the base address).
21172
-
21173
- // Need to store the index of the very first store separately, since the set
21174
- // may be reordered after the insertion and the first store may be moved. This
21175
- // container allows to reduce number of calls of getPointersDiff() function.
21221
+ /// Groups of stores to vectorize.
21176
21222
SmallVector<RelatedStoreInsts> SortedStores;
21177
21223
21178
21224
// Inserts the specified store SI with the given index Idx to the set of the
@@ -21208,52 +21254,30 @@ bool SLPVectorizerPass::vectorizeStores(
21208
21254
// dependencies and no need to waste compile time to try to vectorize them.
21209
21255
// - Try to vectorize the sequence {1, {1, 0}, {3, 2}}.
21210
21256
auto FillStoresSet = [&](unsigned Idx, StoreInst *SI) {
21211
- for (RelatedStoreInsts &StoreSeq : SortedStores) {
21212
- std::optional<int> Diff = getPointersDiff(
21213
- Stores[StoreSeq.BaseInstrIdx]->getValueOperand()->getType(),
21214
- Stores[StoreSeq.BaseInstrIdx]->getPointerOperand(),
21215
- SI->getValueOperand()->getType(), SI->getPointerOperand(), *DL, *SE,
21216
- /*StrictCheck=*/true);
21217
- if (!Diff)
21218
- continue;
21219
- std::optional<unsigned> PrevInst =
21220
- StoreSeq.insertOrLookup(/*InstrIdx=*/Idx, /*PtrDist=*/*Diff);
21221
- if (!PrevInst) {
21222
- // No store was associated to that distance. Keep collecting.
21223
- return;
21224
- }
21225
- // Try to vectorize the first found set to avoid duplicate analysis.
21226
- TryToVectorize(StoreSeq.Instrs);
21227
- RelatedStoreInsts::DistToInstMap PrevSet;
21228
- copy_if(StoreSeq.Instrs, std::inserter(PrevSet, PrevSet.end()),
21229
- [&](const std::pair<int, unsigned> &DistAndIdx) {
21230
- return DistAndIdx.second > *PrevInst;
21231
- });
21232
- StoreSeq.reset(Idx);
21233
- // Insert stores that followed previous match to try to vectorize them
21234
- // with this store.
21235
- unsigned StartIdx = *PrevInst + 1;
21236
- SmallBitVector UsedStores(Idx - StartIdx);
21237
- // Distances to previously found dup store (or this store, since they
21238
- // store to the same addresses).
21239
- SmallVector<int> Dists(Idx - StartIdx, 0);
21240
- for (auto [PtrDist, InstIdx] : reverse(PrevSet)) {
21241
- // Do not try to vectorize sequences, we already tried.
21242
- if (VectorizedStores.contains(Stores[InstIdx]))
21243
- break;
21244
- unsigned BI = InstIdx - StartIdx;
21245
- UsedStores.set(BI);
21246
- Dists[BI] = PtrDist - *Diff;
21247
- }
21248
- for (unsigned I = StartIdx; I < Idx; ++I) {
21249
- unsigned BI = I - StartIdx;
21250
- if (UsedStores.test(BI))
21251
- StoreSeq.insertOrLookup(I, Dists[BI]);
21252
- }
21257
+ std::optional<int> PtrDist;
21258
+ auto *RelatedStores = find_if(
21259
+ SortedStores, [&PtrDist, SI, this](const RelatedStoreInsts &StoreSeq) {
21260
+ PtrDist = StoreSeq.getPointerDiff(*SI, *DL, *SE);
21261
+ return PtrDist.has_value();
21262
+ });
21263
+
21264
+ // We did not find a comparable store, start a new group.
21265
+ if (RelatedStores == SortedStores.end()) {
21266
+ SortedStores.emplace_back(Idx, Stores);
21253
21267
return;
21254
21268
}
21255
- // We did not find a comparable store, start a new sequence.
21256
- SortedStores.emplace_back(Idx);
21269
+
21270
+ // If there is already a store in the group with the same PtrDist, try to
21271
+ // vectorize the existing instructions before adding the current store.
21272
+ // Otherwise, insert this store and keep collecting.
21273
+ if (std::optional<unsigned> PrevInst =
21274
+ RelatedStores->insertOrLookup(Idx, *PtrDist)) {
21275
+ TryToVectorize(RelatedStores->getStores());
21276
+ RelatedStores->clearVectorizedStores(VectorizedStores);
21277
+ RelatedStores->rebase(/*MinSafeIdx=*/*PrevInst + 1,
21278
+ /*NewBaseInstIdx=*/Idx,
21279
+ /*DistFromCurBase=*/*PtrDist);
21280
+ }
21257
21281
};
21258
21282
Type *PrevValTy = nullptr;
21259
21283
for (auto [I, SI] : enumerate(Stores)) {
@@ -21264,7 +21288,7 @@ bool SLPVectorizerPass::vectorizeStores(
21264
21288
// Check that we do not try to vectorize stores of different types.
21265
21289
if (PrevValTy != SI->getValueOperand()->getType()) {
21266
21290
for (RelatedStoreInsts &StoreSeq : SortedStores)
21267
- TryToVectorize(StoreSeq.Instrs );
21291
+ TryToVectorize(StoreSeq.getStores() );
21268
21292
SortedStores.clear();
21269
21293
PrevValTy = SI->getValueOperand()->getType();
21270
21294
}
@@ -21273,7 +21297,7 @@ bool SLPVectorizerPass::vectorizeStores(
21273
21297
21274
21298
// Final vectorization attempt.
21275
21299
for (RelatedStoreInsts &StoreSeq : SortedStores)
21276
- TryToVectorize(StoreSeq.Instrs );
21300
+ TryToVectorize(StoreSeq.getStores() );
21277
21301
21278
21302
return Changed;
21279
21303
}
0 commit comments