@@ -19994,6 +19994,38 @@ static bool checkTreeSizes(ArrayRef<std::pair<unsigned, unsigned>> Sizes,
19994
19994
return Dev * 96 / (Mean * Mean) == 0;
19995
19995
}
19996
19996
19997
namespace {

/// A group of instructions that we'll try to bundle together using vector ops.
/// Instructions are referred to by index and are kept ordered by the signed
/// distance of their address operand to the address of this group's base
/// instruction (the one at \c BaseInstrIdx, which always sits at distance 0).
struct RelatedStoreInsts {
  /// Creates a group that contains only its base instruction
  /// \p BaseInstrIdx. The constructor is explicit so that a plain index is
  /// never silently converted into a whole group.
  explicit RelatedStoreInsts(unsigned BaseInstrIdx) { reset(BaseInstrIdx); }

  /// Drops every recorded instruction and restarts the group around
  /// \p NewBaseInstr, which is registered with a pointer distance of 0.
  void reset(unsigned NewBaseInstr) {
    BaseInstrIdx = NewBaseInstr;
    Instrs.clear();
    insertOrLookup(NewBaseInstr, 0);
  }

  /// Tries to insert \p InstrIdx as the instruction with a pointer distance of
  /// \p PtrDist.
  /// Does nothing if there is already an instruction with that \p PtrDist.
  /// \returns The instruction index previously associated with \p PtrDist, or
  /// std::nullopt if there was none and \p InstrIdx has just been inserted.
  std::optional<unsigned> insertOrLookup(unsigned InstrIdx, int PtrDist) {
    // try_emplace leaves the map untouched (and builds no node) when the
    // distance is already taken.
    auto [It, Inserted] = Instrs.try_emplace(PtrDist, InstrIdx);
    if (Inserted)
      return std::nullopt;
    return It->second;
  }

  /// The index of the Base instruction, i.e. the one with a 0 pointer distance.
  unsigned BaseInstrIdx;

  /// Maps a pointer distance from \p BaseInstrIdx to an instruction index.
  /// Being a std::map, iteration visits entries by increasing distance.
  using DistToInstMap = std::map<int, unsigned>;
  DistToInstMap Instrs;
};

} // end anonymous namespace
20028
+
19997
20029
bool SLPVectorizerPass::vectorizeStores(
19998
20030
ArrayRef<StoreInst *> Stores, BoUpSLP &R,
19999
20031
DenseSet<std::tuple<Value *, Value *, Value *, Value *, unsigned>>
@@ -20003,31 +20035,22 @@ bool SLPVectorizerPass::vectorizeStores(
20003
20035
BoUpSLP::ValueSet VectorizedStores;
20004
20036
bool Changed = false;
20005
20037
20006
- struct StoreDistCompare {
20007
- bool operator()(const std::pair<unsigned, int> &Op1,
20008
- const std::pair<unsigned, int> &Op2) const {
20009
- return Op1.second < Op2.second;
20010
- }
20011
- };
20012
- // A set of pairs (index of store in Stores array ref, Distance of the store
20013
- // address relative to base store address in units).
20014
- using StoreIndexToDistSet =
20015
- std::set<std::pair<unsigned, int>, StoreDistCompare>;
20016
- auto TryToVectorize = [&](const StoreIndexToDistSet &Set) {
20038
+ auto TryToVectorize = [&](const RelatedStoreInsts::DistToInstMap &StoreSeq) {
20017
20039
int PrevDist = -1;
20018
20040
BoUpSLP::ValueList Operands;
20019
20041
// Collect the chain into a list.
20020
- for (auto [Idx, Data] : enumerate(Set)) {
20021
- if (Operands.empty() || Data.second - PrevDist == 1) {
20022
- Operands.push_back(Stores[Data.first]);
20023
- PrevDist = Data.second;
20024
- if (Idx != Set.size() - 1)
20042
+ for (auto [Idx, Data] : enumerate(StoreSeq)) {
20043
+ auto &[Dist, InstIdx] = Data;
20044
+ if (Operands.empty() || Dist - PrevDist == 1) {
20045
+ Operands.push_back(Stores[InstIdx]);
20046
+ PrevDist = Dist;
20047
+ if (Idx != StoreSeq.size() - 1)
20025
20048
continue;
20026
20049
}
20027
- auto E = make_scope_exit([&, &DataVar = Data ]() {
20050
+ auto E = make_scope_exit([&, &Dist = Dist, &InstIdx = InstIdx ]() {
20028
20051
Operands.clear();
20029
- Operands.push_back(Stores[DataVar.first ]);
20030
- PrevDist = DataVar.second ;
20052
+ Operands.push_back(Stores[InstIdx ]);
20053
+ PrevDist = Dist ;
20031
20054
});
20032
20055
20033
20056
if (Operands.size() <= 1 ||
@@ -20294,7 +20317,8 @@ bool SLPVectorizerPass::vectorizeStores(
20294
20317
// Need to store the index of the very first store separately, since the set
20295
20318
// may be reordered after the insertion and the first store may be moved. This
20296
20319
// container reduces the number of calls to the getPointersDiff() function.
20297
- SmallVector<std::pair<unsigned, StoreIndexToDistSet>> SortedStores;
20320
+ SmallVector<RelatedStoreInsts> SortedStores;
20321
+
20298
20322
// Inserts the specified store SI with the given index Idx to the set of the
20299
20323
// stores. If the store with the same distance is found already - stop
20300
20324
// insertion, try to vectorize already found stores. If some stores from this
@@ -20328,56 +20352,52 @@ bool SLPVectorizerPass::vectorizeStores(
20328
20352
// dependencies and no need to waste compile time to try to vectorize them.
20329
20353
// - Try to vectorize the sequence {1, {1, 0}, {3, 2}}.
20330
20354
auto FillStoresSet = [&](unsigned Idx, StoreInst *SI) {
20331
- for (std::pair<unsigned, StoreIndexToDistSet> &Set : SortedStores) {
20355
+ for (RelatedStoreInsts &StoreSeq : SortedStores) {
20332
20356
std::optional<int> Diff = getPointersDiff(
20333
- Stores[Set.first ]->getValueOperand()->getType(),
20334
- Stores[Set.first ]->getPointerOperand(),
20357
+ Stores[StoreSeq.BaseInstrIdx ]->getValueOperand()->getType(),
20358
+ Stores[StoreSeq.BaseInstrIdx ]->getPointerOperand(),
20335
20359
SI->getValueOperand()->getType(), SI->getPointerOperand(), *DL, *SE,
20336
20360
/*StrictCheck=*/true);
20337
20361
if (!Diff)
20338
20362
continue;
20339
- auto It = Set.second.find(std::make_pair(Idx, *Diff));
20340
- if (It == Set.second.end()) {
20341
- Set.second.emplace(Idx, *Diff);
20363
+ std::optional<unsigned> PrevInst =
20364
+ StoreSeq.insertOrLookup(/*InstrIdx=*/Idx, /*PtrDist=*/*Diff);
20365
+ if (!PrevInst) {
20366
+ // No store was associated with that distance. Keep collecting.
20342
20367
return;
20343
20368
}
20344
20369
// Try to vectorize the first found set to avoid duplicate analysis.
20345
- TryToVectorize(Set.second);
20346
- unsigned ItIdx = It->first;
20347
- int ItDist = It->second;
20348
- StoreIndexToDistSet PrevSet;
20349
- copy_if(Set.second, std::inserter(PrevSet, PrevSet.end()),
20350
- [&](const std::pair<unsigned, int> &Pair) {
20351
- return Pair.first > ItIdx;
20370
+ TryToVectorize(StoreSeq.Instrs);
20371
+ RelatedStoreInsts::DistToInstMap PrevSet;
20372
+ copy_if(StoreSeq.Instrs, std::inserter(PrevSet, PrevSet.end()),
20373
+ [&](const std::pair<int, unsigned> &DistAndIdx) {
20374
+ return DistAndIdx.second > *PrevInst;
20352
20375
});
20353
- Set.second.clear();
20354
- Set.first = Idx;
20355
- Set.second.emplace(Idx, 0);
20376
+ StoreSeq.reset(Idx);
20356
20377
// Insert stores that followed previous match to try to vectorize them
20357
20378
// with this store.
20358
- unsigned StartIdx = ItIdx + 1;
20379
+ unsigned StartIdx = *PrevInst + 1;
20359
20380
SmallBitVector UsedStores(Idx - StartIdx);
20360
20381
// Distances to previously found dup store (or this store, since they
20361
20382
// store to the same addresses).
20362
20383
SmallVector<int> Dists(Idx - StartIdx, 0);
20363
- for (const std::pair<unsigned, int> &Pair : reverse(PrevSet)) {
20384
+ for (auto [PtrDist, InstIdx] : reverse(PrevSet)) {
20364
20385
// Do not try to vectorize sequences, we already tried.
20365
- if (VectorizedStores.contains(Stores[Pair.first ]))
20386
+ if (VectorizedStores.contains(Stores[InstIdx ]))
20366
20387
break;
20367
- unsigned BI = Pair.first - StartIdx;
20388
+ unsigned BI = InstIdx - StartIdx;
20368
20389
UsedStores.set(BI);
20369
- Dists[BI] = Pair.second - ItDist ;
20390
+ Dists[BI] = PtrDist - *Diff ;
20370
20391
}
20371
20392
for (unsigned I = StartIdx; I < Idx; ++I) {
20372
20393
unsigned BI = I - StartIdx;
20373
20394
if (UsedStores.test(BI))
20374
- Set.second.emplace (I, Dists[BI]);
20395
+ StoreSeq.insertOrLookup (I, Dists[BI]);
20375
20396
}
20376
20397
return;
20377
20398
}
20378
- auto &Res = SortedStores.emplace_back();
20379
- Res.first = Idx;
20380
- Res.second.emplace(Idx, 0);
20399
+ // We did not find a comparable store, start a new sequence.
20400
+ SortedStores.emplace_back(Idx);
20381
20401
};
20382
20402
Type *PrevValTy = nullptr;
20383
20403
for (auto [I, SI] : enumerate(Stores)) {
@@ -20387,17 +20407,17 @@ bool SLPVectorizerPass::vectorizeStores(
20387
20407
PrevValTy = SI->getValueOperand()->getType();
20388
20408
// Check that we do not try to vectorize stores of different types.
20389
20409
if (PrevValTy != SI->getValueOperand()->getType()) {
20390
- for (auto &Set : SortedStores)
20391
- TryToVectorize(Set.second );
20410
+ for (RelatedStoreInsts &StoreSeq : SortedStores)
20411
+ TryToVectorize(StoreSeq.Instrs );
20392
20412
SortedStores.clear();
20393
20413
PrevValTy = SI->getValueOperand()->getType();
20394
20414
}
20395
20415
FillStoresSet(I, SI);
20396
20416
}
20397
20417
20398
20418
// Final vectorization attempt.
20399
- for (auto &Set : SortedStores)
20400
- TryToVectorize(Set.second );
20419
+ for (RelatedStoreInsts &StoreSeq : SortedStores)
20420
+ TryToVectorize(StoreSeq.Instrs );
20401
20421
20402
20422
return Changed;
20403
20423
}
0 commit comments