@@ -15164,6 +15164,10 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
15164
15164
BoUpSLP::ValueSet VectorizedStores;
15165
15165
bool Changed = false;
15166
15166
15167
+ // Pairs (first_store, last_store) delimiting the store ranges we have
15168
+ // already attempted to vectorize. Used to skip ranges whose earlier
15169
+ // vectorization attempts were unsuccessful.
15170
+ DenseSet<std::pair<Value *, Value *>> TriedSequences;
15167
15171
struct StoreDistCompare {
15168
15172
bool operator()(const std::pair<unsigned, int> &Op1,
15169
15173
const std::pair<unsigned, int> &Op2) const {
@@ -15205,10 +15209,8 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
15205
15209
Type *ValueTy = StoreTy;
15206
15210
if (auto *Trunc = dyn_cast<TruncInst>(Store->getValueOperand()))
15207
15211
ValueTy = Trunc->getSrcTy();
15208
- unsigned MinVF = std::max<unsigned>(
15209
- 2, PowerOf2Ceil(TTI->getStoreMinimumVF(
15210
- R.getMinVF(DL->getTypeStoreSizeInBits(StoreTy)), StoreTy,
15211
- ValueTy)));
15212
+ unsigned MinVF = PowerOf2Ceil(TTI->getStoreMinimumVF(
15213
+ R.getMinVF(DL->getTypeStoreSizeInBits(StoreTy)), StoreTy, ValueTy));
15212
15214
15213
15215
if (MaxVF < MinVF) {
15214
15216
LLVM_DEBUG(dbgs() << "SLP: Vectorization infeasible as MaxVF (" << MaxVF
@@ -15234,74 +15236,40 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
15234
15236
VF = Size > MaxVF ? NonPowerOf2VF : Size;
15235
15237
Size *= 2;
15236
15238
});
15237
- unsigned End = Operands.size();
15238
- unsigned Repeat = 0;
15239
- constexpr unsigned MaxAttempts = 2;
15240
- SmallBitVector Range(Operands.size());
15241
- while (true) {
15242
- ++Repeat;
15243
- for (unsigned Size : CandidateVFs) {
15244
- int StartIdx = Range.find_first_unset();
15245
- while (StartIdx != -1) {
15246
- int EndIdx = Range.find_next(StartIdx);
15247
- unsigned Sz = EndIdx == -1 ? End : EndIdx;
15248
- for (unsigned Cnt = StartIdx; Cnt + Size <= Sz;) {
15249
- ArrayRef<Value *> Slice = ArrayRef(Operands).slice(Cnt, Size);
15250
- assert(all_of(Slice,
15251
- [&](Value *V) {
15252
- return cast<StoreInst>(V)
15253
- ->getValueOperand()
15254
- ->getType() ==
15255
- cast<StoreInst>(Slice.front())
15256
- ->getValueOperand()
15257
- ->getType();
15258
- }) &&
15259
- "Expected all operands of same type.");
15260
- if (vectorizeStoreChain(Slice, R, Cnt, MinVF)) {
15261
- // Mark the vectorized stores so that we don't vectorize them
15262
- // again.
15263
- VectorizedStores.insert(Slice.begin(), Slice.end());
15264
- // Mark the vectorized stores so that we don't vectorize them
15265
- // again.
15266
- Changed = true;
15267
- // If we vectorized initial block, no need to try to vectorize
15268
- // it again.
15269
- Range.set(Cnt, Cnt + Size);
15270
- if (Cnt < StartIdx + MinVF)
15271
- Range.set(StartIdx, Cnt);
15272
- if (Cnt > EndIdx - Size - MinVF) {
15273
- Range.set(Cnt + Size, EndIdx);
15274
- End = Cnt;
15275
- }
15276
- Cnt += Size;
15277
- continue;
15278
- }
15279
- ++Cnt;
15280
- }
15281
- if (Sz >= End)
15282
- break;
15283
- StartIdx = Range.find_next_unset(EndIdx);
15239
+ unsigned StartIdx = 0;
15240
+ for (unsigned Size : CandidateVFs) {
15241
+ for (unsigned Cnt = StartIdx, E = Operands.size(); Cnt + Size <= E;) {
15242
+ ArrayRef<Value *> Slice = ArrayRef(Operands).slice(Cnt, Size);
15243
+ assert(
15244
+ all_of(
15245
+ Slice,
15246
+ [&](Value *V) {
15247
+ return cast<StoreInst>(V)->getValueOperand()->getType() ==
15248
+ cast<StoreInst>(Slice.front())
15249
+ ->getValueOperand()
15250
+ ->getType();
15251
+ }) &&
15252
+ "Expected all operands of same type.");
15253
+ if (!VectorizedStores.count(Slice.front()) &&
15254
+ !VectorizedStores.count(Slice.back()) &&
15255
+ TriedSequences.insert(std::make_pair(Slice.front(), Slice.back()))
15256
+ .second &&
15257
+ vectorizeStoreChain(Slice, R, Cnt, MinVF)) {
15258
+ // Mark the vectorized stores so that we don't vectorize them again.
15259
+ VectorizedStores.insert(Slice.begin(), Slice.end());
15260
+ Changed = true;
15261
+ // If we vectorized initial block, no need to try to vectorize it
15262
+ // again.
15263
+ if (Cnt == StartIdx)
15264
+ StartIdx += Size;
15265
+ Cnt += Size;
15266
+ continue;
15284
15267
}
15268
+ ++Cnt;
15285
15269
}
15286
- // All values vectorize - exit.
15287
- if (Range.all())
15288
- break;
15289
- // Check if tried all attempts or no need for the last attempts at all.
15290
- if (Repeat >= MaxAttempts)
15291
- break;
15292
- constexpr unsigned MaxVFScale = 4;
15293
- constexpr unsigned StoresLimit = 16;
15294
- const unsigned MaxTotalNum = std::min(
15295
- std::max<unsigned>(StoresLimit, MaxVFScale * MaxVF),
15296
- bit_floor(static_cast<unsigned>(Range.find_last_unset() -
15297
- Range.find_first_unset() + 1)));
15298
- if (MaxVF >= MaxTotalNum)
15270
+ // Check if the whole array was vectorized already - exit.
15271
+ if (StartIdx >= Operands.size())
15299
15272
break;
15300
- // Last attempt to vectorize max number of elements, if all previous
15301
- // attempts were unsuccessful because of the cost issues.
15302
- CandidateVFs.clear();
15303
- for (unsigned Size = MaxTotalNum; Size > MaxVF; Size /= 2)
15304
- CandidateVFs.push_back(Size);
15305
15273
}
15306
15274
}
15307
15275
};
0 commit comments