@@ -21191,25 +21191,30 @@ bool SLPVectorizerPass::vectorizeStores(
21191
21191
++Repeat;
21192
21192
bool RepeatChanged = false;
21193
21193
bool AnyProfitableGraph = false;
21194
- for (unsigned Size : CandidateVFs) {
21194
+ for (unsigned VF : CandidateVFs) {
21195
21195
AnyProfitableGraph = false;
21196
- unsigned StartIdx = std::distance(
21197
- RangeSizes.begin(),
21198
- find_if(RangeSizes,
21199
- std::bind(IsNotVectorized, Size >= MaxRegVF, _1)));
21200
- while (StartIdx < End) {
21201
- unsigned EndIdx = std::distance(
21196
+ unsigned FirstUnvecStore =
21197
+ std::distance(RangeSizes.begin(),
21198
+ find_if(RangeSizes, std::bind(IsNotVectorized,
21199
+ VF >= MaxRegVF, _1)));
21200
+
21201
+ // Form slices of size VF starting from FirstUnvecStore and try to
21202
+ // vectorize them.
21203
+ while (FirstUnvecStore < End) {
21204
+ unsigned FirstVecStore = std::distance(
21202
21205
RangeSizes.begin(),
21203
- find_if(RangeSizes.drop_front(StartIdx),
21204
- std::bind(IsVectorized, Size >= MaxRegVF, _1)));
21205
- unsigned Sz = EndIdx >= End ? End : EndIdx;
21206
- for (unsigned Cnt = StartIdx; Cnt + Size <= Sz;) {
21207
- if (!checkTreeSizes(RangeSizes.slice(Cnt, Size),
21208
- Size >= MaxRegVF)) {
21209
- ++Cnt;
21206
+ find_if(RangeSizes.drop_front(FirstUnvecStore),
21207
+ std::bind(IsVectorized, VF >= MaxRegVF, _1)));
21208
+ unsigned MaxSliceEnd = FirstVecStore >= End ? End : FirstVecStore;
21209
+ for (unsigned SliceStartIdx = FirstUnvecStore;
21210
+ SliceStartIdx + VF <= MaxSliceEnd;) {
21211
+ if (!checkTreeSizes(RangeSizes.slice(SliceStartIdx, VF),
21212
+ VF >= MaxRegVF)) {
21213
+ ++SliceStartIdx;
21210
21214
continue;
21211
21215
}
21212
- ArrayRef<Value *> Slice = ArrayRef(Operands).slice(Cnt, Size);
21216
+ ArrayRef<Value *> Slice =
21217
+ ArrayRef(Operands).slice(SliceStartIdx, VF);
21213
21218
assert(all_of(Slice,
21214
21219
[&](Value *V) {
21215
21220
return cast<StoreInst>(V)
@@ -21223,19 +21228,23 @@ bool SLPVectorizerPass::vectorizeStores(
21223
21228
if (!NonSchedulable.empty()) {
21224
21229
auto [NonSchedSizeMax, NonSchedSizeMin] =
21225
21230
NonSchedulable.lookup(Slice.front());
21226
- if (NonSchedSizeMax > 0 && NonSchedSizeMin <= Size) {
21227
- Cnt += NonSchedSizeMax;
21231
+ if (NonSchedSizeMax > 0 && NonSchedSizeMin <= VF) {
21232
+ // VF is too ambitious. Try to vectorize another slice before
21233
+ // trying a smaller VF.
21234
+ SliceStartIdx += NonSchedSizeMax;
21228
21235
continue;
21229
21236
}
21230
21237
}
21231
21238
unsigned TreeSize;
21232
21239
std::optional<bool> Res =
21233
- vectorizeStoreChain(Slice, R, Cnt , MinVF, TreeSize);
21240
+ vectorizeStoreChain(Slice, R, SliceStartIdx , MinVF, TreeSize);
21234
21241
if (!Res) {
21242
+ // Update the range of non schedulable VFs for slices starting
21243
+ // at SliceStartIdx.
21235
21244
NonSchedulable
21236
- .try_emplace(Slice.front(), std::make_pair(Size, Size ))
21245
+ .try_emplace(Slice.front(), std::make_pair(VF, VF ))
21237
21246
.first->getSecond()
21238
- .second = Size ;
21247
+ .second = VF ;
21239
21248
} else if (*Res) {
21240
21249
// Mark the vectorized stores so that we don't vectorize them
21241
21250
// again.
@@ -21246,63 +21255,67 @@ bool SLPVectorizerPass::vectorizeStores(
21246
21255
// If we vectorized initial block, no need to try to vectorize
21247
21256
// it again.
21248
21257
for (std::pair<unsigned, unsigned> &P :
21249
- RangeSizes.slice(Cnt, Size ))
21258
+ RangeSizes.slice(SliceStartIdx, VF ))
21250
21259
P.first = P.second = 0;
21251
- if (Cnt < StartIdx + MinVF) {
21252
- for (std::pair<unsigned, unsigned> &P :
21253
- RangeSizes.slice(StartIdx, Cnt - StartIdx ))
21260
+ if (SliceStartIdx < FirstUnvecStore + MinVF) {
21261
+ for (std::pair<unsigned, unsigned> &P : RangeSizes.slice(
21262
+ FirstUnvecStore, SliceStartIdx - FirstUnvecStore ))
21254
21263
P.first = P.second = 0;
21255
- StartIdx = Cnt + Size ;
21264
+ FirstUnvecStore = SliceStartIdx + VF ;
21256
21265
}
21257
- if (Cnt > Sz - Size - MinVF) {
21266
+ if (SliceStartIdx > MaxSliceEnd - VF - MinVF) {
21258
21267
for (std::pair<unsigned, unsigned> &P :
21259
- RangeSizes.slice(Cnt + Size, Sz - (Cnt + Size)))
21268
+ RangeSizes.slice(SliceStartIdx + VF,
21269
+ MaxSliceEnd - (SliceStartIdx + VF)))
21260
21270
P.first = P.second = 0;
21261
- if (Sz == End)
21262
- End = Cnt ;
21263
- Sz = Cnt ;
21271
+ if (MaxSliceEnd == End)
21272
+ End = SliceStartIdx ;
21273
+ MaxSliceEnd = SliceStartIdx ;
21264
21274
}
21265
- Cnt += Size ;
21275
+ SliceStartIdx += VF ;
21266
21276
continue;
21267
21277
}
21268
- if (Size > 2 && Res &&
21269
- !all_of(RangeSizes.slice(Cnt, Size ),
21270
- std::bind(VFIsProfitable, Size >= MaxRegVF, TreeSize,
21278
+ if (VF > 2 && Res &&
21279
+ !all_of(RangeSizes.slice(SliceStartIdx, VF ),
21280
+ std::bind(VFIsProfitable, VF >= MaxRegVF, TreeSize,
21271
21281
_1))) {
21272
- Cnt += Size ;
21282
+ SliceStartIdx += VF ;
21273
21283
continue;
21274
21284
}
21275
21285
// Check for the very big VFs that we're not rebuilding same
21276
21286
// trees, just with larger number of elements.
21277
- if (Size > MaxRegVF && TreeSize > 1 &&
21278
- all_of(RangeSizes.slice(Cnt, Size ),
21287
+ if (VF > MaxRegVF && TreeSize > 1 &&
21288
+ all_of(RangeSizes.slice(SliceStartIdx, VF ),
21279
21289
std::bind(FirstSizeSame, TreeSize, _1))) {
21280
- Cnt += Size;
21281
- while (Cnt != Sz && RangeSizes[Cnt].first == TreeSize)
21282
- ++Cnt;
21290
+ SliceStartIdx += VF;
21291
+ while (SliceStartIdx != MaxSliceEnd &&
21292
+ RangeSizes[SliceStartIdx].first == TreeSize)
21293
+ ++SliceStartIdx;
21283
21294
continue;
21284
21295
}
21285
- if (TreeSize > 1)
21296
+ if (TreeSize > 1) {
21286
21297
for (std::pair<unsigned, unsigned> &P :
21287
- RangeSizes.slice(Cnt, Size )) {
21288
- if (Size >= MaxRegVF)
21298
+ RangeSizes.slice(SliceStartIdx, VF )) {
21299
+ if (VF >= MaxRegVF)
21289
21300
P.second = std::max(P.second, TreeSize);
21290
21301
else
21291
21302
P.first = std::max(P.first, TreeSize);
21292
21303
}
21293
- ++Cnt;
21304
+ }
21305
+ ++SliceStartIdx;
21294
21306
AnyProfitableGraph = true;
21295
21307
}
21296
- if (StartIdx >= End)
21308
+ if (FirstUnvecStore >= End)
21297
21309
break;
21298
- if (Sz - StartIdx < Size && Sz - StartIdx >= MinVF)
21310
+ if (MaxSliceEnd - FirstUnvecStore < VF &&
21311
+ MaxSliceEnd - FirstUnvecStore >= MinVF)
21299
21312
AnyProfitableGraph = true;
21300
- StartIdx = std::distance(
21313
+ FirstUnvecStore = std::distance(
21301
21314
RangeSizes.begin(),
21302
- find_if(RangeSizes.drop_front(Sz ),
21303
- std::bind(IsNotVectorized, Size >= MaxRegVF, _1)));
21315
+ find_if(RangeSizes.drop_front(MaxSliceEnd ),
21316
+ std::bind(IsNotVectorized, VF >= MaxRegVF, _1)));
21304
21317
}
21305
- if (!AnyProfitableGraph && Size >= MaxRegVF && has_single_bit(Size ))
21318
+ if (!AnyProfitableGraph && VF >= MaxRegVF && has_single_bit(VF ))
21306
21319
break;
21307
21320
}
21308
21321
// All values vectorized - exit.
0 commit comments