@@ -260,20 +260,6 @@ static FixedVectorType *getWidenedType(Type *ScalarTy, unsigned VF) {
260
260
VF * getNumElements(ScalarTy));
261
261
}
262
262
263
- /// Returns the number of elements of the given type \p Ty, not less than \p Sz,
264
- /// which forms type, which splits by \p TTI into whole vector types during
265
- /// legalization.
266
- static unsigned getFullVectorNumberOfElements(const TargetTransformInfo &TTI,
267
- Type *Ty, unsigned Sz) {
268
- if (!isValidElementType(Ty))
269
- return PowerOf2Ceil(Sz);
270
- // Find the number of elements, which forms full vectors.
271
- const unsigned NumParts = TTI.getRegUsageForType(getWidenedType(Ty, Sz));
272
- if (NumParts == 0 || NumParts >= Sz)
273
- return PowerOf2Ceil(Sz);
274
- return PowerOf2Ceil(divideCeil(Sz, NumParts)) * NumParts;
275
- }
276
-
277
263
static void transformScalarShuffleIndiciesToVector(unsigned VecTyNumElements,
278
264
SmallVectorImpl<int> &Mask) {
279
265
// The ShuffleBuilder implementation use shufflevector to splat an "element".
@@ -1237,22 +1223,6 @@ static bool doesNotNeedToSchedule(ArrayRef<Value *> VL) {
1237
1223
(all_of(VL, isUsedOutsideBlock) || all_of(VL, areAllOperandsNonInsts));
1238
1224
}
1239
1225
1240
- /// Returns true if widened type of \p Ty elements with size \p Sz represents
1241
- /// full vector type, i.e. adding extra element results in extra parts upon type
1242
- /// legalization.
1243
- static bool hasFullVectorsOnly(const TargetTransformInfo &TTI, Type *Ty,
1244
- unsigned Sz) {
1245
- if (Sz <= 1)
1246
- return false;
1247
- if (!isValidElementType(Ty) && !isa<FixedVectorType>(Ty))
1248
- return false;
1249
- if (has_single_bit(Sz))
1250
- return true;
1251
- const unsigned NumParts = TTI.getRegUsageForType(getWidenedType(Ty, Sz));
1252
- return NumParts > 0 && NumParts < Sz && has_single_bit(Sz / NumParts) &&
1253
- Sz % NumParts == 0;
1254
- }
1255
-
1256
1226
namespace slpvectorizer {
1257
1227
1258
1228
/// Bottom Up SLP Vectorizer.
@@ -2496,9 +2466,7 @@ class BoUpSLP {
2496
2466
}
2497
2467
// TODO: Check if we can remove a check for non-power-2 number of
2498
2468
// scalars after full support of non-power-2 vectorization.
2499
- return UniqueValues.size() != 2 &&
2500
- hasFullVectorsOnly(*R.TTI, (*UniqueValues.begin())->getType(),
2501
- UniqueValues.size());
2469
+ return UniqueValues.size() != 2 && has_single_bit(UniqueValues.size());
2502
2470
};
2503
2471
2504
2472
// If the initial strategy fails for any of the operand indexes, then we
@@ -3307,9 +3275,8 @@ class BoUpSLP {
3307
3275
SmallVectorImpl<Value *> *AltScalars = nullptr) const;
3308
3276
3309
3277
/// Return true if this is a non-power-of-2 node.
3310
- bool isNonPowOf2Vec(const TargetTransformInfo &TTI) const {
3311
- bool IsNonPowerOf2 = !hasFullVectorsOnly(
3312
- TTI, getValueType(Scalars.front()), Scalars.size());
3278
+ bool isNonPowOf2Vec() const {
3279
+ bool IsNonPowerOf2 = !has_single_bit(Scalars.size());
3313
3280
assert((!IsNonPowerOf2 || ReuseShuffleIndices.empty()) &&
3314
3281
"Reshuffling not supported with non-power-of-2 vectors yet.");
3315
3282
return IsNonPowerOf2;
@@ -3487,7 +3454,7 @@ class BoUpSLP {
3487
3454
3488
3455
if (UserTreeIdx.UserTE) {
3489
3456
Last->UserTreeIndices.push_back(UserTreeIdx);
3490
- assert((!Last->isNonPowOf2Vec(*TTI ) || Last->ReorderIndices.empty()) &&
3457
+ assert((!Last->isNonPowOf2Vec() || Last->ReorderIndices.empty()) &&
3491
3458
"Reordering isn't implemented for non-power-of-2 nodes yet");
3492
3459
}
3493
3460
return Last;
@@ -4393,7 +4360,7 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) {
4393
4360
if (!isValidElementType(ScalarTy))
4394
4361
return std::nullopt;
4395
4362
auto *VecTy = getWidenedType(ScalarTy, NumScalars);
4396
- int NumParts = TTI->getRegUsageForType (VecTy);
4363
+ int NumParts = TTI->getNumberOfParts (VecTy);
4397
4364
if (NumParts == 0 || NumParts >= NumScalars)
4398
4365
NumParts = 1;
4399
4366
SmallVector<int> ExtractMask;
@@ -4765,7 +4732,7 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
4765
4732
// Check the order of pointer operands or that all pointers are the same.
4766
4733
bool IsSorted = sortPtrAccesses(PointerOps, ScalarTy, *DL, *SE, Order);
4767
4734
// FIXME: Reordering isn't implemented for non-power-of-2 nodes yet.
4768
- if (!Order.empty() && !hasFullVectorsOnly(*TTI, ScalarTy, Sz )) {
4735
+ if (!Order.empty() && !has_single_bit(VL.size() )) {
4769
4736
assert(VectorizeNonPowerOf2 && "non-power-of-2 number of loads only "
4770
4737
"supported with VectorizeNonPowerOf2");
4771
4738
return LoadsState::Gather;
@@ -4819,13 +4786,12 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
4819
4786
});
4820
4787
});
4821
4788
const unsigned AbsoluteDiff = std::abs(*Diff);
4822
- if (IsPossibleStrided &&
4823
- (IsAnyPointerUsedOutGraph ||
4824
- ((Sz > MinProfitableStridedLoads ||
4825
- (AbsoluteDiff <= MaxProfitableLoadStride * Sz &&
4826
- hasFullVectorsOnly(*TTI, ScalarTy, AbsoluteDiff))) &&
4827
- AbsoluteDiff > Sz) ||
4828
- *Diff == -(static_cast<int>(Sz) - 1))) {
4789
+ if (IsPossibleStrided && (IsAnyPointerUsedOutGraph ||
4790
+ ((Sz > MinProfitableStridedLoads ||
4791
+ (AbsoluteDiff <= MaxProfitableLoadStride * Sz &&
4792
+ has_single_bit(AbsoluteDiff))) &&
4793
+ AbsoluteDiff > Sz) ||
4794
+ *Diff == -(static_cast<int>(Sz) - 1))) {
4829
4795
int Stride = *Diff / static_cast<int>(Sz - 1);
4830
4796
if (*Diff == Stride * static_cast<int>(Sz - 1)) {
4831
4797
Align Alignment =
@@ -5230,7 +5196,7 @@ static bool areTwoInsertFromSameBuildVector(
5230
5196
std::optional<BoUpSLP::OrdersType>
5231
5197
BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
5232
5198
// FIXME: Vectorizing is not supported yet for non-power-of-2 ops.
5233
- if (TE.isNonPowOf2Vec(*TTI ))
5199
+ if (TE.isNonPowOf2Vec())
5234
5200
return std::nullopt;
5235
5201
5236
5202
// No need to reorder if need to shuffle reuses, still need to shuffle the
@@ -5264,8 +5230,8 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
5264
5230
}
5265
5231
}
5266
5232
if (Sz == 2 && TE.getVectorFactor() == 4 &&
5267
- TTI->getRegUsageForType (getWidenedType(TE.Scalars.front()->getType(),
5268
- 2 * TE.getVectorFactor())) == 1)
5233
+ TTI->getNumberOfParts (getWidenedType(TE.Scalars.front()->getType(),
5234
+ 2 * TE.getVectorFactor())) == 1)
5269
5235
return std::nullopt;
5270
5236
if (!ShuffleVectorInst::isOneUseSingleSourceMask(TE.ReuseShuffleIndices,
5271
5237
Sz)) {
@@ -5614,7 +5580,7 @@ void BoUpSLP::reorderTopToBottom() {
5614
5580
5615
5581
// Reorder the graph nodes according to their vectorization factor.
5616
5582
for (unsigned VF = VectorizableTree.front()->getVectorFactor(); VF > 1;
5617
- VF -= 2) {
5583
+ VF /= 2) {
5618
5584
auto It = VFToOrderedEntries.find(VF);
5619
5585
if (It == VFToOrderedEntries.end())
5620
5586
continue;
@@ -5787,7 +5753,7 @@ bool BoUpSLP::canReorderOperands(
5787
5753
ArrayRef<TreeEntry *> ReorderableGathers,
5788
5754
SmallVectorImpl<TreeEntry *> &GatherOps) {
5789
5755
// FIXME: Reordering isn't implemented for non-power-of-2 nodes yet.
5790
- if (UserTE->isNonPowOf2Vec(*TTI ))
5756
+ if (UserTE->isNonPowOf2Vec())
5791
5757
return false;
5792
5758
5793
5759
for (unsigned I = 0, E = UserTE->getNumOperands(); I < E; ++I) {
@@ -5962,7 +5928,7 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
5962
5928
auto Res = OrdersUses.insert(std::make_pair(OrdersType(), 0));
5963
5929
const auto AllowsReordering = [&](const TreeEntry *TE) {
5964
5930
// FIXME: Reordering isn't implemented for non-power-of-2 nodes yet.
5965
- if (TE->isNonPowOf2Vec(*TTI ))
5931
+ if (TE->isNonPowOf2Vec())
5966
5932
return false;
5967
5933
if (!TE->ReorderIndices.empty() || !TE->ReuseShuffleIndices.empty() ||
5968
5934
(TE->State == TreeEntry::Vectorize && TE->isAltShuffle()) ||
@@ -6615,7 +6581,7 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
6615
6581
case Instruction::ExtractElement: {
6616
6582
bool Reuse = canReuseExtract(VL, VL0, CurrentOrder);
6617
6583
// FIXME: Vectorizing is not supported yet for non-power-of-2 ops.
6618
- if (!hasFullVectorsOnly(*TTI, VL0->getType(), VL.size()))
6584
+ if (!has_single_bit( VL.size()))
6619
6585
return TreeEntry::NeedToGather;
6620
6586
if (Reuse || !CurrentOrder.empty())
6621
6587
return TreeEntry::Vectorize;
@@ -7025,7 +6991,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
7025
6991
ReuseShuffleIndices.clear();
7026
6992
} else {
7027
6993
// FIXME: Reshuffing scalars is not supported yet for non-power-of-2 ops.
7028
- if (UserTreeIdx.UserTE && UserTreeIdx.UserTE->isNonPowOf2Vec(*TTI )) {
6994
+ if (UserTreeIdx.UserTE && UserTreeIdx.UserTE->isNonPowOf2Vec()) {
7029
6995
LLVM_DEBUG(dbgs() << "SLP: Reshuffling scalars not yet supported "
7030
6996
"for nodes with padding.\n");
7031
6997
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
@@ -7038,18 +7004,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
7038
7004
return isa<UndefValue>(V) ||
7039
7005
!isConstant(V);
7040
7006
})) ||
7041
- !hasFullVectorsOnly(*TTI, UniqueValues.front()->getType(),
7042
- NumUniqueScalarValues)) {
7007
+ !llvm::has_single_bit<uint32_t>(NumUniqueScalarValues)) {
7043
7008
if (DoNotFail && UniquePositions.size() > 1 &&
7044
7009
NumUniqueScalarValues > 1 && S.MainOp->isSafeToRemove() &&
7045
7010
all_of(UniqueValues, [=](Value *V) {
7046
7011
return isa<ExtractElementInst>(V) ||
7047
7012
areAllUsersVectorized(cast<Instruction>(V),
7048
7013
UserIgnoreList);
7049
7014
})) {
7050
- // Find the number of elements, which forms full vectors.
7051
- unsigned PWSz = getFullVectorNumberOfElements(
7052
- *TTI, UniqueValues.front()->getType(), UniqueValues.size());
7015
+ unsigned PWSz = PowerOf2Ceil(UniqueValues.size());
7053
7016
if (PWSz == VL.size()) {
7054
7017
ReuseShuffleIndices.clear();
7055
7018
} else {
@@ -9260,7 +9223,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
9260
9223
}
9261
9224
assert(!CommonMask.empty() && "Expected non-empty common mask.");
9262
9225
auto *MaskVecTy = getWidenedType(ScalarTy, Mask.size());
9263
- unsigned NumParts = TTI.getRegUsageForType (MaskVecTy);
9226
+ unsigned NumParts = TTI.getNumberOfParts (MaskVecTy);
9264
9227
if (NumParts == 0 || NumParts >= Mask.size())
9265
9228
NumParts = 1;
9266
9229
unsigned SliceSize = getPartNumElems(Mask.size(), NumParts);
@@ -9277,7 +9240,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
9277
9240
}
9278
9241
assert(!CommonMask.empty() && "Expected non-empty common mask.");
9279
9242
auto *MaskVecTy = getWidenedType(ScalarTy, Mask.size());
9280
- unsigned NumParts = TTI.getRegUsageForType (MaskVecTy);
9243
+ unsigned NumParts = TTI.getNumberOfParts (MaskVecTy);
9281
9244
if (NumParts == 0 || NumParts >= Mask.size())
9282
9245
NumParts = 1;
9283
9246
unsigned SliceSize = getPartNumElems(Mask.size(), NumParts);
@@ -9783,7 +9746,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
9783
9746
unsigned const NumElts = SrcVecTy->getNumElements();
9784
9747
unsigned const NumScalars = VL.size();
9785
9748
9786
- unsigned NumOfParts = TTI->getRegUsageForType (SrcVecTy);
9749
+ unsigned NumOfParts = TTI->getNumberOfParts (SrcVecTy);
9787
9750
9788
9751
SmallVector<int> InsertMask(NumElts, PoisonMaskElem);
9789
9752
unsigned OffsetBeg = *getElementIndex(VL.front());
@@ -11027,9 +10990,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
11027
10990
// Keep original scalar if number of externally used instructions in
11028
10991
// the same entry is not power of 2. It may help to do some extra
11029
10992
// vectorization for now.
11030
- KeepScalar =
11031
- ScalarUsesCount <= 1 ||
11032
- !hasFullVectorsOnly(*TTI, EU.Scalar->getType(), ScalarUsesCount);
10993
+ KeepScalar = ScalarUsesCount <= 1 || !has_single_bit(ScalarUsesCount);
11033
10994
}
11034
10995
if (KeepScalar) {
11035
10996
ExternalUsesAsOriginalScalar.insert(EU.Scalar);
@@ -11726,14 +11687,13 @@ BoUpSLP::isGatherShuffledEntry(
11726
11687
if (TE == VectorizableTree.front().get())
11727
11688
return {};
11728
11689
// FIXME: Gathering for non-power-of-2 nodes not implemented yet.
11729
- if (TE->isNonPowOf2Vec(*TTI ))
11690
+ if (TE->isNonPowOf2Vec())
11730
11691
return {};
11731
11692
Mask.assign(VL.size(), PoisonMaskElem);
11732
11693
assert(TE->UserTreeIndices.size() == 1 &&
11733
11694
"Expected only single user of the gather node.");
11734
- // Number of scalars must be divisible by NumParts.
11735
- if (VL.size() % NumParts != 0)
11736
- return {};
11695
+ assert(VL.size() % NumParts == 0 &&
11696
+ "Number of scalars must be divisible by NumParts.");
11737
11697
unsigned SliceSize = getPartNumElems(VL.size(), NumParts);
11738
11698
SmallVector<std::optional<TTI::ShuffleKind>> Res;
11739
11699
for (unsigned Part : seq<unsigned>(NumParts)) {
@@ -12872,7 +12832,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
12872
12832
SmallVector<SmallVector<const TreeEntry *>> Entries;
12873
12833
Type *OrigScalarTy = GatheredScalars.front()->getType();
12874
12834
auto *VecTy = getWidenedType(ScalarTy, GatheredScalars.size());
12875
- unsigned NumParts = TTI->getRegUsageForType (VecTy);
12835
+ unsigned NumParts = TTI->getNumberOfParts (VecTy);
12876
12836
if (NumParts == 0 || NumParts >= GatheredScalars.size())
12877
12837
NumParts = 1;
12878
12838
if (!all_of(GatheredScalars, IsaPred<UndefValue>)) {
@@ -16126,7 +16086,7 @@ void BoUpSLP::computeMinimumValueSizes() {
16126
16086
[&](Value *V) { return AnalyzedMinBWVals.contains(V); }))
16127
16087
return 0u;
16128
16088
16129
- unsigned NumParts = TTI->getRegUsageForType (
16089
+ unsigned NumParts = TTI->getNumberOfParts (
16130
16090
getWidenedType(TreeRootIT, VF * ScalarTyNumElements));
16131
16091
16132
16092
// The maximum bit width required to represent all the values that can be
@@ -16183,7 +16143,7 @@ void BoUpSLP::computeMinimumValueSizes() {
16183
16143
// use - ignore it.
16184
16144
if (NumParts > 1 &&
16185
16145
NumParts ==
16186
- TTI->getRegUsageForType (getWidenedType(
16146
+ TTI->getNumberOfParts (getWidenedType(
16187
16147
IntegerType::get(F->getContext(), bit_ceil(MaxBitWidth)), VF)))
16188
16148
return 0u;
16189
16149
@@ -17044,7 +17004,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
17044
17004
for (unsigned I = NextInst; I < MaxInst; ++I) {
17045
17005
unsigned ActualVF = std::min(MaxInst - I, VF);
17046
17006
17047
- if (!hasFullVectorsOnly(*TTI, ScalarTy, ActualVF))
17007
+ if (!has_single_bit( ActualVF))
17048
17008
continue;
17049
17009
17050
17010
if (MaxVFOnly && ActualVF < MaxVF)
0 commit comments