@@ -260,20 +260,6 @@ static FixedVectorType *getWidenedType(Type *ScalarTy, unsigned VF) {
260
260
VF * getNumElements(ScalarTy));
261
261
}
262
262
263
- /// Returns the number of elements of the given type \p Ty, not less than \p Sz,
264
- /// which forms type, which splits by \p TTI into whole vector types during
265
- /// legalization.
266
- static unsigned getFullVectorNumberOfElements(const TargetTransformInfo &TTI,
267
- Type *Ty, unsigned Sz) {
268
- if (!isValidElementType(Ty))
269
- return PowerOf2Ceil(Sz);
270
- // Find the number of elements, which forms full vectors.
271
- const unsigned NumParts = TTI.getRegUsageForType(getWidenedType(Ty, Sz));
272
- if (NumParts == 0 || NumParts == Sz)
273
- return PowerOf2Ceil(Sz);
274
- return PowerOf2Ceil(divideCeil(Sz, NumParts)) * NumParts;
275
- }
276
-
277
263
static void transformScalarShuffleIndiciesToVector(unsigned VecTyNumElements,
278
264
SmallVectorImpl<int> &Mask) {
279
265
// The ShuffleBuilder implementation uses shufflevector to splat an "element".
@@ -1238,22 +1224,6 @@ static bool doesNotNeedToSchedule(ArrayRef<Value *> VL) {
1238
1224
(all_of(VL, isUsedOutsideBlock) || all_of(VL, areAllOperandsNonInsts));
1239
1225
}
1240
1226
1241
- /// Returns true if widened type of \p Ty elements with size \p Sz represents
1242
- /// full vector type, i.e. adding extra element results in extra parts upon type
1243
- /// legalization.
1244
- static bool hasFullVectorsOnly(const TargetTransformInfo &TTI, Type *Ty,
1245
- unsigned Sz) {
1246
- if (Sz <= 1)
1247
- return false;
1248
- if (!isValidElementType(Ty) && !isa<FixedVectorType>(Ty))
1249
- return false;
1250
- if (has_single_bit(Sz))
1251
- return true;
1252
- const unsigned NumParts = TTI.getRegUsageForType(getWidenedType(Ty, Sz));
1253
- return NumParts > 0 && NumParts != Sz && has_single_bit(Sz / NumParts) &&
1254
- Sz % NumParts == 0;
1255
- }
1256
-
1257
1227
namespace slpvectorizer {
1258
1228
1259
1229
/// Bottom Up SLP Vectorizer.
@@ -2497,9 +2467,7 @@ class BoUpSLP {
2497
2467
}
2498
2468
// TODO: Check if we can remove a check for non-power-2 number of
2499
2469
// scalars after full support of non-power-2 vectorization.
2500
- return UniqueValues.size() != 2 &&
2501
- hasFullVectorsOnly(*R.TTI, (*UniqueValues.begin())->getType(),
2502
- UniqueValues.size());
2470
+ return UniqueValues.size() != 2 && has_single_bit(UniqueValues.size());
2503
2471
};
2504
2472
2505
2473
// If the initial strategy fails for any of the operand indexes, then we
@@ -3308,9 +3276,8 @@ class BoUpSLP {
3308
3276
SmallVectorImpl<Value *> *AltScalars = nullptr) const;
3309
3277
3310
3278
/// Return true if this is a non-power-of-2 node.
3311
- bool isNonPowOf2Vec(const TargetTransformInfo &TTI) const {
3312
- bool IsNonPowerOf2 = !hasFullVectorsOnly(
3313
- TTI, getValueType(Scalars.front()), Scalars.size());
3279
+ bool isNonPowOf2Vec() const {
3280
+ bool IsNonPowerOf2 = !has_single_bit(Scalars.size());
3314
3281
assert((!IsNonPowerOf2 || ReuseShuffleIndices.empty()) &&
3315
3282
"Reshuffling not supported with non-power-of-2 vectors yet.");
3316
3283
return IsNonPowerOf2;
@@ -3488,7 +3455,7 @@ class BoUpSLP {
3488
3455
3489
3456
if (UserTreeIdx.UserTE) {
3490
3457
Last->UserTreeIndices.push_back(UserTreeIdx);
3491
- assert((!Last->isNonPowOf2Vec(*TTI ) || Last->ReorderIndices.empty()) &&
3458
+ assert((!Last->isNonPowOf2Vec() || Last->ReorderIndices.empty()) &&
3492
3459
"Reordering isn't implemented for non-power-of-2 nodes yet");
3493
3460
}
3494
3461
return Last;
@@ -4394,7 +4361,7 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) {
4394
4361
if (!isValidElementType(ScalarTy))
4395
4362
return std::nullopt;
4396
4363
auto *VecTy = getWidenedType(ScalarTy, NumScalars);
4397
- int NumParts = TTI->getRegUsageForType (VecTy);
4364
+ int NumParts = TTI->getNumberOfParts (VecTy);
4398
4365
if (NumParts == 0 || NumParts >= NumScalars)
4399
4366
NumParts = 1;
4400
4367
SmallVector<int> ExtractMask;
@@ -4766,7 +4733,7 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
4766
4733
// Check the order of pointer operands or that all pointers are the same.
4767
4734
bool IsSorted = sortPtrAccesses(PointerOps, ScalarTy, *DL, *SE, Order);
4768
4735
// FIXME: Reordering isn't implemented for non-power-of-2 nodes yet.
4769
- if (!Order.empty() && !hasFullVectorsOnly(*TTI, ScalarTy, Sz )) {
4736
+ if (!Order.empty() && !has_single_bit(VL.size() )) {
4770
4737
assert(VectorizeNonPowerOf2 && "non-power-of-2 number of loads only "
4771
4738
"supported with VectorizeNonPowerOf2");
4772
4739
return LoadsState::Gather;
@@ -4820,13 +4787,12 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
4820
4787
});
4821
4788
});
4822
4789
const unsigned AbsoluteDiff = std::abs(*Diff);
4823
- if (IsPossibleStrided &&
4824
- (IsAnyPointerUsedOutGraph ||
4825
- ((Sz > MinProfitableStridedLoads ||
4826
- (AbsoluteDiff <= MaxProfitableLoadStride * Sz &&
4827
- hasFullVectorsOnly(*TTI, ScalarTy, AbsoluteDiff))) &&
4828
- AbsoluteDiff > Sz) ||
4829
- *Diff == -(static_cast<int>(Sz) - 1))) {
4790
+ if (IsPossibleStrided && (IsAnyPointerUsedOutGraph ||
4791
+ ((Sz > MinProfitableStridedLoads ||
4792
+ (AbsoluteDiff <= MaxProfitableLoadStride * Sz &&
4793
+ has_single_bit(AbsoluteDiff))) &&
4794
+ AbsoluteDiff > Sz) ||
4795
+ *Diff == -(static_cast<int>(Sz) - 1))) {
4830
4796
int Stride = *Diff / static_cast<int>(Sz - 1);
4831
4797
if (*Diff == Stride * static_cast<int>(Sz - 1)) {
4832
4798
Align Alignment =
@@ -5231,7 +5197,7 @@ static bool areTwoInsertFromSameBuildVector(
5231
5197
std::optional<BoUpSLP::OrdersType>
5232
5198
BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
5233
5199
// FIXME: Vectorizing is not supported yet for non-power-of-2 ops.
5234
- if (TE.isNonPowOf2Vec(*TTI ))
5200
+ if (TE.isNonPowOf2Vec())
5235
5201
return std::nullopt;
5236
5202
5237
5203
// No need to reorder if need to shuffle reuses, still need to shuffle the
@@ -5265,8 +5231,8 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
5265
5231
}
5266
5232
}
5267
5233
if (Sz == 2 && TE.getVectorFactor() == 4 &&
5268
- TTI->getRegUsageForType (getWidenedType(TE.Scalars.front()->getType(),
5269
- 2 * TE.getVectorFactor())) == 1)
5234
+ TTI->getNumberOfParts (getWidenedType(TE.Scalars.front()->getType(),
5235
+ 2 * TE.getVectorFactor())) == 1)
5270
5236
return std::nullopt;
5271
5237
if (!ShuffleVectorInst::isOneUseSingleSourceMask(TE.ReuseShuffleIndices,
5272
5238
Sz)) {
@@ -5615,7 +5581,7 @@ void BoUpSLP::reorderTopToBottom() {
5615
5581
5616
5582
// Reorder the graph nodes according to their vectorization factor.
5617
5583
for (unsigned VF = VectorizableTree.front()->getVectorFactor(); VF > 1;
5618
- VF - = 2) {
5584
+ VF / = 2) {
5619
5585
auto It = VFToOrderedEntries.find(VF);
5620
5586
if (It == VFToOrderedEntries.end())
5621
5587
continue;
@@ -5788,7 +5754,7 @@ bool BoUpSLP::canReorderOperands(
5788
5754
ArrayRef<TreeEntry *> ReorderableGathers,
5789
5755
SmallVectorImpl<TreeEntry *> &GatherOps) {
5790
5756
// FIXME: Reordering isn't implemented for non-power-of-2 nodes yet.
5791
- if (UserTE->isNonPowOf2Vec(*TTI ))
5757
+ if (UserTE->isNonPowOf2Vec())
5792
5758
return false;
5793
5759
5794
5760
for (unsigned I = 0, E = UserTE->getNumOperands(); I < E; ++I) {
@@ -5963,7 +5929,7 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
5963
5929
auto Res = OrdersUses.insert(std::make_pair(OrdersType(), 0));
5964
5930
const auto AllowsReordering = [&](const TreeEntry *TE) {
5965
5931
// FIXME: Reordering isn't implemented for non-power-of-2 nodes yet.
5966
- if (TE->isNonPowOf2Vec(*TTI ))
5932
+ if (TE->isNonPowOf2Vec())
5967
5933
return false;
5968
5934
if (!TE->ReorderIndices.empty() || !TE->ReuseShuffleIndices.empty() ||
5969
5935
(TE->State == TreeEntry::Vectorize && TE->isAltShuffle()) ||
@@ -6609,7 +6575,7 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
6609
6575
case Instruction::ExtractElement: {
6610
6576
bool Reuse = canReuseExtract(VL, VL0, CurrentOrder);
6611
6577
// FIXME: Vectorizing is not supported yet for non-power-of-2 ops.
6612
- if (!hasFullVectorsOnly(*TTI, VL0->getType(), VL.size()))
6578
+ if (!has_single_bit( VL.size()))
6613
6579
return TreeEntry::NeedToGather;
6614
6580
if (Reuse || !CurrentOrder.empty())
6615
6581
return TreeEntry::Vectorize;
@@ -7019,7 +6985,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
7019
6985
ReuseShuffleIndices.clear();
7020
6986
} else {
7021
6987
// FIXME: Reshuffling scalars is not supported yet for non-power-of-2 ops.
7022
- if (UserTreeIdx.UserTE && UserTreeIdx.UserTE->isNonPowOf2Vec(*TTI )) {
6988
+ if (UserTreeIdx.UserTE && UserTreeIdx.UserTE->isNonPowOf2Vec()) {
7023
6989
LLVM_DEBUG(dbgs() << "SLP: Reshuffling scalars not yet supported "
7024
6990
"for nodes with padding.\n");
7025
6991
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
@@ -7032,18 +6998,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
7032
6998
return isa<UndefValue>(V) ||
7033
6999
!isConstant(V);
7034
7000
})) ||
7035
- !hasFullVectorsOnly(*TTI, UniqueValues.front()->getType(),
7036
- NumUniqueScalarValues)) {
7001
+ !llvm::has_single_bit<uint32_t>(NumUniqueScalarValues)) {
7037
7002
if (DoNotFail && UniquePositions.size() > 1 &&
7038
7003
NumUniqueScalarValues > 1 && S.MainOp->isSafeToRemove() &&
7039
7004
all_of(UniqueValues, [=](Value *V) {
7040
7005
return isa<ExtractElementInst>(V) ||
7041
7006
areAllUsersVectorized(cast<Instruction>(V),
7042
7007
UserIgnoreList);
7043
7008
})) {
7044
- // Find the number of elements, which forms full vectors.
7045
- unsigned PWSz = getFullVectorNumberOfElements(
7046
- *TTI, UniqueValues.front()->getType(), UniqueValues.size());
7009
+ unsigned PWSz = PowerOf2Ceil(UniqueValues.size());
7047
7010
if (PWSz == VL.size()) {
7048
7011
ReuseShuffleIndices.clear();
7049
7012
} else {
@@ -9254,7 +9217,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
9254
9217
}
9255
9218
assert(!CommonMask.empty() && "Expected non-empty common mask.");
9256
9219
auto *MaskVecTy = getWidenedType(ScalarTy, Mask.size());
9257
- unsigned NumParts = TTI.getRegUsageForType (MaskVecTy);
9220
+ unsigned NumParts = TTI.getNumberOfParts (MaskVecTy);
9258
9221
if (NumParts == 0 || NumParts >= Mask.size())
9259
9222
NumParts = 1;
9260
9223
unsigned SliceSize = getPartNumElems(Mask.size(), NumParts);
@@ -9271,7 +9234,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
9271
9234
}
9272
9235
assert(!CommonMask.empty() && "Expected non-empty common mask.");
9273
9236
auto *MaskVecTy = getWidenedType(ScalarTy, Mask.size());
9274
- unsigned NumParts = TTI.getRegUsageForType (MaskVecTy);
9237
+ unsigned NumParts = TTI.getNumberOfParts (MaskVecTy);
9275
9238
if (NumParts == 0 || NumParts >= Mask.size())
9276
9239
NumParts = 1;
9277
9240
unsigned SliceSize = getPartNumElems(Mask.size(), NumParts);
@@ -9777,7 +9740,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
9777
9740
unsigned const NumElts = SrcVecTy->getNumElements();
9778
9741
unsigned const NumScalars = VL.size();
9779
9742
9780
- unsigned NumOfParts = TTI->getRegUsageForType (SrcVecTy);
9743
+ unsigned NumOfParts = TTI->getNumberOfParts (SrcVecTy);
9781
9744
9782
9745
SmallVector<int> InsertMask(NumElts, PoisonMaskElem);
9783
9746
unsigned OffsetBeg = *getElementIndex(VL.front());
@@ -10993,9 +10956,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
10993
10956
// Keep original scalar if number of externally used instructions in
10994
10957
// the same entry is not power of 2. It may help to do some extra
10995
10958
// vectorization for now.
10996
- KeepScalar =
10997
- ScalarUsesCount <= 1 ||
10998
- !hasFullVectorsOnly(*TTI, EU.Scalar->getType(), ScalarUsesCount);
10959
+ KeepScalar = ScalarUsesCount <= 1 || !has_single_bit(ScalarUsesCount);
10999
10960
}
11000
10961
if (KeepScalar) {
11001
10962
ExternalUsesAsOriginalScalar.insert(EU.Scalar);
@@ -11688,14 +11649,13 @@ BoUpSLP::isGatherShuffledEntry(
11688
11649
if (TE == VectorizableTree.front().get())
11689
11650
return {};
11690
11651
// FIXME: Gathering for non-power-of-2 nodes not implemented yet.
11691
- if (TE->isNonPowOf2Vec(*TTI ))
11652
+ if (TE->isNonPowOf2Vec())
11692
11653
return {};
11693
11654
Mask.assign(VL.size(), PoisonMaskElem);
11694
11655
assert(TE->UserTreeIndices.size() == 1 &&
11695
11656
"Expected only single user of the gather node.");
11696
- // Number of scalars must be divisible by NumParts.
11697
- if (VL.size() % NumParts != 0)
11698
- return {};
11657
+ assert(VL.size() % NumParts == 0 &&
11658
+ "Number of scalars must be divisible by NumParts.");
11699
11659
unsigned SliceSize = getPartNumElems(VL.size(), NumParts);
11700
11660
SmallVector<std::optional<TTI::ShuffleKind>> Res;
11701
11661
for (unsigned Part : seq<unsigned>(NumParts)) {
@@ -12834,7 +12794,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
12834
12794
SmallVector<SmallVector<const TreeEntry *>> Entries;
12835
12795
Type *OrigScalarTy = GatheredScalars.front()->getType();
12836
12796
auto *VecTy = getWidenedType(ScalarTy, GatheredScalars.size());
12837
- unsigned NumParts = TTI->getRegUsageForType (VecTy);
12797
+ unsigned NumParts = TTI->getNumberOfParts (VecTy);
12838
12798
if (NumParts == 0 || NumParts >= GatheredScalars.size())
12839
12799
NumParts = 1;
12840
12800
if (!all_of(GatheredScalars, IsaPred<UndefValue>)) {
@@ -16080,7 +16040,7 @@ void BoUpSLP::computeMinimumValueSizes() {
16080
16040
[&](Value *V) { return AnalyzedMinBWVals.contains(V); }))
16081
16041
return 0u;
16082
16042
16083
- unsigned NumParts = TTI->getRegUsageForType (
16043
+ unsigned NumParts = TTI->getNumberOfParts (
16084
16044
getWidenedType(TreeRootIT, VF * ScalarTyNumElements));
16085
16045
16086
16046
// The maximum bit width required to represent all the values that can be
@@ -16137,7 +16097,7 @@ void BoUpSLP::computeMinimumValueSizes() {
16137
16097
// use - ignore it.
16138
16098
if (NumParts > 1 &&
16139
16099
NumParts ==
16140
- TTI->getRegUsageForType (getWidenedType(
16100
+ TTI->getNumberOfParts (getWidenedType(
16141
16101
IntegerType::get(F->getContext(), bit_ceil(MaxBitWidth)), VF)))
16142
16102
return 0u;
16143
16103
@@ -16998,7 +16958,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
16998
16958
for (unsigned I = NextInst; I < MaxInst; ++I) {
16999
16959
unsigned ActualVF = std::min(MaxInst - I, VF);
17000
16960
17001
- if (!hasFullVectorsOnly(*TTI, ScalarTy, ActualVF))
16961
+ if (!has_single_bit( ActualVF))
17002
16962
continue;
17003
16963
17004
16964
if (MaxVFOnly && ActualVF < MaxVF)
0 commit comments