@@ -4577,6 +4577,31 @@ getGEPCosts(const TargetTransformInfo &TTI, ArrayRef<Value *> Ptrs,
4577
4577
Value *BasePtr, unsigned Opcode, TTI::TargetCostKind CostKind,
4578
4578
Type *ScalarTy, VectorType *VecTy);
4579
4579
4580
+ /// Returns the cost of the shuffle instructions with the given \p Kind, vector
4581
+ /// type \p Tp and optional \p Mask. Adds SLP-specifc cost estimation for insert
4582
+ /// subvector pattern.
4583
+ static InstructionCost
4584
+ getShuffleCost(const TargetTransformInfo &TTI, TTI::ShuffleKind Kind,
4585
+ VectorType *Tp, ArrayRef<int> Mask = std::nullopt,
4586
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
4587
+ int Index = 0, VectorType *SubTp = nullptr,
4588
+ ArrayRef<const Value *> Args = std::nullopt) {
4589
+ if (Kind != TTI::SK_PermuteTwoSrc)
4590
+ return TTI.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
4591
+ int NumSrcElts = Tp->getElementCount().getKnownMinValue();
4592
+ int NumSubElts;
4593
+ if (Mask.size() > 2 && ShuffleVectorInst::isInsertSubvectorMask(
4594
+ Mask, NumSrcElts, NumSubElts, Index)) {
4595
+ if (Index + NumSubElts > NumSrcElts &&
4596
+ Index + NumSrcElts <= static_cast<int>(Mask.size()))
4597
+ return TTI.getShuffleCost(
4598
+ TTI::SK_InsertSubvector,
4599
+ getWidenedType(Tp->getElementType(), Mask.size()), Mask,
4600
+ TTI::TCK_RecipThroughput, Index, Tp);
4601
+ }
4602
+ return TTI.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
4603
+ }
4604
+
4580
4605
BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
4581
4606
ArrayRef<Value *> VL, const Value *VL0, SmallVectorImpl<unsigned> &Order,
4582
4607
SmallVectorImpl<Value *> &PointerOps, bool TryRecursiveCheck) const {
@@ -4783,8 +4808,8 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
4783
4808
for (int Idx : seq<int>(0, VL.size()))
4784
4809
ShuffleMask[Idx] = Idx / VF == I ? VL.size() + Idx % VF : Idx;
4785
4810
VecLdCost +=
4786
- TTI. getShuffleCost(TTI::SK_InsertSubvector, VecTy, ShuffleMask ,
4787
- CostKind, I * VF, SubVecTy);
4811
+ :: getShuffleCost(TTI, TTI ::SK_InsertSubvector, VecTy,
4812
+ ShuffleMask, CostKind, I * VF, SubVecTy);
4788
4813
}
4789
4814
// If masked gather cost is higher - better to vectorize, so
4790
4815
// consider it as a gather node. It will be better estimated
@@ -5223,7 +5248,7 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
5223
5248
InstructionCost PermuteCost =
5224
5249
TopToBottom
5225
5250
? 0
5226
- : TTI-> getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, Mask);
5251
+ : :: getShuffleCost(*TTI, TTI::SK_PermuteSingleSrc, Ty, Mask);
5227
5252
InstructionCost InsertFirstCost = TTI->getVectorInstrCost(
5228
5253
Instruction::InsertElement, Ty, TTI::TCK_RecipThroughput, 0,
5229
5254
PoisonValue::get(Ty), *It);
@@ -8152,31 +8177,6 @@ class BaseShuffleAnalysis {
8152
8177
};
8153
8178
} // namespace
8154
8179
8155
- /// Returns the cost of the shuffle instructions with the given \p Kind, vector
8156
- /// type \p Tp and optional \p Mask. Adds SLP-specifc cost estimation for insert
8157
- /// subvector pattern.
8158
- static InstructionCost
8159
- getShuffleCost(const TargetTransformInfo &TTI, TTI::ShuffleKind Kind,
8160
- VectorType *Tp, ArrayRef<int> Mask = std::nullopt,
8161
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
8162
- int Index = 0, VectorType *SubTp = nullptr,
8163
- ArrayRef<const Value *> Args = std::nullopt) {
8164
- if (Kind != TTI::SK_PermuteTwoSrc)
8165
- return TTI.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
8166
- int NumSrcElts = Tp->getElementCount().getKnownMinValue();
8167
- int NumSubElts;
8168
- if (Mask.size() > 2 && ShuffleVectorInst::isInsertSubvectorMask(
8169
- Mask, NumSrcElts, NumSubElts, Index)) {
8170
- if (Index + NumSubElts > NumSrcElts &&
8171
- Index + NumSrcElts <= static_cast<int>(Mask.size()))
8172
- return TTI.getShuffleCost(
8173
- TTI::SK_InsertSubvector,
8174
- getWidenedType(Tp->getElementType(), Mask.size()), Mask,
8175
- TTI::TCK_RecipThroughput, Index, Tp);
8176
- }
8177
- return TTI.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
8178
- }
8179
-
8180
8180
/// Calculate the scalar and the vector costs from vectorizing set of GEPs.
8181
8181
static std::pair<InstructionCost, InstructionCost>
8182
8182
getGEPCosts(const TargetTransformInfo &TTI, ArrayRef<Value *> Ptrs,
@@ -8546,8 +8546,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
8546
8546
for (unsigned I = VF, E = VL.size(); I < E; I += VF) {
8547
8547
for (unsigned Idx : seq<unsigned>(0, E))
8548
8548
ShuffleMask[Idx] = Idx / VF == I ? E + Idx % VF : Idx;
8549
- GatherCost += TTI. getShuffleCost(TTI::SK_InsertSubvector, VecTy,
8550
- ShuffleMask, CostKind, I, LoadTy);
8549
+ GatherCost += :: getShuffleCost(TTI, TTI::SK_InsertSubvector, VecTy,
8550
+ ShuffleMask, CostKind, I, LoadTy);
8551
8551
}
8552
8552
}
8553
8553
GatherCost -= ScalarsCost;
@@ -8574,10 +8574,11 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
8574
8574
InstructionCost InsertCost =
8575
8575
TTI.getVectorInstrCost(Instruction::InsertElement, VecTy, CostKind, 0,
8576
8576
PoisonValue::get(VecTy), *It);
8577
- return InsertCost + TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast,
8578
- VecTy, ShuffleMask, CostKind,
8579
- /*Index=*/0, /*SubTp=*/nullptr,
8580
- /*Args=*/*It);
8577
+ return InsertCost + ::getShuffleCost(TTI,
8578
+ TargetTransformInfo::SK_Broadcast,
8579
+ VecTy, ShuffleMask, CostKind,
8580
+ /*Index=*/0, /*SubTp=*/nullptr,
8581
+ /*Args=*/*It);
8581
8582
}
8582
8583
return GatherCost +
8583
8584
(all_of(Gathers, IsaPred<UndefValue>)
@@ -8801,8 +8802,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
8801
8802
cast<VectorType>(V1->getType())->getElementCount().getKnownMinValue();
8802
8803
if (isEmptyOrIdentity(Mask, VF))
8803
8804
return TTI::TCC_Free;
8804
- return TTI. getShuffleCost(TTI::SK_PermuteSingleSrc,
8805
- cast<VectorType>(V1->getType()), Mask);
8805
+ return :: getShuffleCost(TTI, TTI::SK_PermuteSingleSrc,
8806
+ cast<VectorType>(V1->getType()), Mask);
8806
8807
}
8807
8808
InstructionCost createIdentity(Value *) const { return TTI::TCC_Free; }
8808
8809
InstructionCost createPoison(Type *Ty, unsigned VF) const {
@@ -9460,7 +9461,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
9460
9461
::addMask(Mask, E->ReuseShuffleIndices);
9461
9462
if (!Mask.empty() && !ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))
9462
9463
CommonCost =
9463
- TTI-> getShuffleCost(TTI::SK_PermuteSingleSrc, FinalVecTy, Mask);
9464
+ :: getShuffleCost(*TTI, TTI::SK_PermuteSingleSrc, FinalVecTy, Mask);
9464
9465
assert((E->State == TreeEntry::Vectorize ||
9465
9466
E->State == TreeEntry::ScatterVectorize ||
9466
9467
E->State == TreeEntry::StridedVectorize) &&
@@ -9721,8 +9722,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
9721
9722
// we can merge this shuffle with the following SK_Select.
9722
9723
auto *InsertVecTy = getWidenedType(ScalarTy, InsertVecSz);
9723
9724
if (!IsIdentity)
9724
- Cost += TTI-> getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
9725
- InsertVecTy, Mask);
9725
+ Cost += :: getShuffleCost(*TTI, TargetTransformInfo::SK_PermuteSingleSrc,
9726
+ InsertVecTy, Mask);
9726
9727
auto *FirstInsert = cast<Instruction>(*find_if(E->Scalars, [E](Value *V) {
9727
9728
return !is_contained(E->Scalars, cast<Instruction>(V)->getOperand(0));
9728
9729
}));
@@ -9736,9 +9737,9 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
9736
9737
if (!InMask.all() && NumScalars != NumElts && !IsWholeSubvector) {
9737
9738
if (InsertVecSz != VecSz) {
9738
9739
auto *ActualVecTy = getWidenedType(ScalarTy, VecSz);
9739
- Cost += TTI-> getShuffleCost(TTI::SK_InsertSubvector, ActualVecTy,
9740
- std::nullopt, CostKind, OffsetBeg - Offset,
9741
- InsertVecTy);
9740
+ Cost += :: getShuffleCost(*TTI, TTI::SK_InsertSubvector, ActualVecTy,
9741
+ std::nullopt, CostKind, OffsetBeg - Offset,
9742
+ InsertVecTy);
9742
9743
} else {
9743
9744
for (unsigned I = 0, End = OffsetBeg - Offset; I < End; ++I)
9744
9745
Mask[I] = InMask.test(I) ? PoisonMaskElem : I;
@@ -9867,8 +9868,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
9867
9868
if (CondNumElements != VecTyNumElements) {
9868
9869
// When the return type is i1 but the source is fixed vector type, we
9869
9870
// need to duplicate the condition value.
9870
- VecCost += TTI-> getShuffleCost(
9871
- TTI::SK_PermuteSingleSrc, CondType,
9871
+ VecCost += :: getShuffleCost(
9872
+ *TTI, TTI::SK_PermuteSingleSrc, CondType,
9872
9873
createReplicatedMask(VecTyNumElements / CondNumElements,
9873
9874
CondNumElements));
9874
9875
}
@@ -10851,9 +10852,9 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
10851
10852
SmallVector<int> OrigMask(VecVF, PoisonMaskElem);
10852
10853
std::copy(Mask.begin(), std::next(Mask.begin(), std::min(VF, VecVF)),
10853
10854
OrigMask.begin());
10854
- C = TTI-> getShuffleCost(TTI::SK_PermuteSingleSrc,
10855
- getWidenedType(TE->getMainOp()->getType(), VecVF),
10856
- OrigMask);
10855
+ C = :: getShuffleCost(*TTI, TTI::SK_PermuteSingleSrc,
10856
+ getWidenedType(TE->getMainOp()->getType(), VecVF),
10857
+ OrigMask);
10857
10858
LLVM_DEBUG(
10858
10859
dbgs() << "SLP: Adding cost " << C
10859
10860
<< " for final shuffle of insertelement external users.\n";
@@ -10883,7 +10884,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
10883
10884
static_cast<int>(Data.index()) == Data.value());
10884
10885
})) {
10885
10886
InstructionCost C =
10886
- TTI-> getShuffleCost(TTI::SK_PermuteSingleSrc, FTy, Mask);
10887
+ :: getShuffleCost(*TTI, TTI::SK_PermuteSingleSrc, FTy, Mask);
10887
10888
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
10888
10889
<< " for final shuffle of insertelement "
10889
10890
"external users.\n";
@@ -11584,8 +11585,8 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
11584
11585
TTI->getScalarizationOverhead(VecTy, ~ShuffledElements, /*Insert*/ true,
11585
11586
/*Extract*/ false, CostKind);
11586
11587
if (DuplicateNonConst)
11587
- Cost += TTI-> getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
11588
- VecTy, ShuffleMask);
11588
+ Cost += :: getShuffleCost(*TTI, TargetTransformInfo::SK_PermuteSingleSrc,
11589
+ VecTy, ShuffleMask);
11589
11590
return Cost;
11590
11591
}
11591
11592
0 commit comments