Skip to content

Commit 77941eb

Browse files
authored
[CostModel] Add a DstTy to getShuffleCost (#141634)
A shuffle takes two input vectors and a mask, and produces a new vector of type <MaskElts x SrcEltTy>. Historically it has been assumed that the SrcTy and the DstTy are the same for getShuffleCost, although that assumption has been relaxed in recent years. If the Tp passed to getShuffleCost is the SrcTy, then the DstTy can be calculated from the number of Mask elements and the source element type, but the Mask is not always provided and the Tp is not reliably the SrcTy. This has led to situations, notably in the SLP vectorizer but also in the generic cost routines, where assumptions about how vectors will be legalized are built into the cost modelling - for example whether they will widen or promote, with the cost modelling assuming they will widen while the default lowering for integer vectors is to promote. This patch attempts to start improving that. It originally tried to alter more of the cost model, but that quickly became too many changes at once, so this patch just plumbs a DstTy into getShuffleCost so that DstTy and SrcTy can be reliably distinguished. The callers of getShuffleCost have been updated to try to pass a more accurate DstTy. Otherwise the patch tries to be largely non-functional, keeping the SrcTy as the primary type used in the shuffle cost routines and only using DstTy where it was already used in the past (for InsertSubVector, for example). Some asserts have been added to help check for consistent values when both a Mask and a DstTy are provided to getShuffleCost. Some of them took a while to get right, and some calls that do not pass a Mask might still be incorrect. Hopefully this will provide a useful base on which to build support for more shuffles that alter the vector size.
1 parent ea32139 commit 77941eb

25 files changed

+444
-348
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1381,16 +1381,16 @@ class TargetTransformInfo {
13811381
const SmallBitVector &OpcodeMask,
13821382
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
13831383

1384-
/// \return The cost of a shuffle instruction of kind Kind and of type Tp.
1385-
/// The exact mask may be passed as Mask, or else the array will be empty.
1386-
/// The index and subtype parameters are used by the subvector insertion and
1387-
/// extraction shuffle kinds to show the insert/extract point and the type of
1388-
/// the subvector being inserted/extracted. The operands of the shuffle can be
1389-
/// passed through \p Args, which helps improve the cost estimation in some
1390-
/// cases, like in broadcast loads.
1391-
/// NOTE: For subvector extractions Tp represents the source type.
1384+
/// \return The cost of a shuffle instruction of kind Kind with inputs of type
1385+
/// SrcTy, producing a vector of type DstTy. The exact mask may be passed as
1386+
/// Mask, or else the array will be empty. The Index and SubTp parameters
1387+
/// are used by the subvector insertion shuffle kinds to show the insert
1388+
/// point and the type of the subvector being inserted. The operands of the
1389+
/// shuffle can be passed through \p Args, which helps improve the cost
1390+
/// estimation in some cases, like in broadcast loads.
13921391
LLVM_ABI InstructionCost getShuffleCost(
1393-
ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask = {},
1392+
ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
1393+
ArrayRef<int> Mask = {},
13941394
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, int Index = 0,
13951395
VectorType *SubTp = nullptr, ArrayRef<const Value *> Args = {},
13961396
const Instruction *CxtI = nullptr) const;

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 34 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -710,9 +710,9 @@ class TargetTransformInfoImplBase {
710710
}
711711

712712
virtual InstructionCost
713-
getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
714-
TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
715-
ArrayRef<const Value *> Args = {},
713+
getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
714+
ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
715+
VectorType *SubTp, ArrayRef<const Value *> Args = {},
716716
const Instruction *CxtI = nullptr) const {
717717
return 1;
718718
}
@@ -1541,13 +1541,14 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
15411541
return 0;
15421542

15431543
if (Shuffle->isExtractSubvectorMask(SubIndex))
1544-
return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
1545-
Mask, CostKind, SubIndex, VecTy,
1546-
Operands, Shuffle);
1544+
return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecTy,
1545+
VecSrcTy, Mask, CostKind, SubIndex,
1546+
VecTy, Operands, Shuffle);
15471547

15481548
if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
15491549
return TargetTTI->getShuffleCost(
1550-
TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex,
1550+
TTI::SK_InsertSubvector, VecTy, VecSrcTy, Mask, CostKind,
1551+
SubIndex,
15511552
FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
15521553
Operands, Shuffle);
15531554

@@ -1576,62 +1577,69 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
15761577

15771578
return TargetTTI->getShuffleCost(
15781579
IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, VecTy,
1579-
AdjustMask, CostKind, 0, nullptr, Operands, Shuffle);
1580+
VecTy, AdjustMask, CostKind, 0, nullptr, Operands, Shuffle);
15801581
}
15811582

15821583
// Narrowing shuffle - perform shuffle at original wider width and
15831584
// then extract the lower elements.
1585+
// FIXME: This can assume widening, which is not true of all vector
1586+
// architectures (and is not even the default).
15841587
AdjustMask.append(NumSubElts - Mask.size(), PoisonMaskElem);
15851588

15861589
InstructionCost ShuffleCost = TargetTTI->getShuffleCost(
15871590
IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc,
1588-
VecSrcTy, AdjustMask, CostKind, 0, nullptr, Operands, Shuffle);
1591+
VecSrcTy, VecSrcTy, AdjustMask, CostKind, 0, nullptr, Operands,
1592+
Shuffle);
15891593

15901594
SmallVector<int, 16> ExtractMask(Mask.size());
15911595
std::iota(ExtractMask.begin(), ExtractMask.end(), 0);
15921596
return ShuffleCost + TargetTTI->getShuffleCost(
1593-
TTI::SK_ExtractSubvector, VecSrcTy,
1597+
TTI::SK_ExtractSubvector, VecTy, VecSrcTy,
15941598
ExtractMask, CostKind, 0, VecTy, {}, Shuffle);
15951599
}
15961600

15971601
if (Shuffle->isIdentity())
15981602
return 0;
15991603

16001604
if (Shuffle->isReverse())
1601-
return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, Mask, CostKind,
1602-
0, nullptr, Operands, Shuffle);
1605+
return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, VecSrcTy, Mask,
1606+
CostKind, 0, nullptr, Operands,
1607+
Shuffle);
16031608

16041609
if (Shuffle->isSelect())
1605-
return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, Mask, CostKind,
1606-
0, nullptr, Operands, Shuffle);
1610+
return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, VecSrcTy, Mask,
1611+
CostKind, 0, nullptr, Operands,
1612+
Shuffle);
16071613

16081614
if (Shuffle->isTranspose())
1609-
return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, Mask,
1610-
CostKind, 0, nullptr, Operands,
1615+
return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, VecSrcTy,
1616+
Mask, CostKind, 0, nullptr, Operands,
16111617
Shuffle);
16121618

16131619
if (Shuffle->isZeroEltSplat())
1614-
return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, Mask,
1615-
CostKind, 0, nullptr, Operands,
1620+
return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, VecSrcTy,
1621+
Mask, CostKind, 0, nullptr, Operands,
16161622
Shuffle);
16171623

16181624
if (Shuffle->isSingleSource())
1619-
return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy, Mask,
1620-
CostKind, 0, nullptr, Operands,
1621-
Shuffle);
1625+
return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy,
1626+
VecSrcTy, Mask, CostKind, 0, nullptr,
1627+
Operands, Shuffle);
16221628

16231629
if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
16241630
return TargetTTI->getShuffleCost(
1625-
TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex,
1631+
TTI::SK_InsertSubvector, VecTy, VecSrcTy, Mask, CostKind, SubIndex,
16261632
FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands,
16271633
Shuffle);
16281634

16291635
if (Shuffle->isSplice(SubIndex))
1630-
return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy, Mask, CostKind,
1631-
SubIndex, nullptr, Operands, Shuffle);
1636+
return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy, VecSrcTy, Mask,
1637+
CostKind, SubIndex, nullptr, Operands,
1638+
Shuffle);
16321639

1633-
return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, Mask,
1634-
CostKind, 0, nullptr, Operands, Shuffle);
1640+
return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, VecSrcTy,
1641+
Mask, CostKind, 0, nullptr, Operands,
1642+
Shuffle);
16351643
}
16361644
case Instruction::ExtractElement: {
16371645
auto *EEI = dyn_cast<ExtractElementInst>(U);

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 31 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -329,11 +329,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
329329
// Cost the call + mask.
330330
auto Cost =
331331
thisT()->getCallInstrCost(nullptr, RetTy, ICA.getArgTypes(), CostKind);
332-
if (VD->isMasked())
333-
Cost += thisT()->getShuffleCost(
334-
TargetTransformInfo::SK_Broadcast,
335-
VectorType::get(IntegerType::getInt1Ty(Ctx), VF), {}, CostKind, 0,
336-
nullptr, {});
332+
if (VD->isMasked()) {
333+
auto VecTy = VectorType::get(IntegerType::getInt1Ty(Ctx), VF);
334+
Cost += thisT()->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy,
335+
VecTy, {}, CostKind, 0, nullptr, {});
336+
}
337337

338338
// Lowering to a library call (with output pointers) may require us to emit
339339
// reloads for the results.
@@ -1101,11 +1101,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
11011101

11021102
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind,
11031103
ArrayRef<int> Mask,
1104-
VectorType *Ty, int &Index,
1104+
VectorType *SrcTy, int &Index,
11051105
VectorType *&SubTy) const {
11061106
if (Mask.empty())
11071107
return Kind;
1108-
int NumSrcElts = Ty->getElementCount().getKnownMinValue();
1108+
int NumSrcElts = SrcTy->getElementCount().getKnownMinValue();
11091109
switch (Kind) {
11101110
case TTI::SK_PermuteSingleSrc: {
11111111
if (ShuffleVectorInst::isReverseMask(Mask, NumSrcElts))
@@ -1116,7 +1116,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
11161116
return TTI::SK_Broadcast;
11171117
if (ShuffleVectorInst::isExtractSubvectorMask(Mask, NumSrcElts, Index) &&
11181118
(Index + Mask.size()) <= (size_t)NumSrcElts) {
1119-
SubTy = FixedVectorType::get(Ty->getElementType(), Mask.size());
1119+
SubTy = FixedVectorType::get(SrcTy->getElementType(), Mask.size());
11201120
return TTI::SK_ExtractSubvector;
11211121
}
11221122
break;
@@ -1127,7 +1127,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
11271127
Mask, NumSrcElts, NumSubElts, Index)) {
11281128
if (Index + NumSubElts > NumSrcElts)
11291129
return Kind;
1130-
SubTy = FixedVectorType::get(Ty->getElementType(), NumSubElts);
1130+
SubTy = FixedVectorType::get(SrcTy->getElementType(), NumSubElts);
11311131
return TTI::SK_InsertSubvector;
11321132
}
11331133
if (ShuffleVectorInst::isSelectMask(Mask, NumSrcElts))
@@ -1151,13 +1151,13 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
11511151
}
11521152

11531153
InstructionCost
1154-
getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask,
1155-
TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
1156-
ArrayRef<const Value *> Args = {},
1154+
getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
1155+
ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
1156+
VectorType *SubTp, ArrayRef<const Value *> Args = {},
11571157
const Instruction *CxtI = nullptr) const override {
1158-
switch (improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp)) {
1158+
switch (improveShuffleKindFromMask(Kind, Mask, SrcTy, Index, SubTp)) {
11591159
case TTI::SK_Broadcast:
1160-
if (auto *FVT = dyn_cast<FixedVectorType>(Tp))
1160+
if (auto *FVT = dyn_cast<FixedVectorType>(SrcTy))
11611161
return getBroadcastShuffleOverhead(FVT, CostKind);
11621162
return InstructionCost::getInvalid();
11631163
case TTI::SK_Select:
@@ -1166,14 +1166,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
11661166
case TTI::SK_Transpose:
11671167
case TTI::SK_PermuteSingleSrc:
11681168
case TTI::SK_PermuteTwoSrc:
1169-
if (auto *FVT = dyn_cast<FixedVectorType>(Tp))
1169+
if (auto *FVT = dyn_cast<FixedVectorType>(SrcTy))
11701170
return getPermuteShuffleOverhead(FVT, CostKind);
11711171
return InstructionCost::getInvalid();
11721172
case TTI::SK_ExtractSubvector:
1173-
return getExtractSubvectorOverhead(Tp, CostKind, Index,
1173+
return getExtractSubvectorOverhead(SrcTy, CostKind, Index,
11741174
cast<FixedVectorType>(SubTp));
11751175
case TTI::SK_InsertSubvector:
1176-
return getInsertSubvectorOverhead(Tp, CostKind, Index,
1176+
return getInsertSubvectorOverhead(DstTy, CostKind, Index,
11771177
cast<FixedVectorType>(SubTp));
11781178
}
11791179
llvm_unreachable("Unknown TTI::ShuffleKind");
@@ -1910,6 +1910,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
19101910
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
19111911
unsigned Index = cast<ConstantInt>(Args[1])->getZExtValue();
19121912
return thisT()->getShuffleCost(TTI::SK_ExtractSubvector,
1913+
cast<VectorType>(RetTy),
19131914
cast<VectorType>(Args[0]->getType()), {},
19141915
CostKind, Index, cast<VectorType>(RetTy));
19151916
}
@@ -1920,17 +1921,18 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
19201921
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
19211922
unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
19221923
return thisT()->getShuffleCost(
1923-
TTI::SK_InsertSubvector, cast<VectorType>(Args[0]->getType()), {},
1924-
CostKind, Index, cast<VectorType>(Args[1]->getType()));
1924+
TTI::SK_InsertSubvector, cast<VectorType>(RetTy),
1925+
cast<VectorType>(Args[0]->getType()), {}, CostKind, Index,
1926+
cast<VectorType>(Args[1]->getType()));
19251927
}
19261928
case Intrinsic::vector_reverse: {
1927-
return thisT()->getShuffleCost(TTI::SK_Reverse,
1929+
return thisT()->getShuffleCost(TTI::SK_Reverse, cast<VectorType>(RetTy),
19281930
cast<VectorType>(Args[0]->getType()), {},
19291931
CostKind, 0, cast<VectorType>(RetTy));
19301932
}
19311933
case Intrinsic::vector_splice: {
19321934
unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
1933-
return thisT()->getShuffleCost(TTI::SK_Splice,
1935+
return thisT()->getShuffleCost(TTI::SK_Splice, cast<VectorType>(RetTy),
19341936
cast<VectorType>(Args[0]->getType()), {},
19351937
CostKind, Index, cast<VectorType>(RetTy));
19361938
}
@@ -2376,8 +2378,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
23762378
CostKind, 1, nullptr, nullptr);
23772379
Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, SearchTy,
23782380
CostKind, 0, nullptr, nullptr);
2379-
Cost += thisT()->getShuffleCost(TTI::SK_Broadcast, SearchTy, {}, CostKind,
2380-
0, nullptr);
2381+
Cost += thisT()->getShuffleCost(TTI::SK_Broadcast, SearchTy, SearchTy, {},
2382+
CostKind, 0, nullptr);
23812383
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SearchTy, RetTy,
23822384
CmpInst::ICMP_EQ, CostKind);
23832385
Cost +=
@@ -2961,8 +2963,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
29612963
while (NumVecElts > MVTLen) {
29622964
NumVecElts /= 2;
29632965
VectorType *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);
2964-
ShuffleCost += thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, {},
2965-
CostKind, NumVecElts, SubTy);
2966+
ShuffleCost += thisT()->getShuffleCost(
2967+
TTI::SK_ExtractSubvector, SubTy, Ty, {}, CostKind, NumVecElts, SubTy);
29662968
ArithCost += thisT()->getArithmeticInstrCost(Opcode, SubTy, CostKind);
29672969
Ty = SubTy;
29682970
++LongVectorCount;
@@ -2978,7 +2980,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
29782980
// By default reductions need one shuffle per reduction level.
29792981
ShuffleCost +=
29802982
NumReduxLevels * thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
2981-
{}, CostKind, 0, Ty);
2983+
Ty, {}, CostKind, 0, Ty);
29822984
ArithCost +=
29832985
NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty, CostKind);
29842986
return ShuffleCost + ArithCost +
@@ -3052,8 +3054,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
30523054
NumVecElts /= 2;
30533055
auto *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);
30543056

3055-
ShuffleCost += thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, {},
3056-
CostKind, NumVecElts, SubTy);
3057+
ShuffleCost += thisT()->getShuffleCost(
3058+
TTI::SK_ExtractSubvector, SubTy, Ty, {}, CostKind, NumVecElts, SubTy);
30573059

30583060
IntrinsicCostAttributes Attrs(IID, SubTy, {SubTy, SubTy}, FMF);
30593061
MinMaxCost += getIntrinsicInstrCost(Attrs, CostKind);
@@ -3069,7 +3071,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
30693071
// architecture-dependent length.
30703072
ShuffleCost +=
30713073
NumReduxLevels * thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
3072-
{}, CostKind, 0, Ty);
3074+
Ty, {}, CostKind, 0, Ty);
30733075
IntrinsicCostAttributes Attrs(IID, Ty, {Ty, Ty}, FMF);
30743076
MinMaxCost += NumReduxLevels * getIntrinsicInstrCost(Attrs, CostKind);
30753077
// The last min/max should be in vector registers and we counted it above.

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -985,11 +985,16 @@ InstructionCost TargetTransformInfo::getAltInstrCost(
985985
}
986986

987987
InstructionCost TargetTransformInfo::getShuffleCost(
988-
ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
988+
ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef<int> Mask,
989989
TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
990990
ArrayRef<const Value *> Args, const Instruction *CxtI) const {
991-
InstructionCost Cost = TTIImpl->getShuffleCost(Kind, Ty, Mask, CostKind,
992-
Index, SubTp, Args, CxtI);
991+
assert((Mask.empty() || DstTy->isScalableTy() ||
992+
Mask.size() == DstTy->getElementCount().getKnownMinValue()) &&
993+
"Expected the Mask to match the return size if given");
994+
assert(SrcTy->getScalarType() == DstTy->getScalarType() &&
995+
"Expected the same scalar types");
996+
InstructionCost Cost = TTIImpl->getShuffleCost(
997+
Kind, DstTy, SrcTy, Mask, CostKind, Index, SubTp, Args, CxtI);
993998
assert(Cost >= 0 && "TTI should not produce negative costs!");
994999
return Cost;
9951000
}

0 commit comments

Comments
 (0)