Skip to content

Commit 37b4441

Browse files
committed
[SLP] NFC: Introduce and use getDataFlowCost
Change-Id: I6a9155f4af3f8ccc943ab9d46c07dab07dc9b5c5
1 parent 39f7846 commit 37b4441

File tree

7 files changed

+123
-6
lines changed

7 files changed

+123
-6
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1534,6 +1534,14 @@ class TargetTransformInfo {
15341534
Function *F, Type *RetTy, ArrayRef<Type *> Tys,
15351535
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;
15361536

1537+
/// \returns The cost of propagating Type \p DataType through Basic Block /
1538+
/// function boundaries. If \p IsCallingConv is specified, then \p DataType is
1539+
/// associated with either a function argument or return. Otherwise, \p
1540+
/// DataType is used in either a GEP instruction, or spans across BasicBlocks
1541+
/// (this is relevant because SelectionDAG builder may, for example, scalarize
1542+
/// illegal vectors across blocks, which introduces extract/insert code).
1543+
InstructionCost getDataFlowCost(Type *DataType, bool IsCallingConv) const;
1544+
15371545
/// \returns The number of pieces into which the provided type must be
15381546
/// split during legalization. Zero is returned when the answer is unknown.
15391547
unsigned getNumberOfParts(Type *Tp) const;
@@ -2096,6 +2104,8 @@ class TargetTransformInfo::Concept {
20962104
virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy,
20972105
ArrayRef<Type *> Tys,
20982106
TTI::TargetCostKind CostKind) = 0;
2107+
virtual InstructionCost getDataFlowCost(Type *DataType,
2108+
bool IsCallingConv) = 0;
20992109
virtual unsigned getNumberOfParts(Type *Tp) = 0;
21002110
virtual InstructionCost
21012111
getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr) = 0;
@@ -2781,6 +2791,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
27812791
TTI::TargetCostKind CostKind) override {
27822792
return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);
27832793
}
2794+
// Concept override: passes DataType and IsCallingConv straight through to
// Impl.getDataFlowCost and returns its result unchanged.
InstructionCost getDataFlowCost(Type *DataType, bool IsCallingConv) override {
2795+
return Impl.getDataFlowCost(DataType, IsCallingConv);
2796+
}
27842797
unsigned getNumberOfParts(Type *Tp) override {
27852798
return Impl.getNumberOfParts(Tp);
27862799
}

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -772,6 +772,10 @@ class TargetTransformInfoImplBase {
772772
return 1;
773773
}
774774

775+
// Default implementation: reports a data-flow cost of zero. Neither DataType
// nor IsCallingConv is consulted.
InstructionCost getDataFlowCost(Type *DataType, bool IsCallingConv) const {
776+
return 0;
777+
}
778+
775779
// Assume that we have a register of the right size for the type.
776780
unsigned getNumberOfParts(Type *Tp) const { return 1; }
777781

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2410,6 +2410,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
24102410
return 10;
24112411
}
24122412

2413+
// Reports a data-flow cost of zero for every type; both parameters are
// ignored here.
InstructionCost getDataFlowCost(Type *DataType, bool IsCallingConv) {
2414+
return 0;
2415+
}
2416+
24132417
unsigned getNumberOfParts(Type *Tp) {
24142418
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
24152419
return LT.first.isValid() ? *LT.first.getValue() : 0;

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1116,6 +1116,13 @@ TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy,
11161116
return Cost;
11171117
}
11181118

1119+
// Queries the underlying TTI implementation for the data-flow cost of
// \p DataType and checks (in asserts builds) that the answer is not negative
// before handing it back to the caller.
InstructionCost TargetTransformInfo::getDataFlowCost(Type *DataType,
                                                     bool IsCallingConv) const {
  InstructionCost DataFlowCost =
      TTIImpl->getDataFlowCost(DataType, IsCallingConv);
  assert(DataFlowCost >= 0 && "TTI should not produce negative costs!");
  return DataFlowCost;
}
1125+
11191126
unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
11201127
return TTIImpl->getNumberOfParts(Tp);
11211128
}

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,14 @@ bool GCNTTIImpl::hasBranchDivergence(const Function *F) const {
306306
return !F || !ST->isSingleLaneExecution(*F);
307307
}
308308

309+
// Data-flow cost for AMDGPU. A type that is not legal and is not being
// costed for a calling-convention boundary is charged one unit per part
// reported by getNumberOfParts; every other query is answered by the base
// implementation.
InstructionCost GCNTTIImpl::getDataFlowCost(Type *DataType,
                                            bool IsCallingConv) {
  // Legality is still queried first, matching the original evaluation order.
  const bool ChargePerPart = !isTypeLegal(DataType) && !IsCallingConv;
  if (ChargePerPart)
    return getNumberOfParts(DataType);

  return BaseT::getDataFlowCost(DataType, IsCallingConv);
}
316+
309317
unsigned GCNTTIImpl::getNumberOfRegisters(unsigned RCID) const {
310318
// NB: RCID is not an RCID. In fact it is 0 or 1 for scalar or vector
311319
// registers. See getRegisterClassForType for the implementation.

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,8 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
161161
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
162162
const Instruction *I = nullptr);
163163

164+
InstructionCost getDataFlowCost(Type *DataType, bool IsCallingConv);
165+
164166
bool isInlineAsmSourceOfDivergence(const CallInst *CI,
165167
ArrayRef<unsigned> Indices = {}) const;
166168

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 85 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9044,6 +9044,51 @@ static SmallVector<Type *> buildIntrinsicArgTypes(const CallInst *CI,
90449044
return ArgTys;
90459045
}
90469046

9047+
// The cost model may determine that vectorizing and eliminating a series of
9048+
// ExtractElements is beneficial. However, if the input vector is a function
9049+
// argument, the calling convention may require extractions in the generated
9050+
// code. In this scenario, vectorization would then not eliminate the
9051+
// ExtractElement sequence, but would add additional vectorization code.
9052+
// getCCCostFromScalars does the proper accounting for this.
9053+
static unsigned getCCCostFromScalars(ArrayRef<Value *> &Scalars,
9054+
unsigned ScalarSize,
9055+
TargetTransformInfo *TTI) {
9056+
SetVector<Value *> ArgRoots;
9057+
for (unsigned I = 0; I < ScalarSize; I++) {
9058+
auto *Scalar = Scalars[I];
9059+
if (!Scalar)
9060+
continue;
9061+
auto *EE = dyn_cast<ExtractElementInst>(Scalar);
9062+
if (!EE)
9063+
continue;
9064+
9065+
auto *Vec = EE->getOperand(0);
9066+
if (!Vec->getType()->isVectorTy())
9067+
continue;
9068+
9069+
auto F = EE->getFunction();
9070+
auto FoundIt = find_if(
9071+
F->args(), [&Vec](Argument &I) { return Vec == cast<Value>(&I); });
9072+
9073+
if (FoundIt == F->arg_end())
9074+
continue;
9075+
9076+
if (!ArgRoots.contains(Vec))
9077+
ArgRoots.insert(Vec);
9078+
}
9079+
9080+
if (!ArgRoots.size())
9081+
return 0;
9082+
9083+
unsigned Cost = 0;
9084+
for (auto ArgOp : ArgRoots) {
9085+
Cost += TTI->getDataFlowCost(ArgOp->getType(), /*IsCallingConv*/ true)
9086+
.getValue()
9087+
.value_or(0);
9088+
}
9089+
return Cost;
9090+
}
9091+
90479092
InstructionCost
90489093
BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
90499094
SmallPtrSetImpl<Value *> &CheckedExtracts) {
@@ -9075,15 +9120,16 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
90759120
auto *FinalVecTy = FixedVectorType::get(ScalarTy, EntryVF);
90769121

90779122
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
9123+
InstructionCost CommonCost = getCCCostFromScalars(VL, VL.size(), TTI);
90789124
if (E->State == TreeEntry::NeedToGather) {
90799125
if (allConstant(VL))
9080-
return 0;
9126+
return CommonCost;
90819127
if (isa<InsertElementInst>(VL[0]))
90829128
return InstructionCost::getInvalid();
9083-
return processBuildVector<ShuffleCostEstimator, InstructionCost>(
9084-
E, ScalarTy, *TTI, VectorizedVals, *this, CheckedExtracts);
9129+
return CommonCost +
9130+
processBuildVector<ShuffleCostEstimator, InstructionCost>(
9131+
E, ScalarTy, *TTI, VectorizedVals, *this, CheckedExtracts);
90859132
}
9086-
InstructionCost CommonCost = 0;
90879133
SmallVector<int> Mask;
90889134
bool IsReverseOrder = isReverseOrder(E->ReorderIndices);
90899135
if (!E->ReorderIndices.empty() &&
@@ -10241,6 +10287,31 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
1024110287

1024210288
InstructionCost C = getEntryCost(&TE, VectorizedVals, CheckedExtracts);
1024310289
Cost += C;
10290+
10291+
// Calculate the cost difference of propagating a vector vs series of scalars
10292+
// across blocks. This may be nonzero in the case of illegal vectors.
10293+
Instruction *VL0 = TE.getMainOp();
10294+
bool IsAPhi = VL0 && isa<PHINode>(VL0);
10295+
bool HasNextEntry = VL0 && ((I + 1) < VectorizableTree.size());
10296+
bool LiveThru = false;
10297+
if (HasNextEntry) {
10298+
Instruction *VL1 = VectorizableTree[I + 1]->getMainOp();
10299+
LiveThru = VL1 && (VL0->getParent() != VL1->getParent());
10300+
}
10301+
if (IsAPhi || LiveThru) {
10302+
VectorType *VTy = dyn_cast<VectorType>(VL0->getType());
10303+
Type *ScalarTy = VTy ? VTy->getElementType() : VL0->getType();
10304+
if (ScalarTy && isValidElementType(ScalarTy)) {
10305+
InstructionCost ScalarDFlow =
10306+
TTI->getDataFlowCost(ScalarTy,
10307+
/*IsCallingConv*/ false) *
10308+
TE.getVectorFactor();
10309+
InstructionCost VectorDFlow =
10310+
TTI->getDataFlowCost(FixedVectorType::get(ScalarTy, TE.getVectorFactor()), /*IsCallingConv*/ false);
10311+
Cost += (VectorDFlow - ScalarDFlow);
10312+
}
10313+
}
10314+
1024410315
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C << " for bundle "
1024510316
<< shortBundleName(TE.Scalars) << ".\n"
1024610317
<< "SLP: Current total cost = " << Cost << "\n");
@@ -10257,15 +10328,24 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
1025710328
for (ExternalUser &EU : ExternalUses) {
1025810329
// We only add extract cost once for the same scalar.
1025910330
if (!isa_and_nonnull<InsertElementInst>(EU.User) &&
10260-
!ExtractCostCalculated.insert(EU.Scalar).second)
10331+
!ExtractCostCalculated.insert(EU.Scalar).second) {
1026110332
continue;
10333+
}
1026210334

1026310335
// Uses by ephemeral values are free (because the ephemeral value will be
1026410336
// removed prior to code generation, and so the extraction will be
1026510337
// removed as well).
1026610338
if (EphValues.count(EU.User))
1026710339
continue;
1026810340

10341+
// Account for any additional costs required by CallingConvention for the
10342+
// type.
10343+
if (isa_and_nonnull<ReturnInst>(EU.User)) {
10344+
Cost +=
10345+
TTI->getDataFlowCost(EU.Scalar->getType(), /*IsCallingConv*/ true);
10346+
continue;
10347+
}
10348+
1026910349
// No extract cost for vector "scalar"
1027010350
if (isa<FixedVectorType>(EU.Scalar->getType()))
1027110351
continue;
@@ -10566,7 +10646,6 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
1056610646
if (ViewSLPTree)
1056710647
ViewGraph(this, "SLP" + F->getName(), false, Str);
1056810648
#endif
10569-
1057010649
return Cost;
1057110650
}
1057210651

0 commit comments

Comments
 (0)