Skip to content

Commit c918bde

Browse files
committed
[ARM][SLP] Fix incorrect cost function for SLP Vectorization of ZExt/SExt
PR #117350 changed the SLP vectorizer in a way that introduced a regression on ARM vectorization benchmarks. The regression was caused by the change assuming that vector SExt/ZExt instructions have a constant cost. That assumption holds for RISC-V but not for ARM, where the source and destination types of SExt/ZExt instructions are taken into account when calculating the vector cost. Change-Id: I6f995dcde26e5aaf62b779b63e52988fb333f941
1 parent 05bd7d2 commit c918bde

File tree

6 files changed

+29
-1
lines changed

6 files changed

+29
-1
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1184,6 +1184,11 @@ class TargetTransformInfo {
11841184
/// \return true if vscale is known to be a power of 2
11851185
bool isVScaleKnownToBeAPowerOfTwo() const;
11861186

1187+
/// \return true if the target's cost model treats vector SExt and ZExt
1188+
/// instructions as having a constant cost.
1189+
bool isSExtCostConstant() const;
1190+
bool isZExtCostConstant() const;
1191+
11871192
/// \return True if the vectorization factor should be chosen to
11881193
/// make the vector of the smallest element type match the size of a
11891194
/// vector register. For wider element types, this could result in
@@ -2065,6 +2070,8 @@ class TargetTransformInfo::Concept {
20652070
virtual std::optional<unsigned> getMaxVScale() const = 0;
20662071
virtual std::optional<unsigned> getVScaleForTuning() const = 0;
20672072
virtual bool isVScaleKnownToBeAPowerOfTwo() const = 0;
2073+
virtual bool isSExtCostConstant() const = 0;
2074+
virtual bool isZExtCostConstant() const = 0;
20682075
virtual bool
20692076
shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const = 0;
20702077
virtual ElementCount getMinimumVF(unsigned ElemWidth,
@@ -2719,6 +2726,8 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
27192726
bool isVScaleKnownToBeAPowerOfTwo() const override {
27202727
return Impl.isVScaleKnownToBeAPowerOfTwo();
27212728
}
2729+
bool isSExtCostConstant() const override { return Impl.isSExtCostConstant(); }
2730+
bool isZExtCostConstant() const override { return Impl.isZExtCostConstant(); }
27222731
bool shouldMaximizeVectorBandwidth(
27232732
TargetTransformInfo::RegisterKind K) const override {
27242733
return Impl.shouldMaximizeVectorBandwidth(K);

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -530,6 +530,8 @@ class TargetTransformInfoImplBase {
530530
std::optional<unsigned> getMaxVScale() const { return std::nullopt; }
531531
std::optional<unsigned> getVScaleForTuning() const { return std::nullopt; }
532532
bool isVScaleKnownToBeAPowerOfTwo() const { return false; }
533+
bool isSExtCostConstant() const { return true; }
534+
bool isZExtCostConstant() const { return true; }
533535

534536
bool
535537
shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const {

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -801,6 +801,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
801801
std::optional<unsigned> getVScaleForTuning() const { return std::nullopt; }
802802
bool isVScaleKnownToBeAPowerOfTwo() const { return false; }
803803

804+
bool isSExtCostConstant() const { return true; }
805+
bool isZExtCostConstant() const { return true; }
806+
804807
/// Estimate the overhead of scalarizing an instruction. Insert and Extract
805808
/// are set if the demanded result elements need to be inserted and/or
806809
/// extracted from vectors.

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1405,6 +1405,14 @@ bool TargetTransformInfo::isProfitableToSinkOperands(
14051405
return TTIImpl->isProfitableToSinkOperands(I, OpsToSink);
14061406
}
14071407

1408+
bool TargetTransformInfo::isSExtCostConstant() const {
1409+
return TTIImpl->isSExtCostConstant();
1410+
}
1411+
1412+
bool TargetTransformInfo::isZExtCostConstant() const {
1413+
return TTIImpl->isZExtCostConstant();
1414+
}
1415+
14081416
bool TargetTransformInfo::isVectorShiftByScalarCheap(Type *Ty) const {
14091417
return TTIImpl->isVectorShiftByScalarCheap(Ty);
14101418
}

llvm/lib/Target/ARM/ARMTargetTransformInfo.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,11 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
109109

110110
bool enableInterleavedAccessVectorization() { return true; }
111111

112+
// The cost model for vector SExt and ZExt takes the source and destination
113+
// vector types into account for MVE and NEON, so the cost is not constant.
114+
bool isSExtCostConstant() const { return false; }
115+
bool isZExtCostConstant() const { return false; }
116+
112117
TTI::AddressingModeKind
113118
getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const;
114119

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11440,7 +11440,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
1144011440
I->getOpcode());
1144111441
});
1144211442
if (IsArithmeticExtendedReduction &&
11443-
(VecOpcode == Instruction::ZExt || VecOpcode == Instruction::SExt))
11443+
(VecOpcode == Instruction::ZExt || VecOpcode == Instruction::SExt) &&
11444+
(TTI->isZExtCostConstant() && TTI->isSExtCostConstant()))
1144411445
return CommonCost;
1144511446
return CommonCost +
1144611447
TTI->getCastInstrCost(VecOpcode, VecTy, SrcVecTy, CCH, CostKind,

0 commit comments

Comments
 (0)