Skip to content

Commit c3ad6a3

Browse files
committed
[LV] Fix MVE regression from llvm#132190
Previously, register pressure was only considered when vector bandwidth was being maximised (enabled either by the target or by user options), but llvm#132190 inadvertently caused VFs with high register pressure to be pruned even when max bandwidth wasn't enabled. This PR restores the previous behaviour.
1 parent 54d544b commit c3ad6a3

File tree

1 file changed

+21
-6
lines changed

1 file changed

+21
-6
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -959,6 +959,10 @@ class LoopVectorizationCostModel {
959959
return expectedCost(UserVF).isValid();
960960
}
961961

962+
/// \return True if maximizing vector bandwidth is enabled by the target or
963+
/// user options.
964+
bool useMaxBandwidth(TargetTransformInfo::RegisterKind RegKind);
965+
962966
/// \return The size (in bits) of the smallest and widest types in the code
963967
/// that needs to be vectorized. We ignore values that remain scalar such as
964968
/// 64 bit loop indices.
@@ -3926,6 +3930,14 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
39263930
return FixedScalableVFPair::getNone();
39273931
}
39283932

3933+
/// Decide whether vector bandwidth should be maximized for registers of kind
/// \p RegKind.
///
/// \return True if MaximizeBandwidth evaluates to true. Otherwise, only when
/// MaximizeBandwidth.getNumOccurrences() is 0 (presumably meaning the option
/// was not given explicitly -- confirm against the option's definition),
/// true if TTI.shouldMaximizeVectorBandwidth(RegKind) holds, or if
/// UseWiderVFIfCallVariantsPresent is set and Legal->hasVectorCallVariants()
/// holds. False in every other case, including when MaximizeBandwidth is
/// false with a non-zero occurrence count.
bool LoopVectorizationCostModel::useMaxBandwidth(
3934+
TargetTransformInfo::RegisterKind RegKind) {
3935+
return MaximizeBandwidth || (MaximizeBandwidth.getNumOccurrences() == 0 &&
3936+
(TTI.shouldMaximizeVectorBandwidth(RegKind) ||
3937+
(UseWiderVFIfCallVariantsPresent &&
3938+
Legal->hasVectorCallVariants())));
3939+
}
3940+
39293941
ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
39303942
unsigned MaxTripCount, unsigned SmallestType, unsigned WidestType,
39313943
ElementCount MaxSafeVF, bool FoldTailByMasking) {
@@ -3991,10 +4003,7 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
39914003
ComputeScalableMaxVF ? TargetTransformInfo::RGK_ScalableVector
39924004
: TargetTransformInfo::RGK_FixedWidthVector;
39934005
ElementCount MaxVF = MaxVectorElementCount;
3994-
if (MaximizeBandwidth ||
3995-
(MaximizeBandwidth.getNumOccurrences() == 0 &&
3996-
(TTI.shouldMaximizeVectorBandwidth(RegKind) ||
3997-
(UseWiderVFIfCallVariantsPresent && Legal->hasVectorCallVariants())))) {
4006+
if (useMaxBandwidth(RegKind)) {
39984007
auto MaxVectorElementCountMaxBW = ElementCount::get(
39994008
llvm::bit_floor(WidestRegister.getKnownMinValue() / SmallestType),
40004009
ComputeScalableMaxVF);
@@ -4357,7 +4366,10 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
43574366

43584367
/// Don't consider the VF if it exceeds the number of registers for the
43594368
/// target.
4360-
if (RU.exceedsMaxNumRegs(TTI))
4369+
if (CM.useMaxBandwidth(VF.isScalable()
4370+
? TargetTransformInfo::RGK_ScalableVector
4371+
: TargetTransformInfo::RGK_FixedWidthVector) &&
4372+
RU.exceedsMaxNumRegs(TTI))
43614373
continue;
43624374

43634375
InstructionCost C = CM.expectedCost(VF);
@@ -7127,7 +7139,10 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
71277139
InstructionCost Cost = cost(*P, VF);
71287140
VectorizationFactor CurrentFactor(VF, Cost, ScalarCost);
71297141

7130-
if (RU.exceedsMaxNumRegs(TTI)) {
7142+
if (CM.useMaxBandwidth(VF.isScalable()
7143+
? TargetTransformInfo::RGK_ScalableVector
7144+
: TargetTransformInfo::RGK_FixedWidthVector) &&
7145+
RU.exceedsMaxNumRegs(TTI)) {
71317146
LLVM_DEBUG(dbgs() << "LV(REG): Not considering vector loop of width "
71327147
<< VF << " because it uses too many registers\n");
71337148
continue;

0 commit comments

Comments
 (0)