@@ -959,6 +959,10 @@ class LoopVectorizationCostModel {
959
959
return expectedCost (UserVF).isValid ();
960
960
}
961
961
962
+ /// \return True if maximizing vector bandwidth is enabled by the target or
963
+ /// user options.
964
+ bool useMaxBandwidth (TargetTransformInfo::RegisterKind RegKind);
965
+
962
966
/// \return The size (in bits) of the smallest and widest types in the code
963
967
/// that needs to be vectorized. We ignore values that remain scalar such as
964
968
/// 64 bit loop indices.
@@ -3926,6 +3930,14 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
3926
3930
return FixedScalableVFPair::getNone ();
3927
3931
}
3928
3932
3933
/// Reports whether maximizing vector bandwidth is enabled for registers of
/// kind \p RegKind.
///
/// \param RegKind Register kind; it is only forwarded to
///        TTI.shouldMaximizeVectorBandwidth().
/// \return true when MaximizeBandwidth evaluates to true. Otherwise the
///         remaining checks are consulted only if
///         MaximizeBandwidth.getNumOccurrences() is zero (presumably: the
///         option was not given explicitly -- confirm against its
///         declaration); in that case the result is true when either
///         TTI.shouldMaximizeVectorBandwidth(RegKind) is true, or
///         UseWiderVFIfCallVariantsPresent is true and
///         Legal->hasVectorCallVariants() is true.
///
/// Consequence of the expression below: a MaximizeBandwidth that is false but
/// has a non-zero occurrence count yields false without querying TTI or Legal.
+ bool LoopVectorizationCostModel::useMaxBandwidth (
3934
+ TargetTransformInfo::RegisterKind RegKind) {
3935
+ return MaximizeBandwidth || (MaximizeBandwidth.getNumOccurrences () == 0 &&
3936
+ (TTI.shouldMaximizeVectorBandwidth (RegKind) ||
3937
+ (UseWiderVFIfCallVariantsPresent &&
3938
+ Legal->hasVectorCallVariants ())));
3939
+ }
3940
+
3929
3941
ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget (
3930
3942
unsigned MaxTripCount, unsigned SmallestType, unsigned WidestType,
3931
3943
ElementCount MaxSafeVF, bool FoldTailByMasking) {
@@ -3991,10 +4003,7 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
3991
4003
ComputeScalableMaxVF ? TargetTransformInfo::RGK_ScalableVector
3992
4004
: TargetTransformInfo::RGK_FixedWidthVector;
3993
4005
ElementCount MaxVF = MaxVectorElementCount;
3994
- if (MaximizeBandwidth ||
3995
- (MaximizeBandwidth.getNumOccurrences () == 0 &&
3996
- (TTI.shouldMaximizeVectorBandwidth (RegKind) ||
3997
- (UseWiderVFIfCallVariantsPresent && Legal->hasVectorCallVariants ())))) {
4006
+ if (useMaxBandwidth (RegKind)) {
3998
4007
auto MaxVectorElementCountMaxBW = ElementCount::get (
3999
4008
llvm::bit_floor (WidestRegister.getKnownMinValue () / SmallestType),
4000
4009
ComputeScalableMaxVF);
@@ -4357,7 +4366,10 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
4357
4366
4358
4367
/// Don't consider the VF if it exceeds the number of registers for the
4359
4368
/// target.
4360
- if (RU.exceedsMaxNumRegs (TTI))
4369
+ if (CM.useMaxBandwidth (VF.isScalable ()
4370
+ ? TargetTransformInfo::RGK_ScalableVector
4371
+ : TargetTransformInfo::RGK_FixedWidthVector) &&
4372
+ RU.exceedsMaxNumRegs (TTI))
4361
4373
continue ;
4362
4374
4363
4375
InstructionCost C = CM.expectedCost (VF);
@@ -7127,7 +7139,10 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
7127
7139
InstructionCost Cost = cost (*P, VF);
7128
7140
VectorizationFactor CurrentFactor (VF, Cost, ScalarCost);
7129
7141
7130
- if (RU.exceedsMaxNumRegs (TTI)) {
7142
+ if (CM.useMaxBandwidth (VF.isScalable ()
7143
+ ? TargetTransformInfo::RGK_ScalableVector
7144
+ : TargetTransformInfo::RGK_FixedWidthVector) &&
7145
+ RU.exceedsMaxNumRegs (TTI)) {
7131
7146
LLVM_DEBUG (dbgs () << " LV(REG): Not considering vector loop of width "
7132
7147
<< VF << " because it uses too many registers\n " );
7133
7148
continue ;
0 commit comments