@@ -390,6 +390,12 @@ static cl::opt<bool> EnableEarlyExitVectorization(
390
390
cl::desc(
391
391
" Enable vectorization of early exit loops with uncountable exits." ));
392
392
393
// Hidden command-line cap (default 1) on the number of potentially faulting
// loads tolerated in a loop with an uncountable early exit. processLoop
// compares LVL.getNumPotentiallyFaultingLoads() against this value and refuses
// to vectorize the loop when the count is larger.
+ static cl::opt<unsigned > MaxNumPotentiallyFaultingPointers (
394
+ " max-num-faulting-pointers" , cl::init(1 ), cl::Hidden,
395
+ cl::desc(
396
+ " The maximum number of potentially faulting pointers we permit when "
397
+ " vectorizing loops with uncountable exits." ));
398
+
393
399
// Likelihood of bypassing the vectorized loop because assumptions about SCEV
394
400
// variables not overflowing do not hold. See `emitSCEVChecks`.
395
401
static constexpr uint32_t SCEVCheckBypassWeights[] = {1 , 127 };
@@ -1582,6 +1588,22 @@ class LoopVectorizationCostModel {
1582
1588
ElementCount MaxSafeVF,
1583
1589
bool FoldTailByMasking);
1584
1590
1591
+ bool isSafeForAnyVectorWidth () const {
1592
+ return Legal->isSafeForAnyVectorWidth () &&
1593
+ (!Legal->hasUncountableEarlyExit () ||
1594
+ !Legal->getNumPotentiallyFaultingLoads ());
1595
+ }
1596
+
1597
+ uint64_t getMaxSafeVectorWidthInBits () const {
1598
+ uint64_t MaxSafeVectorWidth = Legal->getMaxSafeVectorWidthInBits ();
1599
+ // The legalizer bails out if getMinPageSize does not return a value.
1600
+ if (Legal->hasUncountableEarlyExit () &&
1601
+ Legal->getNumPotentiallyFaultingLoads ())
1602
+ MaxSafeVectorWidth =
1603
+ std::min (MaxSafeVectorWidth, uint64_t (*TTI.getMinPageSize ()) * 8 );
1604
+ return MaxSafeVectorWidth;
1605
+ }
1606
+
1585
1607
// / Checks if scalable vectorization is supported and enabled. Caches the
1586
1608
// / result to avoid repeated debug dumps for repeated queries.
1587
1609
bool isScalableVectorizationAllowed ();
@@ -2123,6 +2145,41 @@ class GeneratedRTChecks {
2123
2145
};
2124
2146
} // namespace
2125
2147
2148
+ std::optional<unsigned > getMaxVScale (const Function &F,
2149
+ const TargetTransformInfo &TTI) {
2150
+ if (std::optional<unsigned > MaxVScale = TTI.getMaxVScale ())
2151
+ return MaxVScale;
2152
+
2153
+ if (F.hasFnAttribute (Attribute::VScaleRange))
2154
+ return F.getFnAttribute (Attribute::VScaleRange).getVScaleRangeMax ();
2155
+
2156
+ return std::nullopt;
2157
+ }
2158
+
2159
+ static void addPointerAlignmentChecks (
2160
+ const SmallVectorImpl<std::pair<LoadInst *, const SCEV *>> *Loads,
2161
+ Function *F, PredicatedScalarEvolution &PSE, TargetTransformInfo *TTI,
2162
+ ElementCount VF) {
2163
+ ScalarEvolution *SE = PSE.getSE ();
2164
+ const DataLayout &DL = SE->getDataLayout ();
2165
+ Type *PtrIntType = DL.getIntPtrType (SE->getContext ());
2166
+
2167
+ const SCEV *Zero = SE->getZero (PtrIntType);
2168
+ const SCEV *ScevEC = SE->getElementCount (PtrIntType, VF);
2169
+
2170
+ for (auto Load : *Loads) {
2171
+ APInt EltSize (
2172
+ DL.getIndexTypeSizeInBits (Load.first ->getPointerOperandType ()),
2173
+ DL.getTypeStoreSize (Load.first ->getType ()).getFixedValue ());
2174
+ const SCEV *Start = SE->getPtrToIntExpr (Load.second , PtrIntType);
2175
+ const SCEV *Align =
2176
+ SE->getMulExpr (ScevEC, SE->getConstant (EltSize),
2177
+ (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW));
2178
+ const SCEV *Rem = SE->getURemExpr (Start, Align);
2179
+ PSE.addPredicate (*(SE->getEqualPredicate (Rem, Zero)));
2180
+ }
2181
+ }
2182
+
2126
2183
static bool useActiveLaneMask (TailFoldingStyle Style) {
2127
2184
return Style == TailFoldingStyle::Data ||
2128
2185
Style == TailFoldingStyle::DataAndControlFlow ||
@@ -2292,17 +2349,6 @@ emitTransformedIndex(IRBuilderBase &B, Value *Index, Value *StartValue,
2292
2349
llvm_unreachable (" invalid enum" );
2293
2350
}
2294
2351
2295
- std::optional<unsigned > getMaxVScale (const Function &F,
2296
- const TargetTransformInfo &TTI) {
2297
- if (std::optional<unsigned > MaxVScale = TTI.getMaxVScale ())
2298
- return MaxVScale;
2299
-
2300
- if (F.hasFnAttribute (Attribute::VScaleRange))
2301
- return F.getFnAttribute (Attribute::VScaleRange).getVScaleRangeMax ();
2302
-
2303
- return std::nullopt;
2304
- }
2305
-
2306
2352
// / For the given VF and UF and maximum trip count computed for the loop, return
2307
2353
// / whether the induction variable might overflow in the vectorized loop. If not,
2308
2354
// / then we know a runtime overflow check always evaluates to false and can be
@@ -3899,13 +3945,22 @@ bool LoopVectorizationCostModel::isScalableVectorizationAllowed() {
3899
3945
return false ;
3900
3946
}
3901
3947
3902
- if (!Legal-> isSafeForAnyVectorWidth () && !getMaxVScale (*TheFunction, TTI)) {
3948
+ if (!isSafeForAnyVectorWidth () && !getMaxVScale (*TheFunction, TTI)) {
3903
3949
reportVectorizationInfo (" The target does not provide maximum vscale value "
3904
3950
" for safe distance analysis." ,
3905
3951
" ScalableVFUnfeasible" , ORE, TheLoop);
3906
3952
return false ;
3907
3953
}
3908
3954
3955
+ if (Legal->hasUncountableEarlyExit () &&
3956
+ Legal->getNumPotentiallyFaultingLoads () &&
3957
+ !TTI.isVScaleKnownToBeAPowerOfTwo ()) {
3958
+ reportVectorizationInfo (" Cannot vectorize potentially faulting early exit "
3959
+ " loop with scalable vectors." ,
3960
+ " ScalableVFUnfeasible" , ORE, TheLoop);
3961
+ return false ;
3962
+ }
3963
+
3909
3964
IsScalableVectorizationAllowed = true ;
3910
3965
return true ;
3911
3966
}
@@ -3917,7 +3972,7 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
3917
3972
3918
3973
auto MaxScalableVF = ElementCount::getScalable (
3919
3974
std::numeric_limits<ElementCount::ScalarTy>::max ());
3920
- if (Legal-> isSafeForAnyVectorWidth ())
3975
+ if (isSafeForAnyVectorWidth ())
3921
3976
return MaxScalableVF;
3922
3977
3923
3978
std::optional<unsigned > MaxVScale = getMaxVScale (*TheFunction, TTI);
@@ -3944,11 +3999,11 @@ FixedScalableVFPair LoopVectorizationCostModel::computeFeasibleMaxVF(
3944
3999
// the memory accesses that is most restrictive (involved in the smallest
3945
4000
// dependence distance).
3946
4001
unsigned MaxSafeElements =
3947
- llvm::bit_floor (Legal-> getMaxSafeVectorWidthInBits () / WidestType);
4002
+ llvm::bit_floor (getMaxSafeVectorWidthInBits () / WidestType);
3948
4003
3949
4004
auto MaxSafeFixedVF = ElementCount::getFixed (MaxSafeElements);
3950
4005
auto MaxSafeScalableVF = getMaxLegalScalableVF (MaxSafeElements);
3951
- if (!Legal-> isSafeForAnyVectorWidth ())
4006
+ if (!isSafeForAnyVectorWidth ())
3952
4007
this ->MaxSafeElements = MaxSafeElements;
3953
4008
3954
4009
LLVM_DEBUG (dbgs () << " LV: The max safe fixed VF is: " << MaxSafeFixedVF
@@ -10346,11 +10401,25 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10346
10401
return false ;
10347
10402
}
10348
10403
10349
- if (LVL.hasUncountableEarlyExit () && !EnableEarlyExitVectorization) {
10350
- reportVectorizationFailure (" Auto-vectorization of loops with uncountable "
10351
- " early exit is not enabled" ,
10352
- " UncountableEarlyExitLoopsDisabled" , ORE, L);
10353
- return false ;
10404
+ if (LVL.hasUncountableEarlyExit ()) {
10405
+ if (!EnableEarlyExitVectorization) {
10406
+ reportVectorizationFailure (" Auto-vectorization of loops with uncountable "
10407
+ " early exit is not enabled" ,
10408
+ " UncountableEarlyExitLoopsDisabled" , ORE, L);
10409
+ return false ;
10410
+ }
10411
+
10412
+ unsigned NumPotentiallyFaultingPointers =
10413
+ LVL.getNumPotentiallyFaultingLoads ();
10414
+ if (NumPotentiallyFaultingPointers > MaxNumPotentiallyFaultingPointers) {
10415
+ reportVectorizationFailure (" Not worth vectorizing loop with uncountable "
10416
+ " early exit, due to number of potentially "
10417
+ " faulting loads" ,
10418
+ " UncountableEarlyExitMayFault" , ORE, L);
10419
+ return false ;
10420
+ } else if (NumPotentiallyFaultingPointers)
10421
+ LLVM_DEBUG (dbgs () << " LV: Need to version early-exit vector loop with "
10422
+ << " pointer alignment checks.\n " );
10354
10423
}
10355
10424
10356
10425
if (LVL.hasStructVectorCall ()) {
@@ -10508,8 +10577,19 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10508
10577
unsigned SelectedIC = std::max (IC, UserIC);
10509
10578
// Optimistically generate runtime checks if they are needed. Drop them if
10510
10579
// they turn out to not be profitable.
10511
- if (VF.Width .isVector () || SelectedIC > 1 )
10580
+ if (VF.Width .isVector () || SelectedIC > 1 ) {
10581
+ if (LVL.getNumPotentiallyFaultingLoads ()) {
10582
+ assert (SelectedIC == 1 &&
10583
+ " Interleaving not supported for early exit loops and "
10584
+ " potentially faulting loads" );
10585
+ assert (!CM.foldTailWithEVL () &&
10586
+ " Explicit vector length unsupported for early exit loops and "
10587
+ " potentially faulting loads" );
10588
+ addPointerAlignmentChecks (LVL.getPotentiallyFaultingLoads (), F, PSE,
10589
+ TTI, VF.Width );
10590
+ }
10512
10591
Checks.create (L, *LVL.getLAI (), PSE.getPredicate (), VF.Width , SelectedIC);
10592
+ }
10513
10593
10514
10594
// Check if it is profitable to vectorize with runtime checks.
10515
10595
bool ForceVectorization =
0 commit comments