@@ -401,6 +401,12 @@ static cl::opt<bool> EnableEarlyExitVectorization(
401
401
cl::desc(
402
402
" Enable vectorization of early exit loops with uncountable exits." ));
403
403
404
+ static cl::opt<unsigned > MaxNumPotentiallyFaultingPointers (
405
+ " max-num-faulting-pointers" , cl::init(1 ), cl::Hidden,
406
+ cl::desc(
407
+ " The maximum number of potentially faulting pointers we permit when "
408
+ " vectorizing loops with uncountable exits." ));
409
+
404
410
// Likelihood of bypassing the vectorized loop because assumptions about SCEV
405
411
// variables not overflowing do not hold. See `emitSCEVChecks`.
406
412
static constexpr uint32_t SCEVCheckBypassWeights[] = {1 , 127 };
@@ -1615,6 +1621,22 @@ class LoopVectorizationCostModel {
1615
1621
ElementCount MaxSafeVF,
1616
1622
bool FoldTailByMasking);
1617
1623
1624
+ bool isSafeForAnyVectorWidth () const {
1625
+ return Legal->isSafeForAnyVectorWidth () &&
1626
+ (!Legal->hasUncountableEarlyExit () ||
1627
+ !Legal->getNumPotentiallyFaultingLoads ());
1628
+ }
1629
+
1630
+ uint64_t getMaxSafeVectorWidthInBits () const {
1631
+ uint64_t MaxSafeVectorWidth = Legal->getMaxSafeVectorWidthInBits ();
1632
+ // The legalizer bails out if getMinPageSize does not return a value.
1633
+ if (Legal->hasUncountableEarlyExit () &&
1634
+ Legal->getNumPotentiallyFaultingLoads ())
1635
+ MaxSafeVectorWidth =
1636
+ std::min (MaxSafeVectorWidth, uint64_t (*TTI.getMinPageSize ()) * 8 );
1637
+ return MaxSafeVectorWidth;
1638
+ }
1639
+
1618
1640
/// Checks if scalable vectorization is supported and enabled. Caches the
1619
1641
/// result to avoid repeated debug dumps for repeated queries.
1620
1642
bool isScalableVectorizationAllowed ();
@@ -2163,6 +2185,41 @@ class GeneratedRTChecks {
2163
2185
};
2164
2186
} // namespace
2165
2187
2188
+ std::optional<unsigned > getMaxVScale (const Function &F,
2189
+ const TargetTransformInfo &TTI) {
2190
+ if (std::optional<unsigned > MaxVScale = TTI.getMaxVScale ())
2191
+ return MaxVScale;
2192
+
2193
+ if (F.hasFnAttribute (Attribute::VScaleRange))
2194
+ return F.getFnAttribute (Attribute::VScaleRange).getVScaleRangeMax ();
2195
+
2196
+ return std::nullopt;
2197
+ }
2198
+
2199
+ static void addPointerAlignmentChecks (
2200
+ const SmallVectorImpl<std::pair<LoadInst *, const SCEV *>> *Loads,
2201
+ Function *F, PredicatedScalarEvolution &PSE, TargetTransformInfo *TTI,
2202
+ ElementCount VF) {
2203
+ ScalarEvolution *SE = PSE.getSE ();
2204
+ const DataLayout &DL = SE->getDataLayout ();
2205
+ Type *PtrIntType = DL.getIntPtrType (SE->getContext ());
2206
+
2207
+ const SCEV *Zero = SE->getZero (PtrIntType);
2208
+ const SCEV *ScevEC = SE->getElementCount (PtrIntType, VF);
2209
+
2210
+ for (auto Load : *Loads) {
2211
+ APInt EltSize (
2212
+ DL.getIndexTypeSizeInBits (Load.first ->getPointerOperandType ()),
2213
+ DL.getTypeStoreSize (Load.first ->getType ()).getFixedValue ());
2214
+ const SCEV *Start = SE->getPtrToIntExpr (Load.second , PtrIntType);
2215
+ const SCEV *Align =
2216
+ SE->getMulExpr (ScevEC, SE->getConstant (EltSize),
2217
+ (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW));
2218
+ const SCEV *Rem = SE->getURemExpr (Start, Align);
2219
+ PSE.addPredicate (*(SE->getEqualPredicate (Rem, Zero)));
2220
+ }
2221
+ }
2222
+
2166
2223
static bool useActiveLaneMask (TailFoldingStyle Style) {
2167
2224
return Style == TailFoldingStyle::Data ||
2168
2225
Style == TailFoldingStyle::DataAndControlFlow ||
@@ -2332,17 +2389,6 @@ emitTransformedIndex(IRBuilderBase &B, Value *Index, Value *StartValue,
2332
2389
llvm_unreachable (" invalid enum" );
2333
2390
}
2334
2391
2335
- std::optional<unsigned > getMaxVScale (const Function &F,
2336
- const TargetTransformInfo &TTI) {
2337
- if (std::optional<unsigned > MaxVScale = TTI.getMaxVScale ())
2338
- return MaxVScale;
2339
-
2340
- if (F.hasFnAttribute (Attribute::VScaleRange))
2341
- return F.getFnAttribute (Attribute::VScaleRange).getVScaleRangeMax ();
2342
-
2343
- return std::nullopt;
2344
- }
2345
-
2346
2392
/// For the given VF and UF and maximum trip count computed for the loop, return
2347
2393
/// whether the induction variable might overflow in the vectorized loop. If not,
2348
2394
/// then we know a runtime overflow check always evaluates to false and can be
@@ -3835,13 +3881,22 @@ bool LoopVectorizationCostModel::isScalableVectorizationAllowed() {
3835
3881
return false ;
3836
3882
}
3837
3883
3838
- if (!Legal-> isSafeForAnyVectorWidth () && !getMaxVScale (*TheFunction, TTI)) {
3884
+ if (!isSafeForAnyVectorWidth () && !getMaxVScale (*TheFunction, TTI)) {
3839
3885
reportVectorizationInfo (" The target does not provide maximum vscale value "
3840
3886
" for safe distance analysis." ,
3841
3887
" ScalableVFUnfeasible" , ORE, TheLoop);
3842
3888
return false ;
3843
3889
}
3844
3890
3891
+ if (Legal->hasUncountableEarlyExit () &&
3892
+ Legal->getNumPotentiallyFaultingLoads () &&
3893
+ !TTI.isVScaleKnownToBeAPowerOfTwo ()) {
3894
+ reportVectorizationInfo (" Cannot vectorize potentially faulting early exit "
3895
+ " loop with scalable vectors." ,
3896
+ " ScalableVFUnfeasible" , ORE, TheLoop);
3897
+ return false ;
3898
+ }
3899
+
3845
3900
IsScalableVectorizationAllowed = true ;
3846
3901
return true ;
3847
3902
}
@@ -3853,7 +3908,7 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
3853
3908
3854
3909
auto MaxScalableVF = ElementCount::getScalable (
3855
3910
std::numeric_limits<ElementCount::ScalarTy>::max ());
3856
- if (Legal-> isSafeForAnyVectorWidth ())
3911
+ if (isSafeForAnyVectorWidth ())
3857
3912
return MaxScalableVF;
3858
3913
3859
3914
std::optional<unsigned > MaxVScale = getMaxVScale (*TheFunction, TTI);
@@ -3880,11 +3935,11 @@ FixedScalableVFPair LoopVectorizationCostModel::computeFeasibleMaxVF(
3880
3935
// the memory accesses that is most restrictive (involved in the smallest
3881
3936
// dependence distance).
3882
3937
unsigned MaxSafeElements =
3883
- llvm::bit_floor (Legal-> getMaxSafeVectorWidthInBits () / WidestType);
3938
+ llvm::bit_floor (getMaxSafeVectorWidthInBits () / WidestType);
3884
3939
3885
3940
auto MaxSafeFixedVF = ElementCount::getFixed (MaxSafeElements);
3886
3941
auto MaxSafeScalableVF = getMaxLegalScalableVF (MaxSafeElements);
3887
- if (!Legal-> isSafeForAnyVectorWidth ())
3942
+ if (!isSafeForAnyVectorWidth ())
3888
3943
this ->MaxSafeElements = MaxSafeElements;
3889
3944
3890
3945
LLVM_DEBUG (dbgs () << " LV: The max safe fixed VF is: " << MaxSafeFixedVF
@@ -10428,11 +10483,25 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10428
10483
return false ;
10429
10484
}
10430
10485
10431
- if (LVL.hasUncountableEarlyExit () && !EnableEarlyExitVectorization) {
10432
- reportVectorizationFailure (" Auto-vectorization of loops with uncountable "
10433
- " early exit is not enabled" ,
10434
- " UncountableEarlyExitLoopsDisabled" , ORE, L);
10435
- return false ;
10486
+ if (LVL.hasUncountableEarlyExit ()) {
10487
+ if (!EnableEarlyExitVectorization) {
10488
+ reportVectorizationFailure (" Auto-vectorization of loops with uncountable "
10489
+ " early exit is not enabled" ,
10490
+ " UncountableEarlyExitLoopsDisabled" , ORE, L);
10491
+ return false ;
10492
+ }
10493
+
10494
+ unsigned NumPotentiallyFaultingPointers =
10495
+ LVL.getNumPotentiallyFaultingLoads ();
10496
+ if (NumPotentiallyFaultingPointers > MaxNumPotentiallyFaultingPointers) {
10497
+ reportVectorizationFailure (" Not worth vectorizing loop with uncountable "
10498
+ " early exit, due to number of potentially "
10499
+ " faulting loads" ,
10500
+ " UncountableEarlyExitMayFault" , ORE, L);
10501
+ return false ;
10502
+ } else if (NumPotentiallyFaultingPointers)
10503
+ LLVM_DEBUG (dbgs () << " LV: Need to version early-exit vector loop with "
10504
+ << " pointer alignment checks.\n " );
10436
10505
}
10437
10506
10438
10507
if (LVL.hasStructVectorCall ()) {
@@ -10590,8 +10659,19 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10590
10659
unsigned SelectedIC = std::max (IC, UserIC);
10591
10660
// Optimistically generate runtime checks if they are needed. Drop them if
10592
10661
// they turn out to not be profitable.
10593
- if (VF.Width .isVector () || SelectedIC > 1 )
10662
+ if (VF.Width .isVector () || SelectedIC > 1 ) {
10663
+ if (LVL.getNumPotentiallyFaultingLoads ()) {
10664
+ assert (SelectedIC == 1 &&
10665
+ " Interleaving not supported for early exit loops and "
10666
+ " potentially faulting loads" );
10667
+ assert (!CM.foldTailWithEVL () &&
10668
+ " Explicit vector length unsupported for early exit loops and "
10669
+ " potentially faulting loads" );
10670
+ addPointerAlignmentChecks (LVL.getPotentiallyFaultingLoads (), F, PSE,
10671
+ TTI, VF.Width );
10672
+ }
10594
10673
Checks.create (L, *LVL.getLAI (), PSE.getPredicate (), VF.Width , SelectedIC);
10674
+ }
10595
10675
10596
10676
// Check if it is profitable to vectorize with runtime checks.
10597
10677
bool ForceVectorization =
0 commit comments