[SCEV] Prove no-self-wrap from negative power of two step (#101416)

preames · nikic · web-flow · commit f0944f4be0b3 · 2024-08-01T13:17:07.000-07:00
We have existing code which reasons about a step evenly dividing the
iteration space is a finite loop with a single exit implying
no-self-wrap. The sign of the step doesn't effect this.

---------

Co-authored-by: Nikita Popov &lt;github@npopov.com&gt;
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1028,8 +1028,10 @@ class ScalarEvolution {
   /// Test if the given expression is known to be non-zero.
   bool isKnownNonZero(const SCEV *S);
 
-  /// Test if the given expression is known to be a power of 2.
-  bool isKnownToBeAPowerOfTwo(const SCEV *S, bool OrZero = false);
+  /// Test if the given expression is known to be a power of 2.  OrNegative
+  /// allows matching negative power of 2s, and OrZero allows matching 0.
+  bool isKnownToBeAPowerOfTwo(const SCEV *S, bool OrZero = false,
+                              bool OrNegative = false);
 
   /// Splits SCEV expression \p S into two SCEVs. One of them is obtained from
   /// \p S by substitution of all AddRec sub-expression related to loop \p L
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -9158,7 +9158,8 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromICmp(
       InnerLHS = ZExt->getOperand();
     if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(InnerLHS);
         AR && !AR->hasNoSelfWrap() && AR->getLoop() == L && AR->isAffine() &&
-        isKnownToBeAPowerOfTwo(AR->getStepRecurrence(*this), /*OrZero=*/true)) {
+        isKnownToBeAPowerOfTwo(AR->getStepRecurrence(*this), /*OrZero=*/true,
+                               /*OrNegative=*/true)) {
       auto Flags = AR->getNoWrapFlags();
       Flags = setFlags(Flags, SCEV::FlagNW);
       SmallVector<const SCEV *> Operands{AR->operands()};
@@ -10843,10 +10844,13 @@ bool ScalarEvolution::isKnownNonZero(const SCEV *S) {
   return getUnsignedRangeMin(S) != 0;
 }
 
-bool ScalarEvolution::isKnownToBeAPowerOfTwo(const SCEV *S, bool OrZero) {
-  auto NonRecursive = [this](const SCEV *S) {
+bool ScalarEvolution::isKnownToBeAPowerOfTwo(const SCEV *S, bool OrZero,
+                                             bool OrNegative) {
+  auto NonRecursive = [this, OrNegative](const SCEV *S) {
     if (auto *C = dyn_cast<SCEVConstant>(S))
-      return C->getAPInt().isPowerOf2();
+      return C->getAPInt().isPowerOf2() ||
+             (OrNegative && C->getAPInt().isNegatedPowerOf2());
+
     // The vscale_range indicates vscale is a power-of-two.
     return isa<SCEVVScale>(S) && F.hasFnAttribute(Attribute::VScaleRange);
   };
@@ -12790,7 +12794,8 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
     if (!isLoopInvariant(RHS, L))
       return false;
 
-    if (!isKnownToBeAPowerOfTwo(AR->getStepRecurrence(*this), /*OrZero=*/true))
+    if (!isKnownToBeAPowerOfTwo(AR->getStepRecurrence(*this), /*OrZero=*/true,
+                                /*OrNegative*/ true))
       return false;
 
     if (!ControlsOnlyExit || !loopHasNoAbnormalExits(L))
diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-scalable-stride.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-scalable-stride.ll
@@ -444,8 +444,6 @@ for.end:                                          ; preds = %for.body, %entry
 
 ; The next two cases check to see if we can infer the flags on the IV
 ; of a countdown loop using vscale strides.
-; TODO: We should be able to because vscale is a power of two and these
-; are finite loops by assumption.
 
 define void @vscale_countdown_ne(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
 ; CHECK-LABEL: 'vscale_countdown_ne'
@@ -455,15 +453,16 @@ define void @vscale_countdown_ne(ptr nocapture %A, i32 %n) mustprogress vscale_r
 ; CHECK-NEXT:    %start = sub i32 %n, %vscale
 ; CHECK-NEXT:    --> ((-1 * vscale)<nsw> + %n) U: full-set S: full-set
 ; CHECK-NEXT:    %iv = phi i32 [ %sub, %for.body ], [ %start, %entry ]
-; CHECK-NEXT:    --> {((-1 * vscale)<nsw> + %n),+,(-1 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:    --> {((-1 * vscale)<nsw> + %n),+,(-1 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: ((vscale * (-1 + (-1 * (((-2 * vscale)<nsw> + %n) /u vscale))<nsw>)<nsw>) + %n) LoopDispositions: { %for.body: Computable }
 ; CHECK-NEXT:    %arrayidx = getelementptr inbounds i32, ptr %A, i32 %iv
-; CHECK-NEXT:    --> {((4 * %n) + (-4 * vscale)<nsw> + %A),+,(-4 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:    --> {((4 * %n) + (-4 * vscale)<nsw> + %A),+,(-4 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: ((4 * %n) + (vscale * (-4 + (-4 * (((-2 * vscale)<nsw> + %n) /u vscale)))) + %A) LoopDispositions: { %for.body: Computable }
 ; CHECK-NEXT:    %sub = sub i32 %iv, %vscale
-; CHECK-NEXT:    --> {((-2 * vscale)<nsw> + %n),+,(-1 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:    --> {((-2 * vscale)<nsw> + %n),+,(-1 * vscale)<nsw>}<nw><%for.body> U: full-set S: full-set Exits: ((vscale * (-2 + (-1 * (((-2 * vscale)<nsw> + %n) /u vscale))<nsw>)) + %n) LoopDispositions: { %for.body: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @vscale_countdown_ne
-; CHECK-NEXT:  Loop %for.body: Unpredictable backedge-taken count.
-; CHECK-NEXT:  Loop %for.body: Unpredictable constant max backedge-taken count.
-; CHECK-NEXT:  Loop %for.body: Unpredictable symbolic max backedge-taken count.
+; CHECK-NEXT:  Loop %for.body: backedge-taken count is (((-2 * vscale)<nsw> + %n) /u vscale)
+; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 2147483647
+; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is (((-2 * vscale)<nsw> + %n) /u vscale)
+; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
 ;
 entry:
   %vscale = call i32 @llvm.vscale.i32()
@@ -495,15 +494,16 @@ define void @vscalex4_countdown_ne(ptr nocapture %A, i32 %n) mustprogress vscale
 ; CHECK-NEXT:    %start = sub i32 %n, %VF
 ; CHECK-NEXT:    --> ((-4 * vscale)<nsw> + %n) U: full-set S: full-set
 ; CHECK-NEXT:    %iv = phi i32 [ %sub, %for.body ], [ %start, %entry ]
-; CHECK-NEXT:    --> {((-4 * vscale)<nsw> + %n),+,(-4 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:    --> {((-4 * vscale)<nsw> + %n),+,(-4 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: ((vscale * (-4 + (-4 * (((-8 * vscale)<nsw> + %n) /u (4 * vscale)<nuw><nsw>))<nsw>)<nsw>) + %n) LoopDispositions: { %for.body: Computable }
 ; CHECK-NEXT:    %arrayidx = getelementptr inbounds i32, ptr %A, i32 %iv
-; CHECK-NEXT:    --> {((4 * %n) + (-16 * vscale)<nsw> + %A),+,(-16 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:    --> {((4 * %n) + (-16 * vscale)<nsw> + %A),+,(-16 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: ((4 * %n) + (vscale * (-16 + (-16 * (((-8 * vscale)<nsw> + %n) /u (4 * vscale)<nuw><nsw>)))) + %A) LoopDispositions: { %for.body: Computable }
 ; CHECK-NEXT:    %sub = sub i32 %iv, %VF
-; CHECK-NEXT:    --> {((-8 * vscale)<nsw> + %n),+,(-4 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:    --> {((-8 * vscale)<nsw> + %n),+,(-4 * vscale)<nsw>}<nw><%for.body> U: full-set S: full-set Exits: ((vscale * (-8 + (-4 * (((-8 * vscale)<nsw> + %n) /u (4 * vscale)<nuw><nsw>))<nsw>)) + %n) LoopDispositions: { %for.body: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @vscalex4_countdown_ne
-; CHECK-NEXT:  Loop %for.body: Unpredictable backedge-taken count.
-; CHECK-NEXT:  Loop %for.body: Unpredictable constant max backedge-taken count.
-; CHECK-NEXT:  Loop %for.body: Unpredictable symbolic max backedge-taken count.
+; CHECK-NEXT:  Loop %for.body: backedge-taken count is (((-8 * vscale)<nsw> + %n) /u (4 * vscale)<nuw><nsw>)
+; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 536870911
+; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is (((-8 * vscale)<nsw> + %n) /u (4 * vscale)<nuw><nsw>)
+; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
 ;
 entry:
   %vscale = call i32 @llvm.vscale.i32()