[SCEV] If max BTC is zero, then so is the exact BTC [2 of 2]

preames · preames · commit 29fa37ec9fce · 2021-09-01T11:51:48.000-07:00
This extends D108921 into a generic rule applied to constructing ExitLimits along all paths. The remaining paths (primarily howFarToZero) don't have the same reasoning about UB sensitivity as the howManyLessThan ones did. Instead, the remain cause for max counts being more precise than exact counts is that we apply context sensitive loop guards on the max path, and not on the exact path. That choice is mildly suspect, but out of scope of this patch. The MVETailPredication.cpp change deserves a bit of explanation. We were previously figuring out that two SCEVs happened to be equal because the happened to be identical. When we optimized one with context sensitive information, but not the other, we lost the ability to prove them equal. So, cover this case by subtracting and then applying loop guards again. Without this, we see changes in test/CodeGen/Thumb2/mve-blockplacement.ll Differential Revision: https://reviews.llvm.org/D109015
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -7635,6 +7635,12 @@ ScalarEvolution::ExitLimit::ExitLimit(
     const SCEV *E, const SCEV *M, bool MaxOrZero,
     ArrayRef<const SmallPtrSetImpl<const SCEVPredicate *> *> PredSetList)
     : ExactNotTaken(E), MaxNotTaken(M), MaxOrZero(MaxOrZero) {
+  // If we prove the max count is zero, so is the symbolic bound.  This happens
+  // in practice due to differences in a) how context sensitive we've chosen
+  // to be and b) how we reason about bounds impied by UB.
+  if (MaxNotTaken->isZero())
+    ExactNotTaken = MaxNotTaken;
+
   assert((isa<SCEVCouldNotCompute>(ExactNotTaken) ||
           !isa<SCEVCouldNotCompute>(MaxNotTaken)) &&
          "Exact is not allowed to be less precise than Max");
@@ -11939,10 +11945,6 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
   } else {
     MaxBECount = computeMaxBECountForLT(
         Start, Stride, RHS, getTypeSizeInBits(LHS->getType()), IsSigned);
-    // If we prove the max count is zero, so is the symbolic bound.  This can
-    // happen due to differences in how we reason about bounds impied by UB.
-    if (MaxBECount->isZero())
-      BECount = MaxBECount;
   }
 
   if (isa<SCEVCouldNotCompute>(MaxBECount) &&
diff --git a/llvm/lib/Target/ARM/MVETailPredication.cpp b/llvm/lib/Target/ARM/MVETailPredication.cpp
@@ -293,14 +293,18 @@ bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask,
     // Check for equality of TC and Ceil by calculating SCEV expression
     // TC - Ceil and test it for zero.
     //
-    bool Zero = SE->getMinusSCEV(
-                      SE->getBackedgeTakenCount(L),
-                      SE->getUDivExpr(SE->getAddExpr(SE->getMulExpr(Ceil, VW),
-                                                     SE->getNegativeSCEV(VW)),
-                                      VW))
-                    ->isZero();
-
-    if (!Zero) {
+    const SCEV *Sub =
+      SE->getMinusSCEV(SE->getBackedgeTakenCount(L),
+                       SE->getUDivExpr(SE->getAddExpr(SE->getMulExpr(Ceil, VW),
+                                                      SE->getNegativeSCEV(VW)),
+                                       VW));
+
+    // Use context sensitive facts about the path to the loop to refine.  This
+    // comes up as the backedge taken count can incorporate context sensitive
+    // reasoning, and our RHS just above doesn't.
+    Sub = SE->applyLoopGuards(Sub, L);
+
+    if (!Sub->isZero()) {
       LLVM_DEBUG(dbgs() << "ARM TP: possible overflow in sub expression.\n");
       return false;
     }
diff --git a/llvm/test/Analysis/ScalarEvolution/max-trip-count.ll b/llvm/test/Analysis/ScalarEvolution/max-trip-count.ll
@@ -523,7 +523,7 @@ exit:
 ; of context sensativity.
 define void @ne_zero_max_btc(i32 %a) {
 ; CHECK-LABEL: Determining loop execution counts for: @ne_zero_max_btc
-; CHECK: Loop %for.body: backedge-taken count is (-1 + (zext i32 (1 umax (1 smin %a)) to i64))<nsw>
+; CHECK: Loop %for.body: backedge-taken count is 0
 ; CHECK: Loop %for.body: max backedge-taken count is 0
 entry:
   %cmp = icmp slt i32 %a, 1