[SCEV] Compute AddRec ranges using a BECount of a different type

caojoshua · caojoshua · commit 6ed152aff4aa · 2023-05-31T21:05:17.000-07:00
Before this patch, the MaxBECount could be used for an AddRec's range computation only if its bit width was less than or equal to the AddRec's bit width. This patch observes that a MaxBECount with a larger bit width can still be used as long as its value fits in the AddRec's bit width (i.e. it is <= the maximum unsigned value representable in that width); in that case the MaxBECount is truncated to the AddRec's bit width. Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D151698
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -6703,8 +6703,15 @@ const ConstantRange &ScalarEvolution::getRangeRef(
           getConstantMaxBackedgeTakenCount(AddRec->getLoop());
       if (!isa<SCEVCouldNotCompute>(MaxBEScev)) {
         APInt MaxBECount = cast<SCEVConstant>(MaxBEScev)->getAPInt();
-        if (MaxBECount.getBitWidth() < BitWidth)
+
+        // Adjust MaxBECount to the same bitwidth as AddRec. We can truncate if
+        // the number of active bits in MaxBECount is <= AddRec's bit width.
+        if (MaxBECount.getBitWidth() > BitWidth &&
+            MaxBECount.getActiveBits() <= BitWidth)
+          MaxBECount = MaxBECount.trunc(BitWidth);
+        else if (MaxBECount.getBitWidth() < BitWidth)
           MaxBECount = MaxBECount.zext(BitWidth);
+
         if (MaxBECount.getBitWidth() == BitWidth) {
           auto RangeFromAffine = getRangeForAffineAR(
               AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount);
diff --git a/llvm/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll b/llvm/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll
@@ -52,8 +52,8 @@ define void @coupled_miv_type_mismatch(i32 %n) #0 {
 entry:
   br label %for.cond
 
-; DELIN: da analyze - input [* *]!
-; DELIN: da analyze - anti [* *|<]!
+; DELIN: da analyze - none!
+; DELIN: da analyze - consistent anti [1 -2]!
 ; DELIN: da analyze - none!
 for.cond:                                         ; preds = %for.inc11, %entry
   %indvars.iv11 = phi i64 [ %indvars.iv.next12, %for.inc11 ], [ 1, %entry ]
diff --git a/llvm/test/Analysis/ScalarEvolution/increasing-or-decreasing-iv.ll b/llvm/test/Analysis/ScalarEvolution/increasing-or-decreasing-iv.ll
@@ -233,7 +233,7 @@ define void @f4(i1 %c) {
 ; CHECK-NEXT:    %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ]
 ; CHECK-NEXT:    --> {%start,+,%step}<nsw><%loop> U: [0,128) S: [0,128) Exits: ((127 * %step)<nsw> + %start) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.trunc = trunc i32 %iv to i16
-; CHECK-NEXT:    --> {(trunc i32 %start to i16),+,(trunc i32 %step to i16)}<%loop> U: full-set S: full-set Exits: ((trunc i32 %start to i16) + (127 * (trunc i32 %step to i16))<nsw>) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {(trunc i32 %start to i16),+,(trunc i32 %step to i16)}<%loop> U: [0,128) S: [0,128) Exits: ((trunc i32 %start to i16) + (127 * (trunc i32 %step to i16))<nsw>) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = add i32 %iv, %step
 ; CHECK-NEXT:    --> {(%step + %start),+,%step}<nw><%loop> U: [-256,256) S: [-256,256) Exits: ((128 * %step)<nsw> + %start) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %loop.iv.inc = add i32 %loop.iv, 1
@@ -247,11 +247,6 @@ define void @f4(i1 %c) {
 ; CHECK:       Loop %loop: Trip multiple is 128
 ;
 
-; @f4() demonstrates a case where SCEV is not able to compute a
-; precise range for %iv.trunc, though it should be able to, in theory.
-; This is because SCEV looks into affine add recurrences only when the
-; backedge taken count of the loop has the same bitwidth as the
-; induction variable.
 entry:
   %start = select i1 %c, i32 127, i32 0
   %step  = select i1 %c, i32 -1,  i32 1
diff --git a/llvm/test/Analysis/ScalarEvolution/limit-depth.ll b/llvm/test/Analysis/ScalarEvolution/limit-depth.ll
@@ -115,7 +115,7 @@ exit:
 define void @test_trunc(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) {
 ; CHECK-LABEL: @test_trunc
 ; CHECK:          %trunc2 = trunc i64 %iv2.inc to i32
-; CHECK-NEXT:     -->  {(trunc i64 (1 + {7,+,1}<%loop>) to i32),+,1}<%loop2>
+; CHECK-NEXT:     -->  {(trunc i64 (1 + {7,+,1}<%loop>)<nuw><nsw> to i32),+,1}<%loop2> U: [8,53) S: [8,53)  -->  52 U: [52,53) S: [52,53)
 entry:
   br label %loop
 
diff --git a/llvm/test/Analysis/ScalarEvolution/nsw.ll b/llvm/test/Analysis/ScalarEvolution/nsw.ll
@@ -174,9 +174,9 @@ define i32 @PR12375(ptr readnone %arg) {
 ; CHECK-NEXT:    %tmp2 = phi ptr [ %arg, %bb ], [ %tmp5, %bb1 ]
 ; CHECK-NEXT:    --> {%arg,+,4}<nuw><%bb1> U: full-set S: full-set Exits: (4 + %arg)<nuw> LoopDispositions: { %bb1: Computable }
 ; CHECK-NEXT:    %tmp3 = phi i32 [ 0, %bb ], [ %tmp4, %bb1 ]
-; CHECK-NEXT:    --> {0,+,1}<nuw><nsw><%bb1> U: [0,-2147483648) S: [0,-2147483648) Exits: 1 LoopDispositions: { %bb1: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><nsw><%bb1> U: [0,2) S: [0,2) Exits: 1 LoopDispositions: { %bb1: Computable }
 ; CHECK-NEXT:    %tmp4 = add nsw i32 %tmp3, 1
-; CHECK-NEXT:    --> {1,+,1}<nuw><%bb1> U: [1,0) S: [1,0) Exits: 2 LoopDispositions: { %bb1: Computable }
+; CHECK-NEXT:    --> {1,+,1}<nuw><nsw><%bb1> U: [1,3) S: [1,3) Exits: 2 LoopDispositions: { %bb1: Computable }
 ; CHECK-NEXT:    %tmp5 = getelementptr inbounds i32, ptr %tmp2, i64 1
 ; CHECK-NEXT:    --> {(4 + %arg)<nuw>,+,4}<nuw><%bb1> U: [4,0) S: [4,0) Exits: (8 + %arg)<nuw> LoopDispositions: { %bb1: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @PR12375
diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-negative-stride.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-negative-stride.ll
@@ -602,7 +602,7 @@ define void @step_is_neg_addrec_slt_8(i64 %n) {
 ; CHECK-LABEL: 'step_is_neg_addrec_slt_8'
 ; CHECK-NEXT:  Determining loop execution counts for: @step_is_neg_addrec_slt_8
 ; CHECK-NEXT:  Loop %inner: backedge-taken count is (7 /u {0,+,-1}<nuw><nsw><%outer.header>)
-; CHECK-NEXT:  Loop %inner: constant max backedge-taken count is -2147483640
+; CHECK-NEXT:  Loop %inner: constant max backedge-taken count is 8
 ; CHECK-NEXT:  Loop %inner: symbolic max backedge-taken count is (7 /u {0,+,-1}<nuw><nsw><%outer.header>)
 ; CHECK-NEXT:  Loop %inner: Predicated backedge-taken count is (7 /u {0,+,-1}<nuw><nsw><%outer.header>)
 ; CHECK-NEXT:   Predicates:
@@ -643,10 +643,10 @@ exit:
 define void @step_is_neg_addrec_slt_var(i32 %n) {
 ; CHECK-LABEL: 'step_is_neg_addrec_slt_var'
 ; CHECK-NEXT:  Determining loop execution counts for: @step_is_neg_addrec_slt_var
-; CHECK-NEXT:  Loop %inner: backedge-taken count is ((((-1 * (1 umin ({0,+,1}<nuw><%outer.header> + ({0,+,-1}<nuw><nsw><%outer.header> smax %n))))<nuw><nsw> + {0,+,1}<nuw><%outer.header> + ({0,+,-1}<nuw><nsw><%outer.header> smax %n)) /u (1 umax {0,+,-1}<nuw><nsw><%outer.header>)) + (1 umin ({0,+,1}<nuw><%outer.header> + ({0,+,-1}<nuw><nsw><%outer.header> smax %n))))
-; CHECK-NEXT:  Loop %inner: constant max backedge-taken count is -1
-; CHECK-NEXT:  Loop %inner: symbolic max backedge-taken count is ((((-1 * (1 umin ({0,+,1}<nuw><%outer.header> + ({0,+,-1}<nuw><nsw><%outer.header> smax %n))))<nuw><nsw> + {0,+,1}<nuw><%outer.header> + ({0,+,-1}<nuw><nsw><%outer.header> smax %n)) /u (1 umax {0,+,-1}<nuw><nsw><%outer.header>)) + (1 umin ({0,+,1}<nuw><%outer.header> + ({0,+,-1}<nuw><nsw><%outer.header> smax %n))))
-; CHECK-NEXT:  Loop %inner: Predicated backedge-taken count is ((((-1 * (1 umin ({0,+,1}<nuw><%outer.header> + ({0,+,-1}<nuw><nsw><%outer.header> smax %n))))<nuw><nsw> + {0,+,1}<nuw><%outer.header> + ({0,+,-1}<nuw><nsw><%outer.header> smax %n)) /u (1 umax {0,+,-1}<nuw><nsw><%outer.header>)) + (1 umin ({0,+,1}<nuw><%outer.header> + ({0,+,-1}<nuw><nsw><%outer.header> smax %n))))
+; CHECK-NEXT:  Loop %inner: backedge-taken count is ({0,+,1}<nuw><nsw><%outer.header> + ({0,+,-1}<nsw><%outer.header> smax %n))
+; CHECK-NEXT:  Loop %inner: constant max backedge-taken count is 2147483647
+; CHECK-NEXT:  Loop %inner: symbolic max backedge-taken count is ({0,+,1}<nuw><nsw><%outer.header> + ({0,+,-1}<nsw><%outer.header> smax %n))
+; CHECK-NEXT:  Loop %inner: Predicated backedge-taken count is ({0,+,1}<nuw><nsw><%outer.header> + ({0,+,-1}<nsw><%outer.header> smax %n))
 ; CHECK-NEXT:   Predicates:
 ; CHECK:       Loop %inner: Trip multiple is 1
 ; CHECK-NEXT:  Loop %outer.header: backedge-taken count is 0
@@ -685,10 +685,10 @@ exit:
 define void @step_is_neg_addrec_unknown_start(i32 %n) {
 ; CHECK-LABEL: 'step_is_neg_addrec_unknown_start'
 ; CHECK-NEXT:  Determining loop execution counts for: @step_is_neg_addrec_unknown_start
-; CHECK-NEXT:  Loop %inner: backedge-taken count is ((((-1 * (1 umin ({(-1 * %n),+,1}<nw><%outer.header> + (8 smax {%n,+,-1}<nuw><nsw><%outer.header>))))<nuw><nsw> + {(-1 * %n),+,1}<nw><%outer.header> + (8 smax {%n,+,-1}<nuw><nsw><%outer.header>)) /u (1 umax {0,+,-1}<%outer.header>)) + (1 umin ({(-1 * %n),+,1}<nw><%outer.header> + (8 smax {%n,+,-1}<nuw><nsw><%outer.header>))))
+; CHECK-NEXT:  Loop %inner: backedge-taken count is ({(-1 * %n),+,1}<nw><%outer.header> + (8 smax {%n,+,-1}<nsw><%outer.header>))
 ; CHECK-NEXT:  Loop %inner: constant max backedge-taken count is -2147483640
-; CHECK-NEXT:  Loop %inner: symbolic max backedge-taken count is ((((-1 * (1 umin ({(-1 * %n),+,1}<nw><%outer.header> + (8 smax {%n,+,-1}<nuw><nsw><%outer.header>))))<nuw><nsw> + {(-1 * %n),+,1}<nw><%outer.header> + (8 smax {%n,+,-1}<nuw><nsw><%outer.header>)) /u (1 umax {0,+,-1}<%outer.header>)) + (1 umin ({(-1 * %n),+,1}<nw><%outer.header> + (8 smax {%n,+,-1}<nuw><nsw><%outer.header>))))
-; CHECK-NEXT:  Loop %inner: Predicated backedge-taken count is ((((-1 * (1 umin ({(-1 * %n),+,1}<nw><%outer.header> + (8 smax {%n,+,-1}<nuw><nsw><%outer.header>))))<nuw><nsw> + {(-1 * %n),+,1}<nw><%outer.header> + (8 smax {%n,+,-1}<nuw><nsw><%outer.header>)) /u (1 umax {0,+,-1}<%outer.header>)) + (1 umin ({(-1 * %n),+,1}<nw><%outer.header> + (8 smax {%n,+,-1}<nuw><nsw><%outer.header>))))
+; CHECK-NEXT:  Loop %inner: symbolic max backedge-taken count is ({(-1 * %n),+,1}<nw><%outer.header> + (8 smax {%n,+,-1}<nsw><%outer.header>))
+; CHECK-NEXT:  Loop %inner: Predicated backedge-taken count is ({(-1 * %n),+,1}<nw><%outer.header> + (8 smax {%n,+,-1}<nsw><%outer.header>))
 ; CHECK-NEXT:   Predicates:
 ; CHECK:       Loop %inner: Trip multiple is 1
 ; CHECK-NEXT:  Loop %outer.header: backedge-taken count is 0
diff --git a/polly/test/CodeGen/switch-in-non-affine-region.ll b/polly/test/CodeGen/switch-in-non-affine-region.ll
@@ -14,13 +14,13 @@
 ;          }
 ;    }
 ;
-; CHECK: polly.stmt.if.then:
-; CHECK:   %[[trunc:.*]] = trunc i64 %polly.indvar to i32
-; CHECK:   %p_rem = srem i32 %[[trunc]], 4
-; CHECK:   switch i32 %p_rem, label %polly.stmt.sw.epilog [
-; CHECK:     i32 0, label %polly.stmt.sw.bb
-; CHECK:     i32 1, label %polly.stmt.sw.bb.3
-; CHECK:   ]
+; CHECK:      polly.stmt.if.then:
+; CHECK-NEXT:   %2 = trunc i64 %polly.indvar to i2
+; CHECK-NEXT:   %3 = zext i2 %2 to i32
+; CHECK-NEXT:   switch i32 %3, label %polly.stmt.sw.epilog [
+; CHECK-NEXT:     i32 0, label %polly.stmt.sw.bb
+; CHECK-NEXT:     i32 1, label %polly.stmt.sw.bb.3
+; CHECK-NEXT:   ]
 ;
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
diff --git a/polly/test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_3.ll b/polly/test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_3.ll
@@ -14,7 +14,7 @@
 ; INNERMOST-NEXT: Invariant Accesses: {
 ; INNERMOST-NEXT: }
 ; INNERMOST-NEXT: Context:
-; INNERMOST-NEXT: [p_0, p_1, p_2] -> {  : 0 <= p_0 <= 2147483647 and 0 <= p_1 <= 1024 and 0 <= p_2 <= 1024 }
+; INNERMOST-NEXT: [p_0, p_1, p_2] -> {  : 0 <= p_0 <= 1048576 and 0 <= p_1 <= 1024 and 0 <= p_2 <= 1024 }
 ; INNERMOST-NEXT: Assumed Context:
 ; INNERMOST-NEXT: [p_0, p_1, p_2] -> {  :  }
 ; INNERMOST-NEXT: Invalid Context:
@@ -89,9 +89,9 @@
 ; ALL-NEXT:         ReadAccess :=    [Reduction Type: NONE] [Scalar: 0]
 ; ALL-NEXT:             { Stmt_bb15__TO__bb25[i0, i1] -> MemRef_A[i1] };
 ; ALL-NEXT:         ReadAccess :=    [Reduction Type: NONE] [Scalar: 0]
-; ALL-NEXT:             { Stmt_bb15__TO__bb25[i0, i1] -> MemRef_A[o0] : 0 <= o0 <= 2147483647 };
+; ALL-NEXT:             { Stmt_bb15__TO__bb25[i0, i1] -> MemRef_A[o0] : 0 <= o0 <= 1048576 };
 ; ALL-NEXT:         MayWriteAccess :=    [Reduction Type: NONE] [Scalar: 0]
-; ALL-NEXT:             { Stmt_bb15__TO__bb25[i0, i1] -> MemRef_A[o0] : 0 <= o0 <= 2147483647 };
+; ALL-NEXT:             { Stmt_bb15__TO__bb25[i0, i1] -> MemRef_A[o0] : 0 <= o0 <= 1048576 };
 ; ALL-NEXT: }
 ;
 ;    void f(int *A) {