[SCEV] Infer flags from add/gep in any block

preames · preames · commit 0658bab870c8 · 2021-10-06T11:11:54.000-07:00
This patch removes a compile-time restriction from isSCEVExprNeverPoison. We've strengthened our ability to reason about flags on scopes other than addrecs, and this bailout prevents us from using it. The comment is also suspect, in that we're in the middle of constructing a SCEV for I. As such, we're going to visit all operands *anyway*. Differential Revision: https://reviews.llvm.org/D111186
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -6645,16 +6645,6 @@ bool ScalarEvolution::isGuaranteedToTransferExecutionTo(const Instruction *A,
 
 
 bool ScalarEvolution::isSCEVExprNeverPoison(const Instruction *I) {
-  // Here we check that I is in the header of the innermost loop containing I,
-  // since we only deal with instructions in the loop header. The actual loop we
-  // need to check later will come from an add recurrence, but getting that
-  // requires computing the SCEV of the operands, which can be expensive. This
-  // check we can do cheaply to rule out some cases early.
-  Loop *InnermostContainingLoop = LI.getLoopFor(I->getParent());
-  if (InnermostContainingLoop == nullptr ||
-      InnermostContainingLoop->getHeader() != I->getParent())
-    return false;
-
   // Only proceed if we can prove that I does not yield poison.
   if (!programUndefinedIfPoison(I))
     return false;
diff --git a/llvm/test/Analysis/DependenceAnalysis/Preliminary.ll b/llvm/test/Analysis/DependenceAnalysis/Preliminary.ll
@@ -623,7 +623,7 @@ entry:
 
 ; CHECK-LABEL: p9
 ; CHECK: da analyze - none!
-; CHECK: da analyze - flow [|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
diff --git a/llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll b/llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll
@@ -1628,9 +1628,9 @@ define noundef i32 @add-basic(i32 %a, i32 %b) {
 ; CHECK-LABEL: 'add-basic'
 ; CHECK-NEXT:  Classifying expressions for: @add-basic
 ; CHECK-NEXT:    %res = add nuw nsw i32 %a, %b
-; CHECK-NEXT:    --> (%a + %b) U: full-set S: full-set
+; CHECK-NEXT:    --> (%a + %b)<nuw><nsw> U: full-set S: full-set
 ; CHECK-NEXT:    %res2 = udiv i32 255, %res
-; CHECK-NEXT:    --> (255 /u (%a + %b)) U: [0,256) S: [0,256)
+; CHECK-NEXT:    --> (255 /u (%a + %b)<nuw><nsw>) U: [0,256) S: [0,256)
 ; CHECK-NEXT:  Determining loop execution counts for: @add-basic
 ;
   %res = add nuw nsw i32 %a, %b
@@ -1656,9 +1656,9 @@ define noundef i32 @mul-basic(i32 %a, i32 %b) {
 ; CHECK-LABEL: 'mul-basic'
 ; CHECK-NEXT:  Classifying expressions for: @mul-basic
 ; CHECK-NEXT:    %res = mul nuw nsw i32 %a, %b
-; CHECK-NEXT:    --> (%a * %b) U: full-set S: full-set
+; CHECK-NEXT:    --> (%a * %b)<nuw><nsw> U: full-set S: full-set
 ; CHECK-NEXT:    %res2 = udiv i32 255, %res
-; CHECK-NEXT:    --> (255 /u (%a * %b)) U: [0,256) S: [0,256)
+; CHECK-NEXT:    --> (255 /u (%a * %b)<nuw><nsw>) U: [0,256) S: [0,256)
 ; CHECK-NEXT:  Determining loop execution counts for: @mul-basic
 ;
   %res = mul nuw nsw i32 %a, %b
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/consecutive-access.ll b/llvm/test/Transforms/SLPVectorizer/X86/consecutive-access.ll
@@ -8,10 +8,6 @@ target triple = "x86_64-apple-macosx10.9.0"
 @C = common global [2000 x float] zeroinitializer, align 16
 @D = common global [2000 x float] zeroinitializer, align 16
 
-; Currently SCEV isn't smart enough to figure out that accesses
-; A[3*i], A[3*i+1] and A[3*i+2] are consecutive, but in future
-; that would hopefully be fixed. For now, check that this isn't
-; vectorized.
 ; Function Attrs: nounwind ssp uwtable
 define void @foo_3double(i32 %u) #0 {
 ; CHECK-LABEL: @foo_3double(
@@ -21,26 +17,25 @@ define void @foo_3double(i32 %u) #0 {
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[U]], 3
 ; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[MUL]] to i64
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 [[IDXPROM]]
-; CHECK-NEXT:    [[TMP0:%.*]] = load double, double* [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 [[IDXPROM]]
-; CHECK-NEXT:    [[TMP1:%.*]] = load double, double* [[ARRAYIDX4]], align 8
-; CHECK-NEXT:    [[ADD5:%.*]] = fadd double [[TMP0]], [[TMP1]]
-; CHECK-NEXT:    store double [[ADD5]], double* [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[ADD11:%.*]] = add nsw i32 [[MUL]], 1
 ; CHECK-NEXT:    [[IDXPROM12:%.*]] = sext i32 [[ADD11]] to i64
 ; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 [[IDXPROM12]]
-; CHECK-NEXT:    [[TMP2:%.*]] = load double, double* [[ARRAYIDX13]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
 ; CHECK-NEXT:    [[ARRAYIDX17:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 [[IDXPROM12]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load double, double* [[ARRAYIDX17]], align 8
-; CHECK-NEXT:    [[ADD18:%.*]] = fadd double [[TMP2]], [[TMP3]]
-; CHECK-NEXT:    store double [[ADD18]], double* [[ARRAYIDX13]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[ARRAYIDX4]] to <2 x double>*
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8
 ; CHECK-NEXT:    [[ADD24:%.*]] = add nsw i32 [[MUL]], 2
 ; CHECK-NEXT:    [[IDXPROM25:%.*]] = sext i32 [[ADD24]] to i64
 ; CHECK-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 [[IDXPROM25]]
-; CHECK-NEXT:    [[TMP4:%.*]] = load double, double* [[ARRAYIDX26]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = load double, double* [[ARRAYIDX26]], align 8
 ; CHECK-NEXT:    [[ARRAYIDX30:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 [[IDXPROM25]]
-; CHECK-NEXT:    [[TMP5:%.*]] = load double, double* [[ARRAYIDX30]], align 8
-; CHECK-NEXT:    [[ADD31:%.*]] = fadd double [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load double, double* [[ARRAYIDX30]], align 8
+; CHECK-NEXT:    [[ADD31:%.*]] = fadd double [[TMP6]], [[TMP7]]
 ; CHECK-NEXT:    store double [[ADD31]], double* [[ARRAYIDX26]], align 8
 ; CHECK-NEXT:    ret void
 ;