|
| 1 | +From 35e218a886f4c066eabd18685240d55270bd5a6d Mon Sep 17 00:00:00 2001 |
| 2 | +From: Aleksander Us < [email protected]> |
| 3 | +Date: Mon, 26 Aug 2019 15:45:47 +0300 |
| 4 | +Subject: [PATCH] [IndVarSimplify] Do not use SCEV expander for IVCount in |
| 5 | + LFTR when possible. |
| 6 | + |
| 7 | +SCEV analysis cannot properly cache instructions with poison-generating flags |
| 8 | +(for example, an add nsw outside of the loop will not be reused by the expander). |
| 9 | +This can lead to the SCEV expander generating additional instructions. |
| 10 | + |
| 11 | +Example IR: |
| 12 | + |
| 13 | + ... |
| 14 | + %maxval = add nuw nsw i32 %a1, %a2 |
| 15 | + ... |
| 16 | +for.body: |
| 17 | + ... |
| 18 | + %cmp22 = icmp ult i32 %ivadd, %maxval |
| 19 | + br i1 %cmp22, label %for.body, label %for.end |
| 20 | + ... |
| 21 | + |
| 22 | +The SCEV expander will generate a copy of %maxval in the preheader, but without |
| 23 | +the nuw/nsw flags. This can be avoided by explicitly checking that the existing |
| 24 | +IV count value has the same SCEV expression as the one calculated by LFTR. |
| 25 | +--- |
| 26 | + lib/Transforms/Scalar/IndVarSimplify.cpp | 12 +++++++++- |
| 27 | + test/Transforms/IndVarSimplify/add_nsw.ll | 23 ++++++++++++++++++++ |
| 28 | + test/Transforms/IndVarSimplify/lftr-reuse.ll | 9 +++----- |
| 29 | + test/Transforms/IndVarSimplify/udiv.ll | 1 + |
| 30 | + 4 files changed, 38 insertions(+), 7 deletions(-) |
| 31 | + create mode 100644 test/Transforms/IndVarSimplify/add_nsw.ll |
| 32 | + |
| 33 | +diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp |
| 34 | +index f9fc698a4a9..5e04dac8aa6 100644 |
| 35 | +--- a/lib/Transforms/Scalar/IndVarSimplify.cpp |
| 36 | ++++ b/lib/Transforms/Scalar/IndVarSimplify.cpp |
| 37 | +@@ -2375,6 +2375,17 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, |
| 38 | + if (UsePostInc) |
| 39 | + IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType())); |
| 40 | + |
| 41 | ++ // If computed limit is equal to old limit then do not use SCEV expander |
| 42 | ++ // because it can lose NUW/NSW flags and create extra instructions. |
| 43 | ++ BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); |
| 44 | ++ if (ICmpInst *Cmp = dyn_cast<ICmpInst>(BI->getOperand(0))) { |
| 45 | ++ Value *Limit = Cmp->getOperand(0); |
| 46 | ++ if (!L->isLoopInvariant(Limit)) |
| 47 | ++ Limit = Cmp->getOperand(1); |
| 48 | ++ if (SE->getSCEV(Limit) == IVLimit) |
| 49 | ++ return Limit; |
| 50 | ++ } |
| 51 | ++ |
| 52 | + // Expand the code for the iteration count. |
| 53 | + assert(SE->isLoopInvariant(IVLimit, L) && |
| 54 | + "Computed iteration count is not loop invariant!"); |
| 55 | +@@ -2383,7 +2394,6 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, |
| 56 | + // SCEV expression (IVInit) for a pointer type IV value (IndVar). |
| 57 | + Type *LimitTy = ExitCount->getType()->isPointerTy() ? |
| 58 | + IndVar->getType() : ExitCount->getType(); |
| 59 | +- BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); |
| 60 | + return Rewriter.expandCodeFor(IVLimit, LimitTy, BI); |
| 61 | + } |
| 62 | + } |
| 63 | +diff --git a/test/Transforms/IndVarSimplify/add_nsw.ll b/test/Transforms/IndVarSimplify/add_nsw.ll |
| 64 | +new file mode 100644 |
| 65 | +index 00000000000..abd1cbb6c51 |
| 66 | +--- /dev/null |
| 67 | ++++ b/test/Transforms/IndVarSimplify/add_nsw.ll |
| 68 | +@@ -0,0 +1,23 @@ |
| 69 | ++; RUN: opt -indvars -S %s | FileCheck %s |
| 70 | ++ |
| 71 | ++target datalayout = "e-p:32:32-i64:64-n8:16:32" |
| 72 | ++ |
| 73 | ++; CHECK: for.body.preheader: |
| 74 | ++; CHECK-NOT: add |
| 75 | ++; CHECK: for.body: |
| 76 | ++ |
| 77 | ++define void @foo(i32 %a1, i32 %a2) { |
| 78 | ++entry: |
| 79 | ++ %maxval = add nuw nsw i32 %a1, %a2 |
| 80 | ++ %cmp = icmp slt i32 %maxval, 1 |
| 81 | ++ br i1 %cmp, label %for.end, label %for.body |
| 82 | ++ |
| 83 | ++for.body: ; preds = %entry, %for.body |
| 84 | ++ %j.02 = phi i32 [ 0, %entry ], [ %add31, %for.body ] |
| 85 | ++ %add31 = add nuw nsw i32 %j.02, 1 |
| 86 | ++ %cmp22 = icmp slt i32 %add31, %maxval |
| 87 | ++ br i1 %cmp22, label %for.body, label %for.end |
| 88 | ++ |
| 89 | ++for.end: ; preds = %for.body |
| 90 | ++ ret void |
| 91 | ++} |
| 92 | +diff --git a/test/Transforms/IndVarSimplify/lftr-reuse.ll b/test/Transforms/IndVarSimplify/lftr-reuse.ll |
| 93 | +index 14ae9738696..509d662b767 100644 |
| 94 | +--- a/test/Transforms/IndVarSimplify/lftr-reuse.ll |
| 95 | ++++ b/test/Transforms/IndVarSimplify/lftr-reuse.ll |
| 96 | +@@ -67,11 +67,9 @@ define void @expandOuterRecurrence(i32 %arg) nounwind { |
| 97 | + ; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, [[SUB1]] |
| 98 | + ; CHECK-NEXT: br i1 [[CMP1]], label [[OUTER_PREHEADER:%.*]], label [[EXIT:%.*]] |
| 99 | + ; CHECK: outer.preheader: |
| 100 | +-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[ARG]], -1 |
| 101 | + ; CHECK-NEXT: br label [[OUTER:%.*]] |
| 102 | + ; CHECK: outer: |
| 103 | +-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[TMP0]], [[OUTER_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[OUTER_INC:%.*]] ] |
| 104 | +-; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC]] ], [ 0, [[OUTER_PREHEADER]] ] |
| 105 | ++; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC:%.*]] ], [ 0, [[OUTER_PREHEADER]] ] |
| 106 | + ; CHECK-NEXT: [[SUB2:%.*]] = sub nsw i32 [[ARG]], [[I]] |
| 107 | + ; CHECK-NEXT: [[SUB3:%.*]] = sub nsw i32 [[SUB2]], 1 |
| 108 | + ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 0, [[SUB3]] |
| 109 | +@@ -81,14 +79,13 @@ define void @expandOuterRecurrence(i32 %arg) nounwind { |
| 110 | + ; CHECK: inner: |
| 111 | + ; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[INNER_PH]] ], [ [[J_INC:%.*]], [[INNER]] ] |
| 112 | + ; CHECK-NEXT: [[J_INC]] = add nuw nsw i32 [[J]], 1 |
| 113 | +-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[J_INC]], [[INDVARS_IV]] |
| 114 | ++; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[J_INC]], [[SUB3]] |
| 115 | + ; CHECK-NEXT: br i1 [[EXITCOND]], label [[INNER]], label [[OUTER_INC_LOOPEXIT:%.*]] |
| 116 | + ; CHECK: outer.inc.loopexit: |
| 117 | + ; CHECK-NEXT: br label [[OUTER_INC]] |
| 118 | + ; CHECK: outer.inc: |
| 119 | + ; CHECK-NEXT: [[I_INC]] = add nuw nsw i32 [[I]], 1 |
| 120 | +-; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], -1 |
| 121 | +-; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[TMP0]] |
| 122 | ++; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[SUB1]] |
| 123 | + ; CHECK-NEXT: br i1 [[EXITCOND1]], label [[OUTER]], label [[EXIT_LOOPEXIT:%.*]] |
| 124 | + ; CHECK: exit.loopexit: |
| 125 | + ; CHECK-NEXT: br label [[EXIT]] |
| 126 | +diff --git a/test/Transforms/IndVarSimplify/udiv.ll b/test/Transforms/IndVarSimplify/udiv.ll |
| 127 | +index b3f2c2a6a66..3530343ef4a 100644 |
| 128 | +--- a/test/Transforms/IndVarSimplify/udiv.ll |
| 129 | ++++ b/test/Transforms/IndVarSimplify/udiv.ll |
| 130 | +@@ -133,6 +133,7 @@ declare i32 @printf(i8* nocapture, ...) nounwind |
| 131 | + ; CHECK-LABEL: @foo( |
| 132 | + ; CHECK: for.body.preheader: |
| 133 | + ; CHECK-NOT: udiv |
| 134 | ++; CHECK: for.body: |
| 135 | + |
| 136 | + define void @foo(double* %p, i64 %n) nounwind { |
| 137 | + entry: |
| 138 | +-- |
| 139 | +2.18.0 |
| 140 | + |
0 commit comments