Skip to content

Commit 2cbba56

Browse files
author
Max Kazantsev
committed
[IndVars] Fix usage of SCEVExpander to not mess with SCEVConstant. PR38674
This patch removes the function `expandSCEVIfNeeded` which behaves not as it was intended. This function tries to make a lookup for exact existing expansion and only goes to normal expansion via `expandCodeFor` if this lookup hasn't found anything. As a result of this, if some instruction above the loop has a `SCEVConstant` SCEV, this logic will return this instruction when asked for this `SCEVConstant` rather than return a constant value. This is both non-profitable and in some cases leads to breach of LCSSA form (as in PR38674). Whether or not it is possible to break LCSSA with this algorithm and with some non-constant SCEVs is still in question, this is still being investigated. I wasn't able to construct such a test so far, so maybe this situation is impossible. If it is, it will go as a separate fix. Rather than do it, it is always correct to just invoke `expandCodeFor` unconditionally: it behaves smarter about insertion points, and as side effect of this it will choose a constant value for SCEVConstants. For other SCEVs it may end up finding a better insertion point. So it should not be worse in any case. NOTE: So far the only known case for which this transform may break LCSSA is mapping of SCEVConstant to an instruction. However there is a suspicion that the entire algorithm can compromise LCSSA form for other cases as well (yet not proved). Differential Revision: https://reviews.llvm.org/D51286 Reviewed By: etherzhhb llvm-svn: 341345
1 parent bd203e0 commit 2cbba56

File tree

2 files changed

+142
-18
lines changed

2 files changed

+142
-18
lines changed

llvm/lib/Transforms/Scalar/IndVarSimplify.cpp

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -152,9 +152,6 @@ class IndVarSimplify {
152152

153153
void sinkUnusedInvariants(Loop *L);
154154

155-
Value *expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L,
156-
Instruction *InsertPt, Type *Ty);
157-
158155
public:
159156
IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
160157
const DataLayout &DL, TargetLibraryInfo *TLI,
@@ -521,19 +518,6 @@ struct RewritePhi {
521518

522519
} // end anonymous namespace
523520

524-
Value *IndVarSimplify::expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S,
525-
Loop *L, Instruction *InsertPt,
526-
Type *ResultTy) {
527-
// Before expanding S into an expensive LLVM expression, see if we can use an
528-
// already existing value as the expansion for S.
529-
if (Value *ExistingValue = Rewriter.getExactExistingExpansion(S, InsertPt, L))
530-
if (ExistingValue->getType() == ResultTy)
531-
return ExistingValue;
532-
533-
// We didn't find anything, fall back to using SCEVExpander.
534-
return Rewriter.expandCodeFor(S, ResultTy, InsertPt);
535-
}
536-
537521
//===----------------------------------------------------------------------===//
538522
// rewriteLoopExitValues - Optimize IV users outside the loop.
539523
// As a side effect, reduces the amount of IV processing within the loop.
@@ -650,8 +634,7 @@ void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
650634
}
651635

652636
bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
653-
Value *ExitVal =
654-
expandSCEVIfNeeded(Rewriter, ExitValue, L, Inst, PN->getType());
637+
Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
655638

656639
LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
657640
<< '\n'
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -S -indvars < %s | FileCheck %s
3+
4+
; Check that we don't reuse %zext instead of %inc11 for LCSSA Phi node. Case
5+
; with constants SCEV.
6+
7+
define i32 @test_01() {
8+
; CHECK-LABEL: @test_01(
9+
; CHECK-NEXT: entry:
10+
; CHECK-NEXT: br label [[FOR_COND1_PREHEADER:%.*]]
11+
; CHECK: for.cond1.preheader:
12+
; CHECK-NEXT: br label [[FOR_COND4_PREHEADER:%.*]]
13+
; CHECK: for.cond4.preheader:
14+
; CHECK-NEXT: [[ZEXT:%.*]] = zext i16 1 to i32
15+
; CHECK-NEXT: br label [[FOR_BODY6:%.*]]
16+
; CHECK: for.cond4:
17+
; CHECK-NEXT: [[CMP5:%.*]] = icmp ult i32 [[INC:%.*]], 2
18+
; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY6]], label [[FOR_END:%.*]]
19+
; CHECK: for.body6:
20+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[FOR_COND4_PREHEADER]] ], [ [[INC]], [[FOR_COND4:%.*]] ]
21+
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[IV]], [[ZEXT]]
22+
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV]], 1
23+
; CHECK-NEXT: br i1 [[TMP0]], label [[RETURN_LOOPEXIT:%.*]], label [[FOR_COND4]]
24+
; CHECK: for.end:
25+
; CHECK-NEXT: br i1 false, label [[FOR_COND4_PREHEADER]], label [[FOR_END9:%.*]]
26+
; CHECK: for.end9:
27+
; CHECK-NEXT: br i1 false, label [[FOR_COND1_PREHEADER]], label [[RETURN_LOOPEXIT3:%.*]]
28+
; CHECK: return.loopexit:
29+
; CHECK-NEXT: unreachable
30+
; CHECK: return.loopexit3:
31+
; CHECK-NEXT: br label [[RETURN:%.*]]
32+
; CHECK: return:
33+
; CHECK-NEXT: ret i32 1
34+
;
35+
entry:
36+
br label %for.cond1.preheader
37+
38+
for.cond1.preheader: ; preds = %for.end9, %entry
39+
br label %for.cond4.preheader
40+
41+
for.cond4.preheader: ; preds = %for.end, %for.cond1.preheader
42+
%zext = zext i16 1 to i32
43+
br label %for.body6
44+
45+
for.cond4: ; preds = %for.body6
46+
%cmp5 = icmp ult i32 %inc, 2
47+
br i1 %cmp5, label %for.body6, label %for.end
48+
49+
for.body6: ; preds = %for.cond4, %for.cond4.preheader
50+
%iv = phi i32 [ 0, %for.cond4.preheader ], [ %inc, %for.cond4 ]
51+
%0 = icmp eq i32 %iv, %zext
52+
%inc = add nuw nsw i32 %iv, 1
53+
br i1 %0, label %return.loopexit, label %for.cond4
54+
55+
for.end: ; preds = %for.cond4
56+
br i1 false, label %for.cond4.preheader, label %for.end9
57+
58+
for.end9: ; preds = %for.end
59+
%inc11 = add nuw nsw i32 0, 1
60+
br i1 false, label %for.cond1.preheader, label %return.loopexit3
61+
62+
return.loopexit: ; preds = %for.body6
63+
unreachable
64+
65+
return.loopexit3: ; preds = %for.end9
66+
%inc11.lcssa = phi i32 [ %inc11, %for.end9 ]
67+
br label %return
68+
69+
return: ; preds = %return.loopexit3
70+
ret i32 %inc11.lcssa
71+
}
72+
73+
; Same as test_01, but the instructions with the same SCEV have a non-constant
74+
; SCEV.
75+
define i32 @test_02(i32 %x) {
76+
; CHECK-LABEL: @test_02(
77+
; CHECK-NEXT: entry:
78+
; CHECK-NEXT: br label [[FOR_COND1_PREHEADER:%.*]]
79+
; CHECK: for.cond1.preheader:
80+
; CHECK-NEXT: br label [[FOR_COND4_PREHEADER:%.*]]
81+
; CHECK: for.cond4.preheader:
82+
; CHECK-NEXT: [[ZEXT:%.*]] = mul i32 [[X:%.*]], 1
83+
; CHECK-NEXT: br label [[FOR_BODY6:%.*]]
84+
; CHECK: for.cond4:
85+
; CHECK-NEXT: [[CMP5:%.*]] = icmp ult i32 [[INC:%.*]], 2
86+
; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY6]], label [[FOR_END:%.*]]
87+
; CHECK: for.body6:
88+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[FOR_COND4_PREHEADER]] ], [ [[INC]], [[FOR_COND4:%.*]] ]
89+
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[IV]], [[ZEXT]]
90+
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV]], 1
91+
; CHECK-NEXT: br i1 [[TMP0]], label [[RETURN_LOOPEXIT:%.*]], label [[FOR_COND4]]
92+
; CHECK: for.end:
93+
; CHECK-NEXT: br i1 false, label [[FOR_COND4_PREHEADER]], label [[FOR_END9:%.*]]
94+
; CHECK: for.end9:
95+
; CHECK-NEXT: [[INC11:%.*]] = add nuw nsw i32 0, [[X]]
96+
; CHECK-NEXT: br i1 false, label [[FOR_COND1_PREHEADER]], label [[RETURN_LOOPEXIT3:%.*]]
97+
; CHECK: return.loopexit:
98+
; CHECK-NEXT: unreachable
99+
; CHECK: return.loopexit3:
100+
; CHECK-NEXT: [[INC11_LCSSA:%.*]] = phi i32 [ [[INC11]], [[FOR_END9]] ]
101+
; CHECK-NEXT: br label [[RETURN:%.*]]
102+
; CHECK: return:
103+
; CHECK-NEXT: ret i32 [[INC11_LCSSA]]
104+
;
105+
entry:
106+
br label %for.cond1.preheader
107+
108+
for.cond1.preheader: ; preds = %for.end9, %entry
109+
br label %for.cond4.preheader
110+
111+
for.cond4.preheader: ; preds = %for.end, %for.cond1.preheader
112+
%zext = mul i32 %x, 1
113+
br label %for.body6
114+
115+
for.cond4: ; preds = %for.body6
116+
%cmp5 = icmp ult i32 %inc, 2
117+
br i1 %cmp5, label %for.body6, label %for.end
118+
119+
for.body6: ; preds = %for.cond4, %for.cond4.preheader
120+
%iv = phi i32 [ 0, %for.cond4.preheader ], [ %inc, %for.cond4 ]
121+
%0 = icmp eq i32 %iv, %zext
122+
%inc = add nuw nsw i32 %iv, 1
123+
br i1 %0, label %return.loopexit, label %for.cond4
124+
125+
for.end: ; preds = %for.cond4
126+
br i1 false, label %for.cond4.preheader, label %for.end9
127+
128+
for.end9: ; preds = %for.end
129+
%inc11 = add nuw nsw i32 0, %x
130+
br i1 false, label %for.cond1.preheader, label %return.loopexit3
131+
132+
return.loopexit: ; preds = %for.body6
133+
unreachable
134+
135+
return.loopexit3: ; preds = %for.end9
136+
%inc11.lcssa = phi i32 [ %inc11, %for.end9 ]
137+
br label %return
138+
139+
return: ; preds = %return.loopexit3
140+
ret i32 %inc11.lcssa
141+
}

0 commit comments

Comments
 (0)