Skip to content

Commit 44edc6f

Browse files
committed
[SCEV] rewriteLoopExitValues(): even if there are hard uses, still rewrite if cheap (PR44668)
Summary: Replacing uses of the IV outside of the loop is likely generally useful, but `rewriteLoopExitValues()` is cautious: unless it is told to always perform the replacement, it does not replace when there are hard uses of the IV inside the loop. In [[ https://bugs.llvm.org/show_bug.cgi?id=44668 | PR44668 ]], that prevents `-indvars` from replacing uses of the induction variable after the loop, which might be one of the optimization failures preventing that code from being vectorized. Instead, now that the cost model is fixed, I believe we should be a little more optimistic and also perform the replacement if we believe it is within our budget. Fixes [[ https://bugs.llvm.org/show_bug.cgi?id=44668 | PR44668 ]]. Reviewers: reames, mkazantsev, asbirlea, fhahn, skatkov Reviewed By: mkazantsev Subscribers: nikic, hiraditya, zzheng, javed.absar, dmgreen, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D73501
1 parent d6f47ae commit 44edc6f

File tree

6 files changed

+35
-36
lines changed

6 files changed

+35
-36
lines changed

llvm/lib/Transforms/Utils/LoopUtils.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1353,16 +1353,16 @@ int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
13531353

13541354
// Computing the value outside of the loop brings no benefit if it is
13551355
// definitely used inside the loop in a way which can not be optimized
1356-
// away. Avoid doing so unless we know we have a value which computes
1357-
// the ExitValue already. TODO: This should be merged into SCEV
1358-
// expander to leverage its knowledge of existing expressions.
1359-
if (ReplaceExitValue != AlwaysRepl &&
1360-
!isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
1356+
// away. Avoid doing so unless either we know we have a value
1357+
// which computes the ExitValue already, or it is cheap to do so.
1358+
// TODO: This should be merged into SCEV expander to leverage
1359+
// its knowledge of existing expressions.
1360+
bool HighCost = Rewriter.isHighCostExpansion(
1361+
ExitValue, L, SCEVCheapExpansionBudget, TTI, Inst);
1362+
if (ReplaceExitValue != AlwaysRepl && HighCost &&
13611363
hasHardUserWithinLoop(L, Inst))
13621364
continue;
13631365

1364-
bool HighCost = Rewriter.isHighCostExpansion(
1365-
ExitValue, L, SCEVCheapExpansionBudget, TTI, Inst);
13661366
Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
13671367

13681368
LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = "

llvm/test/Transforms/IndVarSimplify/dont-recompute.ll renamed to llvm/test/Transforms/IndVarSimplify/do-recompute-if-cheap.ll

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
22
; RUN: opt < %s -indvars -S | FileCheck %s
33

4-
; This tests that the IV is not recomputed outside of the loop when it is known
5-
; to be computed by the loop and used in the loop any way. In the example below
6-
; although a's value can be computed outside of the loop, there is no benefit
7-
; in doing so as it has to be computed by the loop anyway.
4+
; This tests that the IV is recomputed outside of the loop even when it is known
5+
; to be computed by the loop and used in the loop any way, if it is cheap to do
6+
; so. In the example below the value can be computed outside of the loop,
7+
; and we should do so because after that IV is no longer used outside of
8+
; the loop, which is likely beneficial for vectorization.
89
;
910
; extern void func(unsigned val);
1011
;
@@ -35,8 +36,8 @@ define void @test(i32 %m) nounwind uwtable {
3536
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 186
3637
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
3738
; CHECK: for.end:
38-
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ]
39-
; CHECK-NEXT: tail call void @func(i32 [[ADD_LCSSA]])
39+
; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[M]], 186
40+
; CHECK-NEXT: tail call void @func(i32 [[TMP0]])
4041
; CHECK-NEXT: ret void
4142
;
4243
entry:
@@ -69,8 +70,8 @@ define i32 @test2(i32 %m) nounwind uwtable {
6970
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 186
7071
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
7172
; CHECK: for.end:
72-
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ]
73-
; CHECK-NEXT: ret i32 [[ADD_LCSSA]]
73+
; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[M]], 186
74+
; CHECK-NEXT: ret i32 [[TMP0]]
7475
;
7576
entry:
7677
br label %for.body
@@ -101,8 +102,8 @@ define void @test3(i32 %m) nounwind uwtable {
101102
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 186
102103
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
103104
; CHECK: for.end:
104-
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ]
105-
; CHECK-NEXT: tail call void @func(i32 [[ADD_LCSSA]])
105+
; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[M]], 186
106+
; CHECK-NEXT: tail call void @func(i32 [[TMP0]])
106107
; CHECK-NEXT: ret void
107108
;
108109
entry:
@@ -141,8 +142,8 @@ define void @test4(i32 %m) nounwind uwtable {
141142
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 186
142143
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
143144
; CHECK: for.end:
144-
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ]
145-
; CHECK-NEXT: [[SOFT_USE:%.*]] = add i32 [[ADD_LCSSA]], 123
145+
; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[M]], 186
146+
; CHECK-NEXT: [[SOFT_USE:%.*]] = add i32 [[TMP0]], 123
146147
; CHECK-NEXT: tail call void @func(i32 [[SOFT_USE]])
147148
; CHECK-NEXT: ret void
148149
;
@@ -178,8 +179,8 @@ define void @test5(i32 %m) nounwind uwtable {
178179
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 186
179180
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
180181
; CHECK: for.end:
181-
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ]
182-
; CHECK-NEXT: tail call void @func(i32 [[ADD_LCSSA]])
182+
; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[M]], 186
183+
; CHECK-NEXT: tail call void @func(i32 [[TMP0]])
183184
; CHECK-NEXT: ret void
184185
;
185186
entry:
@@ -215,8 +216,8 @@ define void @test6(i32 %m, i32* %p) nounwind uwtable {
215216
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 186
216217
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
217218
; CHECK: for.end:
218-
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ]
219-
; CHECK-NEXT: tail call void @func(i32 [[ADD_LCSSA]])
219+
; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[M]], 186
220+
; CHECK-NEXT: tail call void @func(i32 [[TMP0]])
220221
; CHECK-NEXT: ret void
221222
;
222223
entry:

llvm/test/Transforms/IndVarSimplify/elim-extend.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,8 +143,7 @@ define void @nestedIV(i8* %address, i32 %limit) nounwind {
143143
; CHECK-NEXT: [[INNERCMP:%.*]] = icmp sgt i64 [[TMP0]], [[INDVARS_IV_NEXT]]
144144
; CHECK-NEXT: br i1 [[INNERCMP]], label [[INNERLOOP]], label [[INNEREXIT:%.*]]
145145
; CHECK: innerexit:
146-
; CHECK-NEXT: [[INNERCOUNT_LCSSA_WIDE:%.*]] = phi i64 [ [[INDVARS_IV_NEXT]], [[INNERLOOP]] ]
147-
; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[INNERCOUNT_LCSSA_WIDE]] to i32
146+
; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP0]] to i32
148147
; CHECK-NEXT: br label [[OUTERMERGE]]
149148
; CHECK: outermerge:
150149
; CHECK-NEXT: [[INNERCOUNT_MERGE]] = phi i32 [ [[TMP4]], [[INNEREXIT]] ], [ [[INNERCOUNT]], [[INNERPREHEADER]] ]

llvm/test/Transforms/IndVarSimplify/lrev-existing-umin.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,7 @@ define void @f(i32 %length.i.88, i32 %length.i, i8* %tmp12, i32 %tmp10, i8* %tmp
2626
; CHECK-NEXT: [[TMP23:%.*]] = icmp slt i32 [[TMP22]], [[TMP14]]
2727
; CHECK-NEXT: br i1 [[TMP23]], label [[NOT_ZERO11]], label [[MAIN_EXIT_SELECTOR:%.*]]
2828
; CHECK: main.exit.selector:
29-
; CHECK-NEXT: [[TMP22_LCSSA:%.*]] = phi i32 [ [[TMP22]], [[NOT_ZERO11]] ]
30-
; CHECK-NEXT: [[TMP24:%.*]] = icmp slt i32 [[TMP22_LCSSA]], [[LENGTH_I]]
29+
; CHECK-NEXT: [[TMP24:%.*]] = icmp slt i32 [[TMP14]], [[LENGTH_I]]
3130
; CHECK-NEXT: br i1 [[TMP24]], label [[NOT_ZERO11_POSTLOOP]], label [[LEAVE:%.*]]
3231
; CHECK: leave:
3332
; CHECK-NEXT: ret void

llvm/test/Transforms/IndVarSimplify/pr28705.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,14 @@ define void @foo(i32 %sub.ptr.div.i, i8* %ref.i1174) local_unnamed_addr {
1616
; CHECK: for.body650.lr.ph:
1717
; CHECK-NEXT: br label [[FOR_BODY650:%.*]]
1818
; CHECK: loopexit:
19-
; CHECK-NEXT: [[INC_I_I_LCSSA:%.*]] = phi i32 [ [[INC_I_I:%.*]], [[FOR_BODY650]] ]
19+
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[DOTSROA_SPECULATED]], 1
2020
; CHECK-NEXT: br label [[XZ_EXIT]]
2121
; CHECK: XZ.exit:
22-
; CHECK-NEXT: [[DB_SROA_9_0_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC_I_I_LCSSA]], [[LOOPEXIT:%.*]] ]
22+
; CHECK-NEXT: [[DB_SROA_9_0_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[TMP0]], [[LOOPEXIT:%.*]] ]
2323
; CHECK-NEXT: br label [[END:%.*]]
2424
; CHECK: for.body650:
2525
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[FOR_BODY650_LR_PH]] ], [ [[INC655:%.*]], [[FOR_BODY650]] ]
26-
; CHECK-NEXT: [[IV2:%.*]] = phi i32 [ 1, [[FOR_BODY650_LR_PH]] ], [ [[INC_I_I]], [[FOR_BODY650]] ]
26+
; CHECK-NEXT: [[IV2:%.*]] = phi i32 [ 1, [[FOR_BODY650_LR_PH]] ], [ [[INC_I_I:%.*]], [[FOR_BODY650]] ]
2727
; CHECK-NEXT: [[ARRAYIDX_I_I1105:%.*]] = getelementptr inbounds i8, i8* [[REF_I1174:%.*]], i32 [[IV2]]
2828
; CHECK-NEXT: store i8 7, i8* [[ARRAYIDX_I_I1105]], align 1
2929
; CHECK-NEXT: [[INC_I_I]] = add nuw nsw i32 [[IV2]], 1

llvm/test/Transforms/IndVarSimplify/pr39673.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,8 @@ define i16 @dom_argument(i16 %arg1, i16 %arg2) {
7272
; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i16 [[L2_ADD]], 2
7373
; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP2]], label [[LOOP2_END:%.*]]
7474
; CHECK: loop2.end:
75-
; CHECK-NEXT: [[K2_ADD_LCSSA:%.*]] = phi i16 [ [[K2_ADD]], [[LOOP2]] ]
76-
; CHECK-NEXT: ret i16 [[K2_ADD_LCSSA]]
75+
; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[ARG2]], 2
76+
; CHECK-NEXT: ret i16 [[TMP0]]
7777
;
7878
entry:
7979
br label %loop1
@@ -121,8 +121,8 @@ define i16 @dummy_phi_outside_loop(i16 %arg) {
121121
; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i16 [[L2_ADD]], 2
122122
; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP2]], label [[LOOP2_END:%.*]]
123123
; CHECK: loop2.end:
124-
; CHECK-NEXT: [[K2_ADD_LCSSA:%.*]] = phi i16 [ [[K2_ADD]], [[LOOP2]] ]
125-
; CHECK-NEXT: ret i16 [[K2_ADD_LCSSA]]
124+
; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[DUMMY]], 2
125+
; CHECK-NEXT: ret i16 [[TMP0]]
126126
;
127127
entry:
128128
br label %loop2.preheader
@@ -166,8 +166,8 @@ define i16 @neg_loop_carried(i16 %arg) {
166166
; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i16 [[L2_ADD]], 2
167167
; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP2]], label [[LOOP2_END:%.*]]
168168
; CHECK: loop2.end:
169-
; CHECK-NEXT: [[K2_ADD_LCSSA:%.*]] = phi i16 [ [[K2_ADD]], [[LOOP2]] ]
170-
; CHECK-NEXT: ret i16 [[K2_ADD_LCSSA]]
169+
; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[TMP0]], 2
170+
; CHECK-NEXT: ret i16 [[TMP1]]
171171
;
172172
entry:
173173
br label %loop1

0 commit comments

Comments
 (0)