Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit c5f6b53

Browse files
author
Chad Rosier
committed
[IndVarSimplify] Widen loop compare instructions.
This improves other optimizations such as LSR. A sext may be added to the compare's other operand, but this can often be hoisted outside of the loop. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217953 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent a34cab9 commit c5f6b53

File tree

4 files changed

+208
-8
lines changed

4 files changed

+208
-8
lines changed

lib/Transforms/Scalar/IndVarSimplify.cpp

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -762,6 +762,8 @@ class WidenIV {
762762

763763
Instruction *WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
764764

765+
bool WidenLoopCompare(NarrowIVDefUse DU);
766+
765767
void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
766768
};
767769
} // anonymous namespace
@@ -926,6 +928,32 @@ static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT) {
926928
DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
927929
}
928930

931+
/// If the narrow use is a compare instruction (ICmpInst), then widen the compare in place
932+
/// (and possibly the other operand). The extend operation is hoisted into the
933+
/// loop preheader as far as possible. Returns true if the compare was rewritten, false if the use is not an ICmpInst.
934+
bool WidenIV::WidenLoopCompare(NarrowIVDefUse DU) {
935+
ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse); // only integer compares are handled
936+
if (!Cmp)
937+
return false; // not an ICmpInst: leave the use untouched for the caller
938+
939+
Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0); // the operand on the other side from the narrow IV
940+
unsigned CastWidth = SE->getTypeSizeInBits(Op->getType()); // bit width of the other operand
941+
unsigned IVWidth = SE->getTypeSizeInBits(WideType); // bit width of the widened IV type
942+
assert (CastWidth <= IVWidth && "Unexpected width while widening compare."); // the other operand must not be wider than WideType
943+
944+
// Widen the compare instruction by swapping the narrow IV operand for its wide definition.
945+
IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT)); // NOTE(review): Builder is not referenced again in this function
946+
DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
947+
948+
// Widen the other operand of the compare, if necessary (only when it is strictly narrower than WideType).
949+
if (CastWidth < IVWidth) {
950+
bool IsSigned = CmpInst::isSigned(Cmp->getPredicate()); // extension kind follows the predicate; NOTE(review): not checked against how the IV itself was extended - confirm
951+
Value *ExtOp = getExtend(Op, WideType, IsSigned, Cmp); // Cmp is passed as the use the extend is created for
952+
DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
953+
}
954+
return true; // the compare now takes the wide IV as its operand
955+
}
956+
929957
/// WidenIVUse - Determine whether an individual user of the narrow IV can be
930958
/// widened. If so, return the wide clone of the user.
931959
Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
@@ -993,10 +1021,15 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
9931021

9941022
// Does this user itself evaluate to a recurrence after widening?
9951023
const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(DU.NarrowUse);
1024+
if (!WideAddRec)
1025+
WideAddRec = GetExtendedOperandRecurrence(DU);
1026+
9961027
if (!WideAddRec) {
997-
WideAddRec = GetExtendedOperandRecurrence(DU);
998-
}
999-
if (!WideAddRec) {
1028+
// If use is a loop condition, try to promote the condition instead of
1029+
// truncating the IV first.
1030+
if (WidenLoopCompare(DU))
1031+
return nullptr;
1032+
10001033
// This user does not evaluate to a recurrence after widening, so don't
10011034
// follow it. Instead insert a Trunc to kill off the original use,
10021035
// eventually isolating the original narrow IV so it can be removed.

test/Transforms/IndVarSimplify/no-iv-rewrite.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,10 +229,11 @@ entry:
229229
; loop and the OR instruction is replaced by an ADD keeping the result
230230
; equivalent.
231231
;
232+
; CHECK: sext
232233
; CHECK: loop:
233234
; CHECK: phi i64
234235
; CHECK-NOT: sext
235-
; CHECK: icmp slt i32
236+
; CHECK: icmp slt i64
236237
; CHECK: exit:
237238
; CHECK: add i64
238239
loop:

test/Transforms/IndVarSimplify/verify-scev.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -380,11 +380,11 @@ for.body48: ; preds = %for.inc221, %for.bo
380380

381381
for.body65.lr.ph: ; preds = %for.body48
382382
%0 = load i32* undef, align 4
383+
%1 = sext i32 %0 to i64
383384
br label %for.body65.us
384385

385386
for.body65.us: ; preds = %for.inc219.us, %for.body65.lr.ph
386-
%k.09.us = phi i32 [ %inc.us, %for.inc219.us ], [ 1, %for.body65.lr.ph ]
387-
%idxprom66.us = sext i32 %k.09.us to i64
387+
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc219.us ], [ 1, %for.body65.lr.ph ]
388388
br i1 undef, label %for.inc219.us, label %if.end72.us
389389

390390
if.end72.us: ; preds = %for.body65.us
@@ -406,8 +406,8 @@ for.cond152.us: ; preds = %for.cond152.us, %fo
406406
br i1 undef, label %for.cond139.loopexit.us, label %for.cond152.us
407407

408408
for.inc219.us: ; preds = %for.cond139.loopexit.us, %if.end110.us, %if.end93.us, %for.body65.us
409-
%inc.us = add nsw i32 %k.09.us, 1
410-
%cmp64.us = icmp sgt i32 %inc.us, %0
409+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
410+
%cmp64.us = icmp sgt i64 %indvars.iv.next, %1
411411
br i1 %cmp64.us, label %for.inc221, label %for.body65.us
412412

413413
for.cond139.loopexit.us: ; preds = %for.cond152.us
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
; RUN: opt < %s -indvars -S | FileCheck %s
2+
target triple = "aarch64--linux-gnu"
3+
4+
; Check that the loop exit i32 compare instruction and its operand are widened to i64
5+
; instead of truncating the IV before its use in the i32 compare instruction.
6+
7+
@idx = common global i32 0, align 4
8+
@e = common global i32 0, align 4
9+
@ptr = common global i32* null, align 8
10+
11+
; CHECK-LABEL: @test1
12+
; CHECK: for.body.lr.ph:
13+
; CHECK: sext i32
14+
; CHECK: for.cond:
15+
; CHECK: icmp slt i64
16+
; CHECK: for.body:
17+
; CHECK: phi i64
18+
19+
define i32 @test1() {
20+
entry:
21+
store i32 -1, i32* @idx, align 4
22+
%0 = load i32* @e, align 4
23+
%cmp4 = icmp slt i32 %0, 0
24+
br i1 %cmp4, label %for.end.loopexit, label %for.body.lr.ph
25+
26+
for.body.lr.ph:
27+
%1 = load i32** @ptr, align 8
28+
%2 = load i32* @e, align 4
29+
br label %for.body
30+
31+
for.cond:
32+
%inc = add nsw i32 %i.05, 1
33+
%cmp = icmp slt i32 %i.05, %2
34+
br i1 %cmp, label %for.body, label %for.cond.for.end.loopexit_crit_edge
35+
36+
for.body:
37+
%i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.cond ]
38+
%idxprom = sext i32 %i.05 to i64
39+
%arrayidx = getelementptr inbounds i32* %1, i64 %idxprom
40+
%3 = load i32* %arrayidx, align 4
41+
%tobool = icmp eq i32 %3, 0
42+
br i1 %tobool, label %if.then, label %for.cond
43+
44+
if.then:
45+
%i.05.lcssa = phi i32 [ %i.05, %for.body ]
46+
store i32 %i.05.lcssa, i32* @idx, align 4
47+
br label %for.end
48+
49+
for.cond.for.end.loopexit_crit_edge:
50+
br label %for.end.loopexit
51+
52+
for.end.loopexit:
53+
br label %for.end
54+
55+
for.end:
56+
%4 = load i32* @idx, align 4
57+
ret i32 %4
58+
}
59+
60+
; CHECK-LABEL: @test2
61+
; CHECK: for.body4.us
62+
; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
63+
; CHECK: %cmp2.us = icmp slt i64
64+
; CHECK-NOT: %2 = trunc i64 %indvars.iv.next to i32
65+
; CHECK-NOT: %cmp2.us = icmp slt i32
66+
67+
define void @test2([8 x i8]* %a, i8* %b, i8 %limit) {
68+
entry:
69+
%conv = zext i8 %limit to i32
70+
%cmp23 = icmp eq i8 %limit, 0
71+
br i1 %cmp23, label %for.cond1.preheader, label %for.cond1.preheader.us
72+
73+
for.cond1.preheader.us:
74+
%storemerge5.us = phi i32 [ 0, %entry ], [ %inc14.us, %for.inc13.us ]
75+
br i1 true, label %for.body4.lr.ph.us, label %for.inc13.us
76+
77+
for.inc13.us:
78+
%inc14.us = add nsw i32 %storemerge5.us, 1
79+
%cmp.us = icmp slt i32 %inc14.us, 4
80+
br i1 %cmp.us, label %for.cond1.preheader.us, label %for.end
81+
82+
for.body4.us:
83+
%storemerge14.us = phi i32 [ 0, %for.body4.lr.ph.us ], [ %inc.us, %for.body4.us ]
84+
%idxprom.us = sext i32 %storemerge14.us to i64
85+
%arrayidx6.us = getelementptr inbounds [8 x i8]* %a, i64 %idxprom5.us, i64 %idxprom.us
86+
%0 = load i8* %arrayidx6.us, align 1
87+
%idxprom7.us = zext i8 %0 to i64
88+
%arrayidx8.us = getelementptr inbounds i8* %b, i64 %idxprom7.us
89+
%1 = load i8* %arrayidx8.us, align 1
90+
store i8 %1, i8* %arrayidx6.us, align 1
91+
%inc.us = add nsw i32 %storemerge14.us, 1
92+
%cmp2.us = icmp slt i32 %inc.us, %conv
93+
br i1 %cmp2.us, label %for.body4.us, label %for.inc13.us
94+
95+
for.body4.lr.ph.us:
96+
%idxprom5.us = sext i32 %storemerge5.us to i64
97+
br label %for.body4.us
98+
99+
for.cond1.preheader:
100+
%storemerge5 = phi i32 [ 0, %entry ], [ %inc14, %for.inc13 ]
101+
br i1 false, label %for.inc13, label %for.inc13
102+
103+
for.inc13:
104+
%inc14 = add nsw i32 %storemerge5, 1
105+
%cmp = icmp slt i32 %inc14, 4
106+
br i1 %cmp, label %for.cond1.preheader, label %for.end
107+
108+
for.end:
109+
ret void
110+
}
111+
112+
; CHECK-LABEL: @test3
113+
; CHECK: sext i32 %b
114+
; CHECK: for.cond:
115+
; CHECK: phi i64
116+
; CHECK: icmp slt i64
117+
118+
define i32 @test3(i32* %a, i32 %b) {
119+
entry:
120+
br label %for.cond
121+
122+
for.cond:
123+
%sum.0 = phi i32 [ 0, %entry ], [ %add, %for.body ]
124+
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
125+
%cmp = icmp slt i32 %i.0, %b
126+
br i1 %cmp, label %for.body, label %for.end
127+
128+
for.body:
129+
%idxprom = sext i32 %i.0 to i64
130+
%arrayidx = getelementptr inbounds i32* %a, i64 %idxprom
131+
%0 = load i32* %arrayidx, align 4
132+
%add = add nsw i32 %sum.0, %0
133+
%inc = add nsw i32 %i.0, 1
134+
br label %for.cond
135+
136+
for.end:
137+
ret i32 %sum.0
138+
}
139+
140+
; CHECK-LABEL: @test4
141+
; CHECK: zext i32 %b
142+
; CHECK: for.cond:
143+
; CHECK: phi i64
144+
; CHECK: icmp ule i64
145+
146+
define i32 @test4(i32* %a, i32 %b) {
147+
entry:
148+
br label %for.cond
149+
150+
for.cond:
151+
%sum.0 = phi i32 [ 0, %entry ], [ %add, %for.body ]
152+
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
153+
%cmp = icmp ule i32 %i.0, %b
154+
br i1 %cmp, label %for.body, label %for.end
155+
156+
for.body:
157+
%idxprom = sext i32 %i.0 to i64
158+
%arrayidx = getelementptr inbounds i32* %a, i64 %idxprom
159+
%0 = load i32* %arrayidx, align 4
160+
%add = add nsw i32 %sum.0, %0
161+
%inc = add nsw i32 %i.0, 1
162+
br label %for.cond
163+
164+
for.end:
165+
ret i32 %sum.0
166+
}

0 commit comments

Comments
 (0)