Skip to content

Commit 480e7ab

Browse files
[InstCombine] Retain inbounds when canonicalising add+gep
When canonicalising add+gep to gep+gep, retain the inbounds qualifier when we can prove that both of the resulting geps will be inbounds. This is done by checking that one index is a loop induction variable starting at 0 and that the other index is the same in all loop iterations.
1 parent e309667 commit 480e7ab

File tree

6 files changed

+317
-48
lines changed

6 files changed

+317
-48
lines changed

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Lines changed: 45 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2450,10 +2450,51 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
24502450
// as:
24512451
// %newptr = getelementptr i32, ptr %ptr, i64 %idx1
24522452
// %newgep = getelementptr i32, ptr %newptr, i64 %idx2
2453-
auto *NewPtr = Builder.CreateGEP(GEP.getResultElementType(),
2454-
GEP.getPointerOperand(), Idx1);
2455-
return GetElementPtrInst::Create(GEP.getResultElementType(), NewPtr,
2456-
Idx2);
2453+
// If %gep is inbounds then %newgep can be inbounds only if %newptr is as
2454+
// well, as an inbounds gep requires the base pointer to be inbounds. We
2455+
// can mark %newptr as inbounds if we have a loop like
2456+
// for (i = 0; ...)
2457+
// ptr[i+x]
2458+
// If x is the same in each loop iteration then we know that we have a
2459+
// series of geps starting with ptr[x], which means that ptr[x] must be
2460+
// inbounds.
2461+
auto CheckIdx = [&](Value *LoopIdx, Value *FixedIdx) {
2462+
// Check that LoopIdx is a loop induction variable that starts at 0.
2463+
auto *PHI = dyn_cast<PHINode>(LoopIdx);
2464+
BinaryOperator *BO;
2465+
Value *Start, *End;
2466+
if (!PHI || !matchSimpleRecurrence(PHI, BO, Start, End) ||
2467+
!match(Start, m_Zero()))
2468+
return false;
2469+
// If FixedIdx dominates the phi then it's the same in each loop
2470+
// iteration.
2471+
if (DT.dominates(FixedIdx, PHI))
2472+
return true;
2473+
// If FixedIdx is a binary expression of values that dominate the phi
2474+
// then it's the same in each loop iteration.
2475+
Value *Left, *Right;
2476+
if (match(FixedIdx, m_BinOp(m_Value(Left), m_Value(Right))) &&
2477+
DT.dominates(Left, PHI) && DT.dominates(Right, PHI))
2478+
return true;
2479+
// We can't handle anything else.
2480+
return false;
2481+
};
2482+
bool InBounds = false;
2483+
if (GEP.isInBounds()) {
2484+
if (CheckIdx(Idx2, Idx1)) {
2485+
InBounds = true;
2486+
} else if (CheckIdx(Idx1, Idx2)) {
2487+
std::swap(Idx1, Idx2);
2488+
InBounds = true;
2489+
}
2490+
}
2491+
auto *NewPtr =
2492+
Builder.CreateGEP(GEP.getResultElementType(), GEP.getPointerOperand(),
2493+
Idx1, "", InBounds);
2494+
auto *NewGEP =
2495+
GetElementPtrInst::Create(GEP.getResultElementType(), NewPtr, Idx2);
2496+
NewGEP->setIsInBounds(InBounds);
2497+
return NewGEP;
24572498
}
24582499
ConstantInt *C;
24592500
if (match(GEP.getOperand(1), m_OneUse(m_SExt(m_OneUse(m_NSWAdd(
Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
; RUN: opt < %s -S -passes=instcombine | FileCheck %s
2+
3+
target datalayout = "e-p:32:32"
4+
5+
; CHECK-LABEL: @test1
6+
define void @test1(i32 %N, i32 %k, ptr %A) {
7+
entry:
8+
br label %for.cond
9+
10+
for.cond:
11+
%i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
12+
%cmp = icmp ult i32 %i, %N
13+
br i1 %cmp, label %for.body, label %for.end
14+
15+
; CHECK-LABEL: for.body:
16+
; CHECK: [[GEP:%.*]] = getelementptr inbounds i8, ptr %A, i32 %k
17+
; CHECK-NEXT: %arrayidx = getelementptr inbounds i8, ptr [[GEP]], i32 %i
18+
for.body:
19+
%add = add i32 %i, %k
20+
%arrayidx = getelementptr inbounds i8, ptr %A, i32 %add
21+
store i8 1, ptr %arrayidx, align 4
22+
%inc = add i32 %i, 1
23+
br label %for.cond
24+
25+
for.end:
26+
ret void
27+
}
28+
29+
; CHECK-LABEL: @test2
30+
define void @test2(i32 %N, i32 %k, ptr %A) {
31+
entry:
32+
br label %for.cond
33+
34+
for.cond:
35+
%i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
36+
%cmp = icmp ult i32 %i, %N
37+
br i1 %cmp, label %for.body, label %for.end
38+
39+
; CHECK-LABEL: for.body:
40+
; CHECK: [[GEP:%.*]] = getelementptr inbounds i8, ptr %A, i32 %mul
41+
; CHECK-NEXT: %arrayidx = getelementptr inbounds i8, ptr [[GEP]], i32 %i
42+
for.body:
43+
%mul = mul i32 %k, 42
44+
%add = add i32 %i, %mul
45+
%arrayidx = getelementptr inbounds i8, ptr %A, i32 %add
46+
store i8 1, ptr %arrayidx, align 4
47+
%inc = add i32 %i, 1
48+
br label %for.cond
49+
50+
for.end:
51+
ret void
52+
}
53+
54+
; CHECK-LABEL: @test3
55+
define void @test3(i32 %N, ptr %A, i32 %val) {
56+
entry:
57+
br label %for.cond
58+
59+
for.cond:
60+
%i = phi i32 [ 0, %entry ], [ %inc6, %for.inc5 ]
61+
%cmp = icmp ult i32 %i, %N
62+
br i1 %cmp, label %for.body, label %for.end7
63+
64+
for.body:
65+
br label %for.cond1
66+
67+
for.cond1:
68+
%j = phi i32 [ 0, %for.body ], [ %inc, %for.body3 ]
69+
%cmp2 = icmp ult i32 %j, %N
70+
br i1 %cmp2, label %for.body3, label %for.inc5
71+
72+
; CHECK-LABEL: for.body3:
73+
; CHECK: [[GEP:%.*]] = getelementptr inbounds i8, ptr %A, i32 %i
74+
; CHECK-NEXT: %arrayidx = getelementptr inbounds i8, ptr [[GEP]], i32 %j
75+
for.body3:
76+
%add = add i32 %i, %j
77+
%arrayidx = getelementptr inbounds i8, ptr %A, i32 %add
78+
store i8 1, ptr %arrayidx, align 4
79+
%inc = add i32 %j, 1
80+
br label %for.cond1
81+
82+
for.inc5:
83+
%inc6 = add i32 %i, 1
84+
br label %for.cond
85+
86+
for.end7:
87+
ret void
88+
}
89+
90+
; CHECK-LABEL: @test4
91+
define void @test4(i32 %N, ptr %A, i32 %val) {
92+
entry:
93+
br label %for.cond
94+
95+
for.cond:
96+
%i = phi i32 [ 0, %entry ], [ %inc6, %for.inc5 ]
97+
%cmp = icmp ult i32 %i, %N
98+
br i1 %cmp, label %for.body, label %for.end7
99+
100+
for.body:
101+
br label %for.cond1
102+
103+
for.cond1:
104+
%j = phi i32 [ 0, %for.body ], [ %inc, %for.body3 ]
105+
%cmp2 = icmp ult i32 %j, %N
106+
br i1 %cmp2, label %for.body3, label %for.inc5
107+
108+
; CHECK-LABEL: for.body3:
109+
; CHECK: [[GEP:%.*]] = getelementptr inbounds i8, ptr %A, i32 %mul
110+
; CHECK-NEXT: %arrayidx = getelementptr inbounds i8, ptr [[GEP]], i32 %j
111+
for.body3:
112+
%mul = mul i32 %i, %N
113+
%add = add i32 %mul, %j
114+
%arrayidx = getelementptr inbounds i8, ptr %A, i32 %add
115+
store i8 1, ptr %arrayidx, align 4
116+
%inc = add i32 %j, 1
117+
br label %for.cond1
118+
119+
for.inc5:
120+
%inc6 = add i32 %i, 1
121+
br label %for.cond
122+
123+
for.end7:
124+
ret void
125+
}
126+
127+
; We can't use inbounds here because the add operand doesn't dominate the loop
128+
; CHECK-LABEL: @test5
129+
define void @test5(i32 %N, ptr %A, ptr %B) {
130+
entry:
131+
br label %for.cond
132+
133+
for.cond:
134+
%i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
135+
%cmp = icmp ult i32 %i, %N
136+
br i1 %cmp, label %for.body, label %for.end
137+
138+
; CHECK-LABEL: for.body:
139+
; CHECK: [[GEP:%.*]] = getelementptr i8, ptr %A, i32 %i
140+
; CHECK-NEXT: %arrayidx = getelementptr i8, ptr [[GEP]], i32 %0
141+
for.body:
142+
%0 = load i32, ptr %B, align 4
143+
%add = add i32 %i, %0
144+
%arrayidx = getelementptr inbounds i8, ptr %A, i32 %add
145+
store i8 1, ptr %arrayidx, align 4
146+
%inc = add i32 %i, 1
147+
br label %for.cond
148+
149+
for.end:
150+
ret void
151+
}
152+
153+
; We can't use inbounds here because we don't have a loop
154+
; CHECK-LABEL: @test6
155+
define void @test6(i32 %k, i32 %j, ptr %A) {
156+
entry:
157+
%cmp = icmp ugt i32 %k, 10
158+
br i1 %cmp, label %if.then, label %if.else
159+
160+
if.then:
161+
br label %if.end
162+
163+
if.else:
164+
br label %if.end
165+
166+
; CHECK-LABEL: if.end:
167+
; CHECK: [[GEP:%.*]] = getelementptr i8, ptr %A, i32 %val
168+
; CHECK-NEXT: %arrayidx = getelementptr i8, ptr [[GEP]], i32 %j
169+
if.end:
170+
%val = phi i32 [ 0, %if.then ], [ 1, %if.else ]
171+
%add = add i32 %val, %j
172+
%arrayidx = getelementptr inbounds i8, ptr %A, i32 %add
173+
store i8 1, ptr %arrayidx, align 4
174+
ret void
175+
}
176+
177+
; An inbounds gep+gep would be invalid here because the i16 add can overflow.
178+
; However, we don't convert to gep+gep in this case: an explicit sext of the add
179+
; is inserted instead of using an i16 gep offset.
180+
; CHECK-LABEL: @test7
181+
define void @test7(i16 %N, i16 %k, ptr %A) {
182+
entry:
183+
br label %for.cond
184+
185+
for.cond:
186+
%i = phi i16 [ 0, %entry ], [ %inc, %for.body ]
187+
%cmp = icmp ult i16 %i, %N
188+
br i1 %cmp, label %for.body, label %for.end
189+
190+
; CHECK-LABEL: for.body:
191+
; CHECK: %add = add i16 %i, %k
192+
; CHECK-NEXT: [[SEXT:%.*]] = sext i16 %add to i32
193+
; CHECK-NEXT: %arrayidx = getelementptr inbounds i8, ptr %A, i32 [[SEXT]]
194+
for.body:
195+
%add = add i16 %i, %k
196+
%arrayidx = getelementptr inbounds i8, ptr %A, i16 %add
197+
store i8 1, ptr %arrayidx, align 4
198+
%inc = add i16 %i, 1
199+
br label %for.cond
200+
201+
for.end:
202+
ret void
203+
}
204+
205+
; %i starts at 1 so we can't use inbounds
206+
; CHECK-LABEL: @test8
207+
define void @test8(i32 %N, i32 %k, ptr %A) {
208+
entry:
209+
br label %for.cond
210+
211+
for.cond:
212+
%i = phi i32 [ 1, %entry ], [ %inc, %for.body ]
213+
%cmp = icmp ult i32 %i, %N
214+
br i1 %cmp, label %for.body, label %for.end
215+
216+
; CHECK-LABEL: for.body:
217+
; CHECK: [[GEP:%.*]] = getelementptr i8, ptr %A, i32 %i
218+
; CHECK-NEXT: %arrayidx = getelementptr i8, ptr [[GEP]], i32 %k
219+
for.body:
220+
%add = add i32 %i, %k
221+
%arrayidx = getelementptr inbounds i8, ptr %A, i32 %add
222+
store i8 1, ptr %arrayidx, align 4
223+
%inc = add i32 %i, 1
224+
br label %for.cond
225+
226+
for.end:
227+
ret void
228+
}

llvm/test/Transforms/InstCombine/mem-par-metadata-memcpy.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ define void @_Z4testPcl(ptr %out, i64 %size) {
2323
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
2424
; CHECK: for.body:
2525
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[OUT:%.*]], i64 [[I_0]]
26-
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[OUT]], i64 [[I_0]]
27-
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[SIZE]]
26+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[OUT]], i64 [[SIZE]]
27+
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 [[I_0]]
2828
; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX1]], align 1, !llvm.access.group [[ACC_GRP0:![0-9]+]]
2929
; CHECK-NEXT: store i16 [[TMP1]], ptr [[ARRAYIDX]], align 1, !llvm.access.group [[ACC_GRP0]]
3030
; CHECK-NEXT: br label [[FOR_INC]]

llvm/test/Transforms/LoopVectorize/induction.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -348,11 +348,11 @@ define void @scalar_use(ptr %a, float %b, i64 %offset, i64 %offset2, i64 %n) {
348348
; IND-NEXT: br label [[VECTOR_BODY:%.*]]
349349
; IND: vector.body:
350350
; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
351-
; IND-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]]
352-
; IND-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[TMP5]], i64 [[OFFSET]]
351+
; IND-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[OFFSET]]
352+
; IND-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[INDEX]]
353353
; IND-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]]
354-
; IND-NEXT: [[TMP7:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]]
355-
; IND-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[TMP7]], i64 [[OFFSET2]]
354+
; IND-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[OFFSET2]]
355+
; IND-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[INDEX]]
356356
; IND-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x float>, ptr [[TMP8]], align 4, !alias.scope [[META7]]
357357
; IND-NEXT: [[TMP9:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD4]]
358358
; IND-NEXT: [[TMP10:%.*]] = fadd fast <2 x float> [[WIDE_LOAD]], [[TMP9]]
@@ -408,13 +408,13 @@ define void @scalar_use(ptr %a, float %b, i64 %offset, i64 %offset2, i64 %n) {
408408
; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
409409
; UNROLL: vector.body:
410410
; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
411-
; UNROLL-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]]
412-
; UNROLL-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[TMP5]], i64 [[OFFSET]]
411+
; UNROLL-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[OFFSET]]
412+
; UNROLL-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[INDEX]]
413413
; UNROLL-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]]
414414
; UNROLL-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 2
415415
; UNROLL-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x float>, ptr [[TMP7]], align 4, !alias.scope [[META4]], !noalias [[META7]]
416-
; UNROLL-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]]
417-
; UNROLL-NEXT: [[TMP9:%.*]] = getelementptr float, ptr [[TMP8]], i64 [[OFFSET2]]
416+
; UNROLL-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[OFFSET2]]
417+
; UNROLL-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 [[INDEX]]
418418
; UNROLL-NEXT: [[WIDE_LOAD5:%.*]] = load <2 x float>, ptr [[TMP9]], align 4, !alias.scope [[META7]]
419419
; UNROLL-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 2
420420
; UNROLL-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x float>, ptr [[TMP10]], align 4, !alias.scope [[META7]]
@@ -551,13 +551,13 @@ define void @scalar_use(ptr %a, float %b, i64 %offset, i64 %offset2, i64 %n) {
551551
; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
552552
; INTERLEAVE: vector.body:
553553
; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
554-
; INTERLEAVE-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]]
555-
; INTERLEAVE-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[TMP5]], i64 [[OFFSET]]
554+
; INTERLEAVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[OFFSET]]
555+
; INTERLEAVE-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[INDEX]]
556556
; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP6]], align 4, !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]]
557557
; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 4
558558
; INTERLEAVE-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP7]], align 4, !alias.scope [[META4]], !noalias [[META7]]
559-
; INTERLEAVE-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]]
560-
; INTERLEAVE-NEXT: [[TMP9:%.*]] = getelementptr float, ptr [[TMP8]], i64 [[OFFSET2]]
559+
; INTERLEAVE-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[OFFSET2]]
560+
; INTERLEAVE-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 [[INDEX]]
561561
; INTERLEAVE-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP9]], align 4, !alias.scope [[META7]]
562562
; INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 4
563563
; INTERLEAVE-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP10]], align 4, !alias.scope [[META7]]

0 commit comments

Comments
 (0)