Skip to content

Commit 91ee1e3

Browse files
committed
[LV] Add initial test cases with small clamped indices.
1 parent 4ff440b commit 91ee1e3

File tree

1 file changed

+358
-0
lines changed

1 file changed

+358
-0
lines changed
Lines changed: 358 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,358 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -loop-vectorize -force-vector-width=2 -S %s | FileCheck %s
3+
4+
; Tests where the indices of some accesses are clamped to a small value.
5+
; FIXME: The runtime checks currently require that the indices do not wrap.
6+
; The clamped indices do wrap, so the vector loops are currently
7+
; dead.
8+
9+
; The relevant bounds for %gep.A are [%A, %A+4).
10+
; Test: the load address is clamped, the store address is not.
; Each iteration loads A[iv urem 4], adds 10 and stores the sum to B[iv];
; the loop exits once %iv.next equals %N.
; The autogenerated assertions below expect a vector loop (VF=2) guarded by
; both a vector.scevcheck and a vector.memcheck block, followed by the
; original scalar loop.
define void @load_clamped_index(i32* %A, i32* %B, i32 %N) {
11+
; CHECK-LABEL: @load_clamped_index(
12+
; CHECK-NEXT: entry:
13+
; CHECK-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
14+
; CHECK-NEXT: [[A3:%.*]] = bitcast i32* [[A:%.*]] to i8*
15+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N:%.*]], 2
16+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
17+
; CHECK: vector.scevcheck:
18+
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
19+
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2
20+
; CHECK-NEXT: [[MUL:%.*]] = call { i2, i1 } @llvm.umul.with.overflow.i2(i2 1, i2 [[TMP1]])
21+
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i2, i1 } [[MUL]], 0
22+
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i2, i1 } [[MUL]], 1
23+
; CHECK-NEXT: [[TMP2:%.*]] = add i2 0, [[MUL_RESULT]]
24+
; CHECK-NEXT: [[TMP3:%.*]] = sub i2 0, [[MUL_RESULT]]
25+
; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i2 [[TMP3]], 0
26+
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i2 [[TMP2]], 0
27+
; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
28+
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3
29+
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
30+
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
31+
; CHECK-NEXT: [[TMP10:%.*]] = or i1 false, [[TMP9]]
32+
; CHECK-NEXT: br i1 [[TMP10]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
33+
; CHECK: vector.memcheck:
34+
; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[N]], -1
35+
; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64
36+
; CHECK-NEXT: [[TMP13:%.*]] = add nuw nsw i64 [[TMP12]], 1
37+
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[TMP13]]
38+
; CHECK-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
39+
; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP13]]
40+
; CHECK-NEXT: [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8*
41+
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP45]]
42+
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[A3]], [[SCEVGEP2]]
43+
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
44+
; CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[FOUND_CONFLICT]], true
45+
; CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
46+
; CHECK: vector.ph:
47+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 2
48+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
49+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
50+
; CHECK: vector.body:
51+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
52+
; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[INDEX]], 0
53+
; CHECK-NEXT: [[TMP15:%.*]] = urem i32 [[TMP14]], 4
54+
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP15]]
55+
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP16]], i32 0
56+
; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP17]] to <2 x i32>*
57+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP18]], align 4, !alias.scope !0
58+
; CHECK-NEXT: [[TMP19:%.*]] = add <2 x i32> [[WIDE_LOAD]], <i32 10, i32 10>
59+
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[TMP14]]
60+
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 0
61+
; CHECK-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <2 x i32>*
62+
; CHECK-NEXT: store <2 x i32> [[TMP19]], <2 x i32>* [[TMP22]], align 4, !alias.scope !3, !noalias !0
63+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
64+
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
65+
; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
66+
; CHECK: middle.block:
67+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
68+
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
69+
; CHECK: scalar.ph:
70+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ]
71+
; CHECK-NEXT: br label [[LOOP:%.*]]
72+
; CHECK: loop:
73+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
74+
; CHECK-NEXT: [[CLAMPED_INDEX:%.*]] = urem i32 [[IV]], 4
75+
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[CLAMPED_INDEX]]
76+
; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[GEP_A]], align 4
77+
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LV]], 10
78+
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[IV]]
79+
; CHECK-NEXT: store i32 [[ADD]], i32* [[GEP_B]], align 4
80+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
81+
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]]
82+
; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
83+
; CHECK: exit:
84+
; CHECK-NEXT: ret void
85+
;
86+
entry:
87+
br label %loop
88+
89+
loop:
90+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
91+
; Clamp the load index into [0, 3], so %gep.A only addresses A[0..3].
%clamped.index = urem i32 %iv, 4
92+
%gep.A = getelementptr inbounds i32, i32* %A, i32 %clamped.index
93+
%lv = load i32, i32* %gep.A
94+
%add = add i32 %lv, 10
95+
; The store uses the un-clamped induction variable.
%gep.B = getelementptr inbounds i32, i32* %B, i32 %iv
96+
store i32 %add, i32* %gep.B
97+
%iv.next = add nuw nsw i32 %iv, 1
98+
%cond = icmp eq i32 %iv.next, %N
99+
br i1 %cond, label %exit, label %loop
100+
101+
exit:
102+
ret void
103+
}
104+
105+
; The relevant bounds for %gep.A are [%A, %A+4).
106+
; Test: the store address is clamped, the load address is not.
; Each iteration loads B[iv], adds 10 and stores the sum to A[iv urem 4];
; the loop exits once %iv.next equals %N.
; The autogenerated assertions below expect a vector loop (VF=2) guarded by
; both a vector.scevcheck and a vector.memcheck block, followed by the
; original scalar loop.
define void @store_clamped_index(i32* %A, i32* %B, i32 %N) {
107+
; CHECK-LABEL: @store_clamped_index(
108+
; CHECK-NEXT: entry:
109+
; CHECK-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
110+
; CHECK-NEXT: [[A3:%.*]] = bitcast i32* [[A:%.*]] to i8*
111+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N:%.*]], 2
112+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
113+
; CHECK: vector.scevcheck:
114+
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
115+
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2
116+
; CHECK-NEXT: [[MUL:%.*]] = call { i2, i1 } @llvm.umul.with.overflow.i2(i2 1, i2 [[TMP1]])
117+
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i2, i1 } [[MUL]], 0
118+
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i2, i1 } [[MUL]], 1
119+
; CHECK-NEXT: [[TMP2:%.*]] = add i2 0, [[MUL_RESULT]]
120+
; CHECK-NEXT: [[TMP3:%.*]] = sub i2 0, [[MUL_RESULT]]
121+
; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i2 [[TMP3]], 0
122+
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i2 [[TMP2]], 0
123+
; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
124+
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3
125+
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
126+
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
127+
; CHECK-NEXT: [[TMP10:%.*]] = or i1 false, [[TMP9]]
128+
; CHECK-NEXT: br i1 [[TMP10]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
129+
; CHECK: vector.memcheck:
130+
; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[N]], -1
131+
; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64
132+
; CHECK-NEXT: [[TMP13:%.*]] = add nuw nsw i64 [[TMP12]], 1
133+
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[TMP13]]
134+
; CHECK-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
135+
; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP13]]
136+
; CHECK-NEXT: [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8*
137+
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP45]]
138+
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[A3]], [[SCEVGEP2]]
139+
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
140+
; CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[FOUND_CONFLICT]], true
141+
; CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
142+
; CHECK: vector.ph:
143+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 2
144+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
145+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
146+
; CHECK: vector.body:
147+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
148+
; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[INDEX]], 0
149+
; CHECK-NEXT: [[TMP15:%.*]] = urem i32 [[TMP14]], 4
150+
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[TMP14]]
151+
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP16]], i32 0
152+
; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP17]] to <2 x i32>*
153+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP18]], align 4, !alias.scope !8, !noalias !11
154+
; CHECK-NEXT: [[TMP19:%.*]] = add <2 x i32> [[WIDE_LOAD]], <i32 10, i32 10>
155+
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP15]]
156+
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 0
157+
; CHECK-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <2 x i32>*
158+
; CHECK-NEXT: store <2 x i32> [[TMP19]], <2 x i32>* [[TMP22]], align 4, !alias.scope !11
159+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
160+
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
161+
; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
162+
; CHECK: middle.block:
163+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
164+
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
165+
; CHECK: scalar.ph:
166+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ]
167+
; CHECK-NEXT: br label [[LOOP:%.*]]
168+
; CHECK: loop:
169+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
170+
; CHECK-NEXT: [[CLAMPED_INDEX:%.*]] = urem i32 [[IV]], 4
171+
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[IV]]
172+
; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[GEP_B]], align 4
173+
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LV]], 10
174+
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[CLAMPED_INDEX]]
175+
; CHECK-NEXT: store i32 [[ADD]], i32* [[GEP_A]], align 4
176+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
177+
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]]
178+
; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP14:![0-9]+]]
179+
; CHECK: exit:
180+
; CHECK-NEXT: ret void
181+
;
182+
entry:
183+
br label %loop
184+
185+
loop:
186+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
187+
; Clamp the store index into [0, 3]; it is consumed by %gep.A below.
%clamped.index = urem i32 %iv, 4
188+
; The load uses the un-clamped induction variable.
%gep.B = getelementptr inbounds i32, i32* %B, i32 %iv
189+
%lv = load i32, i32* %gep.B
190+
%add = add i32 %lv, 10
191+
%gep.A = getelementptr inbounds i32, i32* %A, i32 %clamped.index
192+
store i32 %add, i32* %gep.A
193+
%iv.next = add nuw nsw i32 %iv, 1
194+
%cond = icmp eq i32 %iv.next, %N
195+
br i1 %cond, label %exit, label %loop
196+
197+
exit:
198+
ret void
199+
}
200+
201+
; Test: a clamped store to %A together with an un-clamped load from %A.
; Each iteration loads B[iv] and A[iv], adds them, and stores the sum to
; A[(iv + 1) urem 4]; the loop exits once %iv.next equals %N.
; The autogenerated assertions below match only the original scalar loop;
; no vector blocks are expected in the output.
define void @clamped_index_dependence_non_clamped(i32* %A, i32* %B, i32 %N) {
202+
; CHECK-LABEL: @clamped_index_dependence_non_clamped(
203+
; CHECK-NEXT: entry:
204+
; CHECK-NEXT: br label [[LOOP:%.*]]
205+
; CHECK: loop:
206+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
207+
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[IV]]
208+
; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[GEP_B]], align 4
209+
; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[IV]]
210+
; CHECK-NEXT: [[LV_A:%.*]] = load i32, i32* [[GEP_A_1]], align 4
211+
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LV]], [[LV_A]]
212+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
213+
; CHECK-NEXT: [[CLAMPED_INDEX:%.*]] = urem i32 [[IV_NEXT]], 4
214+
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[CLAMPED_INDEX]]
215+
; CHECK-NEXT: store i32 [[ADD]], i32* [[GEP_A]], align 4
216+
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV_NEXT]], [[N:%.*]]
217+
; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]]
218+
; CHECK: exit:
219+
; CHECK-NEXT: ret void
220+
;
221+
entry:
222+
br label %loop
223+
224+
loop:
225+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
226+
%gep.B = getelementptr inbounds i32, i32* %B, i32 %iv
227+
%lv = load i32, i32* %gep.B
228+
; Un-clamped load from %A, indexed directly by the induction variable.
%gep.A.1 = getelementptr inbounds i32, i32* %A, i32 %iv
229+
%lv.A = load i32, i32* %gep.A.1
230+
%add = add i32 %lv, %lv.A
231+
232+
%iv.next = add nuw nsw i32 %iv, 1
233+
; Clamped store to %A, indexed by the incremented induction variable urem 4.
%clamped.index = urem i32 %iv.next, 4
234+
%gep.A = getelementptr inbounds i32, i32* %A, i32 %clamped.index
235+
store i32 %add, i32* %gep.A
236+
%cond = icmp eq i32 %iv.next, %N
237+
br i1 %cond, label %exit, label %loop
238+
239+
exit:
240+
ret void
241+
}
242+
243+
; Test: a clamped load and a clamped store on %A with different indices.
; Each iteration loads A[iv urem 4], adds 10, and stores the sum to
; A[(iv + 1) urem 4]; the loop exits once %iv.next equals %N.
; %B is not referenced by the loop body.
; The autogenerated assertions below match only the original scalar loop;
; no vector blocks are expected in the output.
define void @clamped_index_dependence_clamped_index(i32* %A, i32* %B, i32 %N) {
244+
; CHECK-LABEL: @clamped_index_dependence_clamped_index(
245+
; CHECK-NEXT: entry:
246+
; CHECK-NEXT: br label [[LOOP:%.*]]
247+
; CHECK: loop:
248+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
249+
; CHECK-NEXT: [[CLAMPED_INDEX_1:%.*]] = urem i32 [[IV]], 4
250+
; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[CLAMPED_INDEX_1]]
251+
; CHECK-NEXT: [[LV_A:%.*]] = load i32, i32* [[GEP_A_1]], align 4
252+
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LV_A]], 10
253+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
254+
; CHECK-NEXT: [[CLAMPED_INDEX:%.*]] = urem i32 [[IV_NEXT]], 4
255+
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[CLAMPED_INDEX]]
256+
; CHECK-NEXT: store i32 [[ADD]], i32* [[GEP_A]], align 4
257+
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV_NEXT]], [[N:%.*]]
258+
; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]]
259+
; CHECK: exit:
260+
; CHECK-NEXT: ret void
261+
;
262+
entry:
263+
br label %loop
264+
265+
loop:
266+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
267+
; Clamped load index: iv urem 4.
%clamped.index.1 = urem i32 %iv, 4
268+
%gep.A.1 = getelementptr inbounds i32, i32* %A, i32 %clamped.index.1
269+
%lv.A = load i32, i32* %gep.A.1
270+
%add = add i32 %lv.A, 10
271+
272+
%iv.next = add nuw nsw i32 %iv, 1
273+
; Clamped store index: (iv + 1) urem 4.
%clamped.index = urem i32 %iv.next, 4
274+
%gep.A = getelementptr inbounds i32, i32* %A, i32 %clamped.index
275+
store i32 %add, i32* %gep.A
276+
%cond = icmp eq i32 %iv.next, %N
277+
br i1 %cond, label %exit, label %loop
278+
279+
exit:
280+
ret void
281+
}
282+
283+
; Test: the load and the store use the same clamped address.
; Each iteration loads A[iv urem 4], adds 10, and stores the sum back through
; the same pointer %gep.A; the loop exits once %iv.next equals %N.
; %B is not referenced by the loop body.
; The autogenerated assertions below expect a vector loop (VF=2) guarded by a
; vector.scevcheck block only (no vector.memcheck block), followed by the
; original scalar loop.
define void @clamped_index_equal_dependence(i32* %A, i32* %B, i32 %N) {
284+
; CHECK-LABEL: @clamped_index_equal_dependence(
285+
; CHECK-NEXT: entry:
286+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N:%.*]], 2
287+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
288+
; CHECK: vector.scevcheck:
289+
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
290+
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2
291+
; CHECK-NEXT: [[MUL:%.*]] = call { i2, i1 } @llvm.umul.with.overflow.i2(i2 1, i2 [[TMP1]])
292+
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i2, i1 } [[MUL]], 0
293+
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i2, i1 } [[MUL]], 1
294+
; CHECK-NEXT: [[TMP2:%.*]] = add i2 0, [[MUL_RESULT]]
295+
; CHECK-NEXT: [[TMP3:%.*]] = sub i2 0, [[MUL_RESULT]]
296+
; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i2 [[TMP3]], 0
297+
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i2 [[TMP2]], 0
298+
; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
299+
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3
300+
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
301+
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
302+
; CHECK-NEXT: [[TMP10:%.*]] = or i1 false, [[TMP9]]
303+
; CHECK-NEXT: br i1 [[TMP10]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
304+
; CHECK: vector.ph:
305+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 2
306+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
307+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
308+
; CHECK: vector.body:
309+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
310+
; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[INDEX]], 0
311+
; CHECK-NEXT: [[TMP12:%.*]] = urem i32 [[TMP11]], 4
312+
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP12]]
313+
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i32 0
314+
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <2 x i32>*
315+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP15]], align 4
316+
; CHECK-NEXT: [[TMP16:%.*]] = add <2 x i32> [[WIDE_LOAD]], <i32 10, i32 10>
317+
; CHECK-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP14]] to <2 x i32>*
318+
; CHECK-NEXT: store <2 x i32> [[TMP16]], <2 x i32>* [[TMP17]], align 4
319+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
320+
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
321+
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
322+
; CHECK: middle.block:
323+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
324+
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
325+
; CHECK: scalar.ph:
326+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
327+
; CHECK-NEXT: br label [[LOOP:%.*]]
328+
; CHECK: loop:
329+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
330+
; CHECK-NEXT: [[CLAMPED_INDEX:%.*]] = urem i32 [[IV]], 4
331+
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[CLAMPED_INDEX]]
332+
; CHECK-NEXT: [[LV_A:%.*]] = load i32, i32* [[GEP_A]], align 4
333+
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LV_A]], 10
334+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
335+
; CHECK-NEXT: store i32 [[ADD]], i32* [[GEP_A]], align 4
336+
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]]
337+
; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP16:![0-9]+]]
338+
; CHECK: exit:
339+
; CHECK-NEXT: ret void
340+
;
341+
entry:
342+
br label %loop
343+
344+
loop:
345+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
346+
; Clamp the index into [0, 3]; %gep.A is used by both the load and the store.
%clamped.index = urem i32 %iv, 4
347+
%gep.A = getelementptr inbounds i32, i32* %A, i32 %clamped.index
348+
%lv.A = load i32, i32* %gep.A
349+
%add = add i32 %lv.A, 10
350+
351+
%iv.next = add nuw nsw i32 %iv, 1
352+
; Store back through the same pointer that was loaded from.
store i32 %add, i32* %gep.A
353+
%cond = icmp eq i32 %iv.next, %N
354+
br i1 %cond, label %exit, label %loop
355+
356+
exit:
357+
ret void
358+
}

0 commit comments

Comments
 (0)