Skip to content

Commit cf5db39

Browse files
committed
[LV] Add tests with trip counts containing UDIV expressions.
Add test cases for #89958.
1 parent 18ba0cc commit cf5db39

File tree

1 file changed

+368
-0
lines changed

1 file changed

+368
-0
lines changed
Lines changed: 368 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,368 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -p loop-vectorize -force-vector-width=4 -S %s | FileCheck %s
3+
4+
; Test cases with trip counts containing UDIV expressions for
5+
; https://github.com/llvm/llvm-project/issues/89958.
6+
7+
; Loop with two exits. The header stores 1 to %dst[%iv], computes
; %d = 42 udiv %N and exits (returning 1) unless %iv < %d; the latch exits
; (returning 0) unless %iv < %N. Both compares are signed. Because the udiv
; sits in the header, it is executed on every iteration, including the first.
; The CHECK lines expect the loop to be vectorized, with the udiv expanded in
; the entry block ahead of the minimum-iteration check.
define i64 @multi_exit_1_exit_count_with_udiv_in_header(ptr %dst, i64 %N) {
8+
; CHECK-LABEL: define i64 @multi_exit_1_exit_count_with_udiv_in_header(
9+
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
10+
; CHECK-NEXT: entry:
11+
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0)
12+
; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 42, [[N]]
13+
; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[SMAX]], i64 [[TMP0]])
14+
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[UMIN]], 1
15+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP1]], 4
16+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
17+
; CHECK: vector.ph:
18+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 4
19+
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
20+
; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i64 4, i64 [[N_MOD_VF]]
21+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[TMP3]]
22+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
23+
; CHECK: vector.body:
24+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
25+
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
26+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP4]]
27+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
28+
; CHECK-NEXT: store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[TMP6]], align 4
29+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
30+
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
31+
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
32+
; CHECK: middle.block:
33+
; CHECK-NEXT: br label [[SCALAR_PH]]
34+
; CHECK: scalar.ph:
35+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
36+
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
37+
; CHECK: loop.header:
38+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
39+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]]
40+
; CHECK-NEXT: store i32 1, ptr [[GEP]], align 4
41+
; CHECK-NEXT: [[D:%.*]] = udiv i64 42, [[N]]
42+
; CHECK-NEXT: [[C_1:%.*]] = icmp slt i64 [[IV]], [[D]]
43+
; CHECK-NEXT: br i1 [[C_1]], label [[LOOP_LATCH]], label [[EXIT:%.*]]
44+
; CHECK: loop.latch:
45+
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
46+
; CHECK-NEXT: [[C_0:%.*]] = icmp slt i64 [[IV]], [[N]]
47+
; CHECK-NEXT: br i1 [[C_0]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
48+
; CHECK: exit:
49+
; CHECK-NEXT: [[P:%.*]] = phi i64 [ 1, [[LOOP_HEADER]] ], [ 0, [[LOOP_LATCH]] ]
50+
; CHECK-NEXT: ret i64 [[P]]
51+
;
52+
entry:
53+
br label %loop.header
54+
55+
loop.header:
56+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
57+
%gep = getelementptr inbounds i32, ptr %dst, i64 %iv
58+
store i32 1, ptr %gep
59+
; The udiv sits in the header, so it is evaluated before either exit can be taken.
%d = udiv i64 42, %N
60+
%c.1 = icmp slt i64 %iv, %d
61+
br i1 %c.1, label %loop.latch, label %exit
62+
63+
loop.latch:
64+
%iv.next = add i64 %iv, 1
65+
; Second exit condition; note it tests the pre-increment %iv, not %iv.next.
%c.0 = icmp slt i64 %iv, %N
66+
br i1 %c.0, label %loop.header, label %exit
67+
68+
exit:
69+
; %p is 1 when leaving through the header exit and 0 when leaving through the latch exit.
%p = phi i64 [ 1, %loop.header ], [ 0, %loop.latch]
70+
ret i64 %p
71+
}
72+
73+
; Loop with two exits and a conditional store. The header loads %A[%iv] and,
; when the loaded value equals 10, 'then' stores 1 back to the same address.
; Both paths join in 'continue', which computes %d = 42 udiv %N and exits
; (returning 1) unless %iv < %d; the latch exits (returning 0) unless %iv < %N.
; 'continue' is reached on every iteration, so the udiv is executed
; unconditionally even though it is not in the header.
; The CHECK lines expect the loop to be vectorized, with the udiv expanded in
; the entry block and the conditional store emitted as four predicated scalar
; stores.
define i64 @multi_exit_2_exit_count_with_udiv_in_block_executed_unconditionally(ptr %A, i64 %N) {
74+
; CHECK-LABEL: define i64 @multi_exit_2_exit_count_with_udiv_in_block_executed_unconditionally(
75+
; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
76+
; CHECK-NEXT: entry:
77+
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0)
78+
; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 42, [[N]]
79+
; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[SMAX]], i64 [[TMP0]])
80+
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[UMIN]], 1
81+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP1]], 4
82+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
83+
; CHECK: vector.ph:
84+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 4
85+
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
86+
; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i64 4, i64 [[N_MOD_VF]]
87+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[TMP3]]
88+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
89+
; CHECK: vector.body:
90+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
91+
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
92+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]]
93+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
94+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4
95+
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], <i32 10, i32 10, i32 10, i32 10>
96+
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0
97+
; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
98+
; CHECK: pred.store.if:
99+
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]]
100+
; CHECK-NEXT: store i32 1, ptr [[TMP9]], align 4
101+
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
102+
; CHECK: pred.store.continue:
103+
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1
104+
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
105+
; CHECK: pred.store.if1:
106+
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
107+
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
108+
; CHECK-NEXT: store i32 1, ptr [[TMP12]], align 4
109+
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
110+
; CHECK: pred.store.continue2:
111+
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2
112+
; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
113+
; CHECK: pred.store.if3:
114+
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 2
115+
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]]
116+
; CHECK-NEXT: store i32 1, ptr [[TMP15]], align 4
117+
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
118+
; CHECK: pred.store.continue4:
119+
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3
120+
; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
121+
; CHECK: pred.store.if5:
122+
; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 3
123+
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP17]]
124+
; CHECK-NEXT: store i32 1, ptr [[TMP18]], align 4
125+
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
126+
; CHECK: pred.store.continue6:
127+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
128+
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
129+
; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
130+
; CHECK: middle.block:
131+
; CHECK-NEXT: br label [[SCALAR_PH]]
132+
; CHECK: scalar.ph:
133+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
134+
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
135+
; CHECK: loop.header:
136+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
137+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
138+
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP]], align 4
139+
; CHECK-NEXT: [[C_2:%.*]] = icmp eq i32 [[L]], 10
140+
; CHECK-NEXT: br i1 [[C_2]], label [[THEN:%.*]], label [[CONTINUE:%.*]]
141+
; CHECK: then:
142+
; CHECK-NEXT: store i32 1, ptr [[GEP]], align 4
143+
; CHECK-NEXT: br label [[CONTINUE]]
144+
; CHECK: continue:
145+
; CHECK-NEXT: [[D:%.*]] = udiv i64 42, [[N]]
146+
; CHECK-NEXT: [[C_1:%.*]] = icmp slt i64 [[IV]], [[D]]
147+
; CHECK-NEXT: br i1 [[C_1]], label [[LOOP_LATCH]], label [[EXIT:%.*]]
148+
; CHECK: loop.latch:
149+
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
150+
; CHECK-NEXT: [[C_0:%.*]] = icmp slt i64 [[IV]], [[N]]
151+
; CHECK-NEXT: br i1 [[C_0]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP5:![0-9]+]]
152+
; CHECK: exit:
153+
; CHECK-NEXT: [[P:%.*]] = phi i64 [ 1, [[CONTINUE]] ], [ 0, [[LOOP_LATCH]] ]
154+
; CHECK-NEXT: ret i64 [[P]]
155+
;
156+
entry:
157+
br label %loop.header
158+
159+
loop.header:
160+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
161+
%gep = getelementptr inbounds i32, ptr %A, i64 %iv
162+
%l = load i32, ptr %gep
163+
%c.2 = icmp eq i32 %l, 10
164+
br i1 %c.2, label %then, label %continue
165+
166+
then:
167+
; Only the store is conditional; control always continues to 'continue'.
store i32 1, ptr %gep
168+
br label %continue
169+
170+
continue:
171+
; Reached from both 'loop.header' and 'then', so this udiv runs on every iteration.
%d = udiv i64 42, %N
172+
%c.1 = icmp slt i64 %iv, %d
173+
br i1 %c.1, label %loop.latch, label %exit
174+
175+
loop.latch:
176+
%iv.next = add i64 %iv, 1
177+
%c.0 = icmp slt i64 %iv, %N
178+
br i1 %c.0, label %loop.header, label %exit
179+
180+
exit:
181+
; %p is 1 when leaving through 'continue' and 0 when leaving through the latch.
%p = phi i64 [ 1, %continue ], [ 0, %loop.latch]
182+
ret i64 %p
183+
}
184+
185+
; Loop with two exits where the udiv is only reached conditionally. The header
; loads %A[%iv] and branches to 'then' only when the loaded value equals 10;
; otherwise it goes straight to the latch and skips the udiv. 'then' computes
; %d = 42 udiv %N and exits (returning 1) unless %iv < %d. The latch stores 1
; to %A[%iv] and exits (returning 0) unless %iv < %N.
; The CHECK lines contain only the original scalar loop, i.e. no vectorized
; code is expected for this function.
define i64 @multi_exit_3_exit_count_with_udiv_in_block_executed_conditionally(ptr %A, i64 %N) {
186+
; CHECK-LABEL: define i64 @multi_exit_3_exit_count_with_udiv_in_block_executed_conditionally(
187+
; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
188+
; CHECK-NEXT: entry:
189+
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
190+
; CHECK: loop.header:
191+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
192+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
193+
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP]], align 4
194+
; CHECK-NEXT: [[C_2:%.*]] = icmp eq i32 [[L]], 10
195+
; CHECK-NEXT: br i1 [[C_2]], label [[THEN:%.*]], label [[LOOP_LATCH]]
196+
; CHECK: then:
197+
; CHECK-NEXT: [[D:%.*]] = udiv i64 42, [[N]]
198+
; CHECK-NEXT: [[C_1:%.*]] = icmp slt i64 [[IV]], [[D]]
199+
; CHECK-NEXT: br i1 [[C_1]], label [[LOOP_LATCH]], label [[EXIT:%.*]]
200+
; CHECK: loop.latch:
201+
; CHECK-NEXT: store i32 1, ptr [[GEP]], align 4
202+
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
203+
; CHECK-NEXT: [[C_0:%.*]] = icmp slt i64 [[IV]], [[N]]
204+
; CHECK-NEXT: br i1 [[C_0]], label [[LOOP_HEADER]], label [[EXIT]]
205+
; CHECK: exit:
206+
; CHECK-NEXT: [[P:%.*]] = phi i64 [ 1, [[THEN]] ], [ 0, [[LOOP_LATCH]] ]
207+
; CHECK-NEXT: ret i64 [[P]]
208+
;
209+
entry:
210+
br label %loop.header
211+
212+
loop.header:
213+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
214+
%gep = getelementptr inbounds i32, ptr %A, i64 %iv
215+
%l = load i32, ptr %gep
216+
%c.2 = icmp eq i32 %l, 10
217+
; When the loaded value is not 10, control bypasses 'then' and the udiv in it.
br i1 %c.2, label %then, label %loop.latch
218+
219+
then:
220+
; Only executed when the loaded value is 10, so the udiv is conditional.
%d = udiv i64 42, %N
221+
%c.1 = icmp slt i64 %iv, %d
222+
br i1 %c.1, label %loop.latch, label %exit
223+
224+
loop.latch:
225+
store i32 1, ptr %gep
226+
%iv.next = add i64 %iv, 1
227+
%c.0 = icmp slt i64 %iv, %N
228+
br i1 %c.0, label %loop.header, label %exit
229+
230+
exit:
231+
; %p is 1 when leaving through 'then' and 0 when leaving through the latch.
%p = phi i64 [ 1, %then ], [ 0, %loop.latch]
232+
ret i64 %p
233+
}
234+
235+
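; FIXME: Currently miscompiled: the udiv in loop.latch is only reached when the
236+
; header exit is not taken, but after vectorization it is executed
; unconditionally in the entry block, which divides by zero when %N is 0.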
237+
; Loop with two exits where the udiv lives in the latch. The header stores 1 to
; %dst[%iv] and exits (returning 1) unless %iv < %N; only when that exit is not
; taken does the latch compute %d = 42 udiv %N and exit (returning 0) unless
; %iv < %d. Both compares are signed. With %N <= 0 the header exit is taken on
; the first iteration and the udiv in this input IR is never executed.
; The CHECK lines record the current output, in which the udiv is expanded in
; the entry block before the minimum-iteration check.
define i64 @multi_exit_4_exit_count_with_udiv_in_latch(ptr %dst, i64 %N) {
238+
; CHECK-LABEL: define i64 @multi_exit_4_exit_count_with_udiv_in_latch(
239+
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
240+
; CHECK-NEXT: entry:
241+
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0)
242+
; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 42, [[N]]
243+
; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[SMAX]], i64 [[TMP0]])
244+
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[UMIN]], 1
245+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP1]], 4
246+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
247+
; CHECK: vector.ph:
248+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 4
249+
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
250+
; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i64 4, i64 [[N_MOD_VF]]
251+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[TMP3]]
252+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
253+
; CHECK: vector.body:
254+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
255+
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
256+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP4]]
257+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
258+
; CHECK-NEXT: store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[TMP6]], align 4
259+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
260+
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
261+
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
262+
; CHECK: middle.block:
263+
; CHECK-NEXT: br label [[SCALAR_PH]]
264+
; CHECK: scalar.ph:
265+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
266+
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
267+
; CHECK: loop.header:
268+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
269+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]]
270+
; CHECK-NEXT: store i32 1, ptr [[GEP]], align 4
271+
; CHECK-NEXT: [[C_0:%.*]] = icmp slt i64 [[IV]], [[N]]
272+
; CHECK-NEXT: br i1 [[C_0]], label [[LOOP_LATCH]], label [[EXIT:%.*]]
273+
; CHECK: loop.latch:
274+
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
275+
; CHECK-NEXT: [[D:%.*]] = udiv i64 42, [[N]]
276+
; CHECK-NEXT: [[C_1:%.*]] = icmp slt i64 [[IV]], [[D]]
277+
; CHECK-NEXT: br i1 [[C_1]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP7:![0-9]+]]
278+
; CHECK: exit:
279+
; CHECK-NEXT: [[P:%.*]] = phi i64 [ 1, [[LOOP_HEADER]] ], [ 0, [[LOOP_LATCH]] ]
280+
; CHECK-NEXT: ret i64 [[P]]
281+
;
282+
entry:
283+
br label %loop.header
284+
285+
loop.header:
286+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
287+
%gep = getelementptr inbounds i32, ptr %dst, i64 %iv
288+
store i32 1, ptr %gep
289+
; This exit is tested before the udiv in the latch can execute.
%c.0 = icmp slt i64 %iv, %N
290+
br i1 %c.0, label %loop.latch, label %exit
291+
292+
loop.latch:
293+
%iv.next = add i64 %iv, 1
294+
; Only reached when %c.0 was true, i.e. %iv < %N (signed).
%d = udiv i64 42, %N
295+
%c.1 = icmp slt i64 %iv, %d
296+
br i1 %c.1, label %loop.header, label %exit
297+
298+
exit:
299+
; %p is 1 when leaving through the header exit and 0 when leaving through the latch exit.
%p = phi i64 [ 1, %loop.header ], [ 0, %loop.latch]
300+
ret i64 %p
301+
}
302+
303+
; Single-exit, single-block loop. Each iteration stores 1 to %dst[%iv],
; increments the induction variable, computes %d = 42 udiv %N and loops back
; while %iv < %d (signed compare on the pre-increment %iv). The udiv is in the
; only loop block, so it is executed on every iteration.
; The CHECK lines expect the loop to be vectorized, with the udiv expanded in
; the entry block and a middle block that compares the computed count against
; the vector trip count to decide whether the scalar remainder loop runs.
define void @single_exit_tc_with_udiv(ptr %dst, i64 %N) {
304+
; CHECK-LABEL: define void @single_exit_tc_with_udiv(
305+
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
306+
; CHECK-NEXT: entry:
307+
; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 42, [[N]]
308+
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
309+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 4
310+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
311+
; CHECK: vector.ph:
312+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 4
313+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
314+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
315+
; CHECK: vector.body:
316+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
317+
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
318+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP2]]
319+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0
320+
; CHECK-NEXT: store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[TMP4]], align 4
321+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
322+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
323+
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
324+
; CHECK: middle.block:
325+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
326+
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
327+
; CHECK: scalar.ph:
328+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
329+
; CHECK-NEXT: br label [[LOOP:%.*]]
330+
; CHECK: loop:
331+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
332+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]]
333+
; CHECK-NEXT: store i32 1, ptr [[GEP]], align 4
334+
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
335+
; CHECK-NEXT: [[D:%.*]] = udiv i64 42, [[N]]
336+
; CHECK-NEXT: [[C_1:%.*]] = icmp slt i64 [[IV]], [[D]]
337+
; CHECK-NEXT: br i1 [[C_1]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP9:![0-9]+]]
338+
; CHECK: exit:
339+
; CHECK-NEXT: ret void
340+
;
341+
entry:
342+
br label %loop
343+
344+
loop:
345+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
346+
%gep = getelementptr inbounds i32, ptr %dst, i64 %iv
347+
store i32 1, ptr %gep
348+
%iv.next = add i64 %iv, 1
349+
; The loop's only exit condition depends on this udiv, which runs on every iteration.
%d = udiv i64 42, %N
350+
%c.1 = icmp slt i64 %iv, %d
351+
br i1 %c.1, label %loop, label %exit
352+
353+
exit:
354+
ret void
355+
}
356+
357+
;.
358+
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
359+
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
360+
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
361+
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
362+
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
363+
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
364+
; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
365+
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
366+
; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
367+
; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
368+
;.

0 commit comments

Comments
 (0)