Skip to content

Commit ab9cd27

Browse files
authored
[LV][NFC] Move and add truncated-related FindLastIV reduction test cases. (#67674)
1 parent cbbe555 commit ab9cd27

File tree

2 files changed

+285
-166
lines changed

2 files changed

+285
-166
lines changed
Lines changed: 285 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,285 @@
1+
; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK
2+
; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK
3+
; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck %s --check-prefix=CHECK
4+
5+
; This test can theoretically be vectorized without a runtime-check, by
6+
; pattern-matching on the constructs that are introduced by IndVarSimplify.
7+
; We can check two things:
8+
; %1 = trunc i64 %iv to i32
9+
; This indicates that the %iv is truncated to i32. We can then check the loop
10+
; guard is a signed i32:
11+
; %cmp.sgt = icmp sgt i32 %n, 0
12+
; and successfully vectorize the case without a runtime-check.
13+
define i32 @select_icmp_const_truncated_iv_widened_exit(ptr %a, i32 %n) {
14+
; CHECK-LABEL: define i32 @select_icmp_const_truncated_iv_widened_exit
15+
; CHECK-NOT: vector.body:
16+
;
17+
; Walks %a as i64 elements for indices 0 .. zext(%n)-1 and returns the i32
; truncation of the last index whose element is greater than 3, or 331 when
; no element matches or the loop is skipped.
; The directives above require that no vector.body block shows up in the
; output for this function.
;
; Loop guard: the loop is only entered when %n is positive as a signed i32.
entry:
18+
%cmp.sgt = icmp sgt i32 %n, 0
19+
br i1 %cmp.sgt, label %for.body.preheader, label %exit
20+
21+
; The i32 bound is zero-extended so it can be compared with the i64 %iv.
for.body.preheader: ; preds = %entry
22+
%wide.trip.count = zext i32 %n to i64
23+
br label %for.body
24+
25+
for.body: ; preds = %for.body.preheader, %for.body
26+
; %iv counts up from 0; %rdx carries the most recently selected index,
; starting from the constant 331.
%iv = phi i64 [ 0, %for.body.preheader ], [ %inc, %for.body ]
27+
%rdx = phi i32 [ 331, %for.body.preheader ], [ %spec.select, %for.body ]
28+
%arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
29+
%0 = load i64, ptr %arrayidx, align 8
30+
%cmp = icmp sgt i64 %0, 3
31+
; The reduction is i32 while %iv is i64, hence the truncation.
%1 = trunc i64 %iv to i32
32+
; Keep the current (truncated) index on a match, otherwise the previous value.
%spec.select = select i1 %cmp, i32 %1, i32 %rdx
33+
%inc = add nuw nsw i64 %iv, 1
34+
%exitcond.not = icmp eq i64 %inc, %wide.trip.count
35+
br i1 %exitcond.not, label %exit, label %for.body
36+
37+
exit: ; preds = %for.body, %entry
38+
; 331 arrives here when the guard in entry skipped the loop.
%rdx.lcssa = phi i32 [ 331, %entry ], [ %spec.select, %for.body ]
39+
ret i32 %rdx.lcssa
40+
}
41+
42+
; This test can theoretically be vectorized without a runtime-check, by
43+
; pattern-matching on the constructs that are introduced by IndVarSimplify.
44+
; We can check two things:
45+
; %1 = trunc i64 %iv to i32
46+
; This indicates that the %iv is truncated to i32. We can then check the loop
47+
; exit condition, which compares to a constant that fits within i32:
48+
; %exitcond.not = icmp eq i64 %inc, 20000
49+
; and successfully vectorize the case without a runtime-check.
50+
define i32 @select_icmp_const_truncated_iv_const_exit(ptr %a) {
51+
; CHECK-LABEL: define i32 @select_icmp_const_truncated_iv_const_exit
52+
; CHECK-NOT: vector.body:
53+
;
54+
; Walks %a as i64 elements for indices 0 .. 19999 and returns the i32
; truncation of the last index whose element is greater than 3, or 331 when
; no element matches.
; The directives above require that no vector.body block shows up in the
; output for this function.
;
; There is no loop guard: entry falls straight into the loop.
entry:
55+
br label %for.body
56+
57+
for.body: ; preds = %entry, %for.body
58+
; %iv counts up from 0; %rdx carries the most recently selected index,
; starting from the constant 331.
%iv = phi i64 [ 0, %entry ], [ %inc, %for.body ]
59+
%rdx = phi i32 [ 331, %entry ], [ %spec.select, %for.body ]
60+
%arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
61+
%0 = load i64, ptr %arrayidx, align 8
62+
%cmp = icmp sgt i64 %0, 3
63+
; The reduction is i32 while %iv is i64, hence the truncation.
%1 = trunc i64 %iv to i32
64+
; Keep the current (truncated) index on a match, otherwise the previous value.
%spec.select = select i1 %cmp, i32 %1, i32 %rdx
65+
%inc = add nuw nsw i64 %iv, 1
66+
; Constant trip count of 20000, so %iv never exceeds 19999.
%exitcond.not = icmp eq i64 %inc, 20000
67+
br i1 %exitcond.not, label %exit, label %for.body
68+
69+
exit: ; preds = %for.body
70+
ret i32 %spec.select
71+
}
72+
73+
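; Without a loop guard, the maximum constant trip count that can be vectorized
74+
; is the one whose last induction value equals the signed maximum value of the
; reduction type (here 2147483648 iterations, so %iv ends at 2147483647).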
75+
define i32 @select_fcmp_max_valid_const_ub(ptr %a) {
76+
; CHECK-LABEL: define i32 @select_fcmp_max_valid_const_ub
77+
; CHECK-NOT: vector.body:
78+
;
79+
; Walks %a as float elements for indices 0 .. 2147483647 and returns the i32
; truncation of the last index whose element is less than 0.0, or -1 when no
; element matches.
; The directives above require that no vector.body block shows up in the
; output for this function.
;
; There is no loop guard: entry falls straight into the loop.
entry:
80+
br label %for.body
81+
82+
for.body: ; preds = %entry, %for.body
83+
; %iv counts up from 0; %rdx carries the most recently selected index,
; starting from -1.
%iv = phi i64 [ 0, %entry ], [ %inc, %for.body ]
84+
%rdx = phi i32 [ -1, %entry ], [ %spec.select, %for.body ]
85+
%arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
86+
%0 = load float, ptr %arrayidx, align 4
87+
%cmp = fcmp fast olt float %0, 0.000000e+00
88+
; The reduction is i32 while %iv is i64, hence the truncation.
%1 = trunc i64 %iv to i32
89+
; Keep the current (truncated) index on a match, otherwise the previous value.
%spec.select = select i1 %cmp, i32 %1, i32 %rdx
90+
%inc = add nuw nsw i64 %iv, 1
91+
; Exits once %inc reaches 2147483648, so the last %iv is 2147483647, the
; largest value representable as a signed i32.
%exitcond.not = icmp eq i64 %inc, 2147483648
92+
br i1 %exitcond.not, label %exit, label %for.body
93+
94+
exit: ; preds = %for.body
95+
ret i32 %spec.select
96+
}
97+
98+
; Negative tests
99+
100+
; This test can theoretically be vectorized, but only with a runtime-check.
101+
; The construct that is introduced by IndVarSimplify is:
102+
; %1 = trunc i64 %iv to i32
103+
; However, the loop guard is an i64:
104+
; %cmp.sgt = icmp sgt i64 %n, 0
105+
; We cannot guarantee that %iv won't overflow an i32 value (and hence hit the
106+
; sentinel value), and need a runtime-check to vectorize this case.
107+
define i32 @not_vectorized_select_icmp_const_truncated_iv_unwidened_exit(ptr %a, i64 %n) {
108+
; CHECK-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_unwidened_exit
109+
; CHECK-NOT: vector.body:
110+
;
111+
; Walks %a as i32 elements for indices 0 .. %n-1 and returns the i32
; truncation of the last index whose element is greater than 3, or 331 when
; no element matches or the loop is skipped.
; The directives above require that no vector.body block shows up in the
; output for this function.
;
; Loop guard on the i64 bound: it only establishes %n > 0 as an i64, so it
; does not limit %n to the i32 range.
entry:
112+
%cmp.sgt = icmp sgt i64 %n, 0
113+
br i1 %cmp.sgt, label %for.body, label %exit
114+
115+
for.body: ; preds = %entry, %for.body
116+
; %iv counts up from 0; %rdx carries the most recently selected index,
; starting from the constant 331.
%iv = phi i64 [ 0, %entry ], [ %inc, %for.body ]
117+
%rdx = phi i32 [ 331, %entry ], [ %spec.select, %for.body ]
118+
%arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
119+
%0 = load i32, ptr %arrayidx, align 4
120+
%cmp = icmp sgt i32 %0, 3
121+
; The reduction is i32 while %iv is i64, hence the truncation.
%1 = trunc i64 %iv to i32
122+
; Keep the current (truncated) index on a match, otherwise the previous value.
%spec.select = select i1 %cmp, i32 %1, i32 %rdx
123+
%inc = add nuw nsw i64 %iv, 1
124+
; The bound is the i64 argument itself; no zext from a narrower type is
; involved.
%exitcond.not = icmp eq i64 %inc, %n
125+
br i1 %exitcond.not, label %exit, label %for.body
126+
127+
exit: ; preds = %for.body, %entry
128+
; 331 arrives here when the guard in entry skipped the loop.
%rdx.lcssa = phi i32 [ 331, %entry ], [ %spec.select, %for.body ]
129+
ret i32 %rdx.lcssa
130+
}
131+
132+
; This test can theoretically be vectorized, but only with a runtime-check.
133+
; The construct that is introduced by IndVarSimplify is:
134+
; %1 = trunc i64 %iv to i32
135+
; However, the loop guard is unsigned:
136+
; %cmp.not = icmp eq i32 %n, 0
137+
; We cannot guarantee that %iv won't overflow an i32 value (and hence hit the
138+
; sentinel value), and need a runtime-check to vectorize this case.
139+
define i32 @not_vectorized_select_icmp_const_truncated_iv_unsigned_loop_guard(ptr %a, i32 %n) {
140+
; CHECK-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_unsigned_loop_guard
141+
; CHECK-NOT: vector.body:
142+
;
143+
; Walks %a as i32 elements for indices 0 .. zext(%n)-1 and returns the i32
; truncation of the last index whose element is greater than 3, or 331 when
; no element matches or the loop is skipped.
; The directives above require that no vector.body block shows up in the
; output for this function.
;
; Loop guard: only %n == 0 skips the loop, so any non-zero bit pattern of %n
; (including ones with the sign bit set) enters it.
entry:
144+
%cmp.not = icmp eq i32 %n, 0
145+
br i1 %cmp.not, label %exit, label %for.body.preheader
146+
147+
; The i32 bound is zero-extended so it can be compared with the i64 %iv.
for.body.preheader: ; preds = %entry
148+
%wide.trip.count = zext i32 %n to i64
149+
br label %for.body
150+
151+
for.body: ; preds = %for.body.preheader, %for.body
152+
; %iv counts up from 0; %rdx carries the most recently selected index,
; starting from the constant 331.
%iv = phi i64 [ 0, %for.body.preheader ], [ %inc, %for.body ]
153+
%rdx = phi i32 [ 331, %for.body.preheader ], [ %spec.select, %for.body ]
154+
%arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
155+
%0 = load i32, ptr %arrayidx, align 4
156+
%cmp1 = icmp sgt i32 %0, 3
157+
; The reduction is i32 while %iv is i64, hence the truncation.
%1 = trunc i64 %iv to i32
158+
; Keep the current (truncated) index on a match, otherwise the previous value.
%spec.select = select i1 %cmp1, i32 %1, i32 %rdx
159+
%inc = add nuw nsw i64 %iv, 1
160+
%exitcond.not = icmp eq i64 %inc, %wide.trip.count
161+
br i1 %exitcond.not, label %exit, label %for.body
162+
163+
exit: ; preds = %for.body, %entry
164+
; 331 arrives here when the guard in entry skipped the loop.
%rdx.lcssa = phi i32 [ 331, %entry ], [ %spec.select, %for.body ]
165+
ret i32 %rdx.lcssa
166+
}
167+
168+
; This test cannot be vectorized, even with a runtime check.
169+
; The construct that is introduced by IndVarSimplify is:
170+
; %1 = trunc i64 %iv to i32
171+
; However, the loop exit condition is a constant that overflows i32:
172+
; %exitcond.not = icmp eq i64 %inc, 4294967294
173+
; Hence, the i32 will most certainly wrap and hit the sentinel value, and we
174+
; cannot vectorize this case.
175+
define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound(ptr %a) {
176+
; CHECK-LABEL: define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound
177+
; CHECK-NOT: vector.body:
178+
;
179+
; Walks %a as i32 elements for indices 2147483646 .. 4294967293 and returns
; the i32 truncation of the last index whose element is greater than 3, or
; 331 when no element matches.
; The directives above require that no vector.body block shows up in the
; output for this function.
;
; There is no loop guard: entry falls straight into the loop.
entry:
180+
br label %for.body
181+
182+
for.body: ; preds = %entry, %for.body
183+
; %iv starts at 2147483646, so from the third iteration on (%iv >=
; 2147483648) its value no longer fits in a signed i32.
%iv = phi i64 [ 2147483646, %entry ], [ %inc, %for.body ]
184+
%rdx = phi i32 [ 331, %entry ], [ %spec.select, %for.body ]
185+
%arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
186+
%0 = load i32, ptr %arrayidx, align 4
187+
%cmp = icmp sgt i32 %0, 3
188+
; The reduction is i32 while %iv is i64, hence the truncation.
%conv = trunc i64 %iv to i32
189+
; Keep the current (truncated) index on a match, otherwise the previous value.
%spec.select = select i1 %cmp, i32 %conv, i32 %rdx
190+
%inc = add nuw nsw i64 %iv, 1
191+
; Constant exit bound of 4294967294, which is larger than any signed i32.
%exitcond.not = icmp eq i64 %inc, 4294967294
192+
br i1 %exitcond.not, label %exit, label %for.body
193+
194+
exit: ; preds = %for.body
195+
ret i32 %spec.select
196+
}
197+
198+
; Forbidding vectorization of the FindLastIV pattern involving a truncated
199+
; induction variable in the absence of any loop guard.
200+
define i32 @not_vectorized_select_iv_icmp_no_guard(ptr %a, ptr %b, i32 %start, i32 %n) {
201+
; CHECK-LABEL: define i32 @not_vectorized_select_iv_icmp_no_guard
202+
; CHECK-NOT: vector.body:
203+
;
204+
; Compares %a[i] with %b[i] (both i32) for indices counted by %iv and returns
; the i32 truncation of the last index where %a[i] > %b[i], or %start when
; there is no such index.
; The directives above require that no vector.body block shows up in the
; output for this function.
;
; No loop guard: %n is zero-extended and the loop is entered unconditionally,
; so the first iteration runs before the bound is ever compared.
entry:
205+
%wide.trip.count = zext i32 %n to i64
206+
br label %for.body
207+
208+
for.body: ; preds = %entry, %for.body
209+
; %iv counts up from 0; %rdx carries the most recently selected index,
; starting from the caller-provided %start.
%iv = phi i64 [ 0, %entry ], [ %inc, %for.body ]
210+
%rdx = phi i32 [ %start, %entry ], [ %cond, %for.body ]
211+
%arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
212+
%0 = load i32, ptr %arrayidx, align 4
213+
%arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
214+
%1 = load i32, ptr %arrayidx2, align 4
215+
%cmp = icmp sgt i32 %0, %1
216+
; The reduction is i32 while %iv is i64, hence the truncation.
%2 = trunc i64 %iv to i32
217+
; Keep the current (truncated) index on a match, otherwise the previous value.
%cond = select i1 %cmp, i32 %2, i32 %rdx
218+
%inc = add nuw nsw i64 %iv, 1
219+
%exitcond.not = icmp eq i64 %inc, %wide.trip.count
220+
br i1 %exitcond.not, label %exit, label %for.body
221+
222+
exit: ; preds = %for.body
223+
ret i32 %cond
224+
}
225+
226+
; Without loop guard, when the constant trip count exceeds the maximum signed
227+
; value of the reduction type, truncation may cause overflow. Therefore, the
228+
; vectorizer is unable to guarantee that the induction variable is
229+
; monotonically increasing.
230+
define i32 @not_vectorized_select_fcmp_invalid_const_ub(ptr %a) {
231+
; CHECK-LABEL: define i32 @not_vectorized_select_fcmp_invalid_const_ub
232+
; CHECK-NOT: vector.body:
233+
;
234+
; Walks %a as float elements for indices 0 .. 2147483648 and returns the i32
; truncation of the last index whose element is less than 0.0, or -1 when no
; element matches.
; The directives above require that no vector.body block shows up in the
; output for this function.
;
; There is no loop guard: entry falls straight into the loop.
entry:
235+
br label %for.body
236+
237+
for.body: ; preds = %entry, %for.body
238+
; %iv counts up from 0; %rdx carries the most recently selected index,
; starting from -1.
%iv = phi i64 [ 0, %entry ], [ %inc, %for.body ]
239+
%rdx = phi i32 [ -1, %entry ], [ %spec.select, %for.body ]
240+
%arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
241+
%0 = load float, ptr %arrayidx, align 4
242+
%cmp = fcmp fast olt float %0, 0.000000e+00
243+
; The reduction is i32 while %iv is i64, hence the truncation.
%1 = trunc i64 %iv to i32
244+
; Keep the current (truncated) index on a match, otherwise the previous value.
%spec.select = select i1 %cmp, i32 %1, i32 %rdx
245+
%inc = add nuw nsw i64 %iv, 1
246+
; Exits once %inc reaches 2147483649, so the last %iv is 2147483648, one past
; the largest value representable as a signed i32.
%exitcond.not = icmp eq i64 %inc, 2147483649
247+
br i1 %exitcond.not, label %exit, label %for.body
248+
249+
exit: ; preds = %for.body
250+
ret i32 %spec.select
251+
}
252+
253+
; Even with loop guard protection, if the destination type of the truncation
254+
; instruction is smaller than the trip count type before extension, overflow
255+
; could still occur.
256+
define i16 @not_vectorized_select_iv_icmp_overflow_unwidened_tripcount(ptr %a, ptr %b, i16 %start, i32 %n) {
257+
; CHECK-LABEL: define i16 @not_vectorized_select_iv_icmp_overflow_unwidened_tripcount
258+
; CHECK-NOT: vector.body:
259+
;
260+
; Compares %a[i] with %b[i] (both i32) for indices 0 .. zext(%n)-1 and
; returns the i16 truncation of the last index where %a[i] > %b[i], or %start
; when there is no such index or the loop is skipped.
; The directives above require that no vector.body block shows up in the
; output for this function.
;
; Loop guard: the loop is only entered when %n is positive as a signed i32.
entry:
261+
%cmp9 = icmp sgt i32 %n, 0
262+
br i1 %cmp9, label %for.body.preheader, label %exit
263+
264+
; The i32 bound is zero-extended so it can be compared with the i64 %iv.
for.body.preheader: ; preds = %entry
265+
%wide.trip.count = zext i32 %n to i64
266+
br label %for.body
267+
268+
for.body: ; preds = %for.body.preheader, %for.body
269+
; %iv counts up from 0; %rdx carries the most recently selected index,
; starting from the caller-provided %start.
%iv = phi i64 [ 0, %for.body.preheader ], [ %inc, %for.body ]
270+
%rdx = phi i16 [ %start, %for.body.preheader ], [ %cond, %for.body ]
271+
%arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
272+
%0 = load i32, ptr %arrayidx, align 4
273+
%arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
274+
%1 = load i32, ptr %arrayidx2, align 4
275+
%cmp3 = icmp sgt i32 %0, %1
276+
; The reduction is only i16 although the bound %n is an i32, so the guard
; above does not keep %iv within the i16 range.
%2 = trunc i64 %iv to i16
277+
; Keep the current (truncated) index on a match, otherwise the previous value.
%cond = select i1 %cmp3, i16 %2, i16 %rdx
278+
%inc = add nuw nsw i64 %iv, 1
279+
%exitcond.not = icmp eq i64 %inc, %wide.trip.count
280+
br i1 %exitcond.not, label %exit, label %for.body
281+
282+
exit: ; preds = %for.body, %entry
283+
; %start arrives here when the guard in entry skipped the loop.
%rdx.0.lcssa = phi i16 [ %start, %entry ], [ %cond, %for.body ]
284+
ret i16 %rdx.0.lcssa
285+
}

0 commit comments

Comments
 (0)