|
3 | 3 | ; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC4 --check-prefix=CHECK
|
4 | 4 | ; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck %s --check-prefix=CHECK-VF1IC4 --check-prefix=CHECK
|
5 | 5 |
|
6 |
| -; This test can theoretically be vectorized without a runtime-check, by |
7 |
| -; pattern-matching on the constructs that are introduced by IndVarSimplify. |
8 |
| -; We can check two things: |
9 |
| -; %1 = trunc i64 %iv to i32 |
10 |
| -; This indicates that the %iv is truncated to i32. We can then check the loop |
11 |
| -; guard is a signed i32: |
12 |
| -; %cmp.sgt = icmp sgt i32 %n, 0 |
13 |
| -; and successfully vectorize the case without a runtime-check. |
| 6 | +; For the truncated test cases, range analysis of the induction variable is
| 7 | +; used to ensure the induction variable is always greater than the sentinel
| 8 | +; value. The case is vectorizable if the truncated induction variable is
| 9 | +; monotonically increasing and never equal to the sentinel.
14 | 10 | define i32 @select_icmp_const_truncated_iv_widened_exit(ptr %a, i32 %n) {
|
15 | 11 | ; CHECK-VF4IC1-LABEL: define i32 @select_icmp_const_truncated_iv_widened_exit(
|
16 | 12 | ; CHECK-VF4IC1-SAME: ptr [[A:%.*]], i32 [[N:%.*]]) {
|
@@ -251,14 +247,8 @@ exit: ; preds = %for.body, %entry
|
251 | 247 | ret i32 %rdx.lcssa
|
252 | 248 | }
|
253 | 249 |
|
254 |
| -; This test can theoretically be vectorized without a runtime-check, by |
255 |
| -; pattern-matching on the constructs that are introduced by IndVarSimplify. |
256 |
| -; We can check two things: |
257 |
| -; %1 = trunc i64 %iv to i32 |
258 |
| -; This indicates that the %iv is truncated to i32. We can then check the loop |
259 |
| -; exit condition, which compares to a constant that fits within i32: |
260 |
| -; %exitcond.not = icmp eq i64 %inc, 20000 |
261 |
| -; and successfully vectorize the case without a runtime-check. |
| 250 | +; Without a loop guard, the range analysis can instead rely on the constant
| 251 | +; trip count.
262 | 252 | define i32 @select_icmp_const_truncated_iv_const_exit(ptr %a) {
|
263 | 253 | ; CHECK-VF4IC1-LABEL: define i32 @select_icmp_const_truncated_iv_const_exit(
|
264 | 254 | ; CHECK-VF4IC1-SAME: ptr [[A:%.*]]) {
|
|
0 commit comments