; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt < %s -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S | FileCheck %s
- ; FIXME: The vectorizer should refuse to fold the tail by masking because
+
+ ; The vectorizer should refuse to fold the tail by masking because
; %conv is used outside of the loop. Test for this by checking that
; %n.vec, the vector trip count, is rounded down to the next multiple of
; 4. If folding the tail, it would have been rounded up instead.
@@ -14,7 +15,8 @@ define i32 @test(ptr %arr, i64 %n) {
; CHECK-NEXT: br i1 [[CMP1]], label [[PREHEADER:%.*]], label [[DONE:%.*]]
; CHECK: preheader:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
- ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; CHECK: vector.scevcheck:
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[N]], -2
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i8
@@ -30,69 +32,31 @@ define i32 @test(ptr %arr, i64 %n) {
; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP6]], [[TMP11]]
; CHECK-NEXT: br i1 [[TMP12]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
- ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], 3
- ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 4
- ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
+ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
+ ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT: [[IND_END:%.*]] = add i64 1, [[N_VEC]]
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i8
; CHECK-NEXT: [[IND_END1:%.*]] = add i8 1, [[DOTCAST]]
- ; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP0]], 1
- ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
- ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
- ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE10:%.*]] ]
- ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 1, i64 2, i64 3, i64 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE10]] ]
+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 1
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 2
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[OFFSET_IDX]], 3
- ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
- ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT3]], <4 x i64> poison, <4 x i32> zeroinitializer
- ; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT4]], <i64 0, i64 1, i64 2, i64 3>
- ; CHECK-NEXT: [[TMP17:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]
- ; CHECK-NEXT: [[TMP18:%.*]] = add nsw <4 x i64> [[VEC_IND]], <i64 -1, i64 -1, i64 -1, i64 -1>
- ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP17]], i32 0
- ; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
- ; CHECK: pred.store.if:
- ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i64> [[TMP18]], i32 0
- ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP20]]
- ; CHECK-NEXT: store i32 65, ptr [[TMP21]], align 4
- ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
- ; CHECK: pred.store.continue:
- ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP17]], i32 1
- ; CHECK-NEXT: br i1 [[TMP22]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
- ; CHECK: pred.store.if5:
- ; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP18]], i32 1
- ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP23]]
- ; CHECK-NEXT: store i32 65, ptr [[TMP24]], align 4
- ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
- ; CHECK: pred.store.continue6:
- ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i1> [[TMP17]], i32 2
- ; CHECK-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
- ; CHECK: pred.store.if7:
- ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP18]], i32 2
- ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP26]]
- ; CHECK-NEXT: store i32 65, ptr [[TMP27]], align 4
- ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
- ; CHECK: pred.store.continue8:
- ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP17]], i32 3
- ; CHECK-NEXT: br i1 [[TMP28]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10]]
- ; CHECK: pred.store.if9:
- ; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i64> [[TMP18]], i32 3
- ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP29]]
- ; CHECK-NEXT: store i32 65, ptr [[TMP30]], align 4
- ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]]
- ; CHECK: pred.store.continue10:
- ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
- ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
- ; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
- ; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+ ; CHECK-NEXT: [[TMP17:%.*]] = add nsw i64 [[TMP13]], -1
+ ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP17]]
+ ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 0
+ ; CHECK-NEXT: store <4 x i32> <i32 65, i32 65, i32 65, i32 65>, ptr [[TMP19]], align 4
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+ ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+ ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
+ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: [[CMO:%.*]] = sub i64 [[N_VEC]], 1
; CHECK-NEXT: [[IND_ESCAPE:%.*]] = add i64 1, [[CMO]]
- ; CHECK-NEXT: br i1 true, label [[LOAD_VAL:%.*]], label [[SCALAR_PH]]
+ ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOAD_VAL:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[PREHEADER]] ], [ 1, [[VECTOR_SCEVCHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i8 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ 1, [[PREHEADER]] ], [ 1, [[VECTOR_SCEVCHECK]] ]
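Illustration (not part of the test): the updated checks assert that the vector trip count N_VEC in vector.ph is the scalar trip count TMP0 rounded down to a multiple of VF=4 (N_MOD_VF = urem TMP0, 4; N_VEC = TMP0 - N_MOD_VF), whereas the old tail-folded checks rounded it up via N_RND_UP = TMP0 + 3. A minimal C sketch of the two computations, assuming the hypothetical helper names round_down_vtc and round_up_vtc:

#include <stdint.h>
#include <stdio.h>

/* Vector trip count without tail folding: round the scalar trip count
 * down to a multiple of the vectorization factor vf.
 * Mirrors the new checks: N_MOD_VF = urem TMP0, 4; N_VEC = TMP0 - N_MOD_VF. */
static uint64_t round_down_vtc(uint64_t tc, uint64_t vf) {
    return tc - (tc % vf);
}

/* Vector trip count when folding the tail by masking: round up instead.
 * Mirrors the old checks: N_RND_UP = TMP0 + 3; N_VEC = N_RND_UP - urem(N_RND_UP, 4). */
static uint64_t round_up_vtc(uint64_t tc, uint64_t vf) {
    uint64_t rnd = tc + (vf - 1);
    return rnd - (rnd % vf);
}

int main(void) {
    /* For tc = 10, vf = 4: rounding down gives 8, rounding up gives 12. */
    printf("%llu %llu\n", (unsigned long long)round_down_vtc(10, 4),
                          (unsigned long long)round_up_vtc(10, 4));
    return 0;
}

The test expects the rounded-down form because %conv is used outside the loop, so the vectorizer must not fold the tail by masking.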