@@ -4,35 +4,12 @@ target triple = "aarch64-unknown-linux-gnu"
define void @inv_store_i16(i16* noalias %dst, i16* noalias readonly %src, i64 %N) #0 {
; CHECK-LABEL: @inv_store_i16(
- ; CHECK-NEXT: entry:
- ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
- ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
- ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
- ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
- ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
- ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
- ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
- ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
- ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+ ; CHECK: %[[TMP1:.*]] = insertelement <vscale x 4 x i16*> poison, i16* %dst, i32 0
+ ; CHECK-NEXT: %[[SPLAT_PTRS:.*]] = shufflevector <vscale x 4 x i16*> %[[TMP1]], <vscale x 4 x i16*> poison, <vscale x 4 x i32> zeroinitializer
; CHECK: vector.body:
- ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
- ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
- ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[SRC:%.*]], i64 [[TMP4]]
- ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0
- ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <vscale x 4 x i16>*
- ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP7]], align 2
- ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vscale.i32()
- ; CHECK-NEXT: [[TMP9:%.*]] = mul i32 [[TMP8]], 4
- ; CHECK-NEXT: [[TMP10:%.*]] = sub i32 [[TMP9]], 1
- ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <vscale x 4 x i16> [[WIDE_LOAD]], i32 [[TMP10]]
- ; CHECK-NEXT: store i16 [[TMP11]], i16* [[DST:%.*]], align 2
- ; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
- ; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4
- ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]]
- ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
- ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
- ;
+ ; CHECK: %[[VECLOAD:.*]] = load <vscale x 4 x i16>, <vscale x 4 x i16>* %{{.*}}, align 2
+ ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> %[[VECLOAD]], <vscale x 4 x i16*> %[[SPLAT_PTRS]], i32 2
entry:
br label %for.body14
@@ -82,98 +59,6 @@ for.end: ; preds = %for.inc, %entry
ret void
}
- define void @uniform_store_i1(i1* noalias %dst, i64* noalias %start, i64 %N) #0 {
- ; CHECK-LABEL: @uniform_store_i1(
- ; CHECK-NEXT: entry:
- ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], 1
- ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
- ; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 2
- ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
- ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
- ; CHECK: vector.ph:
- ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
- ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
- ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
- ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
- ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i64, i64* [[START:%.*]], i64 [[N_VEC]]
- ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64*> poison, i64* [[START]], i32 0
- ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64*> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64*> poison, <vscale x 2 x i32> zeroinitializer
- ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
- ; CHECK: vector.body:
- ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
- ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
- ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[INDEX]], i32 0
- ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
- ; CHECK-NEXT: [[TMP6:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP5]]
- ; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP6]]
- ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i64, i64* [[START]], <vscale x 2 x i64> [[TMP7]]
- ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0
- ; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i64, i64* [[START]], i64 [[TMP8]]
- ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1
- ; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i64, i64* [[START]], i64 [[TMP9]]
- ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0
- ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, i64* [[NEXT_GEP2]], i32 0
- ; CHECK-NEXT: [[TMP12:%.*]] = bitcast i64* [[TMP11]] to <vscale x 2 x i64>*
- ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP12]], align 4
- ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, <vscale x 2 x i64*> [[NEXT_GEP]], i64 1
- ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <vscale x 2 x i64*> [[TMP13]], [[BROADCAST_SPLAT]]
- ; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vscale.i32()
- ; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[TMP15]], 2
- ; CHECK-NEXT: [[TMP17:%.*]] = sub i32 [[TMP16]], 1
- ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <vscale x 2 x i1> [[TMP14]], i32 [[TMP17]]
- ; CHECK-NEXT: store i1 [[TMP18]], i1* [[DST:%.*]], align 1
- ; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
- ; CHECK-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 2
- ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP20]]
- ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
- ; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
- ;
- entry:
- br label %for.body
-
- for.body:
- %first.sroa = phi i64* [ %incdec.ptr, %for.body ], [ %start, %entry ]
- %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
- %iv.next = add i64 %iv, 1
- %0 = load i64, i64* %first.sroa
- %incdec.ptr = getelementptr inbounds i64, i64* %first.sroa, i64 1
- %cmp.not = icmp eq i64* %incdec.ptr, %start
- store i1 %cmp.not, i1* %dst
- %cmp = icmp ult i64 %iv, %N
- br i1 %cmp, label %for.body, label %end, !llvm.loop !6
-
- end:
- ret void
- }
-
- ; Ensure conditional i1 stores do not vectorize
- define void @cond_store_i1(i1* noalias %dst, i8* noalias %start, i32 %cond, i64 %N) #0 {
- ; CHECK-LABEL: @cond_store_i1(
- ; CHECK-NOT: vector.body
- ;
- entry:
- br label %for.body
-
- for.body:
- %first.sroa = phi i8* [ %incdec.ptr, %if.end ], [ null, %entry ]
- %incdec.ptr = getelementptr inbounds i8, i8* %first.sroa, i64 1
- %0 = load i8, i8* %incdec.ptr
- %tobool.not = icmp eq i8 %0, 10
- br i1 %tobool.not, label %if.end, label %if.then
-
- if.then:
- %cmp.store = icmp eq i8* %start, %incdec.ptr
- store i1 %cmp.store, i1* %dst
- br label %if.end
-
- if.end:
- %cmp.not = icmp eq i8* %incdec.ptr, %start
- br i1 %cmp.not, label %for.end, label %for.body
-
- for.end:
- ret void
- }
-
attributes #0 = { "target-features"="+neon,+sve" vscale_range(0, 16) }

!0 = distinct !{!0, !1, !2, !3, !4, !5}
@@ -182,7 +67,3 @@ attributes #0 = { "target-features"="+neon,+sve" vscale_range(0, 16) }
!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
!4 = !{!"llvm.loop.vectorize.enable", i1 true}
!5 = !{!"llvm.loop.interleave.count", i32 1}
-
- !6 = distinct !{!6, !1, !7, !3, !4, !5}
- !7 = !{!"llvm.loop.vectorize.width", i32 2}
-