1
1
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2
2
; RUN: llc < %s -O0 -mtriple=aarch64-unknown-unknown | FileCheck %s
3
+ ; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mattr=+neon | FileCheck %s --check-prefix=NEON
4
+ ; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mattr=+neon,+dotprod | FileCheck %s --check-prefix=DOT
5
+ ; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mattr=+sve | FileCheck %s --check-prefix=SVE
3
6
4
7
; Function Attrs: nobuiltin nounwind readonly
5
8
define i8 @popcount128 (ptr nocapture nonnull readonly %0 ) {
@@ -12,6 +15,36 @@ define i8 @popcount128(ptr nocapture nonnull readonly %0) {
12
15
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
13
16
; CHECK-NEXT: fmov w0, s0
14
17
; CHECK-NEXT: ret
18
+ ;
19
+ ; NEON-LABEL: popcount128:
20
+ ; NEON: // %bb.0: // %Entry
21
+ ; NEON-NEXT: ldr d0, [x0]
22
+ ; NEON-NEXT: add x8, x0, #8
23
+ ; NEON-NEXT: ld1 { v0.d }[1], [x8]
24
+ ; NEON-NEXT: cnt v0.16b, v0.16b
25
+ ; NEON-NEXT: uaddlv h0, v0.16b
26
+ ; NEON-NEXT: fmov w0, s0
27
+ ; NEON-NEXT: ret
28
+ ;
29
+ ; DOT-LABEL: popcount128:
30
+ ; DOT: // %bb.0: // %Entry
31
+ ; DOT-NEXT: ldr d0, [x0]
32
+ ; DOT-NEXT: add x8, x0, #8
33
+ ; DOT-NEXT: ld1 { v0.d }[1], [x8]
34
+ ; DOT-NEXT: cnt v0.16b, v0.16b
35
+ ; DOT-NEXT: uaddlv h0, v0.16b
36
+ ; DOT-NEXT: fmov w0, s0
37
+ ; DOT-NEXT: ret
38
+ ;
39
+ ; SVE-LABEL: popcount128:
40
+ ; SVE: // %bb.0: // %Entry
41
+ ; SVE-NEXT: ldr d0, [x0]
42
+ ; SVE-NEXT: add x8, x0, #8
43
+ ; SVE-NEXT: ld1 { v0.d }[1], [x8]
44
+ ; SVE-NEXT: cnt v0.16b, v0.16b
45
+ ; SVE-NEXT: uaddlv h0, v0.16b
46
+ ; SVE-NEXT: fmov w0, s0
47
+ ; SVE-NEXT: ret
15
48
Entry:
16
49
%1 = load i128 , ptr %0 , align 16
17
50
%2 = tail call i128 @llvm.ctpop.i128 (i128 %1 )
@@ -56,6 +89,57 @@ define i16 @popcount256(ptr nocapture nonnull readonly %0) {
56
89
; CHECK-NEXT: adds x8, x8, x9
57
90
; CHECK-NEXT: mov w0, w8
58
91
; CHECK-NEXT: ret
92
+ ;
93
+ ; NEON-LABEL: popcount256:
94
+ ; NEON: // %bb.0: // %Entry
95
+ ; NEON-NEXT: ldr d0, [x0, #16]
96
+ ; NEON-NEXT: ldr d1, [x0]
97
+ ; NEON-NEXT: add x8, x0, #8
98
+ ; NEON-NEXT: add x9, x0, #24
99
+ ; NEON-NEXT: ld1 { v0.d }[1], [x9]
100
+ ; NEON-NEXT: ld1 { v1.d }[1], [x8]
101
+ ; NEON-NEXT: cnt v0.16b, v0.16b
102
+ ; NEON-NEXT: cnt v1.16b, v1.16b
103
+ ; NEON-NEXT: uaddlv h0, v0.16b
104
+ ; NEON-NEXT: uaddlv h1, v1.16b
105
+ ; NEON-NEXT: fmov w8, s0
106
+ ; NEON-NEXT: fmov w9, s1
107
+ ; NEON-NEXT: add w0, w9, w8
108
+ ; NEON-NEXT: ret
109
+ ;
110
+ ; DOT-LABEL: popcount256:
111
+ ; DOT: // %bb.0: // %Entry
112
+ ; DOT-NEXT: ldr d0, [x0, #16]
113
+ ; DOT-NEXT: ldr d1, [x0]
114
+ ; DOT-NEXT: add x8, x0, #8
115
+ ; DOT-NEXT: add x9, x0, #24
116
+ ; DOT-NEXT: ld1 { v0.d }[1], [x9]
117
+ ; DOT-NEXT: ld1 { v1.d }[1], [x8]
118
+ ; DOT-NEXT: cnt v0.16b, v0.16b
119
+ ; DOT-NEXT: cnt v1.16b, v1.16b
120
+ ; DOT-NEXT: uaddlv h0, v0.16b
121
+ ; DOT-NEXT: uaddlv h1, v1.16b
122
+ ; DOT-NEXT: fmov w8, s0
123
+ ; DOT-NEXT: fmov w9, s1
124
+ ; DOT-NEXT: add w0, w9, w8
125
+ ; DOT-NEXT: ret
126
+ ;
127
+ ; SVE-LABEL: popcount256:
128
+ ; SVE: // %bb.0: // %Entry
129
+ ; SVE-NEXT: ldr d0, [x0, #16]
130
+ ; SVE-NEXT: ldr d1, [x0]
131
+ ; SVE-NEXT: add x8, x0, #8
132
+ ; SVE-NEXT: add x9, x0, #24
133
+ ; SVE-NEXT: ld1 { v0.d }[1], [x9]
134
+ ; SVE-NEXT: ld1 { v1.d }[1], [x8]
135
+ ; SVE-NEXT: cnt v0.16b, v0.16b
136
+ ; SVE-NEXT: cnt v1.16b, v1.16b
137
+ ; SVE-NEXT: uaddlv h0, v0.16b
138
+ ; SVE-NEXT: uaddlv h1, v1.16b
139
+ ; SVE-NEXT: fmov w8, s0
140
+ ; SVE-NEXT: fmov w9, s1
141
+ ; SVE-NEXT: add w0, w9, w8
142
+ ; SVE-NEXT: ret
59
143
Entry:
60
144
%1 = load i256 , ptr %0 , align 16
61
145
%2 = tail call i256 @llvm.ctpop.i256 (i256 %1 )
@@ -83,9 +167,220 @@ define <1 x i128> @popcount1x128(<1 x i128> %0) {
83
167
; CHECK-NEXT: // kill: def $x8 killed $w8
84
168
; CHECK-NEXT: bfi x0, x8, #32, #32
85
169
; CHECK-NEXT: ret
170
+ ;
171
+ ; NEON-LABEL: popcount1x128:
172
+ ; NEON: // %bb.0: // %Entry
173
+ ; NEON-NEXT: fmov d1, x0
174
+ ; NEON-NEXT: movi v0.2d, #0000000000000000
175
+ ; NEON-NEXT: mov v1.d[1], x1
176
+ ; NEON-NEXT: cnt v1.16b, v1.16b
177
+ ; NEON-NEXT: uaddlv h1, v1.16b
178
+ ; NEON-NEXT: mov v0.s[0], v1.s[0]
179
+ ; NEON-NEXT: mov x1, v0.d[1]
180
+ ; NEON-NEXT: fmov x0, d0
181
+ ; NEON-NEXT: ret
182
+ ;
183
+ ; DOT-LABEL: popcount1x128:
184
+ ; DOT: // %bb.0: // %Entry
185
+ ; DOT-NEXT: fmov d1, x0
186
+ ; DOT-NEXT: movi v0.2d, #0000000000000000
187
+ ; DOT-NEXT: mov v1.d[1], x1
188
+ ; DOT-NEXT: cnt v1.16b, v1.16b
189
+ ; DOT-NEXT: uaddlv h1, v1.16b
190
+ ; DOT-NEXT: mov v0.s[0], v1.s[0]
191
+ ; DOT-NEXT: mov x1, v0.d[1]
192
+ ; DOT-NEXT: fmov x0, d0
193
+ ; DOT-NEXT: ret
194
+ ;
195
+ ; SVE-LABEL: popcount1x128:
196
+ ; SVE: // %bb.0: // %Entry
197
+ ; SVE-NEXT: fmov d1, x0
198
+ ; SVE-NEXT: movi v0.2d, #0000000000000000
199
+ ; SVE-NEXT: mov v1.d[1], x1
200
+ ; SVE-NEXT: cnt v1.16b, v1.16b
201
+ ; SVE-NEXT: uaddlv h1, v1.16b
202
+ ; SVE-NEXT: mov v0.s[0], v1.s[0]
203
+ ; SVE-NEXT: mov x1, v0.d[1]
204
+ ; SVE-NEXT: fmov x0, d0
205
+ ; SVE-NEXT: ret
86
206
Entry:
87
207
%1 = tail call <1 x i128 > @llvm.ctpop.v1.i128 (<1 x i128 > %0 )
88
208
ret <1 x i128 > %1
89
209
}
90
210
91
211
declare <1 x i128 > @llvm.ctpop.v1.i128 (<1 x i128 >)
212
+
213
; popcount2x64: per-lane population count of a <2 x i64> vector via the
; ctpop intrinsic. The assertions below cover the four RUN configurations of
; this file (CHECK = -O0, NEON, DOT = +neon,+dotprod, SVE).
; CHECK, NEON and SVE expect cnt on bytes followed by three widening pairwise
; adds (uaddlp) up to 2d lanes. DOT instead expects the byte counts to be
; summed by a udot against a vector of byte 1s into a zeroed accumulator,
; followed by a single uaddlp to 2d.
+ define <2 x i64 > @popcount2x64 (<2 x i64 > %0 ) {
214
+ ; CHECK-LABEL: popcount2x64:
215
+ ; CHECK: // %bb.0: // %Entry
216
+ ; CHECK-NEXT: cnt v0.16b, v0.16b
217
+ ; CHECK-NEXT: uaddlp v0.8h, v0.16b
218
+ ; CHECK-NEXT: uaddlp v0.4s, v0.8h
219
+ ; CHECK-NEXT: uaddlp v0.2d, v0.4s
220
+ ; CHECK-NEXT: ret
221
+ ;
222
+ ; NEON-LABEL: popcount2x64:
223
+ ; NEON: // %bb.0: // %Entry
224
+ ; NEON-NEXT: cnt v0.16b, v0.16b
225
+ ; NEON-NEXT: uaddlp v0.8h, v0.16b
226
+ ; NEON-NEXT: uaddlp v0.4s, v0.8h
227
+ ; NEON-NEXT: uaddlp v0.2d, v0.4s
228
+ ; NEON-NEXT: ret
229
+ ;
230
+ ; DOT-LABEL: popcount2x64:
231
+ ; DOT: // %bb.0: // %Entry
232
+ ; DOT-NEXT: movi v1.16b, #1
233
+ ; DOT-NEXT: cnt v0.16b, v0.16b
234
+ ; DOT-NEXT: movi v2.2d, #0000000000000000
235
+ ; DOT-NEXT: udot v2.4s, v1.16b, v0.16b
236
+ ; DOT-NEXT: uaddlp v0.2d, v2.4s
237
+ ; DOT-NEXT: ret
238
+ ;
239
+ ; SVE-LABEL: popcount2x64:
240
+ ; SVE: // %bb.0: // %Entry
241
+ ; SVE-NEXT: cnt v0.16b, v0.16b
242
+ ; SVE-NEXT: uaddlp v0.8h, v0.16b
243
+ ; SVE-NEXT: uaddlp v0.4s, v0.8h
244
+ ; SVE-NEXT: uaddlp v0.2d, v0.4s
245
+ ; SVE-NEXT: ret
246
+ Entry:
247
+ %1 = tail call <2 x i64 > @llvm.ctpop.v2.i64 (<2 x i64 > %0 )
248
+ ret <2 x i64 > %1
249
+ }
250
+
251
; Declaration of the ctpop intrinsic called by popcount2x64.
+ declare <2 x i64 > @llvm.ctpop.v2.i64 (<2 x i64 >)
252
+
253
; popcount4x32: per-lane population count of a <4 x i32> vector via the
; ctpop intrinsic. The assertions below cover the four RUN configurations of
; this file (CHECK = -O0, NEON, DOT = +neon,+dotprod, SVE).
; CHECK, NEON and SVE expect cnt on bytes followed by two widening pairwise
; adds (uaddlp) up to 4s lanes. DOT expects a single udot of the byte counts
; against a vector of byte 1s, accumulated into a zeroed v0, with no uaddlp.
+ define <4 x i32 > @popcount4x32 (<4 x i32 > %0 ) {
254
+ ; CHECK-LABEL: popcount4x32:
255
+ ; CHECK: // %bb.0: // %Entry
256
+ ; CHECK-NEXT: cnt v0.16b, v0.16b
257
+ ; CHECK-NEXT: uaddlp v0.8h, v0.16b
258
+ ; CHECK-NEXT: uaddlp v0.4s, v0.8h
259
+ ; CHECK-NEXT: ret
260
+ ;
261
+ ; NEON-LABEL: popcount4x32:
262
+ ; NEON: // %bb.0: // %Entry
263
+ ; NEON-NEXT: cnt v0.16b, v0.16b
264
+ ; NEON-NEXT: uaddlp v0.8h, v0.16b
265
+ ; NEON-NEXT: uaddlp v0.4s, v0.8h
266
+ ; NEON-NEXT: ret
267
+ ;
268
+ ; DOT-LABEL: popcount4x32:
269
+ ; DOT: // %bb.0: // %Entry
270
+ ; DOT-NEXT: movi v1.16b, #1
271
+ ; DOT-NEXT: cnt v2.16b, v0.16b
272
+ ; DOT-NEXT: movi v0.2d, #0000000000000000
273
+ ; DOT-NEXT: udot v0.4s, v1.16b, v2.16b
274
+ ; DOT-NEXT: ret
275
+ ;
276
+ ; SVE-LABEL: popcount4x32:
277
+ ; SVE: // %bb.0: // %Entry
278
+ ; SVE-NEXT: cnt v0.16b, v0.16b
279
+ ; SVE-NEXT: uaddlp v0.8h, v0.16b
280
+ ; SVE-NEXT: uaddlp v0.4s, v0.8h
281
+ ; SVE-NEXT: ret
282
+ Entry:
283
+ %1 = tail call <4 x i32 > @llvm.ctpop.v4.i32 (<4 x i32 > %0 )
284
+ ret <4 x i32 > %1
285
+ }
286
+
287
; Declaration of the ctpop intrinsic called by popcount4x32.
+ declare <4 x i32 > @llvm.ctpop.v4.i32 (<4 x i32 >)
288
+
289
; popcount2x32: per-lane population count of a <2 x i32> vector via the
; ctpop intrinsic. The assertions below cover the four RUN configurations of
; this file (CHECK = -O0, NEON, DOT = +neon,+dotprod, SVE).
; CHECK, NEON and SVE expect cnt on 8b followed by two widening pairwise adds
; (uaddlp) up to 2s lanes. DOT expects a udot of the byte counts against a
; vector of byte 1s into a zeroed v1, then an fmov copying d1 into d0.
+ define <2 x i32 > @popcount2x32 (<2 x i32 > %0 ) {
290
+ ; CHECK-LABEL: popcount2x32:
291
+ ; CHECK: // %bb.0: // %Entry
292
+ ; CHECK-NEXT: cnt v0.8b, v0.8b
293
+ ; CHECK-NEXT: uaddlp v0.4h, v0.8b
294
+ ; CHECK-NEXT: uaddlp v0.2s, v0.4h
295
+ ; CHECK-NEXT: ret
296
+ ;
297
+ ; NEON-LABEL: popcount2x32:
298
+ ; NEON: // %bb.0: // %Entry
299
+ ; NEON-NEXT: cnt v0.8b, v0.8b
300
+ ; NEON-NEXT: uaddlp v0.4h, v0.8b
301
+ ; NEON-NEXT: uaddlp v0.2s, v0.4h
302
+ ; NEON-NEXT: ret
303
+ ;
304
+ ; DOT-LABEL: popcount2x32:
305
+ ; DOT: // %bb.0: // %Entry
306
+ ; DOT-NEXT: movi v1.2d, #0000000000000000
307
+ ; DOT-NEXT: cnt v0.8b, v0.8b
308
+ ; DOT-NEXT: movi v2.8b, #1
309
+ ; DOT-NEXT: udot v1.2s, v2.8b, v0.8b
310
+ ; DOT-NEXT: fmov d0, d1
311
+ ; DOT-NEXT: ret
312
+ ;
313
+ ; SVE-LABEL: popcount2x32:
314
+ ; SVE: // %bb.0: // %Entry
315
+ ; SVE-NEXT: cnt v0.8b, v0.8b
316
+ ; SVE-NEXT: uaddlp v0.4h, v0.8b
317
+ ; SVE-NEXT: uaddlp v0.2s, v0.4h
318
+ ; SVE-NEXT: ret
319
+ Entry:
320
+ %1 = tail call <2 x i32 > @llvm.ctpop.v2.i32 (<2 x i32 > %0 )
321
+ ret <2 x i32 > %1
322
+ }
323
+
324
; Declaration of the ctpop intrinsic called by popcount2x32.
+ declare <2 x i32 > @llvm.ctpop.v2.i32 (<2 x i32 >)
325
+
326
; popcount8x16: per-lane population count of an <8 x i16> vector via the
; ctpop intrinsic. The assertions below cover the four RUN configurations of
; this file (CHECK = -O0, NEON, DOT = +neon,+dotprod, SVE).
; All four prefixes expect the same sequence: cnt on bytes followed by one
; widening pairwise add (uaddlp) to 8h lanes. The DOT expectations contain no
; udot for this element width.
+ define <8 x i16 > @popcount8x16 (<8 x i16 > %0 ) {
327
+ ; CHECK-LABEL: popcount8x16:
328
+ ; CHECK: // %bb.0: // %Entry
329
+ ; CHECK-NEXT: cnt v0.16b, v0.16b
330
+ ; CHECK-NEXT: uaddlp v0.8h, v0.16b
331
+ ; CHECK-NEXT: ret
332
+ ;
333
+ ; NEON-LABEL: popcount8x16:
334
+ ; NEON: // %bb.0: // %Entry
335
+ ; NEON-NEXT: cnt v0.16b, v0.16b
336
+ ; NEON-NEXT: uaddlp v0.8h, v0.16b
337
+ ; NEON-NEXT: ret
338
+ ;
339
+ ; DOT-LABEL: popcount8x16:
340
+ ; DOT: // %bb.0: // %Entry
341
+ ; DOT-NEXT: cnt v0.16b, v0.16b
342
+ ; DOT-NEXT: uaddlp v0.8h, v0.16b
343
+ ; DOT-NEXT: ret
344
+ ;
345
+ ; SVE-LABEL: popcount8x16:
346
+ ; SVE: // %bb.0: // %Entry
347
+ ; SVE-NEXT: cnt v0.16b, v0.16b
348
+ ; SVE-NEXT: uaddlp v0.8h, v0.16b
349
+ ; SVE-NEXT: ret
350
+ Entry:
351
+ %1 = tail call <8 x i16 > @llvm.ctpop.v8.i16 (<8 x i16 > %0 )
352
+ ret <8 x i16 > %1
353
+ }
354
+
355
; Declaration of the ctpop intrinsic called by popcount8x16.
+ declare <8 x i16 > @llvm.ctpop.v8.i16 (<8 x i16 >)
356
+
357
; popcount4x16: per-lane population count of a <4 x i16> vector via the
; ctpop intrinsic. The assertions below cover the four RUN configurations of
; this file (CHECK = -O0, NEON, DOT = +neon,+dotprod, SVE).
; All four prefixes expect the same sequence: cnt on 8b followed by one
; widening pairwise add (uaddlp) to 4h lanes. The DOT expectations contain no
; udot for this element width.
+ define <4 x i16 > @popcount4x16 (<4 x i16 > %0 ) {
358
+ ; CHECK-LABEL: popcount4x16:
359
+ ; CHECK: // %bb.0: // %Entry
360
+ ; CHECK-NEXT: cnt v0.8b, v0.8b
361
+ ; CHECK-NEXT: uaddlp v0.4h, v0.8b
362
+ ; CHECK-NEXT: ret
363
+ ;
364
+ ; NEON-LABEL: popcount4x16:
365
+ ; NEON: // %bb.0: // %Entry
366
+ ; NEON-NEXT: cnt v0.8b, v0.8b
367
+ ; NEON-NEXT: uaddlp v0.4h, v0.8b
368
+ ; NEON-NEXT: ret
369
+ ;
370
+ ; DOT-LABEL: popcount4x16:
371
+ ; DOT: // %bb.0: // %Entry
372
+ ; DOT-NEXT: cnt v0.8b, v0.8b
373
+ ; DOT-NEXT: uaddlp v0.4h, v0.8b
374
+ ; DOT-NEXT: ret
375
+ ;
376
+ ; SVE-LABEL: popcount4x16:
377
+ ; SVE: // %bb.0: // %Entry
378
+ ; SVE-NEXT: cnt v0.8b, v0.8b
379
+ ; SVE-NEXT: uaddlp v0.4h, v0.8b
380
+ ; SVE-NEXT: ret
381
+ Entry:
382
+ %1 = tail call <4 x i16 > @llvm.ctpop.v4.i16 (<4 x i16 > %0 )
383
+ ret <4 x i16 > %1
384
+ }
385
+
386
; Declaration of the ctpop intrinsic called by popcount4x16.
+ declare <4 x i16 > @llvm.ctpop.v4.i16 (<4 x i16 >)
0 commit comments