3
3
; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mattr=+neon | FileCheck %s --check-prefixes=CHECK,NEON
4
4
; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mattr=+neon,+dotprod | FileCheck %s --check-prefixes=CHECK,DOT
5
5
; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mattr=+sve | FileCheck %s --check-prefixes=CHECK,SVE
6
+ ; RUN: llc < %s -global-isel -mtriple=aarch64-unknown-unknown | FileCheck %s --check-prefix=GISEL
7
+ ; RUN: llc < %s -O0 -global-isel -mtriple=aarch64-unknown-unknown | FileCheck %s --check-prefix=GISELO0
8
+ ; RUN: llc < %s -global-isel -mtriple=aarch64-unknown-unknown -mattr=+neon | FileCheck %s --check-prefixes=GISEL,NEON-GISEL
9
+ ; RUN: llc < %s -global-isel -mtriple=aarch64-unknown-unknown -mattr=+neon,+dotprod | FileCheck %s --check-prefixes=GISEL,DOT-GISEL
10
+ ; RUN: llc < %s -global-isel -mtriple=aarch64-unknown-unknown -mattr=+sve | FileCheck %s --check-prefixes=GISEL,SVE-GISEL
11
+
6
12
7
13
; Function Attrs: nobuiltin nounwind readonly
8
14
define i8 @popcount128 (ptr nocapture nonnull readonly %0 ) {
@@ -25,6 +31,24 @@ define i8 @popcount128(ptr nocapture nonnull readonly %0) {
25
31
; CHECK-NEXT: uaddlv h0, v0.16b
26
32
; CHECK-NEXT: fmov w0, s0
27
33
; CHECK-NEXT: ret
34
+ ;
35
+ ; GISEL-LABEL: popcount128:
36
+ ; GISEL: // %bb.0: // %Entry
37
+ ; GISEL-NEXT: ldr q0, [x0]
38
+ ; GISEL-NEXT: cnt v0.16b, v0.16b
39
+ ; GISEL-NEXT: uaddlv h0, v0.16b
40
+ ; GISEL-NEXT: fmov w0, s0
41
+ ; GISEL-NEXT: ret
42
+ ;
43
+ ; GISELO0-LABEL: popcount128:
44
+ ; GISELO0: // %bb.0: // %Entry
45
+ ; GISELO0-NEXT: ldr q0, [x0]
46
+ ; GISELO0-NEXT: cnt v0.16b, v0.16b
47
+ ; GISELO0-NEXT: uaddlv h0, v0.16b
48
+ ; GISELO0-NEXT: // kill: def $q0 killed $h0
49
+ ; GISELO0-NEXT: // kill: def $s0 killed $s0 killed $q0
50
+ ; GISELO0-NEXT: fmov w0, s0
51
+ ; GISELO0-NEXT: ret
28
52
Entry:
29
53
%1 = load i128 , ptr %0 , align 16
30
54
%2 = tail call i128 @llvm.ctpop.i128 (i128 %1 )
@@ -86,6 +110,57 @@ define i16 @popcount256(ptr nocapture nonnull readonly %0) {
86
110
; CHECK-NEXT: fmov w9, s1
87
111
; CHECK-NEXT: add w0, w9, w8
88
112
; CHECK-NEXT: ret
113
+ ;
114
+ ; GISEL-LABEL: popcount256:
115
+ ; GISEL: // %bb.0: // %Entry
116
+ ; GISEL-NEXT: ldp x8, x9, [x0, #16]
117
+ ; GISEL-NEXT: mov v0.d[0], x8
118
+ ; GISEL-NEXT: ldp x8, x10, [x0]
119
+ ; GISEL-NEXT: mov v1.d[0], x8
120
+ ; GISEL-NEXT: mov v0.d[1], x9
121
+ ; GISEL-NEXT: mov v1.d[1], x10
122
+ ; GISEL-NEXT: cnt v0.16b, v0.16b
123
+ ; GISEL-NEXT: cnt v1.16b, v1.16b
124
+ ; GISEL-NEXT: uaddlv h0, v0.16b
125
+ ; GISEL-NEXT: uaddlv h1, v1.16b
126
+ ; GISEL-NEXT: mov w8, v0.s[0]
127
+ ; GISEL-NEXT: fmov w9, s1
128
+ ; GISEL-NEXT: add x0, x8, w9, uxtw
129
+ ; GISEL-NEXT: // kill: def $w0 killed $w0 killed $x0
130
+ ; GISEL-NEXT: ret
131
+ ;
132
+ ; GISELO0-LABEL: popcount256:
133
+ ; GISELO0: // %bb.0: // %Entry
134
+ ; GISELO0-NEXT: ldr x11, [x0]
135
+ ; GISELO0-NEXT: ldr x10, [x0, #8]
136
+ ; GISELO0-NEXT: ldr x9, [x0, #16]
137
+ ; GISELO0-NEXT: ldr x8, [x0, #24]
138
+ ; GISELO0-NEXT: // implicit-def: $q1
139
+ ; GISELO0-NEXT: mov v1.d[0], x11
140
+ ; GISELO0-NEXT: mov v1.d[1], x10
141
+ ; GISELO0-NEXT: // implicit-def: $q0
142
+ ; GISELO0-NEXT: mov v0.d[0], x9
143
+ ; GISELO0-NEXT: mov v0.d[1], x8
144
+ ; GISELO0-NEXT: cnt v1.16b, v1.16b
145
+ ; GISELO0-NEXT: uaddlv h1, v1.16b
146
+ ; GISELO0-NEXT: // kill: def $q1 killed $h1
147
+ ; GISELO0-NEXT: // kill: def $s1 killed $s1 killed $q1
148
+ ; GISELO0-NEXT: fmov w0, s1
149
+ ; GISELO0-NEXT: mov w10, wzr
150
+ ; GISELO0-NEXT: mov w9, w0
151
+ ; GISELO0-NEXT: mov w8, w10
152
+ ; GISELO0-NEXT: bfi x9, x8, #32, #32
153
+ ; GISELO0-NEXT: cnt v0.16b, v0.16b
154
+ ; GISELO0-NEXT: uaddlv h0, v0.16b
155
+ ; GISELO0-NEXT: // kill: def $q0 killed $h0
156
+ ; GISELO0-NEXT: // kill: def $s0 killed $s0 killed $q0
157
+ ; GISELO0-NEXT: fmov w0, s0
158
+ ; GISELO0-NEXT: mov w8, w0
159
+ ; GISELO0-NEXT: // kill: def $x10 killed $w10
160
+ ; GISELO0-NEXT: bfi x8, x10, #32, #32
161
+ ; GISELO0-NEXT: adds x8, x8, x9
162
+ ; GISELO0-NEXT: mov w0, w8
163
+ ; GISELO0-NEXT: ret
89
164
Entry:
90
165
%1 = load i256 , ptr %0 , align 16
91
166
%2 = tail call i256 @llvm.ctpop.i256 (i256 %1 )
@@ -125,6 +200,33 @@ define <1 x i128> @popcount1x128(<1 x i128> %0) {
125
200
; CHECK-NEXT: mov x1, v0.d[1]
126
201
; CHECK-NEXT: fmov x0, d0
127
202
; CHECK-NEXT: ret
203
+ ;
204
+ ; GISEL-LABEL: popcount1x128:
205
+ ; GISEL: // %bb.0: // %Entry
206
+ ; GISEL-NEXT: mov v0.d[0], x0
207
+ ; GISEL-NEXT: mov v0.d[1], x1
208
+ ; GISEL-NEXT: mov x1, xzr
209
+ ; GISEL-NEXT: cnt v0.16b, v0.16b
210
+ ; GISEL-NEXT: uaddlv h0, v0.16b
211
+ ; GISEL-NEXT: mov w0, v0.s[0]
212
+ ; GISEL-NEXT: ret
213
+ ;
214
+ ; GISELO0-LABEL: popcount1x128:
215
+ ; GISELO0: // %bb.0: // %Entry
216
+ ; GISELO0-NEXT: // implicit-def: $q0
217
+ ; GISELO0-NEXT: mov v0.d[0], x0
218
+ ; GISELO0-NEXT: mov v0.d[1], x1
219
+ ; GISELO0-NEXT: cnt v0.16b, v0.16b
220
+ ; GISELO0-NEXT: uaddlv h0, v0.16b
221
+ ; GISELO0-NEXT: // kill: def $q0 killed $h0
222
+ ; GISELO0-NEXT: mov x1, xzr
223
+ ; GISELO0-NEXT: // kill: def $s0 killed $s0 killed $q0
224
+ ; GISELO0-NEXT: fmov w0, s0
225
+ ; GISELO0-NEXT: mov w8, wzr
226
+ ; GISELO0-NEXT: // kill: def $x0 killed $w0
227
+ ; GISELO0-NEXT: // kill: def $x8 killed $w8
228
+ ; GISELO0-NEXT: bfi x0, x8, #32, #32
229
+ ; GISELO0-NEXT: ret
128
230
Entry:
129
231
%1 = tail call <1 x i128 > @llvm.ctpop.v1i128 (<1 x i128 > %0 )
130
232
ret <1 x i128 > %1
@@ -165,6 +267,39 @@ define <2 x i64> @popcount2x64(<2 x i64> %0) {
165
267
; SVE-NEXT: uaddlp v0.4s, v0.8h
166
268
; SVE-NEXT: uaddlp v0.2d, v0.4s
167
269
; SVE-NEXT: ret
270
+ ;
271
+ ; GISELO0-LABEL: popcount2x64:
272
+ ; GISELO0: // %bb.0: // %Entry
273
+ ; GISELO0-NEXT: cnt v0.16b, v0.16b
274
+ ; GISELO0-NEXT: uaddlp v0.8h, v0.16b
275
+ ; GISELO0-NEXT: uaddlp v0.4s, v0.8h
276
+ ; GISELO0-NEXT: uaddlp v0.2d, v0.4s
277
+ ; GISELO0-NEXT: ret
278
+ ;
279
+ ; NEON-GISEL-LABEL: popcount2x64:
280
+ ; NEON-GISEL: // %bb.0: // %Entry
281
+ ; NEON-GISEL-NEXT: cnt v0.16b, v0.16b
282
+ ; NEON-GISEL-NEXT: uaddlp v0.8h, v0.16b
283
+ ; NEON-GISEL-NEXT: uaddlp v0.4s, v0.8h
284
+ ; NEON-GISEL-NEXT: uaddlp v0.2d, v0.4s
285
+ ; NEON-GISEL-NEXT: ret
286
+ ;
287
+ ; DOT-GISEL-LABEL: popcount2x64:
288
+ ; DOT-GISEL: // %bb.0: // %Entry
289
+ ; DOT-GISEL-NEXT: movi v1.2d, #0000000000000000
290
+ ; DOT-GISEL-NEXT: cnt v0.16b, v0.16b
291
+ ; DOT-GISEL-NEXT: movi v2.16b, #1
292
+ ; DOT-GISEL-NEXT: udot v1.4s, v2.16b, v0.16b
293
+ ; DOT-GISEL-NEXT: uaddlp v0.2d, v1.4s
294
+ ; DOT-GISEL-NEXT: ret
295
+ ;
296
+ ; SVE-GISEL-LABEL: popcount2x64:
297
+ ; SVE-GISEL: // %bb.0: // %Entry
298
+ ; SVE-GISEL-NEXT: cnt v0.16b, v0.16b
299
+ ; SVE-GISEL-NEXT: uaddlp v0.8h, v0.16b
300
+ ; SVE-GISEL-NEXT: uaddlp v0.4s, v0.8h
301
+ ; SVE-GISEL-NEXT: uaddlp v0.2d, v0.4s
302
+ ; SVE-GISEL-NEXT: ret
168
303
Entry:
169
304
%1 = tail call <2 x i64 > @llvm.ctpop.v2i64 (<2 x i64 > %0 )
170
305
ret <2 x i64 > %1
@@ -192,6 +327,26 @@ define <1 x i64> @popcount1x64(<1 x i64> %0) {
192
327
; CHECK-NEXT: uaddlp v0.2s, v0.4h
193
328
; CHECK-NEXT: uaddlp v0.1d, v0.2s
194
329
; CHECK-NEXT: ret
330
+ ;
331
+ ; GISEL-LABEL: popcount1x64:
332
+ ; GISEL: // %bb.0: // %Entry
333
+ ; GISEL-NEXT: cnt v0.8b, v0.8b
334
+ ; GISEL-NEXT: uaddlv h0, v0.8b
335
+ ; GISEL-NEXT: mov w8, v0.s[0]
336
+ ; GISEL-NEXT: fmov d0, x8
337
+ ; GISEL-NEXT: ret
338
+ ;
339
+ ; GISELO0-LABEL: popcount1x64:
340
+ ; GISELO0: // %bb.0: // %Entry
341
+ ; GISELO0-NEXT: fmov x0, d0
342
+ ; GISELO0-NEXT: fmov d0, x0
343
+ ; GISELO0-NEXT: cnt v0.8b, v0.8b
344
+ ; GISELO0-NEXT: uaddlv h0, v0.8b
345
+ ; GISELO0-NEXT: // kill: def $q0 killed $h0
346
+ ; GISELO0-NEXT: mov w8, v0.s[0]
347
+ ; GISELO0-NEXT: // kill: def $x8 killed $w8
348
+ ; GISELO0-NEXT: fmov d0, x8
349
+ ; GISELO0-NEXT: ret
195
350
Entry:
196
351
%1 = tail call <1 x i64 > @llvm.ctpop.v1i64 (<1 x i64 > %0 )
197
352
ret <1 x i64 > %1
@@ -228,6 +383,36 @@ define <4 x i32> @popcount4x32(<4 x i32> %0) {
228
383
; SVE-NEXT: uaddlp v0.8h, v0.16b
229
384
; SVE-NEXT: uaddlp v0.4s, v0.8h
230
385
; SVE-NEXT: ret
386
+ ;
387
+ ; GISELO0-LABEL: popcount4x32:
388
+ ; GISELO0: // %bb.0: // %Entry
389
+ ; GISELO0-NEXT: cnt v0.16b, v0.16b
390
+ ; GISELO0-NEXT: uaddlp v0.8h, v0.16b
391
+ ; GISELO0-NEXT: uaddlp v0.4s, v0.8h
392
+ ; GISELO0-NEXT: ret
393
+ ;
394
+ ; NEON-GISEL-LABEL: popcount4x32:
395
+ ; NEON-GISEL: // %bb.0: // %Entry
396
+ ; NEON-GISEL-NEXT: cnt v0.16b, v0.16b
397
+ ; NEON-GISEL-NEXT: uaddlp v0.8h, v0.16b
398
+ ; NEON-GISEL-NEXT: uaddlp v0.4s, v0.8h
399
+ ; NEON-GISEL-NEXT: ret
400
+ ;
401
+ ; DOT-GISEL-LABEL: popcount4x32:
402
+ ; DOT-GISEL: // %bb.0: // %Entry
403
+ ; DOT-GISEL-NEXT: movi v1.2d, #0000000000000000
404
+ ; DOT-GISEL-NEXT: cnt v0.16b, v0.16b
405
+ ; DOT-GISEL-NEXT: movi v2.16b, #1
406
+ ; DOT-GISEL-NEXT: udot v1.4s, v2.16b, v0.16b
407
+ ; DOT-GISEL-NEXT: mov v0.16b, v1.16b
408
+ ; DOT-GISEL-NEXT: ret
409
+ ;
410
+ ; SVE-GISEL-LABEL: popcount4x32:
411
+ ; SVE-GISEL: // %bb.0: // %Entry
412
+ ; SVE-GISEL-NEXT: cnt v0.16b, v0.16b
413
+ ; SVE-GISEL-NEXT: uaddlp v0.8h, v0.16b
414
+ ; SVE-GISEL-NEXT: uaddlp v0.4s, v0.8h
415
+ ; SVE-GISEL-NEXT: ret
231
416
Entry:
232
417
%1 = tail call <4 x i32 > @llvm.ctpop.v4i32 (<4 x i32 > %0 )
233
418
ret <4 x i32 > %1
@@ -265,6 +450,36 @@ define <2 x i32> @popcount2x32(<2 x i32> %0) {
265
450
; SVE-NEXT: uaddlp v0.4h, v0.8b
266
451
; SVE-NEXT: uaddlp v0.2s, v0.4h
267
452
; SVE-NEXT: ret
453
+ ;
454
+ ; GISELO0-LABEL: popcount2x32:
455
+ ; GISELO0: // %bb.0: // %Entry
456
+ ; GISELO0-NEXT: cnt v0.8b, v0.8b
457
+ ; GISELO0-NEXT: uaddlp v0.4h, v0.8b
458
+ ; GISELO0-NEXT: uaddlp v0.2s, v0.4h
459
+ ; GISELO0-NEXT: ret
460
+ ;
461
+ ; NEON-GISEL-LABEL: popcount2x32:
462
+ ; NEON-GISEL: // %bb.0: // %Entry
463
+ ; NEON-GISEL-NEXT: cnt v0.8b, v0.8b
464
+ ; NEON-GISEL-NEXT: uaddlp v0.4h, v0.8b
465
+ ; NEON-GISEL-NEXT: uaddlp v0.2s, v0.4h
466
+ ; NEON-GISEL-NEXT: ret
467
+ ;
468
+ ; DOT-GISEL-LABEL: popcount2x32:
469
+ ; DOT-GISEL: // %bb.0: // %Entry
470
+ ; DOT-GISEL-NEXT: movi v1.2d, #0000000000000000
471
+ ; DOT-GISEL-NEXT: cnt v0.8b, v0.8b
472
+ ; DOT-GISEL-NEXT: movi v2.8b, #1
473
+ ; DOT-GISEL-NEXT: udot v1.2s, v2.8b, v0.8b
474
+ ; DOT-GISEL-NEXT: fmov d0, d1
475
+ ; DOT-GISEL-NEXT: ret
476
+ ;
477
+ ; SVE-GISEL-LABEL: popcount2x32:
478
+ ; SVE-GISEL: // %bb.0: // %Entry
479
+ ; SVE-GISEL-NEXT: cnt v0.8b, v0.8b
480
+ ; SVE-GISEL-NEXT: uaddlp v0.4h, v0.8b
481
+ ; SVE-GISEL-NEXT: uaddlp v0.2s, v0.4h
482
+ ; SVE-GISEL-NEXT: ret
268
483
Entry:
269
484
%1 = tail call <2 x i32 > @llvm.ctpop.v2i32 (<2 x i32 > %0 )
270
485
ret <2 x i32 > %1
@@ -284,6 +499,18 @@ define <8 x i16> @popcount8x16(<8 x i16> %0) {
284
499
; CHECK-NEXT: cnt v0.16b, v0.16b
285
500
; CHECK-NEXT: uaddlp v0.8h, v0.16b
286
501
; CHECK-NEXT: ret
502
+ ;
503
+ ; GISEL-LABEL: popcount8x16:
504
+ ; GISEL: // %bb.0: // %Entry
505
+ ; GISEL-NEXT: cnt v0.16b, v0.16b
506
+ ; GISEL-NEXT: uaddlp v0.8h, v0.16b
507
+ ; GISEL-NEXT: ret
508
+ ;
509
+ ; GISELO0-LABEL: popcount8x16:
510
+ ; GISELO0: // %bb.0: // %Entry
511
+ ; GISELO0-NEXT: cnt v0.16b, v0.16b
512
+ ; GISELO0-NEXT: uaddlp v0.8h, v0.16b
513
+ ; GISELO0-NEXT: ret
287
514
Entry:
288
515
%1 = tail call <8 x i16 > @llvm.ctpop.v8i16 (<8 x i16 > %0 )
289
516
ret <8 x i16 > %1
@@ -303,6 +530,18 @@ define <4 x i16> @popcount4x16(<4 x i16> %0) {
303
530
; CHECK-NEXT: cnt v0.8b, v0.8b
304
531
; CHECK-NEXT: uaddlp v0.4h, v0.8b
305
532
; CHECK-NEXT: ret
533
+ ;
534
+ ; GISEL-LABEL: popcount4x16:
535
+ ; GISEL: // %bb.0: // %Entry
536
+ ; GISEL-NEXT: cnt v0.8b, v0.8b
537
+ ; GISEL-NEXT: uaddlp v0.4h, v0.8b
538
+ ; GISEL-NEXT: ret
539
+ ;
540
+ ; GISELO0-LABEL: popcount4x16:
541
+ ; GISELO0: // %bb.0: // %Entry
542
+ ; GISELO0-NEXT: cnt v0.8b, v0.8b
543
+ ; GISELO0-NEXT: uaddlp v0.4h, v0.8b
544
+ ; GISELO0-NEXT: ret
306
545
Entry:
307
546
%1 = tail call <4 x i16 > @llvm.ctpop.v4i16 (<4 x i16 > %0 )
308
547
ret <4 x i16 > %1
0 commit comments