1
1
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2
- ; RUN: llc < %s -O0 -mtriple=aarch64-unknown-unknown | FileCheck %s
2
+ ; RUN: llc < %s -O0 -mtriple=aarch64-unknown-unknown | FileCheck %s --check-prefix=CHECKO0
3
+ ; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mattr=+neon | FileCheck %s --check-prefixes=CHECK,NEON
4
+ ; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mattr=+neon,+dotprod | FileCheck %s --check-prefixes=CHECK,DOT
5
+ ; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mattr=+sve | FileCheck %s --check-prefixes=CHECK,SVE
3
6
4
7
; Function Attrs: nobuiltin nounwind readonly
5
8
define i8 @popcount128 (ptr nocapture nonnull readonly %0 ) {
9
+ ; CHECKO0-LABEL: popcount128:
10
+ ; CHECKO0: // %bb.0: // %Entry
11
+ ; CHECKO0-NEXT: ldr q0, [x0]
12
+ ; CHECKO0-NEXT: cnt v0.16b, v0.16b
13
+ ; CHECKO0-NEXT: uaddlv h0, v0.16b
14
+ ; CHECKO0-NEXT: // kill: def $q0 killed $h0
15
+ ; CHECKO0-NEXT: // kill: def $s0 killed $s0 killed $q0
16
+ ; CHECKO0-NEXT: fmov w0, s0
17
+ ; CHECKO0-NEXT: ret
18
+ ;
6
19
; CHECK-LABEL: popcount128:
7
20
; CHECK: // %bb.0: // %Entry
8
- ; CHECK-NEXT: ldr q0, [x0]
21
+ ; CHECK-NEXT: ldr d0, [x0]
22
+ ; CHECK-NEXT: add x8, x0, #8
23
+ ; CHECK-NEXT: ld1 { v0.d }[1], [x8]
9
24
; CHECK-NEXT: cnt v0.16b, v0.16b
10
25
; CHECK-NEXT: uaddlv h0, v0.16b
11
- ; CHECK-NEXT: // kill: def $q0 killed $h0
12
- ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
13
26
; CHECK-NEXT: fmov w0, s0
14
27
; CHECK-NEXT: ret
15
28
Entry:
@@ -24,37 +37,54 @@ declare i128 @llvm.ctpop.i128(i128)
24
37
25
38
; Function Attrs: nobuiltin nounwind readonly
26
39
define i16 @popcount256 (ptr nocapture nonnull readonly %0 ) {
40
+ ; CHECKO0-LABEL: popcount256:
41
+ ; CHECKO0: // %bb.0: // %Entry
42
+ ; CHECKO0-NEXT: ldr x11, [x0]
43
+ ; CHECKO0-NEXT: ldr x10, [x0, #8]
44
+ ; CHECKO0-NEXT: ldr x9, [x0, #16]
45
+ ; CHECKO0-NEXT: ldr x8, [x0, #24]
46
+ ; CHECKO0-NEXT: // implicit-def: $q1
47
+ ; CHECKO0-NEXT: mov v1.d[0], x11
48
+ ; CHECKO0-NEXT: mov v1.d[1], x10
49
+ ; CHECKO0-NEXT: // implicit-def: $q0
50
+ ; CHECKO0-NEXT: mov v0.d[0], x9
51
+ ; CHECKO0-NEXT: mov v0.d[1], x8
52
+ ; CHECKO0-NEXT: cnt v1.16b, v1.16b
53
+ ; CHECKO0-NEXT: uaddlv h1, v1.16b
54
+ ; CHECKO0-NEXT: // kill: def $q1 killed $h1
55
+ ; CHECKO0-NEXT: // kill: def $s1 killed $s1 killed $q1
56
+ ; CHECKO0-NEXT: fmov w0, s1
57
+ ; CHECKO0-NEXT: mov w10, wzr
58
+ ; CHECKO0-NEXT: mov w9, w0
59
+ ; CHECKO0-NEXT: mov w8, w10
60
+ ; CHECKO0-NEXT: bfi x9, x8, #32, #32
61
+ ; CHECKO0-NEXT: cnt v0.16b, v0.16b
62
+ ; CHECKO0-NEXT: uaddlv h0, v0.16b
63
+ ; CHECKO0-NEXT: // kill: def $q0 killed $h0
64
+ ; CHECKO0-NEXT: // kill: def $s0 killed $s0 killed $q0
65
+ ; CHECKO0-NEXT: fmov w0, s0
66
+ ; CHECKO0-NEXT: mov w8, w0
67
+ ; CHECKO0-NEXT: // kill: def $x10 killed $w10
68
+ ; CHECKO0-NEXT: bfi x8, x10, #32, #32
69
+ ; CHECKO0-NEXT: adds x8, x8, x9
70
+ ; CHECKO0-NEXT: mov w0, w8
71
+ ; CHECKO0-NEXT: ret
72
+ ;
27
73
; CHECK-LABEL: popcount256:
28
74
; CHECK: // %bb.0: // %Entry
29
- ; CHECK-NEXT: ldr x11, [x0]
30
- ; CHECK-NEXT: ldr x10, [x0, #8]
31
- ; CHECK-NEXT: ldr x9, [x0, #16]
32
- ; CHECK-NEXT: ldr x8, [x0, #24]
33
- ; CHECK-NEXT: // implicit-def: $q1
34
- ; CHECK-NEXT: mov v1.d[0], x11
35
- ; CHECK-NEXT: mov v1.d[1], x10
36
- ; CHECK-NEXT: // implicit-def: $q0
37
- ; CHECK-NEXT: mov v0.d[0], x9
38
- ; CHECK-NEXT: mov v0.d[1], x8
39
- ; CHECK-NEXT: cnt v1.16b, v1.16b
40
- ; CHECK-NEXT: uaddlv h1, v1.16b
41
- ; CHECK-NEXT: // kill: def $q1 killed $h1
42
- ; CHECK-NEXT: // kill: def $s1 killed $s1 killed $q1
43
- ; CHECK-NEXT: fmov w0, s1
44
- ; CHECK-NEXT: mov w10, wzr
45
- ; CHECK-NEXT: mov w9, w0
46
- ; CHECK-NEXT: mov w8, w10
47
- ; CHECK-NEXT: bfi x9, x8, #32, #32
75
+ ; CHECK-NEXT: ldr d0, [x0, #16]
76
+ ; CHECK-NEXT: ldr d1, [x0]
77
+ ; CHECK-NEXT: add x8, x0, #8
78
+ ; CHECK-NEXT: add x9, x0, #24
79
+ ; CHECK-NEXT: ld1 { v0.d }[1], [x9]
80
+ ; CHECK-NEXT: ld1 { v1.d }[1], [x8]
48
81
; CHECK-NEXT: cnt v0.16b, v0.16b
82
+ ; CHECK-NEXT: cnt v1.16b, v1.16b
49
83
; CHECK-NEXT: uaddlv h0, v0.16b
50
- ; CHECK-NEXT: // kill: def $q0 killed $h0
51
- ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
52
- ; CHECK-NEXT: fmov w0, s0
53
- ; CHECK-NEXT: mov w8, w0
54
- ; CHECK-NEXT: // kill: def $x10 killed $w10
55
- ; CHECK-NEXT: bfi x8, x10, #32, #32
56
- ; CHECK-NEXT: adds x8, x8, x9
57
- ; CHECK-NEXT: mov w0, w8
84
+ ; CHECK-NEXT: uaddlv h1, v1.16b
85
+ ; CHECK-NEXT: fmov w8, s0
86
+ ; CHECK-NEXT: fmov w9, s1
87
+ ; CHECK-NEXT: add w0, w9, w8
58
88
; CHECK-NEXT: ret
59
89
Entry:
60
90
%1 = load i256 , ptr %0 , align 16
@@ -67,25 +97,215 @@ Entry:
67
97
declare i256 @llvm.ctpop.i256 (i256 )
68
98
69
99
; Exercises ctpop on a <1 x i128> argument. In this change the call is switched
; from the intrinsic name llvm.ctpop.v1.i128 to llvm.ctpop.v1i128. Both the
; CHECKO0 expectations and the updated optimized expectations contain a
; byte-wise cnt followed by a uaddlv reduction.
define <1 x i128 > @popcount1x128 (<1 x i128 > %0 ) {
100
+ ; CHECKO0-LABEL: popcount1x128:
101
+ ; CHECKO0: // %bb.0: // %Entry
102
+ ; CHECKO0-NEXT: // implicit-def: $q0
103
+ ; CHECKO0-NEXT: mov v0.d[0], x0
104
+ ; CHECKO0-NEXT: mov v0.d[1], x1
105
+ ; CHECKO0-NEXT: cnt v0.16b, v0.16b
106
+ ; CHECKO0-NEXT: uaddlv h0, v0.16b
107
+ ; CHECKO0-NEXT: // kill: def $q0 killed $h0
108
+ ; CHECKO0-NEXT: mov x1, xzr
109
+ ; CHECKO0-NEXT: // kill: def $s0 killed $s0 killed $q0
110
+ ; CHECKO0-NEXT: fmov w0, s0
111
+ ; CHECKO0-NEXT: mov w8, wzr
112
+ ; CHECKO0-NEXT: // kill: def $x0 killed $w0
113
+ ; CHECKO0-NEXT: // kill: def $x8 killed $w8
114
+ ; CHECKO0-NEXT: bfi x0, x8, #32, #32
115
+ ; CHECKO0-NEXT: ret
116
+ ;
70
117
; CHECK-LABEL: popcount1x128:
71
118
; CHECK: // %bb.0: // %Entry
72
- ; CHECK-NEXT: // implicit-def: $q0
73
- ; CHECK-NEXT: mov v0.d[0], x0
74
- ; CHECK-NEXT: mov v0.d[1], x1
75
- ; CHECK-NEXT: cnt v0.16b, v0.16b
76
- ; CHECK-NEXT: uaddlv h0, v0.16b
77
- ; CHECK-NEXT: // kill: def $q0 killed $h0
78
- ; CHECK-NEXT: mov x1, xzr
79
- ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
80
- ; CHECK-NEXT: fmov w0, s0
81
- ; CHECK-NEXT: mov w8, wzr
82
- ; CHECK-NEXT: // kill: def $x0 killed $w0
83
- ; CHECK-NEXT: // kill: def $x8 killed $w8
84
- ; CHECK-NEXT: bfi x0, x8, #32, #32
119
+ ; CHECK-NEXT: fmov d1, x0
120
+ ; CHECK-NEXT: movi v0.2d, #0000000000000000
121
+ ; CHECK-NEXT: mov v1.d[1], x1
122
+ ; CHECK-NEXT: cnt v1.16b, v1.16b
123
+ ; CHECK-NEXT: uaddlv h1, v1.16b
124
+ ; CHECK-NEXT: mov v0.s[0], v1.s[0]
125
+ ; CHECK-NEXT: mov x1, v0.d[1]
126
+ ; CHECK-NEXT: fmov x0, d0
85
127
; CHECK-NEXT: ret
86
128
Entry:
87
- %1 = tail call <1 x i128 > @llvm.ctpop.v1.i128 (<1 x i128 > %0 )
129
+ %1 = tail call <1 x i128 > @llvm.ctpop.v1i128 (<1 x i128 > %0 )
88
130
ret <1 x i128 > %1
89
131
}
90
132
91
- declare <1 x i128 > @llvm.ctpop.v1.i128 (<1 x i128 >)
133
+ declare <1 x i128 > @llvm.ctpop.v1i128 (<1 x i128 >)
134
+
135
; Exercises ctpop on <2 x i64>. The CHECKO0, NEON and SVE expectations are a
; byte-wise cnt followed by three widening uaddlp steps ending in .2d lanes.
; The DOT expectations instead feed the cnt result into udot with a vector of
; byte ones and a zeroed accumulator, followed by a single uaddlp to .2d.
+ define <2 x i64 > @popcount2x64 (<2 x i64 > %0 ) {
136
+ ; CHECKO0-LABEL: popcount2x64:
137
+ ; CHECKO0: // %bb.0: // %Entry
138
+ ; CHECKO0-NEXT: cnt v0.16b, v0.16b
139
+ ; CHECKO0-NEXT: uaddlp v0.8h, v0.16b
140
+ ; CHECKO0-NEXT: uaddlp v0.4s, v0.8h
141
+ ; CHECKO0-NEXT: uaddlp v0.2d, v0.4s
142
+ ; CHECKO0-NEXT: ret
143
+ ;
144
+ ; NEON-LABEL: popcount2x64:
145
+ ; NEON: // %bb.0: // %Entry
146
+ ; NEON-NEXT: cnt v0.16b, v0.16b
147
+ ; NEON-NEXT: uaddlp v0.8h, v0.16b
148
+ ; NEON-NEXT: uaddlp v0.4s, v0.8h
149
+ ; NEON-NEXT: uaddlp v0.2d, v0.4s
150
+ ; NEON-NEXT: ret
151
+ ;
152
+ ; DOT-LABEL: popcount2x64:
153
+ ; DOT: // %bb.0: // %Entry
154
+ ; DOT-NEXT: movi v1.16b, #1
155
+ ; DOT-NEXT: cnt v0.16b, v0.16b
156
+ ; DOT-NEXT: movi v2.2d, #0000000000000000
157
+ ; DOT-NEXT: udot v2.4s, v1.16b, v0.16b
158
+ ; DOT-NEXT: uaddlp v0.2d, v2.4s
159
+ ; DOT-NEXT: ret
160
+ ;
161
+ ; SVE-LABEL: popcount2x64:
162
+ ; SVE: // %bb.0: // %Entry
163
+ ; SVE-NEXT: cnt v0.16b, v0.16b
164
+ ; SVE-NEXT: uaddlp v0.8h, v0.16b
165
+ ; SVE-NEXT: uaddlp v0.4s, v0.8h
166
+ ; SVE-NEXT: uaddlp v0.2d, v0.4s
167
+ ; SVE-NEXT: ret
168
+ Entry:
169
+ %1 = tail call <2 x i64 > @llvm.ctpop.v2i64 (<2 x i64 > %0 )
170
+ ret <2 x i64 > %1
171
+ }
172
+
173
+ declare <2 x i64 > @llvm.ctpop.v2i64 (<2 x i64 >)
174
+
175
; Exercises ctpop on <1 x i64>. The CHECKO0 expectations are cnt on .8b plus a
; uaddlv reduction whose result is moved through w8/x8 back into d0. The
; common optimized prefix expects cnt followed by three uaddlp steps ending
; in a .1d lane.
+ define <1 x i64 > @popcount1x64 (<1 x i64 > %0 ) {
176
+ ; CHECKO0-LABEL: popcount1x64:
177
+ ; CHECKO0: // %bb.0: // %Entry
178
+ ; CHECKO0-NEXT: fmov x0, d0
179
+ ; CHECKO0-NEXT: fmov d0, x0
180
+ ; CHECKO0-NEXT: cnt v0.8b, v0.8b
181
+ ; CHECKO0-NEXT: uaddlv h0, v0.8b
182
+ ; CHECKO0-NEXT: // kill: def $q0 killed $h0
183
+ ; CHECKO0-NEXT: mov w8, v0.s[0]
184
+ ; CHECKO0-NEXT: // kill: def $x8 killed $w8
185
+ ; CHECKO0-NEXT: fmov d0, x8
186
+ ; CHECKO0-NEXT: ret
187
+ ;
188
+ ; CHECK-LABEL: popcount1x64:
189
+ ; CHECK: // %bb.0: // %Entry
190
+ ; CHECK-NEXT: cnt v0.8b, v0.8b
191
+ ; CHECK-NEXT: uaddlp v0.4h, v0.8b
192
+ ; CHECK-NEXT: uaddlp v0.2s, v0.4h
193
+ ; CHECK-NEXT: uaddlp v0.1d, v0.2s
194
+ ; CHECK-NEXT: ret
195
+ Entry:
196
+ %1 = tail call <1 x i64 > @llvm.ctpop.v1i64 (<1 x i64 > %0 )
197
+ ret <1 x i64 > %1
198
+ }
199
+
200
+ declare <1 x i64 > @llvm.ctpop.v1i64 (<1 x i64 >)
201
+
202
; Exercises ctpop on <4 x i32>. The CHECKO0, NEON and SVE expectations are a
; byte-wise cnt followed by two widening uaddlp steps ending in .4s lanes.
; The DOT expectations replace both uaddlp steps with one udot of the cnt
; result against a vector of byte ones, accumulating into a zeroed v0.
+ define <4 x i32 > @popcount4x32 (<4 x i32 > %0 ) {
203
+ ; CHECKO0-LABEL: popcount4x32:
204
+ ; CHECKO0: // %bb.0: // %Entry
205
+ ; CHECKO0-NEXT: cnt v0.16b, v0.16b
206
+ ; CHECKO0-NEXT: uaddlp v0.8h, v0.16b
207
+ ; CHECKO0-NEXT: uaddlp v0.4s, v0.8h
208
+ ; CHECKO0-NEXT: ret
209
+ ;
210
+ ; NEON-LABEL: popcount4x32:
211
+ ; NEON: // %bb.0: // %Entry
212
+ ; NEON-NEXT: cnt v0.16b, v0.16b
213
+ ; NEON-NEXT: uaddlp v0.8h, v0.16b
214
+ ; NEON-NEXT: uaddlp v0.4s, v0.8h
215
+ ; NEON-NEXT: ret
216
+ ;
217
+ ; DOT-LABEL: popcount4x32:
218
+ ; DOT: // %bb.0: // %Entry
219
+ ; DOT-NEXT: movi v1.16b, #1
220
+ ; DOT-NEXT: cnt v2.16b, v0.16b
221
+ ; DOT-NEXT: movi v0.2d, #0000000000000000
222
+ ; DOT-NEXT: udot v0.4s, v1.16b, v2.16b
223
+ ; DOT-NEXT: ret
224
+ ;
225
+ ; SVE-LABEL: popcount4x32:
226
+ ; SVE: // %bb.0: // %Entry
227
+ ; SVE-NEXT: cnt v0.16b, v0.16b
228
+ ; SVE-NEXT: uaddlp v0.8h, v0.16b
229
+ ; SVE-NEXT: uaddlp v0.4s, v0.8h
230
+ ; SVE-NEXT: ret
231
+ Entry:
232
+ %1 = tail call <4 x i32 > @llvm.ctpop.v4i32 (<4 x i32 > %0 )
233
+ ret <4 x i32 > %1
234
+ }
235
+
236
+ declare <4 x i32 > @llvm.ctpop.v4i32 (<4 x i32 >)
237
+
238
; Exercises ctpop on <2 x i32>. The CHECKO0, NEON and SVE expectations are cnt
; on .8b followed by two widening uaddlp steps ending in .2s lanes. The DOT
; expectations use udot into a zeroed v1 with a vector of byte ones, then
; copy the result into d0 with fmov.
+ define <2 x i32 > @popcount2x32 (<2 x i32 > %0 ) {
239
+ ; CHECKO0-LABEL: popcount2x32:
240
+ ; CHECKO0: // %bb.0: // %Entry
241
+ ; CHECKO0-NEXT: cnt v0.8b, v0.8b
242
+ ; CHECKO0-NEXT: uaddlp v0.4h, v0.8b
243
+ ; CHECKO0-NEXT: uaddlp v0.2s, v0.4h
244
+ ; CHECKO0-NEXT: ret
245
+ ;
246
+ ; NEON-LABEL: popcount2x32:
247
+ ; NEON: // %bb.0: // %Entry
248
+ ; NEON-NEXT: cnt v0.8b, v0.8b
249
+ ; NEON-NEXT: uaddlp v0.4h, v0.8b
250
+ ; NEON-NEXT: uaddlp v0.2s, v0.4h
251
+ ; NEON-NEXT: ret
252
+ ;
253
+ ; DOT-LABEL: popcount2x32:
254
+ ; DOT: // %bb.0: // %Entry
255
+ ; DOT-NEXT: movi v1.2d, #0000000000000000
256
+ ; DOT-NEXT: cnt v0.8b, v0.8b
257
+ ; DOT-NEXT: movi v2.8b, #1
258
+ ; DOT-NEXT: udot v1.2s, v2.8b, v0.8b
259
+ ; DOT-NEXT: fmov d0, d1
260
+ ; DOT-NEXT: ret
261
+ ;
262
+ ; SVE-LABEL: popcount2x32:
263
+ ; SVE: // %bb.0: // %Entry
264
+ ; SVE-NEXT: cnt v0.8b, v0.8b
265
+ ; SVE-NEXT: uaddlp v0.4h, v0.8b
266
+ ; SVE-NEXT: uaddlp v0.2s, v0.4h
267
+ ; SVE-NEXT: ret
268
+ Entry:
269
+ %1 = tail call <2 x i32 > @llvm.ctpop.v2i32 (<2 x i32 > %0 )
270
+ ret <2 x i32 > %1
271
+ }
272
+
273
+ declare <2 x i32 > @llvm.ctpop.v2i32 (<2 x i32 >)
274
+
275
; Exercises ctpop on <8 x i16>. Both the CHECKO0 prefix and the common
; optimized prefix expect the same sequence, a byte-wise cnt followed by one
; widening uaddlp to .8h lanes.
+ define <8 x i16 > @popcount8x16 (<8 x i16 > %0 ) {
276
+ ; CHECKO0-LABEL: popcount8x16:
277
+ ; CHECKO0: // %bb.0: // %Entry
278
+ ; CHECKO0-NEXT: cnt v0.16b, v0.16b
279
+ ; CHECKO0-NEXT: uaddlp v0.8h, v0.16b
280
+ ; CHECKO0-NEXT: ret
281
+ ;
282
+ ; CHECK-LABEL: popcount8x16:
283
+ ; CHECK: // %bb.0: // %Entry
284
+ ; CHECK-NEXT: cnt v0.16b, v0.16b
285
+ ; CHECK-NEXT: uaddlp v0.8h, v0.16b
286
+ ; CHECK-NEXT: ret
287
+ Entry:
288
+ %1 = tail call <8 x i16 > @llvm.ctpop.v8i16 (<8 x i16 > %0 )
289
+ ret <8 x i16 > %1
290
+ }
291
+
292
+ declare <8 x i16 > @llvm.ctpop.v8i16 (<8 x i16 >)
293
+
294
; Exercises ctpop on <4 x i16>. Both the CHECKO0 prefix and the common
; optimized prefix expect the same sequence, cnt on .8b followed by one
; widening uaddlp to .4h lanes.
+ define <4 x i16 > @popcount4x16 (<4 x i16 > %0 ) {
295
+ ; CHECKO0-LABEL: popcount4x16:
296
+ ; CHECKO0: // %bb.0: // %Entry
297
+ ; CHECKO0-NEXT: cnt v0.8b, v0.8b
298
+ ; CHECKO0-NEXT: uaddlp v0.4h, v0.8b
299
+ ; CHECKO0-NEXT: ret
300
+ ;
301
+ ; CHECK-LABEL: popcount4x16:
302
+ ; CHECK: // %bb.0: // %Entry
303
+ ; CHECK-NEXT: cnt v0.8b, v0.8b
304
+ ; CHECK-NEXT: uaddlp v0.4h, v0.8b
305
+ ; CHECK-NEXT: ret
306
+ Entry:
307
+ %1 = tail call <4 x i16 > @llvm.ctpop.v4i16 (<4 x i16 > %0 )
308
+ ret <4 x i16 > %1
309
+ }
310
+
311
+ declare <4 x i16 > @llvm.ctpop.v4i16 (<4 x i16 >)
0 commit comments