6
6
7
7
; Function Attrs: nobuiltin nounwind readonly
8
8
define i8 @popcount128 (ptr nocapture nonnull readonly %0 ) {
9
- ; NEON -LABEL: popcount128:
10
- ; NEON : // %bb.0: // %Entry
11
- ; NEON -NEXT: ldr d0 , [x0]
12
- ; NEON -NEXT: add x8, x0, #8
13
- ; NEON -NEXT: ld1 { v0.d }[1], [x8]
14
- ; NEON -NEXT: cnt v0.16b, v0.16b
15
- ; NEON -NEXT: uaddlv h0, v0.16b
16
- ; NEON -NEXT: fmov w0, s0
17
- ; NEON -NEXT: ret
9
+ ; CHECKO0 -LABEL: popcount128:
10
+ ; CHECKO0 : // %bb.0: // %Entry
11
+ ; CHECKO0 -NEXT: ldr q0 , [x0]
12
+ ; CHECKO0 -NEXT: cnt v0.16b, v0.16b
13
+ ; CHECKO0 -NEXT: uaddlv h0, v0.16b
14
+ ; CHECKO0 -NEXT: // kill: def $q0 killed $h0
15
+ ; CHECKO0 -NEXT: // kill: def $s0 killed $s0 killed $q0
16
+ ; CHECKO0 -NEXT: fmov w0, s0
17
+ ; CHECKO0 -NEXT: ret
18
18
;
19
- ; DOT-LABEL: popcount128:
20
- ; DOT: // %bb.0: // %Entry
21
- ; DOT-NEXT: ldr d0, [x0]
22
- ; DOT-NEXT: add x8, x0, #8
23
- ; DOT-NEXT: ld1 { v0.d }[1], [x8]
24
- ; DOT-NEXT: cnt v0.16b, v0.16b
25
- ; DOT-NEXT: uaddlv h0, v0.16b
26
- ; DOT-NEXT: fmov w0, s0
27
- ; DOT-NEXT: ret
28
- ;
29
- ; SVE-LABEL: popcount128:
30
- ; SVE: // %bb.0: // %Entry
31
- ; SVE-NEXT: ldr d0, [x0]
32
- ; SVE-NEXT: add x8, x0, #8
33
- ; SVE-NEXT: ld1 { v0.d }[1], [x8]
34
- ; SVE-NEXT: cnt v0.16b, v0.16b
35
- ; SVE-NEXT: uaddlv h0, v0.16b
36
- ; SVE-NEXT: fmov w0, s0
37
- ; SVE-NEXT: ret
19
+ ; CHECK-LABEL: popcount128:
20
+ ; CHECK: // %bb.0: // %Entry
21
+ ; CHECK-NEXT: ldr d0, [x0]
22
+ ; CHECK-NEXT: add x8, x0, #8
23
+ ; CHECK-NEXT: ld1 { v0.d }[1], [x8]
24
+ ; CHECK-NEXT: cnt v0.16b, v0.16b
25
+ ; CHECK-NEXT: uaddlv h0, v0.16b
26
+ ; CHECK-NEXT: fmov w0, s0
27
+ ; CHECK-NEXT: ret
38
28
Entry:
39
29
%1 = load i128 , ptr %0 , align 16
40
30
%2 = tail call i128 @llvm.ctpop.i128 (i128 %1 )
@@ -47,56 +37,55 @@ declare i128 @llvm.ctpop.i128(i128)
47
37
48
38
; Function Attrs: nobuiltin nounwind readonly
49
39
define i16 @popcount256 (ptr nocapture nonnull readonly %0 ) {
50
- ; NEON-LABEL: popcount256:
51
- ; NEON: // %bb.0: // %Entry
52
- ; NEON-NEXT: ldr d0, [x0, #16]
53
- ; NEON-NEXT: ldr d1, [x0]
54
- ; NEON-NEXT: add x8, x0, #8
55
- ; NEON-NEXT: add x9, x0, #24
56
- ; NEON-NEXT: ld1 { v0.d }[1], [x9]
57
- ; NEON-NEXT: ld1 { v1.d }[1], [x8]
58
- ; NEON-NEXT: cnt v0.16b, v0.16b
59
- ; NEON-NEXT: cnt v1.16b, v1.16b
60
- ; NEON-NEXT: uaddlv h0, v0.16b
61
- ; NEON-NEXT: uaddlv h1, v1.16b
62
- ; NEON-NEXT: fmov w8, s0
63
- ; NEON-NEXT: fmov w9, s1
64
- ; NEON-NEXT: add w0, w9, w8
65
- ; NEON-NEXT: ret
40
+ ; CHECKO0-LABEL: popcount256:
41
+ ; CHECKO0: // %bb.0: // %Entry
42
+ ; CHECKO0-NEXT: ldr x11, [x0]
43
+ ; CHECKO0-NEXT: ldr x10, [x0, #8]
44
+ ; CHECKO0-NEXT: ldr x9, [x0, #16]
45
+ ; CHECKO0-NEXT: ldr x8, [x0, #24]
46
+ ; CHECKO0-NEXT: // implicit-def: $q1
47
+ ; CHECKO0-NEXT: mov v1.d[0], x11
48
+ ; CHECKO0-NEXT: mov v1.d[1], x10
49
+ ; CHECKO0-NEXT: // implicit-def: $q0
50
+ ; CHECKO0-NEXT: mov v0.d[0], x9
51
+ ; CHECKO0-NEXT: mov v0.d[1], x8
52
+ ; CHECKO0-NEXT: cnt v1.16b, v1.16b
53
+ ; CHECKO0-NEXT: uaddlv h1, v1.16b
54
+ ; CHECKO0-NEXT: // kill: def $q1 killed $h1
55
+ ; CHECKO0-NEXT: // kill: def $s1 killed $s1 killed $q1
56
+ ; CHECKO0-NEXT: fmov w0, s1
57
+ ; CHECKO0-NEXT: mov w10, wzr
58
+ ; CHECKO0-NEXT: mov w9, w0
59
+ ; CHECKO0-NEXT: mov w8, w10
60
+ ; CHECKO0-NEXT: bfi x9, x8, #32, #32
61
+ ; CHECKO0-NEXT: cnt v0.16b, v0.16b
62
+ ; CHECKO0-NEXT: uaddlv h0, v0.16b
63
+ ; CHECKO0-NEXT: // kill: def $q0 killed $h0
64
+ ; CHECKO0-NEXT: // kill: def $s0 killed $s0 killed $q0
65
+ ; CHECKO0-NEXT: fmov w0, s0
66
+ ; CHECKO0-NEXT: mov w8, w0
67
+ ; CHECKO0-NEXT: // kill: def $x10 killed $w10
68
+ ; CHECKO0-NEXT: bfi x8, x10, #32, #32
69
+ ; CHECKO0-NEXT: adds x8, x8, x9
70
+ ; CHECKO0-NEXT: mov w0, w8
71
+ ; CHECKO0-NEXT: ret
66
72
;
67
- ; DOT-LABEL: popcount256:
68
- ; DOT: // %bb.0: // %Entry
69
- ; DOT-NEXT: ldr d0, [x0, #16]
70
- ; DOT-NEXT: ldr d1, [x0]
71
- ; DOT-NEXT: add x8, x0, #8
72
- ; DOT-NEXT: add x9, x0, #24
73
- ; DOT-NEXT: ld1 { v0.d }[1], [x9]
74
- ; DOT-NEXT: ld1 { v1.d }[1], [x8]
75
- ; DOT-NEXT: cnt v0.16b, v0.16b
76
- ; DOT-NEXT: cnt v1.16b, v1.16b
77
- ; DOT-NEXT: uaddlv h0, v0.16b
78
- ; DOT-NEXT: uaddlv h1, v1.16b
79
- ; DOT-NEXT: fmov w8, s0
80
- ; DOT-NEXT: fmov w9, s1
81
- ; DOT-NEXT: add w0, w9, w8
82
- ; DOT-NEXT: ret
83
- ;
84
- ; SVE-LABEL: popcount256:
85
- ; SVE: // %bb.0: // %Entry
86
- ; SVE-NEXT: ldr d0, [x0, #16]
87
- ; SVE-NEXT: ldr d1, [x0]
88
- ; SVE-NEXT: add x8, x0, #8
89
- ; SVE-NEXT: add x9, x0, #24
90
- ; SVE-NEXT: ld1 { v0.d }[1], [x9]
91
- ; SVE-NEXT: ld1 { v1.d }[1], [x8]
92
- ; SVE-NEXT: cnt v0.16b, v0.16b
93
- ; SVE-NEXT: cnt v1.16b, v1.16b
94
- ; SVE-NEXT: uaddlv h0, v0.16b
95
- ; SVE-NEXT: uaddlv h1, v1.16b
96
- ; SVE-NEXT: fmov w8, s0
97
- ; SVE-NEXT: fmov w9, s1
98
- ; SVE-NEXT: add w0, w9, w8
99
- ; SVE-NEXT: ret
73
+ ; CHECK-LABEL: popcount256:
74
+ ; CHECK: // %bb.0: // %Entry
75
+ ; CHECK-NEXT: ldr d0, [x0, #16]
76
+ ; CHECK-NEXT: ldr d1, [x0]
77
+ ; CHECK-NEXT: add x8, x0, #8
78
+ ; CHECK-NEXT: add x9, x0, #24
79
+ ; CHECK-NEXT: ld1 { v0.d }[1], [x9]
80
+ ; CHECK-NEXT: ld1 { v1.d }[1], [x8]
81
+ ; CHECK-NEXT: cnt v0.16b, v0.16b
82
+ ; CHECK-NEXT: cnt v1.16b, v1.16b
83
+ ; CHECK-NEXT: uaddlv h0, v0.16b
84
+ ; CHECK-NEXT: uaddlv h1, v1.16b
85
+ ; CHECK-NEXT: fmov w8, s0
86
+ ; CHECK-NEXT: fmov w9, s1
87
+ ; CHECK-NEXT: add w0, w9, w8
88
+ ; CHECK-NEXT: ret
100
89
Entry:
101
90
%1 = load i256 , ptr %0 , align 16
102
91
%2 = tail call i256 @llvm.ctpop.i256 (i256 %1 )
@@ -108,41 +97,34 @@ Entry:
108
97
declare i256 @llvm.ctpop.i256 (i256 )
109
98
110
99
define <1 x i128 > @popcount1x128 (<1 x i128 > %0 ) {
111
- ; NEON-LABEL: popcount1x128:
112
- ; NEON: // %bb.0: // %Entry
113
- ; NEON-NEXT: fmov d1, x0
114
- ; NEON-NEXT: movi v0.2d, #0000000000000000
115
- ; NEON-NEXT: mov v1.d[1], x1
116
- ; NEON-NEXT: cnt v1.16b, v1.16b
117
- ; NEON-NEXT: uaddlv h1, v1.16b
118
- ; NEON-NEXT: mov v0.s[0], v1.s[0]
119
- ; NEON-NEXT: mov x1, v0.d[1]
120
- ; NEON-NEXT: fmov x0, d0
121
- ; NEON-NEXT: ret
122
- ;
123
- ; DOT-LABEL: popcount1x128:
124
- ; DOT: // %bb.0: // %Entry
125
- ; DOT-NEXT: fmov d1, x0
126
- ; DOT-NEXT: movi v0.2d, #0000000000000000
127
- ; DOT-NEXT: mov v1.d[1], x1
128
- ; DOT-NEXT: cnt v1.16b, v1.16b
129
- ; DOT-NEXT: uaddlv h1, v1.16b
130
- ; DOT-NEXT: mov v0.s[0], v1.s[0]
131
- ; DOT-NEXT: mov x1, v0.d[1]
132
- ; DOT-NEXT: fmov x0, d0
133
- ; DOT-NEXT: ret
100
+ ; CHECKO0-LABEL: popcount1x128:
101
+ ; CHECKO0: // %bb.0: // %Entry
102
+ ; CHECKO0-NEXT: // implicit-def: $q0
103
+ ; CHECKO0-NEXT: mov v0.d[0], x0
104
+ ; CHECKO0-NEXT: mov v0.d[1], x1
105
+ ; CHECKO0-NEXT: cnt v0.16b, v0.16b
106
+ ; CHECKO0-NEXT: uaddlv h0, v0.16b
107
+ ; CHECKO0-NEXT: // kill: def $q0 killed $h0
108
+ ; CHECKO0-NEXT: mov x1, xzr
109
+ ; CHECKO0-NEXT: // kill: def $s0 killed $s0 killed $q0
110
+ ; CHECKO0-NEXT: fmov w0, s0
111
+ ; CHECKO0-NEXT: mov w8, wzr
112
+ ; CHECKO0-NEXT: // kill: def $x0 killed $w0
113
+ ; CHECKO0-NEXT: // kill: def $x8 killed $w8
114
+ ; CHECKO0-NEXT: bfi x0, x8, #32, #32
115
+ ; CHECKO0-NEXT: ret
134
116
;
135
- ; SVE -LABEL: popcount1x128:
136
- ; SVE : // %bb.0: // %Entry
137
- ; SVE -NEXT: fmov d1, x0
138
- ; SVE -NEXT: movi v0.2d, #0000000000000000
139
- ; SVE -NEXT: mov v1.d[1], x1
140
- ; SVE -NEXT: cnt v1.16b, v1.16b
141
- ; SVE -NEXT: uaddlv h1, v1.16b
142
- ; SVE -NEXT: mov v0.s[0], v1.s[0]
143
- ; SVE -NEXT: mov x1, v0.d[1]
144
- ; SVE -NEXT: fmov x0, d0
145
- ; SVE -NEXT: ret
117
+ ; CHECK -LABEL: popcount1x128:
118
+ ; CHECK : // %bb.0: // %Entry
119
+ ; CHECK -NEXT: fmov d1, x0
120
+ ; CHECK -NEXT: movi v0.2d, #0000000000000000
121
+ ; CHECK -NEXT: mov v1.d[1], x1
122
+ ; CHECK -NEXT: cnt v1.16b, v1.16b
123
+ ; CHECK -NEXT: uaddlv h1, v1.16b
124
+ ; CHECK -NEXT: mov v0.s[0], v1.s[0]
125
+ ; CHECK -NEXT: mov x1, v0.d[1]
126
+ ; CHECK -NEXT: fmov x0, d0
127
+ ; CHECK -NEXT: ret
146
128
Entry:
147
129
%1 = tail call <1 x i128 > @llvm.ctpop.v1i128 (<1 x i128 > %0 )
148
130
ret <1 x i128 > %1
@@ -151,6 +133,14 @@ Entry:
151
133
declare <1 x i128 > @llvm.ctpop.v1i128 (<1 x i128 >)
152
134
153
135
define <2 x i64 > @popcount2x64 (<2 x i64 > %0 ) {
136
+ ; CHECKO0-LABEL: popcount2x64:
137
+ ; CHECKO0: // %bb.0: // %Entry
138
+ ; CHECKO0-NEXT: cnt v0.16b, v0.16b
139
+ ; CHECKO0-NEXT: uaddlp v0.8h, v0.16b
140
+ ; CHECKO0-NEXT: uaddlp v0.4s, v0.8h
141
+ ; CHECKO0-NEXT: uaddlp v0.2d, v0.4s
142
+ ; CHECKO0-NEXT: ret
143
+ ;
154
144
; NEON-LABEL: popcount2x64:
155
145
; NEON: // %bb.0: // %Entry
156
146
; NEON-NEXT: cnt v0.16b, v0.16b
@@ -183,6 +173,13 @@ Entry:
183
173
declare <2 x i64 > @llvm.ctpop.v2i64 (<2 x i64 >)
184
174
185
175
define <4 x i32 > @popcount4x32 (<4 x i32 > %0 ) {
176
+ ; CHECKO0-LABEL: popcount4x32:
177
+ ; CHECKO0: // %bb.0: // %Entry
178
+ ; CHECKO0-NEXT: cnt v0.16b, v0.16b
179
+ ; CHECKO0-NEXT: uaddlp v0.8h, v0.16b
180
+ ; CHECKO0-NEXT: uaddlp v0.4s, v0.8h
181
+ ; CHECKO0-NEXT: ret
182
+ ;
186
183
; NEON-LABEL: popcount4x32:
187
184
; NEON: // %bb.0: // %Entry
188
185
; NEON-NEXT: cnt v0.16b, v0.16b
@@ -212,6 +209,13 @@ Entry:
212
209
declare <4 x i32 > @llvm.ctpop.v4i32 (<4 x i32 >)
213
210
214
211
define <2 x i32 > @popcount2x32 (<2 x i32 > %0 ) {
212
+ ; CHECKO0-LABEL: popcount2x32:
213
+ ; CHECKO0: // %bb.0: // %Entry
214
+ ; CHECKO0-NEXT: cnt v0.8b, v0.8b
215
+ ; CHECKO0-NEXT: uaddlp v0.4h, v0.8b
216
+ ; CHECKO0-NEXT: uaddlp v0.2s, v0.4h
217
+ ; CHECKO0-NEXT: ret
218
+ ;
215
219
; NEON-LABEL: popcount2x32:
216
220
; NEON: // %bb.0: // %Entry
217
221
; NEON-NEXT: cnt v0.8b, v0.8b
@@ -242,6 +246,12 @@ Entry:
242
246
declare <2 x i32 > @llvm.ctpop.v2i32 (<2 x i32 >)
243
247
244
248
define <8 x i16 > @popcount8x16 (<8 x i16 > %0 ) {
249
+ ; CHECKO0-LABEL: popcount8x16:
250
+ ; CHECKO0: // %bb.0: // %Entry
251
+ ; CHECKO0-NEXT: cnt v0.16b, v0.16b
252
+ ; CHECKO0-NEXT: uaddlp v0.8h, v0.16b
253
+ ; CHECKO0-NEXT: ret
254
+ ;
245
255
; CHECK-LABEL: popcount8x16:
246
256
; CHECK: // %bb.0: // %Entry
247
257
; CHECK-NEXT: cnt v0.16b, v0.16b
@@ -255,6 +265,12 @@ Entry:
255
265
declare <8 x i16 > @llvm.ctpop.v8i16 (<8 x i16 >)
256
266
257
267
define <4 x i16 > @popcount4x16 (<4 x i16 > %0 ) {
268
+ ; CHECKO0-LABEL: popcount4x16:
269
+ ; CHECKO0: // %bb.0: // %Entry
270
+ ; CHECKO0-NEXT: cnt v0.8b, v0.8b
271
+ ; CHECKO0-NEXT: uaddlp v0.4h, v0.8b
272
+ ; CHECKO0-NEXT: ret
273
+ ;
258
274
; CHECK-LABEL: popcount4x16:
259
275
; CHECK: // %bb.0: // %Entry
260
276
; CHECK-NEXT: cnt v0.8b, v0.8b
0 commit comments