Skip to content

Commit a78906b

Browse files
committed
update tests
1 parent 7bee852 commit a78906b

File tree

1 file changed

+127
-111
lines changed

1 file changed

+127
-111
lines changed

llvm/test/CodeGen/AArch64/popcount.ll

Lines changed: 127 additions & 111 deletions
Original file line number | Diff line number | Diff line change
@@ -6,35 +6,25 @@
66

77
; Function Attrs: nobuiltin nounwind readonly
88
define i8 @popcount128(ptr nocapture nonnull readonly %0) {
9-
; NEON-LABEL: popcount128:
10-
; NEON: // %bb.0: // %Entry
11-
; NEON-NEXT: ldr d0, [x0]
12-
; NEON-NEXT: add x8, x0, #8
13-
; NEON-NEXT: ld1 { v0.d }[1], [x8]
14-
; NEON-NEXT: cnt v0.16b, v0.16b
15-
; NEON-NEXT: uaddlv h0, v0.16b
16-
; NEON-NEXT: fmov w0, s0
17-
; NEON-NEXT: ret
9+
; CHECKO0-LABEL: popcount128:
10+
; CHECKO0: // %bb.0: // %Entry
11+
; CHECKO0-NEXT: ldr q0, [x0]
12+
; CHECKO0-NEXT: cnt v0.16b, v0.16b
13+
; CHECKO0-NEXT: uaddlv h0, v0.16b
14+
; CHECKO0-NEXT: // kill: def $q0 killed $h0
15+
; CHECKO0-NEXT: // kill: def $s0 killed $s0 killed $q0
16+
; CHECKO0-NEXT: fmov w0, s0
17+
; CHECKO0-NEXT: ret
1818
;
19-
; DOT-LABEL: popcount128:
20-
; DOT: // %bb.0: // %Entry
21-
; DOT-NEXT: ldr d0, [x0]
22-
; DOT-NEXT: add x8, x0, #8
23-
; DOT-NEXT: ld1 { v0.d }[1], [x8]
24-
; DOT-NEXT: cnt v0.16b, v0.16b
25-
; DOT-NEXT: uaddlv h0, v0.16b
26-
; DOT-NEXT: fmov w0, s0
27-
; DOT-NEXT: ret
28-
;
29-
; SVE-LABEL: popcount128:
30-
; SVE: // %bb.0: // %Entry
31-
; SVE-NEXT: ldr d0, [x0]
32-
; SVE-NEXT: add x8, x0, #8
33-
; SVE-NEXT: ld1 { v0.d }[1], [x8]
34-
; SVE-NEXT: cnt v0.16b, v0.16b
35-
; SVE-NEXT: uaddlv h0, v0.16b
36-
; SVE-NEXT: fmov w0, s0
37-
; SVE-NEXT: ret
19+
; CHECK-LABEL: popcount128:
20+
; CHECK: // %bb.0: // %Entry
21+
; CHECK-NEXT: ldr d0, [x0]
22+
; CHECK-NEXT: add x8, x0, #8
23+
; CHECK-NEXT: ld1 { v0.d }[1], [x8]
24+
; CHECK-NEXT: cnt v0.16b, v0.16b
25+
; CHECK-NEXT: uaddlv h0, v0.16b
26+
; CHECK-NEXT: fmov w0, s0
27+
; CHECK-NEXT: ret
3828
Entry:
3929
%1 = load i128, ptr %0, align 16
4030
%2 = tail call i128 @llvm.ctpop.i128(i128 %1)
@@ -47,56 +37,55 @@ declare i128 @llvm.ctpop.i128(i128)
4737

4838
; Function Attrs: nobuiltin nounwind readonly
4939
define i16 @popcount256(ptr nocapture nonnull readonly %0) {
50-
; NEON-LABEL: popcount256:
51-
; NEON: // %bb.0: // %Entry
52-
; NEON-NEXT: ldr d0, [x0, #16]
53-
; NEON-NEXT: ldr d1, [x0]
54-
; NEON-NEXT: add x8, x0, #8
55-
; NEON-NEXT: add x9, x0, #24
56-
; NEON-NEXT: ld1 { v0.d }[1], [x9]
57-
; NEON-NEXT: ld1 { v1.d }[1], [x8]
58-
; NEON-NEXT: cnt v0.16b, v0.16b
59-
; NEON-NEXT: cnt v1.16b, v1.16b
60-
; NEON-NEXT: uaddlv h0, v0.16b
61-
; NEON-NEXT: uaddlv h1, v1.16b
62-
; NEON-NEXT: fmov w8, s0
63-
; NEON-NEXT: fmov w9, s1
64-
; NEON-NEXT: add w0, w9, w8
65-
; NEON-NEXT: ret
40+
; CHECKO0-LABEL: popcount256:
41+
; CHECKO0: // %bb.0: // %Entry
42+
; CHECKO0-NEXT: ldr x11, [x0]
43+
; CHECKO0-NEXT: ldr x10, [x0, #8]
44+
; CHECKO0-NEXT: ldr x9, [x0, #16]
45+
; CHECKO0-NEXT: ldr x8, [x0, #24]
46+
; CHECKO0-NEXT: // implicit-def: $q1
47+
; CHECKO0-NEXT: mov v1.d[0], x11
48+
; CHECKO0-NEXT: mov v1.d[1], x10
49+
; CHECKO0-NEXT: // implicit-def: $q0
50+
; CHECKO0-NEXT: mov v0.d[0], x9
51+
; CHECKO0-NEXT: mov v0.d[1], x8
52+
; CHECKO0-NEXT: cnt v1.16b, v1.16b
53+
; CHECKO0-NEXT: uaddlv h1, v1.16b
54+
; CHECKO0-NEXT: // kill: def $q1 killed $h1
55+
; CHECKO0-NEXT: // kill: def $s1 killed $s1 killed $q1
56+
; CHECKO0-NEXT: fmov w0, s1
57+
; CHECKO0-NEXT: mov w10, wzr
58+
; CHECKO0-NEXT: mov w9, w0
59+
; CHECKO0-NEXT: mov w8, w10
60+
; CHECKO0-NEXT: bfi x9, x8, #32, #32
61+
; CHECKO0-NEXT: cnt v0.16b, v0.16b
62+
; CHECKO0-NEXT: uaddlv h0, v0.16b
63+
; CHECKO0-NEXT: // kill: def $q0 killed $h0
64+
; CHECKO0-NEXT: // kill: def $s0 killed $s0 killed $q0
65+
; CHECKO0-NEXT: fmov w0, s0
66+
; CHECKO0-NEXT: mov w8, w0
67+
; CHECKO0-NEXT: // kill: def $x10 killed $w10
68+
; CHECKO0-NEXT: bfi x8, x10, #32, #32
69+
; CHECKO0-NEXT: adds x8, x8, x9
70+
; CHECKO0-NEXT: mov w0, w8
71+
; CHECKO0-NEXT: ret
6672
;
67-
; DOT-LABEL: popcount256:
68-
; DOT: // %bb.0: // %Entry
69-
; DOT-NEXT: ldr d0, [x0, #16]
70-
; DOT-NEXT: ldr d1, [x0]
71-
; DOT-NEXT: add x8, x0, #8
72-
; DOT-NEXT: add x9, x0, #24
73-
; DOT-NEXT: ld1 { v0.d }[1], [x9]
74-
; DOT-NEXT: ld1 { v1.d }[1], [x8]
75-
; DOT-NEXT: cnt v0.16b, v0.16b
76-
; DOT-NEXT: cnt v1.16b, v1.16b
77-
; DOT-NEXT: uaddlv h0, v0.16b
78-
; DOT-NEXT: uaddlv h1, v1.16b
79-
; DOT-NEXT: fmov w8, s0
80-
; DOT-NEXT: fmov w9, s1
81-
; DOT-NEXT: add w0, w9, w8
82-
; DOT-NEXT: ret
83-
;
84-
; SVE-LABEL: popcount256:
85-
; SVE: // %bb.0: // %Entry
86-
; SVE-NEXT: ldr d0, [x0, #16]
87-
; SVE-NEXT: ldr d1, [x0]
88-
; SVE-NEXT: add x8, x0, #8
89-
; SVE-NEXT: add x9, x0, #24
90-
; SVE-NEXT: ld1 { v0.d }[1], [x9]
91-
; SVE-NEXT: ld1 { v1.d }[1], [x8]
92-
; SVE-NEXT: cnt v0.16b, v0.16b
93-
; SVE-NEXT: cnt v1.16b, v1.16b
94-
; SVE-NEXT: uaddlv h0, v0.16b
95-
; SVE-NEXT: uaddlv h1, v1.16b
96-
; SVE-NEXT: fmov w8, s0
97-
; SVE-NEXT: fmov w9, s1
98-
; SVE-NEXT: add w0, w9, w8
99-
; SVE-NEXT: ret
73+
; CHECK-LABEL: popcount256:
74+
; CHECK: // %bb.0: // %Entry
75+
; CHECK-NEXT: ldr d0, [x0, #16]
76+
; CHECK-NEXT: ldr d1, [x0]
77+
; CHECK-NEXT: add x8, x0, #8
78+
; CHECK-NEXT: add x9, x0, #24
79+
; CHECK-NEXT: ld1 { v0.d }[1], [x9]
80+
; CHECK-NEXT: ld1 { v1.d }[1], [x8]
81+
; CHECK-NEXT: cnt v0.16b, v0.16b
82+
; CHECK-NEXT: cnt v1.16b, v1.16b
83+
; CHECK-NEXT: uaddlv h0, v0.16b
84+
; CHECK-NEXT: uaddlv h1, v1.16b
85+
; CHECK-NEXT: fmov w8, s0
86+
; CHECK-NEXT: fmov w9, s1
87+
; CHECK-NEXT: add w0, w9, w8
88+
; CHECK-NEXT: ret
10089
Entry:
10190
%1 = load i256, ptr %0, align 16
10291
%2 = tail call i256 @llvm.ctpop.i256(i256 %1)
@@ -108,41 +97,34 @@ Entry:
10897
declare i256 @llvm.ctpop.i256(i256)
10998

11099
define <1 x i128> @popcount1x128(<1 x i128> %0) {
111-
; NEON-LABEL: popcount1x128:
112-
; NEON: // %bb.0: // %Entry
113-
; NEON-NEXT: fmov d1, x0
114-
; NEON-NEXT: movi v0.2d, #0000000000000000
115-
; NEON-NEXT: mov v1.d[1], x1
116-
; NEON-NEXT: cnt v1.16b, v1.16b
117-
; NEON-NEXT: uaddlv h1, v1.16b
118-
; NEON-NEXT: mov v0.s[0], v1.s[0]
119-
; NEON-NEXT: mov x1, v0.d[1]
120-
; NEON-NEXT: fmov x0, d0
121-
; NEON-NEXT: ret
122-
;
123-
; DOT-LABEL: popcount1x128:
124-
; DOT: // %bb.0: // %Entry
125-
; DOT-NEXT: fmov d1, x0
126-
; DOT-NEXT: movi v0.2d, #0000000000000000
127-
; DOT-NEXT: mov v1.d[1], x1
128-
; DOT-NEXT: cnt v1.16b, v1.16b
129-
; DOT-NEXT: uaddlv h1, v1.16b
130-
; DOT-NEXT: mov v0.s[0], v1.s[0]
131-
; DOT-NEXT: mov x1, v0.d[1]
132-
; DOT-NEXT: fmov x0, d0
133-
; DOT-NEXT: ret
100+
; CHECKO0-LABEL: popcount1x128:
101+
; CHECKO0: // %bb.0: // %Entry
102+
; CHECKO0-NEXT: // implicit-def: $q0
103+
; CHECKO0-NEXT: mov v0.d[0], x0
104+
; CHECKO0-NEXT: mov v0.d[1], x1
105+
; CHECKO0-NEXT: cnt v0.16b, v0.16b
106+
; CHECKO0-NEXT: uaddlv h0, v0.16b
107+
; CHECKO0-NEXT: // kill: def $q0 killed $h0
108+
; CHECKO0-NEXT: mov x1, xzr
109+
; CHECKO0-NEXT: // kill: def $s0 killed $s0 killed $q0
110+
; CHECKO0-NEXT: fmov w0, s0
111+
; CHECKO0-NEXT: mov w8, wzr
112+
; CHECKO0-NEXT: // kill: def $x0 killed $w0
113+
; CHECKO0-NEXT: // kill: def $x8 killed $w8
114+
; CHECKO0-NEXT: bfi x0, x8, #32, #32
115+
; CHECKO0-NEXT: ret
134116
;
135-
; SVE-LABEL: popcount1x128:
136-
; SVE: // %bb.0: // %Entry
137-
; SVE-NEXT: fmov d1, x0
138-
; SVE-NEXT: movi v0.2d, #0000000000000000
139-
; SVE-NEXT: mov v1.d[1], x1
140-
; SVE-NEXT: cnt v1.16b, v1.16b
141-
; SVE-NEXT: uaddlv h1, v1.16b
142-
; SVE-NEXT: mov v0.s[0], v1.s[0]
143-
; SVE-NEXT: mov x1, v0.d[1]
144-
; SVE-NEXT: fmov x0, d0
145-
; SVE-NEXT: ret
117+
; CHECK-LABEL: popcount1x128:
118+
; CHECK: // %bb.0: // %Entry
119+
; CHECK-NEXT: fmov d1, x0
120+
; CHECK-NEXT: movi v0.2d, #0000000000000000
121+
; CHECK-NEXT: mov v1.d[1], x1
122+
; CHECK-NEXT: cnt v1.16b, v1.16b
123+
; CHECK-NEXT: uaddlv h1, v1.16b
124+
; CHECK-NEXT: mov v0.s[0], v1.s[0]
125+
; CHECK-NEXT: mov x1, v0.d[1]
126+
; CHECK-NEXT: fmov x0, d0
127+
; CHECK-NEXT: ret
146128
Entry:
147129
%1 = tail call <1 x i128> @llvm.ctpop.v1i128(<1 x i128> %0)
148130
ret <1 x i128> %1
@@ -151,6 +133,14 @@ Entry:
151133
declare <1 x i128> @llvm.ctpop.v1i128(<1 x i128>)
152134

153135
define <2 x i64> @popcount2x64(<2 x i64> %0) {
136+
; CHECKO0-LABEL: popcount2x64:
137+
; CHECKO0: // %bb.0: // %Entry
138+
; CHECKO0-NEXT: cnt v0.16b, v0.16b
139+
; CHECKO0-NEXT: uaddlp v0.8h, v0.16b
140+
; CHECKO0-NEXT: uaddlp v0.4s, v0.8h
141+
; CHECKO0-NEXT: uaddlp v0.2d, v0.4s
142+
; CHECKO0-NEXT: ret
143+
;
154144
; NEON-LABEL: popcount2x64:
155145
; NEON: // %bb.0: // %Entry
156146
; NEON-NEXT: cnt v0.16b, v0.16b
@@ -183,6 +173,13 @@ Entry:
183173
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
184174

185175
define <4 x i32> @popcount4x32(<4 x i32> %0) {
176+
; CHECKO0-LABEL: popcount4x32:
177+
; CHECKO0: // %bb.0: // %Entry
178+
; CHECKO0-NEXT: cnt v0.16b, v0.16b
179+
; CHECKO0-NEXT: uaddlp v0.8h, v0.16b
180+
; CHECKO0-NEXT: uaddlp v0.4s, v0.8h
181+
; CHECKO0-NEXT: ret
182+
;
186183
; NEON-LABEL: popcount4x32:
187184
; NEON: // %bb.0: // %Entry
188185
; NEON-NEXT: cnt v0.16b, v0.16b
@@ -212,6 +209,13 @@ Entry:
212209
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
213210

214211
define <2 x i32> @popcount2x32(<2 x i32> %0) {
212+
; CHECKO0-LABEL: popcount2x32:
213+
; CHECKO0: // %bb.0: // %Entry
214+
; CHECKO0-NEXT: cnt v0.8b, v0.8b
215+
; CHECKO0-NEXT: uaddlp v0.4h, v0.8b
216+
; CHECKO0-NEXT: uaddlp v0.2s, v0.4h
217+
; CHECKO0-NEXT: ret
218+
;
215219
; NEON-LABEL: popcount2x32:
216220
; NEON: // %bb.0: // %Entry
217221
; NEON-NEXT: cnt v0.8b, v0.8b
@@ -242,6 +246,12 @@ Entry:
242246
declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
243247

244248
define <8 x i16> @popcount8x16(<8 x i16> %0) {
249+
; CHECKO0-LABEL: popcount8x16:
250+
; CHECKO0: // %bb.0: // %Entry
251+
; CHECKO0-NEXT: cnt v0.16b, v0.16b
252+
; CHECKO0-NEXT: uaddlp v0.8h, v0.16b
253+
; CHECKO0-NEXT: ret
254+
;
245255
; CHECK-LABEL: popcount8x16:
246256
; CHECK: // %bb.0: // %Entry
247257
; CHECK-NEXT: cnt v0.16b, v0.16b
@@ -255,6 +265,12 @@ Entry:
255265
declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
256266

257267
define <4 x i16> @popcount4x16(<4 x i16> %0) {
268+
; CHECKO0-LABEL: popcount4x16:
269+
; CHECKO0: // %bb.0: // %Entry
270+
; CHECKO0-NEXT: cnt v0.8b, v0.8b
271+
; CHECKO0-NEXT: uaddlp v0.4h, v0.8b
272+
; CHECKO0-NEXT: ret
273+
;
258274
; CHECK-LABEL: popcount4x16:
259275
; CHECK: // %bb.0: // %Entry
260276
; CHECK-NEXT: cnt v0.8b, v0.8b

0 commit comments

Comments
 (0)