Skip to content

Commit 0e0d53d

Browse files
committed
add tests
1 parent 8f9c994 commit 0e0d53d

File tree

1 file changed

+295
-0
lines changed

1 file changed

+295
-0
lines changed

llvm/test/CodeGen/AArch64/popcount.ll

Lines changed: 295 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -O0 -mtriple=aarch64-unknown-unknown | FileCheck %s
3+
; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mattr=+neon | FileCheck %s --check-prefix=NEON
4+
; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mattr=+neon,+dotprod | FileCheck %s --check-prefix=DOT
5+
; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mattr=+sve | FileCheck %s --check-prefix=SVE
36

47
; Function Attrs: nobuiltin nounwind readonly
58
define i8 @popcount128(ptr nocapture nonnull readonly %0) {
@@ -12,6 +15,36 @@ define i8 @popcount128(ptr nocapture nonnull readonly %0) {
1215
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
1316
; CHECK-NEXT: fmov w0, s0
1417
; CHECK-NEXT: ret
18+
;
19+
; NEON-LABEL: popcount128:
20+
; NEON: // %bb.0: // %Entry
21+
; NEON-NEXT: ldr d0, [x0]
22+
; NEON-NEXT: add x8, x0, #8
23+
; NEON-NEXT: ld1 { v0.d }[1], [x8]
24+
; NEON-NEXT: cnt v0.16b, v0.16b
25+
; NEON-NEXT: uaddlv h0, v0.16b
26+
; NEON-NEXT: fmov w0, s0
27+
; NEON-NEXT: ret
28+
;
29+
; DOT-LABEL: popcount128:
30+
; DOT: // %bb.0: // %Entry
31+
; DOT-NEXT: ldr d0, [x0]
32+
; DOT-NEXT: add x8, x0, #8
33+
; DOT-NEXT: ld1 { v0.d }[1], [x8]
34+
; DOT-NEXT: cnt v0.16b, v0.16b
35+
; DOT-NEXT: uaddlv h0, v0.16b
36+
; DOT-NEXT: fmov w0, s0
37+
; DOT-NEXT: ret
38+
;
39+
; SVE-LABEL: popcount128:
40+
; SVE: // %bb.0: // %Entry
41+
; SVE-NEXT: ldr d0, [x0]
42+
; SVE-NEXT: add x8, x0, #8
43+
; SVE-NEXT: ld1 { v0.d }[1], [x8]
44+
; SVE-NEXT: cnt v0.16b, v0.16b
45+
; SVE-NEXT: uaddlv h0, v0.16b
46+
; SVE-NEXT: fmov w0, s0
47+
; SVE-NEXT: ret
1548
Entry:
1649
%1 = load i128, ptr %0, align 16
1750
%2 = tail call i128 @llvm.ctpop.i128(i128 %1)
@@ -56,6 +89,57 @@ define i16 @popcount256(ptr nocapture nonnull readonly %0) {
5689
; CHECK-NEXT: adds x8, x8, x9
5790
; CHECK-NEXT: mov w0, w8
5891
; CHECK-NEXT: ret
92+
;
93+
; NEON-LABEL: popcount256:
94+
; NEON: // %bb.0: // %Entry
95+
; NEON-NEXT: ldr d0, [x0, #16]
96+
; NEON-NEXT: ldr d1, [x0]
97+
; NEON-NEXT: add x8, x0, #8
98+
; NEON-NEXT: add x9, x0, #24
99+
; NEON-NEXT: ld1 { v0.d }[1], [x9]
100+
; NEON-NEXT: ld1 { v1.d }[1], [x8]
101+
; NEON-NEXT: cnt v0.16b, v0.16b
102+
; NEON-NEXT: cnt v1.16b, v1.16b
103+
; NEON-NEXT: uaddlv h0, v0.16b
104+
; NEON-NEXT: uaddlv h1, v1.16b
105+
; NEON-NEXT: fmov w8, s0
106+
; NEON-NEXT: fmov w9, s1
107+
; NEON-NEXT: add w0, w9, w8
108+
; NEON-NEXT: ret
109+
;
110+
; DOT-LABEL: popcount256:
111+
; DOT: // %bb.0: // %Entry
112+
; DOT-NEXT: ldr d0, [x0, #16]
113+
; DOT-NEXT: ldr d1, [x0]
114+
; DOT-NEXT: add x8, x0, #8
115+
; DOT-NEXT: add x9, x0, #24
116+
; DOT-NEXT: ld1 { v0.d }[1], [x9]
117+
; DOT-NEXT: ld1 { v1.d }[1], [x8]
118+
; DOT-NEXT: cnt v0.16b, v0.16b
119+
; DOT-NEXT: cnt v1.16b, v1.16b
120+
; DOT-NEXT: uaddlv h0, v0.16b
121+
; DOT-NEXT: uaddlv h1, v1.16b
122+
; DOT-NEXT: fmov w8, s0
123+
; DOT-NEXT: fmov w9, s1
124+
; DOT-NEXT: add w0, w9, w8
125+
; DOT-NEXT: ret
126+
;
127+
; SVE-LABEL: popcount256:
128+
; SVE: // %bb.0: // %Entry
129+
; SVE-NEXT: ldr d0, [x0, #16]
130+
; SVE-NEXT: ldr d1, [x0]
131+
; SVE-NEXT: add x8, x0, #8
132+
; SVE-NEXT: add x9, x0, #24
133+
; SVE-NEXT: ld1 { v0.d }[1], [x9]
134+
; SVE-NEXT: ld1 { v1.d }[1], [x8]
135+
; SVE-NEXT: cnt v0.16b, v0.16b
136+
; SVE-NEXT: cnt v1.16b, v1.16b
137+
; SVE-NEXT: uaddlv h0, v0.16b
138+
; SVE-NEXT: uaddlv h1, v1.16b
139+
; SVE-NEXT: fmov w8, s0
140+
; SVE-NEXT: fmov w9, s1
141+
; SVE-NEXT: add w0, w9, w8
142+
; SVE-NEXT: ret
59143
Entry:
60144
%1 = load i256, ptr %0, align 16
61145
%2 = tail call i256 @llvm.ctpop.i256(i256 %1)
@@ -83,9 +167,220 @@ define <1 x i128> @popcount1x128(<1 x i128> %0) {
83167
; CHECK-NEXT: // kill: def $x8 killed $w8
84168
; CHECK-NEXT: bfi x0, x8, #32, #32
85169
; CHECK-NEXT: ret
170+
;
171+
; NEON-LABEL: popcount1x128:
172+
; NEON: // %bb.0: // %Entry
173+
; NEON-NEXT: fmov d1, x0
174+
; NEON-NEXT: movi v0.2d, #0000000000000000
175+
; NEON-NEXT: mov v1.d[1], x1
176+
; NEON-NEXT: cnt v1.16b, v1.16b
177+
; NEON-NEXT: uaddlv h1, v1.16b
178+
; NEON-NEXT: mov v0.s[0], v1.s[0]
179+
; NEON-NEXT: mov x1, v0.d[1]
180+
; NEON-NEXT: fmov x0, d0
181+
; NEON-NEXT: ret
182+
;
183+
; DOT-LABEL: popcount1x128:
184+
; DOT: // %bb.0: // %Entry
185+
; DOT-NEXT: fmov d1, x0
186+
; DOT-NEXT: movi v0.2d, #0000000000000000
187+
; DOT-NEXT: mov v1.d[1], x1
188+
; DOT-NEXT: cnt v1.16b, v1.16b
189+
; DOT-NEXT: uaddlv h1, v1.16b
190+
; DOT-NEXT: mov v0.s[0], v1.s[0]
191+
; DOT-NEXT: mov x1, v0.d[1]
192+
; DOT-NEXT: fmov x0, d0
193+
; DOT-NEXT: ret
194+
;
195+
; SVE-LABEL: popcount1x128:
196+
; SVE: // %bb.0: // %Entry
197+
; SVE-NEXT: fmov d1, x0
198+
; SVE-NEXT: movi v0.2d, #0000000000000000
199+
; SVE-NEXT: mov v1.d[1], x1
200+
; SVE-NEXT: cnt v1.16b, v1.16b
201+
; SVE-NEXT: uaddlv h1, v1.16b
202+
; SVE-NEXT: mov v0.s[0], v1.s[0]
203+
; SVE-NEXT: mov x1, v0.d[1]
204+
; SVE-NEXT: fmov x0, d0
205+
; SVE-NEXT: ret
86206
Entry:
87207
%1 = tail call <1 x i128> @llvm.ctpop.v1.i128(<1 x i128> %0)
88208
ret <1 x i128> %1
89209
}
90210

91211
declare <1 x i128> @llvm.ctpop.v1.i128(<1 x i128>)
212+
213+
; Test: population count of a <2 x i64> vector via @llvm.ctpop.v2.i64.
; Per the assertions below, the -O0, +neon and +sve runs expect a per-byte cnt
; followed by three widening pairwise adds (uaddlp) up to 64-bit lanes. The
; +neon,+dotprod run instead expects the byte counts to be summed into 32-bit
; lanes with udot against an all-ones vector, then widened by a single uaddlp.
define <2 x i64> @popcount2x64(<2 x i64> %0) {
214+
; CHECK-LABEL: popcount2x64:
215+
; CHECK: // %bb.0: // %Entry
216+
; CHECK-NEXT: cnt v0.16b, v0.16b
217+
; CHECK-NEXT: uaddlp v0.8h, v0.16b
218+
; CHECK-NEXT: uaddlp v0.4s, v0.8h
219+
; CHECK-NEXT: uaddlp v0.2d, v0.4s
220+
; CHECK-NEXT: ret
221+
;
222+
; NEON-LABEL: popcount2x64:
223+
; NEON: // %bb.0: // %Entry
224+
; NEON-NEXT: cnt v0.16b, v0.16b
225+
; NEON-NEXT: uaddlp v0.8h, v0.16b
226+
; NEON-NEXT: uaddlp v0.4s, v0.8h
227+
; NEON-NEXT: uaddlp v0.2d, v0.4s
228+
; NEON-NEXT: ret
229+
;
230+
; DOT-LABEL: popcount2x64:
231+
; DOT: // %bb.0: // %Entry
232+
; DOT-NEXT: movi v1.16b, #1
233+
; DOT-NEXT: cnt v0.16b, v0.16b
234+
; DOT-NEXT: movi v2.2d, #0000000000000000
235+
; DOT-NEXT: udot v2.4s, v1.16b, v0.16b
236+
; DOT-NEXT: uaddlp v0.2d, v2.4s
237+
; DOT-NEXT: ret
238+
;
239+
; SVE-LABEL: popcount2x64:
240+
; SVE: // %bb.0: // %Entry
241+
; SVE-NEXT: cnt v0.16b, v0.16b
242+
; SVE-NEXT: uaddlp v0.8h, v0.16b
243+
; SVE-NEXT: uaddlp v0.4s, v0.8h
244+
; SVE-NEXT: uaddlp v0.2d, v0.4s
245+
; SVE-NEXT: ret
246+
Entry:
247+
%1 = tail call <2 x i64> @llvm.ctpop.v2.i64(<2 x i64> %0)
248+
ret <2 x i64> %1
249+
}
250+
251+
declare <2 x i64> @llvm.ctpop.v2.i64(<2 x i64>)
252+
253+
; Test: population count of a <4 x i32> vector via @llvm.ctpop.v4.i32.
; Per the assertions below, the -O0, +neon and +sve runs expect a per-byte cnt
; followed by two widening pairwise adds (uaddlp) up to 32-bit lanes. The
; +neon,+dotprod run expects cnt plus a udot of the byte counts against an
; all-ones vector into a zeroed accumulator, with no uaddlp at all.
define <4 x i32> @popcount4x32(<4 x i32> %0) {
254+
; CHECK-LABEL: popcount4x32:
255+
; CHECK: // %bb.0: // %Entry
256+
; CHECK-NEXT: cnt v0.16b, v0.16b
257+
; CHECK-NEXT: uaddlp v0.8h, v0.16b
258+
; CHECK-NEXT: uaddlp v0.4s, v0.8h
259+
; CHECK-NEXT: ret
260+
;
261+
; NEON-LABEL: popcount4x32:
262+
; NEON: // %bb.0: // %Entry
263+
; NEON-NEXT: cnt v0.16b, v0.16b
264+
; NEON-NEXT: uaddlp v0.8h, v0.16b
265+
; NEON-NEXT: uaddlp v0.4s, v0.8h
266+
; NEON-NEXT: ret
267+
;
268+
; DOT-LABEL: popcount4x32:
269+
; DOT: // %bb.0: // %Entry
270+
; DOT-NEXT: movi v1.16b, #1
271+
; DOT-NEXT: cnt v2.16b, v0.16b
272+
; DOT-NEXT: movi v0.2d, #0000000000000000
273+
; DOT-NEXT: udot v0.4s, v1.16b, v2.16b
274+
; DOT-NEXT: ret
275+
;
276+
; SVE-LABEL: popcount4x32:
277+
; SVE: // %bb.0: // %Entry
278+
; SVE-NEXT: cnt v0.16b, v0.16b
279+
; SVE-NEXT: uaddlp v0.8h, v0.16b
280+
; SVE-NEXT: uaddlp v0.4s, v0.8h
281+
; SVE-NEXT: ret
282+
Entry:
283+
%1 = tail call <4 x i32> @llvm.ctpop.v4.i32(<4 x i32> %0)
284+
ret <4 x i32> %1
285+
}
286+
287+
declare <4 x i32> @llvm.ctpop.v4.i32(<4 x i32>)
288+
289+
; Test: population count of a <2 x i32> vector via @llvm.ctpop.v2.i32, using
; the 64-bit (.8b/.4h/.2s) vector forms.
; Per the assertions below, the -O0, +neon and +sve runs expect a per-byte cnt
; followed by two widening pairwise adds (uaddlp). The +neon,+dotprod run
; expects cnt plus a udot against an all-ones vector into a zeroed accumulator
; in v1, followed by an fmov that copies the accumulator into d0.
define <2 x i32> @popcount2x32(<2 x i32> %0) {
290+
; CHECK-LABEL: popcount2x32:
291+
; CHECK: // %bb.0: // %Entry
292+
; CHECK-NEXT: cnt v0.8b, v0.8b
293+
; CHECK-NEXT: uaddlp v0.4h, v0.8b
294+
; CHECK-NEXT: uaddlp v0.2s, v0.4h
295+
; CHECK-NEXT: ret
296+
;
297+
; NEON-LABEL: popcount2x32:
298+
; NEON: // %bb.0: // %Entry
299+
; NEON-NEXT: cnt v0.8b, v0.8b
300+
; NEON-NEXT: uaddlp v0.4h, v0.8b
301+
; NEON-NEXT: uaddlp v0.2s, v0.4h
302+
; NEON-NEXT: ret
303+
;
304+
; DOT-LABEL: popcount2x32:
305+
; DOT: // %bb.0: // %Entry
306+
; DOT-NEXT: movi v1.2d, #0000000000000000
307+
; DOT-NEXT: cnt v0.8b, v0.8b
308+
; DOT-NEXT: movi v2.8b, #1
309+
; DOT-NEXT: udot v1.2s, v2.8b, v0.8b
310+
; DOT-NEXT: fmov d0, d1
311+
; DOT-NEXT: ret
312+
;
313+
; SVE-LABEL: popcount2x32:
314+
; SVE: // %bb.0: // %Entry
315+
; SVE-NEXT: cnt v0.8b, v0.8b
316+
; SVE-NEXT: uaddlp v0.4h, v0.8b
317+
; SVE-NEXT: uaddlp v0.2s, v0.4h
318+
; SVE-NEXT: ret
319+
Entry:
320+
%1 = tail call <2 x i32> @llvm.ctpop.v2.i32(<2 x i32> %0)
321+
ret <2 x i32> %1
322+
}
323+
324+
declare <2 x i32> @llvm.ctpop.v2.i32(<2 x i32>)
325+
326+
; Test: population count of an <8 x i16> vector via @llvm.ctpop.v8.i16.
; Per the assertions below, all four runs (-O0, +neon, +neon,+dotprod, +sve)
; expect the same sequence, namely a per-byte cnt followed by one widening
; pairwise add (uaddlp) to 16-bit lanes. No udot is expected in the dotprod
; run for this element width.
define <8 x i16> @popcount8x16(<8 x i16> %0) {
327+
; CHECK-LABEL: popcount8x16:
328+
; CHECK: // %bb.0: // %Entry
329+
; CHECK-NEXT: cnt v0.16b, v0.16b
330+
; CHECK-NEXT: uaddlp v0.8h, v0.16b
331+
; CHECK-NEXT: ret
332+
;
333+
; NEON-LABEL: popcount8x16:
334+
; NEON: // %bb.0: // %Entry
335+
; NEON-NEXT: cnt v0.16b, v0.16b
336+
; NEON-NEXT: uaddlp v0.8h, v0.16b
337+
; NEON-NEXT: ret
338+
;
339+
; DOT-LABEL: popcount8x16:
340+
; DOT: // %bb.0: // %Entry
341+
; DOT-NEXT: cnt v0.16b, v0.16b
342+
; DOT-NEXT: uaddlp v0.8h, v0.16b
343+
; DOT-NEXT: ret
344+
;
345+
; SVE-LABEL: popcount8x16:
346+
; SVE: // %bb.0: // %Entry
347+
; SVE-NEXT: cnt v0.16b, v0.16b
348+
; SVE-NEXT: uaddlp v0.8h, v0.16b
349+
; SVE-NEXT: ret
350+
Entry:
351+
%1 = tail call <8 x i16> @llvm.ctpop.v8.i16(<8 x i16> %0)
352+
ret <8 x i16> %1
353+
}
354+
355+
declare <8 x i16> @llvm.ctpop.v8.i16(<8 x i16>)
356+
357+
; Test: population count of a <4 x i16> vector via @llvm.ctpop.v4.i16, using
; the 64-bit (.8b/.4h) vector forms.
; Per the assertions below, all four runs (-O0, +neon, +neon,+dotprod, +sve)
; expect the same sequence, namely a per-byte cnt followed by one widening
; pairwise add (uaddlp) to 16-bit lanes. No udot is expected in the dotprod
; run for this element width.
define <4 x i16> @popcount4x16(<4 x i16> %0) {
358+
; CHECK-LABEL: popcount4x16:
359+
; CHECK: // %bb.0: // %Entry
360+
; CHECK-NEXT: cnt v0.8b, v0.8b
361+
; CHECK-NEXT: uaddlp v0.4h, v0.8b
362+
; CHECK-NEXT: ret
363+
;
364+
; NEON-LABEL: popcount4x16:
365+
; NEON: // %bb.0: // %Entry
366+
; NEON-NEXT: cnt v0.8b, v0.8b
367+
; NEON-NEXT: uaddlp v0.4h, v0.8b
368+
; NEON-NEXT: ret
369+
;
370+
; DOT-LABEL: popcount4x16:
371+
; DOT: // %bb.0: // %Entry
372+
; DOT-NEXT: cnt v0.8b, v0.8b
373+
; DOT-NEXT: uaddlp v0.4h, v0.8b
374+
; DOT-NEXT: ret
375+
;
376+
; SVE-LABEL: popcount4x16:
377+
; SVE: // %bb.0: // %Entry
378+
; SVE-NEXT: cnt v0.8b, v0.8b
379+
; SVE-NEXT: uaddlp v0.4h, v0.8b
380+
; SVE-NEXT: ret
381+
Entry:
382+
%1 = tail call <4 x i16> @llvm.ctpop.v4.i16(<4 x i16> %0)
383+
ret <4 x i16> %1
384+
}
385+
386+
declare <4 x i16> @llvm.ctpop.v4.i16(<4 x i16>)

0 commit comments

Comments
 (0)