Skip to content

Commit c5fcc2e

Browse files
committed
[AArch64] Add addp from shuffles tests. NFC
1 parent 2f0308e commit c5fcc2e

File tree

1 file changed

+224
-0
lines changed

1 file changed

+224
-0
lines changed
Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,224 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16
3+
; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
4+
5+
; Splits the <8 x i32> argument into its even-indexed and odd-indexed lanes and
; adds the two halves, so result lane i is %a[2*i] + %a[2*i+1] (a pairwise add
; of adjacent elements). The expected code below is a uzp1/uzp2 pair feeding a
; single vector add.
; NOTE(review): this looks like the shape an AArch64 addp would cover; the
; assertions record the current uzp1/uzp2+add lowering - confirm intent.
define <4 x i32> @deinterleave_shuffle_v8i32(<8 x i32> %a) {
6+
; CHECK-LABEL: deinterleave_shuffle_v8i32:
7+
; CHECK: // %bb.0:
8+
; CHECK-NEXT: uzp1 v2.4s, v0.4s, v1.4s
9+
; CHECK-NEXT: uzp2 v0.4s, v0.4s, v1.4s
10+
; CHECK-NEXT: add v0.4s, v2.4s, v0.4s
11+
; CHECK-NEXT: ret
12+
; Even-indexed lanes of %a (0, 2, 4, 6).
%r0 = shufflevector <8 x i32> %a, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
13+
; Odd-indexed lanes of %a (1, 3, 5, 7).
%r1 = shufflevector <8 x i32> %a, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
14+
%o = add <4 x i32> %r0, %r1
15+
ret <4 x i32> %o
16+
}
17+
18+
; Commuted variant of deinterleave_shuffle_v8i32: the same even/odd lane split
; of an <8 x i32>, but the add takes the odd half as its first operand
; (%r1 + %r0). The expected code differs only in the operand order of the
; final add.
define <4 x i32> @deinterleave_shuffle_v8i32_c(<8 x i32> %a) {
19+
; CHECK-LABEL: deinterleave_shuffle_v8i32_c:
20+
; CHECK: // %bb.0:
21+
; CHECK-NEXT: uzp1 v2.4s, v0.4s, v1.4s
22+
; CHECK-NEXT: uzp2 v0.4s, v0.4s, v1.4s
23+
; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
24+
; CHECK-NEXT: ret
25+
; Even-indexed lanes of %a (0, 2, 4, 6).
%r0 = shufflevector <8 x i32> %a, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
26+
; Odd-indexed lanes of %a (1, 3, 5, 7).
%r1 = shufflevector <8 x i32> %a, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
27+
; Operands deliberately swapped relative to the non-commuted test.
%o = add <4 x i32> %r1, %r0
28+
ret <4 x i32> %o
29+
}
30+
31+
; Even/odd lane split and add on a single <4 x i32> argument, producing a
; <2 x i32> result: lane i is %a[2*i] + %a[2*i+1]. The expected code first uses
; ext #8 to bring the upper half of v0 into v1, then zip1/zip2 on .2s vectors
; followed by one add.
define <2 x i32> @deinterleave_shuffle_v4i32(<4 x i32> %a) {
32+
; CHECK-LABEL: deinterleave_shuffle_v4i32:
33+
; CHECK: // %bb.0:
34+
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
35+
; CHECK-NEXT: zip1 v2.2s, v0.2s, v1.2s
36+
; CHECK-NEXT: zip2 v0.2s, v0.2s, v1.2s
37+
; CHECK-NEXT: add v0.2s, v2.2s, v0.2s
38+
; CHECK-NEXT: ret
39+
; Even-indexed lanes of %a (0, 2).
%r0 = shufflevector <4 x i32> %a, <4 x i32> poison, <2 x i32> <i32 0, i32 2>
40+
; Odd-indexed lanes of %a (1, 3).
%r1 = shufflevector <4 x i32> %a, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
41+
%o = add <2 x i32> %r0, %r1
42+
ret <2 x i32> %o
43+
}
44+
45+
; i16 version of the even/odd lane split and add: takes <16 x i16> and returns
; <8 x i16> where lane i is %a[2*i] + %a[2*i+1]. The expected code is
; uzp1/uzp2 on .8h vectors followed by one add.
define <8 x i16> @deinterleave_shuffle_v16i16(<16 x i16> %a) {
46+
; CHECK-LABEL: deinterleave_shuffle_v16i16:
47+
; CHECK: // %bb.0:
48+
; CHECK-NEXT: uzp1 v2.8h, v0.8h, v1.8h
49+
; CHECK-NEXT: uzp2 v0.8h, v0.8h, v1.8h
50+
; CHECK-NEXT: add v0.8h, v2.8h, v0.8h
51+
; CHECK-NEXT: ret
52+
; Even-indexed lanes of %a (0, 2, ..., 14).
%r0 = shufflevector <16 x i16> %a, <16 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
53+
; Odd-indexed lanes of %a (1, 3, ..., 15).
%r1 = shufflevector <16 x i16> %a, <16 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
54+
%o = add <8 x i16> %r0, %r1
55+
ret <8 x i16> %o
56+
}
57+
58+
; i8 version of the even/odd lane split and add: takes <32 x i8> and returns
; <16 x i8> where lane i is %a[2*i] + %a[2*i+1]. The expected code is
; uzp1/uzp2 on .16b vectors followed by one add.
define <16 x i8> @deinterleave_shuffle_v32i8(<32 x i8> %a) {
59+
; CHECK-LABEL: deinterleave_shuffle_v32i8:
60+
; CHECK: // %bb.0:
61+
; CHECK-NEXT: uzp1 v2.16b, v0.16b, v1.16b
62+
; CHECK-NEXT: uzp2 v0.16b, v0.16b, v1.16b
63+
; CHECK-NEXT: add v0.16b, v2.16b, v0.16b
64+
; CHECK-NEXT: ret
65+
; Even-indexed lanes of %a (0, 2, ..., 30).
%r0 = shufflevector <32 x i8> %a, <32 x i8> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
66+
; Odd-indexed lanes of %a (1, 3, ..., 31).
%r1 = shufflevector <32 x i8> %a, <32 x i8> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
67+
%o = add <16 x i8> %r0, %r1
68+
ret <16 x i8> %o
69+
}
70+
71+
; i64 version of the even/odd lane split and add: takes <8 x i64> and returns
; <4 x i64> where lane i is %a[2*i] + %a[2*i+1]. The expected code reads v0-v3,
; uses zip1/zip2 on .2d vectors for each pair of input registers, and emits two
; adds whose results land in v0 and v1.
define <4 x i64> @deinterleave_shuffle_v8i64(<8 x i64> %a) {
72+
; CHECK-LABEL: deinterleave_shuffle_v8i64:
73+
; CHECK: // %bb.0:
74+
; CHECK-NEXT: zip1 v4.2d, v2.2d, v3.2d
75+
; CHECK-NEXT: zip1 v5.2d, v0.2d, v1.2d
76+
; CHECK-NEXT: zip2 v2.2d, v2.2d, v3.2d
77+
; CHECK-NEXT: zip2 v0.2d, v0.2d, v1.2d
78+
; CHECK-NEXT: add v1.2d, v4.2d, v2.2d
79+
; CHECK-NEXT: add v0.2d, v5.2d, v0.2d
80+
; CHECK-NEXT: ret
81+
; Even-indexed lanes of %a (0, 2, 4, 6).
%r0 = shufflevector <8 x i64> %a, <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
82+
; Odd-indexed lanes of %a (1, 3, 5, 7).
%r1 = shufflevector <8 x i64> %a, <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
83+
%o = add <4 x i64> %r0, %r1
84+
ret <4 x i64> %o
85+
}
86+
87+
; Floating-point version of the even/odd lane split: takes <8 x float> and
; returns <4 x float> where lane i is %a[2*i] + %a[2*i+1], computed with fadd.
; The expected code is uzp1/uzp2 on .4s vectors followed by one fadd.
define <4 x float> @deinterleave_shuffle_v8f32(<8 x float> %a) {
88+
; CHECK-LABEL: deinterleave_shuffle_v8f32:
89+
; CHECK: // %bb.0:
90+
; CHECK-NEXT: uzp1 v2.4s, v0.4s, v1.4s
91+
; CHECK-NEXT: uzp2 v0.4s, v0.4s, v1.4s
92+
; CHECK-NEXT: fadd v0.4s, v2.4s, v0.4s
93+
; CHECK-NEXT: ret
94+
; Even-indexed lanes of %a (0, 2, 4, 6).
%r0 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
95+
; Odd-indexed lanes of %a (1, 3, 5, 7).
%r1 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
96+
%o = fadd <4 x float> %r0, %r1
97+
ret <4 x float> %o
98+
}
99+
100+
; Commuted variant of deinterleave_shuffle_v8f32: the same even/odd lane split
; of an <8 x float>, but the fadd takes the odd half as its first operand
; (%r1 + %r0). The expected code differs only in the operand order of the
; final fadd.
define <4 x float> @deinterleave_shuffle_v8f32_c(<8 x float> %a) {
101+
; CHECK-LABEL: deinterleave_shuffle_v8f32_c:
102+
; CHECK: // %bb.0:
103+
; CHECK-NEXT: uzp1 v2.4s, v0.4s, v1.4s
104+
; CHECK-NEXT: uzp2 v0.4s, v0.4s, v1.4s
105+
; CHECK-NEXT: fadd v0.4s, v0.4s, v2.4s
106+
; CHECK-NEXT: ret
107+
; Even-indexed lanes of %a (0, 2, 4, 6).
%r0 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
108+
; Odd-indexed lanes of %a (1, 3, 5, 7).
%r1 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
109+
; Operands deliberately swapped relative to the non-commuted test.
%o = fadd <4 x float> %r1, %r0
110+
ret <4 x float> %o
111+
}
112+
113+
; Half-precision version of the even/odd lane split: takes <16 x half> and
; returns <8 x half> where lane i is %a[2*i] + %a[2*i+1], computed with fadd.
; This is the only test here whose expected output depends on the RUN line:
;  - without +fullfp16, the uzp1/uzp2 results are widened to f32 with
;    fcvtl/fcvtl2, added as two .4s fadds, and narrowed back with fcvtn/fcvtn2;
;  - with +fullfp16, a single .8h fadd follows the uzp1/uzp2 pair.
define <8 x half> @deinterleave_shuffle_v16f16(<16 x half> %a) {
114+
; CHECK-NOFP16-LABEL: deinterleave_shuffle_v16f16:
115+
; CHECK-NOFP16: // %bb.0:
116+
; CHECK-NOFP16-NEXT: uzp1 v2.8h, v0.8h, v1.8h
117+
; CHECK-NOFP16-NEXT: uzp2 v0.8h, v0.8h, v1.8h
118+
; CHECK-NOFP16-NEXT: fcvtl v1.4s, v0.4h
119+
; CHECK-NOFP16-NEXT: fcvtl v3.4s, v2.4h
120+
; CHECK-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
121+
; CHECK-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
122+
; CHECK-NOFP16-NEXT: fadd v1.4s, v3.4s, v1.4s
123+
; CHECK-NOFP16-NEXT: fadd v2.4s, v2.4s, v0.4s
124+
; CHECK-NOFP16-NEXT: fcvtn v0.4h, v1.4s
125+
; CHECK-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s
126+
; CHECK-NOFP16-NEXT: ret
127+
;
128+
; CHECK-FP16-LABEL: deinterleave_shuffle_v16f16:
129+
; CHECK-FP16: // %bb.0:
130+
; CHECK-FP16-NEXT: uzp1 v2.8h, v0.8h, v1.8h
131+
; CHECK-FP16-NEXT: uzp2 v0.8h, v0.8h, v1.8h
132+
; CHECK-FP16-NEXT: fadd v0.8h, v2.8h, v0.8h
133+
; CHECK-FP16-NEXT: ret
134+
; Even-indexed lanes of %a (0, 2, ..., 14).
%r0 = shufflevector <16 x half> %a, <16 x half> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
135+
; Odd-indexed lanes of %a (1, 3, ..., 15).
%r1 = shufflevector <16 x half> %a, <16 x half> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
136+
%o = fadd <8 x half> %r0, %r1
137+
ret <8 x half> %o
138+
}
139+
140+
; Double-precision version of the even/odd lane split: takes <8 x double> and
; returns <4 x double> where lane i is %a[2*i] + %a[2*i+1], computed with fadd.
; The expected code reads v0-v3, uses zip1/zip2 on .2d vectors for each pair of
; input registers, and emits two fadds whose results land in v0 and v1.
define <4 x double> @deinterleave_shuffle_v8f64(<8 x double> %a) {
141+
; CHECK-LABEL: deinterleave_shuffle_v8f64:
142+
; CHECK: // %bb.0:
143+
; CHECK-NEXT: zip1 v4.2d, v2.2d, v3.2d
144+
; CHECK-NEXT: zip1 v5.2d, v0.2d, v1.2d
145+
; CHECK-NEXT: zip2 v2.2d, v2.2d, v3.2d
146+
; CHECK-NEXT: zip2 v0.2d, v0.2d, v1.2d
147+
; CHECK-NEXT: fadd v1.2d, v4.2d, v2.2d
148+
; CHECK-NEXT: fadd v0.2d, v5.2d, v0.2d
149+
; CHECK-NEXT: ret
150+
; Even-indexed lanes of %a (0, 2, 4, 6).
%r0 = shufflevector <8 x double> %a, <8 x double> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
151+
; Odd-indexed lanes of %a (1, 3, 5, 7).
%r1 = shufflevector <8 x double> %a, <8 x double> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
152+
%o = fadd <4 x double> %r0, %r1
153+
ret <4 x double> %o
154+
}
155+
156+
; Unsigned dot-product-shaped reduction. Both <16 x i8> inputs are
; zero-extended to i32 and multiplied lane-wise; two rounds of even/odd lane
; split + add then fold the 16 products down to 4, so that
;   %o[i] = %m[4*i] + %m[4*i+1] + %m[4*i+2] + %m[4*i+3]
; and the result is %z + %o.
; The expected code widens with ushll/ushll2, multiplies with umull/umull2, and
; performs each reduction round as uzp1/uzp2 followed by add.
; NOTE(review): the name and shape suggest this is meant to exercise a pattern
; a udot instruction could implement - confirm against the follow-up change.
define <4 x i32> @udot(<4 x i32> %z, <16 x i8> %a, <16 x i8> %b) {
157+
; CHECK-LABEL: udot:
158+
; CHECK: // %bb.0:
159+
; CHECK-NEXT: ushll v3.8h, v1.8b, #0
160+
; CHECK-NEXT: ushll v4.8h, v2.8b, #0
161+
; CHECK-NEXT: ushll2 v1.8h, v1.16b, #0
162+
; CHECK-NEXT: ushll2 v2.8h, v2.16b, #0
163+
; CHECK-NEXT: umull2 v5.4s, v3.8h, v4.8h
164+
; CHECK-NEXT: umull v3.4s, v3.4h, v4.4h
165+
; CHECK-NEXT: umull2 v4.4s, v1.8h, v2.8h
166+
; CHECK-NEXT: umull v1.4s, v1.4h, v2.4h
167+
; CHECK-NEXT: uzp1 v2.4s, v3.4s, v5.4s
168+
; CHECK-NEXT: uzp2 v3.4s, v3.4s, v5.4s
169+
; CHECK-NEXT: uzp1 v6.4s, v1.4s, v4.4s
170+
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
171+
; CHECK-NEXT: add v2.4s, v2.4s, v3.4s
172+
; CHECK-NEXT: add v1.4s, v6.4s, v1.4s
173+
; CHECK-NEXT: uzp1 v3.4s, v2.4s, v1.4s
174+
; CHECK-NEXT: uzp2 v1.4s, v2.4s, v1.4s
175+
; CHECK-NEXT: add v1.4s, v3.4s, v1.4s
176+
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
177+
; CHECK-NEXT: ret
178+
; Widen both byte vectors to i32 (unsigned) and form the 16 lane-wise products.
%za = zext <16 x i8> %a to <16 x i32>
179+
%zb = zext <16 x i8> %b to <16 x i32>
180+
%m = mul <16 x i32> %za, %zb
181+
; First reduction round: 16 products -> 8 sums of adjacent pairs.
%r0 = shufflevector <16 x i32> %m, <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
182+
%r1 = shufflevector <16 x i32> %m, <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
183+
%m2 = add <8 x i32> %r0, %r1
184+
; Second reduction round: 8 pair sums -> 4 sums of four adjacent products.
%s0 = shufflevector <8 x i32> %m2, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
185+
%s1 = shufflevector <8 x i32> %m2, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
186+
%o = add <4 x i32> %s0, %s1
187+
; Accumulate into the incoming vector %z.
%n = add <4 x i32> %z, %o
188+
ret <4 x i32> %n
189+
}
190+
191+
; Signed counterpart of udot. Both <16 x i8> inputs are sign-extended to i32
; and multiplied lane-wise; two rounds of even/odd lane split + add then fold
; the 16 products down to 4, so that
;   %o[i] = %m[4*i] + %m[4*i+1] + %m[4*i+2] + %m[4*i+3]
; and the result is %z + %o.
; The expected code widens with sshll/sshll2, multiplies with smull/smull2, and
; performs each reduction round as uzp1/uzp2 followed by add.
; NOTE(review): the name and shape suggest this is meant to exercise a pattern
; an sdot instruction could implement - confirm against the follow-up change.
define <4 x i32> @sdot(<4 x i32> %z, <16 x i8> %a, <16 x i8> %b) {
192+
; CHECK-LABEL: sdot:
193+
; CHECK: // %bb.0:
194+
; CHECK-NEXT: sshll v3.8h, v1.8b, #0
195+
; CHECK-NEXT: sshll v4.8h, v2.8b, #0
196+
; CHECK-NEXT: sshll2 v1.8h, v1.16b, #0
197+
; CHECK-NEXT: sshll2 v2.8h, v2.16b, #0
198+
; CHECK-NEXT: smull2 v5.4s, v3.8h, v4.8h
199+
; CHECK-NEXT: smull v3.4s, v3.4h, v4.4h
200+
; CHECK-NEXT: smull2 v4.4s, v1.8h, v2.8h
201+
; CHECK-NEXT: smull v1.4s, v1.4h, v2.4h
202+
; CHECK-NEXT: uzp1 v2.4s, v3.4s, v5.4s
203+
; CHECK-NEXT: uzp2 v3.4s, v3.4s, v5.4s
204+
; CHECK-NEXT: uzp1 v6.4s, v1.4s, v4.4s
205+
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
206+
; CHECK-NEXT: add v2.4s, v2.4s, v3.4s
207+
; CHECK-NEXT: add v1.4s, v6.4s, v1.4s
208+
; CHECK-NEXT: uzp1 v3.4s, v2.4s, v1.4s
209+
; CHECK-NEXT: uzp2 v1.4s, v2.4s, v1.4s
210+
; CHECK-NEXT: add v1.4s, v3.4s, v1.4s
211+
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
212+
; CHECK-NEXT: ret
213+
; Widen both byte vectors to i32 (signed) and form the 16 lane-wise products.
; The %za/%zb names are shared with the unsigned test, but the values here are
; sign-extended.
%za = sext <16 x i8> %a to <16 x i32>
214+
%zb = sext <16 x i8> %b to <16 x i32>
215+
%m = mul <16 x i32> %za, %zb
216+
; First reduction round: 16 products -> 8 sums of adjacent pairs.
%r0 = shufflevector <16 x i32> %m, <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
217+
%r1 = shufflevector <16 x i32> %m, <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
218+
%m2 = add <8 x i32> %r0, %r1
219+
; Second reduction round: 8 pair sums -> 4 sums of four adjacent products.
%s0 = shufflevector <8 x i32> %m2, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
220+
%s1 = shufflevector <8 x i32> %m2, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
221+
%o = add <4 x i32> %s0, %s1
222+
; Accumulate into the incoming vector %z.
%n = add <4 x i32> %z, %o
223+
ret <4 x i32> %n
224+
}

0 commit comments

Comments
 (0)