Skip to content

Commit 333aad7

Browse files
committed
[AArch64] Add tests for concat binop patterns. NFC
1 parent 4d33082 commit 333aad7

File tree

1 file changed

+182
-0
lines changed

1 file changed

+182
-0
lines changed
Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -verify-machineinstrs -o - %s | FileCheck %s
5+
; concat_add: integer add on two pairs of <4 x i16> vectors, then join the two
; sums into one <8 x i16> with an identity shufflevector mask (lanes 0-7).
; The assertions below pin the recorded codegen: one 64-bit-wide add per half,
; followed by a lane insert (mov v0.d[1], v2.d[0]) that places the second sum
; in the upper 64 bits of the result register.
define <8 x i16> @concat_add(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
; CHECK-LABEL: concat_add:
; CHECK: // %bb.0:
; CHECK-NEXT: add v2.4h, v2.4h, v3.4h
; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-NEXT: mov v0.d[1], v2.d[0]
; CHECK-NEXT: ret
  %x = add <4 x i16> %a, %b
  %y = add <4 x i16> %c, %d
  %z = shufflevector <4 x i16> %x, <4 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %z
}
17+
18+
; concat_addtunc: add two pairs of <4 x i32> vectors, truncate each sum to
; <4 x i16>, then join the truncated halves into one <8 x i16> with an
; identity shufflevector mask (lanes 0-7).
; The assertions below pin the recorded codegen: two 128-bit adds, then a
; single uzp1 on .8h lanes that produces the final vector (no separate
; narrowing or lane-insert instruction appears).
; NOTE(review): "tunc" looks like a typo for "trunc"; the function name is
; left unchanged because the label assertion matches on it.
define <8 x i16> @concat_addtunc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
; CHECK-LABEL: concat_addtunc:
; CHECK: // %bb.0:
; CHECK-NEXT: add v2.4s, v2.4s, v3.4s
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
; CHECK-NEXT: ret
  %x = add <4 x i32> %a, %b
  %y = add <4 x i32> %c, %d
  %xt = trunc <4 x i32> %x to <4 x i16>
  %yt = trunc <4 x i32> %y to <4 x i16>
  %z = shufflevector <4 x i16> %xt, <4 x i16> %yt, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %z
}
32+
33+
; concat_addtunc2: truncate all four <4 x i32> inputs to <4 x i16> first, add
; the truncated values pairwise, then join the two sums into one <8 x i16>
; with an identity shufflevector mask (lanes 0-7).
; The assertions below pin the recorded codegen: four xtn narrowing
; instructions, two 64-bit-wide adds, and a lane insert (mov v0.d[1], v1.d[0])
; for the upper half.
define <8 x i16> @concat_addtunc2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
; CHECK-LABEL: concat_addtunc2:
; CHECK: // %bb.0:
; CHECK-NEXT: xtn v1.4h, v1.4s
; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: xtn v2.4h, v2.4s
; CHECK-NEXT: xtn v3.4h, v3.4s
; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-NEXT: add v1.4h, v2.4h, v3.4h
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: ret
  %at = trunc <4 x i32> %a to <4 x i16>
  %bt = trunc <4 x i32> %b to <4 x i16>
  %ct = trunc <4 x i32> %c to <4 x i16>
  %dt = trunc <4 x i32> %d to <4 x i16>
  %x = add <4 x i16> %at, %bt
  %y = add <4 x i16> %ct, %dt
  %z = shufflevector <4 x i16> %x, <4 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %z
}
53+
54+
; concat_sub: integer subtract on two pairs of <4 x i16> vectors (%a - %b and
; %c - %d), then join the two differences into one <8 x i16> with an identity
; shufflevector mask (lanes 0-7).
; The assertions below pin the recorded codegen: one 64-bit-wide sub per half,
; followed by a lane insert (mov v0.d[1], v2.d[0]) for the upper half.
define <8 x i16> @concat_sub(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
; CHECK-LABEL: concat_sub:
; CHECK: // %bb.0:
; CHECK-NEXT: sub v2.4h, v2.4h, v3.4h
; CHECK-NEXT: sub v0.4h, v0.4h, v1.4h
; CHECK-NEXT: mov v0.d[1], v2.d[0]
; CHECK-NEXT: ret
  %x = sub <4 x i16> %a, %b
  %y = sub <4 x i16> %c, %d
  %z = shufflevector <4 x i16> %x, <4 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %z
}
66+
67+
; concat_mul: integer multiply on two pairs of <4 x i16> vectors, then join
; the two products into one <8 x i16> with an identity shufflevector mask
; (lanes 0-7).
; The assertions below pin the recorded codegen: one 64-bit-wide mul per half,
; followed by a lane insert (mov v0.d[1], v2.d[0]) for the upper half.
define <8 x i16> @concat_mul(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
; CHECK-LABEL: concat_mul:
; CHECK: // %bb.0:
; CHECK-NEXT: mul v2.4h, v2.4h, v3.4h
; CHECK-NEXT: mul v0.4h, v0.4h, v1.4h
; CHECK-NEXT: mov v0.d[1], v2.d[0]
; CHECK-NEXT: ret
  %x = mul <4 x i16> %a, %b
  %y = mul <4 x i16> %c, %d
  %z = shufflevector <4 x i16> %x, <4 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %z
}
79+
80+
; concat_xor: bitwise xor on two pairs of <4 x i16> vectors, then join the two
; results into one <8 x i16> with an identity shufflevector mask (lanes 0-7).
; The assertions below pin the recorded codegen: one eor per half, emitted
; with the .8b arrangement even though the IR type is <4 x i16>, followed by a
; lane insert (mov v0.d[1], v2.d[0]) for the upper half.
define <8 x i16> @concat_xor(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
; CHECK-LABEL: concat_xor:
; CHECK: // %bb.0:
; CHECK-NEXT: eor v2.8b, v2.8b, v3.8b
; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b
; CHECK-NEXT: mov v0.d[1], v2.d[0]
; CHECK-NEXT: ret
  %x = xor <4 x i16> %a, %b
  %y = xor <4 x i16> %c, %d
  %z = shufflevector <4 x i16> %x, <4 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %z
}
92+
93+
; concat_fadd: floating-point add on two pairs of <4 x half> vectors, then
; join the two sums into one <8 x half> with an identity shufflevector mask
; (lanes 0-7).
; The assertions below pin the recorded codegen: one fadd on .4h lanes per
; half (the RUN line enables +fullfp16), followed by a lane insert
; (mov v0.d[1], v2.d[0]) for the upper half.
define <8 x half> @concat_fadd(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) {
; CHECK-LABEL: concat_fadd:
; CHECK: // %bb.0:
; CHECK-NEXT: fadd v2.4h, v2.4h, v3.4h
; CHECK-NEXT: fadd v0.4h, v0.4h, v1.4h
; CHECK-NEXT: mov v0.d[1], v2.d[0]
; CHECK-NEXT: ret
  %x = fadd <4 x half> %a, %b
  %y = fadd <4 x half> %c, %d
  %z = shufflevector <4 x half> %x, <4 x half> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x half> %z
}
105+
106+
; concat_fmul: floating-point multiply on two pairs of <4 x half> vectors,
; then join the two products into one <8 x half> with an identity
; shufflevector mask (lanes 0-7).
; The assertions below pin the recorded codegen: one fmul on .4h lanes per
; half (the RUN line enables +fullfp16), followed by a lane insert
; (mov v0.d[1], v2.d[0]) for the upper half.
define <8 x half> @concat_fmul(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) {
; CHECK-LABEL: concat_fmul:
; CHECK: // %bb.0:
; CHECK-NEXT: fmul v2.4h, v2.4h, v3.4h
; CHECK-NEXT: fmul v0.4h, v0.4h, v1.4h
; CHECK-NEXT: mov v0.d[1], v2.d[0]
; CHECK-NEXT: ret
  %x = fmul <4 x half> %a, %b
  %y = fmul <4 x half> %c, %d
  %z = shufflevector <4 x half> %x, <4 x half> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x half> %z
}
118+
119+
; concat_min: apply the llvm.minnum.v4f16 intrinsic to two pairs of
; <4 x half> vectors, then join the two results into one <8 x half> with an
; identity shufflevector mask (lanes 0-7).
; The assertions below pin the recorded codegen: one fminnm on .4h lanes per
; half, followed by a lane insert (mov v0.d[1], v2.d[0]) for the upper half.
define <8 x half> @concat_min(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) {
; CHECK-LABEL: concat_min:
; CHECK: // %bb.0:
; CHECK-NEXT: fminnm v2.4h, v2.4h, v3.4h
; CHECK-NEXT: fminnm v0.4h, v0.4h, v1.4h
; CHECK-NEXT: mov v0.d[1], v2.d[0]
; CHECK-NEXT: ret
  %x = call <4 x half> @llvm.minnum.v4f16(<4 x half> %a, <4 x half> %b)
  %y = call <4 x half> @llvm.minnum.v4f16(<4 x half> %c, <4 x half> %d)
  %z = shufflevector <4 x half> %x, <4 x half> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x half> %z
}
131+
132+
; concat_minmax: the two halves use different operations. The low half is
; llvm.minnum.v4f16(%a, %b) and the high half is llvm.maxnum.v4f16(%c, %d);
; the results are joined into one <8 x half> with an identity shufflevector
; mask (lanes 0-7).
; The assertions below pin the recorded codegen: fminnm for the low half,
; fmaxnm for the high half, then a lane insert (mov v0.d[1], v2.d[0]).
define <8 x half> @concat_minmax(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) {
; CHECK-LABEL: concat_minmax:
; CHECK: // %bb.0:
; CHECK-NEXT: fmaxnm v2.4h, v2.4h, v3.4h
; CHECK-NEXT: fminnm v0.4h, v0.4h, v1.4h
; CHECK-NEXT: mov v0.d[1], v2.d[0]
; CHECK-NEXT: ret
  %x = call <4 x half> @llvm.minnum.v4f16(<4 x half> %a, <4 x half> %b)
  %y = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %c, <4 x half> %d)
  %z = shufflevector <4 x half> %x, <4 x half> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x half> %z
}
144+
145+
; signOf_neon: load two <8 x i16> vectors from %a (byte offsets 0 and 16) and
; two from %b (same offsets), then compute a per-lane i8 result using unsigned
; comparisons:
;   -1 if the %b lane is greater than the %a lane,
;    1 if the %a lane is greater than the %b lane (zext of the i1 compare),
;    0 otherwise.
; The two <8 x i8> results are joined into one <16 x i8> with an identity
; shufflevector mask (lanes 0-15).
; The assertions below pin the recorded codegen: two ldp loads, four cmhi
; compares, four xtn narrowings, an and with a splat of 1 for the "greater"
; masks, an orr per half to merge in the "less" masks, and a lane insert
; (mov v0.d[1], v1.d[0]) for the upper half.
define <16 x i8> @signOf_neon(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LABEL: signOf_neon:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: movi v0.8b, #1
; CHECK-NEXT: ldp q3, q4, [x1]
; CHECK-NEXT: cmhi v5.8h, v1.8h, v3.8h
; CHECK-NEXT: cmhi v6.8h, v2.8h, v4.8h
; CHECK-NEXT: cmhi v1.8h, v3.8h, v1.8h
; CHECK-NEXT: cmhi v2.8h, v4.8h, v2.8h
; CHECK-NEXT: xtn v3.8b, v5.8h
; CHECK-NEXT: xtn v4.8b, v6.8h
; CHECK-NEXT: xtn v1.8b, v1.8h
; CHECK-NEXT: xtn v2.8b, v2.8h
; CHECK-NEXT: and v3.8b, v3.8b, v0.8b
; CHECK-NEXT: and v4.8b, v4.8b, v0.8b
; CHECK-NEXT: orr v0.8b, v3.8b, v1.8b
; CHECK-NEXT: orr v1.8b, v4.8b, v2.8b
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: ret
entry:
  %0 = load <8 x i16>, ptr %a, align 2
  %add.ptr = getelementptr inbounds i8, ptr %a, i64 16
  %1 = load <8 x i16>, ptr %add.ptr, align 2
  %2 = load <8 x i16>, ptr %b, align 2
  %add.ptr6 = getelementptr inbounds i8, ptr %b, i64 16
  %3 = load <8 x i16>, ptr %add.ptr6, align 2
  %cmp.i33 = icmp ugt <8 x i16> %0, %2
  %cmp.i31 = icmp ugt <8 x i16> %1, %3
  %cmp.i29 = icmp ugt <8 x i16> %2, %0
  %cmp.i = icmp ugt <8 x i16> %3, %1
  %vmovn.i38.neg = zext <8 x i1> %cmp.i33 to <8 x i8>
  %vmovn.i37.neg = zext <8 x i1> %cmp.i31 to <8 x i8>
  %4 = select <8 x i1> %cmp.i29, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> %vmovn.i38.neg
  %5 = select <8 x i1> %cmp.i, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> %vmovn.i37.neg
  %or.i = shufflevector <8 x i8> %4, <8 x i8> %5, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %or.i
}

0 commit comments

Comments
 (0)