Skip to content

Commit 39641b1

Browse files
committed
[AArch64][SME]: Add precursory tests for D138519
Add testing files: - fp-select.ll - int-select.ll - fp-vselect.ll - int-vselect.ll
1 parent 5eec8df commit 39641b1

4 files changed

+1314
-0
lines changed
Lines changed: 286 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,286 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -force-streaming-compatible-sve < %s | FileCheck %s
3+
4+
; Every function in this test is compiled for 64-bit Arm Linux.
target triple = "aarch64-unknown-linux-gnu"
5+
6+
; select_v2f16: whole-vector select on <2 x half> driven by one scalar i1.
; Returns %op1 when %mask is true and %op2 otherwise.
; Expected code: bit 0 of w0 is tested and csetm turns the result into 0 or
; all-ones in w8. That value is written to four halfword slots at sp+8..sp+14
; and reloaded as d2. The result is (op1 AND mask) ORR (op2 AND (mask EOR k)),
; where k is loaded from .LCPI0_0 (contents not visible here; presumably
; all-ones so the EOR inverts the mask -- confirm against the llc output).
define <2 x half> @select_v2f16(<2 x half> %op1, <2 x half> %op2, i1 %mask) #0 {
7+
; CHECK-LABEL: select_v2f16:
8+
; CHECK: // %bb.0:
9+
; CHECK-NEXT: sub sp, sp, #16
10+
; CHECK-NEXT: .cfi_def_cfa_offset 16
11+
; CHECK-NEXT: tst w0, #0x1
12+
; CHECK-NEXT: adrp x9, .LCPI0_0
13+
; CHECK-NEXT: csetm w8, ne
14+
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
15+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
16+
; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI0_0]
17+
; CHECK-NEXT: strh w8, [sp, #14]
18+
; CHECK-NEXT: strh w8, [sp, #12]
19+
; CHECK-NEXT: strh w8, [sp, #10]
20+
; CHECK-NEXT: strh w8, [sp, #8]
21+
; CHECK-NEXT: ldr d2, [sp, #8]
22+
; CHECK-NEXT: eor z3.d, z2.d, z3.d
23+
; CHECK-NEXT: and z0.d, z0.d, z2.d
24+
; CHECK-NEXT: and z1.d, z1.d, z3.d
25+
; CHECK-NEXT: orr z0.d, z0.d, z1.d
26+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
27+
; CHECK-NEXT: add sp, sp, #16
28+
; CHECK-NEXT: ret
29+
%sel = select i1 %mask, <2 x half> %op1, <2 x half> %op2
30+
ret <2 x half> %sel
31+
}
32+
33+
; select_v4f16: whole-vector select on <4 x half> driven by one scalar i1.
; Returns %op1 when %mask is true and %op2 otherwise.
; Expected code: bit 0 of w0 becomes 0 or all-ones in w8 via tst + csetm.
; That value is written to four halfword slots at sp+8..sp+14 and reloaded
; as d2. The result is (op1 AND mask) ORR (op2 AND (mask EOR k)), where k
; is loaded from .LCPI1_0 (contents not visible here; presumably all-ones so
; the EOR inverts the mask -- confirm against the llc output).
define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, i1 %mask) #0 {
34+
; CHECK-LABEL: select_v4f16:
35+
; CHECK: // %bb.0:
36+
; CHECK-NEXT: sub sp, sp, #16
37+
; CHECK-NEXT: .cfi_def_cfa_offset 16
38+
; CHECK-NEXT: tst w0, #0x1
39+
; CHECK-NEXT: adrp x9, .LCPI1_0
40+
; CHECK-NEXT: csetm w8, ne
41+
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
42+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
43+
; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI1_0]
44+
; CHECK-NEXT: strh w8, [sp, #14]
45+
; CHECK-NEXT: strh w8, [sp, #12]
46+
; CHECK-NEXT: strh w8, [sp, #10]
47+
; CHECK-NEXT: strh w8, [sp, #8]
48+
; CHECK-NEXT: ldr d2, [sp, #8]
49+
; CHECK-NEXT: eor z3.d, z2.d, z3.d
50+
; CHECK-NEXT: and z0.d, z0.d, z2.d
51+
; CHECK-NEXT: and z1.d, z1.d, z3.d
52+
; CHECK-NEXT: orr z0.d, z0.d, z1.d
53+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
54+
; CHECK-NEXT: add sp, sp, #16
55+
; CHECK-NEXT: ret
56+
%sel = select i1 %mask, <4 x half> %op1, <4 x half> %op2
57+
ret <4 x half> %sel
58+
}
59+
60+
; select_v8f16: whole-vector select on <8 x half> driven by one scalar i1.
; Returns %op1 when %mask is true and %op2 otherwise.
; Expected code: bit 0 of w0 becomes 0 or all-ones in w8 via tst + csetm.
; That value is written to eight halfword slots at sp..sp+14 and reloaded as
; the 128-bit q2. The result is (op1 AND mask) ORR (op2 AND (mask EOR k)),
; where k is loaded from .LCPI2_0 (contents not visible here; presumably
; all-ones so the EOR inverts the mask -- confirm against the llc output).
define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, i1 %mask) #0 {
61+
; CHECK-LABEL: select_v8f16:
62+
; CHECK: // %bb.0:
63+
; CHECK-NEXT: sub sp, sp, #16
64+
; CHECK-NEXT: .cfi_def_cfa_offset 16
65+
; CHECK-NEXT: tst w0, #0x1
66+
; CHECK-NEXT: adrp x9, .LCPI2_0
67+
; CHECK-NEXT: csetm w8, ne
68+
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
69+
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
70+
; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI2_0]
71+
; CHECK-NEXT: strh w8, [sp, #14]
72+
; CHECK-NEXT: strh w8, [sp, #12]
73+
; CHECK-NEXT: strh w8, [sp, #10]
74+
; CHECK-NEXT: strh w8, [sp, #8]
75+
; CHECK-NEXT: strh w8, [sp, #6]
76+
; CHECK-NEXT: strh w8, [sp, #4]
77+
; CHECK-NEXT: strh w8, [sp, #2]
78+
; CHECK-NEXT: strh w8, [sp]
79+
; CHECK-NEXT: ldr q2, [sp]
80+
; CHECK-NEXT: eor z3.d, z2.d, z3.d
81+
; CHECK-NEXT: and z0.d, z0.d, z2.d
82+
; CHECK-NEXT: and z1.d, z1.d, z3.d
83+
; CHECK-NEXT: orr z0.d, z0.d, z1.d
84+
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
85+
; CHECK-NEXT: add sp, sp, #16
86+
; CHECK-NEXT: ret
87+
%sel = select i1 %mask, <8 x half> %op1, <8 x half> %op2
88+
ret <8 x half> %sel
89+
}
90+
91+
; select_v16f16: whole-vector select on <16 x half> operands held in memory.
; Loads %op1 from %a and %op2 from %b (both loads are volatile), picks %op1
; when %mask is true and %op2 otherwise, and stores the result back to %a.
; Expected code: the condition arrives in w2 (third argument) and is expanded
; to 0 or all-ones in w8, written to eight halfword slots at sp..sp+14 and
; reloaded as q4. Each 32-byte operand is handled as two 16-byte halves
; (q0/q1 from x0, q2/q3 from x1), each blended with and/eor/and/orr, then
; both halves are stored with a single stp to x0.
; NOTE(review): the volatile qualifier presumably keeps the loads from being
; folded or reordered by the optimiser -- confirm the intent with the author.
define void @select_v16f16(ptr %a, ptr %b, i1 %mask) #0 {
92+
; CHECK-LABEL: select_v16f16:
93+
; CHECK: // %bb.0:
94+
; CHECK-NEXT: sub sp, sp, #16
95+
; CHECK-NEXT: .cfi_def_cfa_offset 16
96+
; CHECK-NEXT: tst w2, #0x1
97+
; CHECK-NEXT: adrp x9, .LCPI3_0
98+
; CHECK-NEXT: csetm w8, ne
99+
; CHECK-NEXT: ldr q0, [x0]
100+
; CHECK-NEXT: ldr q1, [x0, #16]
101+
; CHECK-NEXT: ldr q2, [x1]
102+
; CHECK-NEXT: ldr q3, [x1, #16]
103+
; CHECK-NEXT: strh w8, [sp, #14]
104+
; CHECK-NEXT: strh w8, [sp, #12]
105+
; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI3_0]
106+
; CHECK-NEXT: strh w8, [sp, #10]
107+
; CHECK-NEXT: strh w8, [sp, #8]
108+
; CHECK-NEXT: strh w8, [sp, #6]
109+
; CHECK-NEXT: strh w8, [sp, #4]
110+
; CHECK-NEXT: strh w8, [sp, #2]
111+
; CHECK-NEXT: strh w8, [sp]
112+
; CHECK-NEXT: ldr q4, [sp]
113+
; CHECK-NEXT: eor z5.d, z4.d, z5.d
114+
; CHECK-NEXT: and z1.d, z1.d, z4.d
115+
; CHECK-NEXT: and z0.d, z0.d, z4.d
116+
; CHECK-NEXT: and z2.d, z2.d, z5.d
117+
; CHECK-NEXT: and z3.d, z3.d, z5.d
118+
; CHECK-NEXT: orr z0.d, z0.d, z2.d
119+
; CHECK-NEXT: orr z1.d, z1.d, z3.d
120+
; CHECK-NEXT: stp q0, q1, [x0]
121+
; CHECK-NEXT: add sp, sp, #16
122+
; CHECK-NEXT: ret
123+
%op1 = load volatile <16 x half>, ptr %a
124+
%op2 = load volatile <16 x half>, ptr %b
125+
%sel = select i1 %mask, <16 x half> %op1, <16 x half> %op2
126+
store <16 x half> %sel, ptr %a
127+
ret void
128+
}
129+
130+
; select_v2f32: whole-vector select on <2 x float> driven by one scalar i1.
; Returns %op1 when %mask is true and %op2 otherwise.
; Expected code: bit 0 of w0 becomes 0 or all-ones in w8 via tst + csetm.
; A single stp writes that value to the two word slots at sp+8 and sp+12,
; and it is reloaded as d2. The result is
; (op1 AND mask) ORR (op2 AND (mask EOR k)), where k is loaded from
; .LCPI4_0 (contents not visible here; presumably all-ones so the EOR
; inverts the mask -- confirm against the llc output).
define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, i1 %mask) #0 {
131+
; CHECK-LABEL: select_v2f32:
132+
; CHECK: // %bb.0:
133+
; CHECK-NEXT: sub sp, sp, #16
134+
; CHECK-NEXT: .cfi_def_cfa_offset 16
135+
; CHECK-NEXT: tst w0, #0x1
136+
; CHECK-NEXT: adrp x9, .LCPI4_0
137+
; CHECK-NEXT: csetm w8, ne
138+
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
139+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
140+
; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI4_0]
141+
; CHECK-NEXT: stp w8, w8, [sp, #8]
142+
; CHECK-NEXT: ldr d2, [sp, #8]
143+
; CHECK-NEXT: eor z3.d, z2.d, z3.d
144+
; CHECK-NEXT: and z0.d, z0.d, z2.d
145+
; CHECK-NEXT: and z1.d, z1.d, z3.d
146+
; CHECK-NEXT: orr z0.d, z0.d, z1.d
147+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
148+
; CHECK-NEXT: add sp, sp, #16
149+
; CHECK-NEXT: ret
150+
%sel = select i1 %mask, <2 x float> %op1, <2 x float> %op2
151+
ret <2 x float> %sel
152+
}
153+
154+
; select_v4f32: whole-vector select on <4 x float> driven by one scalar i1.
; Returns %op1 when %mask is true and %op2 otherwise.
; Expected code: bit 0 of w0 becomes 0 or all-ones in w8 via tst + csetm.
; Two stp instructions fill the four word slots at sp..sp+12 with that
; value, and it is reloaded as the 128-bit q2. The result is
; (op1 AND mask) ORR (op2 AND (mask EOR k)), where k is loaded from
; .LCPI5_0 (contents not visible here; presumably all-ones so the EOR
; inverts the mask -- confirm against the llc output).
define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, i1 %mask) #0 {
155+
; CHECK-LABEL: select_v4f32:
156+
; CHECK: // %bb.0:
157+
; CHECK-NEXT: sub sp, sp, #16
158+
; CHECK-NEXT: .cfi_def_cfa_offset 16
159+
; CHECK-NEXT: tst w0, #0x1
160+
; CHECK-NEXT: adrp x9, .LCPI5_0
161+
; CHECK-NEXT: csetm w8, ne
162+
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
163+
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
164+
; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI5_0]
165+
; CHECK-NEXT: stp w8, w8, [sp, #8]
166+
; CHECK-NEXT: stp w8, w8, [sp]
167+
; CHECK-NEXT: ldr q2, [sp]
168+
; CHECK-NEXT: eor z3.d, z2.d, z3.d
169+
; CHECK-NEXT: and z0.d, z0.d, z2.d
170+
; CHECK-NEXT: and z1.d, z1.d, z3.d
171+
; CHECK-NEXT: orr z0.d, z0.d, z1.d
172+
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
173+
; CHECK-NEXT: add sp, sp, #16
174+
; CHECK-NEXT: ret
175+
%sel = select i1 %mask, <4 x float> %op1, <4 x float> %op2
176+
ret <4 x float> %sel
177+
}
178+
179+
; select_v8f32: whole-vector select on <8 x float> operands held in memory.
; Loads %op1 from %a and %op2 from %b (both loads are volatile), picks %op1
; when %mask is true and %op2 otherwise, and stores the result back to %a.
; Expected code: the condition arrives in w2 (third argument) and is expanded
; to 0 or all-ones in w8, written to four word slots at sp..sp+12 by two
; stp instructions and reloaded as q4. Each 32-byte operand is handled as
; two 16-byte halves (q0/q1 from x0, q2/q3 from x1), each blended with
; and/eor/and/orr, then both halves are stored with a single stp to x0.
; NOTE(review): the volatile qualifier presumably keeps the loads from being
; folded or reordered by the optimiser -- confirm the intent with the author.
define void @select_v8f32(ptr %a, ptr %b, i1 %mask) #0 {
180+
; CHECK-LABEL: select_v8f32:
181+
; CHECK: // %bb.0:
182+
; CHECK-NEXT: sub sp, sp, #16
183+
; CHECK-NEXT: .cfi_def_cfa_offset 16
184+
; CHECK-NEXT: tst w2, #0x1
185+
; CHECK-NEXT: adrp x9, .LCPI6_0
186+
; CHECK-NEXT: csetm w8, ne
187+
; CHECK-NEXT: ldr q0, [x0]
188+
; CHECK-NEXT: ldr q1, [x0, #16]
189+
; CHECK-NEXT: ldr q2, [x1]
190+
; CHECK-NEXT: ldr q3, [x1, #16]
191+
; CHECK-NEXT: stp w8, w8, [sp, #8]
192+
; CHECK-NEXT: stp w8, w8, [sp]
193+
; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI6_0]
194+
; CHECK-NEXT: ldr q4, [sp]
195+
; CHECK-NEXT: eor z5.d, z4.d, z5.d
196+
; CHECK-NEXT: and z1.d, z1.d, z4.d
197+
; CHECK-NEXT: and z0.d, z0.d, z4.d
198+
; CHECK-NEXT: and z2.d, z2.d, z5.d
199+
; CHECK-NEXT: and z3.d, z3.d, z5.d
200+
; CHECK-NEXT: orr z0.d, z0.d, z2.d
201+
; CHECK-NEXT: orr z1.d, z1.d, z3.d
202+
; CHECK-NEXT: stp q0, q1, [x0]
203+
; CHECK-NEXT: add sp, sp, #16
204+
; CHECK-NEXT: ret
205+
%op1 = load volatile <8 x float>, ptr %a
206+
%op2 = load volatile <8 x float>, ptr %b
207+
%sel = select i1 %mask, <8 x float> %op1, <8 x float> %op2
208+
store <8 x float> %sel, ptr %a
209+
ret void
210+
}
211+
212+
; select_v1f64: whole-vector select on <1 x double> driven by one scalar i1.
; Returns %op1 when %mask is true and %op2 otherwise.
; Expected code: unlike the wider cases in this file, no stack slot is used.
; csetm produces 0 or all-ones in x8, which fmov moves straight into d2.
; The all-ones constant is materialised with mov x9, #-1 and moved into d3,
; so the EOR yields the inverted mask. The result is
; (op1 AND mask) ORR (op2 AND NOT mask).
define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, i1 %mask) #0 {
213+
; CHECK-LABEL: select_v1f64:
214+
; CHECK: // %bb.0:
215+
; CHECK-NEXT: tst w0, #0x1
216+
; CHECK-NEXT: mov x9, #-1
217+
; CHECK-NEXT: csetm x8, ne
218+
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
219+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
220+
; CHECK-NEXT: fmov d3, x9
221+
; CHECK-NEXT: fmov d2, x8
222+
; CHECK-NEXT: eor z3.d, z2.d, z3.d
223+
; CHECK-NEXT: and z0.d, z0.d, z2.d
224+
; CHECK-NEXT: and z1.d, z1.d, z3.d
225+
; CHECK-NEXT: orr z0.d, z0.d, z1.d
226+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
227+
; CHECK-NEXT: ret
228+
%sel = select i1 %mask, <1 x double> %op1, <1 x double> %op2
229+
ret <1 x double> %sel
230+
}
231+
232+
; select_v2f64: whole-vector select on <2 x double> driven by one scalar i1.
; Returns %op1 when %mask is true and %op2 otherwise.
; Expected code: csetm produces 0 or all-ones in x8. A pre-indexed stp both
; allocates the 16-byte stack slot and writes the value into its two
; doubleword lanes, and it is reloaded as q2. The result is
; (op1 AND mask) ORR (op2 AND (mask EOR k)), where k is loaded from
; .LCPI8_0 (contents not visible here; presumably all-ones so the EOR
; inverts the mask -- confirm against the llc output).
define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, i1 %mask) #0 {
233+
; CHECK-LABEL: select_v2f64:
234+
; CHECK: // %bb.0:
235+
; CHECK-NEXT: tst w0, #0x1
236+
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
237+
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
238+
; CHECK-NEXT: adrp x9, .LCPI8_0
239+
; CHECK-NEXT: csetm x8, ne
240+
; CHECK-NEXT: stp x8, x8, [sp, #-16]!
241+
; CHECK-NEXT: .cfi_def_cfa_offset 16
242+
; CHECK-NEXT: ldr q2, [sp]
243+
; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI8_0]
244+
; CHECK-NEXT: and z0.d, z0.d, z2.d
245+
; CHECK-NEXT: eor z3.d, z2.d, z3.d
246+
; CHECK-NEXT: and z1.d, z1.d, z3.d
247+
; CHECK-NEXT: orr z0.d, z0.d, z1.d
248+
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
249+
; CHECK-NEXT: add sp, sp, #16
250+
; CHECK-NEXT: ret
251+
%sel = select i1 %mask, <2 x double> %op1, <2 x double> %op2
252+
ret <2 x double> %sel
253+
}
254+
255+
; select_v4f64: whole-vector select on <4 x double> operands held in memory.
; Loads %op1 from %a and %op2 from %b (both loads are volatile), picks %op1
; when %mask is true and %op2 otherwise, and stores the result back to %a.
; Expected code: the condition arrives in w2 (third argument) and csetm
; expands it to 0 or all-ones in x8. A pre-indexed stp allocates the 16-byte
; stack slot and fills both doubleword lanes, and the mask is reloaded as
; q5. Each 32-byte operand is handled as two 16-byte halves (q0/q1 from x0,
; q2/q3 from x1), each blended with and/eor/and/orr, then both halves are
; stored with a single stp to x0.
; NOTE(review): the volatile qualifier presumably keeps the loads from being
; folded or reordered by the optimiser -- confirm the intent with the author.
define void @select_v4f64(ptr %a, ptr %b, i1 %mask) #0 {
256+
; CHECK-LABEL: select_v4f64:
257+
; CHECK: // %bb.0:
258+
; CHECK-NEXT: tst w2, #0x1
259+
; CHECK-NEXT: ldr q0, [x0]
260+
; CHECK-NEXT: csetm x8, ne
261+
; CHECK-NEXT: ldr q1, [x0, #16]
262+
; CHECK-NEXT: ldr q2, [x1]
263+
; CHECK-NEXT: adrp x9, .LCPI9_0
264+
; CHECK-NEXT: ldr q3, [x1, #16]
265+
; CHECK-NEXT: stp x8, x8, [sp, #-16]!
266+
; CHECK-NEXT: .cfi_def_cfa_offset 16
267+
; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI9_0]
268+
; CHECK-NEXT: ldr q5, [sp]
269+
; CHECK-NEXT: eor z4.d, z5.d, z4.d
270+
; CHECK-NEXT: and z1.d, z1.d, z5.d
271+
; CHECK-NEXT: and z0.d, z0.d, z5.d
272+
; CHECK-NEXT: and z2.d, z2.d, z4.d
273+
; CHECK-NEXT: and z3.d, z3.d, z4.d
274+
; CHECK-NEXT: orr z0.d, z0.d, z2.d
275+
; CHECK-NEXT: orr z1.d, z1.d, z3.d
276+
; CHECK-NEXT: stp q0, q1, [x0]
277+
; CHECK-NEXT: add sp, sp, #16
278+
; CHECK-NEXT: ret
279+
%op1 = load volatile <4 x double>, ptr %a
280+
%op2 = load volatile <4 x double>, ptr %b
281+
%sel = select i1 %mask, <4 x double> %op1, <4 x double> %op2
282+
store <4 x double> %sel, ptr %a
283+
ret void
284+
}
285+
286+
; Attribute group #0, referenced by the function definitions in this file:
; enables the SVE target feature for each of them.
attributes #0 = { "target-features"="+sve" }

0 commit comments

Comments
 (0)