
Commit ce6e66d

[RISCV] Add coverage for an upcoming select lowering change
A select between an add and a sub of the same operands can be lowered as either a vrsub followed by an add (reducing register pressure) or as a vmacc. The former will be implemented in an upcoming review.
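For context, the pattern the new tests cover looks like this (taking the v4i32 case from the test file below):

  %sub = sub <4 x i32> %a, %b
  %add = add <4 x i32> %a, %b
  %res = select <4 x i1> %cc, <4 x i32> %sub, <4 x i32> %add

With the current lowering this becomes an unmasked vadd followed by a masked vsub into the same destination (see the CHECK lines below). A rough sketch of the vrsub-based alternative described above, purely illustrative and not produced by this commit (register assignments and a mask-undisturbed policy are assumed):

  vrsub.vi v9, v9, 0, v0.t    ; v9 <- -b where cc is set, b elsewhere (mask undisturbed)
  vadd.vv  v8, v8, v9         ; a + b where cc is clear, a - b where cc is set

Working in place like this avoids the extra destination register the current sequence needs, which is presumably where the register-pressure benefit comes from.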
1 parent 71ff486 commit ce6e66d

1 file changed: 351 additions, 0 deletions
@@ -0,0 +1,351 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
; RUN: | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
; RUN: | FileCheck %s

define <1 x i32> @select_addsub_v1i32(<1 x i1> %cc, <1 x i32> %a, <1 x i32> %b) {
; CHECK-LABEL: select_addsub_v1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
; CHECK-NEXT:    vadd.vv v10, v8, v9
; CHECK-NEXT:    vsub.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %sub = sub nsw <1 x i32> %a, %b
  %add = add nsw <1 x i32> %a, %b
  %res = select <1 x i1> %cc, <1 x i32> %sub, <1 x i32> %add
  ret <1 x i32> %res
}

define <2 x i32> @select_addsub_v2i32(<2 x i1> %cc, <2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: select_addsub_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; CHECK-NEXT:    vadd.vv v10, v8, v9
; CHECK-NEXT:    vsub.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %sub = sub nsw <2 x i32> %a, %b
  %add = add nsw <2 x i32> %a, %b
  %res = select <2 x i1> %cc, <2 x i32> %sub, <2 x i32> %add
  ret <2 x i32> %res
}

define <4 x i32> @select_addsub_v4i32(<4 x i1> %cc, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: select_addsub_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vadd.vv v10, v8, v9
; CHECK-NEXT:    vsub.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %sub = sub nsw <4 x i32> %a, %b
  %add = add nsw <4 x i32> %a, %b
  %res = select <4 x i1> %cc, <4 x i32> %sub, <4 x i32> %add
  ret <4 x i32> %res
}

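; The following variants commute the operands of the select and/or of the
; add/sub to cover the commuted forms of the same pattern.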
define <4 x i32> @select_addsub_v4i32_select_swapped(<4 x i1> %cc, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: select_addsub_v4i32_select_swapped:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vsub.vv v10, v8, v9
; CHECK-NEXT:    vadd.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %sub = sub nsw <4 x i32> %a, %b
  %add = add nsw <4 x i32> %a, %b
  %res = select <4 x i1> %cc, <4 x i32> %add, <4 x i32> %sub
  ret <4 x i32> %res
}

define <4 x i32> @select_addsub_v4i32_add_swapped(<4 x i1> %cc, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: select_addsub_v4i32_add_swapped:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vadd.vv v10, v9, v8
; CHECK-NEXT:    vsub.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %sub = sub nsw <4 x i32> %a, %b
  %add = add nsw <4 x i32> %b, %a
  %res = select <4 x i1> %cc, <4 x i32> %sub, <4 x i32> %add
  ret <4 x i32> %res
}

define <4 x i32> @select_addsub_v4i32_both_swapped(<4 x i1> %cc, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: select_addsub_v4i32_both_swapped:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vsub.vv v10, v8, v9
; CHECK-NEXT:    vadd.vv v10, v9, v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %sub = sub nsw <4 x i32> %a, %b
  %add = add nsw <4 x i32> %b, %a
  %res = select <4 x i1> %cc, <4 x i32> %add, <4 x i32> %sub
  ret <4 x i32> %res
}

define <4 x i32> @select_addsub_v4i32_sub_swapped(<4 x i1> %cc, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: select_addsub_v4i32_sub_swapped:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vadd.vv v10, v9, v8
; CHECK-NEXT:    vsub.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %sub = sub nsw <4 x i32> %a, %b
  %add = add nsw <4 x i32> %b, %a
  %res = select <4 x i1> %cc, <4 x i32> %sub, <4 x i32> %add
  ret <4 x i32> %res
}

define <8 x i32> @select_addsub_v8i32(<8 x i1> %cc, <8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: select_addsub_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; CHECK-NEXT:    vadd.vv v12, v8, v10
; CHECK-NEXT:    vsub.vv v12, v8, v10, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %sub = sub nsw <8 x i32> %a, %b
  %add = add nsw <8 x i32> %a, %b
  %res = select <8 x i1> %cc, <8 x i32> %sub, <8 x i32> %add
  ret <8 x i32> %res
}

define <16 x i32> @select_addsub_v16i32(<16 x i1> %cc, <16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: select_addsub_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
; CHECK-NEXT:    vadd.vv v16, v8, v12
; CHECK-NEXT:    vsub.vv v16, v8, v12, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %sub = sub nsw <16 x i32> %a, %b
  %add = add nsw <16 x i32> %a, %b
  %res = select <16 x i1> %cc, <16 x i32> %sub, <16 x i32> %add
  ret <16 x i32> %res
}

define <32 x i32> @select_addsub_v32i32(<32 x i1> %cc, <32 x i32> %a, <32 x i32> %b) {
; CHECK-LABEL: select_addsub_v32i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
; CHECK-NEXT:    vadd.vv v24, v8, v16
; CHECK-NEXT:    vsub.vv v24, v8, v16, v0.t
; CHECK-NEXT:    vmv.v.v v8, v24
; CHECK-NEXT:    ret
  %sub = sub nsw <32 x i32> %a, %b
  %add = add nsw <32 x i32> %a, %b
  %res = select <32 x i1> %cc, <32 x i32> %sub, <32 x i32> %add
  ret <32 x i32> %res
}

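; At v64i32 each operand needs two LMUL=8 register groups; %b is passed
; indirectly through memory and the lowering is split into two halves, with
; vector spills and reloads around the split.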
define <64 x i32> @select_addsub_v64i32(<64 x i1> %cc, <64 x i32> %a, <64 x i32> %b) {
; CHECK-LABEL: select_addsub_v64i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    mv a2, a1
; CHECK-NEXT:    slli a1, a1, 1
; CHECK-NEXT:    add a1, a1, a2
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    addi a0, a0, 128
; CHECK-NEXT:    vle32.v v24, (a0)
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vadd.vv v24, v8, v16
; CHECK-NEXT:    vsub.vv v24, v8, v16, v0.t
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vi v0, v0, 4
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
; CHECK-NEXT:    vadd.vv v16, v16, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vsub.vv v16, v24, v8, v0.t
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    mv a1, a0
; CHECK-NEXT:    slli a0, a0, 1
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %sub = sub nsw <64 x i32> %a, %b
  %add = add nsw <64 x i32> %a, %b
  %res = select <64 x i1> %cc, <64 x i32> %sub, <64 x i32> %add
  ret <64 x i32> %res
}

define <8 x i64> @select_addsub_v8i64(<8 x i1> %cc, <8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: select_addsub_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
; CHECK-NEXT:    vadd.vv v16, v8, v12
; CHECK-NEXT:    vsub.vv v16, v8, v12, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %sub = sub nsw <8 x i64> %a, %b
  %add = add nsw <8 x i64> %a, %b
  %res = select <8 x i1> %cc, <8 x i64> %sub, <8 x i64> %add
  ret <8 x i64> %res
}

define <8 x i16> @select_addsub_v8i16(<8 x i1> %cc, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: select_addsub_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT:    vadd.vv v10, v8, v9
; CHECK-NEXT:    vsub.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %sub = sub nsw <8 x i16> %a, %b
  %add = add nsw <8 x i16> %a, %b
  %res = select <8 x i1> %cc, <8 x i16> %sub, <8 x i16> %add
  ret <8 x i16> %res
}

define <8 x i8> @select_addsub_v8i8(<8 x i1> %cc, <8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: select_addsub_v8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vadd.vv v10, v8, v9
; CHECK-NEXT:    vsub.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %sub = sub nsw <8 x i8> %a, %b
  %add = add nsw <8 x i8> %a, %b
  %res = select <8 x i1> %cc, <8 x i8> %sub, <8 x i8> %add
  ret <8 x i8> %res
}

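; On <8 x i1>, add and sub are both xor, so the whole pattern folds to a
; single vmxor.mm.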
define <8 x i1> @select_addsub_v8i1(<8 x i1> %cc, <8 x i1> %a, <8 x i1> %b) {
; CHECK-LABEL: select_addsub_v8i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmxor.mm v0, v8, v9
; CHECK-NEXT:    ret
  %sub = sub nsw <8 x i1> %a, %b
  %add = add nsw <8 x i1> %a, %b
  %res = select <8 x i1> %cc, <8 x i1> %sub, <8 x i1> %add
  ret <8 x i1> %res
}

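; i2 is not a legal element type; it is promoted to i8, and the promoted form
; still uses the masked add/sub sequence.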
define <8 x i2> @select_addsub_v8i2(<8 x i1> %cc, <8 x i2> %a, <8 x i2> %b) {
; CHECK-LABEL: select_addsub_v8i2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vadd.vv v10, v8, v9
; CHECK-NEXT:    vsub.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %sub = sub nsw <8 x i2> %a, %b
  %add = add nsw <8 x i2> %a, %b
  %res = select <8 x i1> %cc, <8 x i2> %sub, <8 x i2> %add
  ret <8 x i2> %res
}

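; Constant-mask variants: the select condition is a constant vector,
; materialized with vmv.v.i.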
define <4 x i32> @select_addsub_v4i32_constmask(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: select_addsub_v4i32_constmask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v0, 5
; CHECK-NEXT:    vadd.vv v10, v8, v9
; CHECK-NEXT:    vsub.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %sub = sub nsw <4 x i32> %a, %b
  %add = add nsw <4 x i32> %a, %b
  %res = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %sub, <4 x i32> %add
  ret <4 x i32> %res
}

define <4 x i32> @select_addsub_v4i32_constmask2(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: select_addsub_v4i32_constmask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v0, 5
; CHECK-NEXT:    vadd.vv v10, v9, v8
; CHECK-NEXT:    vsub.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %sub = sub nsw <4 x i32> %a, %b
  %add = add nsw <4 x i32> %b, %a
  %res = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %sub, <4 x i32> %add
  ret <4 x i32> %res
}

; Same pattern as above, but the select is disguised as a shuffle
define <4 x i32> @select_addsub_v4i32_as_shuffle(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: select_addsub_v4i32_as_shuffle:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v0, 5
; CHECK-NEXT:    vadd.vv v10, v8, v9
; CHECK-NEXT:    vsub.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %sub = sub nsw <4 x i32> %a, %b
  %add = add nsw <4 x i32> %a, %b
  %res = shufflevector <4 x i32> %sub, <4 x i32> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %res
}

; Same pattern as above, but the select is disguised as a shuffle and the sub
; operands are commuted
define <4 x i32> @select_addsub_v4i32_as_shuffle2(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: select_addsub_v4i32_as_shuffle2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v0, 5
; CHECK-NEXT:    vadd.vv v10, v8, v9
; CHECK-NEXT:    vsub.vv v10, v9, v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %sub = sub nsw <4 x i32> %b, %a
  %add = add nsw <4 x i32> %a, %b
  %res = shufflevector <4 x i32> %sub, <4 x i32> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %res
}
