Commit 4635801

Dinar Temirbulatov authored and committed
[AArch64][SVE2] Generate SVE2 BSL instruction in LLVM for add/sub.
Allow folding or(and, and) into the BSL instruction for scalable vectors.
1 parent 3e64f8a commit 4635801

2 files changed (+258, -2 lines)
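
In IR terms, the fold targets a scalable-vector bit-select written with a negated condition. A distilled sketch follows (the function and value names are illustrative, not part of the commit; the 16 autogenerated tests below cover the operand permutations):

; Illustrative sketch only: a scalable-vector bit-select in the shape
; tryCombineToBSL looks for. With +sve2 this now lowers to a single BSL.
define <vscale x 4 x i32> @bsl_sketch(<vscale x 4 x i32> %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) "target-features"="+sve2" {
  %mask = sub <vscale x 4 x i32> zeroinitializer, %c      ; -c
  %not_mask = add <vscale x 4 x i32> %c, splat(i32 -1)    ; c - 1 == ~(-c)
  %sel_a = and <vscale x 4 x i32> %mask, %a
  %sel_b = and <vscale x 4 x i32> %not_mask, %b
  %sel = or <vscale x 4 x i32> %sel_a, %sel_b
  ret <vscale x 4 x i32> %sel
}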

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 4 additions & 2 deletions
@@ -17927,11 +17927,13 @@ static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
     } else
       continue;

-    if (!ISD::isBuildVectorAllZeros(Sub.getOperand(0).getNode()))
+    if (!ISD::isBuildVectorAllZeros(Sub.getOperand(0).getNode()) &&
+        !ISD::isConstantSplatVectorAllZeros(Sub.getOperand(0).getNode()))
       continue;

     // Constant ones is always righthand operand of the Add.
-    if (!ISD::isBuildVectorAllOnes(Add.getOperand(1).getNode()))
+    if (!ISD::isBuildVectorAllOnes(Add.getOperand(1).getNode()) &&
+        !ISD::isConstantSplatVectorAllOnes(Add.getOperand(1).getNode()))
       continue;

     if (Sub.getOperand(1) != Add.getOperand(0))
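
The extra predicates matter because constant scalable vectors reach the DAG as SPLAT_VECTOR nodes rather than BUILD_VECTOR nodes, so the BUILD_VECTOR-based checks alone never match them. A contrasting sketch (illustrative function names):

; Illustrative only: the same zeroinitializer constant reaches ISel
; as different DAG nodes depending on the vector type.
define <4 x i32> @fixed_zero() {
  ret <4 x i32> zeroinitializer            ; DAG: BUILD_VECTOR of four zeros
}
define <vscale x 4 x i32> @scalable_zero() {
  ret <vscale x 4 x i32> zeroinitializer   ; DAG: SPLAT_VECTOR of zero
}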
Lines changed: 254 additions & 0 deletions
@@ -0,0 +1,254 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s

target triple = "aarch64"

; Check that an expanded vbsl(vneg(pre_cond), left, right) lowers to a VBSL
; during ISEL.
;
; Subtly different from a plain vector bit select: operand representing the
; condition has been negated (-v, not to be confused with bitwise_not(v)).

; Each vbsl_neg_cond_xxxx tests one of the 16 permutations of the operands.

define <vscale x 4 x i32> @vbsl_neg_cond_0000(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
; CHECK-LABEL: vbsl_neg_cond_0000:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
; CHECK-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; CHECK-NEXT:    mov z0.d, z1.d
; CHECK-NEXT:    ret
  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
  %left_bits_0 = and <vscale x 4 x i32> %neg_cond, %left
  %right_bits_0 = and <vscale x 4 x i32> %min_cond, %right
  %bsl0000 = or <vscale x 4 x i32> %right_bits_0, %left_bits_0
  ret <vscale x 4 x i32> %bsl0000
}

define <vscale x 4 x i32> @vbsl_neg_cond_0001(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
; CHECK-LABEL: vbsl_neg_cond_0001:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
; CHECK-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; CHECK-NEXT:    mov z0.d, z1.d
; CHECK-NEXT:    ret
  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
  %left_bits_1 = and <vscale x 4 x i32> %left, %neg_cond
  %right_bits_0 = and <vscale x 4 x i32> %min_cond, %right
  %bsl0001 = or <vscale x 4 x i32> %right_bits_0, %left_bits_1
  ret <vscale x 4 x i32> %bsl0001
}

define <vscale x 4 x i32> @vbsl_neg_cond_0010(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
; CHECK-LABEL: vbsl_neg_cond_0010:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
; CHECK-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; CHECK-NEXT:    mov z0.d, z1.d
; CHECK-NEXT:    ret
  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
  %left_bits_0 = and <vscale x 4 x i32> %neg_cond, %left
  %right_bits_1 = and <vscale x 4 x i32> %right, %min_cond
  %bsl0010 = or <vscale x 4 x i32> %right_bits_1, %left_bits_0
  ret <vscale x 4 x i32> %bsl0010
}

define <vscale x 4 x i32> @vbsl_neg_cond_0011(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
; CHECK-LABEL: vbsl_neg_cond_0011:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
; CHECK-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; CHECK-NEXT:    mov z0.d, z1.d
; CHECK-NEXT:    ret
  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
  %left_bits_1 = and <vscale x 4 x i32> %left, %neg_cond
  %right_bits_1 = and <vscale x 4 x i32> %right, %min_cond
  %bsl0011 = or <vscale x 4 x i32> %right_bits_1, %left_bits_1
  ret <vscale x 4 x i32> %bsl0011
}

define <vscale x 4 x i32> @vbsl_neg_cond_0100(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
; CHECK-LABEL: vbsl_neg_cond_0100:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
; CHECK-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; CHECK-NEXT:    mov z0.d, z1.d
; CHECK-NEXT:    ret
  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
  %left_bits_0 = and <vscale x 4 x i32> %neg_cond, %left
  %right_bits_0 = and <vscale x 4 x i32> %min_cond, %right
  %bsl0100 = or <vscale x 4 x i32> %left_bits_0, %right_bits_0
  ret <vscale x 4 x i32> %bsl0100
}

define <vscale x 4 x i32> @vbsl_neg_cond_0101(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
; CHECK-LABEL: vbsl_neg_cond_0101:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
; CHECK-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; CHECK-NEXT:    mov z0.d, z1.d
; CHECK-NEXT:    ret
  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
  %left_bits_0 = and <vscale x 4 x i32> %neg_cond, %left
  %right_bits_1 = and <vscale x 4 x i32> %right, %min_cond
  %bsl0101 = or <vscale x 4 x i32> %left_bits_0, %right_bits_1
  ret <vscale x 4 x i32> %bsl0101
}

define <vscale x 4 x i32> @vbsl_neg_cond_0110(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
; CHECK-LABEL: vbsl_neg_cond_0110:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
; CHECK-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; CHECK-NEXT:    mov z0.d, z1.d
; CHECK-NEXT:    ret
  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
  %left_bits_1 = and <vscale x 4 x i32> %left, %neg_cond
  %right_bits_0 = and <vscale x 4 x i32> %min_cond, %right
  %bsl0110 = or <vscale x 4 x i32> %left_bits_1, %right_bits_0
  ret <vscale x 4 x i32> %bsl0110
}

define <vscale x 4 x i32> @vbsl_neg_cond_0111(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
; CHECK-LABEL: vbsl_neg_cond_0111:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
; CHECK-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; CHECK-NEXT:    mov z0.d, z1.d
; CHECK-NEXT:    ret
  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
  %left_bits_1 = and <vscale x 4 x i32> %left, %neg_cond
  %right_bits_1 = and <vscale x 4 x i32> %right, %min_cond
  %bsl0111 = or <vscale x 4 x i32> %left_bits_1, %right_bits_1
  ret <vscale x 4 x i32> %bsl0111
}

define <vscale x 4 x i32> @vbsl_neg_cond_1000(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
; CHECK-LABEL: vbsl_neg_cond_1000:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
; CHECK-NEXT:    bsl z2.d, z2.d, z1.d, z0.d
; CHECK-NEXT:    mov z0.d, z2.d
; CHECK-NEXT:    ret
  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
  %flip_cond_left_bits_0 = and <vscale x 4 x i32> %min_cond, %left
  %flip_cond_right_bits_0 = and <vscale x 4 x i32> %neg_cond, %right
  %bsl1000 = or <vscale x 4 x i32> %flip_cond_right_bits_0, %flip_cond_left_bits_0
  ret <vscale x 4 x i32> %bsl1000
}

define <vscale x 4 x i32> @vbsl_neg_cond_1001(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
; CHECK-LABEL: vbsl_neg_cond_1001:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
; CHECK-NEXT:    bsl z2.d, z2.d, z1.d, z0.d
; CHECK-NEXT:    mov z0.d, z2.d
; CHECK-NEXT:    ret
  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
  %flip_cond_left_bits_1 = and <vscale x 4 x i32> %left, %min_cond
  %flip_cond_right_bits_0 = and <vscale x 4 x i32> %neg_cond, %right
  %bsl1001 = or <vscale x 4 x i32> %flip_cond_right_bits_0, %flip_cond_left_bits_1
  ret <vscale x 4 x i32> %bsl1001
}

define <vscale x 4 x i32> @vbsl_neg_cond_1010(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
; CHECK-LABEL: vbsl_neg_cond_1010:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
; CHECK-NEXT:    bsl z2.d, z2.d, z1.d, z0.d
; CHECK-NEXT:    mov z0.d, z2.d
; CHECK-NEXT:    ret
  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
  %flip_cond_left_bits_0 = and <vscale x 4 x i32> %min_cond, %left
  %flip_cond_right_bits_1 = and <vscale x 4 x i32> %right, %neg_cond
  %bsl1010 = or <vscale x 4 x i32> %flip_cond_right_bits_1, %flip_cond_left_bits_0
  ret <vscale x 4 x i32> %bsl1010
}

define <vscale x 4 x i32> @vbsl_neg_cond_1011(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
; CHECK-LABEL: vbsl_neg_cond_1011:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
; CHECK-NEXT:    bsl z2.d, z2.d, z1.d, z0.d
; CHECK-NEXT:    mov z0.d, z2.d
; CHECK-NEXT:    ret
  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
  %flip_cond_left_bits_1 = and <vscale x 4 x i32> %left, %min_cond
  %flip_cond_right_bits_1 = and <vscale x 4 x i32> %right, %neg_cond
  %bsl1011 = or <vscale x 4 x i32> %flip_cond_right_bits_1, %flip_cond_left_bits_1
  ret <vscale x 4 x i32> %bsl1011
}

define <vscale x 4 x i32> @vbsl_neg_cond_1100(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
; CHECK-LABEL: vbsl_neg_cond_1100:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
; CHECK-NEXT:    bsl z2.d, z2.d, z1.d, z0.d
; CHECK-NEXT:    mov z0.d, z2.d
; CHECK-NEXT:    ret
  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
  %flip_cond_left_bits_0 = and <vscale x 4 x i32> %min_cond, %left
  %flip_cond_right_bits_0 = and <vscale x 4 x i32> %neg_cond, %right
  %bsl1100 = or <vscale x 4 x i32> %flip_cond_left_bits_0, %flip_cond_right_bits_0
  ret <vscale x 4 x i32> %bsl1100
}

define <vscale x 4 x i32> @vbsl_neg_cond_1101(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
; CHECK-LABEL: vbsl_neg_cond_1101:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
; CHECK-NEXT:    bsl z2.d, z2.d, z1.d, z0.d
; CHECK-NEXT:    mov z0.d, z2.d
; CHECK-NEXT:    ret
  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
  %flip_cond_left_bits_0 = and <vscale x 4 x i32> %min_cond, %left
  %flip_cond_right_bits_1 = and <vscale x 4 x i32> %right, %neg_cond
  %bsl1101 = or <vscale x 4 x i32> %flip_cond_left_bits_0, %flip_cond_right_bits_1
  ret <vscale x 4 x i32> %bsl1101
}

define <vscale x 4 x i32> @vbsl_neg_cond_1110(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
; CHECK-LABEL: vbsl_neg_cond_1110:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
; CHECK-NEXT:    bsl z2.d, z2.d, z1.d, z0.d
; CHECK-NEXT:    mov z0.d, z2.d
; CHECK-NEXT:    ret
  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
  %flip_cond_left_bits_1 = and <vscale x 4 x i32> %left, %min_cond
  %flip_cond_right_bits_0 = and <vscale x 4 x i32> %neg_cond, %right
  %bsl1110 = or <vscale x 4 x i32> %flip_cond_left_bits_1, %flip_cond_right_bits_0
  ret <vscale x 4 x i32> %bsl1110
}

define <vscale x 4 x i32> @vbsl_neg_cond_1111(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
; CHECK-LABEL: vbsl_neg_cond_1111:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
; CHECK-NEXT:    bsl z2.d, z2.d, z1.d, z0.d
; CHECK-NEXT:    mov z0.d, z2.d
; CHECK-NEXT:    ret
  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
  %flip_cond_left_bits_1 = and <vscale x 4 x i32> %left, %min_cond
  %flip_cond_right_bits_1 = and <vscale x 4 x i32> %right, %neg_cond
  %bsl1111 = or <vscale x 4 x i32> %flip_cond_left_bits_1, %flip_cond_right_bits_1
  ret <vscale x 4 x i32> %bsl1111
}

attributes #0 = { "target-features"="+sve2" }
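
The header comment in this test notes that the condition is negated rather than bitwise-inverted; the reason every permutation above still folds is the two's-complement identity ~x = -x - 1, which makes %neg_cond and %min_cond exact bitwise complements for any input, not just 0/1 lanes. A worked single-lane check (values illustrative):

;   ~(-c) = c - 1                       (from ~x = -x - 1)
;   c = 5:  -c       = 0xfffffffb
;           c + (-1) = 0x00000004 = ~0xfffffffb
; So (-c & left) | ((c - 1) & right) takes left's bits where -c has ones
; and right's bits elsewhere: exactly bsl(-c, left, right), with the
; remaining negation done by the 'subr z0.s, z0.s, #0' in the CHECK lines.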
