Skip to content

Commit 2ec13c5

Browse files
authored
[AArch64][SVE] Add patterns for bit-select instructions. (llvm#138689)
This patch adds patterns to select SVE2 bit-select instructions such as BSL from (or (and a, c), (and b, (vnot c))) and other similar patterns. For example: ```cpp svuint64_t bsl(svuint64_t a, svuint64_t b, svuint64_t c) { return (a & c) | (b & ~c); } ``` Currently: ```gas bsl: and z0.d, z2.d, z0.d bic z1.d, z1.d, z2.d orr z0.d, z0.d, z1.d ret ``` Becomes: ```gas bsl: bsl z0.d, z0.d, z1.d, z2.d ret ```
1 parent af03d6b commit 2ec13c5

File tree

3 files changed

+248
-7
lines changed

3 files changed

+248
-7
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21997,6 +21997,30 @@ SDValue tryLowerPartialReductionToWideAdd(SDNode *N,
2199721997
return DAG.getNode(TopOpcode, DL, AccVT, BottomNode, ExtOp);
2199821998
}
2199921999

22000+
static SDValue combineSVEBitSel(unsigned IID, SDNode *N, SelectionDAG &DAG) {
22001+
SDLoc DL(N);
22002+
EVT VT = N->getValueType(0);
22003+
SDValue Op1 = N->getOperand(1);
22004+
SDValue Op2 = N->getOperand(2);
22005+
SDValue Op3 = N->getOperand(3);
22006+
22007+
switch (IID) {
22008+
default:
22009+
llvm_unreachable("Called with wrong intrinsic!");
22010+
case Intrinsic::aarch64_sve_bsl:
22011+
return DAG.getNode(AArch64ISD::BSP, DL, VT, Op3, Op1, Op2);
22012+
case Intrinsic::aarch64_sve_bsl1n:
22013+
return DAG.getNode(AArch64ISD::BSP, DL, VT, Op3, DAG.getNOT(DL, Op1, VT),
22014+
Op2);
22015+
case Intrinsic::aarch64_sve_bsl2n:
22016+
return DAG.getNode(AArch64ISD::BSP, DL, VT, Op3, Op1,
22017+
DAG.getNOT(DL, Op2, VT));
22018+
case Intrinsic::aarch64_sve_nbsl:
22019+
return DAG.getNOT(DL, DAG.getNode(AArch64ISD::BSP, DL, VT, Op3, Op1, Op2),
22020+
VT);
22021+
}
22022+
}
22023+
2200022024
static SDValue performIntrinsicCombine(SDNode *N,
2200122025
TargetLowering::DAGCombinerInfo &DCI,
2200222026
const AArch64Subtarget *Subtarget) {
@@ -22319,6 +22343,11 @@ static SDValue performIntrinsicCombine(SDNode *N,
2231922343
AArch64CC::LAST_ACTIVE);
2232022344
case Intrinsic::aarch64_sve_whilelo:
2232122345
return tryCombineWhileLo(N, DCI, Subtarget);
22346+
case Intrinsic::aarch64_sve_bsl:
22347+
case Intrinsic::aarch64_sve_bsl1n:
22348+
case Intrinsic::aarch64_sve_bsl2n:
22349+
case Intrinsic::aarch64_sve_nbsl:
22350+
return combineSVEBitSel(IID, N, DAG);
2232222351
}
2232322352
return SDValue();
2232422353
}

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -545,12 +545,18 @@ def AArch64umulh : PatFrag<(ops node:$op1, node:$op2),
545545

546546

547547
def AArch64bsl : PatFrags<(ops node:$Op1, node:$Op2, node:$Op3),
548-
[(int_aarch64_sve_bsl node:$Op1, node:$Op2, node:$Op3),
549-
(AArch64bsp node:$Op3, node:$Op1, node:$Op2)]>;
548+
[(AArch64bsp node:$Op3, node:$Op1, node:$Op2),
549+
(or (and node:$Op1, node:$Op3), (and node:$Op2, (vnot node:$Op3)))]>;
550550

551-
def AArch64nbsl : PatFrags<(ops node:$Op1, node:$Op2, node:$Op3),
552-
[(int_aarch64_sve_nbsl node:$Op1, node:$Op2, node:$Op3),
553-
(vnot (AArch64bsp node:$Op3, node:$Op1, node:$Op2))]>;
551+
def AArch64bsl1n : PatFrag<(ops node:$Op1, node:$Op2, node:$Op3),
552+
(AArch64bsl (vnot node:$Op1), node:$Op2, node:$Op3)>;
553+
554+
def AArch64bsl2n : PatFrags<(ops node:$Op1, node:$Op2, node:$Op3),
555+
[(AArch64bsl node:$Op1, (vnot node:$Op2), node:$Op3),
556+
(or (and node:$Op1, node:$Op3), (vnot (or node:$Op2, node:$Op3)))]>;
557+
558+
def AArch64nbsl : PatFrag<(ops node:$Op1, node:$Op2, node:$Op3),
559+
(vnot (AArch64bsl node:$Op1, node:$Op2, node:$Op3))>;
554560

555561

556562
let Predicates = [HasSVE] in {
@@ -3934,8 +3940,8 @@ let Predicates = [HasSVE2_or_SME] in {
39343940
defm EOR3_ZZZZ : sve2_int_bitwise_ternary_op<0b000, "eor3", AArch64eor3>;
39353941
defm BCAX_ZZZZ : sve2_int_bitwise_ternary_op<0b010, "bcax", AArch64bcax>;
39363942
defm BSL_ZZZZ : sve2_int_bitwise_ternary_op<0b001, "bsl", AArch64bsl>;
3937-
defm BSL1N_ZZZZ : sve2_int_bitwise_ternary_op<0b011, "bsl1n", int_aarch64_sve_bsl1n>;
3938-
defm BSL2N_ZZZZ : sve2_int_bitwise_ternary_op<0b101, "bsl2n", int_aarch64_sve_bsl2n>;
3943+
defm BSL1N_ZZZZ : sve2_int_bitwise_ternary_op<0b011, "bsl1n", AArch64bsl1n>;
3944+
defm BSL2N_ZZZZ : sve2_int_bitwise_ternary_op<0b101, "bsl2n", AArch64bsl2n>;
39393945
defm NBSL_ZZZZ : sve2_int_bitwise_ternary_op<0b111, "nbsl", AArch64nbsl>;
39403946

39413947
// SVE2 bitwise xor and rotate right by immediate

llvm/test/CodeGen/AArch64/sve2-bsl.ll

Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,3 +93,209 @@ define <vscale x 2 x i64> @nbsl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
9393
%4 = xor <vscale x 2 x i64> %3, splat(i64 -1)
9494
ret <vscale x 2 x i64> %4
9595
}
96+
97+
; Test BSL/NBSL/BSL1N/BSL2N code generation for:
98+
; #define BSL(x,y,z) ( ((x) & (z)) | ( (y) & ~(z)))
99+
; #define NBSL(x,y,z) (~(((x) & (z)) | ( (y) & ~(z))))
100+
; #define BSL1N(x,y,z) ( (~(x) & (z)) | ( (y) & ~(z)))
101+
; #define BSL2N(x,y,z) ( ((x) & (z)) | (~(y) & ~(z)))
102+
103+
define <vscale x 16 x i8> @codegen_bsl_i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2) {
104+
; CHECK-LABEL: codegen_bsl_i8:
105+
; CHECK: // %bb.0:
106+
; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d
107+
; CHECK-NEXT: ret
108+
%4 = and <vscale x 16 x i8> %2, %0
109+
%5 = xor <vscale x 16 x i8> %2, splat (i8 -1)
110+
%6 = and <vscale x 16 x i8> %1, %5
111+
%7 = or <vscale x 16 x i8> %4, %6
112+
ret <vscale x 16 x i8> %7
113+
}
114+
115+
define <vscale x 16 x i8> @codegen_nbsl_i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2) {
116+
; CHECK-LABEL: codegen_nbsl_i8:
117+
; CHECK: // %bb.0:
118+
; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
119+
; CHECK-NEXT: ret
120+
%4 = and <vscale x 16 x i8> %2, %0
121+
%5 = xor <vscale x 16 x i8> %2, splat (i8 -1)
122+
%6 = and <vscale x 16 x i8> %1, %5
123+
%7 = or <vscale x 16 x i8> %4, %6
124+
%8 = xor <vscale x 16 x i8> %7, splat (i8 -1)
125+
ret <vscale x 16 x i8> %8
126+
}
127+
128+
define <vscale x 16 x i8> @codegen_bsl1n_i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2) {
129+
; CHECK-LABEL: codegen_bsl1n_i8:
130+
; CHECK: // %bb.0:
131+
; CHECK-NEXT: bsl1n z0.d, z0.d, z1.d, z2.d
132+
; CHECK-NEXT: ret
133+
%4 = xor <vscale x 16 x i8> %0, splat (i8 -1)
134+
%5 = and <vscale x 16 x i8> %2, %4
135+
%6 = xor <vscale x 16 x i8> %2, splat (i8 -1)
136+
%7 = and <vscale x 16 x i8> %1, %6
137+
%8 = or <vscale x 16 x i8> %5, %7
138+
ret <vscale x 16 x i8> %8
139+
}
140+
141+
define <vscale x 16 x i8> @codegen_bsl2n_i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2) {
142+
; CHECK-LABEL: codegen_bsl2n_i8:
143+
; CHECK: // %bb.0:
144+
; CHECK-NEXT: bsl2n z0.d, z0.d, z1.d, z2.d
145+
; CHECK-NEXT: ret
146+
%4 = and <vscale x 16 x i8> %2, %0
147+
%5 = or <vscale x 16 x i8> %2, %1
148+
%6 = xor <vscale x 16 x i8> %5, splat (i8 -1)
149+
%7 = or <vscale x 16 x i8> %4, %6
150+
ret <vscale x 16 x i8> %7
151+
}
152+
153+
define <vscale x 8 x i16> @codegen_bsl_i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2) {
154+
; CHECK-LABEL: codegen_bsl_i16:
155+
; CHECK: // %bb.0:
156+
; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d
157+
; CHECK-NEXT: ret
158+
%4 = and <vscale x 8 x i16> %2, %0
159+
%5 = xor <vscale x 8 x i16> %2, splat (i16 -1)
160+
%6 = and <vscale x 8 x i16> %1, %5
161+
%7 = or <vscale x 8 x i16> %4, %6
162+
ret <vscale x 8 x i16> %7
163+
}
164+
165+
define <vscale x 8 x i16> @codegen_nbsl_i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2) {
166+
; CHECK-LABEL: codegen_nbsl_i16:
167+
; CHECK: // %bb.0:
168+
; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
169+
; CHECK-NEXT: ret
170+
%4 = and <vscale x 8 x i16> %2, %0
171+
%5 = xor <vscale x 8 x i16> %2, splat (i16 -1)
172+
%6 = and <vscale x 8 x i16> %1, %5
173+
%7 = or <vscale x 8 x i16> %4, %6
174+
%8 = xor <vscale x 8 x i16> %7, splat (i16 -1)
175+
ret <vscale x 8 x i16> %8
176+
}
177+
178+
define <vscale x 8 x i16> @codegen_bsl1n_i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2) {
179+
; CHECK-LABEL: codegen_bsl1n_i16:
180+
; CHECK: // %bb.0:
181+
; CHECK-NEXT: bsl1n z0.d, z0.d, z1.d, z2.d
182+
; CHECK-NEXT: ret
183+
%4 = xor <vscale x 8 x i16> %0, splat (i16 -1)
184+
%5 = and <vscale x 8 x i16> %2, %4
185+
%6 = xor <vscale x 8 x i16> %2, splat (i16 -1)
186+
%7 = and <vscale x 8 x i16> %1, %6
187+
%8 = or <vscale x 8 x i16> %5, %7
188+
ret <vscale x 8 x i16> %8
189+
}
190+
191+
define <vscale x 8 x i16> @codegen_bsl2n_i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2) {
192+
; CHECK-LABEL: codegen_bsl2n_i16:
193+
; CHECK: // %bb.0:
194+
; CHECK-NEXT: bsl2n z0.d, z0.d, z1.d, z2.d
195+
; CHECK-NEXT: ret
196+
%4 = and <vscale x 8 x i16> %2, %0
197+
%5 = or <vscale x 8 x i16> %2, %1
198+
%6 = xor <vscale x 8 x i16> %5, splat (i16 -1)
199+
%7 = or <vscale x 8 x i16> %4, %6
200+
ret <vscale x 8 x i16> %7
201+
}
202+
203+
define <vscale x 4 x i32> @codegen_bsl_i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) {
204+
; CHECK-LABEL: codegen_bsl_i32:
205+
; CHECK: // %bb.0:
206+
; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d
207+
; CHECK-NEXT: ret
208+
%4 = and <vscale x 4 x i32> %2, %0
209+
%5 = xor <vscale x 4 x i32> %2, splat (i32 -1)
210+
%6 = and <vscale x 4 x i32> %1, %5
211+
%7 = or <vscale x 4 x i32> %4, %6
212+
ret <vscale x 4 x i32> %7
213+
}
214+
215+
define <vscale x 4 x i32> @codegen_nbsl_i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) {
216+
; CHECK-LABEL: codegen_nbsl_i32:
217+
; CHECK: // %bb.0:
218+
; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
219+
; CHECK-NEXT: ret
220+
%4 = and <vscale x 4 x i32> %2, %0
221+
%5 = xor <vscale x 4 x i32> %2, splat (i32 -1)
222+
%6 = and <vscale x 4 x i32> %1, %5
223+
%7 = or <vscale x 4 x i32> %4, %6
224+
%8 = xor <vscale x 4 x i32> %7, splat (i32 -1)
225+
ret <vscale x 4 x i32> %8
226+
}
227+
228+
define <vscale x 4 x i32> @codegen_bsl1n_i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) {
229+
; CHECK-LABEL: codegen_bsl1n_i32:
230+
; CHECK: // %bb.0:
231+
; CHECK-NEXT: bsl1n z0.d, z0.d, z1.d, z2.d
232+
; CHECK-NEXT: ret
233+
%4 = xor <vscale x 4 x i32> %0, splat (i32 -1)
234+
%5 = and <vscale x 4 x i32> %2, %4
235+
%6 = xor <vscale x 4 x i32> %2, splat (i32 -1)
236+
%7 = and <vscale x 4 x i32> %1, %6
237+
%8 = or <vscale x 4 x i32> %5, %7
238+
ret <vscale x 4 x i32> %8
239+
}
240+
241+
define <vscale x 4 x i32> @codegen_bsl2n_i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) {
242+
; CHECK-LABEL: codegen_bsl2n_i32:
243+
; CHECK: // %bb.0:
244+
; CHECK-NEXT: bsl2n z0.d, z0.d, z1.d, z2.d
245+
; CHECK-NEXT: ret
246+
%4 = and <vscale x 4 x i32> %2, %0
247+
%5 = or <vscale x 4 x i32> %2, %1
248+
%6 = xor <vscale x 4 x i32> %5, splat (i32 -1)
249+
%7 = or <vscale x 4 x i32> %4, %6
250+
ret <vscale x 4 x i32> %7
251+
}
252+
253+
define <vscale x 2 x i64> @codegen_bsl_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) {
254+
; CHECK-LABEL: codegen_bsl_i64:
255+
; CHECK: // %bb.0:
256+
; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d
257+
; CHECK-NEXT: ret
258+
%4 = and <vscale x 2 x i64> %2, %0
259+
%5 = xor <vscale x 2 x i64> %2, splat (i64 -1)
260+
%6 = and <vscale x 2 x i64> %1, %5
261+
%7 = or <vscale x 2 x i64> %4, %6
262+
ret <vscale x 2 x i64> %7
263+
}
264+
265+
define <vscale x 2 x i64> @codegen_nbsl_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) {
266+
; CHECK-LABEL: codegen_nbsl_i64:
267+
; CHECK: // %bb.0:
268+
; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
269+
; CHECK-NEXT: ret
270+
%4 = and <vscale x 2 x i64> %2, %0
271+
%5 = xor <vscale x 2 x i64> %2, splat (i64 -1)
272+
%6 = and <vscale x 2 x i64> %1, %5
273+
%7 = or <vscale x 2 x i64> %4, %6
274+
%8 = xor <vscale x 2 x i64> %7, splat (i64 -1)
275+
ret <vscale x 2 x i64> %8
276+
}
277+
278+
define <vscale x 2 x i64> @codegen_bsl1n_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) {
279+
; CHECK-LABEL: codegen_bsl1n_i64:
280+
; CHECK: // %bb.0:
281+
; CHECK-NEXT: bsl1n z0.d, z0.d, z1.d, z2.d
282+
; CHECK-NEXT: ret
283+
%4 = xor <vscale x 2 x i64> %0, splat (i64 -1)
284+
%5 = and <vscale x 2 x i64> %2, %4
285+
%6 = xor <vscale x 2 x i64> %2, splat (i64 -1)
286+
%7 = and <vscale x 2 x i64> %1, %6
287+
%8 = or <vscale x 2 x i64> %5, %7
288+
ret <vscale x 2 x i64> %8
289+
}
290+
291+
define <vscale x 2 x i64> @codegen_bsl2n_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) {
292+
; CHECK-LABEL: codegen_bsl2n_i64:
293+
; CHECK: // %bb.0:
294+
; CHECK-NEXT: bsl2n z0.d, z0.d, z1.d, z2.d
295+
; CHECK-NEXT: ret
296+
%4 = and <vscale x 2 x i64> %2, %0
297+
%5 = or <vscale x 2 x i64> %2, %1
298+
%6 = xor <vscale x 2 x i64> %5, splat (i64 -1)
299+
%7 = or <vscale x 2 x i64> %4, %6
300+
ret <vscale x 2 x i64> %7
301+
}

0 commit comments

Comments
 (0)