Skip to content

Commit 6f26867

Browse files
author
Dinar Temirbulatov
authored
[AArch64][SVE2] Generate SVE2 BSL instruction in LLVM for add/sub. (#88413)
Allow folding or/and-and into the BSL instruction for scalable vectors.
1 parent 054b1b3 commit 6f26867

File tree

2 files changed

+17
-2
lines changed

2 files changed

+17
-2
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17927,11 +17927,11 @@ static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
1792717927
} else
1792817928
continue;
1792917929

17930-
if (!ISD::isBuildVectorAllZeros(Sub.getOperand(0).getNode()))
17930+
if (!ISD::isConstantSplatVectorAllZeros(Sub.getOperand(0).getNode()))
1793117931
continue;
1793217932

1793317933
// Constant ones is always righthand operand of the Add.
17934-
if (!ISD::isBuildVectorAllOnes(Add.getOperand(1).getNode()))
17934+
if (!ISD::isConstantSplatVectorAllOnes(Add.getOperand(1).getNode()))
1793517935
continue;
1793617936

1793717937
if (Sub.getOperand(1) != Add.getOperand(0))

llvm/test/CodeGen/AArch64/sve2-bsl.ll

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,21 @@ define <vscale x 4 x i32> @bsl(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
1313
ret <vscale x 4 x i32> %c
1414
}
1515

16+
define <vscale x 4 x i32> @bsl_add_sub(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
17+
; CHECK-LABEL: bsl_add_sub:
18+
; CHECK: // %bb.0:
19+
; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
20+
; CHECK-NEXT: bsl z1.d, z1.d, z2.d, z0.d
21+
; CHECK-NEXT: mov z0.d, z1.d
22+
; CHECK-NEXT: ret
23+
%neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
24+
%min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
25+
%left_bits_0 = and <vscale x 4 x i32> %neg_cond, %left
26+
%right_bits_0 = and <vscale x 4 x i32> %min_cond, %right
27+
%bsl0000 = or <vscale x 4 x i32> %right_bits_0, %left_bits_0
28+
ret <vscale x 4 x i32> %bsl0000
29+
}
30+
1631
; we are not expecting bsl instruction here. the constants do not match to fold to bsl.
1732
define <vscale x 4 x i32> @no_bsl_fold(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
1833
; CHECK-LABEL: no_bsl_fold:

0 commit comments

Comments
 (0)