Skip to content

Commit 9705cee

Browse files
Dinar TemirbulatovDinar Temirbulatov
authored and committed
[AArch64][SVE2] SVE2 NBSL instruction lowering.
Allow folding BSL/CNOT instructions into a single NBSL instruction for scalable vectors.
1 parent e0a5155 commit 9705cee

File tree

2 files changed

+32
-0
lines changed

2 files changed

+32
-0
lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3767,6 +3767,23 @@ let Predicates = [HasSVE2orSME] in {
37673767

37683768
// SVE2 extract vector (immediate offset, constructive)
37693769
def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">;
3770+
3771+
// zext(cmpeq(bsl(x, y, z), splat(0))) -> nbsl(x, y, z)
// nxv16i8 variant: matches an SVEAllActive-predicated SETEQ compare of an
// AArch64bsp node against SVEDup0, zero-extended from nxv16i1 back to nxv16i8,
// and selects NBSL_ZZZZ on the same three operands.
// NOTE(review): the matched value is 0 or 1 per lane (zext of an i1 compare),
// whereas NBSL is presumably a bitwise-inverted select (every bit of the lane
// inverted) -- confirm against the Arm SVE2 NBSL description that this fold is
// value-preserving; matching a bitwise NOT of AArch64bsp may be what was
// intended.
3772+
def : Pat<(nxv16i8 (zext (nxv16i1 (AArch64setcc_z (nxv16i1 (SVEAllActive)),
3773+
(nxv16i8 (AArch64bsp nxv16i8:$Op1, nxv16i8:$Op2, nxv16i8:$Op3)), (SVEDup0), SETEQ)))),
3774+
(NBSL_ZZZZ nxv16i8:$Op1, nxv16i8:$Op2, nxv16i8:$Op3)>;
3775+
3776+
// nxv8i16 variant: zext(cmpeq(bsl(x, y, z), splat(0))) under an SVEAllActive
// predicate is selected as NBSL_ZZZZ on the same three operands.
def : Pat<(nxv8i16 (zext (nxv8i1 (AArch64setcc_z (nxv8i1 (SVEAllActive)),
3777+
(nxv8i16 (AArch64bsp nxv8i16:$Op1, nxv8i16:$Op2, nxv8i16:$Op3)), (SVEDup0), SETEQ)))),
3778+
(NBSL_ZZZZ nxv8i16:$Op1, nxv8i16:$Op2, nxv8i16:$Op3)>;
3779+
3780+
// nxv4i32 variant: zext(cmpeq(bsl(x, y, z), splat(0))) under an SVEAllActive
// predicate is selected as NBSL_ZZZZ on the same three operands.
def : Pat<(nxv4i32 (zext (nxv4i1 (AArch64setcc_z (nxv4i1 (SVEAllActive)),
3781+
(nxv4i32 (AArch64bsp nxv4i32:$Op1, nxv4i32:$Op2, nxv4i32:$Op3)), (SVEDup0), SETEQ)))),
3782+
(NBSL_ZZZZ nxv4i32:$Op1, nxv4i32:$Op2, nxv4i32:$Op3)>;
3783+
3784+
// nxv2i64 variant: zext(cmpeq(bsl(x, y, z), splat(0))) under an SVEAllActive
// predicate is selected as NBSL_ZZZZ on the same three operands.
def : Pat<(nxv2i64 (zext (nxv2i1 (AArch64setcc_z (nxv2i1 (SVEAllActive)),
3785+
(nxv2i64 (AArch64bsp nxv2i64:$Op1, nxv2i64:$Op2, nxv2i64:$Op3)), (SVEDup0), SETEQ)))),
3786+
(NBSL_ZZZZ nxv2i64:$Op1, nxv2i64:$Op2, nxv2i64:$Op3)>;
37703787
} // End HasSVE2orSME
37713788

37723789
let Predicates = [HasSVE2] in {

llvm/test/CodeGen/AArch64/sve2-bsl.ll

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,18 @@ define <vscale x 4 x i32> @no_bsl_fold(<vscale x 4 x i32> %a, <vscale x 4 x i32>
4141
%c = or <vscale x 4 x i32> %1, %2
4242
ret <vscale x 4 x i32> %c
4343
}
44+
45+
; Test: (%a & 0x7fffffff) | (%b & 0x80000000), compared equal to zero and
; zero-extended back to i32 lanes, is expected to select a single NBSL with the
; 0x7fffffff splat materialised by a MOV.
; NOTE(review): %5 is 0 or 1 per lane (zext of an i1), while NBSL presumably
; yields the bitwise inverse of the select -- confirm that this expected output
; is not encoding a miscompile.
define <vscale x 4 x i32> @nbsl(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
46+
; CHECK-LABEL: nbsl:
47+
; CHECK: // %bb.0:
48+
; CHECK-NEXT: mov z2.s, #0x7fffffff
49+
; CHECK-NEXT: nbsl z2.d, z2.d, z0.d, z1.d
50+
; CHECK-NEXT: mov z0.d, z2.d
51+
; CHECK-NEXT: ret
52+
%1 = and <vscale x 4 x i32> %a, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2147483647, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
53+
%2 = and <vscale x 4 x i32> %b, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -2147483648, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
54+
%3 = or <vscale x 4 x i32> %1, %2
55+
%4 = icmp eq <vscale x 4 x i32> %3, zeroinitializer
56+
%5 = zext <vscale x 4 x i1> %4 to <vscale x 4 x i32>
57+
ret <vscale x 4 x i32> %5
58+
}

0 commit comments

Comments
 (0)