Skip to content

Commit c561f4d

Browse files
author
Francesco Petrogalli
committed
[SVE][VLS] Don't combine logical AND.
Testing is performed when targeting 128-, 256- and 512-bit wide vectors. For 128-bit vectors, the original behavior of using NEON instructions is preserved. Differential Revision: https://reviews.llvm.org/D85479
1 parent 2b8ad6b commit c561f4d

File tree

2 files changed

+42
-0
lines changed

2 files changed

+42
-0
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11156,6 +11156,11 @@ static SDValue performANDCombine(SDNode *N,
1115611156
if (VT.isScalableVector())
1115711157
return performSVEAndCombine(N, DCI);
1115811158

11159+
// The combining code below works only for NEON vectors. In particular, it
11160+
// does not work for SVE when dealing with vectors wider than 128 bits.
11161+
if (!(VT.is64BitVector() || VT.is128BitVector()))
11162+
return SDValue();
11163+
1115911164
BuildVectorSDNode *BVN =
1116011165
dyn_cast<BuildVectorSDNode>(N->getOperand(1).getNode());
1116111166
if (!BVN)
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=512 -o - -asm-verbose=0 < %s | FileCheck %s
2+
3+
; CHECK-LABEL: vls_sve_and_64xi8:
4+
; CHECK-NEXT: adrp x[[ONE:[0-9]+]], .LCPI0_0
5+
; CHECK-NEXT: ptrue p0.b, vl64
6+
; CHECK-NEXT: add x[[TWO:[0-9]+]], x[[ONE]], :lo12:.LCPI0_0
7+
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
8+
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x[[TWO]]]
9+
; CHECK-NEXT: and z0.d, z0.d, z1.d
10+
; CHECK-NEXT: st1b { z0.b }, p0, [x1]
11+
; CHECK-NEXT: ret
12+
; Loads a <64 x i8> vector from %ap, ANDs it with a constant mask whose bytes
; alternate 0, 255, and stores the result to %out.
; The CHECK lines for this function expect the constant to be loaded from the
; constant pool (.LCPI0_0) and the operation to be emitted as SVE
; ld1b / and / st1b under a `ptrue p0.b, vl64` predicate.
define void @vls_sve_and_64xi8(<64 x i8>* %ap, <64 x i8>* %out) nounwind {
13+
%a = load <64 x i8>, <64 x i8>* %ap
14+
%b = and <64 x i8> %a, <i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255,
15+
i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255,
16+
i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255,
17+
i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255>
18+
store <64 x i8> %b, <64 x i8>* %out
19+
ret void
20+
}
21+
22+
; CHECK-LABEL: vls_sve_and_16xi8:
23+
; CHECK-NEXT: bic v0.8h, #255
24+
; CHECK-NEXT: ret
25+
; Returns %b ANDed with a 16-byte constant mask whose bytes alternate 0, 255.
; The CHECK lines for this function expect a single NEON instruction,
; `bic v0.8h, #255`, followed by `ret`.
; NOTE(review): %out is not referenced in the body.
define <16 x i8> @vls_sve_and_16xi8(<16 x i8> %b, <16 x i8>* %out) nounwind {
26+
%c = and <16 x i8> %b, <i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255>
27+
ret <16 x i8> %c
28+
}
29+
30+
; CHECK-LABEL: vls_sve_and_8xi8:
31+
; CHECK-NEXT: bic v0.4h, #255
32+
; CHECK-NEXT: ret
33+
; Returns %b ANDed with an 8-byte constant mask whose bytes alternate 0, 255.
; The CHECK lines for this function expect a single NEON instruction,
; `bic v0.4h, #255`, followed by `ret`.
; NOTE(review): %out is not referenced in the body.
define <8 x i8> @vls_sve_and_8xi8(<8 x i8> %b, <8 x i8>* %out) nounwind {
34+
%c = and <8 x i8> %b, <i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255>
35+
ret <8 x i8> %c
36+
}
37+

0 commit comments

Comments
 (0)