Skip to content

Commit a6f1134

Browse files
author
Dinar Temirbulatov
committed
[AArch64][SVE] Avoid AND operation if both sides are splats of i1 or PTRUE
If both sides of an AND operation are i1 splat_vectors or PTRUE nodes, then we can produce just an i1 splat_vector as the result. Differential Revision: https://reviews.llvm.org/D141043
1 parent 3f7dc5c commit a6f1134

File tree

3 files changed

+97
-3
lines changed

3 files changed

+97
-3
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16319,6 +16319,17 @@ static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) {
1631916319
return false;
1632016320
}
1632116321

16322+
// DAG combine for AArch64ISD::REINTERPRET_CAST nodes.
//
// Starting from N's operand, looks through a chain of REINTERPRET_CAST nodes
// until it reaches either a node that is not a REINTERPRET_CAST or a value
// whose type equals N's result type. If the value reached has the same type
// as N's result, that value is returned so it can replace N directly;
// otherwise an empty SDValue is returned to signal that no combine applies.
static SDValue performReinterpretCastCombine(SDNode *N) {
16323+
SDValue LeafOp = SDValue(N, 0);
16324+
SDValue Op = N->getOperand(0);
16325+
// Keep stepping to the cast's source while the current value is itself a
// REINTERPRET_CAST and its type still differs from N's result type.
while (Op.getOpcode() == AArch64ISD::REINTERPRET_CAST &&
16326+
LeafOp.getValueType() != Op.getValueType())
16327+
Op = Op->getOperand(0);
16328+
// Only a type-identical value can stand in for N.
if (LeafOp.getValueType() == Op.getValueType())
16329+
return Op;
16330+
return SDValue();
16331+
}
16332+
1632216333
static SDValue performSVEAndCombine(SDNode *N,
1632316334
TargetLowering::DAGCombinerInfo &DCI) {
1632416335
if (DCI.isBeforeLegalizeOps())
@@ -16365,6 +16376,13 @@ static SDValue performSVEAndCombine(SDNode *N,
1636516376
return DAG.getNode(Opc, DL, N->getValueType(0), And);
1636616377
}
1636716378

16379+
// If either operand of the AND is an all-active predicate (per
16380+
// isAllActivePredicate), the AND is a no-op: return the other operand.
16381+
if (isAllActivePredicate(DAG, N->getOperand(0)))
16382+
return N->getOperand(1);
16383+
if (isAllActivePredicate(DAG, N->getOperand(1)))
16384+
return N->getOperand(0);
16385+
1636816386
if (!EnableCombineMGatherIntrinsics)
1636916387
return SDValue();
1637016388

@@ -21400,6 +21418,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
2140021418
return performUzpCombine(N, DAG);
2140121419
case AArch64ISD::SETCC_MERGE_ZERO:
2140221420
return performSetccMergeZeroCombine(N, DCI);
21421+
case AArch64ISD::REINTERPRET_CAST:
21422+
return performReinterpretCastCombine(N);
2140321423
case AArch64ISD::GLD1_MERGE_ZERO:
2140421424
case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
2140521425
case AArch64ISD::GLD1_UXTW_MERGE_ZERO:

llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,9 +125,7 @@ define <vscale x 16 x i1> @reinterpret_cmpgt(<vscale x 8 x i1> %p, <vscale x 8 x
125125
define <vscale x 16 x i1> @chained_reinterpret() {
126126
; CHECK-LABEL: chained_reinterpret:
127127
; CHECK: // %bb.0:
128-
; CHECK-NEXT: ptrue p0.b
129-
; CHECK-NEXT: ptrue p1.d
130-
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
128+
; CHECK-NEXT: ptrue p0.d
131129
; CHECK-NEXT: ret
132130
%in = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
133131
%cast2 = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %in)
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s | FileCheck %s
3+
4+
target triple = "aarch64-unknown-linux-gnu"
5+
6+
; Ensure that a no-op 'and' gets removed with a vector splat of 1 or a ptrue with the proper constant
7+
8+
define <vscale x 16 x i1> @fold_away_ptrue_and_ptrue() #0 {
9+
; CHECK-LABEL: fold_away_ptrue_and_ptrue:
10+
; CHECK: // %bb.0: // %entry
11+
; CHECK-NEXT: ptrue p0.s
12+
; CHECK-NEXT: ret
13+
entry:
14+
%0 = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
15+
%1 = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %0)
16+
%2 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
17+
%and = and <vscale x 16 x i1> %2, %1
18+
ret <vscale x 16 x i1> %and
19+
}
20+
21+
define <vscale x 16 x i1> @fold_away_ptrue_and_splat_predicate() #0 {
22+
; CHECK-LABEL: fold_away_ptrue_and_splat_predicate:
23+
; CHECK: // %bb.0: // %entry
24+
; CHECK-NEXT: ptrue p0.s
25+
; CHECK-NEXT: ret
26+
entry:
27+
%ins = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
28+
%splat = shufflevector <vscale x 4 x i1> %ins, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
29+
%0 = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %splat)
30+
%1 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
31+
%and = and <vscale x 16 x i1> %0, %1
32+
ret <vscale x 16 x i1> %and
33+
}
34+
35+
; Ensure that one AND operation remains to zero the inactive lanes with the 2 x i1 type (llvm.aarch64.sve.convert.to.svbool.nxv2i1).
36+
define <vscale x 16 x i1> @fold_away_ptrue_and_convert_to() #0 {
37+
; CHECK-LABEL: fold_away_ptrue_and_convert_to:
38+
; CHECK: // %bb.0: // %entry
39+
; CHECK-NEXT: ptrue p0.s
40+
; CHECK-NEXT: ptrue p1.d
41+
; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b
42+
; CHECK-NEXT: ret
43+
entry:
44+
%0 = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
45+
%1 = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %0)
46+
%2 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
47+
%3 = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %2)
48+
%4 = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %3)
49+
%and = and <vscale x 16 x i1> %4, %1
50+
ret <vscale x 16 x i1> %and
51+
}
52+
53+
define <vscale x 16 x i1> @fold_away_two_similar() #0 {
54+
; CHECK-LABEL: fold_away_two_similar:
55+
; CHECK: // %bb.0: // %entry
56+
; CHECK-NEXT: ptrue p0.b
57+
; CHECK-NEXT: ret
58+
entry:
59+
%0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
60+
%1 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
61+
%and = and <vscale x 16 x i1> %0, %1
62+
ret <vscale x 16 x i1> %and
63+
}
64+
65+
declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 immarg)
66+
67+
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
68+
69+
declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 immarg)
70+
71+
declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)
72+
73+
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
74+
75+
76+
attributes #0 = { "target-features"="+sve" }

0 commit comments

Comments
 (0)