Skip to content

Commit 30222c6

Browse files
committed
DAGCombine for lastb
1 parent 3f781f1 commit 30222c6

File tree

2 files changed

+59
-54
lines changed

2 files changed

+59
-54
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19512,6 +19512,57 @@ performLastTrueTestVectorCombine(SDNode *N,
1951219512
return getPTest(DAG, N->getValueType(0), Pg, N0, AArch64CC::LAST_ACTIVE);
1951319513
}
1951419514

19515+
/// Fold an EXTRACT_VECTOR_ELT whose index addresses the last active lane of a
/// predicate into a single AArch64ISD::LASTB node.
///
/// The index expression that is recognised is
///
///   add (xor (CTTZ_ELTS (vector_reverse Pg)), -1), (vscale * NElts)
///
/// i.e. "number of lanes - 1 - count of inactive lanes at the top of Pg",
/// where NElts is the known-minimum element count of the extracted-from
/// vector. A ZERO_EXTEND around the whole index and a TRUNCATE around the
/// vscale or the CTTZ_ELTS value are looked through.
///
/// \param N         The EXTRACT_VECTOR_ELT node (operand 0 = vector,
///                  operand 1 = index).
/// \param DCI       Combiner state; only its SelectionDAG is used.
/// \param Subtarget Currently unused; kept for signature parity with the
///                  sibling extract-element combines.
/// \returns A LASTB node of N's result type on a match, or an empty SDValue
///          when the pattern is not present.
static SDValue
performLastActiveExtractEltCombine(SDNode *N,
                                   TargetLowering::DAGCombinerInfo &DCI,
                                   const AArch64Subtarget *Subtarget) {
  SDValue Vec = N->getOperand(0);
  EVT VecVT = Vec.getValueType();

  // The index pattern below is expressed in terms of vscale, so it can only
  // describe the last lane of a scalable vector.
  if (!VecVT.isScalableVector())
    return SDValue();

  // Strips a single wrapping node with opcode Opc, if there is one.
  // FIXME: Make this more generic. Should be a utility func somewhere?
  auto PeekThrough = [](SDValue V, unsigned Opc) {
    return V.getOpcode() == Opc ? V.getOperand(0) : V;
  };

  // Looking for an add of an inverted value.
  SDValue Index = PeekThrough(N->getOperand(1), ISD::ZERO_EXTEND);
  if (Index.getOpcode() != ISD::ADD)
    return SDValue();

  // Check that one addend is the size of the overall vector...
  // FIXME: What about VSL codegen?
  const unsigned NElts = VecVT.getVectorElementCount().getKnownMinValue();
  auto IsWholeVectorSize = [&](SDValue V) {
    V = PeekThrough(V, ISD::TRUNCATE);
    return V.getOpcode() == ISD::VSCALE && V.getConstantOperandVal(0) == NElts;
  };

  // ADD is commutative, so accept the vscale term on either side.
  SDValue Invert;
  if (IsWholeVectorSize(Index.getOperand(1)))
    Invert = Index.getOperand(0);
  else if (IsWholeVectorSize(Index.getOperand(0)))
    Invert = Index.getOperand(1);
  else
    return SDValue();

  // ...and the other addend is a bitwise NOT. The RHS of the XOR is not
  // guaranteed to be a constant, so test it rather than asserting on it.
  if (Invert.getOpcode() != ISD::XOR ||
      !isAllOnesConstant(Invert.getOperand(1)))
    return SDValue();

  // Check that we're looking at a cttz.elts from a reversed predicate...
  SDValue LZeroes = PeekThrough(Invert.getOperand(0), ISD::TRUNCATE);
  if (LZeroes.getOpcode() != AArch64ISD::CTTZ_ELTS)
    return SDValue();

  SDValue Pred = LZeroes.getOperand(0);
  if (Pred.getOpcode() != ISD::VECTOR_REVERSE)
    return SDValue();

  // The predicate has to govern exactly the lanes of the data vector;
  // otherwise the counted lanes and the extracted lanes do not line up.
  SDValue Pg = Pred.getOperand(0);
  if (Pg.getValueType().getVectorElementCount() !=
      VecVT.getVectorElementCount())
    return SDValue();

  // Matched a LASTB pattern.
  return DCI.DAG.getNode(AArch64ISD::LASTB, SDLoc(N), N->getValueType(0), Pg,
                         Vec);
}
19565+
1951519566
static SDValue
1951619567
performExtractVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
1951719568
const AArch64Subtarget *Subtarget) {
@@ -19520,6 +19571,8 @@ performExtractVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
1952019571
return Res;
1952119572
if (SDValue Res = performLastTrueTestVectorCombine(N, DCI, Subtarget))
1952219573
return Res;
19574+
if (SDValue Res = performLastActiveExtractEltCombine(N, DCI, Subtarget))
19575+
return Res;
1952319576

1952419577
SelectionDAG &DAG = DCI.DAG;
1952519578
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);

llvm/test/CodeGen/AArch64/sve-clastb.ll

Lines changed: 6 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,8 @@
44
define i8 @clastb_i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pg, i8 %existing) {
55
; CHECK-LABEL: clastb_i8:
66
; CHECK: // %bb.0:
7-
; CHECK-NEXT: ptrue p1.b
8-
; CHECK-NEXT: rdvl x9, #1
9-
; CHECK-NEXT: rev p2.b, p0.b
10-
; CHECK-NEXT: brkb p1.b, p1/z, p2.b
11-
; CHECK-NEXT: cntp x8, p1, p1.b
12-
; CHECK-NEXT: mvn w8, w8
13-
; CHECK-NEXT: add w8, w8, w9
14-
; CHECK-NEXT: whilels p1.b, xzr, x8
7+
; CHECK-NEXT: lastb w8, p0, z0.b
158
; CHECK-NEXT: ptest p0, p0.b
16-
; CHECK-NEXT: lastb w8, p1, z0.b
179
; CHECK-NEXT: csel w0, w8, w0, ne
1810
; CHECK-NEXT: ret
1911
%rev.pg = call <vscale x 16 x i1> @llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> %pg)
@@ -31,15 +23,7 @@ define i8 @clastb_i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pg, i8 %exist
3123
define i16 @clastb_i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pg, i16 %existing) {
3224
; CHECK-LABEL: clastb_i16:
3325
; CHECK: // %bb.0:
34-
; CHECK-NEXT: ptrue p1.h
35-
; CHECK-NEXT: cnth x9
36-
; CHECK-NEXT: rev p2.h, p0.h
37-
; CHECK-NEXT: brkb p1.b, p1/z, p2.b
38-
; CHECK-NEXT: cntp x8, p1, p1.h
39-
; CHECK-NEXT: mvn w8, w8
40-
; CHECK-NEXT: add w8, w8, w9
41-
; CHECK-NEXT: whilels p1.h, xzr, x8
42-
; CHECK-NEXT: lastb w8, p1, z0.h
26+
; CHECK-NEXT: lastb w8, p0, z0.h
4327
; CHECK-NEXT: ptrue p1.h
4428
; CHECK-NEXT: ptest p1, p0.b
4529
; CHECK-NEXT: csel w0, w8, w0, ne
@@ -59,15 +43,7 @@ define i16 @clastb_i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pg, i16 %exi
5943
define i32 @clastb_i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i32 %existing) {
6044
; CHECK-LABEL: clastb_i32:
6145
; CHECK: // %bb.0:
62-
; CHECK-NEXT: ptrue p1.s
63-
; CHECK-NEXT: cntw x9
64-
; CHECK-NEXT: rev p2.s, p0.s
65-
; CHECK-NEXT: brkb p1.b, p1/z, p2.b
66-
; CHECK-NEXT: cntp x8, p1, p1.s
67-
; CHECK-NEXT: mvn w8, w8
68-
; CHECK-NEXT: add w8, w8, w9
69-
; CHECK-NEXT: whilels p1.s, xzr, x8
70-
; CHECK-NEXT: lastb w8, p1, z0.s
46+
; CHECK-NEXT: lastb w8, p0, z0.s
7147
; CHECK-NEXT: ptrue p1.s
7248
; CHECK-NEXT: ptest p1, p0.b
7349
; CHECK-NEXT: csel w0, w8, w0, ne
@@ -87,15 +63,7 @@ define i32 @clastb_i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i32 %exi
8763
define i64 @clastb_i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64 %existing) {
8864
; CHECK-LABEL: clastb_i64:
8965
; CHECK: // %bb.0:
90-
; CHECK-NEXT: ptrue p1.d
91-
; CHECK-NEXT: cntd x9
92-
; CHECK-NEXT: rev p2.d, p0.d
93-
; CHECK-NEXT: brkb p1.b, p1/z, p2.b
94-
; CHECK-NEXT: cntp x8, p1, p1.d
95-
; CHECK-NEXT: mvn w8, w8
96-
; CHECK-NEXT: add w8, w8, w9
97-
; CHECK-NEXT: whilels p1.d, xzr, x8
98-
; CHECK-NEXT: lastb x8, p1, z0.d
66+
; CHECK-NEXT: lastb x8, p0, z0.d
9967
; CHECK-NEXT: ptrue p1.d
10068
; CHECK-NEXT: ptest p1, p0.b
10169
; CHECK-NEXT: csel x0, x8, x0, ne
@@ -115,15 +83,7 @@ define i64 @clastb_i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64 %exi
11583
define float @clastb_float(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, float %existing) {
11684
; CHECK-LABEL: clastb_float:
11785
; CHECK: // %bb.0:
118-
; CHECK-NEXT: ptrue p1.s
119-
; CHECK-NEXT: cntw x9
120-
; CHECK-NEXT: rev p2.s, p0.s
121-
; CHECK-NEXT: brkb p1.b, p1/z, p2.b
122-
; CHECK-NEXT: cntp x8, p1, p1.s
123-
; CHECK-NEXT: mvn w8, w8
124-
; CHECK-NEXT: add w8, w8, w9
125-
; CHECK-NEXT: whilels p1.s, xzr, x8
126-
; CHECK-NEXT: lastb s0, p1, z0.s
86+
; CHECK-NEXT: lastb s0, p0, z0.s
12787
; CHECK-NEXT: ptrue p1.s
12888
; CHECK-NEXT: ptest p1, p0.b
12989
; CHECK-NEXT: fcsel s0, s0, s1, ne
@@ -143,15 +103,7 @@ define float @clastb_float(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, fl
143103
define double @clastb_double(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, double %existing) {
144104
; CHECK-LABEL: clastb_double:
145105
; CHECK: // %bb.0:
146-
; CHECK-NEXT: ptrue p1.d
147-
; CHECK-NEXT: cntd x9
148-
; CHECK-NEXT: rev p2.d, p0.d
149-
; CHECK-NEXT: brkb p1.b, p1/z, p2.b
150-
; CHECK-NEXT: cntp x8, p1, p1.d
151-
; CHECK-NEXT: mvn w8, w8
152-
; CHECK-NEXT: add w8, w8, w9
153-
; CHECK-NEXT: whilels p1.d, xzr, x8
154-
; CHECK-NEXT: lastb d0, p1, z0.d
106+
; CHECK-NEXT: lastb d0, p0, z0.d
155107
; CHECK-NEXT: ptrue p1.d
156108
; CHECK-NEXT: ptest p1, p0.b
157109
; CHECK-NEXT: fcsel d0, d0, d1, ne

0 commit comments

Comments
 (0)