Skip to content

Commit e45669a

Browse files
committed
Combine csel+lastb -> clastb
1 parent 1759f0e commit e45669a

File tree

2 files changed

+47
-23
lines changed

2 files changed

+47
-23
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25058,6 +25058,41 @@ static SDValue reassociateCSELOperandsForCSE(SDNode *N, SelectionDAG &DAG) {
2505825058
}
2505925059
}
2506025060

25061+
// Try to fold a CSEL that picks between a LASTB result and a fallback value,
// conditioned on "any lane of the LASTB predicate is active", into a single
// conditional-last node:
//
//   CSEL (LASTB P, Z), X, NE, (PTEST_ANY PG, P) -> CLASTB_N P, X, Z
//
// \p Op is the CSEL node; its operands are read as (0) the value chosen when
// the condition holds, (1) the fallback value, (2) the condition code and
// (3) the flag-producing node.
//
// The governing predicate PG of the PTEST_ANY must not hide any lane of P from
// the test, otherwise the CSEL could pick the fallback while CLASTB_N would
// still pick a vector element. PG is therefore accepted only when it is P
// itself, or a PTRUE with the "all" pattern that has at least as many lanes
// as P.
//
// Returns the replacement CLASTB_N node, or an empty SDValue when the pattern
// does not match.
static SDValue foldCSELofLASTB(SDNode *Op, SelectionDAG &DAG) {
  AArch64CC::CondCode OpCC =
      static_cast<AArch64CC::CondCode>(Op->getConstantOperandVal(2));

  // Only the "some lane was active" form of the select is handled.
  if (OpCC != AArch64CC::NE)
    return SDValue();

  SDValue PTest = Op->getOperand(3);
  if (PTest.getOpcode() != AArch64ISD::PTEST_ANY)
    return SDValue();

  SDValue TruePred = PTest.getOperand(0);
  SDValue AnyPred = PTest.getOperand(1);

  // Look through a single predicate reinterpret on either PTEST operand so the
  // comparisons below see the underlying predicate values.
  if (TruePred.getOpcode() == AArch64ISD::REINTERPRET_CAST)
    TruePred = TruePred.getOperand(0);

  if (AnyPred.getOpcode() == AArch64ISD::REINTERPRET_CAST)
    AnyPred = AnyPred.getOperand(0);

  if (TruePred != AnyPred) {
    // A PTRUE with a partial pattern (e.g. vl4) only tests a prefix of the
    // lanes, so it does not prove that every lane of AnyPred is inactive.
    if (TruePred.getOpcode() != AArch64ISD::PTRUE ||
        TruePred.getConstantOperandVal(0) != AArch64SVEPredPattern::all)
      return SDValue();

    // A PTRUE with fewer lanes than AnyPred skips some of AnyPred's lanes in
    // the test, which CLASTB_N would still honour.
    if (TruePred.getValueType().getVectorMinNumElements() <
        AnyPred.getValueType().getVectorMinNumElements())
      return SDValue();
  }

  SDValue LastB = Op->getOperand(0);
  SDValue Default = Op->getOperand(1);

  // The LASTB must be governed by the same predicate that the PTEST examined.
  if (LastB.getOpcode() != AArch64ISD::LASTB || LastB.getOperand(0) != AnyPred)
    return SDValue();

  SDValue Vec = LastB.getOperand(1);

  return DAG.getNode(AArch64ISD::CLASTB_N, SDLoc(Op), Op->getValueType(0),
                     AnyPred, Default, Vec);
}
25095+
2506125096
// Optimize CSEL instructions
2506225097
static SDValue performCSELCombine(SDNode *N,
2506325098
TargetLowering::DAGCombinerInfo &DCI,
@@ -25103,6 +25138,10 @@ static SDValue performCSELCombine(SDNode *N,
2510325138
}
2510425139
}
2510525140

25141+
// CSEL (LASTB P, Z), X, NE(ANY P) -> CLASTB P, X, Z
25142+
if (SDValue CondLast = foldCSELofLASTB(N, DAG))
25143+
return CondLast;
25144+
2510625145
return performCONDCombine(N, DCI, DAG, 2, 3);
2510725146
}
2510825147

llvm/test/CodeGen/AArch64/vector-extract-last-active.ll

Lines changed: 8 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -293,9 +293,7 @@ define double @extract_last_double(<2 x double> %data, <2 x i64> %mask, double %
293293
define i8 @extract_last_i8_scalable(<vscale x 16 x i8> %data, <vscale x 16 x i1> %mask, i8 %passthru) #0 {
294294
; CHECK-LABEL: extract_last_i8_scalable:
295295
; CHECK: // %bb.0:
296-
; CHECK-NEXT: lastb w8, p0, z0.b
297-
; CHECK-NEXT: ptest p0, p0.b
298-
; CHECK-NEXT: csel w0, w8, w0, ne
296+
; CHECK-NEXT: clastb w0, p0, w0, z0.b
299297
; CHECK-NEXT: ret
300298
%res = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %mask, i8 %passthru)
301299
ret i8 %res
@@ -304,10 +302,7 @@ define i8 @extract_last_i8_scalable(<vscale x 16 x i8> %data, <vscale x 16 x i1>
304302
define i16 @extract_last_i16_scalable(<vscale x 8 x i16> %data, <vscale x 8 x i1> %mask, i16 %passthru) #0 {
305303
; CHECK-LABEL: extract_last_i16_scalable:
306304
; CHECK: // %bb.0:
307-
; CHECK-NEXT: lastb w8, p0, z0.h
308-
; CHECK-NEXT: ptrue p1.h
309-
; CHECK-NEXT: ptest p1, p0.b
310-
; CHECK-NEXT: csel w0, w8, w0, ne
305+
; CHECK-NEXT: clastb w0, p0, w0, z0.h
311306
; CHECK-NEXT: ret
312307
%res = call i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %mask, i16 %passthru)
313308
ret i16 %res
@@ -316,10 +311,7 @@ define i16 @extract_last_i16_scalable(<vscale x 8 x i16> %data, <vscale x 8 x i1
316311
define i32 @extract_last_i32_scalable(<vscale x 4 x i32> %data, <vscale x 4 x i1> %mask, i32 %passthru) #0 {
317312
; CHECK-LABEL: extract_last_i32_scalable:
318313
; CHECK: // %bb.0:
319-
; CHECK-NEXT: lastb w8, p0, z0.s
320-
; CHECK-NEXT: ptrue p1.s
321-
; CHECK-NEXT: ptest p1, p0.b
322-
; CHECK-NEXT: csel w0, w8, w0, ne
314+
; CHECK-NEXT: clastb w0, p0, w0, z0.s
323315
; CHECK-NEXT: ret
324316
%res = call i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %mask, i32 %passthru)
325317
ret i32 %res
@@ -328,10 +320,7 @@ define i32 @extract_last_i32_scalable(<vscale x 4 x i32> %data, <vscale x 4 x i1
328320
define i64 @extract_last_i64_scalable(<vscale x 2 x i64> %data, <vscale x 2 x i1> %mask, i64 %passthru) #0 {
329321
; CHECK-LABEL: extract_last_i64_scalable:
330322
; CHECK: // %bb.0:
331-
; CHECK-NEXT: lastb x8, p0, z0.d
332-
; CHECK-NEXT: ptrue p1.d
333-
; CHECK-NEXT: ptest p1, p0.b
334-
; CHECK-NEXT: csel x0, x8, x0, ne
323+
; CHECK-NEXT: clastb x0, p0, x0, z0.d
335324
; CHECK-NEXT: ret
336325
%res = call i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %mask, i64 %passthru)
337326
ret i64 %res
@@ -340,10 +329,8 @@ define i64 @extract_last_i64_scalable(<vscale x 2 x i64> %data, <vscale x 2 x i1
340329
define float @extract_last_float_scalable(<vscale x 4 x float> %data, <vscale x 4 x i1> %mask, float %passthru) #0 {
341330
; CHECK-LABEL: extract_last_float_scalable:
342331
; CHECK: // %bb.0:
343-
; CHECK-NEXT: lastb s0, p0, z0.s
344-
; CHECK-NEXT: ptrue p1.s
345-
; CHECK-NEXT: ptest p1, p0.b
346-
; CHECK-NEXT: fcsel s0, s0, s1, ne
332+
; CHECK-NEXT: clastb s1, p0, s1, z0.s
333+
; CHECK-NEXT: fmov s0, s1
347334
; CHECK-NEXT: ret
348335
%res = call float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float> %data, <vscale x 4 x i1> %mask, float %passthru)
349336
ret float %res
@@ -352,10 +339,8 @@ define float @extract_last_float_scalable(<vscale x 4 x float> %data, <vscale x
352339
define double @extract_last_double_scalable(<vscale x 2 x double> %data, <vscale x 2 x i1> %mask, double %passthru) #0 {
353340
; CHECK-LABEL: extract_last_double_scalable:
354341
; CHECK: // %bb.0:
355-
; CHECK-NEXT: lastb d0, p0, z0.d
356-
; CHECK-NEXT: ptrue p1.d
357-
; CHECK-NEXT: ptest p1, p0.b
358-
; CHECK-NEXT: fcsel d0, d0, d1, ne
342+
; CHECK-NEXT: clastb d1, p0, d1, z0.d
343+
; CHECK-NEXT: fmov d0, d1
359344
; CHECK-NEXT: ret
360345
%res = call double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x i1> %mask, double %passthru)
361346
ret double %res

0 commit comments

Comments
 (0)