Skip to content

Commit 2fb3e3c

Browse files
committed
[AArch64][SVE] Add PTEST_ANY pseudo instruction
This allows recognition of when a PTEST was emitted for an "any" condition, which enables extra optimizations to be done later. This addresses missing optimizations from D137716 and D137718, and partially from D137717. Depends on D137716, D137717 and D137718. Differential Revision: https://reviews.llvm.org/D137930
1 parent 8051c1d commit 2fb3e3c

18 files changed

+65
-98
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2335,6 +2335,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
23352335
MAKE_CASE(AArch64ISD::UUNPKLO)
23362336
MAKE_CASE(AArch64ISD::INSR)
23372337
MAKE_CASE(AArch64ISD::PTEST)
2338+
MAKE_CASE(AArch64ISD::PTEST_ANY)
23382339
MAKE_CASE(AArch64ISD::PTRUE)
23392340
MAKE_CASE(AArch64ISD::LD1_MERGE_ZERO)
23402341
MAKE_CASE(AArch64ISD::LD1S_MERGE_ZERO)
@@ -17405,7 +17406,9 @@ static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op,
1740517406
}
1740617407

1740717408
// Set condition code (CC) flags.
17408-
SDValue Test = DAG.getNode(AArch64ISD::PTEST, DL, MVT::Other, Pg, Op);
17409+
SDValue Test = DAG.getNode(
17410+
Cond == AArch64CC::ANY_ACTIVE ? AArch64ISD::PTEST_ANY : AArch64ISD::PTEST,
17411+
DL, MVT::Other, Pg, Op);
1740917412

1741017413
// Convert CC to integer based on requested condition.
1741117414
// NOTE: Cond is inverted to promote CSEL's removal when it feeds a compare.

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,7 @@ enum NodeType : unsigned {
332332

333333
INSR,
334334
PTEST,
335+
PTEST_ANY,
335336
PTRUE,
336337

337338
BITREVERSE_MERGE_PASSTHRU,

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1122,6 +1122,7 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
11221122
default:
11231123
break;
11241124
case AArch64::PTEST_PP:
1125+
case AArch64::PTEST_PP_ANY:
11251126
SrcReg = MI.getOperand(0).getReg();
11261127
SrcReg2 = MI.getOperand(1).getReg();
11271128
// Not sure about the mask and value for now...
@@ -1321,13 +1322,21 @@ bool AArch64InstrInfo::optimizePTestInstr(
13211322

13221323
// For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
13231324
// PTEST_LIKE instruction uses the same all active mask and the element
1324-
// size matches.
1325+
// size matches. If the PTEST has a condition of any then it is always
1326+
// redundant.
13251327
if (PredIsPTestLike) {
13261328
auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1327-
if (Mask != PTestLikeMask)
1329+
if (Mask != PTestLikeMask && PTest->getOpcode() != AArch64::PTEST_PP_ANY)
13281330
return false;
13291331
}
13301332

1333+
// Fallthrough to simply remove the PTEST.
1334+
} else if ((Mask == Pred) && (PredIsPTestLike || PredIsWhileLike) &&
1335+
PTest->getOpcode() == AArch64::PTEST_PP_ANY) {
1336+
// For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1337+
// instruction that sets the flags as PTEST would. This is only valid when
1338+
// the condition is any.
1339+
13311340
// Fallthrough to simply remove the PTEST.
13321341
} else if (PredIsPTestLike) {
13331342
// For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
@@ -1350,10 +1359,13 @@ bool AArch64InstrInfo::optimizePTestInstr(
13501359
// where the compare generates a canonical all active 32-bit predicate
13511360
// (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
13521361
// active flag, whereas the PTEST instruction with the same mask doesn't.
1362+
// For PTEST_ANY this doesn't apply as the flags in this case would be
1363+
// identical regardless of element size.
13531364
auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
13541365
uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
13551366
if ((Mask != PTestLikeMask) ||
1356-
(PredElementSize != AArch64::ElementSizeB))
1367+
(PredElementSize != AArch64::ElementSizeB &&
1368+
PTest->getOpcode() != AArch64::PTEST_PP_ANY))
13571369
return false;
13581370

13591371
// Fallthrough to simply remove the PTEST.
@@ -1473,7 +1485,8 @@ bool AArch64InstrInfo::optimizeCompareInstr(
14731485
return true;
14741486
}
14751487

1476-
if (CmpInstr.getOpcode() == AArch64::PTEST_PP)
1488+
if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
1489+
CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY)
14771490
return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
14781491

14791492
if (SrcReg2 != 0)

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,7 @@ def AArch64fadda_p : PatFrags<(ops node:$op1, node:$op2, node:$op3),
308308

309309
def SDT_AArch64PTest : SDTypeProfile<0, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
310310
def AArch64ptest : SDNode<"AArch64ISD::PTEST", SDT_AArch64PTest>;
311+
def AArch64ptest_any : SDNode<"AArch64ISD::PTEST_ANY", SDT_AArch64PTest>;
311312

312313
def SDT_AArch64DUP_PRED : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 3>, SDTCisVec<1>, SDTCVecEltisVT<1,i1>]>;
313314
def AArch64dup_mt : SDNode<"AArch64ISD::DUP_MERGE_PASSTHRU", SDT_AArch64DUP_PRED>;
@@ -858,10 +859,10 @@ let Predicates = [HasSVEorSME] in {
858859
defm BRKB_PPmP : sve_int_break_m<0b101, "brkb", int_aarch64_sve_brkb>;
859860
defm BRKBS_PPzP : sve_int_break_z<0b110, "brkbs", null_frag>;
860861

861-
def PTEST_PP : sve_int_ptest<0b010000, "ptest", AArch64ptest>;
862-
defm PFALSE : sve_int_pfalse<0b000000, "pfalse">;
863-
defm PFIRST : sve_int_pfirst<0b00000, "pfirst", int_aarch64_sve_pfirst>;
864-
defm PNEXT : sve_int_pnext<0b00110, "pnext", int_aarch64_sve_pnext>;
862+
defm PTEST_PP : sve_int_ptest<0b010000, "ptest", AArch64ptest, AArch64ptest_any>;
863+
defm PFALSE : sve_int_pfalse<0b000000, "pfalse">;
864+
defm PFIRST : sve_int_pfirst<0b00000, "pfirst", int_aarch64_sve_pfirst>;
865+
defm PNEXT : sve_int_pnext<0b00110, "pnext", int_aarch64_sve_pnext>;
865866

866867
defm AND_PPzPP : sve_int_pred_log_v2<0b0000, "and", int_aarch64_sve_and_z, and>;
867868
defm BIC_PPzPP : sve_int_pred_log_v2<0b0001, "bic", int_aarch64_sve_bic_z, AArch64bic>;

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -687,6 +687,17 @@ class sve_int_ptest<bits<6> opc, string asm, SDPatternOperator op>
687687
let isCompare = 1;
688688
}
689689

690+
multiclass sve_int_ptest<bits<6> opc, string asm, SDPatternOperator op,
691+
SDPatternOperator op_any> {
692+
def NAME : sve_int_ptest<opc, asm, op>;
693+
694+
let hasNoSchedulingInfo = 1, isCompare = 1, Defs = [NZCV] in {
695+
def _ANY : Pseudo<(outs), (ins PPRAny:$Pg, PPR8:$Pn),
696+
[(op_any (nxv16i1 PPRAny:$Pg), (nxv16i1 PPR8:$Pn))]>,
697+
PseudoInstExpansion<(!cast<Instruction>(NAME) PPRAny:$Pg, PPR8:$Pn)>;
698+
}
699+
}
700+
690701
class sve_int_pfirst_next<bits<2> sz8_64, bits<5> opc, string asm,
691702
PPRRegOp pprty>
692703
: I<(outs pprty:$Pdn), (ins PPRAny:$Pg, pprty:$_Pdn),

llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,7 @@ define i32 @cmpeq_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
2020
define i32 @cmpeq_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
2121
; CHECK-LABEL: cmpeq_nxv4i32:
2222
; CHECK: // %bb.0:
23-
; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s
24-
; CHECK-NEXT: ptest p0, p1.b
23+
; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s
2524
; CHECK-NEXT: cset w0, ne
2625
; CHECK-NEXT: ret
2726
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpeq.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -37,9 +36,7 @@ define i32 @cmpeq_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale
3736
define i32 @cmpeq_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
3837
; CHECK-LABEL: cmpeq_imm_nxv16i8:
3938
; CHECK: // %bb.0:
40-
; CHECK-NEXT: ptrue p1.b
4139
; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
42-
; CHECK-NEXT: ptest p1, p0.b
4340
; CHECK-NEXT: cset w0, ne
4441
; CHECK-NEXT: ret
4542
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)
@@ -68,8 +65,7 @@ define i32 @cmpeq_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
6865
define i32 @cmpeq_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
6966
; CHECK-LABEL: cmpeq_wide_nxv8i16:
7067
; CHECK: // %bb.0:
71-
; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, z1.d
72-
; CHECK-NEXT: ptest p0, p1.b
68+
; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, z1.d
7369
; CHECK-NEXT: cset w0, ne
7470
; CHECK-NEXT: ret
7571
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
@@ -83,8 +79,7 @@ define i32 @cmpeq_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <v
8379
define i32 @cmpeq_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
8480
; CHECK-LABEL: cmpeq_wide_nxv4i32:
8581
; CHECK: // %bb.0:
86-
; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, z1.d
87-
; CHECK-NEXT: ptest p0, p1.b
82+
; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.d
8883
; CHECK-NEXT: cset w0, ne
8984
; CHECK-NEXT: ret
9085
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)

llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,7 @@ define i32 @cmpge_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
2020
define i32 @cmpge_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
2121
; CHECK-LABEL: cmpge_nxv4i32:
2222
; CHECK: // %bb.0:
23-
; CHECK-NEXT: cmpge p1.s, p0/z, z0.s, z1.s
24-
; CHECK-NEXT: ptest p0, p1.b
23+
; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
2524
; CHECK-NEXT: cset w0, ne
2625
; CHECK-NEXT: ret
2726
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -37,9 +36,7 @@ define i32 @cmpge_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale
3736
define i32 @cmpge_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
3837
; CHECK-LABEL: cmpge_imm_nxv16i8:
3938
; CHECK: // %bb.0:
40-
; CHECK-NEXT: ptrue p1.b
4139
; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, #0
42-
; CHECK-NEXT: ptest p1, p0.b
4340
; CHECK-NEXT: cset w0, ne
4441
; CHECK-NEXT: ret
4542
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)
@@ -68,8 +65,7 @@ define i32 @cmpge_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
6865
define i32 @cmpge_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
6966
; CHECK-LABEL: cmpge_wide_nxv8i16:
7067
; CHECK: // %bb.0:
71-
; CHECK-NEXT: cmpge p1.h, p0/z, z0.h, z1.d
72-
; CHECK-NEXT: ptest p0, p1.b
68+
; CHECK-NEXT: cmpge p0.h, p0/z, z0.h, z1.d
7369
; CHECK-NEXT: cset w0, ne
7470
; CHECK-NEXT: ret
7571
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
@@ -83,8 +79,7 @@ define i32 @cmpge_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <v
8379
define i32 @cmpge_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
8480
; CHECK-LABEL: cmpge_wide_nxv4i32:
8581
; CHECK: // %bb.0:
86-
; CHECK-NEXT: cmpge p1.s, p0/z, z0.s, z1.d
87-
; CHECK-NEXT: ptest p0, p1.b
82+
; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.d
8883
; CHECK-NEXT: cset w0, ne
8984
; CHECK-NEXT: ret
9085
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)

llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,7 @@ define i32 @cmpgt_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
2020
define i32 @cmpgt_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
2121
; CHECK-LABEL: cmpgt_nxv4i32:
2222
; CHECK: // %bb.0:
23-
; CHECK-NEXT: cmpgt p1.s, p0/z, z0.s, z1.s
24-
; CHECK-NEXT: ptest p0, p1.b
23+
; CHECK-NEXT: cmpgt p0.s, p0/z, z0.s, z1.s
2524
; CHECK-NEXT: cset w0, ne
2625
; CHECK-NEXT: ret
2726
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpgt.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -37,9 +36,7 @@ define i32 @cmpgt_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale
3736
define i32 @cmpgt_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
3837
; CHECK-LABEL: cmpgt_imm_nxv16i8:
3938
; CHECK: // %bb.0:
40-
; CHECK-NEXT: ptrue p1.b
4139
; CHECK-NEXT: cmpgt p0.b, p0/z, z0.b, #0
42-
; CHECK-NEXT: ptest p1, p0.b
4340
; CHECK-NEXT: cset w0, ne
4441
; CHECK-NEXT: ret
4542
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)
@@ -68,8 +65,7 @@ define i32 @cmpgt_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
6865
define i32 @cmpgt_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
6966
; CHECK-LABEL: cmpgt_wide_nxv8i16:
7067
; CHECK: // %bb.0:
71-
; CHECK-NEXT: cmpgt p1.h, p0/z, z0.h, z1.d
72-
; CHECK-NEXT: ptest p0, p1.b
68+
; CHECK-NEXT: cmpgt p0.h, p0/z, z0.h, z1.d
7369
; CHECK-NEXT: cset w0, ne
7470
; CHECK-NEXT: ret
7571
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
@@ -83,8 +79,7 @@ define i32 @cmpgt_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <v
8379
define i32 @cmpgt_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
8480
; CHECK-LABEL: cmpgt_wide_nxv4i32:
8581
; CHECK: // %bb.0:
86-
; CHECK-NEXT: cmpgt p1.s, p0/z, z0.s, z1.d
87-
; CHECK-NEXT: ptest p0, p1.b
82+
; CHECK-NEXT: cmpgt p0.s, p0/z, z0.s, z1.d
8883
; CHECK-NEXT: cset w0, ne
8984
; CHECK-NEXT: ret
9085
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)

llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,7 @@ define i32 @cmphi_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
2020
define i32 @cmphi_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
2121
; CHECK-LABEL: cmphi_nxv4i32:
2222
; CHECK: // %bb.0:
23-
; CHECK-NEXT: cmphi p1.s, p0/z, z0.s, z1.s
24-
; CHECK-NEXT: ptest p0, p1.b
23+
; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z1.s
2524
; CHECK-NEXT: cset w0, ne
2625
; CHECK-NEXT: ret
2726
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmphi.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -38,9 +37,7 @@ define i32 @cmphi_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale
3837
define i32 @cmphi_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
3938
; CHECK-LABEL: cmphi_imm_nxv16i8:
4039
; CHECK: // %bb.0:
41-
; CHECK-NEXT: ptrue p1.b
4240
; CHECK-NEXT: cmphi p0.b, p0/z, z0.b, #0
43-
; CHECK-NEXT: ptest p1, p0.b
4441
; CHECK-NEXT: cset w0, ne
4542
; CHECK-NEXT: ret
4643
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)
@@ -69,8 +66,7 @@ define i32 @cmphi_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
6966
define i32 @cmphi_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
7067
; CHECK-LABEL: cmphi_wide_nxv8i16:
7168
; CHECK: // %bb.0:
72-
; CHECK-NEXT: cmphi p1.h, p0/z, z0.h, z1.d
73-
; CHECK-NEXT: ptest p0, p1.b
69+
; CHECK-NEXT: cmphi p0.h, p0/z, z0.h, z1.d
7470
; CHECK-NEXT: cset w0, ne
7571
; CHECK-NEXT: ret
7672
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
@@ -84,8 +80,7 @@ define i32 @cmphi_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <v
8480
define i32 @cmphi_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
8581
; CHECK-LABEL: cmphi_wide_nxv4i32:
8682
; CHECK: // %bb.0:
87-
; CHECK-NEXT: cmphi p1.s, p0/z, z0.s, z1.d
88-
; CHECK-NEXT: ptest p0, p1.b
83+
; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z1.d
8984
; CHECK-NEXT: cset w0, ne
9085
; CHECK-NEXT: ret
9186
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)

llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,7 @@ define i32 @cmphs_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
2020
define i32 @cmphs_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
2121
; CHECK-LABEL: cmphs_nxv4i32:
2222
; CHECK: // %bb.0:
23-
; CHECK-NEXT: cmphs p1.s, p0/z, z0.s, z1.s
24-
; CHECK-NEXT: ptest p0, p1.b
23+
; CHECK-NEXT: cmphs p0.s, p0/z, z0.s, z1.s
2524
; CHECK-NEXT: cset w0, ne
2625
; CHECK-NEXT: ret
2726
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmphs.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -37,9 +36,7 @@ define i32 @cmphs_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale
3736
define i32 @cmphs_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
3837
; CHECK-LABEL: cmphs_imm_nxv16i8:
3938
; CHECK: // %bb.0:
40-
; CHECK-NEXT: ptrue p1.b
4139
; CHECK-NEXT: cmphs p0.b, p0/z, z0.b, #0
42-
; CHECK-NEXT: ptest p1, p0.b
4340
; CHECK-NEXT: cset w0, ne
4441
; CHECK-NEXT: ret
4542
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)
@@ -68,8 +65,7 @@ define i32 @cmphs_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
6865
define i32 @cmphs_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
6966
; CHECK-LABEL: cmphs_wide_nxv8i16:
7067
; CHECK: // %bb.0:
71-
; CHECK-NEXT: cmphs p1.h, p0/z, z0.h, z1.d
72-
; CHECK-NEXT: ptest p0, p1.b
68+
; CHECK-NEXT: cmphs p0.h, p0/z, z0.h, z1.d
7369
; CHECK-NEXT: cset w0, ne
7470
; CHECK-NEXT: ret
7571
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
@@ -83,8 +79,7 @@ define i32 @cmphs_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <v
8379
define i32 @cmphs_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
8480
; CHECK-LABEL: cmphs_wide_nxv4i32:
8581
; CHECK: // %bb.0:
86-
; CHECK-NEXT: cmphs p1.s, p0/z, z0.s, z1.d
87-
; CHECK-NEXT: ptest p0, p1.b
82+
; CHECK-NEXT: cmphs p0.s, p0/z, z0.s, z1.d
8883
; CHECK-NEXT: cset w0, ne
8984
; CHECK-NEXT: ret
9085
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)

0 commit comments

Comments
 (0)