rust-lang
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Lines changed: 4 additions & 1 deletion
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
Lines changed: 1 addition & 0 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
Lines changed: 17 additions & 4 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
Lines changed: 5 additions & 4 deletions
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
Lines changed: 11 additions & 0 deletions
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll
Lines changed: 3 additions & 8 deletions
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll
Lines changed: 3 additions & 8 deletions
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll
Lines changed: 3 additions & 8 deletions
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll
Lines changed: 3 additions & 8 deletions
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll
Lines changed: 3 additions & 8 deletions
@@ -2335,6 +2335,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
     MAKE_CASE(AArch64ISD::UUNPKLO)
     MAKE_CASE(AArch64ISD::INSR)
     MAKE_CASE(AArch64ISD::PTEST)
+    MAKE_CASE(AArch64ISD::PTEST_ANY)
     MAKE_CASE(AArch64ISD::PTRUE)
     MAKE_CASE(AArch64ISD::LD1_MERGE_ZERO)
     MAKE_CASE(AArch64ISD::LD1S_MERGE_ZERO)
@@ -17405,7 +17406,9 @@ static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op,
   }
 
   // Set condition code (CC) flags.
-  SDValue Test = DAG.getNode(AArch64ISD::PTEST, DL, MVT::Other, Pg, Op);
+  SDValue Test = DAG.getNode(
+      Cond == AArch64CC::ANY_ACTIVE ? AArch64ISD::PTEST_ANY : AArch64ISD::PTEST,
+      DL, MVT::Other, Pg, Op);
 
   // Convert CC to integer based on requested condition.
   // NOTE: Cond is inverted to promote CSEL's removal when it feeds a compare.
 
@@ -332,6 +332,7 @@ enum NodeType : unsigned {
 
   INSR,
   PTEST,
+  PTEST_ANY,
   PTRUE,
 
   BITREVERSE_MERGE_PASSTHRU,
 
@@ -1122,6 +1122,7 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
   default:
     break;
   case AArch64::PTEST_PP:
+  case AArch64::PTEST_PP_ANY:
     SrcReg = MI.getOperand(0).getReg();
     SrcReg2 = MI.getOperand(1).getReg();
     // Not sure about the mask and value for now...
@@ -1321,13 +1322,21 @@ bool AArch64InstrInfo::optimizePTestInstr(
 
     // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
     // PTEST_LIKE instruction uses the same all active mask and the element
-    // size matches.
+    // size matches. If the PTEST has a condition of any then it is always
+    // redundant.
     if (PredIsPTestLike) {
       auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
-      if (Mask != PTestLikeMask)
+      if (Mask != PTestLikeMask && PTest->getOpcode() != AArch64::PTEST_PP_ANY)
         return false;
     }
 
+    // Fallthough to simply remove the PTEST.
+  } else if ((Mask == Pred) && (PredIsPTestLike || PredIsWhileLike) &&
+             PTest->getOpcode() == AArch64::PTEST_PP_ANY) {
+    // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
+    // instruction that sets the flags as PTEST would. This is only valid when
+    // the condition is any.
+
     // Fallthough to simply remove the PTEST.
   } else if (PredIsPTestLike) {
     // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
@@ -1350,10 +1359,13 @@ bool AArch64InstrInfo::optimizePTestInstr(
     // where the compare generates a canonical all active 32-bit predicate
     // (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
     // active flag, whereas the PTEST instruction with the same mask doesn't.
+    // For PTEST_ANY this doesn't apply as the flags in this case would be
+    // identical regardless of element size.
     auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
     uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
     if ((Mask != PTestLikeMask) ||
-        (PredElementSize != AArch64::ElementSizeB))
+        (PredElementSize != AArch64::ElementSizeB &&
+         PTest->getOpcode() != AArch64::PTEST_PP_ANY))
       return false;
 
     // Fallthough to simply remove the PTEST.
@@ -1473,7 +1485,8 @@ bool AArch64InstrInfo::optimizeCompareInstr(
     return true;
   }
 
-  if (CmpInstr.getOpcode() == AArch64::PTEST_PP)
+  if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
+      CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY)
     return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
 
   if (SrcReg2 != 0)
 
@@ -308,6 +308,7 @@ def AArch64fadda_p : PatFrags<(ops node:$op1, node:$op2, node:$op3),
 
 def SDT_AArch64PTest : SDTypeProfile<0, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
 def AArch64ptest     : SDNode<"AArch64ISD::PTEST", SDT_AArch64PTest>;
+def AArch64ptest_any : SDNode<"AArch64ISD::PTEST_ANY", SDT_AArch64PTest>;
 
 def SDT_AArch64DUP_PRED  : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 3>, SDTCisVec<1>, SDTCVecEltisVT<1,i1>]>;
 def AArch64dup_mt : SDNode<"AArch64ISD::DUP_MERGE_PASSTHRU", SDT_AArch64DUP_PRED>;
@@ -858,10 +859,10 @@ let Predicates = [HasSVEorSME] in {
   defm BRKB_PPmP  : sve_int_break_m<0b101, "brkb",  int_aarch64_sve_brkb>;
   defm BRKBS_PPzP : sve_int_break_z<0b110, "brkbs", null_frag>;
 
-  def PTEST_PP : sve_int_ptest<0b010000, "ptest", AArch64ptest>;
-  defm PFALSE  : sve_int_pfalse<0b000000, "pfalse">;
-  defm PFIRST  : sve_int_pfirst<0b00000, "pfirst", int_aarch64_sve_pfirst>;
-  defm PNEXT   : sve_int_pnext<0b00110, "pnext", int_aarch64_sve_pnext>;
+  defm PTEST_PP : sve_int_ptest<0b010000, "ptest", AArch64ptest, AArch64ptest_any>;
+  defm PFALSE   : sve_int_pfalse<0b000000, "pfalse">;
+  defm PFIRST   : sve_int_pfirst<0b00000, "pfirst", int_aarch64_sve_pfirst>;
+  defm PNEXT    : sve_int_pnext<0b00110, "pnext", int_aarch64_sve_pnext>;
 
   defm AND_PPzPP   : sve_int_pred_log_v2<0b0000, "and", int_aarch64_sve_and_z, and>;
   defm BIC_PPzPP   : sve_int_pred_log_v2<0b0001, "bic", int_aarch64_sve_bic_z, AArch64bic>;
 
@@ -687,6 +687,17 @@ class sve_int_ptest<bits<6> opc, string asm, SDPatternOperator op>
   let isCompare = 1;
 }
 
+// Defines a PTEST instruction plus a companion pseudo for the "any" form.
+// Instantiating this as `defm X` produces two records:
+//   X      - the sve_int_ptest class instantiated with `op` as its pattern.
+//   X_ANY  - a pseudo selected from `op_any` that expands back to X.
+// Parameters:
+//   opc, asm, op - forwarded unchanged to the sve_int_ptest class.
+//   op_any       - pattern operator matched by the _ANY pseudo.
+multiclass sve_int_ptest<bits<6> opc, string asm, SDPatternOperator op,
+                         SDPatternOperator op_any> {
+  // The instruction itself, built from the class of the same name.
+  def NAME : sve_int_ptest<opc, asm, op>;
+
+  // The pseudo has no outputs other than NZCV and is flagged as a compare,
+  // mirroring `isCompare = 1` on the class. It matches op_any on nxv16i1
+  // operands and PseudoInstExpansion rewrites it to NAME with the same
+  // $Pg/$Pn operands.
+  let hasNoSchedulingInfo = 1, isCompare = 1, Defs = [NZCV] in {
+  def _ANY : Pseudo<(outs), (ins PPRAny:$Pg, PPR8:$Pn),
+                    [(op_any (nxv16i1 PPRAny:$Pg), (nxv16i1 PPR8:$Pn))]>,
+             PseudoInstExpansion<(!cast<Instruction>(NAME) PPRAny:$Pg, PPR8:$Pn)>;
+  }
+}
+
 class sve_int_pfirst_next<bits<2> sz8_64, bits<5> opc, string asm,
                           PPRRegOp pprty>
 : I<(outs pprty:$Pdn), (ins PPRAny:$Pg, pprty:$_Pdn),
 
@@ -20,8 +20,7 @@ define i32 @cmpeq_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
 define i32 @cmpeq_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: cmpeq_nxv4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmpeq p1.s, p0/z, z0.s, z1.s
-; CHECK-NEXT:    ptest p0, p1.b
+; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, z1.s
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpeq.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -37,9 +36,7 @@ define i32 @cmpeq_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale
 define i32 @cmpeq_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: cmpeq_imm_nxv16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
-; CHECK-NEXT:    ptest p1, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)
@@ -68,8 +65,7 @@ define i32 @cmpeq_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
 define i32 @cmpeq_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmpeq_wide_nxv8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmpeq p1.h, p0/z, z0.h, z1.d
-; CHECK-NEXT:    ptest p0, p1.b
+; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, z1.d
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
@@ -83,8 +79,7 @@ define i32 @cmpeq_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <v
 define i32 @cmpeq_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmpeq_wide_nxv4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmpeq p1.s, p0/z, z0.s, z1.d
-; CHECK-NEXT:    ptest p0, p1.b
+; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, z1.d
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
 
@@ -20,8 +20,7 @@ define i32 @cmpge_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
 define i32 @cmpge_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: cmpge_nxv4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmpge p1.s, p0/z, z0.s, z1.s
-; CHECK-NEXT:    ptest p0, p1.b
+; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.s
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -37,9 +36,7 @@ define i32 @cmpge_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale
 define i32 @cmpge_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: cmpge_imm_nxv16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, #0
-; CHECK-NEXT:    ptest p1, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)
@@ -68,8 +65,7 @@ define i32 @cmpge_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
 define i32 @cmpge_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmpge_wide_nxv8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmpge p1.h, p0/z, z0.h, z1.d
-; CHECK-NEXT:    ptest p0, p1.b
+; CHECK-NEXT:    cmpge p0.h, p0/z, z0.h, z1.d
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
@@ -83,8 +79,7 @@ define i32 @cmpge_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <v
 define i32 @cmpge_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmpge_wide_nxv4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmpge p1.s, p0/z, z0.s, z1.d
-; CHECK-NEXT:    ptest p0, p1.b
+; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.d
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
 
@@ -20,8 +20,7 @@ define i32 @cmpgt_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
 define i32 @cmpgt_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: cmpgt_nxv4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmpgt p1.s, p0/z, z0.s, z1.s
-; CHECK-NEXT:    ptest p0, p1.b
+; CHECK-NEXT:    cmpgt p0.s, p0/z, z0.s, z1.s
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpgt.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -37,9 +36,7 @@ define i32 @cmpgt_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale
 define i32 @cmpgt_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: cmpgt_imm_nxv16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    cmpgt p0.b, p0/z, z0.b, #0
-; CHECK-NEXT:    ptest p1, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)
@@ -68,8 +65,7 @@ define i32 @cmpgt_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
 define i32 @cmpgt_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmpgt_wide_nxv8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmpgt p1.h, p0/z, z0.h, z1.d
-; CHECK-NEXT:    ptest p0, p1.b
+; CHECK-NEXT:    cmpgt p0.h, p0/z, z0.h, z1.d
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
@@ -83,8 +79,7 @@ define i32 @cmpgt_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <v
 define i32 @cmpgt_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmpgt_wide_nxv4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmpgt p1.s, p0/z, z0.s, z1.d
-; CHECK-NEXT:    ptest p0, p1.b
+; CHECK-NEXT:    cmpgt p0.s, p0/z, z0.s, z1.d
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
 
@@ -20,8 +20,7 @@ define i32 @cmphi_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
 define i32 @cmphi_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: cmphi_nxv4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmphi p1.s, p0/z, z0.s, z1.s
-; CHECK-NEXT:    ptest p0, p1.b
+; CHECK-NEXT:    cmphi p0.s, p0/z, z0.s, z1.s
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmphi.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -38,9 +37,7 @@ define i32 @cmphi_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale
 define i32 @cmphi_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: cmphi_imm_nxv16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    cmphi p0.b, p0/z, z0.b, #0
-; CHECK-NEXT:    ptest p1, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)
@@ -69,8 +66,7 @@ define i32 @cmphi_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
 define i32 @cmphi_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmphi_wide_nxv8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmphi p1.h, p0/z, z0.h, z1.d
-; CHECK-NEXT:    ptest p0, p1.b
+; CHECK-NEXT:    cmphi p0.h, p0/z, z0.h, z1.d
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
@@ -84,8 +80,7 @@ define i32 @cmphi_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <v
 define i32 @cmphi_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmphi_wide_nxv4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmphi p1.s, p0/z, z0.s, z1.d
-; CHECK-NEXT:    ptest p0, p1.b
+; CHECK-NEXT:    cmphi p0.s, p0/z, z0.s, z1.d
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
 
@@ -20,8 +20,7 @@ define i32 @cmphs_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
 define i32 @cmphs_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: cmphs_nxv4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmphs p1.s, p0/z, z0.s, z1.s
-; CHECK-NEXT:    ptest p0, p1.b
+; CHECK-NEXT:    cmphs p0.s, p0/z, z0.s, z1.s
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmphs.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -37,9 +36,7 @@ define i32 @cmphs_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale
 define i32 @cmphs_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: cmphs_imm_nxv16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    cmphs p0.b, p0/z, z0.b, #0
-; CHECK-NEXT:    ptest p1, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)
@@ -68,8 +65,7 @@ define i32 @cmphs_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
 define i32 @cmphs_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmphs_wide_nxv8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmphs p1.h, p0/z, z0.h, z1.d
-; CHECK-NEXT:    ptest p0, p1.b
+; CHECK-NEXT:    cmphs p0.h, p0/z, z0.h, z1.d
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
@@ -83,8 +79,7 @@ define i32 @cmphs_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <v
 define i32 @cmphs_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cmphs_wide_nxv4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmphs p1.s, p0/z, z0.s, z1.d
-; CHECK-NEXT:    ptest p0, p1.b
+; CHECK-NEXT:    cmphs p0.s, p0/z, z0.s, z1.d
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)