llvm · fzou1 · Mar 12, 2025 · Dec 19, 2024 · Mar 10, 2025 · Mar 10, 2025
diff --git a/llvm/lib/Target/X86/X86InstrConditionalCompare.td b/llvm/lib/Target/X86/X86InstrConditionalCompare.td
@@ -36,7 +36,7 @@ class Ctest<bits<8> o, Format f, X86TypeInfo t, DAGOperand op1, DAGOperand op2>:
 //===----------------------------------------------------------------------===//
 // CCMP Instructions
 //
-let SchedRW = [WriteALU] in {
+let SchedRW = [WriteALU], isCompare = 1 in {
   def CCMP8rr : Ccmp<0x38, MRMDestReg, Xi8,  GR8,  GR8>;
   def CCMP16rr: Ccmp<0x39, MRMDestReg, Xi16, GR16, GR16>, PD;
   def CCMP32rr: Ccmp<0x39, MRMDestReg, Xi32, GR32, GR32>;
@@ -55,7 +55,7 @@ let SchedRW = [WriteALU] in {
   def CCMP64ri32: Ccmp<0x81, MRM7r, Xi64, GR64, i64i32imm>;
 }
 
-let mayLoad = 1 in {
+let mayLoad = 1, isCompare = 1 in {
   let SchedRW = [WriteALU.Folded] in {
     def CCMP16mi8: Ccmp<0x83, MRM7m, Xi16, i16mem, i16i8imm>, PD;
     def CCMP32mi8: Ccmp<0x83, MRM7m, Xi32, i32mem, i32i8imm>;

diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4854,6 +4854,10 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
   case X86::CMP32ri:
   case X86::CMP16ri:
   case X86::CMP8ri:
+  case X86::CCMP64ri32:
+  case X86::CCMP32ri:
+  case X86::CCMP16ri:
+  case X86::CCMP8ri:
     SrcReg = MI.getOperand(0).getReg();
     SrcReg2 = 0;
     if (MI.getOperand(1).isImm()) {
@@ -4951,6 +4955,18 @@ bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI,
     }
     return false;
   }
+  case X86::CCMP64ri32:
+  case X86::CCMP32ri:
+  case X86::CCMP16ri:
+  case X86::CCMP8ri: {
+    // The CCMP instruction should not be optimized if the scc/dfv in it is not
+    // same as the one in previous CCMP instruction.
+    if ((FlagI.getOpcode() != OI.getOpcode()) ||
+        (OI.getOperand(2).getImm() != FlagI.getOperand(2).getImm()) ||
+        (OI.getOperand(3).getImm() != FlagI.getOperand(3).getImm()))
+      return false;
+    [[fallthrough]];
+  }
   case X86::CMP64ri32:
   case X86::CMP32ri:
   case X86::CMP16ri:

diff --git a/llvm/test/CodeGen/X86/apx/ccmp.ll b/llvm/test/CodeGen/X86/apx/ccmp.ll
@@ -1300,5 +1300,75 @@ if.end:                                           ; preds = %entry, %if.then
   ret void
 }
 
+define void @ccmp_continous_adjust_imm(i32 noundef %a, i32 noundef %b) {
+; CHECK-LABEL: ccmp_continous_adjust_imm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmpl $2, %edi # encoding: [0x83,0xff,0x02]
+; CHECK-NEXT:    ccmpll {dfv=} $2, %esi # encoding: [0x62,0xf4,0x04,0x0c,0x83,0xfe,0x02]
+; CHECK-NEXT:    jg .LBB31_1 # encoding: [0x7f,A]
+; CHECK-NEXT:    # fixup A - offset: 1, value: .LBB31_1-1, kind: FK_PCRel_1
+; CHECK-NEXT:  # %bb.2: # %if.then
+; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK-NEXT:    jmp foo # TAILCALL
+; CHECK-NEXT:    # encoding: [0xeb,A]
+; CHECK-NEXT:    # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
+; CHECK-NEXT:  .LBB31_1: # %if.end
+; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: ccmp_continous_adjust_imm:
+; NDD:       # %bb.0: # %entry
+; NDD-NEXT:    cmpl $2, %edi # encoding: [0x83,0xff,0x02]
+; NDD-NEXT:    ccmpll {dfv=} $2, %esi # encoding: [0x62,0xf4,0x04,0x0c,0x83,0xfe,0x02]
+; NDD-NEXT:    jg .LBB31_1 # encoding: [0x7f,A]
+; NDD-NEXT:    # fixup A - offset: 1, value: .LBB31_1-1, kind: FK_PCRel_1
+; NDD-NEXT:  # %bb.2: # %if.then
+; NDD-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT:    jmp foo # TAILCALL
+; NDD-NEXT:    # encoding: [0xeb,A]
+; NDD-NEXT:    # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
+; NDD-NEXT:  .LBB31_1: # %if.end
+; NDD-NEXT:    retq # encoding: [0xc3]
+entry:
+  %cmp = icmp slt i32 %a, 2
+  %cmp1 = icmp slt i32 %b, 2
+  %or.cond = and i1 %cmp, %cmp1
+  %cmp3 = icmp slt i32 %b, 3
+  %or.cond4 = and i1 %or.cond, %cmp3
+  br i1 %or.cond4, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void (...) @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define i32 @ccmp_continous_nobranch_adjust_imm(i32 noundef %a, i32 noundef %b) {
+; CHECK-LABEL: ccmp_continous_nobranch_adjust_imm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmpl $2, %esi # encoding: [0x83,0xfe,0x02]
+; CHECK-NEXT:    ccmpgl {dfv=} $2, %edi # encoding: [0x62,0xf4,0x04,0x0f,0x83,0xff,0x02]
+; CHECK-NEXT:    setge %al # encoding: [0x0f,0x9d,0xc0]
+; CHECK-NEXT:    movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: ccmp_continous_nobranch_adjust_imm:
+; NDD:       # %bb.0: # %entry
+; NDD-NEXT:    cmpl $2, %esi # encoding: [0x83,0xfe,0x02]
+; NDD-NEXT:    ccmpgl {dfv=} $2, %edi # encoding: [0x62,0xf4,0x04,0x0f,0x83,0xff,0x02]
+; NDD-NEXT:    setge %al # encoding: [0x0f,0x9d,0xc0]
+; NDD-NEXT:    movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
+; NDD-NEXT:    retq # encoding: [0xc3]
+entry:
+  %cmp = icmp sgt i32 %a, 1
+  %cmp1 = icmp slt i32 %b, 2
+  %cmp2 = icmp slt i32 %b, 3
+  %or1 = or i1 %cmp, %cmp1
+  %or2 = or i1 %or1, %cmp2
+  %. = zext i1 %or2 to i32
+  ret i32 %.
+}
+
 declare dso_local void @foo(...)
 declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone