[AArch64] Use isKnownNonZero to optimize eligible compares to cmn

AZero13 · AZero13 · commit 643e797edea8 · 2024-07-16T16:31:50.000-04:00
Turning a cmp into a cmn saves an extra mov and negate instruction, so take that into account when deciding whether to flip the compare operands. Also, do not consider right-hand operands whose absolute value can be encoded into a cmn. adds 0 and sub 0 differ in how they set the carry flag, which matters for unsigned comparisons: the problematic case for unsigned comparisons occurs only when the second argument is zero. Source: https://devblogs.microsoft.com/oldnewthing/20210607-00/?p=105288
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3385,19 +3385,26 @@ static bool isLegalArithImmed(uint64_t C) {
   return IsLegal;
 }
 
+static bool cannotBeIntMin(SDValue CheckedVal, SelectionDAG &DAG) {
+  const APInt SMin = DAG.computeKnownBits(CheckedVal).getSignedMinValue();
+  return !SMin.isMinSignedValue(); // Smallest candidate is above INT_MIN.
+}
+
 // Can a (CMP op1, (sub 0, op2) be turned into a CMN instruction on
 // the grounds that "op1 - (-op2) == op1 + op2" ? Not always, the C and V flags
 // can be set differently by this operation. It comes down to whether
 // "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
 // everything is fine. If not then the optimization is wrong. Thus general
 // comparisons are only valid if op2 != 0.
 //
-// So, finally, the only LLVM-native comparisons that don't mention C and V
-// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
-// the absence of information about op2.
-static bool isCMN(SDValue Op, ISD::CondCode CC) {
+// So, finally: equality comparisons (which only read Z) can always use CMN,
+// unsigned comparisons (which read C) need op2 known non-zero, and signed
+// comparisons (which read V) need op2 known not to be INT_MIN.
+static bool isCMN(SDValue Op, ISD::CondCode CC, SelectionDAG &DAG) {
   return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
-         (CC == ISD::SETEQ || CC == ISD::SETNE);
+         (isIntEqualitySetCC(CC) ||
+          (isUnsignedIntSetCC(CC) && DAG.isKnownNeverZero(Op.getOperand(1))) ||
+          (isSignedIntSetCC(CC) && cannotBeIntMin(Op.getOperand(1), DAG)));
 }
 
 static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl,
@@ -3442,11 +3449,12 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
   // register to WZR/XZR if it ends up being unused.
   unsigned Opcode = AArch64ISD::SUBS;
 
-  if (isCMN(RHS, CC)) {
+  if (isCMN(RHS, CC, DAG)) {
     // Can we combine a (CMP op1, (sub 0, op2) into a CMN instruction ?
     Opcode = AArch64ISD::ADDS;
     RHS = RHS.getOperand(1);
-  } else if (isCMN(LHS, CC)) {
+  } else if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
+             isIntEqualitySetCC(CC)) {
     // As we are looking for EQ/NE compares, the operands can be commuted ; can
     // we combine a (CMP (sub 0, op1), op2) into a CMN instruction ?
     Opcode = AArch64ISD::ADDS;
@@ -3548,13 +3556,15 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
       Opcode = AArch64ISD::CCMN;
       RHS = DAG.getConstant(Imm.abs(), DL, Const->getValueType(0));
     }
-  } else if (RHS.getOpcode() == ISD::SUB) {
-    SDValue SubOp0 = RHS.getOperand(0);
-    if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
-      // See emitComparison() on why we can only do this for SETEQ and SETNE.
-      Opcode = AArch64ISD::CCMN;
-      RHS = RHS.getOperand(1);
-    }
+  } else if (isCMN(RHS, CC, DAG)) {
+    Opcode = AArch64ISD::CCMN;
+    RHS = RHS.getOperand(1);
+  } else if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
+             isIntEqualitySetCC(CC)) {
+    // As we are looking for EQ/NE compares, the operands can be commuted; can
+    // we combine a (CCMP (sub 0, op1), op2) into a CCMN instruction?
+    Opcode = AArch64ISD::CCMN;
+    LHS = LHS.getOperand(1);
   }
   if (Opcode == 0)
     Opcode = AArch64ISD::CCMP;
@@ -3872,8 +3882,8 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
   //    cmp     w12, w11, lsl #1
   if (!isa<ConstantSDNode>(RHS) ||
       !isLegalArithImmed(RHS->getAsAPIntVal().abs().getZExtValue())) {
-    bool LHSIsCMN = isCMN(LHS, CC);
-    bool RHSIsCMN = isCMN(RHS, CC);
+    bool LHSIsCMN = isCMN(LHS, CC, DAG);
+    bool RHSIsCMN = isCMN(RHS, CC, DAG);
     SDValue TheLHS = LHSIsCMN ? LHS.getOperand(1) : LHS;
     SDValue TheRHS = RHSIsCMN ? RHS.getOperand(1) : RHS;
 
@@ -3886,7 +3896,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
 
   SDValue Cmp;
   AArch64CC::CondCode AArch64CC;
-  if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
+  if (isIntEqualitySetCC(CC) && isa<ConstantSDNode>(RHS)) {
     const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
 
     // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
diff --git a/llvm/test/CodeGen/AArch64/cmp-chains.ll b/llvm/test/CodeGen/AArch64/cmp-chains.ll
@@ -260,14 +260,22 @@ define i32 @neg_range_int(i32 %a, i32 %b, i32 %c) {
 
 ; (b > -(d | 1) && a < c)
 define i32 @neg_range_int_comp(i32 %a, i32 %b, i32 %c, i32 %d) {
-; CHECK-LABEL: neg_range_int_comp:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    orr w8, w3, #0x1
-; CHECK-NEXT:    cmp w0, w2
-; CHECK-NEXT:    neg w8, w8
-; CHECK-NEXT:    ccmp w1, w8, #4, lt
-; CHECK-NEXT:    csel w0, w1, w0, gt
-; CHECK-NEXT:    ret
+; SDISEL-LABEL: neg_range_int_comp:
+; SDISEL:       // %bb.0:
+; SDISEL-NEXT:    orr w8, w3, #0x1
+; SDISEL-NEXT:    cmp w0, w2
+; SDISEL-NEXT:    ccmn w1, w8, #4, lt
+; SDISEL-NEXT:    csel w0, w1, w0, gt
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: neg_range_int_comp:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    orr w8, w3, #0x1
+; GISEL-NEXT:    cmp w0, w2
+; GISEL-NEXT:    neg w8, w8
+; GISEL-NEXT:    ccmp w1, w8, #4, lt
+; GISEL-NEXT:    csel w0, w1, w0, gt
+; GISEL-NEXT:    ret
   %dor = or i32 %d, 1
   %negd = sub i32 0, %dor
   %cmp = icmp sgt i32 %b, %negd
@@ -279,14 +287,22 @@ define i32 @neg_range_int_comp(i32 %a, i32 %b, i32 %c, i32 %d) {
 
 ; (b >u -(d | 1) && a < c)
 define i32 @neg_range_int_comp_u(i32 %a, i32 %b, i32 %c, i32 %d) {
-; CHECK-LABEL: neg_range_int_comp_u:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    orr w8, w3, #0x1
-; CHECK-NEXT:    cmp w0, w2
-; CHECK-NEXT:    neg w8, w8
-; CHECK-NEXT:    ccmp w1, w8, #0, lt
-; CHECK-NEXT:    csel w0, w1, w0, hi
-; CHECK-NEXT:    ret
+; SDISEL-LABEL: neg_range_int_comp_u:
+; SDISEL:       // %bb.0:
+; SDISEL-NEXT:    orr w8, w3, #0x1
+; SDISEL-NEXT:    cmp w0, w2
+; SDISEL-NEXT:    ccmn w1, w8, #0, lt
+; SDISEL-NEXT:    csel w0, w1, w0, hi
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: neg_range_int_comp_u:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    orr w8, w3, #0x1
+; GISEL-NEXT:    cmp w0, w2
+; GISEL-NEXT:    neg w8, w8
+; GISEL-NEXT:    ccmp w1, w8, #0, lt
+; GISEL-NEXT:    csel w0, w1, w0, hi
+; GISEL-NEXT:    ret
   %dor = or i32 %d, 1
   %negd = sub i32 0, %dor
   %cmp = icmp ugt i32 %b, %negd
@@ -298,14 +314,22 @@ define i32 @neg_range_int_comp_u(i32 %a, i32 %b, i32 %c, i32 %d) {
 
 ; (b > -(d | 1) && a u < c)
 define i32 @neg_range_int_comp_ua(i32 %a, i32 %b, i32 %c, i32 %d) {
-; CHECK-LABEL: neg_range_int_comp_ua:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    orr w8, w3, #0x1
-; CHECK-NEXT:    cmp w0, w2
-; CHECK-NEXT:    neg w8, w8
-; CHECK-NEXT:    ccmp w1, w8, #4, lo
-; CHECK-NEXT:    csel w0, w1, w0, gt
-; CHECK-NEXT:    ret
+; SDISEL-LABEL: neg_range_int_comp_ua:
+; SDISEL:       // %bb.0:
+; SDISEL-NEXT:    orr w8, w3, #0x1
+; SDISEL-NEXT:    cmp w0, w2
+; SDISEL-NEXT:    ccmn w1, w8, #4, lo
+; SDISEL-NEXT:    csel w0, w1, w0, gt
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: neg_range_int_comp_ua:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    orr w8, w3, #0x1
+; GISEL-NEXT:    cmp w0, w2
+; GISEL-NEXT:    neg w8, w8
+; GISEL-NEXT:    ccmp w1, w8, #4, lo
+; GISEL-NEXT:    csel w0, w1, w0, gt
+; GISEL-NEXT:    ret
   %dor = or i32 %d, 1
   %negd = sub i32 0, %dor
   %cmp = icmp sgt i32 %b, %negd
diff --git a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll
@@ -266,9 +266,8 @@ define i32 @or_neg(i32 %x, i32 %y) {
 ; CHECK-LABEL: or_neg:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    orr w8, w0, #0x1
-; CHECK-NEXT:    neg w8, w8
-; CHECK-NEXT:    cmp w8, w1
-; CHECK-NEXT:    cset w0, gt
+; CHECK-NEXT:    cmn w1, w8
+; CHECK-NEXT:    cset w0, lt
 ; CHECK-NEXT:    ret
   %3 = or i32 %x, 1
   %4 = sub i32 0, %3
@@ -281,9 +280,8 @@ define i32 @or_neg_ult(i32 %x, i32 %y) {
 ; CHECK-LABEL: or_neg_ult:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    orr w8, w0, #0x1
-; CHECK-NEXT:    neg w8, w8
-; CHECK-NEXT:    cmp w8, w1
-; CHECK-NEXT:    cset w0, hi
+; CHECK-NEXT:    cmn w1, w8
+; CHECK-NEXT:    cset w0, lo
 ; CHECK-NEXT:    ret
   %3 = or i32 %x, 1
   %4 = sub i32 0, %3
@@ -326,9 +324,8 @@ define i32 @or_neg_no_smin_but_zero(i32 %x, i32 %y) {
 ; CHECK-LABEL: or_neg_no_smin_but_zero:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    bic w8, w0, w0, asr #31
-; CHECK-NEXT:    neg w8, w8
-; CHECK-NEXT:    cmp w8, w1
-; CHECK-NEXT:    cset w0, gt
+; CHECK-NEXT:    cmn w1, w8
+; CHECK-NEXT:    cset w0, lt
 ; CHECK-NEXT:    ret
   %3 = call i32 @llvm.smax.i32(i32 %x, i32 0)
   %4 = sub i32 0, %3