[PPC] Add custom lowering for uaddo (#110137)

syzaara · web-flow · commit c5ca1b8626db · 2024-10-21T11:13:16.000-04:00
Improve the codegen for uaddo node for i64 in 64-bit mode and i32 in
32-bit mode by custom lowering.
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -198,6 +198,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
     }
   }
 
+  setOperationAction(ISD::UADDO, isPPC64 ? MVT::i64 : MVT::i32, Custom);
+
   // Match BITREVERSE to customized fast code sequence in the td file.
   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
   setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
@@ -11967,11 +11969,51 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
   llvm_unreachable("ERROR:Should return for all cases within swtich.");
 }
 
+SDValue PPCTargetLowering::LowerUaddo(SDValue Op, SelectionDAG &DAG) const {
+  // Default to target independent lowering if there is a logical user of the
+  // carry-bit.
+  for (SDNode *U : Op->uses()) {
+    if (U->getOpcode() == ISD::SELECT)
+      return SDValue();
+    if (ISD::isBitwiseLogicOp(U->getOpcode())) {
+      for (unsigned i = 0, ie = U->getNumOperands(); i != ie; ++i) {
+        if (U->getOperand(i).getOpcode() != ISD::UADDO &&
+            U->getOperand(i).getOpcode() != ISD::MERGE_VALUES)
+          return SDValue();
+      }
+    }
+  }
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  SDLoc dl(Op);
+
+  // Default to target independent lowering for special cases handled there.
+  if (isOneConstant(RHS) || isAllOnesConstant(RHS))
+    return SDValue();
+
+  EVT VT = Op.getNode()->getValueType(0);
+
+  SDValue ADDC;
+  SDValue Overflow;
+  SDVTList VTs = Op.getNode()->getVTList();
+
+  ADDC = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), LHS, RHS);
+  Overflow = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(VT, MVT::Glue),
+                         DAG.getConstant(0, dl, VT), DAG.getConstant(0, dl, VT),
+                         ADDC.getValue(1));
+  SDValue OverflowTrunc =
+      DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
+  SDValue Res =
+      DAG.getNode(ISD::MERGE_VALUES, dl, VTs, ADDC.getValue(0), OverflowTrunc);
+  return Res;
+}
+
 /// LowerOperation - Provide custom lowering hooks for some operations.
 ///
 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
   default: llvm_unreachable("Wasn't expecting to be able to lower this!");
+  case ISD::UADDO:              return LowerUaddo(Op, DAG);
   case ISD::FPOW:               return lowerPow(Op, DAG);
   case ISD::FSIN:               return lowerSin(Op, DAG);
   case ISD::FCOS:               return lowerCos(Op, DAG);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1277,6 +1277,7 @@ namespace llvm {
     SDValue LowerGlobalTLSAddressLinux(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerUaddo(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/PowerPC/sat-add.ll b/llvm/test/CodeGen/PowerPC/sat-add.ll
@@ -170,11 +170,10 @@ define i64 @unsigned_sat_constant_i64_using_cmp_sum(i64 %x) {
 define i64 @unsigned_sat_constant_i64_using_cmp_notval(i64 %x) {
 ; CHECK-LABEL: unsigned_sat_constant_i64_using_cmp_notval:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li 5, -43
 ; CHECK-NEXT:    addi 4, 3, 42
-; CHECK-NEXT:    cmpld 3, 5
+; CHECK-NEXT:    cmpld 4, 3
 ; CHECK-NEXT:    li 3, -1
-; CHECK-NEXT:    iselgt 3, 3, 4
+; CHECK-NEXT:    isellt 3, 3, 4
 ; CHECK-NEXT:    blr
   %a = add i64 %x, 42
   %c = icmp ugt i64 %x, -43
diff --git a/llvm/test/CodeGen/PowerPC/uaddo-32.ll b/llvm/test/CodeGen/PowerPC/uaddo-32.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc-ibm-aix-xcoff | FileCheck %s
+
+define noundef i32 @add(i32 noundef %a, i32 noundef %b, ptr nocapture noundef writeonly %ovf) {
+; CHECK-LABEL: add:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li 6, 0
+; CHECK-NEXT:    addc 3, 3, 4
+; CHECK-NEXT:    addze 4, 6
+; CHECK-NEXT:    stw 4, 0(5)
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+  %1 = extractvalue { i32, i1 } %0, 1
+  %2 = extractvalue { i32, i1 } %0, 0
+  %3 = zext i1 %1 to i32
+  store i32 %3, ptr %ovf, align 8
+  ret i32 %2
+}
+
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)
+
+define noundef zeroext i1 @add_overflow(i32 noundef %a, i32 noundef %b, ptr nocapture noundef writeonly %ovf) {
+; CHECK-LABEL: add_overflow:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li 6, 0
+; CHECK-NEXT:    addc 4, 3, 4
+; CHECK-NEXT:    addze 3, 6
+; CHECK-NEXT:    stw 4, 0(5)
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+  %1 = extractvalue { i32, i1 } %0, 1
+  %2 = extractvalue { i32, i1 } %0, 0
+  store i32 %2, ptr %ovf, align 8
+  ret i1 %1
+}
diff --git a/llvm/test/CodeGen/PowerPC/uaddo-64.ll b/llvm/test/CodeGen/PowerPC/uaddo-64.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-ibm-aix-xcoff | FileCheck %s
+
+define noundef i64 @add(i64 noundef %a, i64 noundef %b, ptr nocapture noundef writeonly %ovf) {
+; CHECK-LABEL: add:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li 6, 0
+; CHECK-NEXT:    addc 3, 3, 4
+; CHECK-NEXT:    addze 4, 6
+; CHECK-NEXT:    std 4, 0(5)
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
+  %1 = extractvalue { i64, i1 } %0, 1
+  %2 = extractvalue { i64, i1 } %0, 0
+  %3 = zext i1 %1 to i64
+  store i64 %3, ptr %ovf, align 8
+  ret i64 %2
+}
+
+declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64)
+
+define noundef zeroext i1 @add_overflow(i64 noundef %a, i64 noundef %b, ptr nocapture noundef writeonly %ovf) {
+; CHECK-LABEL: add_overflow:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li 6, 0
+; CHECK-NEXT:    addc 4, 3, 4
+; CHECK-NEXT:    addze 3, 6
+; CHECK-NEXT:    std 4, 0(5)
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
+  %1 = extractvalue { i64, i1 } %0, 1
+  %2 = extractvalue { i64, i1 } %0, 0
+  store i64 %2, ptr %ovf, align 8
+  ret i1 %1
+}
+
+define noundef i64 @addWithCarryIn (i64 noundef %a, i64 noundef %b, i64 noundef %c, ptr nocapture noundef writeonly %ovf) {
+; CHECK-LABEL: addWithCarryIn:
+; CHECK:       # %bb.0:                                # %entry
+; CHECK-NEXT:    li 7, 0
+; CHECK-NEXT:    addc 3, 3, 4
+; CHECK-NEXT:    addze 4, 7
+; CHECK-NEXT:    addc 3, 3, 5
+; CHECK-NEXT:    addze 5, 7
+; CHECK-NEXT:    or 4, 4, 5
+; CHECK-NEXT:    std 4, 0(6)
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
+  %1 = extractvalue { i64, i1 } %0, 1
+  %2 = extractvalue { i64, i1 } %0, 0
+  %3 = tail call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %2, i64 %c)
+  %4 = extractvalue { i64, i1 } %3, 1
+  %5 = extractvalue { i64, i1 } %3, 0
+  %6 = or i1 %1, %4
+  %7 = zext i1 %6 to i64
+  store i64 %7, ptr %ovf, align 8
+  ret i64 %5
+}