[PPC] Add custom lowering for uaddo #110137

Merged: 5 commits into llvm:main on Oct 21, 2024

Conversation

@syzaara (Contributor) commented Sep 26, 2024

Improve the codegen for uaddo node for i64 in 64-bit mode and i32 in 32-bit mode by custom lowering.
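
For context, a hypothetical example (not code from this patch) of the kind of source that produces a uaddo node: front ends lower __builtin_add_overflow on unsigned operands to @llvm.uadd.with.overflow, which SelectionDAG represents as ISD::UADDO.

// Hypothetical example, not part of the patch: for unsigned operands clang
// typically emits @llvm.uadd.with.overflow.i64 for this builtin, which becomes
// an ISD::UADDO node in the SelectionDAG.
#include <cstdint>

bool add_with_overflow(uint64_t a, uint64_t b, uint64_t *sum) {
  // Returns true when the unsigned addition wraps; the wrapped sum is still
  // written out, mirroring the two results of uaddo (value, overflow).
  return __builtin_add_overflow(a, b, sum);
}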

@llvmbot (Member) commented Sep 26, 2024

@llvm/pr-subscribers-backend-powerpc

Author: Zaara Syeda (syzaara)

Changes

Improve the codegen for uaddo node for i64 in 64-bit mode and i32 in 32-bit mode by custom lowering.


Full diff: https://github.com/llvm/llvm-project/pull/110137.diff

6 Files Affected:

  • (modified) llvm/lib/Target/PowerPC/PPCISelLowering.cpp (+43)
  • (modified) llvm/lib/Target/PowerPC/PPCISelLowering.h (+1)
  • (modified) llvm/lib/Target/PowerPC/PPCMIPeephole.cpp (+55-6)
  • (modified) llvm/test/CodeGen/PowerPC/sat-add.ll (+2-3)
  • (added) llvm/test/CodeGen/PowerPC/uaddo-32.ll (+37)
  • (added) llvm/test/CodeGen/PowerPC/uaddo-64.ll (+37)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index d9847a21489e63..c3497314b91e94 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -198,6 +198,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
     }
   }
 
+  if (!Subtarget.hasP10Vector())
+    setOperationAction(ISD::UADDO, isPPC64 ? MVT::i64 : MVT::i32, Custom);
+
   // Match BITREVERSE to customized fast code sequence in the td file.
   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
   setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
@@ -11967,11 +11970,51 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
   llvm_unreachable("ERROR:Should return for all cases within swtich.");
 }
 
+SDValue PPCTargetLowering::LowerUaddo(SDValue Op, SelectionDAG &DAG) const {
+  // Default to target independent lowering if there is a logical user of the
+  // carry-bit.
+  for (SDNode *U : Op->uses()) {
+    if (U->getOpcode() == ISD::SELECT || ISD::isBitwiseLogicOp(U->getOpcode()))
+      return SDValue();
+  }
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  SDLoc dl(Op);
+
+  // Default to target independent lowering for special cases handled there.
+  if (isOneConstant(RHS) || isAllOnesConstant(RHS))
+    return SDValue();
+
+  EVT VT = Op.getNode()->getValueType(0);
+  bool is64Bit = Subtarget.isPPC64();
+
+  SDValue ADDC;
+  SDValue Overflow;
+  SDVTList VTs = Op.getNode()->getVTList();
+
+  ADDC = SDValue(DAG.getMachineNode(is64Bit ? PPC::ADDC8 : PPC::ADDC, dl, VT,
+                                    MVT::Glue, LHS, RHS),
+                 0);
+  SDValue Li = SDValue(DAG.getMachineNode(is64Bit ? PPC::LI8 : PPC::LI, dl, VT,
+                                          DAG.getTargetConstant(0, dl, VT)),
+                       0);
+  Overflow = SDValue(DAG.getMachineNode(is64Bit ? PPC::ADDZE8 : PPC::ADDZE, dl,
+                                        VT, MVT::Glue, Li, ADDC.getValue(1)),
+                     0);
+  SDValue OverflowTrunc =
+      DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
+  SDValue Res =
+      DAG.getNode(ISD::MERGE_VALUES, dl, VTs, ADDC.getValue(0), OverflowTrunc);
+  return Res;
+}
+
 /// LowerOperation - Provide custom lowering hooks for some operations.
 ///
 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
   default: llvm_unreachable("Wasn't expecting to be able to lower this!");
+  case ISD::UADDO:
+    return LowerUaddo(Op, DAG);
   case ISD::FPOW:               return lowerPow(Op, DAG);
   case ISD::FSIN:               return lowerSin(Op, DAG);
   case ISD::FCOS:               return lowerCos(Op, DAG);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 8907c3c5a81c3c..7285f6de4728d5 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1277,6 +1277,7 @@ namespace llvm {
     SDValue LowerGlobalTLSAddressLinux(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerUaddo(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index b8abee76cdfa80..362eaf33ba96a8 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -139,6 +139,8 @@ struct PPCMIPeephole : public MachineFunctionPass {
   void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
                       MachineInstr *MI);
 
+  bool eliminateTruncWhenLoweringUADDO(MachineInstr &MI,
+                                       MachineInstr *&ToErase);
   // A number of transformations will eliminate the definition of a register
   // as all of its uses will be removed. However, this leaves a register
   // without a definition for LiveVariables. Such transformations should
@@ -1071,6 +1073,18 @@ bool PPCMIPeephole::simplifyCode() {
         break;
       }
       case PPC::RLDICL: {
+        Register SrcReg = MI.getOperand(1).getReg();
+        if (!SrcReg.isVirtual())
+          break;
+
+        MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
+        // We can eliminate clearing the left 63 bits when only the carry-bit is
+        // set.
+        if (eliminateTruncWhenLoweringUADDO(MI, ToErase)) {
+          Simplified = true;
+          break;
+        }
+
         // We can eliminate RLDICL (e.g. for zero-extension)
         // if all bits to clear are already zero in the input.
         // This code assume following code sequence for zero-extension.
@@ -1082,11 +1096,6 @@ bool PPCMIPeephole::simplifyCode() {
         if (MI.getOperand(2).getImm() != 0)
           break;
 
-        Register SrcReg = MI.getOperand(1).getReg();
-        if (!SrcReg.isVirtual())
-          break;
-
-        MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
         if (!(SrcMI && SrcMI->getOpcode() == PPC::INSERT_SUBREG &&
               SrcMI->getOperand(0).isReg() && SrcMI->getOperand(1).isReg()))
           break;
@@ -1277,7 +1286,15 @@ bool PPCMIPeephole::simplifyCode() {
         Simplified = true;
         break;
       }
-      case PPC::RLWINM:
+      case PPC::RLWINM: {
+        // We can eliminate clearing the left 31 bits when only the carry-bit is
+        // set.
+        if (eliminateTruncWhenLoweringUADDO(MI, ToErase)) {
+          Simplified = true;
+          break;
+        }
+      }
+        LLVM_FALLTHROUGH;
       case PPC::RLWINM_rec:
       case PPC::RLWINM8:
       case PPC::RLWINM8_rec: {
@@ -1889,6 +1906,38 @@ bool PPCMIPeephole::eliminateRedundantCompare() {
 
   return Simplified;
 }
+bool PPCMIPeephole::eliminateTruncWhenLoweringUADDO(MachineInstr &MI,
+                                                    MachineInstr *&ToErase) {
+  Register SrcReg = MI.getOperand(1).getReg();
+  if (!SrcReg.isVirtual())
+    return false;
+  MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
+
+  bool Is64Bit = MI.getOpcode() == PPC::RLDICL;
+  int Imm = Is64Bit ? 63 : 31;
+  if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != Imm)
+    return false;
+  if (SrcMI->getOpcode() != (Is64Bit ? PPC::ADDZE8 : PPC::ADDZE))
+    return false;
+  MachineInstr *LI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
+  if (LI->getOpcode() != (Is64Bit ? PPC::LI8 : PPC::LI))
+    return false;
+  if (LI->getOperand(1).getImm() != 0 || MI.getOperand(2).getImm() != 0)
+    return false;
+  Register NewReg = SrcMI->getOperand(0).getReg();
+  ToErase = &MI;
+  Register MIDestReg = MI.getOperand(0).getReg();
+  for (MachineInstr &UseMI : MRI->use_instructions(MIDestReg)) {
+    for (MachineOperand &MO : UseMI.operands()) {
+      if (MO.isReg() && MO.getReg() == MIDestReg) {
+        MO.setReg(NewReg);
+        addRegToUpdate(NewReg);
+        break;
+      }
+    }
+  }
+  return true;
+}
 
 // We miss the opportunity to emit an RLDIC when lowering jump tables
 // since ISEL sees only a single basic block. When selecting, the clear
diff --git a/llvm/test/CodeGen/PowerPC/sat-add.ll b/llvm/test/CodeGen/PowerPC/sat-add.ll
index f699ea54192d88..8fff2c28da245e 100644
--- a/llvm/test/CodeGen/PowerPC/sat-add.ll
+++ b/llvm/test/CodeGen/PowerPC/sat-add.ll
@@ -170,11 +170,10 @@ define i64 @unsigned_sat_constant_i64_using_cmp_sum(i64 %x) {
 define i64 @unsigned_sat_constant_i64_using_cmp_notval(i64 %x) {
 ; CHECK-LABEL: unsigned_sat_constant_i64_using_cmp_notval:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li 5, -43
 ; CHECK-NEXT:    addi 4, 3, 42
-; CHECK-NEXT:    cmpld 3, 5
+; CHECK-NEXT:    cmpld 4, 3
 ; CHECK-NEXT:    li 3, -1
-; CHECK-NEXT:    iselgt 3, 3, 4
+; CHECK-NEXT:    isellt 3, 3, 4
 ; CHECK-NEXT:    blr
   %a = add i64 %x, 42
   %c = icmp ugt i64 %x, -43
diff --git a/llvm/test/CodeGen/PowerPC/uaddo-32.ll b/llvm/test/CodeGen/PowerPC/uaddo-32.ll
new file mode 100644
index 00000000000000..7c741e3618e6fc
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/uaddo-32.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu | FileCheck %s
+
+define noundef i32 @add(i32 noundef %a, i32 noundef %b, ptr nocapture noundef writeonly %ovf) {
+; CHECK-LABEL: add:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li 6, 0
+; CHECK-NEXT:    addc 3, 3, 4
+; CHECK-NEXT:    addze 4, 6
+; CHECK-NEXT:    stw 4, 0(5)
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+  %1 = extractvalue { i32, i1 } %0, 1
+  %2 = extractvalue { i32, i1 } %0, 0
+  %3 = zext i1 %1 to i32
+  store i32 %3, ptr %ovf, align 8
+  ret i32 %2
+}
+
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)
+
+define noundef zeroext i1 @add_overflow(i32 noundef %a, i32 noundef %b, ptr nocapture noundef writeonly %ovf) {
+; CHECK-LABEL: add_overflow:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li 6, 0
+; CHECK-NEXT:    addc 4, 3, 4
+; CHECK-NEXT:    addze 3, 6
+; CHECK-NEXT:    stw 4, 0(5)
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+  %1 = extractvalue { i32, i1 } %0, 1
+  %2 = extractvalue { i32, i1 } %0, 0
+  store i32 %2, ptr %ovf, align 8
+  ret i1 %1
+}
diff --git a/llvm/test/CodeGen/PowerPC/uaddo-64.ll b/llvm/test/CodeGen/PowerPC/uaddo-64.ll
new file mode 100644
index 00000000000000..ef4eccd329d9f1
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/uaddo-64.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s --check-prefixes=PPC64
+
+define noundef i64 @add(i64 noundef %a, i64 noundef %b, ptr nocapture noundef writeonly %ovf) {
+; PPC64-LABEL: add:
+; PPC64:       # %bb.0: # %entry
+; PPC64-NEXT:    li 6, 0
+; PPC64-NEXT:    addc 3, 3, 4
+; PPC64-NEXT:    addze 4, 6
+; PPC64-NEXT:    std 4, 0(5)
+; PPC64-NEXT:    blr
+entry:
+  %0 = tail call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
+  %1 = extractvalue { i64, i1 } %0, 1
+  %2 = extractvalue { i64, i1 } %0, 0
+  %3 = zext i1 %1 to i64
+  store i64 %3, ptr %ovf, align 8
+  ret i64 %2
+}
+
+declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64)
+
+define noundef zeroext i1 @add_overflow(i64 noundef %a, i64 noundef %b, ptr nocapture noundef writeonly %ovf) {
+; PPC64-LABEL: add_overflow:
+; PPC64:       # %bb.0: # %entry
+; PPC64-NEXT:    li 6, 0
+; PPC64-NEXT:    addc 4, 3, 4
+; PPC64-NEXT:    addze 3, 6
+; PPC64-NEXT:    std 4, 0(5)
+; PPC64-NEXT:    blr
+entry:
+  %0 = tail call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
+  %1 = extractvalue { i64, i1 } %0, 1
+  %2 = extractvalue { i64, i1 } %0, 0
+  store i64 %2, ptr %ovf, align 8
+  ret i1 %1
+}

SDValue Overflow;
SDVTList VTs = Op.getNode()->getVTList();

ADDC = SDValue(DAG.getMachineNode(is64Bit ? PPC::ADDC8 : PPC::ADDC, dl, VT,
Collaborator commented:

Please avoid getMachineNode() during legalization; add appropriate PPCISD nodes if necessary. (Or maybe you can just use ISD::ADDC/ISD::ADDE.)
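
A rough sketch of what this suggestion could look like (my reading of it, reusing the locals from LowerUaddo above and assuming ISD::ADDC/ISD::ADDE; not the code that eventually landed):

// Hypothetical alternative to the getMachineNode() sequence: use the
// target-independent carry nodes so later phases stay in charge of selection.
SDVTList CarryVTs = DAG.getVTList(VT, MVT::Glue);
// The carry-out of the addition flows through the glue result.
SDValue Sum = DAG.getNode(ISD::ADDC, dl, CarryVTs, LHS, RHS);
// Materialize the carry into a register: 0 + 0 + carry-in.
SDValue Zero = DAG.getConstant(0, dl, VT);
SDValue Carry =
    DAG.getNode(ISD::ADDE, dl, CarryVTs, Zero, Zero, Sum.getValue(1));
SDValue OverflowTrunc =
    DAG.getNode(ISD::TRUNCATE, dl, Op->getValueType(1), Carry);
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Sum, OverflowTrunc);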

@@ -198,6 +198,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
}
}

if (!Subtarget.hasP10Vector())
Collaborator commented:

How is hasP10Vector() related to the instructions in question? Do we have testcases for what's generated when hasP10Vector() is enabled?

@syzaara requested a review from efriedma-quic on October 7, 2024
@efriedma-quic (Collaborator) left a comment:

Looks fine from my side, but I don't really have much context for PPC instruction sets, so a PPC expert should also look at it.

@lei137 (Contributor) left a comment:

Is it possible to do an NFC patch that adds the new test cases first, so we can see the codegen improvements in this patch?

/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Wasn't expecting to be able to lower this!");
case ISD::UADDO:
return LowerUaddo(Op, DAG);
Contributor commented:

Maybe we should keep the same formatting as the rest of the case statements?

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu | FileCheck %s
; RUN: llc < %s -mtriple=powerpc-ibm-aix-xcoff | FileCheck %s

Contributor commented:

Is this IR not valid for 64-bit? I would think we can just have one test case and make sure we test both 32- and 64-bit on Linux and AIX.

Author (@syzaara) commented:

It is valid, but I'm only custom lowering @llvm.uadd.with.overflow.i32 for 32-bit mode and @llvm.uadd.with.overflow.i64 for 64-bit mode, so I didn't add checks for the other modes since they use the default lowering.

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:
git-clang-format --diff 2b0a708f41dd6291ee744704d43febc975e3d026 445dd4381bb77c931deb9929ac64b95d24459f11 --extensions h,cpp -- llvm/lib/Target/PowerPC/PPCISelLowering.cpp llvm/lib/Target/PowerPC/PPCISelLowering.h
The diff from clang-format:
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index c2e4281363..a5aefcc0b8 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12013,7 +12013,8 @@ SDValue PPCTargetLowering::LowerUaddo(SDValue Op, SelectionDAG &DAG) const {
 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
   default: llvm_unreachable("Wasn't expecting to be able to lower this!");
-  case ISD::UADDO:              return LowerUaddo(Op, DAG);
+  case ISD::UADDO:
+    return LowerUaddo(Op, DAG);
   case ISD::FPOW:               return lowerPow(Op, DAG);
   case ISD::FSIN:               return lowerSin(Op, DAG);
   case ISD::FCOS:               return lowerCos(Op, DAG);

@maryammo (Contributor) left a comment:

LGTM.

@syzaara merged commit c5ca1b8 into llvm:main on Oct 21, 2024
5 of 8 checks passed