-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[PPC] Add custom lowering for uaddo #110137
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Improve the codegen for uaddo node for i64 in 64-bit mode and i32 in 32-bit mode by custom lowering.
@llvm/pr-subscribers-backend-powerpc Author: Zaara Syeda (syzaara) ChangesImprove the codegen for uaddo node for i64 in 64-bit mode and i32 in 32-bit mode by custom lowering. Full diff: https://github.com/llvm/llvm-project/pull/110137.diff 6 Files Affected:
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index d9847a21489e63..c3497314b91e94 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -198,6 +198,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
}
}
+ if (!Subtarget.hasP10Vector())
+ setOperationAction(ISD::UADDO, isPPC64 ? MVT::i64 : MVT::i32, Custom);
+
// Match BITREVERSE to customized fast code sequence in the td file.
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
@@ -11967,11 +11970,51 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("ERROR:Should return for all cases within swtich.");
}
+SDValue PPCTargetLowering::LowerUaddo(SDValue Op, SelectionDAG &DAG) const {
+ // Default to target independent lowering if there is a logical user of the
+ // carry-bit.
+ for (SDNode *U : Op->uses()) {
+ if (U->getOpcode() == ISD::SELECT || ISD::isBitwiseLogicOp(U->getOpcode()))
+ return SDValue();
+ }
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDLoc dl(Op);
+
+ // Default to target independent lowering for special cases handled there.
+ if (isOneConstant(RHS) || isAllOnesConstant(RHS))
+ return SDValue();
+
+ EVT VT = Op.getNode()->getValueType(0);
+ bool is64Bit = Subtarget.isPPC64();
+
+ SDValue ADDC;
+ SDValue Overflow;
+ SDVTList VTs = Op.getNode()->getVTList();
+
+ ADDC = SDValue(DAG.getMachineNode(is64Bit ? PPC::ADDC8 : PPC::ADDC, dl, VT,
+ MVT::Glue, LHS, RHS),
+ 0);
+ SDValue Li = SDValue(DAG.getMachineNode(is64Bit ? PPC::LI8 : PPC::LI, dl, VT,
+ DAG.getTargetConstant(0, dl, VT)),
+ 0);
+ Overflow = SDValue(DAG.getMachineNode(is64Bit ? PPC::ADDZE8 : PPC::ADDZE, dl,
+ VT, MVT::Glue, Li, ADDC.getValue(1)),
+ 0);
+ SDValue OverflowTrunc =
+ DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
+ SDValue Res =
+ DAG.getNode(ISD::MERGE_VALUES, dl, VTs, ADDC.getValue(0), OverflowTrunc);
+ return Res;
+}
+
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Wasn't expecting to be able to lower this!");
+ case ISD::UADDO:
+ return LowerUaddo(Op, DAG);
case ISD::FPOW: return lowerPow(Op, DAG);
case ISD::FSIN: return lowerSin(Op, DAG);
case ISD::FCOS: return lowerCos(Op, DAG);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 8907c3c5a81c3c..7285f6de4728d5 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1277,6 +1277,7 @@ namespace llvm {
SDValue LowerGlobalTLSAddressLinux(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUaddo(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index b8abee76cdfa80..362eaf33ba96a8 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -139,6 +139,8 @@ struct PPCMIPeephole : public MachineFunctionPass {
void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
MachineInstr *MI);
+ bool eliminateTruncWhenLoweringUADDO(MachineInstr &MI,
+ MachineInstr *&ToErase);
// A number of transformations will eliminate the definition of a register
// as all of its uses will be removed. However, this leaves a register
// without a definition for LiveVariables. Such transformations should
@@ -1071,6 +1073,18 @@ bool PPCMIPeephole::simplifyCode() {
break;
}
case PPC::RLDICL: {
+ Register SrcReg = MI.getOperand(1).getReg();
+ if (!SrcReg.isVirtual())
+ break;
+
+ MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
+ // We can eliminate clearing the left 63 bits when only the carry-bit is
+ // set.
+ if (eliminateTruncWhenLoweringUADDO(MI, ToErase)) {
+ Simplified = true;
+ break;
+ }
+
// We can eliminate RLDICL (e.g. for zero-extension)
// if all bits to clear are already zero in the input.
// This code assume following code sequence for zero-extension.
@@ -1082,11 +1096,6 @@ bool PPCMIPeephole::simplifyCode() {
if (MI.getOperand(2).getImm() != 0)
break;
- Register SrcReg = MI.getOperand(1).getReg();
- if (!SrcReg.isVirtual())
- break;
-
- MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
if (!(SrcMI && SrcMI->getOpcode() == PPC::INSERT_SUBREG &&
SrcMI->getOperand(0).isReg() && SrcMI->getOperand(1).isReg()))
break;
@@ -1277,7 +1286,15 @@ bool PPCMIPeephole::simplifyCode() {
Simplified = true;
break;
}
- case PPC::RLWINM:
+ case PPC::RLWINM: {
+ // We can eliminate clearing the left 31 bits when only the carry-bit is
+ // set.
+ if (eliminateTruncWhenLoweringUADDO(MI, ToErase)) {
+ Simplified = true;
+ break;
+ }
+ }
+ LLVM_FALLTHROUGH;
case PPC::RLWINM_rec:
case PPC::RLWINM8:
case PPC::RLWINM8_rec: {
@@ -1889,6 +1906,38 @@ bool PPCMIPeephole::eliminateRedundantCompare() {
return Simplified;
}
+bool PPCMIPeephole::eliminateTruncWhenLoweringUADDO(MachineInstr &MI,
+ MachineInstr *&ToErase) {
+ Register SrcReg = MI.getOperand(1).getReg();
+ if (!SrcReg.isVirtual())
+ return false;
+ MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
+
+ bool Is64Bit = MI.getOpcode() == PPC::RLDICL;
+ int Imm = Is64Bit ? 63 : 31;
+ if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != Imm)
+ return false;
+ if (SrcMI->getOpcode() != (Is64Bit ? PPC::ADDZE8 : PPC::ADDZE))
+ return false;
+ MachineInstr *LI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
+ if (LI->getOpcode() != (Is64Bit ? PPC::LI8 : PPC::LI))
+ return false;
+ if (LI->getOperand(1).getImm() != 0 || MI.getOperand(2).getImm() != 0)
+ return false;
+ Register NewReg = SrcMI->getOperand(0).getReg();
+ ToErase = &MI;
+ Register MIDestReg = MI.getOperand(0).getReg();
+ for (MachineInstr &UseMI : MRI->use_instructions(MIDestReg)) {
+ for (MachineOperand &MO : UseMI.operands()) {
+ if (MO.isReg() && MO.getReg() == MIDestReg) {
+ MO.setReg(NewReg);
+ addRegToUpdate(NewReg);
+ break;
+ }
+ }
+ }
+ return true;
+}
// We miss the opportunity to emit an RLDIC when lowering jump tables
// since ISEL sees only a single basic block. When selecting, the clear
diff --git a/llvm/test/CodeGen/PowerPC/sat-add.ll b/llvm/test/CodeGen/PowerPC/sat-add.ll
index f699ea54192d88..8fff2c28da245e 100644
--- a/llvm/test/CodeGen/PowerPC/sat-add.ll
+++ b/llvm/test/CodeGen/PowerPC/sat-add.ll
@@ -170,11 +170,10 @@ define i64 @unsigned_sat_constant_i64_using_cmp_sum(i64 %x) {
define i64 @unsigned_sat_constant_i64_using_cmp_notval(i64 %x) {
; CHECK-LABEL: unsigned_sat_constant_i64_using_cmp_notval:
; CHECK: # %bb.0:
-; CHECK-NEXT: li 5, -43
; CHECK-NEXT: addi 4, 3, 42
-; CHECK-NEXT: cmpld 3, 5
+; CHECK-NEXT: cmpld 4, 3
; CHECK-NEXT: li 3, -1
-; CHECK-NEXT: iselgt 3, 3, 4
+; CHECK-NEXT: isellt 3, 3, 4
; CHECK-NEXT: blr
%a = add i64 %x, 42
%c = icmp ugt i64 %x, -43
diff --git a/llvm/test/CodeGen/PowerPC/uaddo-32.ll b/llvm/test/CodeGen/PowerPC/uaddo-32.ll
new file mode 100644
index 00000000000000..7c741e3618e6fc
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/uaddo-32.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu | FileCheck %s
+
+define noundef i32 @add(i32 noundef %a, i32 noundef %b, ptr nocapture noundef writeonly %ovf) {
+; CHECK-LABEL: add:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: li 6, 0
+; CHECK-NEXT: addc 3, 3, 4
+; CHECK-NEXT: addze 4, 6
+; CHECK-NEXT: stw 4, 0(5)
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+ %1 = extractvalue { i32, i1 } %0, 1
+ %2 = extractvalue { i32, i1 } %0, 0
+ %3 = zext i1 %1 to i32
+ store i32 %3, ptr %ovf, align 8
+ ret i32 %2
+}
+
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)
+
+define noundef zeroext i1 @add_overflow(i32 noundef %a, i32 noundef %b, ptr nocapture noundef writeonly %ovf) {
+; CHECK-LABEL: add_overflow:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: li 6, 0
+; CHECK-NEXT: addc 4, 3, 4
+; CHECK-NEXT: addze 3, 6
+; CHECK-NEXT: stw 4, 0(5)
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+ %1 = extractvalue { i32, i1 } %0, 1
+ %2 = extractvalue { i32, i1 } %0, 0
+ store i32 %2, ptr %ovf, align 8
+ ret i1 %1
+}
diff --git a/llvm/test/CodeGen/PowerPC/uaddo-64.ll b/llvm/test/CodeGen/PowerPC/uaddo-64.ll
new file mode 100644
index 00000000000000..ef4eccd329d9f1
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/uaddo-64.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s --check-prefixes=PPC64
+
+define noundef i64 @add(i64 noundef %a, i64 noundef %b, ptr nocapture noundef writeonly %ovf) {
+; PPC64-LABEL: add:
+; PPC64: # %bb.0: # %entry
+; PPC64-NEXT: li 6, 0
+; PPC64-NEXT: addc 3, 3, 4
+; PPC64-NEXT: addze 4, 6
+; PPC64-NEXT: std 4, 0(5)
+; PPC64-NEXT: blr
+entry:
+ %0 = tail call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
+ %1 = extractvalue { i64, i1 } %0, 1
+ %2 = extractvalue { i64, i1 } %0, 0
+ %3 = zext i1 %1 to i64
+ store i64 %3, ptr %ovf, align 8
+ ret i64 %2
+}
+
+declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64)
+
+define noundef zeroext i1 @add_overflow(i64 noundef %a, i64 noundef %b, ptr nocapture noundef writeonly %ovf) {
+; PPC64-LABEL: add_overflow:
+; PPC64: # %bb.0: # %entry
+; PPC64-NEXT: li 6, 0
+; PPC64-NEXT: addc 4, 3, 4
+; PPC64-NEXT: addze 3, 6
+; PPC64-NEXT: std 4, 0(5)
+; PPC64-NEXT: blr
+entry:
+ %0 = tail call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
+ %1 = extractvalue { i64, i1 } %0, 1
+ %2 = extractvalue { i64, i1 } %0, 0
+ store i64 %2, ptr %ovf, align 8
+ ret i1 %1
+}
|
SDValue Overflow; | ||
SDVTList VTs = Op.getNode()->getVTList(); | ||
|
||
ADDC = SDValue(DAG.getMachineNode(is64Bit ? PPC::ADDC8 : PPC::ADDC, dl, VT, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please avoid getMachineNode() during legalization; add appropriate PPCISD nodes if necessary. (Or maybe you can just use ISD::ADDC/ISD::ADDE.)
@@ -198,6 +198,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, | |||
} | |||
} | |||
|
|||
if (!Subtarget.hasP10Vector()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How is hasP10Vector() related to the instructions in question? Do we have testcases for what's generated when hasP10Vector() is enabled?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks fine from my side, but I don't really have much context for PPC instruction sets, so a PPC expert should also look at it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it possible to do an NFC patch to add in the new testcase so we can see the code gen improvements in this patch?
/// LowerOperation - Provide custom lowering hooks for some operations. | ||
/// | ||
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { | ||
switch (Op.getOpcode()) { | ||
default: llvm_unreachable("Wasn't expecting to be able to lower this!"); | ||
case ISD::UADDO: | ||
return LowerUaddo(Op, DAG); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe we should keep the same formating as the rest of the case statements?
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu | FileCheck %s | ||
; RUN: llc < %s -mtriple=powerpc-ibm-aix-xcoff | FileCheck %s | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are these IR not valid for 64bit? I would think we can just have 1 tc and make sure we test for both 32 and 64bit on Linux and AIX.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is valid, but I'm only custom lowering @llvm.uadd.with.overflow.i32 for 32-bit mode and @llvm.uadd.with.overflow.i64 for 64-bit mode. So I didn't add the checks for the other modes as its the default lowering.
You can test this locally with the following command:git-clang-format --diff 2b0a708f41dd6291ee744704d43febc975e3d026 445dd4381bb77c931deb9929ac64b95d24459f11 --extensions h,cpp -- llvm/lib/Target/PowerPC/PPCISelLowering.cpp llvm/lib/Target/PowerPC/PPCISelLowering.h View the diff from clang-format here.diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index c2e4281363..a5aefcc0b8 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12013,7 +12013,8 @@ SDValue PPCTargetLowering::LowerUaddo(SDValue Op, SelectionDAG &DAG) const {
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Wasn't expecting to be able to lower this!");
- case ISD::UADDO: return LowerUaddo(Op, DAG);
+ case ISD::UADDO:
+ return LowerUaddo(Op, DAG);
case ISD::FPOW: return lowerPow(Op, DAG);
case ISD::FSIN: return lowerSin(Op, DAG);
case ISD::FCOS: return lowerCos(Op, DAG);
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
Improve the codegen for uaddo node for i64 in 64-bit mode and i32 in 32-bit mode by custom lowering.