Skip to content

[PPC] Add custom lowering for uaddo #110137

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Oct 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
}
}

setOperationAction(ISD::UADDO, isPPC64 ? MVT::i64 : MVT::i32, Custom);

// Match BITREVERSE to customized fast code sequence in the td file.
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
Expand Down Expand Up @@ -11967,11 +11969,51 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("ERROR:Should return for all cases within swtich.");
}

/// LowerUaddo - Custom-lower ISD::UADDO (unsigned add with overflow) to a
/// carry-based ADDC/ADDE sequence, so the overflow flag is materialized from
/// the carry register rather than via a compare.
/// Returns SDValue() (an empty node) to fall back to the target-independent
/// expansion in the cases it does not handle.
SDValue PPCTargetLowering::LowerUaddo(SDValue Op, SelectionDAG &DAG) const {
  // Default to target independent lowering if there is a logical user of the
  // carry-bit. A SELECT user, or a bitwise-logic user whose operands are not
  // all UADDO/MERGE_VALUES nodes, is handled better by the generic expansion.
  for (SDNode *U : Op->uses()) {
    if (U->getOpcode() == ISD::SELECT)
      return SDValue();
    if (ISD::isBitwiseLogicOp(U->getOpcode())) {
      for (unsigned i = 0, ie = U->getNumOperands(); i != ie; ++i) {
        if (U->getOperand(i).getOpcode() != ISD::UADDO &&
            U->getOperand(i).getOpcode() != ISD::MERGE_VALUES)
          return SDValue();
      }
    }
  }
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDLoc dl(Op);

  // Default to target independent lowering for special cases handled there.
  // (RHS of 1 or all-ones has dedicated handling in the generic expander.)
  if (isOneConstant(RHS) || isAllOnesConstant(RHS))
    return SDValue();

  // VT is the value type of the sum (result 0 of the UADDO node).
  EVT VT = Op.getNode()->getValueType(0);

  SDValue ADDC;
  SDValue Overflow;
  SDVTList VTs = Op.getNode()->getVTList();

  // ADDC produces the sum and a glued carry; ADDE of 0 + 0 + carry then
  // materializes the carry bit itself into a register of type VT.
  ADDC = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), LHS, RHS);
  Overflow = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(VT, MVT::Glue),
                         DAG.getConstant(0, dl, VT), DAG.getConstant(0, dl, VT),
                         ADDC.getValue(1));
  // Truncate the materialized carry down to the UADDO node's overflow type
  // (result 1, typically i1).
  SDValue OverflowTrunc =
      DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
  // Repackage (sum, overflow) with the original node's value-type list.
  SDValue Res =
      DAG.getNode(ISD::MERGE_VALUES, dl, VTs, ADDC.getValue(0), OverflowTrunc);
  return Res;
}

/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Wasn't expecting to be able to lower this!");
case ISD::UADDO: return LowerUaddo(Op, DAG);
case ISD::FPOW: return lowerPow(Op, DAG);
case ISD::FSIN: return lowerSin(Op, DAG);
case ISD::FCOS: return lowerCos(Op, DAG);
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/PowerPC/PPCISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -1277,6 +1277,7 @@ namespace llvm {
SDValue LowerGlobalTLSAddressLinux(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUaddo(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
Expand Down
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/PowerPC/sat-add.ll
Original file line number Diff line number Diff line change
Expand Up @@ -170,11 +170,10 @@ define i64 @unsigned_sat_constant_i64_using_cmp_sum(i64 %x) {
define i64 @unsigned_sat_constant_i64_using_cmp_notval(i64 %x) {
; CHECK-LABEL: unsigned_sat_constant_i64_using_cmp_notval:
; CHECK: # %bb.0:
; CHECK-NEXT: li 5, -43
; CHECK-NEXT: addi 4, 3, 42
; CHECK-NEXT: cmpld 3, 5
; CHECK-NEXT: cmpld 4, 3
; CHECK-NEXT: li 3, -1
; CHECK-NEXT: iselgt 3, 3, 4
; CHECK-NEXT: isellt 3, 3, 4
; CHECK-NEXT: blr
%a = add i64 %x, 42
%c = icmp ugt i64 %x, -43
Expand Down
38 changes: 38 additions & 0 deletions llvm/test/CodeGen/PowerPC/uaddo-32.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu | FileCheck %s
; RUN: llc < %s -mtriple=powerpc-ibm-aix-xcoff | FileCheck %s

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this IR not also valid for 64-bit? I would think we could have a single test case and make sure we test both 32-bit and 64-bit on Linux and AIX.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is valid, but I'm only custom lowering @llvm.uadd.with.overflow.i32 for 32-bit mode and @llvm.uadd.with.overflow.i64 for 64-bit mode. So I didn't add checks for the other modes, since those use the default lowering.

; i32 uadd.with.overflow where the sum is returned and the overflow bit is
; zero-extended and stored: expect the carry-based addc/addze sequence.
define noundef i32 @add(i32 noundef %a, i32 noundef %b, ptr nocapture noundef writeonly %ovf) {
; CHECK-LABEL: add:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li 6, 0
; CHECK-NEXT: addc 3, 3, 4
; CHECK-NEXT: addze 4, 6
; CHECK-NEXT: stw 4, 0(5)
; CHECK-NEXT: blr
entry:
%0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
%1 = extractvalue { i32, i1 } %0, 1
%2 = extractvalue { i32, i1 } %0, 0
%3 = zext i1 %1 to i32
store i32 %3, ptr %ovf, align 8
ret i32 %2
}

declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)

; i32 uadd.with.overflow where the overflow bit is returned and the sum is
; stored: still expect addc/addze rather than a compare.
define noundef zeroext i1 @add_overflow(i32 noundef %a, i32 noundef %b, ptr nocapture noundef writeonly %ovf) {
; CHECK-LABEL: add_overflow:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li 6, 0
; CHECK-NEXT: addc 4, 3, 4
; CHECK-NEXT: addze 3, 6
; CHECK-NEXT: stw 4, 0(5)
; CHECK-NEXT: blr
entry:
%0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
%1 = extractvalue { i32, i1 } %0, 1
%2 = extractvalue { i32, i1 } %0, 0
store i32 %2, ptr %ovf, align 8
ret i1 %1
}
62 changes: 62 additions & 0 deletions llvm/test/CodeGen/PowerPC/uaddo-64.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
; RUN: llc < %s -mtriple=powerpc64-ibm-aix-xcoff | FileCheck %s

; i64 uadd.with.overflow where the sum is returned and the overflow bit is
; zero-extended and stored: expect the carry-based addc/addze sequence.
define noundef i64 @add(i64 noundef %a, i64 noundef %b, ptr nocapture noundef writeonly %ovf) {
; CHECK-LABEL: add:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li 6, 0
; CHECK-NEXT: addc 3, 3, 4
; CHECK-NEXT: addze 4, 6
; CHECK-NEXT: std 4, 0(5)
; CHECK-NEXT: blr
entry:
%0 = tail call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
%1 = extractvalue { i64, i1 } %0, 1
%2 = extractvalue { i64, i1 } %0, 0
%3 = zext i1 %1 to i64
store i64 %3, ptr %ovf, align 8
ret i64 %2
}

declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64)

; i64 uadd.with.overflow where the overflow bit is returned and the sum is
; stored: still expect addc/addze rather than a compare.
define noundef zeroext i1 @add_overflow(i64 noundef %a, i64 noundef %b, ptr nocapture noundef writeonly %ovf) {
; CHECK-LABEL: add_overflow:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li 6, 0
; CHECK-NEXT: addc 4, 3, 4
; CHECK-NEXT: addze 3, 6
; CHECK-NEXT: std 4, 0(5)
; CHECK-NEXT: blr
entry:
%0 = tail call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
%1 = extractvalue { i64, i1 } %0, 1
%2 = extractvalue { i64, i1 } %0, 0
store i64 %2, ptr %ovf, align 8
ret i1 %1
}

; Two chained uadd.with.overflow calls whose overflow bits are OR'ed: the
; bitwise-logic user has only UADDO/MERGE_VALUES operands, so the custom
; lowering still applies (two addc/addze pairs plus an or).
define noundef i64 @addWithCarryIn (i64 noundef %a, i64 noundef %b, i64 noundef %c, ptr nocapture noundef writeonly %ovf) {
; CHECK-LABEL: addWithCarryIn:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li 7, 0
; CHECK-NEXT: addc 3, 3, 4
; CHECK-NEXT: addze 4, 7
; CHECK-NEXT: addc 3, 3, 5
; CHECK-NEXT: addze 5, 7
; CHECK-NEXT: or 4, 4, 5
; CHECK-NEXT: std 4, 0(6)
; CHECK-NEXT: blr
entry:
%0 = tail call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
%1 = extractvalue { i64, i1 } %0, 1
%2 = extractvalue { i64, i1 } %0, 0
%3 = tail call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %2, i64 %c)
%4 = extractvalue { i64, i1 } %3, 1
%5 = extractvalue { i64, i1 } %3, 0
%6 = or i1 %1, %4
%7 = zext i1 %6 to i64
store i64 %7, ptr %ovf, align 8
ret i64 %5
}
Loading