Skip to content

Commit c5ca1b8

Browse files
authored
[PPC] Add custom lowering for uaddo (#110137)
Improve the codegen for uaddo node for i64 in 64-bit mode and i32 in 32-bit mode by custom lowering.
1 parent 17e9752 commit c5ca1b8

File tree

5 files changed

+145
-3
lines changed

5 files changed

+145
-3
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
198198
}
199199
}
200200

201+
setOperationAction(ISD::UADDO, isPPC64 ? MVT::i64 : MVT::i32, Custom);
202+
201203
// Match BITREVERSE to customized fast code sequence in the td file.
202204
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
203205
setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
@@ -11967,11 +11969,51 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
1196711969
llvm_unreachable("ERROR:Should return for all cases within swtich.");
1196811970
}
1196911971

11972+
/// Custom lowering for ISD::UADDO (reached from LowerOperation's switch).
///
/// \param Op  the UADDO node; operand 0 and operand 1 are the two addends,
///            result 0 is the sum and result 1 is the overflow flag.
/// \param DAG the SelectionDAG used to create the replacement nodes.
/// \returns an empty SDValue for the bail-out cases below (per the existing
///          comments, this defers to the target independent lowering);
///          otherwise a MERGE_VALUES node holding the ADDC sum and an
///          overflow value produced by an ADDE that consumes ADDC's glue.
SDValue PPCTargetLowering::LowerUaddo(SDValue Op, SelectionDAG &DAG) const {
11973+
// Default to target independent lowering if there is a logical user of the
11974+
// carry-bit.
// Concretely: bail out if any user is a SELECT, or is a bitwise logic op
// that has an operand which is neither a UADDO nor a MERGE_VALUES node.
// NOTE(review): Op->uses() looks like it walks users of every result of the
// node, not only the carry result — confirm that is the intent.
11975+
for (SDNode *U : Op->uses()) {
11976+
if (U->getOpcode() == ISD::SELECT)
11977+
return SDValue();
11978+
if (ISD::isBitwiseLogicOp(U->getOpcode())) {
11979+
for (unsigned i = 0, ie = U->getNumOperands(); i != ie; ++i) {
11980+
if (U->getOperand(i).getOpcode() != ISD::UADDO &&
11981+
U->getOperand(i).getOpcode() != ISD::MERGE_VALUES)
11982+
return SDValue();
11983+
}
11984+
}
11985+
}
11986+
SDValue LHS = Op.getOperand(0);
11987+
SDValue RHS = Op.getOperand(1);
11988+
SDLoc dl(Op);
11989+
11990+
// Default to target independent lowering for special cases handled there.
// The special cases are an RHS that is the constant 1 or the all-ones
// constant.
11991+
if (isOneConstant(RHS) || isAllOnesConstant(RHS))
11992+
return SDValue();
11993+
11994+
// VT is the type of the sum (result 0 of the UADDO node).
EVT VT = Op.getNode()->getValueType(0);
11995+
11996+
SDValue ADDC;
11997+
SDValue Overflow;
// VTs is the full result type list of the original node; it is reused for
// the MERGE_VALUES node so the replacement has the same result types.
11998+
SDVTList VTs = Op.getNode()->getVTList();
11999+
12000+
// ADDC yields the sum as result 0 and a Glue value as result 1.
ADDC = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), LHS, RHS);
12001+
// ADDE of two zero constants plus ADDC's glue result: the VT-typed value
// result is used below as the overflow indicator (presumably 0 or 1, i.e.
// just the carry out of the ADDC).
Overflow = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(VT, MVT::Glue),
12002+
DAG.getConstant(0, dl, VT), DAG.getConstant(0, dl, VT),
12003+
ADDC.getValue(1));
12004+
// Narrow the VT-typed overflow value to the type of the UADDO node's
// second result.
SDValue OverflowTrunc =
12005+
DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12006+
// Package {sum, overflow} with the original node's result types.
SDValue Res =
12007+
DAG.getNode(ISD::MERGE_VALUES, dl, VTs, ADDC.getValue(0), OverflowTrunc);
12008+
return Res;
12009+
}
12010+
1197012011
/// LowerOperation - Provide custom lowering hooks for some operations.
1197112012
///
1197212013
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
1197312014
switch (Op.getOpcode()) {
1197412015
default: llvm_unreachable("Wasn't expecting to be able to lower this!");
12016+
case ISD::UADDO: return LowerUaddo(Op, DAG);
1197512017
case ISD::FPOW: return lowerPow(Op, DAG);
1197612018
case ISD::FSIN: return lowerSin(Op, DAG);
1197712019
case ISD::FCOS: return lowerCos(Op, DAG);

llvm/lib/Target/PowerPC/PPCISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1277,6 +1277,7 @@ namespace llvm {
12771277
SDValue LowerGlobalTLSAddressLinux(SDValue Op, SelectionDAG &DAG) const;
12781278
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
12791279
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1280+
SDValue LowerUaddo(SDValue Op, SelectionDAG &DAG) const;
12801281
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
12811282
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
12821283
SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;

llvm/test/CodeGen/PowerPC/sat-add.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -170,11 +170,10 @@ define i64 @unsigned_sat_constant_i64_using_cmp_sum(i64 %x) {
170170
define i64 @unsigned_sat_constant_i64_using_cmp_notval(i64 %x) {
171171
; CHECK-LABEL: unsigned_sat_constant_i64_using_cmp_notval:
172172
; CHECK: # %bb.0:
173-
; CHECK-NEXT: li 5, -43
174173
; CHECK-NEXT: addi 4, 3, 42
175-
; CHECK-NEXT: cmpld 3, 5
174+
; CHECK-NEXT: cmpld 4, 3
176175
; CHECK-NEXT: li 3, -1
177-
; CHECK-NEXT: iselgt 3, 3, 4
176+
; CHECK-NEXT: isellt 3, 3, 4
178177
; CHECK-NEXT: blr
179178
%a = add i64 %x, 42
180179
%c = icmp ugt i64 %x, -43

llvm/test/CodeGen/PowerPC/uaddo-32.ll

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu | FileCheck %s
3+
; RUN: llc < %s -mtriple=powerpc-ibm-aix-xcoff | FileCheck %s
4+
5+
define noundef i32 @add(i32 noundef %a, i32 noundef %b, ptr nocapture noundef writeonly %ovf) {
6+
; CHECK-LABEL: add:
7+
; CHECK: # %bb.0: # %entry
8+
; CHECK-NEXT: li 6, 0
9+
; CHECK-NEXT: addc 3, 3, 4
10+
; CHECK-NEXT: addze 4, 6
11+
; CHECK-NEXT: stw 4, 0(5)
12+
; CHECK-NEXT: blr
13+
entry:
14+
%0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
15+
%1 = extractvalue { i32, i1 } %0, 1
16+
%2 = extractvalue { i32, i1 } %0, 0
17+
%3 = zext i1 %1 to i32
18+
store i32 %3, ptr %ovf, align 8
19+
ret i32 %2
20+
}
21+
22+
declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)
23+
24+
define noundef zeroext i1 @add_overflow(i32 noundef %a, i32 noundef %b, ptr nocapture noundef writeonly %ovf) {
25+
; CHECK-LABEL: add_overflow:
26+
; CHECK: # %bb.0: # %entry
27+
; CHECK-NEXT: li 6, 0
28+
; CHECK-NEXT: addc 4, 3, 4
29+
; CHECK-NEXT: addze 3, 6
30+
; CHECK-NEXT: stw 4, 0(5)
31+
; CHECK-NEXT: blr
32+
entry:
33+
%0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
34+
%1 = extractvalue { i32, i1 } %0, 1
35+
%2 = extractvalue { i32, i1 } %0, 0
36+
store i32 %2, ptr %ovf, align 8
37+
ret i1 %1
38+
}

llvm/test/CodeGen/PowerPC/uaddo-64.ll

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
3+
; RUN: llc < %s -mtriple=powerpc64-ibm-aix-xcoff | FileCheck %s
4+
5+
define noundef i64 @add(i64 noundef %a, i64 noundef %b, ptr nocapture noundef writeonly %ovf) {
6+
; CHECK-LABEL: add:
7+
; CHECK: # %bb.0: # %entry
8+
; CHECK-NEXT: li 6, 0
9+
; CHECK-NEXT: addc 3, 3, 4
10+
; CHECK-NEXT: addze 4, 6
11+
; CHECK-NEXT: std 4, 0(5)
12+
; CHECK-NEXT: blr
13+
entry:
14+
%0 = tail call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
15+
%1 = extractvalue { i64, i1 } %0, 1
16+
%2 = extractvalue { i64, i1 } %0, 0
17+
%3 = zext i1 %1 to i64
18+
store i64 %3, ptr %ovf, align 8
19+
ret i64 %2
20+
}
21+
22+
declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64)
23+
24+
define noundef zeroext i1 @add_overflow(i64 noundef %a, i64 noundef %b, ptr nocapture noundef writeonly %ovf) {
25+
; CHECK-LABEL: add_overflow:
26+
; CHECK: # %bb.0: # %entry
27+
; CHECK-NEXT: li 6, 0
28+
; CHECK-NEXT: addc 4, 3, 4
29+
; CHECK-NEXT: addze 3, 6
30+
; CHECK-NEXT: std 4, 0(5)
31+
; CHECK-NEXT: blr
32+
entry:
33+
%0 = tail call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
34+
%1 = extractvalue { i64, i1 } %0, 1
35+
%2 = extractvalue { i64, i1 } %0, 0
36+
store i64 %2, ptr %ovf, align 8
37+
ret i1 %1
38+
}
39+
40+
define noundef i64 @addWithCarryIn (i64 noundef %a, i64 noundef %b, i64 noundef %c, ptr nocapture noundef writeonly %ovf) {
41+
; CHECK-LABEL: addWithCarryIn:
42+
; CHECK: # %bb.0: # %entry
43+
; CHECK-NEXT: li 7, 0
44+
; CHECK-NEXT: addc 3, 3, 4
45+
; CHECK-NEXT: addze 4, 7
46+
; CHECK-NEXT: addc 3, 3, 5
47+
; CHECK-NEXT: addze 5, 7
48+
; CHECK-NEXT: or 4, 4, 5
49+
; CHECK-NEXT: std 4, 0(6)
50+
; CHECK-NEXT: blr
51+
entry:
52+
%0 = tail call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
53+
%1 = extractvalue { i64, i1 } %0, 1
54+
%2 = extractvalue { i64, i1 } %0, 0
55+
%3 = tail call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %2, i64 %c)
56+
%4 = extractvalue { i64, i1 } %3, 1
57+
%5 = extractvalue { i64, i1 } %3, 0
58+
%6 = or i1 %1, %4
59+
%7 = zext i1 %6 to i64
60+
store i64 %7, ptr %ovf, align 8
61+
ret i64 %5
62+
}

0 commit comments

Comments
 (0)