Skip to content

Commit ce992b4

Browse files
committed
[PowerPC] Add custom lowering for ssubo
This patch improves the code generated for the i32 ssubo node in 64-bit mode by custom lowering it.
1 parent fbb8234 commit ce992b4

File tree

3 files changed

+42
-6
lines changed

3 files changed

+42
-6
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
198198
}
199199
}
200200

201+
if (!Subtarget.hasP10Vector() && isPPC64) {
202+
setOperationAction(ISD::SSUBO, MVT::i32, Custom);
203+
}
204+
201205
// Match BITREVERSE to customized fast code sequence in the td file.
202206
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
203207
setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
@@ -11967,6 +11971,36 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
1196711971
llvm_unreachable("ERROR:Should return for all cases within swtich.");
1196811972
}
1196911973

11974+
/// Custom lowering for ISD::SSUBO (signed subtract with overflow).
///
/// The subtraction is carried out in i64 and overflow is detected by checking
/// whether the 64-bit difference still equals the sign extension of its own
/// low 32 bits.
///
/// \param Op  The SSUBO node; operand 0 is the minuend, operand 1 the
///            subtrahend. Result 0 is the difference, result 1 the overflow
///            flag.
/// \param DAG The SelectionDAG in which the replacement nodes are created.
/// \returns A merge-values node holding {difference, overflow}, each
///          truncated to the corresponding result type of \p Op.
SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  const EVT ResultVT = Op.getNode()->getValueType(0);
  const EVT OverflowVT = Op.getNode()->getValueType(1);

  // The operands must be *sign*-extended. The overflow test below compares the
  // upper bits of the 64-bit difference against the sign of its low word, so
  // those upper bits have to be well defined; ANY_EXTEND would leave them
  // unspecified and the computed flag would depend on stale register contents.
  SDValue LHS64 =
      DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Op.getOperand(0));
  SDValue RHS64 =
      DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Op.getOperand(1));

  // With sign-extended inputs the i64 subtraction itself cannot overflow.
  SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i64, LHS64, RHS64);

  // Sign-extend the low 32 bits of the difference back to 64 bits.
  SDValue Extsw = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i64, Sub,
                              DAG.getValueType(MVT::i32));

  // Non-zero exactly when the difference is not representable in 32 bits.
  SDValue Xor = DAG.getNode(ISD::XOR, dl, MVT::i64, Extsw, Sub);

  // Turn "Xor != 0" into a 0/1 value through the carry: adding -1 produces a
  // carry-out unless Xor is zero, and the following subtract-extended of
  // (Xor - 1) from Xor leaves just that carry in the result.
  SDValue Addic = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(MVT::i64, MVT::Glue),
                              Xor, DAG.getConstant(-1, dl, MVT::i64));
  SDValue Overflow =
      DAG.getNode(ISD::SUBE, dl, DAG.getVTList(MVT::i64, MVT::Glue), Xor, Addic,
                  Addic.getValue(1));

  SDValue OverflowTrunc = DAG.getNode(ISD::TRUNCATE, dl, OverflowVT, Overflow);

  // Narrow the difference back to the node's result type when it differs from
  // the i64 type the arithmetic was done in.
  SDValue SubTrunc = Sub;
  if (Sub->getValueType(0) != ResultVT)
    SubTrunc = DAG.getNode(ISD::TRUNCATE, dl, ResultVT, Sub);

  return DAG.getMergeValues({SubTrunc, OverflowTrunc}, dl);
}
12003+
1197012004
/// LowerOperation - Provide custom lowering hooks for some operations.
1197112005
///
1197212006
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -11988,6 +12022,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
1198812022
case ISD::SETCC: return LowerSETCC(Op, DAG);
1198912023
case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
1199012024
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
12025+
case ISD::SSUBO:
12026+
return LowerSSUBO(Op, DAG);
1199112027

1199212028
case ISD::INLINEASM:
1199312029
case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);

llvm/lib/Target/PowerPC/PPCISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1278,6 +1278,7 @@ namespace llvm {
12781278
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
12791279
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
12801280
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1281+
SDValue LowerSSUBO(SDValue Op, SelectionDAG &DAG) const;
12811282
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
12821283
SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
12831284
SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;

llvm/test/CodeGen/PowerPC/saddo-ssubo.ll

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -129,12 +129,11 @@ entry:
129129
define i1 @test_ssubo_i32(i32 %a, i32 %b) nounwind {
130130
; CHECK-LABEL: test_ssubo_i32:
131131
; CHECK: # %bb.0: # %entry
132-
; CHECK-NEXT: sub 5, 3, 4
133-
; CHECK-NEXT: cmpwi 1, 4, 0
134-
; CHECK-NEXT: cmpw 5, 3
135-
; CHECK-NEXT: li 3, 1
136-
; CHECK-NEXT: creqv 20, 5, 0
137-
; CHECK-NEXT: isel 3, 0, 3, 20
132+
; CHECK-NEXT: sub 3, 3, 4
133+
; CHECK-NEXT: extsw 4, 3
134+
; CHECK-NEXT: xor 3, 4, 3
135+
; CHECK-NEXT: addic 4, 3, -1
136+
; CHECK-NEXT: subfe 3, 4, 3
138137
; CHECK-NEXT: blr
139138
entry:
140139
%res = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) nounwind

0 commit comments

Comments
 (0)