Skip to content

Commit 5745b6a

Browse files
committed
ARM: use target-specific SUBS node when combining cmp with cmov.
This has two positive effects. First, using a custom node prevents the result from being recombined in a way that leads to an infinite loop, which was possible because the output DAG is notionally a little more complex than the input one. Second, using a flag-setting instruction allows the subtraction to be folded with the related comparison more easily. Differential Revision: https://reviews.llvm.org/D53190 (llvm-svn: 348122)
1 parent 3c7d062 commit 5745b6a

File tree

8 files changed

+62
-30
lines changed

8 files changed

+62
-30
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1282,6 +1282,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
12821282
case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
12831283

12841284
case ARMISD::CMOV: return "ARMISD::CMOV";
1285+
case ARMISD::SUBS: return "ARMISD::SUBS";
12851286

12861287
case ARMISD::SSAT: return "ARMISD::SSAT";
12871288
case ARMISD::USAT: return "ARMISD::USAT";
@@ -12707,39 +12708,47 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
1270712708
DAG.getConstant(1, dl, MVT::i32), Neg.getValue(1));
1270812709
Res = DAG.getNode(ISD::ADDCARRY, dl, VTs, Sub, Neg, Carry);
1270912710
}
12710-
} else if (CC == ARMCC::NE && LHS != RHS &&
12711+
} else if (CC == ARMCC::NE && !isNullConstant(RHS) &&
1271112712
(!Subtarget->isThumb1Only() || isPowerOf2Constant(TrueVal))) {
1271212713
// This seems pointless but will allow us to combine it further below.
12713-
// CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUB x, y), z, !=, (CMPZ x, y)
12714-
SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
12714+
// CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUBS x, y), z, !=, (SUBS x, y):1
12715+
SDValue Sub =
12716+
DAG.getNode(ARMISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
12717+
SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
12718+
Sub.getValue(1), SDValue());
1271512719
Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc,
12716-
N->getOperand(3), Cmp);
12720+
N->getOperand(3), CPSRGlue.getValue(1));
12721+
FalseVal = Sub;
1271712722
}
1271812723
} else if (isNullConstant(TrueVal)) {
12719-
if (CC == ARMCC::EQ && LHS != RHS &&
12724+
if (CC == ARMCC::EQ && !isNullConstant(RHS) &&
1272012725
(!Subtarget->isThumb1Only() || isPowerOf2Constant(FalseVal))) {
1272112726
// This seems pointless but will allow us to combine it further below
1272212727
// Note that we change == for != as this is the dual for the case above.
12723-
// CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUB x, y), z, !=, (CMPZ x, y)
12724-
SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
12728+
// CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUBS x, y), z, !=, (SUBS x, y):1
12729+
SDValue Sub =
12730+
DAG.getNode(ARMISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
12731+
SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
12732+
Sub.getValue(1), SDValue());
1272512733
Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal,
1272612734
DAG.getConstant(ARMCC::NE, dl, MVT::i32),
12727-
N->getOperand(3), Cmp);
12735+
N->getOperand(3), CPSRGlue.getValue(1));
12736+
FalseVal = Sub;
1272812737
}
1272912738
}
1273012739

1273112740
// On Thumb1, the DAG above may be further combined if z is a power of 2
1273212741
// (z == 2 ^ K).
12733-
// CMOV (SUB x, y), z, !=, (CMPZ x, y) ->
12742+
// CMOV (SUBS x, y), z, !=, (SUBS x, y):1 ->
1273412743
// merge t3, t4
1273512744
// where t1 = (SUBCARRY (SUB x, y), z, 0)
1273612745
// t2 = (SUBCARRY (SUB x, y), t1:0, t1:1)
1273712746
// t3 = if K != 0 then (SHL t2:0, K) else t2:0
1273812747
// t4 = (SUB 1, t2:1) [ we want a carry, not a borrow ]
1273912748
const APInt *TrueConst;
1274012749
if (Subtarget->isThumb1Only() && CC == ARMCC::NE &&
12741-
(FalseVal.getOpcode() == ISD::SUB) && (FalseVal.getOperand(0) == LHS) &&
12742-
(FalseVal.getOperand(1) == RHS) &&
12750+
(FalseVal.getOpcode() == ARMISD::SUBS) &&
12751+
(FalseVal.getOperand(0) == LHS) && (FalseVal.getOperand(1) == RHS) &&
1274312752
(TrueConst = isPowerOf2Constant(TrueVal))) {
1274412753
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
1274512754
unsigned ShiftAmount = TrueConst->logBase2();

llvm/lib/Target/ARM/ARMISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ class VectorType;
8585
FMSTAT, // ARM fmstat instruction.
8686

8787
CMOV, // ARM conditional move instructions.
88+
SUBS, // Flag-setting subtraction.
8889

8990
SSAT, // Signed saturation
9091
USAT, // Unsigned saturation

llvm/lib/Target/ARM/ARMInstrInfo.td

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ def ARMintretflag : SDNode<"ARMISD::INTRET_FLAG", SDT_ARMcall,
144144
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
145145
def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
146146
[SDNPInGlue]>;
147+
def ARMsubs : SDNode<"ARMISD::SUBS", SDTIntBinOp, [SDNPOutGlue]>;
147148

148149
def ARMssatnoshift : SDNode<"ARMISD::SSAT", SDTIntSatNoShOp, []>;
149150

@@ -3641,6 +3642,14 @@ let isAdd = 1 in
36413642
defm ADDS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr, ARMaddc, 1>;
36423643
defm SUBS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr, ARMsubc>;
36433644

3645+
def : ARMPat<(ARMsubs GPR:$Rn, mod_imm:$imm), (SUBSri $Rn, mod_imm:$imm)>;
3646+
def : ARMPat<(ARMsubs GPR:$Rn, GPR:$Rm), (SUBSrr $Rn, $Rm)>;
3647+
def : ARMPat<(ARMsubs GPR:$Rn, so_reg_imm:$shift),
3648+
(SUBSrsi $Rn, so_reg_imm:$shift)>;
3649+
def : ARMPat<(ARMsubs GPR:$Rn, so_reg_reg:$shift),
3650+
(SUBSrsr $Rn, so_reg_reg:$shift)>;
3651+
3652+
36443653
let isAdd = 1 in
36453654
defm ADC : AI1_adde_sube_irs<0b0101, "adc", ARMadde, 1>;
36463655
defm SBC : AI1_adde_sube_irs<0b0110, "sbc", ARMsube>;

llvm/lib/Target/ARM/ARMInstrThumb.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1352,6 +1352,12 @@ let hasPostISelHook = 1, Defs = [CPSR] in {
13521352
Sched<[WriteALU]>;
13531353
}
13541354

1355+
1356+
def : T1Pat<(ARMsubs tGPR:$Rn, tGPR:$Rm), (tSUBSrr $Rn, $Rm)>;
1357+
def : T1Pat<(ARMsubs tGPR:$Rn, imm0_7:$imm3), (tSUBSi3 $Rn, imm0_7:$imm3)>;
1358+
def : T1Pat<(ARMsubs tGPR:$Rn, imm0_255:$imm8), (tSUBSi8 $Rn, imm0_255:$imm8)>;
1359+
1360+
13551361
// Sign-extend byte
13561362
def tSXTB : // A8.6.222
13571363
T1pIMiscEncode<{0,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),

llvm/lib/Target/ARM/ARMInstrThumb2.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2094,6 +2094,12 @@ defm t2SUB : T2I_bin_ii12rs<0b101, "sub", sub>;
20942094
defm t2ADDS : T2I_bin_s_irs <IIC_iALUi, IIC_iALUr, IIC_iALUsi, ARMaddc, 1>;
20952095
defm t2SUBS : T2I_bin_s_irs <IIC_iALUi, IIC_iALUr, IIC_iALUsi, ARMsubc>;
20962096

2097+
def : T2Pat<(ARMsubs GPRnopc:$Rn, t2_so_imm:$imm),
2098+
(t2SUBSri $Rn, t2_so_imm:$imm)>;
2099+
def : T2Pat<(ARMsubs GPRnopc:$Rn, rGPR:$Rm), (t2SUBSrr $Rn, $Rm)>;
2100+
def : T2Pat<(ARMsubs GPRnopc:$Rn, t2_so_reg:$ShiftedRm),
2101+
(t2SUBSrs $Rn, t2_so_reg:$ShiftedRm)>;
2102+
20972103
let hasPostISelHook = 1 in {
20982104
defm t2ADC : T2I_adde_sube_irs<0b1010, "adc", ARMadde, 1>;
20992105
defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc", ARMsube>;

llvm/test/CodeGen/ARM/CGP/arm-cgp-casts.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,9 +104,7 @@ entry:
104104

105105
; CHECK-COMMON-LABEL: or_icmp_ugt:
106106
; CHECK-COMMON: ldrb
107-
; CHECK-COMMON: sub.w
108-
; CHECK-COMMON-NOT: uxt
109-
; CHECK-COMMON: cmp.w
107+
; CHECK-COMMON: subs.w
110108
; CHECK-COMMON-NOT: uxt
111109
; CHECK-COMMON: cmp
112110
define i1 @or_icmp_ugt(i32 %arg, i8* %ptr) {

llvm/test/CodeGen/ARM/cmp.ll

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -39,15 +39,11 @@ define i1 @f6(i32 %a, i32 %b) {
3939

4040
define i1 @f7(i32 %a, i32 %b) {
4141
; CHECK-LABEL: f7:
42-
; CHECK: sub r2, r0, r1, lsr #6
43-
; CHECK: cmp r0, r1, lsr #6
44-
; CHECK: movwne r2, #1
45-
; CHECK: mov r0, r2
46-
; CHECK-T2: sub.w r2, r0, r1, lsr #6
47-
; CHECK-T2: cmp.w r0, r1, lsr #6
42+
; CHECK: subs r0, r0, r1, lsr #6
43+
; CHECK: movwne r0, #1
44+
; CHECK-T2: subs.w r0, r0, r1, lsr #6
4845
; CHECK-T2: it ne
49-
; CHECK-T2: movne r2, #1
50-
; CHECK-T2: mov r0, r2
46+
; CHECK-T2: movne r0, #1
5147
%tmp = lshr i32 %b, 6
5248
%tmp1 = icmp ne i32 %a, %tmp
5349
ret i1 %tmp1
@@ -68,15 +64,11 @@ define i1 @f8(i32 %a, i32 %b) {
6864

6965
define i1 @f9(i32 %a) {
7066
; CHECK-LABEL: f9:
71-
; CHECK: sub r1, r0, r0, ror #8
72-
; CHECK: cmp r0, r0, ror #8
73-
; CHECK: movwne r1, #1
74-
; CHECK: mov r0, r1
75-
; CHECK-T2: sub.w r1, r0, r0, ror #8
76-
; CHECK-T2: cmp.w r0, r0, ror #8
67+
; CHECK: subs r0, r0, r0, ror #8
68+
; CHECK: movwne r0, #1
69+
; CHECK-T2: subs.w r0, r0, r0, ror #8
7770
; CHECK-T2: it ne
78-
; CHECK-T2: movne r1, #1
79-
; CHECK-T2: mov r0, r1
71+
; CHECK-T2: movne r0, #1
8072
%l8 = shl i32 %a, 24
8173
%r8 = lshr i32 %a, 8
8274
%tmp = or i32 %l8, %r8

llvm/test/CodeGen/ARM/select.ll

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,3 +142,14 @@ define float @f12(i32 %a, i32 %b) nounwind uwtable readnone ssp {
142142
ret float %2
143143
}
144144

145+
; CHECK-LABEL: test_overflow_recombine:
146+
define i1 @test_overflow_recombine(i32 %in) {
147+
; CHECK: smull [[LO:r[0-9]+]], [[HI:r[0-9]+]]
148+
; CHECK: subs [[ZERO:r[0-9]+]], [[HI]], [[LO]], asr #31
149+
; CHECK: movne [[ZERO]], #1
150+
%prod = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 0, i32 %in)
151+
%overflow = extractvalue { i32, i1 } %prod, 1
152+
ret i1 %overflow
153+
}
154+
155+
declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32)

0 commit comments

Comments
 (0)