Skip to content

Commit 65420c8

Browse files
committed
DAG: Use getNegatedExpression in combineMinNumMaxNum
Computing the negated RHS expression just to see whether it compares equal, and then throwing it away, feels dirty.
1 parent 3b80d02 commit 65420c8

File tree

3 files changed

+59
-59
lines changed

3 files changed

+59
-59
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4070,22 +4070,34 @@ class TargetLowering : public TargetLoweringBase {
40704070
NegatibleCost &Cost,
40714071
unsigned Depth = 0) const;
40724072

4073-
/// This is the helper function to return the newly negated expression only
4074-
/// when the cost is cheaper.
4075-
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG,
4076-
bool LegalOps, bool OptForSize,
4077-
unsigned Depth = 0) const {
4073+
SDValue getCheaperOrNeutralNegatedExpression(
4074+
SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize,
4075+
const NegatibleCost CostThreshold = NegatibleCost::Neutral,
4076+
unsigned Depth = 0) const {
40784077
NegatibleCost Cost = NegatibleCost::Expensive;
40794078
SDValue Neg =
40804079
getNegatedExpression(Op, DAG, LegalOps, OptForSize, Cost, Depth);
4081-
if (Neg && Cost == NegatibleCost::Cheaper)
4080+
if (!Neg)
4081+
return SDValue();
4082+
4083+
if (Cost <= CostThreshold)
40824084
return Neg;
4085+
40834086
// Remove the newly created node to avoid side effects on the DAG.
4084-
if (Neg && Neg->use_empty())
4087+
if (Neg->use_empty())
40854088
DAG.RemoveDeadNode(Neg.getNode());
40864089
return SDValue();
40874090
}
40884091

4092+
/// This is the helper function to return the newly negated expression only
4093+
/// when the cost is cheaper.
4094+
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG,
4095+
bool LegalOps, bool OptForSize,
4096+
unsigned Depth = 0) const {
4097+
return getCheaperOrNeutralNegatedExpression(Op, DAG, LegalOps, OptForSize,
4098+
NegatibleCost::Cheaper, Depth);
4099+
}
4100+
40894101
/// This is the helper function to return the newly negated expression if
40904102
/// the cost is not expensive.
40914103
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps,

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 25 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,10 @@ namespace {
387387
SDValue PromoteExtend(SDValue Op);
388388
bool PromoteLoad(SDValue Op);
389389

390+
SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
391+
SDValue RHS, SDValue True, SDValue False,
392+
ISD::CondCode CC);
393+
390394
/// Call the node-specific routine that knows how to fold each
391395
/// particular type of node. If that doesn't do anything, try the
392396
/// target-specific DAG combines.
@@ -10392,36 +10396,39 @@ static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
1039210396
}
1039310397

1039410398
/// Generate Min/Max node
10395-
static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
10396-
SDValue RHS, SDValue True, SDValue False,
10397-
ISD::CondCode CC, const TargetLowering &TLI,
10398-
SelectionDAG &DAG) {
10399+
SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
10400+
SDValue RHS, SDValue True,
10401+
SDValue False, ISD::CondCode CC) {
1039910402
if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
1040010403
return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG);
1040110404

1040210405
// If we can't directly match this, try to see if we can pull an fneg out of
1040310406
// the select.
10404-
if (True.getOpcode() != ISD::FNEG)
10407+
SDValue NegTrue = TLI.getCheaperOrNeutralNegatedExpression(
10408+
True, DAG, LegalOperations, ForCodeSize);
10409+
if (!NegTrue)
1040510410
return SDValue();
1040610411

10407-
ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS);
10408-
ConstantFPSDNode *CFalse = dyn_cast<ConstantFPSDNode>(False);
10409-
SDValue NegTrue = True.getOperand(0);
10412+
HandleSDNode NegTrueHandle(NegTrue);
1041010413

1041110414
// Try to unfold an fneg from the select if we are comparing the negated
1041210415
// constant.
1041310416
//
1041410417
// select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K))
1041510418
//
1041610419
// TODO: Handle fabs
10417-
if (LHS == NegTrue && CFalse && CRHS) {
10418-
APFloat NegRHS = neg(CRHS->getValueAPF());
10419-
if (NegRHS == CFalse->getValueAPF()) {
10420-
SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
10421-
False, CC, TLI, DAG);
10422-
if (Combined)
10420+
if (LHS == NegTrue) {
10421+
// The negated true value is the compare LHS; now check whether the negated
10422+
// compare RHS is the select's false value as well.
10423+
SDValue NegRHS = TLI.getCheaperOrNeutralNegatedExpression(
10424+
RHS, DAG, LegalOperations, ForCodeSize);
10425+
if (NegRHS) {
10426+
HandleSDNode NegRHSHandle(NegRHS);
10427+
if (NegRHS == False) {
10428+
SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
10429+
False, CC, TLI, DAG);
1042310430
return DAG.getNode(ISD::FNEG, DL, VT, Combined);
10424-
return SDValue();
10431+
}
1042510432
}
1042610433
}
1042710434

@@ -10812,8 +10819,8 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
1081210819
//
1081310820
// This is OK if we don't care what happens if either operand is a NaN.
1081410821
if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
10815-
if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
10816-
CC, TLI, DAG))
10822+
if (SDValue FMinMax =
10823+
combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
1081710824
return FMinMax;
1081810825

1081910826
// Use 'unsigned add with overflow' to optimize an unsigned saturating add.
@@ -11325,8 +11332,7 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
1132511332
// NaN.
1132611333
//
1132711334
if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
11328-
if (SDValue FMinMax =
11329-
combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
11335+
if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
1133011336
return FMinMax;
1133111337
}
1133211338

llvm/test/CodeGen/ARM/unsafe-fneg-select-minnum-maxnum-combine.ll

Lines changed: 15 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -67,13 +67,10 @@ define float @select_fsub0_or_8_cmp_olt_fsub1_neg8_f32(float %a, float %b) #0 {
6767
; CHECK-NEXT: vmov.f32 s0, #4.000000e+00
6868
; CHECK-NEXT: vmov s2, r0
6969
; CHECK-NEXT: vmov.f32 s4, #-8.000000e+00
70-
; CHECK-NEXT: vmov.f32 s8, #8.000000e+00
71-
; CHECK-NEXT: vsub.f32 s6, s0, s2
72-
; CHECK-NEXT: vsub.f32 s0, s2, s0
73-
; CHECK-NEXT: vcmp.f32 s4, s6
74-
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
75-
; CHECK-NEXT: vselgt.f32 s0, s0, s8
70+
; CHECK-NEXT: vsub.f32 s0, s0, s2
71+
; CHECK-NEXT: vminnm.f32 s0, s0, s4
7672
; CHECK-NEXT: vmov r0, s0
73+
; CHECK-NEXT: eor r0, r0, #-2147483648
7774
; CHECK-NEXT: mov pc, lr
7875
%sub.0 = fsub nnan nsz float 4.0, %a
7976
%sub.1 = fsub nnan nsz float %a, 4.0
@@ -88,13 +85,10 @@ define float @select_fsub0_or_neg8_cmp_olt_fsub1_8_f32(float %a, float %b) #0 {
8885
; CHECK-NEXT: vmov.f32 s0, #4.000000e+00
8986
; CHECK-NEXT: vmov s2, r0
9087
; CHECK-NEXT: vmov.f32 s4, #8.000000e+00
91-
; CHECK-NEXT: vmov.f32 s8, #-8.000000e+00
92-
; CHECK-NEXT: vsub.f32 s6, s0, s2
93-
; CHECK-NEXT: vsub.f32 s0, s2, s0
94-
; CHECK-NEXT: vcmp.f32 s4, s6
95-
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
96-
; CHECK-NEXT: vselgt.f32 s0, s0, s8
88+
; CHECK-NEXT: vsub.f32 s0, s0, s2
89+
; CHECK-NEXT: vminnm.f32 s0, s0, s4
9790
; CHECK-NEXT: vmov r0, s0
91+
; CHECK-NEXT: eor r0, r0, #-2147483648
9892
; CHECK-NEXT: mov pc, lr
9993
%sub.0 = fsub nnan nsz float 4.0, %a
10094
%sub.1 = fsub nnan nsz float %a, 4.0
@@ -108,15 +102,11 @@ define float @select_mul4_or_neg8_cmp_olt_mulneg4_8_f32(float %a, float %b) #0 {
108102
; CHECK: @ %bb.0:
109103
; CHECK-NEXT: vmov.f32 s0, #-4.000000e+00
110104
; CHECK-NEXT: vmov s2, r0
111-
; CHECK-NEXT: vmov.f32 s6, #8.000000e+00
112-
; CHECK-NEXT: vmov.f32 s4, #4.000000e+00
113-
; CHECK-NEXT: vmov.f32 s8, #-8.000000e+00
105+
; CHECK-NEXT: vmov.f32 s4, #8.000000e+00
114106
; CHECK-NEXT: vmul.f32 s0, s2, s0
115-
; CHECK-NEXT: vmul.f32 s2, s2, s4
116-
; CHECK-NEXT: vcmp.f32 s6, s0
117-
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
118-
; CHECK-NEXT: vselgt.f32 s0, s2, s8
107+
; CHECK-NEXT: vminnm.f32 s0, s0, s4
119108
; CHECK-NEXT: vmov r0, s0
109+
; CHECK-NEXT: eor r0, r0, #-2147483648
120110
; CHECK-NEXT: mov pc, lr
121111
%mul.0 = fmul nnan nsz float %a, 4.0
122112
%mul.1 = fmul nnan nsz float %a, -4.0
@@ -130,15 +120,11 @@ define float @select_mul4_or_8_cmp_olt_mulneg4_neg8_f32(float %a, float %b) #0 {
130120
; CHECK: @ %bb.0:
131121
; CHECK-NEXT: vmov.f32 s0, #-4.000000e+00
132122
; CHECK-NEXT: vmov s2, r0
133-
; CHECK-NEXT: vmov.f32 s6, #-8.000000e+00
134-
; CHECK-NEXT: vmov.f32 s4, #4.000000e+00
135-
; CHECK-NEXT: vmov.f32 s8, #8.000000e+00
123+
; CHECK-NEXT: vmov.f32 s4, #-8.000000e+00
136124
; CHECK-NEXT: vmul.f32 s0, s2, s0
137-
; CHECK-NEXT: vmul.f32 s2, s2, s4
138-
; CHECK-NEXT: vcmp.f32 s6, s0
139-
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
140-
; CHECK-NEXT: vselgt.f32 s0, s2, s8
125+
; CHECK-NEXT: vminnm.f32 s0, s0, s4
141126
; CHECK-NEXT: vmov r0, s0
127+
; CHECK-NEXT: eor r0, r0, #-2147483648
142128
; CHECK-NEXT: mov pc, lr
143129
%mul.0 = fmul nnan nsz float %a, 4.0
144130
%mul.1 = fmul nnan nsz float %a, -4.0
@@ -194,15 +180,11 @@ define float @select_mulneg4_or_neg8_cmp_olt_mul4_8_f32(float %a, float %b) #0 {
194180
; CHECK: @ %bb.0:
195181
; CHECK-NEXT: vmov.f32 s0, #4.000000e+00
196182
; CHECK-NEXT: vmov s2, r0
197-
; CHECK-NEXT: vmov.f32 s6, #8.000000e+00
198-
; CHECK-NEXT: vmov.f32 s4, #-4.000000e+00
199-
; CHECK-NEXT: vmov.f32 s8, #-8.000000e+00
183+
; CHECK-NEXT: vmov.f32 s4, #8.000000e+00
200184
; CHECK-NEXT: vmul.f32 s0, s2, s0
201-
; CHECK-NEXT: vmul.f32 s2, s2, s4
202-
; CHECK-NEXT: vcmp.f32 s6, s0
203-
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
204-
; CHECK-NEXT: vselgt.f32 s0, s2, s8
185+
; CHECK-NEXT: vminnm.f32 s0, s0, s4
205186
; CHECK-NEXT: vmov r0, s0
187+
; CHECK-NEXT: eor r0, r0, #-2147483648
206188
; CHECK-NEXT: mov pc, lr
207189
%mul.0 = fmul nnan nsz float %a, -4.0
208190
%mul.1 = fmul nnan nsz float %a, 4.0

0 commit comments

Comments
 (0)