Skip to content

Commit 3b80d02

Browse files
committed
DAG: Look through fneg when trying to create unsafe minnum/maxnum
This makes most sense for isFNegFree targets, but shouldn't make things worse without it. This avoids AMDGPU test regressions in a future patch. For some reason APFloat::compareAbsoluteValue is private, so compute the neg of the constants.
1 parent 586ce6a commit 3b80d02

File tree

2 files changed

+52
-30
lines changed

2 files changed

+52
-30
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 42 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10346,14 +10346,11 @@ static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
1034610346
DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
1034710347
}
1034810348

10349-
/// Generate Min/Max node
10350-
static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
10351-
SDValue RHS, SDValue True, SDValue False,
10352-
ISD::CondCode CC, const TargetLowering &TLI,
10353-
SelectionDAG &DAG) {
10354-
if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
10355-
return SDValue();
10356-
10349+
static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
10350+
SDValue RHS, SDValue True, SDValue False,
10351+
ISD::CondCode CC,
10352+
const TargetLowering &TLI,
10353+
SelectionDAG &DAG) {
1035710354
EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
1035810355
switch (CC) {
1035910356
case ISD::SETOLT:
@@ -10394,6 +10391,43 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
1039410391
}
1039510392
}
1039610393

10394+
/// Generate Min/Max node
10395+
static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
10396+
SDValue RHS, SDValue True, SDValue False,
10397+
ISD::CondCode CC, const TargetLowering &TLI,
10398+
SelectionDAG &DAG) {
10399+
if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
10400+
return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG);
10401+
10402+
// If we can't directly match this, try to see if we can pull an fneg out of
10403+
// the select.
10404+
if (True.getOpcode() != ISD::FNEG)
10405+
return SDValue();
10406+
10407+
ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS);
10408+
ConstantFPSDNode *CFalse = dyn_cast<ConstantFPSDNode>(False);
10409+
SDValue NegTrue = True.getOperand(0);
10410+
10411+
// Try to unfold an fneg from the select if we are comparing the negated
10412+
// constant.
10413+
//
10414+
// select (setcc x, K) (fneg x), -K -> fneg(minnum/maxnum(x, K))
10415+
//
10416+
// TODO: Handle fabs
10417+
if (LHS == NegTrue && CFalse && CRHS) {
10418+
APFloat NegRHS = neg(CRHS->getValueAPF());
10419+
if (NegRHS == CFalse->getValueAPF()) {
10420+
SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
10421+
False, CC, TLI, DAG);
10422+
if (Combined)
10423+
return DAG.getNode(ISD::FNEG, DL, VT, Combined);
10424+
return SDValue();
10425+
}
10426+
}
10427+
10428+
return SDValue();
10429+
}
10430+
1039710431
/// If a (v)select has a condition value that is a sign-bit test, try to smear
1039810432
/// the condition operand sign-bit across the value width and use it as a mask.
1039910433
static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {

llvm/test/CodeGen/ARM/unsafe-fneg-select-minnum-maxnum-combine.ll

Lines changed: 10 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,9 @@ define float @select_fneg_a_or_8_cmp_olt_a_neg8_f32(float %a, float %b) #0 {
66
; CHECK: @ %bb.0:
77
; CHECK-NEXT: vmov.f32 s0, #-8.000000e+00
88
; CHECK-NEXT: vmov s2, r0
9-
; CHECK-NEXT: vmov.f32 s4, #8.000000e+00
10-
; CHECK-NEXT: vneg.f32 s6, s2
11-
; CHECK-NEXT: vcmp.f32 s0, s2
12-
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
13-
; CHECK-NEXT: vselgt.f32 s0, s6, s4
9+
; CHECK-NEXT: vminnm.f32 s0, s2, s0
1410
; CHECK-NEXT: vmov r0, s0
11+
; CHECK-NEXT: eor r0, r0, #-2147483648
1512
; CHECK-NEXT: mov pc, lr
1613
%fneg.a = fneg nnan nsz float %a
1714
%cmp.a = fcmp nnan nsz olt float %a, -8.0
@@ -22,13 +19,10 @@ define float @select_fneg_a_or_8_cmp_olt_a_neg8_f32(float %a, float %b) #0 {
2219
define half @select_fneg_a_or_8_cmp_olt_a_neg8_f16(half %a, half %b) #0 {
2320
; CHECK-LABEL: select_fneg_a_or_8_cmp_olt_a_neg8_f16:
2421
; CHECK: @ %bb.0:
25-
; CHECK-NEXT: vmov.f16 s4, r0
2622
; CHECK-NEXT: vmov.f16 s0, #-8.000000e+00
27-
; CHECK-NEXT: vcmp.f16 s0, s4
28-
; CHECK-NEXT: vmov.f16 s2, #8.000000e+00
29-
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
30-
; CHECK-NEXT: vneg.f16 s6, s4
31-
; CHECK-NEXT: vselgt.f16 s0, s6, s2
23+
; CHECK-NEXT: vmov.f16 s2, r0
24+
; CHECK-NEXT: vminnm.f16 s0, s2, s0
25+
; CHECK-NEXT: vneg.f16 s0, s0
3226
; CHECK-NEXT: vmov r0, s0
3327
; CHECK-NEXT: mov pc, lr
3428
%fneg.a = fneg nnan nsz half %a
@@ -42,12 +36,9 @@ define float @select_fneg_a_or_8_cmp_ogt_a_neg8_f32(float %a, float %b) #0 {
4236
; CHECK: @ %bb.0:
4337
; CHECK-NEXT: vmov.f32 s0, #-8.000000e+00
4438
; CHECK-NEXT: vmov s2, r0
45-
; CHECK-NEXT: vmov.f32 s4, #8.000000e+00
46-
; CHECK-NEXT: vneg.f32 s6, s2
47-
; CHECK-NEXT: vcmp.f32 s2, s0
48-
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
49-
; CHECK-NEXT: vselgt.f32 s0, s6, s4
39+
; CHECK-NEXT: vmaxnm.f32 s0, s2, s0
5040
; CHECK-NEXT: vmov r0, s0
41+
; CHECK-NEXT: eor r0, r0, #-2147483648
5142
; CHECK-NEXT: mov pc, lr
5243
%fneg.a = fneg nnan nsz float %a
5344
%cmp.a = fcmp nnan nsz ogt float %a, -8.0
@@ -58,13 +49,10 @@ define float @select_fneg_a_or_8_cmp_ogt_a_neg8_f32(float %a, float %b) #0 {
5849
define half @select_fneg_a_or_8_cmp_ogt_a_neg8_f16(half %a, half %b) #0 {
5950
; CHECK-LABEL: select_fneg_a_or_8_cmp_ogt_a_neg8_f16:
6051
; CHECK: @ %bb.0:
61-
; CHECK-NEXT: vmov.f16 s4, r0
6252
; CHECK-NEXT: vmov.f16 s0, #-8.000000e+00
63-
; CHECK-NEXT: vcmp.f16 s4, s0
64-
; CHECK-NEXT: vmov.f16 s2, #8.000000e+00
65-
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
66-
; CHECK-NEXT: vneg.f16 s6, s4
67-
; CHECK-NEXT: vselgt.f16 s0, s6, s2
53+
; CHECK-NEXT: vmov.f16 s2, r0
54+
; CHECK-NEXT: vmaxnm.f16 s0, s2, s0
55+
; CHECK-NEXT: vneg.f16 s0, s0
6856
; CHECK-NEXT: vmov r0, s0
6957
; CHECK-NEXT: mov pc, lr
7058
%fneg.a = fneg nnan nsz half %a

0 commit comments

Comments
 (0)