Skip to content

Commit 50cd2ff

Browse files
author
Serguei Katkov
committed
[X86] Avoid use of the constant -1 for fminimum/fmaximum lowering
Instead of comparing the value for equality with the preferred zero, we can check just the sign of the value: if the sign bit is set, we should place this value as the second operand for minimum and as the first operand for maximum. In this case FMIN/FMAX will choose the right result for the 0.f and -0.f comparison. This gives us the following benefits: 1. We avoid loading a big 64-bit constant for fminimum. 2. For double on a non-64-bit platform we need to check only the high part of the value. 3. A test against zero to check the sign takes a smaller instruction encoding. Additionally, if we know that either of the values is guaranteed to be non-zero, we do not need to care about the 0.f and -0.f comparison. Reviewed By: e-kud Differential Revision: https://reviews.llvm.org/D149812
1 parent 4247806 commit 50cd2ff

File tree

4 files changed

+180
-193
lines changed

4 files changed

+180
-193
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -30276,13 +30276,15 @@ static SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, const X86Subtarget &Subtarget,
3027630276
bool IsXNeverNaN = DAG.isKnownNeverNaN(X);
3027730277
bool IsYNeverNaN = DAG.isKnownNeverNaN(Y);
3027830278
bool IgnoreSignedZero = DAG.getTarget().Options.NoSignedZerosFPMath ||
30279-
Op->getFlags().hasNoSignedZeros();
30279+
Op->getFlags().hasNoSignedZeros() ||
30280+
DAG.isKnownNeverZeroFloat(X) ||
30281+
DAG.isKnownNeverZeroFloat(Y);
3028030282
SDValue NewX, NewY;
30281-
if (IgnoreSignedZero || IsPreferredZero(Y) || DAG.isKnownNeverZeroFloat(X)) {
30283+
if (IgnoreSignedZero || IsPreferredZero(Y)) {
3028230284
// Operands are already in right order or order does not matter.
3028330285
NewX = X;
3028430286
NewY = Y;
30285-
} else if (IsPreferredZero(X) || DAG.isKnownNeverZeroFloat(Y)) {
30287+
} else if (IsPreferredZero(X)) {
3028630288
NewX = Y;
3028730289
NewY = X;
3028830290
} else if ((VT == MVT::f16 || Subtarget.hasDQI()) &&
@@ -30307,34 +30309,32 @@ static SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, const X86Subtarget &Subtarget,
3030730309
NewY = DAG.getSelect(DL, VT, NeedSwap, X, Y);
3030830310
return DAG.getNode(MinMaxOp, DL, VT, NewX, NewY, Op->getFlags());
3030930311
} else {
30310-
SDValue IsXZero;
30312+
SDValue IsXSigned;
3031130313
if (Subtarget.is64Bit() || VT != MVT::f64) {
3031230314
SDValue XInt = DAG.getNode(ISD::BITCAST, DL, IVT, X);
30313-
SDValue ZeroCst = DAG.getConstant(PreferredZero, DL, IVT);
30314-
IsXZero = DAG.getSetCC(DL, SetCCType, XInt, ZeroCst, ISD::SETEQ);
30315+
SDValue ZeroCst = DAG.getConstant(0, DL, IVT);
30316+
IsXSigned = DAG.getSetCC(DL, SetCCType, XInt, ZeroCst, ISD::SETLT);
3031530317
} else {
3031630318
assert(VT == MVT::f64);
3031730319
SDValue Ins = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2f64,
3031830320
DAG.getConstantFP(0, DL, MVT::v2f64), X,
3031930321
DAG.getIntPtrConstant(0, DL));
3032030322
SDValue VX = DAG.getNode(ISD::BITCAST, DL, MVT::v4f32, Ins);
30321-
SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VX,
30322-
DAG.getIntPtrConstant(0, DL));
30323-
Lo = DAG.getBitcast(MVT::i32, Lo);
3032430323
SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VX,
3032530324
DAG.getIntPtrConstant(1, DL));
3032630325
Hi = DAG.getBitcast(MVT::i32, Hi);
30327-
PreferredZero = APInt::getZero(SizeInBits / 2);
30328-
if (MinMaxOp == X86ISD::FMIN)
30329-
PreferredZero.setSignBit();
30330-
IsXZero = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
30331-
DAG.getConstant(PreferredZero, DL, MVT::i32));
30332-
IsXZero = DAG.getNode(ISD::OR, DL, MVT::i32, Lo, IsXZero);
30333-
IsXZero = DAG.getSetCC(DL, SetCCType, IsXZero,
30334-
DAG.getConstant(0, DL, MVT::i32), ISD::SETEQ);
30335-
}
30336-
NewX = DAG.getSelect(DL, VT, IsXZero, Y, X);
30337-
NewY = DAG.getSelect(DL, VT, IsXZero, X, Y);
30326+
SDValue ZeroCst = DAG.getConstant(0, DL, MVT::i32);
30327+
EVT SetCCType = TLI.getSetCCResultType(DAG.getDataLayout(),
30328+
*DAG.getContext(), MVT::i32);
30329+
IsXSigned = DAG.getSetCC(DL, SetCCType, Hi, ZeroCst, ISD::SETLT);
30330+
}
30331+
if (MinMaxOp == X86ISD::FMAX) {
30332+
NewX = DAG.getSelect(DL, VT, IsXSigned, X, Y);
30333+
NewY = DAG.getSelect(DL, VT, IsXSigned, Y, X);
30334+
} else {
30335+
NewX = DAG.getSelect(DL, VT, IsXSigned, Y, X);
30336+
NewY = DAG.getSelect(DL, VT, IsXSigned, X, Y);
30337+
}
3033830338
}
3033930339

3034030340
bool IgnoreNaN = DAG.getTarget().Options.NoNaNsFPMath ||

llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,8 @@ define half @test_fminimum(half %x, half %y) {
1010
; CHECK-LABEL: test_fminimum:
1111
; CHECK: # %bb.0:
1212
; CHECK-NEXT: vmovw %xmm0, %eax
13-
; CHECK-NEXT: movzwl %ax, %eax
14-
; CHECK-NEXT: cmpl $32768, %eax # imm = 0x8000
15-
; CHECK-NEXT: sete %al
13+
; CHECK-NEXT: testw %ax, %ax
14+
; CHECK-NEXT: sets %al
1615
; CHECK-NEXT: kmovd %eax, %k1
1716
; CHECK-NEXT: vmovaps %xmm1, %xmm2
1817
; CHECK-NEXT: vmovsh %xmm0, %xmm0, %xmm2 {%k1}
@@ -119,15 +118,14 @@ define half @test_fmaximum(half %x, half %y) {
119118
; CHECK: # %bb.0:
120119
; CHECK-NEXT: vmovw %xmm0, %eax
121120
; CHECK-NEXT: testw %ax, %ax
122-
; CHECK-NEXT: sete %al
121+
; CHECK-NEXT: sets %al
123122
; CHECK-NEXT: kmovd %eax, %k1
124-
; CHECK-NEXT: vmovaps %xmm1, %xmm2
125-
; CHECK-NEXT: vmovsh %xmm0, %xmm0, %xmm2 {%k1}
126-
; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k1}
127-
; CHECK-NEXT: vmaxsh %xmm2, %xmm0, %xmm1
128-
; CHECK-NEXT: vcmpunordsh %xmm0, %xmm0, %k1
123+
; CHECK-NEXT: vmovaps %xmm0, %xmm2
124+
; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm2 {%k1}
129125
; CHECK-NEXT: vmovsh %xmm0, %xmm0, %xmm1 {%k1}
130-
; CHECK-NEXT: vmovaps %xmm1, %xmm0
126+
; CHECK-NEXT: vmaxsh %xmm2, %xmm1, %xmm0
127+
; CHECK-NEXT: vcmpunordsh %xmm1, %xmm1, %k1
128+
; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k1}
131129
; CHECK-NEXT: retq
132130
%r = call half @llvm.maximum.f16(half %x, half %y)
133131
ret half %r

llvm/test/CodeGen/X86/extractelement-fp.ll

Lines changed: 42 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -677,35 +677,35 @@ define float @fmaximum_v4f32(<4 x float> %x, <4 x float> %y) nounwind {
677677
; X64: # %bb.0:
678678
; X64-NEXT: vmovd %xmm0, %eax
679679
; X64-NEXT: testl %eax, %eax
680-
; X64-NEXT: je .LBB30_1
680+
; X64-NEXT: js .LBB30_1
681681
; X64-NEXT: # %bb.2:
682-
; X64-NEXT: vmovdqa %xmm1, %xmm2
682+
; X64-NEXT: vmovdqa %xmm0, %xmm2
683683
; X64-NEXT: jmp .LBB30_3
684684
; X64-NEXT: .LBB30_1:
685-
; X64-NEXT: vmovdqa %xmm0, %xmm2
686-
; X64-NEXT: vmovdqa %xmm1, %xmm0
685+
; X64-NEXT: vmovdqa %xmm1, %xmm2
686+
; X64-NEXT: vmovdqa %xmm0, %xmm1
687687
; X64-NEXT: .LBB30_3:
688-
; X64-NEXT: vmaxss %xmm2, %xmm0, %xmm1
689-
; X64-NEXT: vcmpunordss %xmm0, %xmm0, %xmm2
690-
; X64-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
688+
; X64-NEXT: vmaxss %xmm2, %xmm1, %xmm0
689+
; X64-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2
690+
; X64-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0
691691
; X64-NEXT: retq
692692
;
693693
; X86-LABEL: fmaximum_v4f32:
694694
; X86: # %bb.0:
695695
; X86-NEXT: vmovd %xmm0, %eax
696696
; X86-NEXT: testl %eax, %eax
697-
; X86-NEXT: je .LBB30_1
697+
; X86-NEXT: js .LBB30_1
698698
; X86-NEXT: # %bb.2:
699-
; X86-NEXT: vmovdqa %xmm1, %xmm2
699+
; X86-NEXT: vmovdqa %xmm0, %xmm2
700700
; X86-NEXT: jmp .LBB30_3
701701
; X86-NEXT: .LBB30_1:
702-
; X86-NEXT: vmovdqa %xmm0, %xmm2
703-
; X86-NEXT: vmovdqa %xmm1, %xmm0
702+
; X86-NEXT: vmovdqa %xmm1, %xmm2
703+
; X86-NEXT: vmovdqa %xmm0, %xmm1
704704
; X86-NEXT: .LBB30_3:
705705
; X86-NEXT: pushl %eax
706-
; X86-NEXT: vmaxss %xmm2, %xmm0, %xmm1
707-
; X86-NEXT: vcmpunordss %xmm0, %xmm0, %xmm2
708-
; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
706+
; X86-NEXT: vmaxss %xmm2, %xmm1, %xmm0
707+
; X86-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2
708+
; X86-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0
709709
; X86-NEXT: vmovss %xmm0, (%esp)
710710
; X86-NEXT: flds (%esp)
711711
; X86-NEXT: popl %eax
@@ -720,40 +720,39 @@ define double @fmaximum_v4f64(<4 x double> %x, <4 x double> %y) nounwind {
720720
; X64: # %bb.0:
721721
; X64-NEXT: vmovq %xmm0, %rax
722722
; X64-NEXT: testq %rax, %rax
723-
; X64-NEXT: je .LBB31_1
723+
; X64-NEXT: js .LBB31_1
724724
; X64-NEXT: # %bb.2:
725-
; X64-NEXT: vmovdqa %xmm1, %xmm2
725+
; X64-NEXT: vmovdqa %xmm0, %xmm2
726726
; X64-NEXT: jmp .LBB31_3
727727
; X64-NEXT: .LBB31_1:
728-
; X64-NEXT: vmovdqa %xmm0, %xmm2
729-
; X64-NEXT: vmovdqa %xmm1, %xmm0
728+
; X64-NEXT: vmovdqa %xmm1, %xmm2
729+
; X64-NEXT: vmovdqa %xmm0, %xmm1
730730
; X64-NEXT: .LBB31_3:
731-
; X64-NEXT: vmaxsd %xmm2, %xmm0, %xmm1
732-
; X64-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm2
733-
; X64-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
731+
; X64-NEXT: vmaxsd %xmm2, %xmm1, %xmm0
732+
; X64-NEXT: vcmpunordsd %xmm1, %xmm1, %xmm2
733+
; X64-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
734734
; X64-NEXT: vzeroupper
735735
; X64-NEXT: retq
736736
;
737737
; X86-LABEL: fmaximum_v4f64:
738738
; X86: # %bb.0:
739-
; X86-NEXT: vpextrd $1, %xmm0, %eax
740-
; X86-NEXT: vmovd %xmm0, %ecx
741-
; X86-NEXT: orl %eax, %ecx
742-
; X86-NEXT: je .LBB31_1
739+
; X86-NEXT: vextractps $1, %xmm0, %eax
740+
; X86-NEXT: testl %eax, %eax
741+
; X86-NEXT: js .LBB31_1
743742
; X86-NEXT: # %bb.2:
744-
; X86-NEXT: vmovdqa %xmm1, %xmm2
743+
; X86-NEXT: vmovapd %xmm0, %xmm2
745744
; X86-NEXT: jmp .LBB31_3
746745
; X86-NEXT: .LBB31_1:
747-
; X86-NEXT: vmovdqa %xmm0, %xmm2
748-
; X86-NEXT: vmovdqa %xmm1, %xmm0
746+
; X86-NEXT: vmovapd %xmm1, %xmm2
747+
; X86-NEXT: vmovapd %xmm0, %xmm1
749748
; X86-NEXT: .LBB31_3:
750749
; X86-NEXT: pushl %ebp
751750
; X86-NEXT: movl %esp, %ebp
752751
; X86-NEXT: andl $-8, %esp
753752
; X86-NEXT: subl $8, %esp
754-
; X86-NEXT: vmaxsd %xmm2, %xmm0, %xmm1
755-
; X86-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm2
756-
; X86-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
753+
; X86-NEXT: vmaxsd %xmm2, %xmm1, %xmm0
754+
; X86-NEXT: vcmpunordsd %xmm1, %xmm1, %xmm2
755+
; X86-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
757756
; X86-NEXT: vmovlpd %xmm0, (%esp)
758757
; X86-NEXT: fldl (%esp)
759758
; X86-NEXT: movl %ebp, %esp
@@ -769,8 +768,8 @@ define float @fminimum_v4f32(<4 x float> %x, <4 x float> %y) nounwind {
769768
; X64-LABEL: fminimum_v4f32:
770769
; X64: # %bb.0:
771770
; X64-NEXT: vmovd %xmm0, %eax
772-
; X64-NEXT: cmpl $-2147483648, %eax # imm = 0x80000000
773-
; X64-NEXT: je .LBB32_1
771+
; X64-NEXT: testl %eax, %eax
772+
; X64-NEXT: js .LBB32_1
774773
; X64-NEXT: # %bb.2:
775774
; X64-NEXT: vmovdqa %xmm1, %xmm2
776775
; X64-NEXT: jmp .LBB32_3
@@ -786,8 +785,8 @@ define float @fminimum_v4f32(<4 x float> %x, <4 x float> %y) nounwind {
786785
; X86-LABEL: fminimum_v4f32:
787786
; X86: # %bb.0:
788787
; X86-NEXT: vmovd %xmm0, %eax
789-
; X86-NEXT: cmpl $-2147483648, %eax # imm = 0x80000000
790-
; X86-NEXT: je .LBB32_1
788+
; X86-NEXT: testl %eax, %eax
789+
; X86-NEXT: js .LBB32_1
791790
; X86-NEXT: # %bb.2:
792791
; X86-NEXT: vmovdqa %xmm1, %xmm2
793792
; X86-NEXT: jmp .LBB32_3
@@ -812,9 +811,8 @@ define double @fminimum_v4f64(<4 x double> %x, <4 x double> %y) nounwind {
812811
; X64-LABEL: fminimum_v4f64:
813812
; X64: # %bb.0:
814813
; X64-NEXT: vmovq %xmm0, %rax
815-
; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
816-
; X64-NEXT: cmpq %rcx, %rax
817-
; X64-NEXT: je .LBB33_1
814+
; X64-NEXT: testq %rax, %rax
815+
; X64-NEXT: js .LBB33_1
818816
; X64-NEXT: # %bb.2:
819817
; X64-NEXT: vmovdqa %xmm1, %xmm2
820818
; X64-NEXT: jmp .LBB33_3
@@ -830,17 +828,15 @@ define double @fminimum_v4f64(<4 x double> %x, <4 x double> %y) nounwind {
830828
;
831829
; X86-LABEL: fminimum_v4f64:
832830
; X86: # %bb.0:
833-
; X86-NEXT: vmovd %xmm0, %eax
834-
; X86-NEXT: vpextrd $1, %xmm0, %ecx
835-
; X86-NEXT: addl $-2147483648, %ecx # imm = 0x80000000
836-
; X86-NEXT: orl %eax, %ecx
837-
; X86-NEXT: je .LBB33_1
831+
; X86-NEXT: vextractps $1, %xmm0, %eax
832+
; X86-NEXT: testl %eax, %eax
833+
; X86-NEXT: js .LBB33_1
838834
; X86-NEXT: # %bb.2:
839-
; X86-NEXT: vmovdqa %xmm1, %xmm2
835+
; X86-NEXT: vmovapd %xmm1, %xmm2
840836
; X86-NEXT: jmp .LBB33_3
841837
; X86-NEXT: .LBB33_1:
842-
; X86-NEXT: vmovdqa %xmm0, %xmm2
843-
; X86-NEXT: vmovdqa %xmm1, %xmm0
838+
; X86-NEXT: vmovapd %xmm0, %xmm2
839+
; X86-NEXT: vmovapd %xmm1, %xmm0
844840
; X86-NEXT: .LBB33_3:
845841
; X86-NEXT: pushl %ebp
846842
; X86-NEXT: movl %esp, %ebp

0 commit comments

Comments
 (0)