Skip to content

Commit 5e4e245

Browse files
committed
DAG: Handle lowering unordered compare with inf
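Try to take advantage of the NaN-checking behavior of fcmp when expanding is_fpclass: a test such as ~(inf|nan) can be lowered as a single floating-point compare of fabs(x) against infinity instead of the integer expansion. The x86_64 output gets shorter, while the x86_32 (x87) output gets longer.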
1 parent 7b7b0b9 commit 5e4e245

File tree

4 files changed

+83
-63
lines changed

4 files changed

+83
-63
lines changed

llvm/include/llvm/CodeGen/CodeGenCommonISel.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,10 +218,15 @@ findSplitPointForStackProtector(MachineBasicBlock *BB,
218218
/// Evaluates if the specified FP class test is better performed as the inverse
219219
/// (i.e. fewer instructions should be required to lower it). An example is the
220220
/// test "inf|normal|subnormal|zero", which is an inversion of "nan".
221+
///
221222
/// \param Test The test as specified in 'is_fpclass' intrinsic invocation.
223+
///
224+
/// \param UseFCmp The intention is to perform the comparison using
225+
/// floating-point compare instructions which check for nan.
226+
///
222227
/// \returns The inverted test, or fcNone, if inversion does not produce a
223228
/// simpler test.
224-
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test);
229+
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp);
225230

226231
/// Assuming the instruction \p MI is going to be deleted, attempt to salvage
227232
/// debug users of \p MI by writing the effect of \p MI in a DIExpression.

llvm/lib/CodeGen/CodeGenCommonISel.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,8 +173,9 @@ llvm::findSplitPointForStackProtector(MachineBasicBlock *BB,
173173
return SplitPoint;
174174
}
175175

176-
FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test) {
176+
FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp) {
177177
FPClassTest InvertedTest = ~Test;
178+
178179
// Pick the direction with fewer tests
179180
// TODO: Handle more combinations of cases that can be handled together
180181
switch (static_cast<unsigned>(InvertedTest)) {
@@ -200,6 +201,11 @@ FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test) {
200201
case fcSubnormal | fcZero:
201202
case fcSubnormal | fcZero | fcNan:
202203
return InvertedTest;
204+
case fcInf | fcNan:
205+
// If we're trying to use fcmp, we can take advantage of the nan check
206+
// behavior of the compare (but this is more instructions in the integer
207+
// expansion).
208+
return UseFCmp ? InvertedTest : fcNone;
203209
default:
204210
return fcNone;
205211
}

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 31 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8672,7 +8672,7 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
86728672
// Degenerated cases.
86738673
if (Test == fcNone)
86748674
return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
8675-
if ((Test & fcAllFlags) == fcAllFlags)
8675+
if (Test == fcAllFlags)
86768676
return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
86778677

86788678
// PPC double double is a pair of doubles, of which the higher part determines
@@ -8683,14 +8683,6 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
86838683
OperandVT = MVT::f64;
86848684
}
86858685

8686-
// Some checks may be represented as inversion of simpler check, for example
8687-
// "inf|normal|subnormal|zero" => !"nan".
8688-
bool IsInverted = false;
8689-
if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test)) {
8690-
IsInverted = true;
8691-
Test = InvertedCheck;
8692-
}
8693-
86948686
// Floating-point type properties.
86958687
EVT ScalarFloatVT = OperandVT.getScalarType();
86968688
const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
@@ -8702,9 +8694,16 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
87028694
if (Flags.hasNoFPExcept() &&
87038695
isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
87048696
FPClassTest FPTestMask = Test;
8697+
bool IsInvertedFP = false;
8698+
8699+
if (FPClassTest InvertedFPCheck =
8700+
invertFPClassTestIfSimpler(FPTestMask, true)) {
8701+
FPTestMask = InvertedFPCheck;
8702+
IsInvertedFP = true;
8703+
}
87058704

8706-
ISD::CondCode OrderedCmpOpcode = IsInverted ? ISD::SETUNE : ISD::SETOEQ;
8707-
ISD::CondCode UnorderedCmpOpcode = IsInverted ? ISD::SETONE : ISD::SETUEQ;
8705+
ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
8706+
ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
87088707

87098708
// See if we can fold an | fcNan into an unordered compare.
87108709
FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
@@ -8717,7 +8716,7 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
87178716
const bool IsOrdered = FPTestMask == OrderedFPTestMask;
87188717

87198718
if (std::optional<bool> IsCmp0 =
8720-
isFCmpEqualZero(Test, Semantics, DAG.getMachineFunction());
8719+
isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
87218720
IsCmp0 && (isCondCodeLegalOrCustom(
87228721
*IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
87238722
OperandVT.getScalarType().getSimpleVT()))) {
@@ -8729,31 +8728,32 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
87298728
*IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
87308729
}
87318730

8732-
if (Test == fcNan &&
8733-
isCondCodeLegalOrCustom(IsInverted ? ISD::SETO : ISD::SETUO,
8734-
OperandVT.getScalarType().getSimpleVT())) {
8731+
if (FPTestMask == fcNan &&
8732+
isCondCodeLegalOrCustom(IsInvertedFP ? ISD::SETO : ISD::SETUO,
8733+
OperandVT.getScalarType().getSimpleVT()))
87358734
return DAG.getSetCC(DL, ResultVT, Op, Op,
8736-
IsInverted ? ISD::SETO : ISD::SETUO);
8737-
}
8735+
IsInvertedFP ? ISD::SETO : ISD::SETUO);
87388736

8739-
if (Test == fcInf &&
8740-
isCondCodeLegalOrCustom(IsInverted ? ISD::SETUNE : ISD::SETOEQ,
8737+
bool IsOrderedInf = FPTestMask == fcInf;
8738+
if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
8739+
isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
8740+
: UnorderedCmpOpcode,
87418741
OperandVT.getScalarType().getSimpleVT()) &&
87428742
isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType())) {
87438743
// isinf(x) --> fabs(x) == inf
87448744
SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
87458745
SDValue Inf =
87468746
DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
87478747
return DAG.getSetCC(DL, ResultVT, Abs, Inf,
8748-
IsInverted ? ISD::SETUNE : ISD::SETOEQ);
8748+
IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
87498749
}
87508750

87518751
if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
87528752
// TODO: Could handle ordered case, but it produces worse code for
87538753
// x86. Maybe handle ordered if fabs is free?
87548754

8755-
ISD::CondCode OrderedOp = IsInverted ? ISD::SETUGE : ISD::SETOLT;
8756-
ISD::CondCode UnorderedOp = IsInverted ? ISD::SETOGE : ISD::SETULT;
8755+
ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
8756+
ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;
87578757

87588758
if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
87598759
OperandVT.getScalarType().getSimpleVT())) {
@@ -8770,6 +8770,15 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
87708770
}
87718771
}
87728772

8773+
// Some checks may be represented as inversion of simpler check, for example
8774+
// "inf|normal|subnormal|zero" => !"nan".
8775+
bool IsInverted = false;
8776+
8777+
if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
8778+
Test = InvertedCheck;
8779+
IsInverted = true;
8780+
}
8781+
87738782
// In the general case use integer operations.
87748783
unsigned BitSize = OperandVT.getScalarSizeInBits();
87758784
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);

llvm/test/CodeGen/X86/is_fpclass.ll

Lines changed: 39 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -240,18 +240,22 @@ entry:
240240
define i1 @isfinite_f(float %x) {
241241
; X86-LABEL: isfinite_f:
242242
; X86: # %bb.0: # %entry
243-
; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
244-
; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
245-
; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
246-
; X86-NEXT: setl %al
243+
; X86-NEXT: flds {{[0-9]+}}(%esp)
244+
; X86-NEXT: fabs
245+
; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
246+
; X86-NEXT: fxch %st(1)
247+
; X86-NEXT: fucompp
248+
; X86-NEXT: fnstsw %ax
249+
; X86-NEXT: # kill: def $ah killed $ah killed $ax
250+
; X86-NEXT: sahf
251+
; X86-NEXT: setne %al
247252
; X86-NEXT: retl
248253
;
249254
; X64-LABEL: isfinite_f:
250255
; X64: # %bb.0: # %entry
251-
; X64-NEXT: movd %xmm0, %eax
252-
; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
253-
; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
254-
; X64-NEXT: setl %al
256+
; X64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
257+
; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
258+
; X64-NEXT: setne %al
255259
; X64-NEXT: retq
256260
entry:
257261
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; 0x1f8 = "finite"
@@ -1150,31 +1154,23 @@ entry:
11501154
define i1 @isfinite_d(double %x) {
11511155
; X86-LABEL: isfinite_d:
11521156
; X86: # %bb.0: # %entry
1153-
; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
1154-
; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
1155-
; X86-NEXT: cmpl $2146435072, %eax # imm = 0x7FF00000
1156-
; X86-NEXT: setl %al
1157+
; X86-NEXT: fldl {{[0-9]+}}(%esp)
1158+
; X86-NEXT: fabs
1159+
; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
1160+
; X86-NEXT: fxch %st(1)
1161+
; X86-NEXT: fucompp
1162+
; X86-NEXT: fnstsw %ax
1163+
; X86-NEXT: # kill: def $ah killed $ah killed $ax
1164+
; X86-NEXT: sahf
1165+
; X86-NEXT: setne %al
11571166
; X86-NEXT: retl
11581167
;
1159-
; X64-GENERIC-LABEL: isfinite_d:
1160-
; X64-GENERIC: # %bb.0: # %entry
1161-
; X64-GENERIC-NEXT: movq %xmm0, %rax
1162-
; X64-GENERIC-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
1163-
; X64-GENERIC-NEXT: andq %rax, %rcx
1164-
; X64-GENERIC-NEXT: movabsq $9218868437227405312, %rax # imm = 0x7FF0000000000000
1165-
; X64-GENERIC-NEXT: cmpq %rax, %rcx
1166-
; X64-GENERIC-NEXT: setl %al
1167-
; X64-GENERIC-NEXT: retq
1168-
;
1169-
; X64-NDD-LABEL: isfinite_d:
1170-
; X64-NDD: # %bb.0: # %entry
1171-
; X64-NDD-NEXT: movq %xmm0, %rax
1172-
; X64-NDD-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
1173-
; X64-NDD-NEXT: andq %rcx, %rax
1174-
; X64-NDD-NEXT: movabsq $9218868437227405312, %rcx # imm = 0x7FF0000000000000
1175-
; X64-NDD-NEXT: cmpq %rcx, %rax
1176-
; X64-NDD-NEXT: setl %al
1177-
; X64-NDD-NEXT: retq
1168+
; X64-LABEL: isfinite_d:
1169+
; X64: # %bb.0: # %entry
1170+
; X64-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1171+
; X64-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1172+
; X64-NEXT: setne %al
1173+
; X64-NEXT: retq
11781174
entry:
11791175
%0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 504) ; 0x1f8 = "finite"
11801176
ret i1 %0
@@ -2053,18 +2049,22 @@ entry:
20532049
define i1 @not_isinf_or_nan_f(float %x) {
20542050
; X86-LABEL: not_isinf_or_nan_f:
20552051
; X86: # %bb.0: # %entry
2056-
; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
2057-
; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
2058-
; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
2059-
; X86-NEXT: setl %al
2052+
; X86-NEXT: flds {{[0-9]+}}(%esp)
2053+
; X86-NEXT: fabs
2054+
; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
2055+
; X86-NEXT: fxch %st(1)
2056+
; X86-NEXT: fucompp
2057+
; X86-NEXT: fnstsw %ax
2058+
; X86-NEXT: # kill: def $ah killed $ah killed $ax
2059+
; X86-NEXT: sahf
2060+
; X86-NEXT: setne %al
20602061
; X86-NEXT: retl
20612062
;
20622063
; X64-LABEL: not_isinf_or_nan_f:
20632064
; X64: # %bb.0: # %entry
2064-
; X64-NEXT: movd %xmm0, %eax
2065-
; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
2066-
; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
2067-
; X64-NEXT: setl %al
2065+
; X64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2066+
; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2067+
; X64-NEXT: setne %al
20682068
; X64-NEXT: retq
20692069
entry:
20702070
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; ~(0x204|0x3) = "~(inf|nan)"

0 commit comments

Comments
 (0)