Skip to content

Commit 1e5da52

Browse files
committed
DAG: Handle lowering unordered compare with inf
Try to take advantage of the nan check behavior of fcmp. x86_64 looks better, but x86_32 looks worse.
1 parent 2bb18e2 commit 1e5da52

File tree

4 files changed

+88
-63
lines changed

4 files changed

+88
-63
lines changed

llvm/include/llvm/CodeGen/CodeGenCommonISel.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,9 +219,13 @@ findSplitPointForStackProtector(MachineBasicBlock *BB,
219219
/// (i.e. fewer instructions should be required to lower it). An example is the
220220
/// test "inf|normal|subnormal|zero", which is an inversion of "nan".
221221
/// \param Test The test as specified in 'is_fpclass' intrinsic invocation.
222+
///
223+
/// \param UseFP Whether the caller intends to perform the comparison using
224+
/// floating-point compare instructions, which also check for nan.
225+
///
222226
/// \returns The inverted test, or fcNone, if inversion does not produce a
223227
/// simpler test.
224-
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test);
228+
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test, bool UseFP);
225229

226230
/// Assuming the instruction \p MI is going to be deleted, attempt to salvage
227231
/// debug users of \p MI by writing the effect of \p MI in a DIExpression.

llvm/lib/CodeGen/CodeGenCommonISel.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,8 +173,9 @@ llvm::findSplitPointForStackProtector(MachineBasicBlock *BB,
173173
return SplitPoint;
174174
}
175175

176-
FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test) {
176+
FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test, bool UseFP) {
177177
FPClassTest InvertedTest = ~Test;
178+
178179
// Pick the direction with fewer tests
179180
// TODO: Handle more combinations of cases that can be handled together
180181
switch (static_cast<unsigned>(InvertedTest)) {
@@ -200,6 +201,14 @@ FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test) {
200201
case fcSubnormal | fcZero:
201202
case fcSubnormal | fcZero | fcNan:
202203
return InvertedTest;
204+
case fcInf | fcNan:
205+
// If we're trying to use fcmp, we can take advantage of the nan check
206+
// behavior of the compare (but this is more instructions in the integer
207+
// expansion).
208+
return UseFP ? InvertedTest : fcNone;
209+
case fcFinite | fcNan:
210+
// Inversion of fcInf, which can be done in a combined check.
211+
return fcNone;
203212
default:
204213
return fcNone;
205214
}

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 34 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8556,16 +8556,17 @@ static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
85568556
}
85578557

85588558
SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
8559-
FPClassTest Test, SDNodeFlags Flags,
8560-
const SDLoc &DL,
8559+
const FPClassTest OrigTestMask,
8560+
SDNodeFlags Flags, const SDLoc &DL,
85618561
SelectionDAG &DAG) const {
85628562
EVT OperandVT = Op.getValueType();
85638563
assert(OperandVT.isFloatingPoint());
8564+
FPClassTest Test = OrigTestMask;
85648565

85658566
// Degenerated cases.
85668567
if (Test == fcNone)
85678568
return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
8568-
if ((Test & fcAllFlags) == fcAllFlags)
8569+
if (Test == fcAllFlags)
85698570
return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
85708571

85718572
// PPC double double is a pair of doubles, of which the higher part determines
@@ -8576,14 +8577,6 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
85768577
OperandVT = MVT::f64;
85778578
}
85788579

8579-
// Some checks may be represented as inversion of simpler check, for example
8580-
// "inf|normal|subnormal|zero" => !"nan".
8581-
bool IsInverted = false;
8582-
if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test)) {
8583-
IsInverted = true;
8584-
Test = InvertedCheck;
8585-
}
8586-
85878580
// Floating-point type properties.
85888581
EVT ScalarFloatVT = OperandVT.getScalarType();
85898582
const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
@@ -8594,11 +8587,20 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
85948587
// exceptions are ignored.
85958588
if (Flags.hasNoFPExcept() &&
85968589
isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
8597-
ISD::CondCode OrderedCmpOpcode = IsInverted ? ISD::SETUNE : ISD::SETOEQ;
8598-
ISD::CondCode UnorderedCmpOpcode = IsInverted ? ISD::SETONE : ISD::SETUEQ;
8590+
FPClassTest FPTestMask = Test;
8591+
bool IsInvertedFP = false;
8592+
8593+
if (FPClassTest InvertedFPCheck =
8594+
invertFPClassTestIfSimpler(FPTestMask, true)) {
8595+
FPTestMask = InvertedFPCheck;
8596+
IsInvertedFP = true;
8597+
}
8598+
8599+
ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
8600+
ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
85998601

86008602
if (std::optional<bool> IsCmp0 =
8601-
isFCmpEqualZero(Test, Semantics, DAG.getMachineFunction());
8603+
isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
86028604
IsCmp0 && (isCondCodeLegalOrCustom(
86038605
*IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
86048606
OperandVT.getScalarType().getSimpleVT()))) {
@@ -8610,26 +8612,36 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
86108612
*IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
86118613
}
86128614

8613-
if (Test == fcNan &&
8614-
isCondCodeLegalOrCustom(IsInverted ? ISD::SETO : ISD::SETUO,
8615-
OperandVT.getScalarType().getSimpleVT())) {
8615+
if (FPTestMask == fcNan &&
8616+
isCondCodeLegalOrCustom(IsInvertedFP ? ISD::SETO : ISD::SETUO,
8617+
OperandVT.getScalarType().getSimpleVT()))
86168618
return DAG.getSetCC(DL, ResultVT, Op, Op,
8617-
IsInverted ? ISD::SETO : ISD::SETUO);
8618-
}
8619+
IsInvertedFP ? ISD::SETO : ISD::SETUO);
86198620

8620-
if (Test == fcInf &&
8621-
isCondCodeLegalOrCustom(IsInverted ? ISD::SETUNE : ISD::SETOEQ,
8621+
bool IsOrderedInf = FPTestMask == fcInf;
8622+
if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
8623+
isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
8624+
: UnorderedCmpOpcode,
86228625
OperandVT.getScalarType().getSimpleVT()) &&
86238626
isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType())) {
86248627
// isinf(x) --> fabs(x) == inf; isinf(x) || isnan(x) --> fabs(x) ueq inf
86258628
SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
86268629
SDValue Inf =
86278630
DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
86288631
return DAG.getSetCC(DL, ResultVT, Abs, Inf,
8629-
IsInverted ? ISD::SETUNE : ISD::SETOEQ);
8632+
IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
86308633
}
86318634
}
86328635

8636+
// Some checks may be represented as the inversion of a simpler check, for
8637+
// example "inf|normal|subnormal|zero" => !"nan".
8638+
bool IsInverted = false;
8639+
8640+
if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
8641+
Test = InvertedCheck;
8642+
IsInverted = true;
8643+
}
8644+
86338645
// In the general case use integer operations.
86348646
unsigned BitSize = OperandVT.getScalarSizeInBits();
86358647
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);

llvm/test/CodeGen/X86/is_fpclass.ll

Lines changed: 39 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -240,18 +240,22 @@ entry:
240240
define i1 @isfinite_f(float %x) {
241241
; X86-LABEL: isfinite_f:
242242
; X86: # %bb.0: # %entry
243-
; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
244-
; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
245-
; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
246-
; X86-NEXT: setl %al
243+
; X86-NEXT: flds {{[0-9]+}}(%esp)
244+
; X86-NEXT: fabs
245+
; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
246+
; X86-NEXT: fxch %st(1)
247+
; X86-NEXT: fucompp
248+
; X86-NEXT: fnstsw %ax
249+
; X86-NEXT: # kill: def $ah killed $ah killed $ax
250+
; X86-NEXT: sahf
251+
; X86-NEXT: setne %al
247252
; X86-NEXT: retl
248253
;
249254
; X64-LABEL: isfinite_f:
250255
; X64: # %bb.0: # %entry
251-
; X64-NEXT: movd %xmm0, %eax
252-
; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
253-
; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
254-
; X64-NEXT: setl %al
256+
; X64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
257+
; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
258+
; X64-NEXT: setne %al
255259
; X64-NEXT: retq
256260
entry:
257261
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; 0x1f8 = "finite"
@@ -1150,31 +1154,23 @@ entry:
11501154
define i1 @isfinite_d(double %x) {
11511155
; X86-LABEL: isfinite_d:
11521156
; X86: # %bb.0: # %entry
1153-
; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
1154-
; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
1155-
; X86-NEXT: cmpl $2146435072, %eax # imm = 0x7FF00000
1156-
; X86-NEXT: setl %al
1157+
; X86-NEXT: fldl {{[0-9]+}}(%esp)
1158+
; X86-NEXT: fabs
1159+
; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
1160+
; X86-NEXT: fxch %st(1)
1161+
; X86-NEXT: fucompp
1162+
; X86-NEXT: fnstsw %ax
1163+
; X86-NEXT: # kill: def $ah killed $ah killed $ax
1164+
; X86-NEXT: sahf
1165+
; X86-NEXT: setne %al
11571166
; X86-NEXT: retl
11581167
;
1159-
; X64-GENERIC-LABEL: isfinite_d:
1160-
; X64-GENERIC: # %bb.0: # %entry
1161-
; X64-GENERIC-NEXT: movq %xmm0, %rax
1162-
; X64-GENERIC-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
1163-
; X64-GENERIC-NEXT: andq %rax, %rcx
1164-
; X64-GENERIC-NEXT: movabsq $9218868437227405312, %rax # imm = 0x7FF0000000000000
1165-
; X64-GENERIC-NEXT: cmpq %rax, %rcx
1166-
; X64-GENERIC-NEXT: setl %al
1167-
; X64-GENERIC-NEXT: retq
1168-
;
1169-
; X64-NDD-LABEL: isfinite_d:
1170-
; X64-NDD: # %bb.0: # %entry
1171-
; X64-NDD-NEXT: movq %xmm0, %rax
1172-
; X64-NDD-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
1173-
; X64-NDD-NEXT: andq %rcx, %rax
1174-
; X64-NDD-NEXT: movabsq $9218868437227405312, %rcx # imm = 0x7FF0000000000000
1175-
; X64-NDD-NEXT: cmpq %rcx, %rax
1176-
; X64-NDD-NEXT: setl %al
1177-
; X64-NDD-NEXT: retq
1168+
; X64-LABEL: isfinite_d:
1169+
; X64: # %bb.0: # %entry
1170+
; X64-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1171+
; X64-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1172+
; X64-NEXT: setne %al
1173+
; X64-NEXT: retq
11781174
entry:
11791175
%0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 504) ; 0x1f8 = "finite"
11801176
ret i1 %0
@@ -2053,18 +2049,22 @@ entry:
20532049
define i1 @not_isinf_or_nan_f(float %x) {
20542050
; X86-LABEL: not_isinf_or_nan_f:
20552051
; X86: # %bb.0: # %entry
2056-
; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
2057-
; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
2058-
; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
2059-
; X86-NEXT: setl %al
2052+
; X86-NEXT: flds {{[0-9]+}}(%esp)
2053+
; X86-NEXT: fabs
2054+
; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
2055+
; X86-NEXT: fxch %st(1)
2056+
; X86-NEXT: fucompp
2057+
; X86-NEXT: fnstsw %ax
2058+
; X86-NEXT: # kill: def $ah killed $ah killed $ax
2059+
; X86-NEXT: sahf
2060+
; X86-NEXT: setne %al
20602061
; X86-NEXT: retl
20612062
;
20622063
; X64-LABEL: not_isinf_or_nan_f:
20632064
; X64: # %bb.0: # %entry
2064-
; X64-NEXT: movd %xmm0, %eax
2065-
; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
2066-
; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
2067-
; X64-NEXT: setl %al
2065+
; X64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2066+
; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2067+
; X64-NEXT: setne %al
20682068
; X64-NEXT: retq
20692069
entry:
20702070
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; ~(0x204|0x3) = "~(inf|nan)"

0 commit comments

Comments
 (0)