-
Notifications
You must be signed in to change notification settings - Fork 14.3k
DAG: Handle lowering unordered compare with inf #100378
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DAG: Handle lowering unordered compare with inf #100378
Conversation
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-backend-x86 Author: Matt Arsenault (arsenm) ChangesTry to take advantage of the nan check behavior of fcmp. Full diff: https://github.com/llvm/llvm-project/pull/100378.diff 4 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/CodeGenCommonISel.h b/llvm/include/llvm/CodeGen/CodeGenCommonISel.h
index 90ef890f22d1b..38408620cd7ec 100644
--- a/llvm/include/llvm/CodeGen/CodeGenCommonISel.h
+++ b/llvm/include/llvm/CodeGen/CodeGenCommonISel.h
@@ -219,9 +219,13 @@ findSplitPointForStackProtector(MachineBasicBlock *BB,
/// (i.e. fewer instructions should be required to lower it). An example is the
/// test "inf|normal|subnormal|zero", which is an inversion of "nan".
/// \param Test The test as specified in 'is_fpclass' intrinsic invocation.
+///
+/// \param UseFP The intention is to perform the comparison using floating-point
+/// compare instructions which check for nan.
+///
/// \returns The inverted test, or fcNone, if inversion does not produce a
/// simpler test.
-FPClassTest invertFPClassTestIfSimpler(FPClassTest Test);
+FPClassTest invertFPClassTestIfSimpler(FPClassTest Test, bool UseFP);
/// Assuming the instruction \p MI is going to be deleted, attempt to salvage
/// debug users of \p MI by writing the effect of \p MI in a DIExpression.
diff --git a/llvm/lib/CodeGen/CodeGenCommonISel.cpp b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
index fe144d3c18203..f5207d8b9d124 100644
--- a/llvm/lib/CodeGen/CodeGenCommonISel.cpp
+++ b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
@@ -173,8 +173,9 @@ llvm::findSplitPointForStackProtector(MachineBasicBlock *BB,
return SplitPoint;
}
-FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test) {
+FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test, bool UseFP) {
FPClassTest InvertedTest = ~Test;
+
// Pick the direction with fewer tests
// TODO: Handle more combinations of cases that can be handled together
switch (static_cast<unsigned>(InvertedTest)) {
@@ -200,6 +201,14 @@ FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test) {
case fcSubnormal | fcZero:
case fcSubnormal | fcZero | fcNan:
return InvertedTest;
+ case fcInf | fcNan:
+ // If we're trying to use fcmp, we can take advantage of the nan check
+ // behavior of the compare (but this is more instructions in the integer
+ // expansion).
+ return UseFP ? InvertedTest : fcNone;
+ case fcFinite | fcNan:
+ // Inversion of fcInf, which can be done in a combined check.
+ return fcNone;
default:
return fcNone;
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 140c97ccd90ba..ba7c89a33f604 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8556,16 +8556,17 @@ static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
}
SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
- FPClassTest Test, SDNodeFlags Flags,
- const SDLoc &DL,
+ const FPClassTest OrigTestMask,
+ SDNodeFlags Flags, const SDLoc &DL,
SelectionDAG &DAG) const {
EVT OperandVT = Op.getValueType();
assert(OperandVT.isFloatingPoint());
+ FPClassTest Test = OrigTestMask;
// Degenerated cases.
if (Test == fcNone)
return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
- if ((Test & fcAllFlags) == fcAllFlags)
+ if (Test == fcAllFlags)
return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
// PPC double double is a pair of doubles, of which the higher part determines
@@ -8576,14 +8577,6 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
OperandVT = MVT::f64;
}
- // Some checks may be represented as inversion of simpler check, for example
- // "inf|normal|subnormal|zero" => !"nan".
- bool IsInverted = false;
- if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test)) {
- IsInverted = true;
- Test = InvertedCheck;
- }
-
// Floating-point type properties.
EVT ScalarFloatVT = OperandVT.getScalarType();
const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
@@ -8594,11 +8587,20 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
// exceptions are ignored.
if (Flags.hasNoFPExcept() &&
isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
- ISD::CondCode OrderedCmpOpcode = IsInverted ? ISD::SETUNE : ISD::SETOEQ;
- ISD::CondCode UnorderedCmpOpcode = IsInverted ? ISD::SETONE : ISD::SETUEQ;
+ FPClassTest FPTestMask = Test;
+ bool IsInvertedFP = false;
+
+ if (FPClassTest InvertedFPCheck =
+ invertFPClassTestIfSimpler(FPTestMask, true)) {
+ FPTestMask = InvertedFPCheck;
+ IsInvertedFP = true;
+ }
+
+ ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
+ ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
if (std::optional<bool> IsCmp0 =
- isFCmpEqualZero(Test, Semantics, DAG.getMachineFunction());
+ isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
IsCmp0 && (isCondCodeLegalOrCustom(
*IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
OperandVT.getScalarType().getSimpleVT()))) {
@@ -8610,15 +8612,16 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
*IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
}
- if (Test == fcNan &&
- isCondCodeLegalOrCustom(IsInverted ? ISD::SETO : ISD::SETUO,
- OperandVT.getScalarType().getSimpleVT())) {
+ if (FPTestMask == fcNan &&
+ isCondCodeLegalOrCustom(IsInvertedFP ? ISD::SETO : ISD::SETUO,
+ OperandVT.getScalarType().getSimpleVT()))
return DAG.getSetCC(DL, ResultVT, Op, Op,
- IsInverted ? ISD::SETO : ISD::SETUO);
- }
+ IsInvertedFP ? ISD::SETO : ISD::SETUO);
- if (Test == fcInf &&
- isCondCodeLegalOrCustom(IsInverted ? ISD::SETUNE : ISD::SETOEQ,
+ bool IsOrderedInf = FPTestMask == fcInf;
+ if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
+ isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
+ : UnorderedCmpOpcode,
OperandVT.getScalarType().getSimpleVT()) &&
isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType())) {
// isinf(x) --> fabs(x) == inf
@@ -8626,10 +8629,19 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
SDValue Inf =
DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
return DAG.getSetCC(DL, ResultVT, Abs, Inf,
- IsInverted ? ISD::SETUNE : ISD::SETOEQ);
+ IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
}
}
+ // Some checks may be represented as inversion of simpler check, for example
+ // "inf|normal|subnormal|zero" => !"nan".
+ bool IsInverted = false;
+
+ if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
+ Test = InvertedCheck;
+ IsInverted = true;
+ }
+
// In the general case use integer operations.
unsigned BitSize = OperandVT.getScalarSizeInBits();
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
diff --git a/llvm/test/CodeGen/X86/is_fpclass.ll b/llvm/test/CodeGen/X86/is_fpclass.ll
index 532b2c09a9175..86cee3d2c8e69 100644
--- a/llvm/test/CodeGen/X86/is_fpclass.ll
+++ b/llvm/test/CodeGen/X86/is_fpclass.ll
@@ -240,18 +240,22 @@ entry:
define i1 @isfinite_f(float %x) {
; X86-LABEL: isfinite_f:
; X86: # %bb.0: # %entry
-; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X86-NEXT: setl %al
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fabs
+; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
+; X86-NEXT: fxch %st(1)
+; X86-NEXT: fucompp
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: isfinite_f:
; X64: # %bb.0: # %entry
-; X64-NEXT: movd %xmm0, %eax
-; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X64-NEXT: setl %al
+; X64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: setne %al
; X64-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; 0x1f8 = "finite"
@@ -1150,31 +1154,23 @@ entry:
define i1 @isfinite_d(double %x) {
; X86-LABEL: isfinite_d:
; X86: # %bb.0: # %entry
-; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $2146435072, %eax # imm = 0x7FF00000
-; X86-NEXT: setl %al
+; X86-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-NEXT: fabs
+; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
+; X86-NEXT: fxch %st(1)
+; X86-NEXT: fucompp
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setne %al
; X86-NEXT: retl
;
-; X64-GENERIC-LABEL: isfinite_d:
-; X64-GENERIC: # %bb.0: # %entry
-; X64-GENERIC-NEXT: movq %xmm0, %rax
-; X64-GENERIC-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
-; X64-GENERIC-NEXT: andq %rax, %rcx
-; X64-GENERIC-NEXT: movabsq $9218868437227405312, %rax # imm = 0x7FF0000000000000
-; X64-GENERIC-NEXT: cmpq %rax, %rcx
-; X64-GENERIC-NEXT: setl %al
-; X64-GENERIC-NEXT: retq
-;
-; X64-NDD-LABEL: isfinite_d:
-; X64-NDD: # %bb.0: # %entry
-; X64-NDD-NEXT: movq %xmm0, %rax
-; X64-NDD-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
-; X64-NDD-NEXT: andq %rcx, %rax
-; X64-NDD-NEXT: movabsq $9218868437227405312, %rcx # imm = 0x7FF0000000000000
-; X64-NDD-NEXT: cmpq %rcx, %rax
-; X64-NDD-NEXT: setl %al
-; X64-NDD-NEXT: retq
+; X64-LABEL: isfinite_d:
+; X64: # %bb.0: # %entry
+; X64-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: setne %al
+; X64-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 504) ; 0x1f8 = "finite"
ret i1 %0
@@ -2053,18 +2049,22 @@ entry:
define i1 @not_isinf_or_nan_f(float %x) {
; X86-LABEL: not_isinf_or_nan_f:
; X86: # %bb.0: # %entry
-; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X86-NEXT: setl %al
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fabs
+; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
+; X86-NEXT: fxch %st(1)
+; X86-NEXT: fucompp
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: not_isinf_or_nan_f:
; X64: # %bb.0: # %entry
-; X64-NEXT: movd %xmm0, %eax
-; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X64-NEXT: setl %al
+; X64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: setne %al
; X64-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; ~(0x204|0x3) = "~(inf|nan)"
|
@@ -219,9 +219,13 @@ findSplitPointForStackProtector(MachineBasicBlock *BB, | |||
/// (i.e. fewer instructions should be required to lower it). An example is the | |||
/// test "inf|normal|subnormal|zero", which is an inversion of "nan". | |||
/// \param Test The test as specified in 'is_fpclass' intrinsic invocation. | |||
/// |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Blank line is not needed here.
return UseFP ? InvertedTest : fcNone; | ||
case fcFinite | fcNan: | ||
// Inversion of fcInf, which can be done in a combined check. | ||
return fcNone; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Was fcInf
intended here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Probably, I think this got lost in one of the many rebases
1e5da52
to
b62d261
Compare
b62d261
to
1246ce4
Compare
1246ce4
to
f0ea19f
Compare
Use of the comparison with infinity constant generally is not a good solution unless the target has a cheap way to materialize the infinity, but this comparison has already been implemented in the code and this patch enhances the code generation. LGTM. |
b57fb07
to
9e23bae
Compare
f0ea19f
to
238f697
Compare
9e23bae
to
5552946
Compare
Try to take advantage of the nan check behavior of fcmp. x86_64 looks better, x86_32 looks worse.
This avoids the x86_32 regressions, at the expense of several other cases.
238f697
to
72509a7
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@arsenm Are you happy with the legality check workaround to avoid the x87 regression? We could attempt a custom lowering if you'd prefer?
I think it's fine (though we don't really have a proper way of checking what to do for vector constants) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Try to take advantage of the nan check behavior of fcmp.
x86_64 looks better, x86_32 looks worse.