Skip to content

DAG: Handle lowering unordered compare with inf #100378

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Sep 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion llvm/include/llvm/CodeGen/CodeGenCommonISel.h
Original file line number Diff line number Diff line change
Expand Up @@ -218,10 +218,14 @@ findSplitPointForStackProtector(MachineBasicBlock *BB,
/// Evaluates if the specified FP class test is better performed as the inverse
/// (i.e. fewer instructions should be required to lower it). An example is the
/// test "inf|normal|subnormal|zero", which is an inversion of "nan".
///
/// \param Test The test as specified in 'is_fpclass' intrinsic invocation.
/// \param UseFCmp True if the caller intends to perform the comparison using
/// floating-point compare instructions, which check for nan.
///
/// \returns The inverted test, or fcNone, if inversion does not produce a
/// simpler test.
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test);
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp);

/// Assuming the instruction \p MI is going to be deleted, attempt to salvage
/// debug users of \p MI by writing the effect of \p MI in a DIExpression.
Expand Down
8 changes: 7 additions & 1 deletion llvm/lib/CodeGen/CodeGenCommonISel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,8 +173,9 @@ llvm::findSplitPointForStackProtector(MachineBasicBlock *BB,
return SplitPoint;
}

FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test) {
FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp) {
FPClassTest InvertedTest = ~Test;

// Pick the direction with fewer tests
// TODO: Handle more combinations of cases that can be handled together
switch (static_cast<unsigned>(InvertedTest)) {
Expand All @@ -200,6 +201,11 @@ FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test) {
case fcSubnormal | fcZero:
case fcSubnormal | fcZero | fcNan:
return InvertedTest;
case fcInf | fcNan:
// If we're trying to use fcmp, we can take advantage of the nan check
// behavior of the compare (but this is more instructions in the integer
// expansion).
return UseFCmp ? InvertedTest : fcNone;
default:
return fcNone;
}
Expand Down
58 changes: 35 additions & 23 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8672,7 +8672,7 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
// Degenerate cases.
if (Test == fcNone)
return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
if ((Test & fcAllFlags) == fcAllFlags)
if (Test == fcAllFlags)
return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);

// PPC double double is a pair of doubles, of which the higher part determines
Expand All @@ -8683,14 +8683,6 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
OperandVT = MVT::f64;
}

// Some checks may be represented as the inversion of a simpler check, for
// example "inf|normal|subnormal|zero" => !"nan".
bool IsInverted = false;
if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test)) {
IsInverted = true;
Test = InvertedCheck;
}

// Floating-point type properties.
EVT ScalarFloatVT = OperandVT.getScalarType();
const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
Expand All @@ -8702,9 +8694,16 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
if (Flags.hasNoFPExcept() &&
isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
FPClassTest FPTestMask = Test;
bool IsInvertedFP = false;

if (FPClassTest InvertedFPCheck =
invertFPClassTestIfSimpler(FPTestMask, true)) {
FPTestMask = InvertedFPCheck;
IsInvertedFP = true;
}

ISD::CondCode OrderedCmpOpcode = IsInverted ? ISD::SETUNE : ISD::SETOEQ;
ISD::CondCode UnorderedCmpOpcode = IsInverted ? ISD::SETONE : ISD::SETUEQ;
ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;

// See if we can fold an | fcNan into an unordered compare.
FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
Expand All @@ -8717,7 +8716,7 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
const bool IsOrdered = FPTestMask == OrderedFPTestMask;

if (std::optional<bool> IsCmp0 =
isFCmpEqualZero(Test, Semantics, DAG.getMachineFunction());
isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
IsCmp0 && (isCondCodeLegalOrCustom(
*IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
OperandVT.getScalarType().getSimpleVT()))) {
Expand All @@ -8729,31 +8728,35 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
*IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
}

if (Test == fcNan &&
isCondCodeLegalOrCustom(IsInverted ? ISD::SETO : ISD::SETUO,
OperandVT.getScalarType().getSimpleVT())) {
if (FPTestMask == fcNan &&
isCondCodeLegalOrCustom(IsInvertedFP ? ISD::SETO : ISD::SETUO,
OperandVT.getScalarType().getSimpleVT()))
return DAG.getSetCC(DL, ResultVT, Op, Op,
IsInverted ? ISD::SETO : ISD::SETUO);
}
IsInvertedFP ? ISD::SETO : ISD::SETUO);

if (Test == fcInf &&
isCondCodeLegalOrCustom(IsInverted ? ISD::SETUNE : ISD::SETOEQ,
bool IsOrderedInf = FPTestMask == fcInf;
if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
: UnorderedCmpOpcode,
OperandVT.getScalarType().getSimpleVT()) &&
isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType())) {
isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType()) &&
(isOperationLegal(ISD::ConstantFP, OperandVT.getScalarType()) ||
(OperandVT.isVector() &&
isOperationLegalOrCustom(ISD::BUILD_VECTOR, OperandVT)))) {
// isinf(x) --> fabs(x) == inf
SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
SDValue Inf =
DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
return DAG.getSetCC(DL, ResultVT, Abs, Inf,
IsInverted ? ISD::SETUNE : ISD::SETOEQ);
IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
}

if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
// TODO: Could handle ordered case, but it produces worse code for
// x86. Maybe handle ordered if fabs is free?

ISD::CondCode OrderedOp = IsInverted ? ISD::SETUGE : ISD::SETOLT;
ISD::CondCode UnorderedOp = IsInverted ? ISD::SETOGE : ISD::SETULT;
ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;

if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
OperandVT.getScalarType().getSimpleVT())) {
Expand All @@ -8770,6 +8773,15 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
}
}

// Some checks may be represented as the inversion of a simpler check, for
// example "inf|normal|subnormal|zero" => !"nan".
bool IsInverted = false;

if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
Test = InvertedCheck;
IsInverted = true;
}

// In the general case use integer operations.
unsigned BitSize = OperandVT.getScalarSizeInBits();
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AArch64/isinf.ll
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ define i32 @replace_isinf_call_f16(half %x) {
define i32 @replace_isinf_call_f32(float %x) {
; CHECK-LABEL: replace_isinf_call_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fabs s0, s0
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: mov w8, #2139095040 // =0x7f800000
; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: fcmp s0, s1
; CHECK-NEXT: and w9, w9, #0x7fffffff
; CHECK-NEXT: cmp w9, w8
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%abs = tail call float @llvm.fabs.f32(float %x)
Expand All @@ -42,10 +42,10 @@ define i32 @replace_isinf_call_f32(float %x) {
define i32 @replace_isinf_call_f64(double %x) {
; CHECK-LABEL: replace_isinf_call_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: fabs d0, d0
; CHECK-NEXT: fmov x9, d0
; CHECK-NEXT: mov x8, #9218868437227405312 // =0x7ff0000000000000
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: fcmp d0, d1
; CHECK-NEXT: and x9, x9, #0x7fffffffffffffff
; CHECK-NEXT: cmp x9, x8
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%abs = tail call double @llvm.fabs.f64(double %x)
Expand Down
28 changes: 14 additions & 14 deletions llvm/test/CodeGen/PowerPC/fp-classify.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@
define zeroext i1 @abs_isinff(float %x) {
; P8-LABEL: abs_isinff:
; P8: # %bb.0: # %entry
; P8-NEXT: addis 3, 2, .LCPI0_0@toc@ha
; P8-NEXT: xsabsdp 0, 1
; P8-NEXT: li 4, 1
; P8-NEXT: lfs 1, .LCPI0_0@toc@l(3)
; P8-NEXT: li 3, 0
; P8-NEXT: fcmpu 0, 0, 1
; P8-NEXT: iseleq 3, 4, 3
; P8-NEXT: xscvdpspn 0, 1
; P8-NEXT: lis 4, 32640
; P8-NEXT: mffprwz 3, 0
; P8-NEXT: clrlwi 3, 3, 1
; P8-NEXT: xor 3, 3, 4
; P8-NEXT: cntlzw 3, 3
; P8-NEXT: srwi 3, 3, 5
; P8-NEXT: blr
;
; P9-LABEL: abs_isinff:
Expand All @@ -32,13 +32,13 @@ entry:
define zeroext i1 @abs_isinf(double %x) {
; P8-LABEL: abs_isinf:
; P8: # %bb.0: # %entry
; P8-NEXT: addis 3, 2, .LCPI1_0@toc@ha
; P8-NEXT: xsabsdp 0, 1
; P8-NEXT: li 4, 1
; P8-NEXT: lfs 1, .LCPI1_0@toc@l(3)
; P8-NEXT: li 3, 0
; P8-NEXT: fcmpu 0, 0, 1
; P8-NEXT: iseleq 3, 4, 3
; P8-NEXT: mffprd 3, 1
; P8-NEXT: li 4, 2047
; P8-NEXT: rldic 4, 4, 52, 1
; P8-NEXT: clrldi 3, 3, 1
; P8-NEXT: xor 3, 3, 4
; P8-NEXT: cntlzd 3, 3
; P8-NEXT: rldicl 3, 3, 58, 63
; P8-NEXT: blr
;
; P9-LABEL: abs_isinf:
Expand Down
Loading