Skip to content

Commit b59022b

Browse files
committed
DAG: Handle lowering of unordered fcZero|fcSubnormal to fcmp
1 parent a709c49 commit b59022b

File tree

4 files changed

+114
-167
lines changed

4 files changed

+114
-167
lines changed

llvm/lib/CodeGen/CodeGenCommonISel.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,9 @@ FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test) {
196196
case fcFinite:
197197
case fcPosFinite:
198198
case fcNegFinite:
199+
case fcZero | fcNan:
199200
case fcSubnormal | fcZero:
201+
case fcSubnormal | fcZero | fcNan:
200202
return InvertedTest;
201203
default:
202204
return fcNone;

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 30 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -8056,18 +8056,28 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
80568056
return SDValue();
80578057
}
80588058

8059-
/// If this FPClassTest can be performed with a fcmp to 0, return the test mask
8060-
/// for the floating-point mode.
8061-
static bool isFCmpEqualZero(FPClassTest Test, const fltSemantics &Semantics,
8062-
const MachineFunction &MF) {
8063-
// TODO: Handle unordered compares
8064-
if (Test == fcZero &&
8059+
/// Returns a true value if this FPClassTest can be performed with an ordered
8060+
/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
8061+
/// std::nullopt if it cannot be performed as a compare with 0.
8062+
static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
8063+
const fltSemantics &Semantics,
8064+
const MachineFunction &MF) {
8065+
FPClassTest OrderedMask = Test & ~fcNan;
8066+
FPClassTest NanTest = Test & fcNan;
8067+
bool IsOrdered = NanTest == fcNone;
8068+
bool IsUnordered = NanTest == fcNan;
8069+
8070+
// Skip cases that are testing for only a qnan or snan.
8071+
if (!IsOrdered && !IsUnordered)
8072+
return std::nullopt;
8073+
8074+
if (OrderedMask == fcZero &&
80658075
MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
8066-
return true;
8067-
if (Test == (fcZero | fcSubnormal) &&
8076+
return IsOrdered;
8077+
if (OrderedMask == (fcZero | fcSubnormal) &&
80688078
MF.getDenormalMode(Semantics).inputsAreZero())
8069-
return true;
8070-
return false;
8079+
return IsOrdered;
8080+
return std::nullopt;
80718081
}
80728082

80738083
SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
@@ -8109,14 +8119,20 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
81098119
// exceptions are ignored.
81108120
if (Flags.hasNoFPExcept() &&
81118121
isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
8112-
if (isFCmpEqualZero(Test, Semantics, DAG.getMachineFunction()) &&
8113-
(isCondCodeLegalOrCustom(IsInverted ? ISD::SETUNE : ISD::SETOEQ,
8114-
OperandVT.getScalarType().getSimpleVT()))) {
8122+
ISD::CondCode OrderedCmpOpcode = IsInverted ? ISD::SETUNE : ISD::SETOEQ;
8123+
ISD::CondCode UnorderedCmpOpcode = IsInverted ? ISD::SETONE : ISD::SETUEQ;
8124+
8125+
if (std::optional<bool> IsCmp0 =
8126+
isFCmpEqualZero(Test, Semantics, DAG.getMachineFunction());
8127+
IsCmp0 && (isCondCodeLegalOrCustom(
8128+
*IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
8129+
OperandVT.getScalarType().getSimpleVT()))) {
8130+
81158131
// If denormals could be implicitly treated as 0, this is not equivalent
81168132
// to a compare with 0 since it will also be true for denormals.
81178133
return DAG.getSetCC(DL, ResultVT, Op,
81188134
DAG.getConstantFP(0.0, DL, OperandVT),
8119-
IsInverted ? ISD::SETUNE : ISD::SETOEQ);
8135+
*IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
81208136
}
81218137

81228138
if (Test == fcNan &&

llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll

Lines changed: 12 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -2545,18 +2545,11 @@ define i1 @not_iszero_or_nan_f16(half %x) {
25452545
; GFX7SELDAG: ; %bb.0: ; %entry
25462546
; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
25472547
; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
2548-
; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c00
2549-
; GFX7SELDAG-NEXT: s_movk_i32 s6, 0x7800
2548+
; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c01
25502549
; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2551-
; GFX7SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0
2552-
; GFX7SELDAG-NEXT: v_add_i32_e64 v1, s[4:5], -1, v0
2553-
; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x3ff
2554-
; GFX7SELDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], s4, v1
2555-
; GFX7SELDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc
2556-
; GFX7SELDAG-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0
2557-
; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
2558-
; GFX7SELDAG-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0
2559-
; GFX7SELDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc
2550+
; GFX7SELDAG-NEXT: v_cmp_gt_i32_e32 vcc, s4, v0
2551+
; GFX7SELDAG-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0
2552+
; GFX7SELDAG-NEXT: s_and_b64 s[4:5], s[4:5], vcc
25602553
; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
25612554
; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31]
25622555
;
@@ -2619,18 +2612,11 @@ define i1 @not_iszero_or_nan_f_daz(half %x) #0 {
26192612
; GFX7SELDAG: ; %bb.0: ; %entry
26202613
; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
26212614
; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
2622-
; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c00
2623-
; GFX7SELDAG-NEXT: s_movk_i32 s6, 0x7800
2615+
; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c01
26242616
; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2625-
; GFX7SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0
2626-
; GFX7SELDAG-NEXT: v_add_i32_e64 v1, s[4:5], -1, v0
2627-
; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x3ff
2628-
; GFX7SELDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], s4, v1
2629-
; GFX7SELDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc
2630-
; GFX7SELDAG-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0
2631-
; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
2632-
; GFX7SELDAG-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0
2633-
; GFX7SELDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc
2617+
; GFX7SELDAG-NEXT: v_cmp_gt_i32_e32 vcc, s4, v0
2618+
; GFX7SELDAG-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0
2619+
; GFX7SELDAG-NEXT: s_and_b64 s[4:5], s[4:5], vcc
26342620
; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
26352621
; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31]
26362622
;
@@ -2693,18 +2679,11 @@ define i1 @not_iszero_or_nan_f_maybe_daz(half %x) #1 {
26932679
; GFX7SELDAG: ; %bb.0: ; %entry
26942680
; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
26952681
; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
2696-
; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c00
2697-
; GFX7SELDAG-NEXT: s_movk_i32 s6, 0x7800
2682+
; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c01
26982683
; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2699-
; GFX7SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0
2700-
; GFX7SELDAG-NEXT: v_add_i32_e64 v1, s[4:5], -1, v0
2701-
; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x3ff
2702-
; GFX7SELDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], s4, v1
2703-
; GFX7SELDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc
2704-
; GFX7SELDAG-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0
2705-
; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
2706-
; GFX7SELDAG-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0
2707-
; GFX7SELDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc
2684+
; GFX7SELDAG-NEXT: v_cmp_gt_i32_e32 vcc, s4, v0
2685+
; GFX7SELDAG-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0
2686+
; GFX7SELDAG-NEXT: s_and_b64 s[4:5], s[4:5], vcc
27082687
; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
27092688
; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31]
27102689
;

0 commit comments

Comments
 (0)