Skip to content

[DAG] isKnownNeverNaN - add DemandedElts element mask to isKnownNeverNaN calls #135952

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions llvm/include/llvm/CodeGen/SelectionDAG.h
Original file line number Diff line number Diff line change
Expand Up @@ -2142,11 +2142,25 @@ class SelectionDAG {
/// X|Cst == X+Cst iff X&Cst = 0.
bool isBaseWithConstantOffset(SDValue Op) const;

/// Test whether the given SDValue (or all elements of it, if it is a
/// vector) is known to never be NaN in \p DemandedElts. If \p SNaN is true,
/// returns if \p Op is known to never be a signaling NaN (it may still be a
/// qNaN).
bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN = false,
unsigned Depth = 0) const;

/// Test whether the given SDValue (or all elements of it, if it is a
/// vector) is known to never be NaN. If \p SNaN is true, returns if \p Op is
/// known to never be a signaling NaN (it may still be a qNaN).
bool isKnownNeverNaN(SDValue Op, bool SNaN = false, unsigned Depth = 0) const;

/// \returns true if \p Op is known to never be a signaling NaN in \p
/// DemandedElts.
bool isKnownNeverSNaN(SDValue Op, const APInt &DemandedElts,
unsigned Depth = 0) const {
return isKnownNeverNaN(Op, DemandedElts, true, Depth);
}

/// \returns true if \p Op is known to never be a signaling NaN.
bool isKnownNeverSNaN(SDValue Op, unsigned Depth = 0) const {
return isKnownNeverNaN(Op, true, Depth);
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/CodeGen/TargetLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -4283,6 +4283,7 @@ class TargetLowering : public TargetLoweringBase {
/// NaN. If \p sNaN is true, returns if \p Op is known to never be a signaling
/// NaN.
virtual bool isKnownNeverNaNForTargetNode(SDValue Op,
const APInt &DemandedElts,
const SelectionDAG &DAG,
bool SNaN = false,
unsigned Depth = 0) const;
Expand Down
82 changes: 59 additions & 23 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5601,7 +5601,24 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
(Op.getOpcode() == ISD::ADD || isADDLike(Op));
}

bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const {
bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN,
unsigned Depth) const {
EVT VT = Op.getValueType();

// Since the number of lanes in a scalable vector is unknown at compile time,
// we track one bit which is implicitly broadcast to all lanes. This means
// that all lanes in a scalable vector are considered demanded.
APInt DemandedElts = VT.isFixedLengthVector()
? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);

return isKnownNeverNaN(Op, DemandedElts, SNaN, Depth);
}

bool SelectionDAG::isKnownNeverNaN(SDValue Op, const APInt &DemandedElts,
bool SNaN, unsigned Depth) const {
assert(!DemandedElts.isZero() && "No demanded elements");

// If we're told that NaNs won't happen, assume they won't.
if (getTarget().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs())
return true;
Expand Down Expand Up @@ -5657,21 +5674,21 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
case ISD::FLDEXP: {
if (SNaN)
return true;
return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
return isKnownNeverNaN(Op.getOperand(0), DemandedElts, SNaN, Depth + 1);
}
case ISD::FABS:
case ISD::FNEG:
case ISD::FCOPYSIGN: {
return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
return isKnownNeverNaN(Op.getOperand(0), DemandedElts, SNaN, Depth + 1);
}
case ISD::SELECT:
return isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
return isKnownNeverNaN(Op.getOperand(1), DemandedElts, SNaN, Depth + 1) &&
isKnownNeverNaN(Op.getOperand(2), DemandedElts, SNaN, Depth + 1);
case ISD::FP_EXTEND:
case ISD::FP_ROUND: {
if (SNaN)
return true;
return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
return isKnownNeverNaN(Op.getOperand(0), DemandedElts, SNaN, Depth + 1);
}
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
Expand All @@ -5693,42 +5710,61 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
case ISD::FMAXIMUMNUM: {
// Only one needs to be known not-nan, since it will be returned if the
// other ends up being one.
return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) ||
isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
return isKnownNeverNaN(Op.getOperand(0), DemandedElts, SNaN, Depth + 1) ||
isKnownNeverNaN(Op.getOperand(1), DemandedElts, SNaN, Depth + 1);
}
case ISD::FMINNUM_IEEE:
case ISD::FMAXNUM_IEEE: {
if (SNaN)
return true;
// This can return a NaN if either operand is an sNaN, or if both operands
// are NaN.
return (isKnownNeverNaN(Op.getOperand(0), false, Depth + 1) &&
isKnownNeverSNaN(Op.getOperand(1), Depth + 1)) ||
(isKnownNeverNaN(Op.getOperand(1), false, Depth + 1) &&
isKnownNeverSNaN(Op.getOperand(0), Depth + 1));
return (isKnownNeverNaN(Op.getOperand(0), DemandedElts, false, Depth + 1) &&
isKnownNeverSNaN(Op.getOperand(1), DemandedElts, Depth + 1)) ||
(isKnownNeverNaN(Op.getOperand(1), DemandedElts, false, Depth + 1) &&
isKnownNeverSNaN(Op.getOperand(0), DemandedElts, Depth + 1));
}
case ISD::FMINIMUM:
case ISD::FMAXIMUM: {
// TODO: Does this quiet or return the origina NaN as-is?
return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
return isKnownNeverNaN(Op.getOperand(0), DemandedElts, SNaN, Depth + 1) &&
isKnownNeverNaN(Op.getOperand(1), DemandedElts, SNaN, Depth + 1);
}
case ISD::EXTRACT_VECTOR_ELT: {
SDValue Src = Op.getOperand(0);
auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
EVT SrcVT = Src.getValueType();
if (SrcVT.isFixedLengthVector() && Idx &&
Idx->getAPIntValue().ult(SrcVT.getVectorNumElements())) {
APInt DemandedSrcElts = APInt::getOneBitSet(SrcVT.getVectorNumElements(),
Idx->getZExtValue());
return isKnownNeverNaN(Src, DemandedSrcElts, SNaN, Depth + 1);
}
return isKnownNeverNaN(Src, SNaN, Depth + 1);
}
case ISD::EXTRACT_VECTOR_ELT:
case ISD::EXTRACT_SUBVECTOR: {
return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
SDValue Src = Op.getOperand(0);
if (Src.getValueType().isFixedLengthVector()) {
unsigned Idx = Op.getConstantOperandVal(1);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
return isKnownNeverNaN(Src, DemandedSrcElts, SNaN, Depth + 1);
}
return isKnownNeverNaN(Src, SNaN, Depth + 1);
}
case ISD::BUILD_VECTOR: {
for (const SDValue &Opnd : Op->ops())
if (!isKnownNeverNaN(Opnd, SNaN, Depth + 1))
unsigned NumElts = Op.getNumOperands();
for (unsigned I = 0; I != NumElts; ++I)
if (DemandedElts[I] &&
!isKnownNeverNaN(Op.getOperand(I), SNaN, Depth + 1))
return false;
return true;
}
default:
if (Opcode >= ISD::BUILTIN_OP_END ||
Opcode == ISD::INTRINSIC_WO_CHAIN ||
Opcode == ISD::INTRINSIC_W_CHAIN ||
Opcode == ISD::INTRINSIC_VOID) {
return TLI->isKnownNeverNaNForTargetNode(Op, *this, SNaN, Depth);
if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN ||
Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID) {
return TLI->isKnownNeverNaNForTargetNode(Op, DemandedElts, *this, SNaN,
Depth);
}

return false;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3928,6 +3928,7 @@ bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
}

bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
const APInt &DemandedElts,
const SelectionDAG &DAG,
bool SNaN,
unsigned Depth) const {
Expand Down
7 changes: 3 additions & 4 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5971,10 +5971,9 @@ unsigned AMDGPUTargetLowering::computeNumSignBitsForTargetInstr(
}
}

bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
const SelectionDAG &DAG,
bool SNaN,
unsigned Depth) const {
bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(
SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool SNaN,
unsigned Depth) const {
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
case AMDGPUISD::FMIN_LEGACY:
Expand Down
5 changes: 2 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -321,9 +321,8 @@ class AMDGPUTargetLowering : public TargetLowering {
const MachineRegisterInfo &MRI,
unsigned Depth = 0) const override;

bool isKnownNeverNaNForTargetNode(SDValue Op,
const SelectionDAG &DAG,
bool SNaN = false,
bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts,
const SelectionDAG &DAG, bool SNaN = false,
unsigned Depth = 0) const override;

bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0,
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16688,6 +16688,7 @@ bool SITargetLowering::denormalsEnabledForType(
}

bool SITargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
const APInt &DemandedElts,
const SelectionDAG &DAG,
bool SNaN,
unsigned Depth) const {
Expand All @@ -16700,8 +16701,8 @@ bool SITargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
}

return AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(Op, DAG, SNaN,
Depth);
return AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(Op, DemandedElts,
DAG, SNaN, Depth);
}

// On older subtargets, global FP atomic instructions have a hardcoded FP mode
Expand Down
5 changes: 2 additions & 3 deletions llvm/lib/Target/AMDGPU/SIISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -546,9 +546,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {

bool isProfitableToHoist(Instruction *I) const override;

bool isKnownNeverNaNForTargetNode(SDValue Op,
const SelectionDAG &DAG,
bool SNaN = false,
bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts,
const SelectionDAG &DAG, bool SNaN = false,
unsigned Depth = 0) const override;
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
Expand Down
33 changes: 16 additions & 17 deletions llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1057,54 +1057,53 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v3f32_clamp_postcvt:
; SDAG-GFX1100-TRUE16: ; %bb.0:
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v5.l, v2.l
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v2.h, v6.l
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v2, v3, v5, v4 op_sel_hi:[1,1,1]
; SDAG-GFX1100-TRUE16-NEXT: v_pack_b32_f16 v1, v0.l, 0
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v6.l, v4.l
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
; SDAG-GFX1100-TRUE16-NEXT: v_pack_b32_f16 v1, v1.l, 0
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v3, v3, v5, v6 op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v0, v2, v2 clamp
; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v1, v1, v1 clamp
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v3f32_clamp_postcvt:
; SDAG-GFX1100-FAKE16: ; %bb.0:
; SDAG-GFX1100-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; SDAG-GFX1100-FAKE16-NEXT: v_pack_b32_f16 v1, v1, 0
; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; SDAG-GFX1100-FAKE16-NEXT: v_pk_max_f16 v0, v6, v6 clamp
; SDAG-GFX1100-FAKE16-NEXT: v_pk_max_f16 v1, v1, v1 clamp
; SDAG-GFX1100-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; SDAG-GFX1100-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt:
; SDAG-GFX900: ; %bb.0:
; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
; SDAG-GFX900-NEXT: v_pack_b32_f16 v1, v1, 0
; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; SDAG-GFX900-NEXT: v_pk_max_f16 v0, v6, v6 clamp
; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; SDAG-GFX900-NEXT: v_pk_max_f16 v1, v1, v1 clamp
; SDAG-GFX900-NEXT: v_mov_b32_e32 v0, v3
; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX906-LABEL: v_mad_mix_v3f32_clamp_postcvt:
; SDAG-GFX906: ; %bb.0:
; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
; SDAG-GFX906-NEXT: v_pack_b32_f16 v1, v1, 0
; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; SDAG-GFX906-NEXT: v_pk_max_f16 v0, v6, v6 clamp
; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; SDAG-GFX906-NEXT: v_pk_max_f16 v1, v1, v1 clamp
; SDAG-GFX906-NEXT: v_mov_b32_e32 v0, v3
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-VI-LABEL: v_mad_mix_v3f32_clamp_postcvt:
Expand Down
Loading