-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[DAG] isKnownNeverNaN - add DemandedElts element mask to isKnownNeverNaN calls #135952
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
7648629
to
31230c0
Compare
…Zero calls. Matches what we've done for computeKnownBits etc. to improve vector handling
31230c0
to
6456a0f
Compare
@llvm/pr-subscribers-backend-amdgpu Author: Simon Pilgrim (RKSimon) Changes: Matches what we've done for computeKnownBits etc. to improve vector handling. Full diff: https://github.com/llvm/llvm-project/pull/135952.diff 9 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 63423463eeee2..2ab6b4d3027e9 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -2142,11 +2142,25 @@ class SelectionDAG {
/// X|Cst == X+Cst iff X&Cst = 0.
bool isBaseWithConstantOffset(SDValue Op) const;
+ /// Test whether the given SDValue (or all elements of it, if it is a
+ /// vector) is known to never be NaN in \p DemandedElts. If \p SNaN is true,
+ /// returns if \p Op is known to never be a signaling NaN (it may still be a
+ /// qNaN).
+ bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN = false,
+ unsigned Depth = 0) const;
+
/// Test whether the given SDValue (or all elements of it, if it is a
/// vector) is known to never be NaN. If \p SNaN is true, returns if \p Op is
/// known to never be a signaling NaN (it may still be a qNaN).
bool isKnownNeverNaN(SDValue Op, bool SNaN = false, unsigned Depth = 0) const;
+ /// \returns true if \p Op is known to never be a signaling NaN in \p
+ /// DemandedElts.
+ bool isKnownNeverSNaN(SDValue Op, const APInt &DemandedElts,
+ unsigned Depth = 0) const {
+ return isKnownNeverNaN(Op, DemandedElts, true, Depth);
+ }
+
/// \returns true if \p Op is known to never be a signaling NaN.
bool isKnownNeverSNaN(SDValue Op, unsigned Depth = 0) const {
return isKnownNeverNaN(Op, true, Depth);
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 0a36975f4f625..00c36266a069f 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4283,6 +4283,7 @@ class TargetLowering : public TargetLoweringBase {
/// NaN. If \p sNaN is true, returns if \p Op is known to never be a signaling
/// NaN.
virtual bool isKnownNeverNaNForTargetNode(SDValue Op,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
bool SNaN = false,
unsigned Depth = 0) const;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 8682c40898046..64def735afa78 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5601,7 +5601,22 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
(Op.getOpcode() == ISD::ADD || isADDLike(Op));
}
-bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const {
+bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN,
+ unsigned Depth) const {
+ EVT VT = Op.getValueType();
+
+ // Since the number of lanes in a scalable vector is unknown at compile time,
+ // we track one bit which is implicitly broadcast to all lanes. This means
+ // that all lanes in a scalable vector are considered demanded.
+ APInt DemandedElts = VT.isFixedLengthVector()
+ ? APInt::getAllOnes(VT.getVectorNumElements())
+ : APInt(1, 1);
+
+ return isKnownNeverNaN(Op, DemandedElts, SNaN, Depth);
+}
+
+bool SelectionDAG::isKnownNeverNaN(SDValue Op, const APInt &DemandedElts,
+ bool SNaN, unsigned Depth) const {
// If we're told that NaNs won't happen, assume they won't.
if (getTarget().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs())
return true;
@@ -5615,6 +5630,9 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
(SNaN && !C->getValueAPF().isSignaling());
}
+ if (!DemandedElts)
+ return false; // No demanded elts, better to assume we don't know anything.
+
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
case ISD::FADD:
@@ -5657,21 +5675,21 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
case ISD::FLDEXP: {
if (SNaN)
return true;
- return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ return isKnownNeverNaN(Op.getOperand(0), DemandedElts, SNaN, Depth + 1);
}
case ISD::FABS:
case ISD::FNEG:
case ISD::FCOPYSIGN: {
- return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ return isKnownNeverNaN(Op.getOperand(0), DemandedElts, SNaN, Depth + 1);
}
case ISD::SELECT:
- return isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
- isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
+ return isKnownNeverNaN(Op.getOperand(1), DemandedElts, SNaN, Depth + 1) &&
+ isKnownNeverNaN(Op.getOperand(2), DemandedElts, SNaN, Depth + 1);
case ISD::FP_EXTEND:
case ISD::FP_ROUND: {
if (SNaN)
return true;
- return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ return isKnownNeverNaN(Op.getOperand(0), DemandedElts, SNaN, Depth + 1);
}
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
@@ -5693,8 +5711,8 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
case ISD::FMAXIMUMNUM: {
// Only one needs to be known not-nan, since it will be returned if the
// other ends up being one.
- return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) ||
- isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
+ return isKnownNeverNaN(Op.getOperand(0), DemandedElts, SNaN, Depth + 1) ||
+ isKnownNeverNaN(Op.getOperand(1), DemandedElts, SNaN, Depth + 1);
}
case ISD::FMINNUM_IEEE:
case ISD::FMAXNUM_IEEE: {
@@ -5702,33 +5720,52 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
return true;
// This can return a NaN if either operand is an sNaN, or if both operands
// are NaN.
- return (isKnownNeverNaN(Op.getOperand(0), false, Depth + 1) &&
- isKnownNeverSNaN(Op.getOperand(1), Depth + 1)) ||
- (isKnownNeverNaN(Op.getOperand(1), false, Depth + 1) &&
- isKnownNeverSNaN(Op.getOperand(0), Depth + 1));
+ return (isKnownNeverNaN(Op.getOperand(0), DemandedElts, false, Depth + 1) &&
+ isKnownNeverSNaN(Op.getOperand(1), DemandedElts, Depth + 1)) ||
+ (isKnownNeverNaN(Op.getOperand(1), DemandedElts, false, Depth + 1) &&
+ isKnownNeverSNaN(Op.getOperand(0), DemandedElts, Depth + 1));
}
case ISD::FMINIMUM:
case ISD::FMAXIMUM: {
// TODO: Does this quiet or return the origina NaN as-is?
- return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
- isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
+ return isKnownNeverNaN(Op.getOperand(0), DemandedElts, SNaN, Depth + 1) &&
+ isKnownNeverNaN(Op.getOperand(1), DemandedElts, SNaN, Depth + 1);
+ }
+ case ISD::EXTRACT_VECTOR_ELT: {
+ SDValue Src = Op.getOperand(0);
+ auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ EVT SrcVT = Src.getValueType();
+ if (SrcVT.isFixedLengthVector() && Idx &&
+ Idx->getAPIntValue().ult(SrcVT.getVectorNumElements())) {
+ APInt DemandedSrcElts = APInt::getOneBitSet(SrcVT.getVectorNumElements(),
+ Idx->getZExtValue());
+ return isKnownNeverNaN(Src, DemandedSrcElts, SNaN, Depth + 1);
+ }
+ return isKnownNeverNaN(Src, SNaN, Depth + 1);
}
- case ISD::EXTRACT_VECTOR_ELT:
case ISD::EXTRACT_SUBVECTOR: {
- return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ SDValue Src = Op.getOperand(0);
+ if (Src.getValueType().isFixedLengthVector()) {
+ unsigned Idx = Op.getConstantOperandVal(1);
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
+ return isKnownNeverNaN(Src, DemandedSrcElts, SNaN, Depth + 1);
+ }
+ return isKnownNeverNaN(Src, SNaN, Depth + 1);
}
case ISD::BUILD_VECTOR: {
- for (const SDValue &Opnd : Op->ops())
- if (!isKnownNeverNaN(Opnd, SNaN, Depth + 1))
+ unsigned NumElts = Op.getNumOperands();
+ for (unsigned I = 0; I != NumElts; ++I)
+ if (DemandedElts[I] &&
+ !isKnownNeverNaN(Op.getOperand(I), SNaN, Depth + 1))
return false;
return true;
}
default:
- if (Opcode >= ISD::BUILTIN_OP_END ||
- Opcode == ISD::INTRINSIC_WO_CHAIN ||
- Opcode == ISD::INTRINSIC_W_CHAIN ||
- Opcode == ISD::INTRINSIC_VOID) {
- return TLI->isKnownNeverNaNForTargetNode(Op, *this, SNaN, Depth);
+ if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN ||
+ Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID) {
+ return TLI->isKnownNeverNaNForTargetNode(Op, DemandedElts, *this, SNaN,
+ Depth);
}
return false;
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 5308593b5c988..3995216e3d689 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3928,6 +3928,7 @@ bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
}
bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
bool SNaN,
unsigned Depth) const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 533ad349f7500..2846405a2538c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -5971,10 +5971,9 @@ unsigned AMDGPUTargetLowering::computeNumSignBitsForTargetInstr(
}
}
-bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
- const SelectionDAG &DAG,
- bool SNaN,
- unsigned Depth) const {
+bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(
+ SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool SNaN,
+ unsigned Depth) const {
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
case AMDGPUISD::FMIN_LEGACY:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 6705f86e15fc2..fa9d61ec37c24 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -321,9 +321,8 @@ class AMDGPUTargetLowering : public TargetLowering {
const MachineRegisterInfo &MRI,
unsigned Depth = 0) const override;
- bool isKnownNeverNaNForTargetNode(SDValue Op,
- const SelectionDAG &DAG,
- bool SNaN = false,
+ bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts,
+ const SelectionDAG &DAG, bool SNaN = false,
unsigned Depth = 0) const override;
bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index bd95bcd89e183..9181d03f9f593 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16687,6 +16687,7 @@ bool SITargetLowering::denormalsEnabledForType(
}
bool SITargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
bool SNaN,
unsigned Depth) const {
@@ -16699,8 +16700,8 @@ bool SITargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
}
- return AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(Op, DAG, SNaN,
- Depth);
+ return AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(Op, DemandedElts,
+ DAG, SNaN, Depth);
}
// On older subtargets, global FP atomic instructions have a hardcoded FP mode
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index dc0634331caf9..c42366a1c04c8 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -546,9 +546,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
bool isProfitableToHoist(Instruction *I) const override;
- bool isKnownNeverNaNForTargetNode(SDValue Op,
- const SelectionDAG &DAG,
- bool SNaN = false,
+ bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts,
+ const SelectionDAG &DAG, bool SNaN = false,
unsigned Depth = 0) const override;
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
index beac41e42e0c6..ef325da272005 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
@@ -1057,54 +1057,53 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v3f32_clamp_postcvt:
; SDAG-GFX1100-TRUE16: ; %bb.0:
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v5.l, v2.l
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v2.h, v6.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v2, v3, v5, v4 op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: v_pack_b32_f16 v1, v0.l, 0
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v6.l, v4.l
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; SDAG-GFX1100-TRUE16-NEXT: v_pack_b32_f16 v1, v1.l, 0
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v3, v3, v5, v6 op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v0, v2, v2 clamp
; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v1, v1, v1 clamp
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v3f32_clamp_postcvt:
; SDAG-GFX1100-FAKE16: ; %bb.0:
; SDAG-GFX1100-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
+; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; SDAG-GFX1100-FAKE16-NEXT: v_pack_b32_f16 v1, v1, 0
+; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-FAKE16-NEXT: v_pk_max_f16 v0, v6, v6 clamp
; SDAG-GFX1100-FAKE16-NEXT: v_pk_max_f16 v1, v1, v1 clamp
+; SDAG-GFX1100-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; SDAG-GFX1100-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt:
; SDAG-GFX900: ; %bb.0:
; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
+; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
; SDAG-GFX900-NEXT: v_pack_b32_f16 v1, v1, 0
-; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX900-NEXT: v_pk_max_f16 v0, v6, v6 clamp
+; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; SDAG-GFX900-NEXT: v_pk_max_f16 v1, v1, v1 clamp
+; SDAG-GFX900-NEXT: v_mov_b32_e32 v0, v3
; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX906-LABEL: v_mad_mix_v3f32_clamp_postcvt:
; SDAG-GFX906: ; %bb.0:
; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
+; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
; SDAG-GFX906-NEXT: v_pack_b32_f16 v1, v1, 0
-; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX906-NEXT: v_pk_max_f16 v0, v6, v6 clamp
+; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; SDAG-GFX906-NEXT: v_pk_max_f16 v1, v1, v1 clamp
+; SDAG-GFX906-NEXT: v_mov_b32_e32 v0, v3
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-VI-LABEL: v_mad_mix_v3f32_clamp_postcvt:
|
@llvm/pr-subscribers-llvm-selectiondag Author: Simon Pilgrim (RKSimon) Changes: Matches what we've done for computeKnownBits etc. to improve vector handling. Full diff: https://github.com/llvm/llvm-project/pull/135952.diff 9 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 63423463eeee2..2ab6b4d3027e9 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -2142,11 +2142,25 @@ class SelectionDAG {
/// X|Cst == X+Cst iff X&Cst = 0.
bool isBaseWithConstantOffset(SDValue Op) const;
+ /// Test whether the given SDValue (or all elements of it, if it is a
+ /// vector) is known to never be NaN in \p DemandedElts. If \p SNaN is true,
+ /// returns if \p Op is known to never be a signaling NaN (it may still be a
+ /// qNaN).
+ bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN = false,
+ unsigned Depth = 0) const;
+
/// Test whether the given SDValue (or all elements of it, if it is a
/// vector) is known to never be NaN. If \p SNaN is true, returns if \p Op is
/// known to never be a signaling NaN (it may still be a qNaN).
bool isKnownNeverNaN(SDValue Op, bool SNaN = false, unsigned Depth = 0) const;
+ /// \returns true if \p Op is known to never be a signaling NaN in \p
+ /// DemandedElts.
+ bool isKnownNeverSNaN(SDValue Op, const APInt &DemandedElts,
+ unsigned Depth = 0) const {
+ return isKnownNeverNaN(Op, DemandedElts, true, Depth);
+ }
+
/// \returns true if \p Op is known to never be a signaling NaN.
bool isKnownNeverSNaN(SDValue Op, unsigned Depth = 0) const {
return isKnownNeverNaN(Op, true, Depth);
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 0a36975f4f625..00c36266a069f 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4283,6 +4283,7 @@ class TargetLowering : public TargetLoweringBase {
/// NaN. If \p sNaN is true, returns if \p Op is known to never be a signaling
/// NaN.
virtual bool isKnownNeverNaNForTargetNode(SDValue Op,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
bool SNaN = false,
unsigned Depth = 0) const;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 8682c40898046..64def735afa78 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5601,7 +5601,22 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
(Op.getOpcode() == ISD::ADD || isADDLike(Op));
}
-bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const {
+bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN,
+ unsigned Depth) const {
+ EVT VT = Op.getValueType();
+
+ // Since the number of lanes in a scalable vector is unknown at compile time,
+ // we track one bit which is implicitly broadcast to all lanes. This means
+ // that all lanes in a scalable vector are considered demanded.
+ APInt DemandedElts = VT.isFixedLengthVector()
+ ? APInt::getAllOnes(VT.getVectorNumElements())
+ : APInt(1, 1);
+
+ return isKnownNeverNaN(Op, DemandedElts, SNaN, Depth);
+}
+
+bool SelectionDAG::isKnownNeverNaN(SDValue Op, const APInt &DemandedElts,
+ bool SNaN, unsigned Depth) const {
// If we're told that NaNs won't happen, assume they won't.
if (getTarget().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs())
return true;
@@ -5615,6 +5630,9 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
(SNaN && !C->getValueAPF().isSignaling());
}
+ if (!DemandedElts)
+ return false; // No demanded elts, better to assume we don't know anything.
+
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
case ISD::FADD:
@@ -5657,21 +5675,21 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
case ISD::FLDEXP: {
if (SNaN)
return true;
- return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ return isKnownNeverNaN(Op.getOperand(0), DemandedElts, SNaN, Depth + 1);
}
case ISD::FABS:
case ISD::FNEG:
case ISD::FCOPYSIGN: {
- return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ return isKnownNeverNaN(Op.getOperand(0), DemandedElts, SNaN, Depth + 1);
}
case ISD::SELECT:
- return isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
- isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
+ return isKnownNeverNaN(Op.getOperand(1), DemandedElts, SNaN, Depth + 1) &&
+ isKnownNeverNaN(Op.getOperand(2), DemandedElts, SNaN, Depth + 1);
case ISD::FP_EXTEND:
case ISD::FP_ROUND: {
if (SNaN)
return true;
- return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ return isKnownNeverNaN(Op.getOperand(0), DemandedElts, SNaN, Depth + 1);
}
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
@@ -5693,8 +5711,8 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
case ISD::FMAXIMUMNUM: {
// Only one needs to be known not-nan, since it will be returned if the
// other ends up being one.
- return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) ||
- isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
+ return isKnownNeverNaN(Op.getOperand(0), DemandedElts, SNaN, Depth + 1) ||
+ isKnownNeverNaN(Op.getOperand(1), DemandedElts, SNaN, Depth + 1);
}
case ISD::FMINNUM_IEEE:
case ISD::FMAXNUM_IEEE: {
@@ -5702,33 +5720,52 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
return true;
// This can return a NaN if either operand is an sNaN, or if both operands
// are NaN.
- return (isKnownNeverNaN(Op.getOperand(0), false, Depth + 1) &&
- isKnownNeverSNaN(Op.getOperand(1), Depth + 1)) ||
- (isKnownNeverNaN(Op.getOperand(1), false, Depth + 1) &&
- isKnownNeverSNaN(Op.getOperand(0), Depth + 1));
+ return (isKnownNeverNaN(Op.getOperand(0), DemandedElts, false, Depth + 1) &&
+ isKnownNeverSNaN(Op.getOperand(1), DemandedElts, Depth + 1)) ||
+ (isKnownNeverNaN(Op.getOperand(1), DemandedElts, false, Depth + 1) &&
+ isKnownNeverSNaN(Op.getOperand(0), DemandedElts, Depth + 1));
}
case ISD::FMINIMUM:
case ISD::FMAXIMUM: {
// TODO: Does this quiet or return the origina NaN as-is?
- return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
- isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
+ return isKnownNeverNaN(Op.getOperand(0), DemandedElts, SNaN, Depth + 1) &&
+ isKnownNeverNaN(Op.getOperand(1), DemandedElts, SNaN, Depth + 1);
+ }
+ case ISD::EXTRACT_VECTOR_ELT: {
+ SDValue Src = Op.getOperand(0);
+ auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ EVT SrcVT = Src.getValueType();
+ if (SrcVT.isFixedLengthVector() && Idx &&
+ Idx->getAPIntValue().ult(SrcVT.getVectorNumElements())) {
+ APInt DemandedSrcElts = APInt::getOneBitSet(SrcVT.getVectorNumElements(),
+ Idx->getZExtValue());
+ return isKnownNeverNaN(Src, DemandedSrcElts, SNaN, Depth + 1);
+ }
+ return isKnownNeverNaN(Src, SNaN, Depth + 1);
}
- case ISD::EXTRACT_VECTOR_ELT:
case ISD::EXTRACT_SUBVECTOR: {
- return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ SDValue Src = Op.getOperand(0);
+ if (Src.getValueType().isFixedLengthVector()) {
+ unsigned Idx = Op.getConstantOperandVal(1);
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
+ return isKnownNeverNaN(Src, DemandedSrcElts, SNaN, Depth + 1);
+ }
+ return isKnownNeverNaN(Src, SNaN, Depth + 1);
}
case ISD::BUILD_VECTOR: {
- for (const SDValue &Opnd : Op->ops())
- if (!isKnownNeverNaN(Opnd, SNaN, Depth + 1))
+ unsigned NumElts = Op.getNumOperands();
+ for (unsigned I = 0; I != NumElts; ++I)
+ if (DemandedElts[I] &&
+ !isKnownNeverNaN(Op.getOperand(I), SNaN, Depth + 1))
return false;
return true;
}
default:
- if (Opcode >= ISD::BUILTIN_OP_END ||
- Opcode == ISD::INTRINSIC_WO_CHAIN ||
- Opcode == ISD::INTRINSIC_W_CHAIN ||
- Opcode == ISD::INTRINSIC_VOID) {
- return TLI->isKnownNeverNaNForTargetNode(Op, *this, SNaN, Depth);
+ if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN ||
+ Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID) {
+ return TLI->isKnownNeverNaNForTargetNode(Op, DemandedElts, *this, SNaN,
+ Depth);
}
return false;
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 5308593b5c988..3995216e3d689 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3928,6 +3928,7 @@ bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
}
bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
bool SNaN,
unsigned Depth) const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 533ad349f7500..2846405a2538c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -5971,10 +5971,9 @@ unsigned AMDGPUTargetLowering::computeNumSignBitsForTargetInstr(
}
}
-bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
- const SelectionDAG &DAG,
- bool SNaN,
- unsigned Depth) const {
+bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(
+ SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool SNaN,
+ unsigned Depth) const {
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
case AMDGPUISD::FMIN_LEGACY:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 6705f86e15fc2..fa9d61ec37c24 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -321,9 +321,8 @@ class AMDGPUTargetLowering : public TargetLowering {
const MachineRegisterInfo &MRI,
unsigned Depth = 0) const override;
- bool isKnownNeverNaNForTargetNode(SDValue Op,
- const SelectionDAG &DAG,
- bool SNaN = false,
+ bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts,
+ const SelectionDAG &DAG, bool SNaN = false,
unsigned Depth = 0) const override;
bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index bd95bcd89e183..9181d03f9f593 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16687,6 +16687,7 @@ bool SITargetLowering::denormalsEnabledForType(
}
bool SITargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
bool SNaN,
unsigned Depth) const {
@@ -16699,8 +16700,8 @@ bool SITargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
}
- return AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(Op, DAG, SNaN,
- Depth);
+ return AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(Op, DemandedElts,
+ DAG, SNaN, Depth);
}
// On older subtargets, global FP atomic instructions have a hardcoded FP mode
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index dc0634331caf9..c42366a1c04c8 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -546,9 +546,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
bool isProfitableToHoist(Instruction *I) const override;
- bool isKnownNeverNaNForTargetNode(SDValue Op,
- const SelectionDAG &DAG,
- bool SNaN = false,
+ bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts,
+ const SelectionDAG &DAG, bool SNaN = false,
unsigned Depth = 0) const override;
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
index beac41e42e0c6..ef325da272005 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
@@ -1057,54 +1057,53 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v3f32_clamp_postcvt:
; SDAG-GFX1100-TRUE16: ; %bb.0:
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v5.l, v2.l
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v2.h, v6.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v2, v3, v5, v4 op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: v_pack_b32_f16 v1, v0.l, 0
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v6.l, v4.l
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; SDAG-GFX1100-TRUE16-NEXT: v_pack_b32_f16 v1, v1.l, 0
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v3, v3, v5, v6 op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v0, v2, v2 clamp
; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v1, v1, v1 clamp
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v3f32_clamp_postcvt:
; SDAG-GFX1100-FAKE16: ; %bb.0:
; SDAG-GFX1100-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
+; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; SDAG-GFX1100-FAKE16-NEXT: v_pack_b32_f16 v1, v1, 0
+; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-FAKE16-NEXT: v_pk_max_f16 v0, v6, v6 clamp
; SDAG-GFX1100-FAKE16-NEXT: v_pk_max_f16 v1, v1, v1 clamp
+; SDAG-GFX1100-FAKE16-NEXT: v_mov_b32_e32 v0, v3
; SDAG-GFX1100-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt:
; SDAG-GFX900: ; %bb.0:
; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
+; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
; SDAG-GFX900-NEXT: v_pack_b32_f16 v1, v1, 0
-; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX900-NEXT: v_pk_max_f16 v0, v6, v6 clamp
+; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; SDAG-GFX900-NEXT: v_pk_max_f16 v1, v1, v1 clamp
+; SDAG-GFX900-NEXT: v_mov_b32_e32 v0, v3
; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX906-LABEL: v_mad_mix_v3f32_clamp_postcvt:
; SDAG-GFX906: ; %bb.0:
; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
+; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
; SDAG-GFX906-NEXT: v_pack_b32_f16 v1, v1, 0
-; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX906-NEXT: v_pk_max_f16 v0, v6, v6 clamp
+; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; SDAG-GFX906-NEXT: v_pk_max_f16 v1, v1, v1 clamp
+; SDAG-GFX906-NEXT: v_mov_b32_e32 v0, v3
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-VI-LABEL: v_mad_mix_v3f32_clamp_postcvt:
|
if (!DemandedElts)
  return false; // No demanded elts, better to assume we don't know anything.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this be an assertion? Since there is no legacy code with this parameter, can we just require this to be correct?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We could, I've mainly matched what we've done in the past. IIRC it's helped in strange edge cases but ideally we'd catch these in the caller.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
> IIRC it's helped in strange edge cases but ideally we'd catch these in the caller.
If none of the current call-sites require a 0, I think it would be better to be strict now and use an assert. Later if a call-site has a strange edge case that needs DemandedElts to be 0, the assert will force the caller to think carefully about why this is necessary and encourage them to create a testcase so that we know why we are allowing this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ideally we'd move all this to be like the IR computeKnownFPClass too
LGTM |
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/166/builds/1180 Here is the relevant piece of the build log for reference:
|
…NaN calls (llvm#135952) Matches what we've done for computeKnownBits etc. to improve vector handling
…NaN calls (llvm#135952) Matches what we've done for computeKnownBits etc. to improve vector handling
Matches what we've done for computeKnownBits etc. to improve vector handling