Skip to content

AMDGPU: Use minimumnum/maximumnum for fmed3 with amdgpu-ieee=0 #139546

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 97 additions & 9 deletions llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,28 @@ static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
return maxnum(Src0, Src1);
}

enum class KnownIEEEMode { Unknown, On, Off };

/// Return KnownIEEEMode::On if we know if the use context can assume
/// "amdgpu-ieee"="true" and KnownIEEEMode::Off if we can assume
/// "amdgpu-ieee"="false".
static KnownIEEEMode fpenvIEEEMode(const Instruction &I,
const GCNSubtarget &ST) {
if (!ST.hasIEEEMode()) // Only mode on gfx12
return KnownIEEEMode::On;

const Function *F = I.getFunction();
if (!F)
return KnownIEEEMode::Unknown;

Attribute IEEEAttr = F->getFnAttribute("amdgpu-ieee");
if (IEEEAttr.isValid())
return IEEEAttr.getValueAsBool() ? KnownIEEEMode::On : KnownIEEEMode::Off;

return AMDGPU::isShader(F->getCallingConv()) ? KnownIEEEMode::Off
: KnownIEEEMode::On;
}

// Check if a value can be converted to a 16-bit value without losing
// precision.
// The value is expected to be either a float (IsFloat = true) or an unsigned
Expand Down Expand Up @@ -843,9 +865,6 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
break;
}
case Intrinsic::amdgcn_fmed3: {
// Note this does not preserve proper sNaN behavior if IEEE-mode is enabled
// for the shader.

Value *Src0 = II.getArgOperand(0);
Value *Src1 = II.getArgOperand(1);
Value *Src2 = II.getArgOperand(2);
Expand All @@ -858,16 +877,85 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
if (II.isStrictFP())
break;

// med3 with a nan input acts like
// v_min_f32(v_min_f32(s0, s1), s2)
//
// Signalingness is ignored with ieee=0, so we fold to
// minimumnum/maximumnum. With ieee=1, the v_min_f32 acts like llvm.minnum
// with signaling nan handling. With ieee=0, like llvm.minimumnum except a
// returned signaling nan will not be quieted.

// ieee=1
// s0 snan: s2
// s1 snan: s2
// s2 snan: qnan

// s0 qnan: min(s1, s2)
// s1 qnan: min(s0, s2)
// s2 qnan: min(s0, s1)

// ieee=0
// s0 _nan: min(s1, s2)
// s1 _nan: min(s0, s2)
// s2 _nan: min(s0, s1)

// Checking for NaN before canonicalization provides better fidelity when
// mapping other operations onto fmed3 since the order of operands is
// unchanged.
Value *V = nullptr;
if (match(Src0, PatternMatch::m_NaN()) || isa<UndefValue>(Src0)) {
V = IC.Builder.CreateMinNum(Src1, Src2);
} else if (match(Src1, PatternMatch::m_NaN()) || isa<UndefValue>(Src1)) {
V = IC.Builder.CreateMinNum(Src0, Src2);
} else if (match(Src2, PatternMatch::m_NaN()) || isa<UndefValue>(Src2)) {
V = IC.Builder.CreateMinNum(Src0, Src1);
const APFloat *ConstSrc0 = nullptr;
const APFloat *ConstSrc1 = nullptr;
const APFloat *ConstSrc2 = nullptr;

// TODO: Also can fold to 2 operands with infinities.
if ((match(Src0, m_APFloat(ConstSrc0)) && ConstSrc0->isNaN()) ||
isa<UndefValue>(Src0)) {
switch (fpenvIEEEMode(II, *ST)) {
case KnownIEEEMode::On:
// TODO: If Src2 is snan, does it need quieting?
if (ConstSrc0 && ConstSrc0->isSignaling())
return IC.replaceInstUsesWith(II, Src2);
V = IC.Builder.CreateMinNum(Src1, Src2);
break;
case KnownIEEEMode::Off:
V = IC.Builder.CreateMinimumNum(Src1, Src2);
break;
case KnownIEEEMode::Unknown:
break;
}
} else if ((match(Src1, m_APFloat(ConstSrc1)) && ConstSrc1->isNaN()) ||
isa<UndefValue>(Src1)) {
switch (fpenvIEEEMode(II, *ST)) {
case KnownIEEEMode::On:
// TODO: If Src2 is snan, does it need quieting?
if (ConstSrc1 && ConstSrc1->isSignaling())
return IC.replaceInstUsesWith(II, Src2);

V = IC.Builder.CreateMinNum(Src0, Src2);
break;
case KnownIEEEMode::Off:
V = IC.Builder.CreateMinimumNum(Src0, Src2);
break;
case KnownIEEEMode::Unknown:
break;
}
} else if ((match(Src2, m_APFloat(ConstSrc2)) && ConstSrc2->isNaN()) ||
isa<UndefValue>(Src2)) {
switch (fpenvIEEEMode(II, *ST)) {
case KnownIEEEMode::On:
if (ConstSrc2 && ConstSrc2->isSignaling()) {
auto *Quieted = ConstantFP::get(II.getType(), ConstSrc2->makeQuiet());
return IC.replaceInstUsesWith(II, Quieted);
}

V = IC.Builder.CreateMinNum(Src0, Src1);
break;
case KnownIEEEMode::Off:
V = IC.Builder.CreateMaximumNum(Src0, Src1);
break;
case KnownIEEEMode::Unknown:
break;
}
}

if (V) {
Expand Down
Loading
Loading