-
Notifications
You must be signed in to change notification settings - Fork 14.3k
AMDGPU: Use minimumnum/maximumnum for fmed3 with amdgpu-ieee=0 #139546
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AMDGPU: Use minimumnum/maximumnum for fmed3 with amdgpu-ieee=0 #139546
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm). Changes: AMDGPU: Add fmed3 fold tests with flags; AMDGPU: Use minimumnum/maximumnum for fmed3 with amdgpu-ieee=0. Try to respect the signaling nan behavior of the instruction, so also start the special case fold for src2. Patch is 32.68 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/139546.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 1ca300464a697..f2a2cf41eab90 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -59,6 +59,28 @@ static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
return maxnum(Src0, Src1);
}
+enum class KnownIEEEMode { Unknown, On, Off };
+
+/// Return KnownIEEEMode::On if the use context can be assumed to have
+/// "amdgpu-ieee"="true", KnownIEEEMode::Off if it can be assumed to have
+/// "amdgpu-ieee"="false", and KnownIEEEMode::Unknown if neither is known.
+static KnownIEEEMode fpenvIEEEMode(const Instruction &I,
+ const GCNSubtarget &ST) {
+ if (!ST.hasIEEEMode()) // Only mode on gfx12
+ return KnownIEEEMode::On;
+
+ const Function *F = I.getFunction();
+ if (!F)
+ return KnownIEEEMode::Unknown;
+
+ Attribute IEEEAttr = F->getFnAttribute("amdgpu-ieee");
+ if (IEEEAttr.isValid())
+ return IEEEAttr.getValueAsBool() ? KnownIEEEMode::On : KnownIEEEMode::Off;
+
+ return AMDGPU::isShader(F->getCallingConv()) ? KnownIEEEMode::Off
+ : KnownIEEEMode::On;
+}
+
// Check if a value can be converted to a 16-bit value without losing
// precision.
// The value is expected to be either a float (IsFloat = true) or an unsigned
@@ -843,9 +865,6 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
break;
}
case Intrinsic::amdgcn_fmed3: {
- // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled
- // for the shader.
-
Value *Src0 = II.getArgOperand(0);
Value *Src1 = II.getArgOperand(1);
Value *Src2 = II.getArgOperand(2);
@@ -858,16 +877,85 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
if (II.isStrictFP())
break;
+ // med3 with a nan input acts like
+ // v_min_f32(v_min_f32(s0, s1), s2)
+ //
+ // Signalingness is ignored with ieee=0, so we fold to
+ // minimumnum/maximumnum. With ieee=1, the v_min_f32 acts like llvm.minnum
+ // with signaling nan handling. With ieee=0, like llvm.minimumnum except a
+ // returned signaling nan will not be quieted.
+
+ // ieee=1
+ // s0 snan: s2
+ // s1 snan: s2
+ // s2 snan: qnan
+
+ // s0 qnan: min(s1, s2)
+ // s1 qnan: min(s0, s2)
+ // s2 qnan: min(s0, s1)
+
+ // ieee=0
+ // s0 _nan: min(s1, s2)
+ // s1 _nan: min(s0, s2)
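+ // s2 _nan: min(s0, s1) (NOTE(review): the ieee=0 fold below emits maximumnum(s0, s1) for this case; confirm which is intended)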
+
// Checking for NaN before canonicalization provides better fidelity when
// mapping other operations onto fmed3 since the order of operands is
// unchanged.
Value *V = nullptr;
- if (match(Src0, PatternMatch::m_NaN()) || isa<UndefValue>(Src0)) {
- V = IC.Builder.CreateMinNum(Src1, Src2);
- } else if (match(Src1, PatternMatch::m_NaN()) || isa<UndefValue>(Src1)) {
- V = IC.Builder.CreateMinNum(Src0, Src2);
- } else if (match(Src2, PatternMatch::m_NaN()) || isa<UndefValue>(Src2)) {
- V = IC.Builder.CreateMinNum(Src0, Src1);
+ const APFloat *ConstSrc0 = nullptr;
+ const APFloat *ConstSrc1 = nullptr;
+ const APFloat *ConstSrc2 = nullptr;
+
+ // TODO: Also can fold to 2 operands with infinities.
+ if ((match(Src0, m_APFloat(ConstSrc0)) && ConstSrc0->isNaN()) ||
+ isa<UndefValue>(Src0)) {
+ switch (fpenvIEEEMode(II, *ST)) {
+ case KnownIEEEMode::On:
+ // TODO: If Src2 is snan, does it need quieting?
+ if (ConstSrc0 && ConstSrc0->isSignaling())
+ return IC.replaceInstUsesWith(II, Src2);
+ V = IC.Builder.CreateMinNum(Src1, Src2);
+ break;
+ case KnownIEEEMode::Off:
+ V = IC.Builder.CreateMinimumNum(Src1, Src2);
+ break;
+ case KnownIEEEMode::Unknown:
+ break;
+ }
+ } else if ((match(Src1, m_APFloat(ConstSrc1)) && ConstSrc1->isNaN()) ||
+ isa<UndefValue>(Src1)) {
+ switch (fpenvIEEEMode(II, *ST)) {
+ case KnownIEEEMode::On:
+ // TODO: If Src2 is snan, does it need quieting?
+ if (ConstSrc1 && ConstSrc1->isSignaling())
+ return IC.replaceInstUsesWith(II, Src2);
+
+ V = IC.Builder.CreateMinNum(Src0, Src2);
+ break;
+ case KnownIEEEMode::Off:
+ V = IC.Builder.CreateMinimumNum(Src0, Src2);
+ break;
+ case KnownIEEEMode::Unknown:
+ break;
+ }
+ } else if ((match(Src2, m_APFloat(ConstSrc2)) && ConstSrc2->isNaN()) ||
+ isa<UndefValue>(Src2)) {
+ switch (fpenvIEEEMode(II, *ST)) {
+ case KnownIEEEMode::On:
+ if (ConstSrc2 && ConstSrc2->isSignaling()) {
+ auto *Quieted = ConstantFP::get(II.getType(), ConstSrc2->makeQuiet());
+ return IC.replaceInstUsesWith(II, Quieted);
+ }
+
+ V = IC.Builder.CreateMinNum(Src0, Src1);
+ break;
+ case KnownIEEEMode::Off:
+ V = IC.Builder.CreateMaximumNum(Src0, Src1);
+ break;
+ case KnownIEEEMode::Unknown:
+ break;
+ }
}
if (V) {
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll b/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll
index 972862d8e327e..d9311008bd680 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll
@@ -1,12 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; Test with "amdgpu-ieee" set to true and false
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=instcombine -mcpu=gfx600 < %s | FileCheck -check-prefixes=CHECK,IEEE1 %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=instcombine -mcpu=gfx600 < %s | FileCheck -check-prefixes=CHECK,IEEE1,HAS-IEEE-BIT1 %s
; RUN: sed 's/\"true\"/\"false\"/g' %s | opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -passes=instcombine | FileCheck -check-prefixes=CHECK,IEEE0 %s
-; Test with gfx12 since there is no ieee bit anymore.
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=instcombine < %s | FileCheck -check-prefixes=CHECK,IEEE1 %s
-; RUN: sed 's/\"true\"/\"false\"/g' %s | opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=instcombine | FileCheck -check-prefixes=CHECK,IEEE0 %s
+; Test with gfx12 since there is no ieee bit anymore and the attribute is ignored.
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=instcombine < %s | FileCheck -check-prefixes=CHECK,IEEE1,NO-IEEE-BIT %s
+; RUN: sed 's/\"true\"/\"false\"/g' %s | opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=instcombine | FileCheck -check-prefixes=CHECK,IEEE1,NO-IEEE-BIT %s
; --------------------------------------------------------------------
; llvm.amdgcn.fmed3
@@ -85,80 +85,120 @@ define float @fmed3_canonicalize_c_x_y_f32(float %x, float %y) #1 {
}
define float @fmed3_undef_x_y_f32(float %x, float %y) #1 {
-; CHECK-LABEL: define float @fmed3_undef_x_y_f32(
-; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
-; CHECK-NEXT: ret float [[MED3]]
+; IEEE1-LABEL: define float @fmed3_undef_x_y_f32(
+; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; IEEE1-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
+; IEEE1-NEXT: ret float [[MED3]]
+;
+; IEEE0-LABEL: define float @fmed3_undef_x_y_f32(
+; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float [[Y]])
+; IEEE0-NEXT: ret float [[MED3]]
;
%med3 = call float @llvm.amdgcn.fmed3.f32(float undef, float %x, float %y)
ret float %med3
}
define float @fmed3_fmf_undef_x_y_f32(float %x, float %y) #1 {
-; CHECK-LABEL: define float @fmed3_fmf_undef_x_y_f32(
-; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[MED3:%.*]] = call nnan float @llvm.minnum.f32(float [[X]], float [[Y]])
-; CHECK-NEXT: ret float [[MED3]]
+; IEEE1-LABEL: define float @fmed3_fmf_undef_x_y_f32(
+; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; IEEE1-NEXT: [[MED3:%.*]] = call nnan float @llvm.minnum.f32(float [[X]], float [[Y]])
+; IEEE1-NEXT: ret float [[MED3]]
+;
+; IEEE0-LABEL: define float @fmed3_fmf_undef_x_y_f32(
+; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; IEEE0-NEXT: [[MED3:%.*]] = call nnan float @llvm.minimumnum.f32(float [[X]], float [[Y]])
+; IEEE0-NEXT: ret float [[MED3]]
;
%med3 = call nnan float @llvm.amdgcn.fmed3.f32(float undef, float %x, float %y)
ret float %med3
}
define float @fmed3_x_undef_y_f32(float %x, float %y) #1 {
-; CHECK-LABEL: define float @fmed3_x_undef_y_f32(
-; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
-; CHECK-NEXT: ret float [[MED3]]
+; IEEE1-LABEL: define float @fmed3_x_undef_y_f32(
+; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; IEEE1-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
+; IEEE1-NEXT: ret float [[MED3]]
+;
+; IEEE0-LABEL: define float @fmed3_x_undef_y_f32(
+; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float [[Y]])
+; IEEE0-NEXT: ret float [[MED3]]
;
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float undef, float %y)
ret float %med3
}
define float @fmed3_x_y_undef_f32(float %x, float %y) #1 {
-; CHECK-LABEL: define float @fmed3_x_y_undef_f32(
-; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
-; CHECK-NEXT: ret float [[MED3]]
+; IEEE1-LABEL: define float @fmed3_x_y_undef_f32(
+; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; IEEE1-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
+; IEEE1-NEXT: ret float [[MED3]]
+;
+; IEEE0-LABEL: define float @fmed3_x_y_undef_f32(
+; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.maximumnum.f32(float [[X]], float [[Y]])
+; IEEE0-NEXT: ret float [[MED3]]
;
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float undef)
ret float %med3
}
define float @fmed3_qnan0_x_y_f32(float %x, float %y) #1 {
-; CHECK-LABEL: define float @fmed3_qnan0_x_y_f32(
-; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
-; CHECK-NEXT: ret float [[MED3]]
+; IEEE1-LABEL: define float @fmed3_qnan0_x_y_f32(
+; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; IEEE1-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
+; IEEE1-NEXT: ret float [[MED3]]
+;
+; IEEE0-LABEL: define float @fmed3_qnan0_x_y_f32(
+; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float [[Y]])
+; IEEE0-NEXT: ret float [[MED3]]
;
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000000000000, float %x, float %y)
ret float %med3
}
define float @fmed3_x_qnan0_y_f32(float %x, float %y) #1 {
-; CHECK-LABEL: define float @fmed3_x_qnan0_y_f32(
-; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
-; CHECK-NEXT: ret float [[MED3]]
+; IEEE1-LABEL: define float @fmed3_x_qnan0_y_f32(
+; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; IEEE1-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
+; IEEE1-NEXT: ret float [[MED3]]
+;
+; IEEE0-LABEL: define float @fmed3_x_qnan0_y_f32(
+; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float [[Y]])
+; IEEE0-NEXT: ret float [[MED3]]
;
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8000000000000, float %y)
ret float %med3
}
define float @fmed3_x_y_qnan0_f32(float %x, float %y) #1 {
-; CHECK-LABEL: define float @fmed3_x_y_qnan0_f32(
-; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
-; CHECK-NEXT: ret float [[MED3]]
+; IEEE1-LABEL: define float @fmed3_x_y_qnan0_f32(
+; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; IEEE1-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
+; IEEE1-NEXT: ret float [[MED3]]
+;
+; IEEE0-LABEL: define float @fmed3_x_y_qnan0_f32(
+; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.maximumnum.f32(float [[X]], float [[Y]])
+; IEEE0-NEXT: ret float [[MED3]]
;
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF8000000000000)
ret float %med3
}
define float @fmed3_qnan1_x_y_f32(float %x, float %y) #1 {
-; CHECK-LABEL: define float @fmed3_qnan1_x_y_f32(
-; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
-; CHECK-NEXT: ret float [[MED3]]
+; IEEE1-LABEL: define float @fmed3_qnan1_x_y_f32(
+; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; IEEE1-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
+; IEEE1-NEXT: ret float [[MED3]]
+;
+; IEEE0-LABEL: define float @fmed3_qnan1_x_y_f32(
+; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float [[Y]])
+; IEEE0-NEXT: ret float [[MED3]]
;
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000100000000, float %x, float %y)
ret float %med3
@@ -229,27 +269,42 @@ define float @fmed3_constant_src2_1_f32(float %x, float %y) #1 {
}
define float @fmed3_x_qnan0_qnan1_f32(float %x) #1 {
-; CHECK-LABEL: define float @fmed3_x_qnan0_qnan1_f32(
-; CHECK-SAME: float [[X:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: ret float [[X]]
+; IEEE1-LABEL: define float @fmed3_x_qnan0_qnan1_f32(
+; IEEE1-SAME: float [[X:%.*]]) #[[ATTR1]] {
+; IEEE1-NEXT: ret float [[X]]
+;
+; IEEE0-LABEL: define float @fmed3_x_qnan0_qnan1_f32(
+; IEEE0-SAME: float [[X:%.*]]) #[[ATTR1]] {
+; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FF8002000000000)
+; IEEE0-NEXT: ret float [[MED3]]
;
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8001000000000, float 0x7FF8002000000000)
ret float %med3
}
define float @fmed3_qnan0_x_qnan1_f32(float %x) #1 {
-; CHECK-LABEL: define float @fmed3_qnan0_x_qnan1_f32(
-; CHECK-SAME: float [[X:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: ret float [[X]]
+; IEEE1-LABEL: define float @fmed3_qnan0_x_qnan1_f32(
+; IEEE1-SAME: float [[X:%.*]]) #[[ATTR1]] {
+; IEEE1-NEXT: ret float [[X]]
+;
+; IEEE0-LABEL: define float @fmed3_qnan0_x_qnan1_f32(
+; IEEE0-SAME: float [[X:%.*]]) #[[ATTR1]] {
+; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FF8002000000000)
+; IEEE0-NEXT: ret float [[MED3]]
;
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float %x, float 0x7FF8002000000000)
ret float %med3
}
define float @fmed3_qnan0_qnan1_x_f32(float %x) #1 {
-; CHECK-LABEL: define float @fmed3_qnan0_qnan1_x_f32(
-; CHECK-SAME: float [[X:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: ret float [[X]]
+; IEEE1-LABEL: define float @fmed3_qnan0_qnan1_x_f32(
+; IEEE1-SAME: float [[X:%.*]]) #[[ATTR1]] {
+; IEEE1-NEXT: ret float [[X]]
+;
+; IEEE0-LABEL: define float @fmed3_qnan0_qnan1_x_f32(
+; IEEE0-SAME: float [[X:%.*]]) #[[ATTR1]] {
+; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FF8002000000000)
+; IEEE0-NEXT: ret float [[MED3]]
;
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float 0x7FF8002000000000, float %x)
ret float %med3
@@ -274,9 +329,13 @@ define float @fmed3_0_nan_1_f32() #1 {
}
define float @fmed3_0_1_nan_f32() #1 {
-; CHECK-LABEL: define float @fmed3_0_1_nan_f32(
-; CHECK-SAME: ) #[[ATTR1]] {
-; CHECK-NEXT: ret float 0.000000e+00
+; IEEE1-LABEL: define float @fmed3_0_1_nan_f32(
+; IEEE1-SAME: ) #[[ATTR1]] {
+; IEEE1-NEXT: ret float 0.000000e+00
+;
+; IEEE0-LABEL: define float @fmed3_0_1_nan_f32(
+; IEEE0-SAME: ) #[[ATTR1]] {
+; IEEE0-NEXT: ret float 1.000000e+00
;
%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8001000000000)
ret float %med
@@ -301,9 +360,13 @@ define float @fmed3_0_undef_1_f32() #1 {
}
define float @fmed3_0_1_undef_f32() #1 {
-; CHECK-LABEL: define float @fmed3_0_1_undef_f32(
-; CHECK-SAME: ) #[[ATTR1]] {
-; CHECK-NEXT: ret float 0.000000e+00
+; IEEE1-LABEL: define float @fmed3_0_1_undef_f32(
+; IEEE1-SAME: ) #[[ATTR1]] {
+; IEEE1-NEXT: ret float 0.000000e+00
+;
+; IEEE0-LABEL: define float @fmed3_0_1_undef_f32(
+; IEEE0-SAME: ) #[[ATTR1]] {
+; IEEE0-NEXT: ret float 1.000000e+00
;
%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float undef)
ret float %med
@@ -337,84 +400,122 @@ define float @fmed3_x_y_poison_f32(float %x, float %y) #1 {
}
define float @fmed3_snan1_x_y_f32(float %x, float %y) #1 {
-; CHECK-LABEL: define float @fmed3_snan1_x_y_f32(
-; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
-; CHECK-NEXT: ret float [[MED3]]
+; IEEE1-LABEL: define float @fmed3_snan1_x_y_f32(
+; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; IEEE1-NEXT: ret float [[Y]]
+;
+; IEEE0-LABEL: define float @fmed3_snan1_x_y_f32(
+; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float [[Y]])
+; IEEE0-NEXT: ret float [[MED3]]
;
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF4000000000000, float %x, float %y)
ret float %med3
}
define float @fmed3_x_snan1_y_f32(float %x, float %y) #1 {
-; CHECK-LABEL: define float @fmed3_x_snan1_y_f32(
-; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
-; CHECK-NEXT: ret float [[MED3]]
+; IEEE1-LABEL: define float @fmed3_x_snan1_y_f32(
+; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; IEEE1-NEXT: ret float [[Y]]
+;
+; IEEE0-LABEL: define float @fmed3_x_snan1_y_f32(
+; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float [[Y]])
+; IEEE0-NEXT: ret float [[MED3]]
;
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF4000000000000, float %y)
ret float %med3
}
define float @fmed3_x_y_snan1_f32(float %x, float %y) #1 {
-; CHECK-LABEL: define float @fmed3_x_y_snan1_f32(
-; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
-; CHECK-NEXT: ret float [[MED3]]
+; IEEE1-LABEL: define float @fmed3_x_y_snan1_f32(
+; IEEE1-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; IEEE1-NEXT: ret float 0x7FFC000000000000
+;
+; IEEE0-LABEL: define float @fmed3_x_y_snan1_f32(
+; IEEE0-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.maximumnum.f32(float [[X]], float [[Y]])
+; IEEE0-NEXT: ret float [[MED3]]
;
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF4000000000000)
ret float %med3
}
define float @fmed3_snan1_x_snan2_f32(float %x) #1 {
-; CHECK-LABEL: define float @fmed3_snan1_x_snan2_f32(
-; CHECK-SAME: float [[X:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: ret float [[X]]
+; IEEE1-LABEL: define float @fmed3_snan1_x_snan2_f32(
+; IEEE1-SAME: float [[X:%.*]]) #[[ATTR1]] {
+; IEEE1-NEXT: ret float 0x7FF0000040000000
+;
+; IEEE0-LABEL: define float @fmed3_snan1_x_snan2_f32(
+; IEEE0-SAME: float [[X:%.*]]) #[[ATTR1]] {
+; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FF0000040000000)
+; IEEE0-NEXT: ret float [[MED3]]
;
%med3 = call float @llvm.amdgcn.fmed3....
[truncated]
|
Merge activity
|
c50539d
to
1bce310
Compare
8da4a4a
to
e1186ec
Compare
Try to respect the signaling nan behavior of the instruction, so also start the special case fold for src2.
e1186ec
to
79ef4a0
Compare
Try to respect the signaling nan behavior of the instruction,
so also start the special case fold for src2.