-
Notifications
You must be signed in to change notification settings - Fork 14.3k
AMDGPU: Use minnum instead of maxnum for fmed3 src2-nan fold #139531
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AMDGPU: Use minnum instead of maxnum for fmed3 src2-nan fold #139531
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) ChangesBy the pseudocode in the ISA manual, if any input is a nan it acts Full diff: https://github.com/llvm/llvm-project/pull/139531.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 1494428cb2bf5..1ca300464a697 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -867,7 +867,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
} else if (match(Src1, PatternMatch::m_NaN()) || isa<UndefValue>(Src1)) {
V = IC.Builder.CreateMinNum(Src0, Src2);
} else if (match(Src2, PatternMatch::m_NaN()) || isa<UndefValue>(Src2)) {
- V = IC.Builder.CreateMaxNum(Src0, Src1);
+ V = IC.Builder.CreateMinNum(Src0, Src1);
}
if (V) {
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll b/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll
index bf94637b36a34..972862d8e327e 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll
@@ -117,7 +117,7 @@ define float @fmed3_x_undef_y_f32(float %x, float %y) #1 {
define float @fmed3_x_y_undef_f32(float %x, float %y) #1 {
; CHECK-LABEL: define float @fmed3_x_y_undef_f32(
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
; CHECK-NEXT: ret float [[MED3]]
;
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float undef)
@@ -147,7 +147,7 @@ define float @fmed3_x_qnan0_y_f32(float %x, float %y) #1 {
define float @fmed3_x_y_qnan0_f32(float %x, float %y) #1 {
; CHECK-LABEL: define float @fmed3_x_y_qnan0_f32(
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
; CHECK-NEXT: ret float [[MED3]]
;
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF8000000000000)
@@ -276,7 +276,7 @@ define float @fmed3_0_nan_1_f32() #1 {
define float @fmed3_0_1_nan_f32() #1 {
; CHECK-LABEL: define float @fmed3_0_1_nan_f32(
; CHECK-SAME: ) #[[ATTR1]] {
-; CHECK-NEXT: ret float 1.000000e+00
+; CHECK-NEXT: ret float 0.000000e+00
;
%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8001000000000)
ret float %med
@@ -303,7 +303,7 @@ define float @fmed3_0_undef_1_f32() #1 {
define float @fmed3_0_1_undef_f32() #1 {
; CHECK-LABEL: define float @fmed3_0_1_undef_f32(
; CHECK-SAME: ) #[[ATTR1]] {
-; CHECK-NEXT: ret float 1.000000e+00
+; CHECK-NEXT: ret float 0.000000e+00
;
%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float undef)
ret float %med
@@ -359,7 +359,7 @@ define float @fmed3_x_snan1_y_f32(float %x, float %y) #1 {
define float @fmed3_x_y_snan1_f32(float %x, float %y) #1 {
; CHECK-LABEL: define float @fmed3_x_y_snan1_f32(
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
; CHECK-NEXT: ret float [[MED3]]
;
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF4000000000000)
@@ -414,7 +414,7 @@ define float @fmed3_snan1_neg1_2_f32(float %x, float %y) #1 {
define float @fmed3_neg2_3_snan1_f32(float %x, float %y) #1 {
; CHECK-LABEL: define float @fmed3_neg2_3_snan1_f32(
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: ret float 3.000000e+00
+; CHECK-NEXT: ret float -2.000000e+00
;
%med3 = call float @llvm.amdgcn.fmed3.f32(float -2.0, float 3.0, float 0x7FF4000000000000)
ret float %med3
@@ -447,7 +447,7 @@ define amdgpu_ps float @amdgpu_ps_default_fmed3_x_snan1_y_f32(float %x, float %y
define amdgpu_ps float @amdgpu_ps_default_fmed3_x_y_snan1_f32(float %x, float %y) {
; CHECK-LABEL: define amdgpu_ps float @amdgpu_ps_default_fmed3_x_y_snan1_f32(
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
; CHECK-NEXT: ret float [[MED3]]
;
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF4000000000000)
@@ -480,7 +480,7 @@ define amdgpu_ps float @amdgpu_ps_attr_fmed3_x_snan1_y_f32(float %x, float %y) #
define amdgpu_ps float @amdgpu_ps_attr_fmed3_x_y_snan1_f32(float %x, float %y) #1 {
; CHECK-LABEL: define amdgpu_ps float @amdgpu_ps_attr_fmed3_x_y_snan1_f32(
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
; CHECK-NEXT: ret float [[MED3]]
;
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF4000000000000)
|
Merge activity
|
012d451
to
50773b4
Compare
069254f
to
c50539d
Compare
By the pseudocode in the ISA manual, if any input is a nan it acts like min3, which will fold to min2 of the other operands. The other cases fold to min, I'm not sure how this one was wrong.
c50539d
to
1bce310
Compare
By the pseudocode in the ISA manual, if any input is a nan it acts
like min3, which will fold to min2 of the other operands. The other
cases fold to min, I'm not sure how this one was wrong.