AMDGPU: Use minnum instead of maxnum for fmed3 src2-nan fold #139531

Conversation

arsenm (Contributor) commented May 12, 2025

According to the pseudocode in the ISA manual, if any input is a NaN the
instruction acts like min3, which folds to a two-operand min of the remaining
operands. The other NaN-operand cases already fold to min; I'm not sure how
this one ended up using max.
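
To make the reasoning concrete, here is a small stand-alone C++ model of that rule. It only illustrates the behavior described above (a NaN input makes fmed3 act like min3, and min ignores the NaN operand); it is not the ISA pseudocode itself, and the helpers minnum and med3 are names local to this sketch.

  // Toy model of the fold's reasoning, not the ISA pseudocode itself.
  // Assumption being illustrated: with a NaN input, fmed3 behaves like
  // min3, and an IEEE-style min ignores the NaN operand, so the result
  // is the two-operand min of the non-NaN inputs.
  #include <algorithm>
  #include <cassert>
  #include <cmath>
  #include <limits>

  // minnum semantics: if exactly one operand is NaN, return the other.
  static float minnum(float a, float b) {
    if (std::isnan(a)) return b;
    if (std::isnan(b)) return a;
    return std::min(a, b);
  }

  // NaN-free median of three; with any NaN input, fall back to min3.
  static float med3(float a, float b, float c) {
    if (std::isnan(a) || std::isnan(b) || std::isnan(c))
      return minnum(minnum(a, b), c); // acts like min3
    return std::max(std::min(a, b), std::min(std::max(a, b), c));
  }

  int main() {
    const float nan = std::numeric_limits<float>::quiet_NaN();
    // NaN in src2 folds to minnum(src0, src1), not maxnum(src0, src1):
    assert(med3(0.0f, 1.0f, nan) == 0.0f);
    assert(med3(-2.0f, 3.0f, nan) == -2.0f);
    // NaN in src0 or src1 folds the same way (the already-correct cases):
    assert(med3(nan, 0.0f, 1.0f) == 0.0f);
    assert(med3(0.0f, nan, 1.0f) == 0.0f);
    return 0;
  }

The asserts mirror the test expectations updated in the diff below: a NaN in src2 now folds to the smaller of the other two operands.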


arsenm requested review from jayfoad, Pierre-vh and rampitec on May 12, 2025 10:29
arsenm marked this pull request as ready for review on May 12, 2025 10:29
llvmbot added the llvm:instcombine and llvm:transforms labels on May 12, 2025
llvmbot (Member) commented May 12, 2025

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

According to the pseudocode in the ISA manual, if any input is a NaN the
instruction acts like min3, which folds to a two-operand min of the remaining
operands. The other NaN-operand cases already fold to min; I'm not sure how
this one ended up using max.


Full diff: https://github.com/llvm/llvm-project/pull/139531.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp (+1-1)
  • (modified) llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll (+8-8)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 1494428cb2bf5..1ca300464a697 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -867,7 +867,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
     } else if (match(Src1, PatternMatch::m_NaN()) || isa<UndefValue>(Src1)) {
       V = IC.Builder.CreateMinNum(Src0, Src2);
     } else if (match(Src2, PatternMatch::m_NaN()) || isa<UndefValue>(Src2)) {
-      V = IC.Builder.CreateMaxNum(Src0, Src1);
+      V = IC.Builder.CreateMinNum(Src0, Src1);
     }
 
     if (V) {
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll b/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll
index bf94637b36a34..972862d8e327e 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll
@@ -117,7 +117,7 @@ define float @fmed3_x_undef_y_f32(float %x, float %y) #1 {
 define float @fmed3_x_y_undef_f32(float %x, float %y) #1 {
 ; CHECK-LABEL: define float @fmed3_x_y_undef_f32(
 ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
 ; CHECK-NEXT:    ret float [[MED3]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float undef)
@@ -147,7 +147,7 @@ define float @fmed3_x_qnan0_y_f32(float %x, float %y) #1 {
 define float @fmed3_x_y_qnan0_f32(float %x, float %y) #1 {
 ; CHECK-LABEL: define float @fmed3_x_y_qnan0_f32(
 ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
 ; CHECK-NEXT:    ret float [[MED3]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF8000000000000)
@@ -276,7 +276,7 @@ define float @fmed3_0_nan_1_f32() #1 {
 define float @fmed3_0_1_nan_f32() #1 {
 ; CHECK-LABEL: define float @fmed3_0_1_nan_f32(
 ; CHECK-SAME: ) #[[ATTR1]] {
-; CHECK-NEXT:    ret float 1.000000e+00
+; CHECK-NEXT:    ret float 0.000000e+00
 ;
   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8001000000000)
   ret float %med
@@ -303,7 +303,7 @@ define float @fmed3_0_undef_1_f32() #1 {
 define float @fmed3_0_1_undef_f32() #1 {
 ; CHECK-LABEL: define float @fmed3_0_1_undef_f32(
 ; CHECK-SAME: ) #[[ATTR1]] {
-; CHECK-NEXT:    ret float 1.000000e+00
+; CHECK-NEXT:    ret float 0.000000e+00
 ;
   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float undef)
   ret float %med
@@ -359,7 +359,7 @@ define float @fmed3_x_snan1_y_f32(float %x, float %y) #1 {
 define float @fmed3_x_y_snan1_f32(float %x, float %y) #1 {
 ; CHECK-LABEL: define float @fmed3_x_y_snan1_f32(
 ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
 ; CHECK-NEXT:    ret float [[MED3]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF4000000000000)
@@ -414,7 +414,7 @@ define float @fmed3_snan1_neg1_2_f32(float %x, float %y) #1 {
 define float @fmed3_neg2_3_snan1_f32(float %x, float %y) #1 {
 ; CHECK-LABEL: define float @fmed3_neg2_3_snan1_f32(
 ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret float 3.000000e+00
+; CHECK-NEXT:    ret float -2.000000e+00
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float -2.0, float 3.0, float 0x7FF4000000000000)
   ret float %med3
@@ -447,7 +447,7 @@ define amdgpu_ps float @amdgpu_ps_default_fmed3_x_snan1_y_f32(float %x, float %y
 define amdgpu_ps float @amdgpu_ps_default_fmed3_x_y_snan1_f32(float %x, float %y) {
 ; CHECK-LABEL: define amdgpu_ps float @amdgpu_ps_default_fmed3_x_y_snan1_f32(
 ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
 ; CHECK-NEXT:    ret float [[MED3]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF4000000000000)
@@ -480,7 +480,7 @@ define amdgpu_ps float @amdgpu_ps_attr_fmed3_x_snan1_y_f32(float %x, float %y) #
 define amdgpu_ps float @amdgpu_ps_attr_fmed3_x_y_snan1_f32(float %x, float %y) #1 {
 ; CHECK-LABEL: define amdgpu_ps float @amdgpu_ps_attr_fmed3_x_y_snan1_f32(
 ; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
 ; CHECK-NEXT:    ret float [[MED3]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF4000000000000)
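
For a concrete picture of what the corrected fold emits, here is a minimal stand-alone IRBuilder sketch. It is a toy program that builds a throwaway function under the assumption of an LLVM development setup; it is not the AMDGPU InstCombine code itself, and the module and function names are made up for the demo.

  // Toy demo (hypothetical program, not part of the patch): emit the
  // replacement the fold now produces when src2 is NaN/undef, i.e.
  // minnum(src0, src1) rather than maxnum(src0, src1).
  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Module.h"
  #include "llvm/IR/Verifier.h"
  #include "llvm/Support/raw_ostream.h"

  using namespace llvm;

  int main() {
    LLVMContext Ctx;
    Module M("fmed3-fold-demo", Ctx);
    Type *F32 = Type::getFloatTy(Ctx);
    FunctionType *FTy = FunctionType::get(F32, {F32, F32}, /*isVarArg=*/false);
    Function *F =
        Function::Create(FTy, Function::ExternalLinkage, "fold_demo", M);
    IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));

    // Emit the replacement for the src2-NaN/undef case: minnum of the
    // remaining two operands (this was CreateMaxNum before the patch).
    Value *V = B.CreateMinNum(F->getArg(0), F->getArg(1));
    B.CreateRet(V);

    verifyFunction(*F, &errs());
    M.print(outs(), nullptr); // shows a call to @llvm.minnum.f32
    return 0;
  }

Printing the module shows the replacement as a call to @llvm.minnum.f32, which is what the updated CHECK lines above expect.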

arsenm (Contributor, Author) commented May 12, 2025

Merge activity

  • May 12, 2:11 PM EDT: A user started a stack merge that includes this pull request via Graphite.
  • May 12, 2:21 PM EDT: Graphite rebased this pull request as part of a merge.
  • May 12, 2:24 PM EDT: Graphite rebased this pull request as part of a merge.
  • May 12, 2:26 PM EDT: @arsenm merged this pull request with Graphite.

arsenm force-pushed the users/arsenm/amdgpu/strictfp-disable-fmed3-folds branch from 012d451 to 50773b4 on May 12, 2025 18:18
Base automatically changed from users/arsenm/amdgpu/strictfp-disable-fmed3-folds to main on May 12, 2025 18:21
arsenm force-pushed the users/arsenm/amdgpu/fix-nan-constant-src2-fmed3-fold-minnum-not-maxnum branch from 069254f to c50539d on May 12, 2025 18:21
According to the pseudocode in the ISA manual, if any input is a NaN the
instruction acts like min3, which folds to a two-operand min of the remaining
operands. The other NaN-operand cases already fold to min; I'm not sure how
this one ended up using max.
arsenm force-pushed the users/arsenm/amdgpu/fix-nan-constant-src2-fmed3-fold-minnum-not-maxnum branch from c50539d to 1bce310 on May 12, 2025 18:23
arsenm merged commit 08dd040 into main on May 12, 2025 (6 of 9 checks passed)
arsenm deleted the users/arsenm/amdgpu/fix-nan-constant-src2-fmed3-fold-minnum-not-maxnum branch on May 12, 2025 18:26