Skip to content

AMDGPU: Handle minimumnum/maximumnum in fneg combines #139133

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -661,6 +661,8 @@ static bool fnegFoldsIntoOpcode(unsigned Opc) {
case ISD::FMAXNUM_IEEE:
case ISD::FMINIMUM:
case ISD::FMAXIMUM:
case ISD::FMINIMUMNUM:
case ISD::FMAXIMUMNUM:
case ISD::SELECT:
case ISD::FSIN:
case ISD::FTRUNC:
Expand Down Expand Up @@ -4807,10 +4809,14 @@ static unsigned inverseMinMax(unsigned Opc) {
return ISD::FMINIMUM;
case ISD::FMINIMUM:
return ISD::FMAXIMUM;
case ISD::FMAXIMUMNUM:
return ISD::FMINIMUMNUM;
case ISD::FMINIMUMNUM:
return ISD::FMAXIMUMNUM;
case AMDGPUISD::FMAX_LEGACY:
return AMDGPUISD::FMIN_LEGACY;
case AMDGPUISD::FMIN_LEGACY:
return AMDGPUISD::FMAX_LEGACY;
return AMDGPUISD::FMAX_LEGACY;
default:
llvm_unreachable("invalid min/max opcode");
}
Expand Down Expand Up @@ -4932,6 +4938,8 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
case ISD::FMINNUM_IEEE:
case ISD::FMINIMUM:
case ISD::FMAXIMUM:
case ISD::FMINIMUMNUM:
case ISD::FMAXIMUMNUM:
case AMDGPUISD::FMAX_LEGACY:
case AMDGPUISD::FMIN_LEGACY: {
// fneg (fmaxnum x, y) -> fminnum (fneg x), (fneg y)
Expand Down
44 changes: 22 additions & 22 deletions llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2167,8 +2167,8 @@ define half @v_fneg_inv2pi_minimumnum_f16(half %a) #0 {
; SI-LABEL: v_fneg_inv2pi_minimumnum_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-NEXT: v_cvt_f32_f16_e64 v0, -v0
; SI-NEXT: v_cvt_f16_f32_e64 v0, -v0
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NEXT: v_max_f32_e32 v0, 0xbe230000, v0
; SI-NEXT: s_setpc_b64 s[30:31]
;
Expand All @@ -2188,8 +2188,8 @@ define half @v_fneg_neg_inv2pi_minimumnum_f16(half %a) #0 {
; SI-LABEL: v_fneg_neg_inv2pi_minimumnum_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-NEXT: v_cvt_f32_f16_e64 v0, -v0
; SI-NEXT: v_cvt_f16_f32_e64 v0, -v0
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NEXT: v_max_f32_e32 v0, 0x3e230000, v0
; SI-NEXT: s_setpc_b64 s[30:31]
;
Expand All @@ -2208,10 +2208,10 @@ define double @v_fneg_inv2pi_minimumnum_f64(double %a) #0 {
; SI-LABEL: v_fneg_inv2pi_minimumnum_f64:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; SI-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
; SI-NEXT: s_mov_b32 s4, 0x6dc9c882
; SI-NEXT: s_mov_b32 s5, 0xbfc45f30
; SI-NEXT: v_max_f64 v[0:1], -v[0:1], s[4:5]
; SI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5]
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_inv2pi_minimumnum_f64:
Expand All @@ -2230,17 +2230,17 @@ define double @v_fneg_neg_inv2pi_minimumnum_f64(double %a) #0 {
; SI-LABEL: v_fneg_neg_inv2pi_minimumnum_f64:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; SI-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
; SI-NEXT: s_mov_b32 s4, 0x6dc9c882
; SI-NEXT: s_mov_b32 s5, 0x3fc45f30
; SI-NEXT: v_max_f64 v[0:1], -v[0:1], s[4:5]
; SI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5]
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_neg_inv2pi_minimumnum_f64:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; VI-NEXT: v_max_f64 v[0:1], -v[0:1], 0.15915494309189532
; VI-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
; VI-NEXT: v_max_f64 v[0:1], v[0:1], 0.15915494309189532
; VI-NEXT: s_setpc_b64 s[30:31]
%min = call double @llvm.minimumnum.f64(double 0xbfc45f306dc9c882, double %a)
%fneg = fneg double %min
Expand Down Expand Up @@ -2313,9 +2313,9 @@ define { float, float } @v_fneg_minimumnum_multi_use_minimumnum_f32_ieee(float %
; GCN-LABEL: v_fneg_minimumnum_multi_use_minimumnum_f32_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GCN-NEXT: v_max_f32_e64 v0, -v0, -v1
; GCN-NEXT: v_mul_f32_e32 v1, -1.0, v1
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
; GCN-NEXT: v_max_f32_e32 v0, v0, v1
; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%min = call float @llvm.minimumnum.f32(float %a, float %b)
Expand All @@ -2330,9 +2330,9 @@ define <2 x float> @v_fneg_minimumnum_multi_use_minimumnum_f32_no_ieee(float %a,
; GCN-LABEL: v_fneg_minimumnum_multi_use_minimumnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GCN-NEXT: v_max_f32_e64 v0, -v0, -v1
; GCN-NEXT: v_mul_f32_e32 v1, -1.0, v1
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
; GCN-NEXT: v_max_f32_e32 v0, v0, v1
; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%min = call float @llvm.minimumnum.f32(float %a, float %b)
Expand Down Expand Up @@ -2513,9 +2513,9 @@ define { float, float } @v_fneg_maximumnum_multi_use_maximumnum_f32_ieee(float %
; GCN-LABEL: v_fneg_maximumnum_multi_use_maximumnum_f32_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GCN-NEXT: v_min_f32_e64 v0, -v0, -v1
; GCN-NEXT: v_mul_f32_e32 v1, -1.0, v1
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
; GCN-NEXT: v_min_f32_e32 v0, v0, v1
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

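This is an encoding size improvement: the negation is folded into the preceding multiplies (-1.0 instead of 1.0), so the min no longer needs source modifiers and can use the shorter e32 encoding instead of e64.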

; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%max = call float @llvm.maximumnum.f32(float %a, float %b)
Expand All @@ -2530,9 +2530,9 @@ define <2 x float> @v_fneg_maximumnum_multi_use_maximumnum_f32_no_ieee(float %a,
; GCN-LABEL: v_fneg_maximumnum_multi_use_maximumnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GCN-NEXT: v_min_f32_e64 v0, -v0, -v1
; GCN-NEXT: v_mul_f32_e32 v1, -1.0, v1
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
; GCN-NEXT: v_min_f32_e32 v0, v0, v1
; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%max = call float @llvm.maximumnum.f32(float %a, float %b)
Expand Down
Loading