Skip to content

Commit 9a987f2

Browse files
committed
AMDGPU: Handle minimumnum/maximumnum in fneg combines
1 parent 07f36f2 commit 9a987f2

File tree

2 files changed

+31
-23
lines changed

2 files changed

+31
-23
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -661,6 +661,8 @@ static bool fnegFoldsIntoOpcode(unsigned Opc) {
661661
case ISD::FMAXNUM_IEEE:
662662
case ISD::FMINIMUM:
663663
case ISD::FMAXIMUM:
664+
case ISD::FMINIMUMNUM:
665+
case ISD::FMAXIMUMNUM:
664666
case ISD::SELECT:
665667
case ISD::FSIN:
666668
case ISD::FTRUNC:
@@ -4807,10 +4809,14 @@ static unsigned inverseMinMax(unsigned Opc) {
48074809
return ISD::FMINIMUM;
48084810
case ISD::FMINIMUM:
48094811
return ISD::FMAXIMUM;
4812+
case ISD::FMAXIMUMNUM:
4813+
return ISD::FMINIMUMNUM;
4814+
case ISD::FMINIMUMNUM:
4815+
return ISD::FMAXIMUMNUM;
48104816
case AMDGPUISD::FMAX_LEGACY:
48114817
return AMDGPUISD::FMIN_LEGACY;
48124818
case AMDGPUISD::FMIN_LEGACY:
4813-
return AMDGPUISD::FMAX_LEGACY;
4819+
return AMDGPUISD::FMAX_LEGACY;
48144820
default:
48154821
llvm_unreachable("invalid min/max opcode");
48164822
}
@@ -4932,6 +4938,8 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
49324938
case ISD::FMINNUM_IEEE:
49334939
case ISD::FMINIMUM:
49344940
case ISD::FMAXIMUM:
4941+
case ISD::FMINIMUMNUM:
4942+
case ISD::FMAXIMUMNUM:
49354943
case AMDGPUISD::FMAX_LEGACY:
49364944
case AMDGPUISD::FMIN_LEGACY: {
49374945
// fneg (fmaxnum x, y) -> fminnum (fneg x), (fneg y)

llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll

Lines changed: 22 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -2167,8 +2167,8 @@ define half @v_fneg_inv2pi_minimumnum_f16(half %a) #0 {
21672167
; SI-LABEL: v_fneg_inv2pi_minimumnum_f16:
21682168
; SI: ; %bb.0:
21692169
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2170-
; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
2171-
; SI-NEXT: v_cvt_f32_f16_e64 v0, -v0
2170+
; SI-NEXT: v_cvt_f16_f32_e64 v0, -v0
2171+
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
21722172
; SI-NEXT: v_max_f32_e32 v0, 0xbe230000, v0
21732173
; SI-NEXT: s_setpc_b64 s[30:31]
21742174
;
@@ -2188,8 +2188,8 @@ define half @v_fneg_neg_inv2pi_minimumnum_f16(half %a) #0 {
21882188
; SI-LABEL: v_fneg_neg_inv2pi_minimumnum_f16:
21892189
; SI: ; %bb.0:
21902190
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2191-
; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
2192-
; SI-NEXT: v_cvt_f32_f16_e64 v0, -v0
2191+
; SI-NEXT: v_cvt_f16_f32_e64 v0, -v0
2192+
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
21932193
; SI-NEXT: v_max_f32_e32 v0, 0x3e230000, v0
21942194
; SI-NEXT: s_setpc_b64 s[30:31]
21952195
;
@@ -2208,10 +2208,10 @@ define double @v_fneg_inv2pi_minimumnum_f64(double %a) #0 {
22082208
; SI-LABEL: v_fneg_inv2pi_minimumnum_f64:
22092209
; SI: ; %bb.0:
22102210
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2211-
; SI-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
2211+
; SI-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
22122212
; SI-NEXT: s_mov_b32 s4, 0x6dc9c882
22132213
; SI-NEXT: s_mov_b32 s5, 0xbfc45f30
2214-
; SI-NEXT: v_max_f64 v[0:1], -v[0:1], s[4:5]
2214+
; SI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5]
22152215
; SI-NEXT: s_setpc_b64 s[30:31]
22162216
;
22172217
; VI-LABEL: v_fneg_inv2pi_minimumnum_f64:
@@ -2230,17 +2230,17 @@ define double @v_fneg_neg_inv2pi_minimumnum_f64(double %a) #0 {
22302230
; SI-LABEL: v_fneg_neg_inv2pi_minimumnum_f64:
22312231
; SI: ; %bb.0:
22322232
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2233-
; SI-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
2233+
; SI-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
22342234
; SI-NEXT: s_mov_b32 s4, 0x6dc9c882
22352235
; SI-NEXT: s_mov_b32 s5, 0x3fc45f30
2236-
; SI-NEXT: v_max_f64 v[0:1], -v[0:1], s[4:5]
2236+
; SI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5]
22372237
; SI-NEXT: s_setpc_b64 s[30:31]
22382238
;
22392239
; VI-LABEL: v_fneg_neg_inv2pi_minimumnum_f64:
22402240
; VI: ; %bb.0:
22412241
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2242-
; VI-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
2243-
; VI-NEXT: v_max_f64 v[0:1], -v[0:1], 0.15915494309189532
2242+
; VI-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
2243+
; VI-NEXT: v_max_f64 v[0:1], v[0:1], 0.15915494309189532
22442244
; VI-NEXT: s_setpc_b64 s[30:31]
22452245
%min = call double @llvm.minimumnum.f64(double 0xbfc45f306dc9c882, double %a)
22462246
%fneg = fneg double %min
@@ -2313,9 +2313,9 @@ define { float, float } @v_fneg_minimumnum_multi_use_minimumnum_f32_ieee(float %
23132313
; GCN-LABEL: v_fneg_minimumnum_multi_use_minimumnum_f32_ieee:
23142314
; GCN: ; %bb.0:
23152315
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2316-
; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
2317-
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
2318-
; GCN-NEXT: v_max_f32_e64 v0, -v0, -v1
2316+
; GCN-NEXT: v_mul_f32_e32 v1, -1.0, v1
2317+
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
2318+
; GCN-NEXT: v_max_f32_e32 v0, v0, v1
23192319
; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
23202320
; GCN-NEXT: s_setpc_b64 s[30:31]
23212321
%min = call float @llvm.minimumnum.f32(float %a, float %b)
@@ -2330,9 +2330,9 @@ define <2 x float> @v_fneg_minimumnum_multi_use_minimumnum_f32_no_ieee(float %a,
23302330
; GCN-LABEL: v_fneg_minimumnum_multi_use_minimumnum_f32_no_ieee:
23312331
; GCN: ; %bb.0:
23322332
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2333-
; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
2334-
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
2335-
; GCN-NEXT: v_max_f32_e64 v0, -v0, -v1
2333+
; GCN-NEXT: v_mul_f32_e32 v1, -1.0, v1
2334+
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
2335+
; GCN-NEXT: v_max_f32_e32 v0, v0, v1
23362336
; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
23372337
; GCN-NEXT: s_setpc_b64 s[30:31]
23382338
%min = call float @llvm.minimumnum.f32(float %a, float %b)
@@ -2513,9 +2513,9 @@ define { float, float } @v_fneg_maximumnum_multi_use_maximumnum_f32_ieee(float %
25132513
; GCN-LABEL: v_fneg_maximumnum_multi_use_maximumnum_f32_ieee:
25142514
; GCN: ; %bb.0:
25152515
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2516-
; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
2517-
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
2518-
; GCN-NEXT: v_min_f32_e64 v0, -v0, -v1
2516+
; GCN-NEXT: v_mul_f32_e32 v1, -1.0, v1
2517+
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
2518+
; GCN-NEXT: v_min_f32_e32 v0, v0, v1
25192519
; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
25202520
; GCN-NEXT: s_setpc_b64 s[30:31]
25212521
%max = call float @llvm.maximumnum.f32(float %a, float %b)
@@ -2530,9 +2530,9 @@ define <2 x float> @v_fneg_maximumnum_multi_use_maximumnum_f32_no_ieee(float %a,
25302530
; GCN-LABEL: v_fneg_maximumnum_multi_use_maximumnum_f32_no_ieee:
25312531
; GCN: ; %bb.0:
25322532
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2533-
; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
2534-
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
2535-
; GCN-NEXT: v_min_f32_e64 v0, -v0, -v1
2533+
; GCN-NEXT: v_mul_f32_e32 v1, -1.0, v1
2534+
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
2535+
; GCN-NEXT: v_min_f32_e32 v0, v0, v1
25362536
; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
25372537
; GCN-NEXT: s_setpc_b64 s[30:31]
25382538
%max = call float @llvm.maximumnum.f32(float %a, float %b)

0 commit comments

Comments
 (0)