Skip to content

Commit 00cad3e

Browse files
authored
[SDAG] Handle extract_subvector in isKnownNeverNaN (#131581)
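Propagate nnan across extract_subvector: isKnownNeverNaN now looks through ISD::EXTRACT_SUBVECTOR to its source vector operand, in the same way it already does for ISD::EXTRACT_VECTOR_ELT.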
1 parent a5107be commit 00cad3e

File tree

4 files changed

+52
-85
lines changed

4 files changed

+52
-85
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5723,7 +5723,8 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
57235723
return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
57245724
isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
57255725
}
5726-
case ISD::EXTRACT_VECTOR_ELT: {
5726+
case ISD::EXTRACT_VECTOR_ELT:
5727+
case ISD::EXTRACT_SUBVECTOR: {
57275728
return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
57285729
}
57295730
case ISD::BUILD_VECTOR: {

llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll

Lines changed: 36 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -1287,55 +1287,55 @@ define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %s
12871287
; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v4f32_clamp_postcvt:
12881288
; SDAG-GFX1100-TRUE16: ; %bb.0:
12891289
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1290-
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1291-
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v7, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1292-
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1293-
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v2.h, v6.l
1294-
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v0.h, v7.l
12951290
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v6.l, v0.l
12961291
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v7.l, v2.l
1292+
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v8.l, v4.l
1293+
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v9.l, v1.l
1294+
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v10.l, v3.l
1295+
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v11.l, v5.l
12971296
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
1298-
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1]
1299-
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v0, v6, v7, v4 op_sel_hi:[1,1,1]
1297+
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v6, v7, v8 op_sel_hi:[1,1,1] clamp
1298+
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v7, v9, v10, v11 op_sel_hi:[1,1,1] clamp
13001299
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1301-
; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v1, v2, v2 clamp
1302-
; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v0, v0, v0 clamp
1300+
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1301+
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1302+
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1303+
; SDAG-GFX1100-TRUE16-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
13031304
; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
13041305
;
13051306
; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v4f32_clamp_postcvt:
13061307
; SDAG-GFX1100-FAKE16: ; %bb.0:
13071308
; SDAG-GFX1100-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1308-
; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
1309-
; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1]
1309+
; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1310+
; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1] clamp
13101311
; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1311-
; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1312-
; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1313-
; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1314-
; SDAG-GFX1100-FAKE16-NEXT: v_pk_max_f16 v0, v6, v6 clamp
1315-
; SDAG-GFX1100-FAKE16-NEXT: v_pk_max_f16 v1, v7, v7 clamp
1312+
; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1313+
; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1314+
; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1315+
; SDAG-GFX1100-FAKE16-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
13161316
; SDAG-GFX1100-FAKE16-NEXT: s_setpc_b64 s[30:31]
13171317
;
1318-
; SDAG-GFX900-LABEL: v_mad_mix_v4f32_clamp_postcvt:
1319-
; SDAG-GFX900: ; %bb.0:
1320-
; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1321-
; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
1322-
; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1]
1323-
; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1324-
; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1325-
; SDAG-GFX900-NEXT: v_pk_max_f16 v0, v6, v6 clamp
1326-
; SDAG-GFX900-NEXT: v_pk_max_f16 v1, v7, v7 clamp
1327-
; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
1318+
; GFX900-LABEL: v_mad_mix_v4f32_clamp_postcvt:
1319+
; GFX900: ; %bb.0:
1320+
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1321+
; GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1322+
; GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1323+
; GFX900-NEXT: v_mad_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
1324+
; GFX900-NEXT: v_mad_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1325+
; GFX900-NEXT: v_mov_b32_e32 v0, v6
1326+
; GFX900-NEXT: v_mov_b32_e32 v1, v2
1327+
; GFX900-NEXT: s_setpc_b64 s[30:31]
13281328
;
1329-
; SDAG-GFX906-LABEL: v_mad_mix_v4f32_clamp_postcvt:
1330-
; SDAG-GFX906: ; %bb.0:
1331-
; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1332-
; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
1333-
; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1]
1334-
; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1335-
; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1336-
; SDAG-GFX906-NEXT: v_pk_max_f16 v0, v6, v6 clamp
1337-
; SDAG-GFX906-NEXT: v_pk_max_f16 v1, v7, v7 clamp
1338-
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
1329+
; GFX906-LABEL: v_mad_mix_v4f32_clamp_postcvt:
1330+
; GFX906: ; %bb.0:
1331+
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1332+
; GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1333+
; GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1334+
; GFX906-NEXT: v_fma_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
1335+
; GFX906-NEXT: v_fma_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1336+
; GFX906-NEXT: v_mov_b32_e32 v0, v6
1337+
; GFX906-NEXT: v_mov_b32_e32 v1, v2
1338+
; GFX906-NEXT: s_setpc_b64 s[30:31]
13391339
;
13401340
; SDAG-VI-LABEL: v_mad_mix_v4f32_clamp_postcvt:
13411341
; SDAG-VI: ; %bb.0:
@@ -1426,28 +1426,6 @@ define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %s
14261426
; GISEL-GFX1100-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
14271427
; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
14281428
;
1429-
; GISEL-GFX900-LABEL: v_mad_mix_v4f32_clamp_postcvt:
1430-
; GISEL-GFX900: ; %bb.0:
1431-
; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1432-
; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1433-
; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1434-
; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
1435-
; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1436-
; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v6
1437-
; GISEL-GFX900-NEXT: v_mov_b32_e32 v1, v2
1438-
; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
1439-
;
1440-
; GISEL-GFX906-LABEL: v_mad_mix_v4f32_clamp_postcvt:
1441-
; GISEL-GFX906: ; %bb.0:
1442-
; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1443-
; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1444-
; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1445-
; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
1446-
; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1447-
; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v6
1448-
; GISEL-GFX906-NEXT: v_mov_b32_e32 v1, v2
1449-
; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
1450-
;
14511429
; GISEL-VI-LABEL: v_mad_mix_v4f32_clamp_postcvt:
14521430
; GISEL-VI: ; %bb.0:
14531431
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -290,17 +290,14 @@ define <2 x half> @vfmax_v2f16_vv_nnan(<2 x half> %a, <2 x half> %b) {
290290
ret <2 x half> %v
291291
}
292292

293-
; FIXME: The nnan from fadd isn't propagating.
294293
define <2 x half> @vfmax_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) {
295294
; ZVFH-LABEL: vfmax_v2f16_vv_nnana:
296295
; ZVFH: # %bb.0:
297-
; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
296+
; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
298297
; ZVFH-NEXT: vmfeq.vv v0, v9, v9
299-
; ZVFH-NEXT: vfadd.vv v8, v8, v8
300-
; ZVFH-NEXT: vmerge.vvm v10, v9, v8, v0
301-
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
302-
; ZVFH-NEXT: vmerge.vvm v8, v8, v9, v0
303-
; ZVFH-NEXT: vfmax.vv v8, v10, v8
298+
; ZVFH-NEXT: vmv1r.v v10, v9
299+
; ZVFH-NEXT: vfadd.vv v10, v8, v8, v0.t
300+
; ZVFH-NEXT: vfmax.vv v8, v10, v9
304301
; ZVFH-NEXT: ret
305302
;
306303
; ZVFHMIN-LABEL: vfmax_v2f16_vv_nnana:
@@ -327,16 +324,13 @@ define <2 x half> @vfmax_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) {
327324
ret <2 x half> %v
328325
}
329326

330-
; FIXME: The nnan from fadd isn't propagating.
331327
define <2 x half> @vfmax_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) {
332328
; ZVFH-LABEL: vfmax_v2f16_vv_nnanb:
333329
; ZVFH: # %bb.0:
334-
; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
330+
; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
335331
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
336-
; ZVFH-NEXT: vfadd.vv v9, v9, v9
337-
; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0
338-
; ZVFH-NEXT: vmfeq.vv v0, v9, v9
339-
; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
332+
; ZVFH-NEXT: vmv1r.v v10, v8
333+
; ZVFH-NEXT: vfadd.vv v10, v9, v9, v0.t
340334
; ZVFH-NEXT: vfmax.vv v8, v8, v10
341335
; ZVFH-NEXT: ret
342336
;

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -290,17 +290,14 @@ define <2 x half> @vfmin_v2f16_vv_nnan(<2 x half> %a, <2 x half> %b) {
290290
ret <2 x half> %v
291291
}
292292

293-
; FIXME: The nnan from fadd isn't propagating.
294293
define <2 x half> @vfmin_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) {
295294
; ZVFH-LABEL: vfmin_v2f16_vv_nnana:
296295
; ZVFH: # %bb.0:
297-
; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
296+
; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
298297
; ZVFH-NEXT: vmfeq.vv v0, v9, v9
299-
; ZVFH-NEXT: vfadd.vv v8, v8, v8
300-
; ZVFH-NEXT: vmerge.vvm v10, v9, v8, v0
301-
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
302-
; ZVFH-NEXT: vmerge.vvm v8, v8, v9, v0
303-
; ZVFH-NEXT: vfmin.vv v8, v10, v8
298+
; ZVFH-NEXT: vmv1r.v v10, v9
299+
; ZVFH-NEXT: vfadd.vv v10, v8, v8, v0.t
300+
; ZVFH-NEXT: vfmin.vv v8, v10, v9
304301
; ZVFH-NEXT: ret
305302
;
306303
; ZVFHMIN-LABEL: vfmin_v2f16_vv_nnana:
@@ -327,16 +324,13 @@ define <2 x half> @vfmin_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) {
327324
ret <2 x half> %v
328325
}
329326

330-
; FIXME: The nnan from fadd isn't propagating.
331327
define <2 x half> @vfmin_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) {
332328
; ZVFH-LABEL: vfmin_v2f16_vv_nnanb:
333329
; ZVFH: # %bb.0:
334-
; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
330+
; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
335331
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
336-
; ZVFH-NEXT: vfadd.vv v9, v9, v9
337-
; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0
338-
; ZVFH-NEXT: vmfeq.vv v0, v9, v9
339-
; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
332+
; ZVFH-NEXT: vmv1r.v v10, v8
333+
; ZVFH-NEXT: vfadd.vv v10, v9, v9, v0.t
340334
; ZVFH-NEXT: vfmin.vv v8, v8, v10
341335
; ZVFH-NEXT: ret
342336
;

0 commit comments

Comments
 (0)