Skip to content

Commit 6846661

Browse files
committed
[SDAG] Handle insert_subvector in isKnownNeverNaN
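Propagate nnan across insert_subvector: the result is known never-NaN when both the base vector (operand 0) and the inserted subvector (operand 1) are known never-NaN. isKnownNeverNaN now also returns true early for undef operands.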
1 parent 7e13154 commit 6846661

File tree

5 files changed

+27
-80
lines changed

5 files changed

+27
-80
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5632,6 +5632,9 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, const APInt &DemandedElts,
56325632
(SNaN && !C->getValueAPF().isSignaling());
56335633
}
56345634

5635+
if (Op.isUndef())
5636+
return true;
5637+
56355638
unsigned Opcode = Op.getOpcode();
56365639
switch (Opcode) {
56375640
case ISD::FADD:
@@ -5752,6 +5755,9 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, const APInt &DemandedElts,
57525755
}
57535756
return isKnownNeverNaN(Src, SNaN, Depth + 1);
57545757
}
5758+
case ISD::INSERT_SUBVECTOR:
5759+
return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
5760+
isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
57555761
case ISD::BUILD_VECTOR: {
57565762
unsigned NumElts = Op.getNumOperands();
57575763
for (unsigned I = 0; I != NumElts; ++I)

llvm/test/CodeGen/AMDGPU/clamp.ll

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2986,14 +2986,14 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_elt(ptr addrspace(1) %out, ptr ad
29862986
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
29872987
; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
29882988
; GFX6-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
2989-
; GFX6-NEXT: v_mov_b32_e32 v4, 0x7fc00000
29902989
; GFX6-NEXT: s_mov_b64 s[2:3], s[6:7]
29912990
; GFX6-NEXT: s_waitcnt vmcnt(0)
29922991
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2
2993-
; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
29942992
; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
2995-
; GFX6-NEXT: v_max_f32_e32 v2, 0x7fc00000, v2
2996-
; GFX6-NEXT: v_med3_f32 v3, v3, 0, v4
2993+
; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
2994+
; GFX6-NEXT: v_max_f32_e32 v3, 0, v3
2995+
; GFX6-NEXT: v_max_f32_e32 v2, s0, v2
2996+
; GFX6-NEXT: v_min_f32_e32 v3, s0, v3
29972997
; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3
29982998
; GFX6-NEXT: v_min_f32_e32 v2, 1.0, v2
29992999
; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2
@@ -3006,20 +3006,20 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_elt(ptr addrspace(1) %out, ptr ad
30063006
; GFX8: ; %bb.0:
30073007
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
30083008
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0
3009-
; GFX8-NEXT: v_mov_b32_e32 v4, 0x7e00
30103009
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
30113010
; GFX8-NEXT: v_mov_b32_e32 v1, s3
30123011
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2
30133012
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
30143013
; GFX8-NEXT: flat_load_dword v3, v[0:1]
30153014
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2
3015+
; GFX8-NEXT: v_mov_b32_e32 v4, s0
30163016
; GFX8-NEXT: v_mov_b32_e32 v1, s1
30173017
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
30183018
; GFX8-NEXT: s_waitcnt vmcnt(0)
30193019
; GFX8-NEXT: v_max_f16_sdwa v2, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
30203020
; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
30213021
; GFX8-NEXT: v_max_f16_e32 v2, 0, v2
3022-
; GFX8-NEXT: v_max_f16_e32 v3, 0x7e00, v3
3022+
; GFX8-NEXT: v_max_f16_e32 v3, s0, v3
30233023
; GFX8-NEXT: v_min_f16_e32 v3, 1.0, v3
30243024
; GFX8-NEXT: v_min_f16_sdwa v2, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
30253025
; GFX8-NEXT: v_or_b32_e32 v2, v3, v2
@@ -3747,16 +3747,16 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts0(ptr addrspace(1) %out
37473747
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
37483748
; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
37493749
; GFX6-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
3750-
; GFX6-NEXT: v_mov_b32_e32 v4, 0x7fc00000
37513750
; GFX6-NEXT: s_mov_b64 s[2:3], s[6:7]
37523751
; GFX6-NEXT: s_waitcnt vmcnt(0)
37533752
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2
37543753
; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
37553754
; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
3756-
; GFX6-NEXT: v_max_f32_e32 v3, 0x7fc00000, v3
3755+
; GFX6-NEXT: v_max_f32_e32 v3, s0, v3
3756+
; GFX6-NEXT: v_max_f32_e32 v2, 0, v2
37573757
; GFX6-NEXT: v_min_f32_e32 v3, 1.0, v3
37583758
; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3
3759-
; GFX6-NEXT: v_med3_f32 v2, v2, 0, v4
3759+
; GFX6-NEXT: v_min_f32_e32 v2, s0, v2
37603760
; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2
37613761
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
37623762
; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
@@ -3779,9 +3779,9 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts0(ptr addrspace(1) %out
37793779
; GFX8-NEXT: s_waitcnt vmcnt(0)
37803780
; GFX8-NEXT: v_max_f16_sdwa v2, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
37813781
; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
3782-
; GFX8-NEXT: v_max_f16_e32 v2, 0x7e00, v2
3782+
; GFX8-NEXT: v_max_f16_e32 v2, s0, v2
37833783
; GFX8-NEXT: v_max_f16_e32 v3, 0, v3
3784-
; GFX8-NEXT: v_min_f16_e32 v3, 0x7e00, v3
3784+
; GFX8-NEXT: v_min_f16_e32 v3, s0, v3
37853785
; GFX8-NEXT: v_min_f16_sdwa v2, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
37863786
; GFX8-NEXT: v_or_b32_e32 v2, v3, v2
37873787
; GFX8-NEXT: flat_store_dword v[0:1], v2
@@ -3845,14 +3845,14 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts1(ptr addrspace(1) %out
38453845
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
38463846
; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
38473847
; GFX6-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
3848-
; GFX6-NEXT: v_mov_b32_e32 v4, 0x7fc00000
38493848
; GFX6-NEXT: s_mov_b64 s[2:3], s[6:7]
38503849
; GFX6-NEXT: s_waitcnt vmcnt(0)
38513850
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2
3852-
; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
38533851
; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
3854-
; GFX6-NEXT: v_max_f32_e32 v2, 0x7fc00000, v2
3855-
; GFX6-NEXT: v_med3_f32 v3, v3, 0, v4
3852+
; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
3853+
; GFX6-NEXT: v_max_f32_e32 v3, 0, v3
3854+
; GFX6-NEXT: v_max_f32_e32 v2, s0, v2
3855+
; GFX6-NEXT: v_min_f32_e32 v3, s0, v3
38563856
; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3
38573857
; GFX6-NEXT: v_min_f32_e32 v2, 1.0, v2
38583858
; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2
@@ -3865,20 +3865,20 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts1(ptr addrspace(1) %out
38653865
; GFX8: ; %bb.0:
38663866
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
38673867
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0
3868-
; GFX8-NEXT: v_mov_b32_e32 v4, 0x7e00
38693868
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
38703869
; GFX8-NEXT: v_mov_b32_e32 v1, s3
38713870
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2
38723871
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
38733872
; GFX8-NEXT: flat_load_dword v3, v[0:1]
38743873
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2
3874+
; GFX8-NEXT: v_mov_b32_e32 v4, s0
38753875
; GFX8-NEXT: v_mov_b32_e32 v1, s1
38763876
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
38773877
; GFX8-NEXT: s_waitcnt vmcnt(0)
38783878
; GFX8-NEXT: v_max_f16_sdwa v2, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
38793879
; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
38803880
; GFX8-NEXT: v_max_f16_e32 v2, 0, v2
3881-
; GFX8-NEXT: v_max_f16_e32 v3, 0x7e00, v3
3881+
; GFX8-NEXT: v_max_f16_e32 v3, s0, v3
38823882
; GFX8-NEXT: v_min_f16_e32 v3, 1.0, v3
38833883
; GFX8-NEXT: v_min_f16_sdwa v2, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
38843884
; GFX8-NEXT: v_or_b32_e32 v2, v3, v2

llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll

Lines changed: 0 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1057,13 +1057,6 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
10571057
; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v3f32_clamp_postcvt:
10581058
; SDAG-GFX1100-TRUE16: ; %bb.0:
10591059
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1060-
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
1061-
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
1062-
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v5.l, v2.l
1063-
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v6.l, v4.l
1064-
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
1065-
; SDAG-GFX1100-TRUE16-NEXT: v_pack_b32_f16 v1, v1.l, 0
1066-
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v3, v3, v5, v6 op_sel_hi:[1,1,1] clamp
10671060
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
10681061
; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v1, v1, v1 clamp
10691062
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
@@ -1077,34 +1070,6 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
10771070
; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
10781071
; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
10791072
; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1080-
; SDAG-GFX1100-FAKE16-NEXT: v_pack_b32_f16 v1, v1, 0
1081-
; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1082-
; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1083-
; SDAG-GFX1100-FAKE16-NEXT: v_pk_max_f16 v1, v1, v1 clamp
1084-
; SDAG-GFX1100-FAKE16-NEXT: v_mov_b32_e32 v0, v3
1085-
; SDAG-GFX1100-FAKE16-NEXT: s_setpc_b64 s[30:31]
1086-
;
1087-
; SDAG-GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt:
1088-
; SDAG-GFX900: ; %bb.0:
1089-
; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1090-
; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
1091-
; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1092-
; SDAG-GFX900-NEXT: v_pack_b32_f16 v1, v1, 0
1093-
; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1094-
; SDAG-GFX900-NEXT: v_pk_max_f16 v1, v1, v1 clamp
1095-
; SDAG-GFX900-NEXT: v_mov_b32_e32 v0, v3
1096-
; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
1097-
;
1098-
; SDAG-GFX906-LABEL: v_mad_mix_v3f32_clamp_postcvt:
1099-
; SDAG-GFX906: ; %bb.0:
1100-
; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1101-
; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
1102-
; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1103-
; SDAG-GFX906-NEXT: v_pack_b32_f16 v1, v1, 0
1104-
; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1105-
; SDAG-GFX906-NEXT: v_pk_max_f16 v1, v1, v1 clamp
1106-
; SDAG-GFX906-NEXT: v_mov_b32_e32 v0, v3
1107-
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
11081073
;
11091074
; SDAG-VI-LABEL: v_mad_mix_v3f32_clamp_postcvt:
11101075
; SDAG-VI: ; %bb.0:
@@ -1178,26 +1143,6 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
11781143
; GISEL-GFX1100-NEXT: v_mov_b32_e32 v0, v6
11791144
; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
11801145
;
1181-
; GISEL-GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt:
1182-
; GISEL-GFX900: ; %bb.0:
1183-
; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1184-
; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
1185-
; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1186-
; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1187-
; GISEL-GFX900-NEXT: v_pk_max_f16 v1, v1, v1 clamp
1188-
; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v3
1189-
; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
1190-
;
1191-
; GISEL-GFX906-LABEL: v_mad_mix_v3f32_clamp_postcvt:
1192-
; GISEL-GFX906: ; %bb.0:
1193-
; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1194-
; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
1195-
; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1196-
; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1197-
; GISEL-GFX906-NEXT: v_pk_max_f16 v1, v1, v1 clamp
1198-
; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v3
1199-
; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
1200-
;
12011146
; GISEL-VI-LABEL: v_mad_mix_v3f32_clamp_postcvt:
12021147
; GISEL-VI: ; %bb.0:
12031148
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -367,12 +367,10 @@ define <4 x half> @vfmax_v2f16_vv_nnan_insert_subvector(<2 x half> %a, <2 x half
367367
; ZVFH-NEXT: vfadd.vv v8, v8, v8
368368
; ZVFH-NEXT: vfadd.vv v9, v9, v9
369369
; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
370-
; ZVFH-NEXT: vslideup.vi v8, v9, 2
371-
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
372-
; ZVFH-NEXT: vmerge.vvm v9, v8, v10, v0
373370
; ZVFH-NEXT: vmfeq.vv v0, v10, v10
371+
; ZVFH-NEXT: vslideup.vi v8, v9, 2
374372
; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0
375-
; ZVFH-NEXT: vfmax.vv v8, v8, v9
373+
; ZVFH-NEXT: vfmax.vv v8, v8, v10
376374
; ZVFH-NEXT: ret
377375
;
378376
; ZVFHMIN-LABEL: vfmax_v2f16_vv_nnan_insert_subvector:

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -367,12 +367,10 @@ define <4 x half> @vfmin_v2f16_vv_nnan_insert_subvector(<2 x half> %a, <2 x half
367367
; ZVFH-NEXT: vfadd.vv v8, v8, v8
368368
; ZVFH-NEXT: vfadd.vv v9, v9, v9
369369
; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
370-
; ZVFH-NEXT: vslideup.vi v8, v9, 2
371-
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
372-
; ZVFH-NEXT: vmerge.vvm v9, v8, v10, v0
373370
; ZVFH-NEXT: vmfeq.vv v0, v10, v10
371+
; ZVFH-NEXT: vslideup.vi v8, v9, 2
374372
; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0
375-
; ZVFH-NEXT: vfmin.vv v8, v8, v9
373+
; ZVFH-NEXT: vfmin.vv v8, v8, v10
376374
; ZVFH-NEXT: ret
377375
;
378376
; ZVFHMIN-LABEL: vfmin_v2f16_vv_nnan_insert_subvector:

0 commit comments

Comments
 (0)