Skip to content

Commit eefc405

Browse files
committed
[SDag] Notify listeners when deleting a node
SelectionDAG::DeleteNode does not notify listeners about node deletion. As a result, SelectionDAG::Legalize may skip legalization of some nodes, resulting in a "Legalized selection DAG" that still contains illegal nodes. These nodes will be legalized during the subsequent DAG combining phase, but this may be too late for some patterns to match.
1 parent d47c498 commit eefc405

11 files changed

+661
-653
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1049,6 +1049,9 @@ void SelectionDAG::RemoveDeadNode(SDNode *N){
10491049
}
10501050

10511051
void SelectionDAG::DeleteNode(SDNode *N) {
1052+
for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
1053+
DUL->NodeDeleted(N, nullptr);
1054+
10521055
// First take this out of the appropriate CSE map.
10531056
RemoveNodeFromCSEMaps(N);
10541057

llvm/test/CodeGen/AMDGPU/half.ll

Lines changed: 36 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1969,19 +1969,19 @@ define amdgpu_kernel void @global_extload_v16f16_to_v16f64(ptr addrspace(1) %out
19691969
; VI-NEXT: s_addc_u32 s3, s1, 0
19701970
; VI-NEXT: v_mov_b32_e32 v15, s3
19711971
; VI-NEXT: v_mov_b32_e32 v14, s2
1972-
; VI-NEXT: s_add_u32 s2, s0, 0x50
1972+
; VI-NEXT: s_add_u32 s2, s0, 0x70
19731973
; VI-NEXT: s_addc_u32 s3, s1, 0
19741974
; VI-NEXT: v_mov_b32_e32 v17, s3
19751975
; VI-NEXT: v_mov_b32_e32 v16, s2
1976-
; VI-NEXT: s_add_u32 s2, s0, 64
1976+
; VI-NEXT: s_add_u32 s2, s0, 0x60
19771977
; VI-NEXT: s_addc_u32 s3, s1, 0
19781978
; VI-NEXT: v_mov_b32_e32 v19, s3
19791979
; VI-NEXT: v_mov_b32_e32 v11, s1
19801980
; VI-NEXT: v_mov_b32_e32 v18, s2
1981-
; VI-NEXT: s_add_u32 s2, s0, 0x70
1981+
; VI-NEXT: s_add_u32 s2, s0, 0x50
19821982
; VI-NEXT: v_mov_b32_e32 v10, s0
19831983
; VI-NEXT: s_addc_u32 s3, s1, 0
1984-
; VI-NEXT: s_add_u32 s0, s0, 0x60
1984+
; VI-NEXT: s_add_u32 s0, s0, 64
19851985
; VI-NEXT: s_addc_u32 s1, s1, 0
19861986
; VI-NEXT: s_waitcnt vmcnt(1)
19871987
; VI-NEXT: v_cvt_f32_f16_e32 v22, v4
@@ -1995,15 +1995,15 @@ define amdgpu_kernel void @global_extload_v16f16_to_v16f64(ptr addrspace(1) %out
19951995
; VI-NEXT: v_cvt_f64_f32_e32 v[4:5], v4
19961996
; VI-NEXT: v_cvt_f64_f32_e32 v[6:7], v7
19971997
; VI-NEXT: s_waitcnt vmcnt(0)
1998-
; VI-NEXT: v_cvt_f32_f16_e32 v26, v2
1999-
; VI-NEXT: v_cvt_f32_f16_sdwa v27, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2000-
; VI-NEXT: v_cvt_f32_f16_sdwa v28, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1998+
; VI-NEXT: v_cvt_f32_f16_e32 v26, v0
1999+
; VI-NEXT: v_cvt_f32_f16_sdwa v27, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2000+
; VI-NEXT: v_cvt_f32_f16_sdwa v28, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
20012001
; VI-NEXT: flat_store_dwordx4 v[8:9], v[4:7]
2002-
; VI-NEXT: v_cvt_f32_f16_e32 v8, v3
2003-
; VI-NEXT: v_cvt_f32_f16_e32 v29, v0
2004-
; VI-NEXT: v_cvt_f32_f16_sdwa v30, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2005-
; VI-NEXT: v_cvt_f32_f16_e32 v31, v1
2006-
; VI-NEXT: v_cvt_f32_f16_sdwa v32, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2002+
; VI-NEXT: v_cvt_f32_f16_e32 v8, v1
2003+
; VI-NEXT: v_cvt_f32_f16_e32 v29, v2
2004+
; VI-NEXT: v_cvt_f32_f16_sdwa v30, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2005+
; VI-NEXT: v_cvt_f32_f16_e32 v31, v3
2006+
; VI-NEXT: v_cvt_f32_f16_sdwa v32, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
20072007
; VI-NEXT: v_cvt_f64_f32_e32 v[0:1], v20
20082008
; VI-NEXT: v_cvt_f64_f32_e32 v[2:3], v21
20092009
; VI-NEXT: v_cvt_f64_f32_e32 v[4:5], v22
@@ -2040,40 +2040,40 @@ define amdgpu_kernel void @global_extload_v16f16_to_v16f64(ptr addrspace(1) %out
20402040
; GFX11-NEXT: global_load_b128 v[0:3], v32, s[2:3]
20412041
; GFX11-NEXT: global_load_b128 v[4:7], v32, s[2:3] offset:16
20422042
; GFX11-NEXT: s_waitcnt vmcnt(1)
2043-
; GFX11-NEXT: v_cvt_f32_f16_e32 v10, v1
2044-
; GFX11-NEXT: s_waitcnt vmcnt(0)
2045-
; GFX11-NEXT: v_lshrrev_b32_e32 v23, 16, v5
20462043
; GFX11-NEXT: v_lshrrev_b32_e32 v11, 16, v1
2047-
; GFX11-NEXT: v_lshrrev_b32_e32 v19, 16, v4
2048-
; GFX11-NEXT: v_cvt_f32_f16_e32 v15, v7
2044+
; GFX11-NEXT: s_waitcnt vmcnt(0)
2045+
; GFX11-NEXT: v_cvt_f32_f16_e32 v23, v7
20492046
; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v7
2050-
; GFX11-NEXT: v_cvt_f32_f16_e32 v14, v6
2047+
; GFX11-NEXT: v_cvt_f32_f16_e32 v22, v6
20512048
; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v6
2049+
; GFX11-NEXT: v_lshrrev_b32_e32 v19, 16, v5
2050+
; GFX11-NEXT: v_cvt_f32_f16_e32 v10, v1
2051+
; GFX11-NEXT: v_lshrrev_b32_e32 v15, 16, v4
20522052
; GFX11-NEXT: v_cvt_f32_f16_e32 v13, v3
20532053
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v3
20542054
; GFX11-NEXT: v_cvt_f32_f16_e32 v12, v2
20552055
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v2
2056-
; GFX11-NEXT: v_cvt_f32_f16_e32 v18, v4
2057-
; GFX11-NEXT: v_cvt_f32_f16_e32 v22, v5
2058-
; GFX11-NEXT: v_cvt_f64_f32_e32 v[4:5], v10
2059-
; GFX11-NEXT: v_cvt_f32_f16_e32 v10, v23
2060-
; GFX11-NEXT: v_cvt_f32_f16_e32 v34, v11
2061-
; GFX11-NEXT: v_cvt_f32_f16_e32 v11, v19
2062-
; GFX11-NEXT: v_lshrrev_b32_e32 v9, 16, v0
20632056
; GFX11-NEXT: v_cvt_f32_f16_e32 v7, v7
20642057
; GFX11-NEXT: v_cvt_f32_f16_e32 v6, v6
2058+
; GFX11-NEXT: v_lshrrev_b32_e32 v9, 16, v0
2059+
; GFX11-NEXT: v_cvt_f32_f16_e32 v18, v5
2060+
; GFX11-NEXT: v_cvt_f32_f16_e32 v34, v11
2061+
; GFX11-NEXT: v_cvt_f32_f16_e32 v11, v19
2062+
; GFX11-NEXT: v_cvt_f32_f16_e32 v14, v4
2063+
; GFX11-NEXT: v_cvt_f64_f32_e32 v[4:5], v10
2064+
; GFX11-NEXT: v_cvt_f32_f16_e32 v10, v15
20652065
; GFX11-NEXT: v_cvt_f32_f16_e32 v8, v0
20662066
; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v3
20672067
; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v2
2068-
; GFX11-NEXT: v_cvt_f64_f32_e32 v[28:29], v22
2069-
; GFX11-NEXT: v_cvt_f64_f32_e32 v[30:31], v10
2070-
; GFX11-NEXT: v_cvt_f64_f32_e32 v[24:25], v18
2071-
; GFX11-NEXT: v_cvt_f64_f32_e32 v[26:27], v11
2068+
; GFX11-NEXT: v_cvt_f64_f32_e32 v[28:29], v23
2069+
; GFX11-NEXT: v_cvt_f64_f32_e32 v[30:31], v7
2070+
; GFX11-NEXT: v_cvt_f64_f32_e32 v[24:25], v22
2071+
; GFX11-NEXT: v_cvt_f64_f32_e32 v[26:27], v6
20722072
; GFX11-NEXT: v_cvt_f32_f16_e32 v33, v9
2073-
; GFX11-NEXT: v_cvt_f64_f32_e32 v[20:21], v15
2074-
; GFX11-NEXT: v_cvt_f64_f32_e32 v[22:23], v7
2073+
; GFX11-NEXT: v_cvt_f64_f32_e32 v[20:21], v18
2074+
; GFX11-NEXT: v_cvt_f64_f32_e32 v[22:23], v11
20752075
; GFX11-NEXT: v_cvt_f64_f32_e32 v[16:17], v14
2076-
; GFX11-NEXT: v_cvt_f64_f32_e32 v[18:19], v6
2076+
; GFX11-NEXT: v_cvt_f64_f32_e32 v[18:19], v10
20772077
; GFX11-NEXT: v_cvt_f64_f32_e32 v[0:1], v8
20782078
; GFX11-NEXT: v_cvt_f64_f32_e32 v[8:9], v12
20792079
; GFX11-NEXT: v_cvt_f64_f32_e32 v[12:13], v13
@@ -2082,10 +2082,10 @@ define amdgpu_kernel void @global_extload_v16f16_to_v16f64(ptr addrspace(1) %out
20822082
; GFX11-NEXT: v_cvt_f64_f32_e32 v[6:7], v34
20832083
; GFX11-NEXT: v_cvt_f64_f32_e32 v[2:3], v33
20842084
; GFX11-NEXT: s_clause 0x7
2085-
; GFX11-NEXT: global_store_b128 v32, v[28:31], s[0:1] offset:80
2086-
; GFX11-NEXT: global_store_b128 v32, v[24:27], s[0:1] offset:64
2087-
; GFX11-NEXT: global_store_b128 v32, v[20:23], s[0:1] offset:112
2088-
; GFX11-NEXT: global_store_b128 v32, v[16:19], s[0:1] offset:96
2085+
; GFX11-NEXT: global_store_b128 v32, v[28:31], s[0:1] offset:112
2086+
; GFX11-NEXT: global_store_b128 v32, v[24:27], s[0:1] offset:96
2087+
; GFX11-NEXT: global_store_b128 v32, v[20:23], s[0:1] offset:80
2088+
; GFX11-NEXT: global_store_b128 v32, v[16:19], s[0:1] offset:64
20892089
; GFX11-NEXT: global_store_b128 v32, v[12:15], s[0:1] offset:48
20902090
; GFX11-NEXT: global_store_b128 v32, v[8:11], s[0:1] offset:32
20912091
; GFX11-NEXT: global_store_b128 v32, v[4:7], s[0:1] offset:16

llvm/test/CodeGen/AMDGPU/load-constant-i1.ll

Lines changed: 50 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -5719,33 +5719,33 @@ define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %o
57195719
; GFX6-NEXT: s_mov_b32 s0, s4
57205720
; GFX6-NEXT: s_mov_b32 s1, s5
57215721
; GFX6-NEXT: s_waitcnt vmcnt(0)
5722-
; GFX6-NEXT: v_bfe_u32 v2, v29, 11, 1
5723-
; GFX6-NEXT: v_bfe_u32 v0, v29, 10, 1
5724-
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
5725-
; GFX6-NEXT: v_bfe_u32 v5, v29, 9, 1
5722+
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 15, v29
5723+
; GFX6-NEXT: v_bfe_u32 v0, v29, 14, 1
5724+
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
5725+
; GFX6-NEXT: v_bfe_u32 v5, v29, 13, 1
57265726
; GFX6-NEXT: s_waitcnt expcnt(0)
5727-
; GFX6-NEXT: v_bfe_u32 v3, v29, 8, 1
5728-
; GFX6-NEXT: buffer_store_dwordx4 v[3:6], off, s[0:3], 0 offset:64
5729-
; GFX6-NEXT: v_lshrrev_b32_e32 v8, 15, v29
5727+
; GFX6-NEXT: v_bfe_u32 v3, v29, 12, 1
5728+
; GFX6-NEXT: buffer_store_dwordx4 v[3:6], off, s[0:3], 0 offset:96
5729+
; GFX6-NEXT: v_bfe_u32 v8, v29, 11, 1
57305730
; GFX6-NEXT: s_waitcnt expcnt(0)
5731-
; GFX6-NEXT: v_bfe_u32 v6, v29, 14, 1
5732-
; GFX6-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:112
5733-
; GFX6-NEXT: v_bfe_u32 v27, v29, 5, 1
5734-
; GFX6-NEXT: v_bfe_u32 v23, v29, 7, 1
5735-
; GFX6-NEXT: v_bfe_u32 v19, v29, 1, 1
5736-
; GFX6-NEXT: v_bfe_u32 v15, v29, 3, 1
5737-
; GFX6-NEXT: v_bfe_u32 v11, v29, 13, 1
5738-
; GFX6-NEXT: v_bfe_u32 v25, v29, 4, 1
5739-
; GFX6-NEXT: v_bfe_u32 v21, v29, 6, 1
5740-
; GFX6-NEXT: v_and_b32_e32 v17, 1, v29
5741-
; GFX6-NEXT: v_bfe_u32 v13, v29, 2, 1
5731+
; GFX6-NEXT: v_bfe_u32 v6, v29, 10, 1
5732+
; GFX6-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:80
5733+
; GFX6-NEXT: v_bfe_u32 v27, v29, 1, 1
5734+
; GFX6-NEXT: v_bfe_u32 v23, v29, 3, 1
5735+
; GFX6-NEXT: v_bfe_u32 v19, v29, 5, 1
5736+
; GFX6-NEXT: v_bfe_u32 v15, v29, 7, 1
5737+
; GFX6-NEXT: v_bfe_u32 v11, v29, 9, 1
5738+
; GFX6-NEXT: v_and_b32_e32 v25, 1, v29
5739+
; GFX6-NEXT: v_bfe_u32 v21, v29, 2, 1
5740+
; GFX6-NEXT: v_bfe_u32 v17, v29, 4, 1
5741+
; GFX6-NEXT: v_bfe_u32 v13, v29, 6, 1
57425742
; GFX6-NEXT: s_waitcnt expcnt(0)
5743-
; GFX6-NEXT: v_bfe_u32 v9, v29, 12, 1
5744-
; GFX6-NEXT: buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:96
5745-
; GFX6-NEXT: buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:16
5746-
; GFX6-NEXT: buffer_store_dwordx4 v[17:20], off, s[0:3], 0
5747-
; GFX6-NEXT: buffer_store_dwordx4 v[21:24], off, s[0:3], 0 offset:48
5748-
; GFX6-NEXT: buffer_store_dwordx4 v[25:28], off, s[0:3], 0 offset:32
5743+
; GFX6-NEXT: v_bfe_u32 v9, v29, 8, 1
5744+
; GFX6-NEXT: buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:64
5745+
; GFX6-NEXT: buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:48
5746+
; GFX6-NEXT: buffer_store_dwordx4 v[17:20], off, s[0:3], 0 offset:32
5747+
; GFX6-NEXT: buffer_store_dwordx4 v[21:24], off, s[0:3], 0 offset:16
5748+
; GFX6-NEXT: buffer_store_dwordx4 v[25:28], off, s[0:3], 0
57495749
; GFX6-NEXT: s_endpgm
57505750
;
57515751
; GFX8-LABEL: constant_zextload_v16i1_to_v16i64:
@@ -5761,7 +5761,7 @@ define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %o
57615761
; GFX8-NEXT: flat_load_ushort v0, v[0:1]
57625762
; GFX8-NEXT: s_add_u32 s2, s0, 0x70
57635763
; GFX8-NEXT: s_addc_u32 s3, s1, 0
5764-
; GFX8-NEXT: s_add_u32 s4, s0, 0x50
5764+
; GFX8-NEXT: s_add_u32 s4, s0, 0x60
57655765
; GFX8-NEXT: s_addc_u32 s5, s1, 0
57665766
; GFX8-NEXT: v_mov_b32_e32 v23, s5
57675767
; GFX8-NEXT: v_mov_b32_e32 v22, s4
@@ -5775,9 +5775,9 @@ define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %o
57755775
; GFX8-NEXT: v_mov_b32_e32 v21, v2
57765776
; GFX8-NEXT: v_mov_b32_e32 v25, v2
57775777
; GFX8-NEXT: s_waitcnt vmcnt(0)
5778-
; GFX8-NEXT: v_lshrrev_b16_e32 v1, 10, v0
5778+
; GFX8-NEXT: v_lshrrev_b16_e32 v1, 12, v0
57795779
; GFX8-NEXT: v_and_b32_e32 v5, 1, v1
5780-
; GFX8-NEXT: v_lshrrev_b16_e32 v1, 11, v0
5780+
; GFX8-NEXT: v_lshrrev_b16_e32 v1, 13, v0
57815781
; GFX8-NEXT: v_and_b32_e32 v1, 1, v1
57825782
; GFX8-NEXT: v_and_b32_e32 v7, 0xffff, v1
57835783
; GFX8-NEXT: flat_store_dwordx4 v[22:23], v[5:8]
@@ -5786,31 +5786,31 @@ define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %o
57865786
; GFX8-NEXT: v_mov_b32_e32 v22, s2
57875787
; GFX8-NEXT: v_lshrrev_b16_e32 v3, 15, v0
57885788
; GFX8-NEXT: v_and_b32_e32 v1, 1, v1
5789-
; GFX8-NEXT: s_add_u32 s2, s0, 64
5789+
; GFX8-NEXT: flat_store_dwordx4 v[22:23], v[1:4]
57905790
; GFX8-NEXT: v_mov_b32_e32 v5, v2
5791+
; GFX8-NEXT: v_lshrrev_b16_e32 v1, 11, v0
57915792
; GFX8-NEXT: v_mov_b32_e32 v7, v2
5792-
; GFX8-NEXT: flat_store_dwordx4 v[22:23], v[1:4]
57935793
; GFX8-NEXT: v_mov_b32_e32 v23, v2
5794-
; GFX8-NEXT: v_mov_b32_e32 v3, 1
5794+
; GFX8-NEXT: s_add_u32 s2, s0, 0x50
5795+
; GFX8-NEXT: v_lshrrev_b16_e32 v2, 10, v0
5796+
; GFX8-NEXT: v_and_b32_e32 v1, 1, v1
57955797
; GFX8-NEXT: s_addc_u32 s3, s1, 0
5798+
; GFX8-NEXT: v_and_b32_e32 v8, 1, v2
5799+
; GFX8-NEXT: v_and_b32_e32 v10, 0xffff, v1
57965800
; GFX8-NEXT: v_mov_b32_e32 v1, s2
57975801
; GFX8-NEXT: v_mov_b32_e32 v2, s3
5798-
; GFX8-NEXT: v_and_b32_sdwa v8, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
5799-
; GFX8-NEXT: v_lshrrev_b16_e32 v3, 9, v0
5800-
; GFX8-NEXT: s_add_u32 s2, s0, 0x60
5801-
; GFX8-NEXT: v_and_b32_e32 v3, 1, v3
5802+
; GFX8-NEXT: s_add_u32 s2, s0, 64
58025803
; GFX8-NEXT: s_addc_u32 s3, s1, 0
5803-
; GFX8-NEXT: v_and_b32_e32 v10, 0xffff, v3
58045804
; GFX8-NEXT: v_mov_b32_e32 v4, s3
58055805
; GFX8-NEXT: v_mov_b32_e32 v3, s2
58065806
; GFX8-NEXT: s_add_u32 s2, s0, 48
5807+
; GFX8-NEXT: v_mov_b32_e32 v6, 1
58075808
; GFX8-NEXT: s_addc_u32 s3, s1, 0
5808-
; GFX8-NEXT: v_lshrrev_b16_e32 v6, 12, v0
58095809
; GFX8-NEXT: flat_store_dwordx4 v[1:2], v[8:11]
58105810
; GFX8-NEXT: v_mov_b32_e32 v2, s1
58115811
; GFX8-NEXT: v_mov_b32_e32 v9, s3
5812-
; GFX8-NEXT: v_and_b32_e32 v11, 1, v6
5813-
; GFX8-NEXT: v_lshrrev_b16_e32 v6, 13, v0
5812+
; GFX8-NEXT: v_and_b32_sdwa v11, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
5813+
; GFX8-NEXT: v_lshrrev_b16_e32 v6, 9, v0
58145814
; GFX8-NEXT: v_mov_b32_e32 v8, s2
58155815
; GFX8-NEXT: s_add_u32 s2, s0, 32
58165816
; GFX8-NEXT: v_and_b32_e32 v6, 1, v6
@@ -5940,35 +5940,33 @@ define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %o
59405940
; GFX12-NEXT: global_load_u16 v0, v1, s[2:3]
59415941
; GFX12-NEXT: s_wait_loadcnt 0x0
59425942
; GFX12-NEXT: v_and_b32_e32 v28, 1, v0
5943-
; GFX12-NEXT: v_lshrrev_b16 v4, 11, v0
5944-
; GFX12-NEXT: v_lshrrev_b16 v8, 9, v0
5945-
; GFX12-NEXT: v_lshrrev_b16 v12, 13, v0
5943+
; GFX12-NEXT: v_lshrrev_b16 v4, 13, v0
5944+
; GFX12-NEXT: v_lshrrev_b16 v8, 11, v0
5945+
; GFX12-NEXT: v_lshrrev_b16 v12, 9, v0
59465946
; GFX12-NEXT: v_lshrrev_b16 v16, 7, v0
59475947
; GFX12-NEXT: v_lshrrev_b16 v2, 15, v0
59485948
; GFX12-NEXT: v_lshrrev_b16 v6, 14, v0
5949-
; GFX12-NEXT: v_lshrrev_b16 v10, 10, v0
59505949
; GFX12-NEXT: v_lshrrev_b16 v20, 5, v0
59515950
; GFX12-NEXT: v_lshrrev_b16 v24, 3, v0
59525951
; GFX12-NEXT: v_lshrrev_b16 v32, 1, v0
5952+
; GFX12-NEXT: v_lshrrev_b16 v10, 12, v0
59535953
; GFX12-NEXT: v_and_b32_e32 v33, 1, v4
59545954
; GFX12-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_and_b32 v34, 1, v8
5955-
; GFX12-NEXT: v_lshrrev_b16 v14, 8, v0
5956-
; GFX12-NEXT: v_lshrrev_b16 v18, 12, v0
5955+
; GFX12-NEXT: v_lshrrev_b16 v14, 10, v0
5956+
; GFX12-NEXT: v_lshrrev_b16 v18, 8, v0
59575957
; GFX12-NEXT: v_and_b32_e32 v35, 1, v12
59585958
; GFX12-NEXT: v_dual_mov_b32 v5, v1 :: v_dual_and_b32 v36, 1, v16
59595959
; GFX12-NEXT: v_lshrrev_b16 v22, 6, v0
59605960
; GFX12-NEXT: v_dual_mov_b32 v7, v1 :: v_dual_and_b32 v38, 1, v24
59615961
; GFX12-NEXT: v_dual_mov_b32 v9, v1 :: v_dual_and_b32 v32, 1, v32
5962-
; GFX12-NEXT: v_dual_mov_b32 v21, v1 :: v_dual_and_b32 v4, 1, v10
59635962
; GFX12-NEXT: v_mov_b32_e32 v23, v1
59645963
; GFX12-NEXT: v_dual_mov_b32 v25, v1 :: v_dual_and_b32 v2, 0xffff, v2
5965-
; GFX12-NEXT: v_mov_b32_e32 v31, v1
59665964
; GFX12-NEXT: v_lshrrev_b16 v26, 4, v0
59675965
; GFX12-NEXT: v_lshrrev_b16 v30, 2, v0
59685966
; GFX12-NEXT: v_and_b32_e32 v37, 1, v20
5969-
; GFX12-NEXT: v_and_b32_e32 v0, 1, v6
5967+
; GFX12-NEXT: v_dual_mov_b32 v21, v1 :: v_dual_and_b32 v0, 1, v6
5968+
; GFX12-NEXT: v_and_b32_e32 v4, 1, v10
59705969
; GFX12-NEXT: v_dual_mov_b32 v11, v1 :: v_dual_and_b32 v6, 0xffff, v33
5971-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4)
59725970
; GFX12-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_and_b32 v24, 1, v30
59735971
; GFX12-NEXT: v_and_b32_e32 v8, 1, v14
59745972
; GFX12-NEXT: v_dual_mov_b32 v13, v1 :: v_dual_and_b32 v10, 0xffff, v34
@@ -5978,13 +5976,13 @@ define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %o
59785976
; GFX12-NEXT: v_and_b32_e32 v14, 0xffff, v35
59795977
; GFX12-NEXT: v_dual_mov_b32 v27, v1 :: v_dual_and_b32 v18, 0xffff, v36
59805978
; GFX12-NEXT: v_dual_mov_b32 v29, v1 :: v_dual_and_b32 v30, 0xffff, v32
5981-
; GFX12-NEXT: v_and_b32_e32 v26, 0xffff, v38
5979+
; GFX12-NEXT: v_dual_mov_b32 v31, v1 :: v_dual_and_b32 v26, 0xffff, v38
59825980
; GFX12-NEXT: v_and_b32_e32 v22, 0xffff, v37
59835981
; GFX12-NEXT: s_clause 0x7
5984-
; GFX12-NEXT: global_store_b128 v1, v[4:7], s[0:1] offset:80
5985-
; GFX12-NEXT: global_store_b128 v1, v[8:11], s[0:1] offset:64
59865982
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:112
5987-
; GFX12-NEXT: global_store_b128 v1, v[12:15], s[0:1] offset:96
5983+
; GFX12-NEXT: global_store_b128 v1, v[4:7], s[0:1] offset:96
5984+
; GFX12-NEXT: global_store_b128 v1, v[8:11], s[0:1] offset:80
5985+
; GFX12-NEXT: global_store_b128 v1, v[12:15], s[0:1] offset:64
59885986
; GFX12-NEXT: global_store_b128 v1, v[16:19], s[0:1] offset:48
59895987
; GFX12-NEXT: global_store_b128 v1, v[20:23], s[0:1] offset:32
59905988
; GFX12-NEXT: global_store_b128 v1, v[24:27], s[0:1] offset:16

0 commit comments

Comments
 (0)