Skip to content

Commit 29fbcd0

Browse files
committed
[SDag] Notify listeners when deleting a node
Branched off llvm#66991
1 parent 66e1d2c commit 29fbcd0

11 files changed

+653
-649
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1049,6 +1049,9 @@ void SelectionDAG::RemoveDeadNode(SDNode *N){
10491049
}
10501050

10511051
void SelectionDAG::DeleteNode(SDNode *N) {
1052+
for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
1053+
DUL->NodeDeleted(N, nullptr);
1054+
10521055
// First take this out of the appropriate CSE map.
10531056
RemoveNodeFromCSEMaps(N);
10541057

llvm/test/CodeGen/AMDGPU/half.ll

Lines changed: 36 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -1969,19 +1969,19 @@ define amdgpu_kernel void @global_extload_v16f16_to_v16f64(ptr addrspace(1) %out
19691969
; VI-NEXT: s_addc_u32 s3, s1, 0
19701970
; VI-NEXT: v_mov_b32_e32 v15, s3
19711971
; VI-NEXT: v_mov_b32_e32 v14, s2
1972-
; VI-NEXT: s_add_u32 s2, s0, 0x50
1972+
; VI-NEXT: s_add_u32 s2, s0, 0x70
19731973
; VI-NEXT: s_addc_u32 s3, s1, 0
19741974
; VI-NEXT: v_mov_b32_e32 v17, s3
19751975
; VI-NEXT: v_mov_b32_e32 v16, s2
1976-
; VI-NEXT: s_add_u32 s2, s0, 64
1976+
; VI-NEXT: s_add_u32 s2, s0, 0x60
19771977
; VI-NEXT: s_addc_u32 s3, s1, 0
19781978
; VI-NEXT: v_mov_b32_e32 v19, s3
19791979
; VI-NEXT: v_mov_b32_e32 v11, s1
19801980
; VI-NEXT: v_mov_b32_e32 v18, s2
1981-
; VI-NEXT: s_add_u32 s2, s0, 0x70
1981+
; VI-NEXT: s_add_u32 s2, s0, 0x50
19821982
; VI-NEXT: v_mov_b32_e32 v10, s0
19831983
; VI-NEXT: s_addc_u32 s3, s1, 0
1984-
; VI-NEXT: s_add_u32 s0, s0, 0x60
1984+
; VI-NEXT: s_add_u32 s0, s0, 64
19851985
; VI-NEXT: s_addc_u32 s1, s1, 0
19861986
; VI-NEXT: s_waitcnt vmcnt(1)
19871987
; VI-NEXT: v_cvt_f32_f16_e32 v22, v4
@@ -1995,15 +1995,15 @@ define amdgpu_kernel void @global_extload_v16f16_to_v16f64(ptr addrspace(1) %out
19951995
; VI-NEXT: v_cvt_f64_f32_e32 v[4:5], v4
19961996
; VI-NEXT: v_cvt_f64_f32_e32 v[6:7], v7
19971997
; VI-NEXT: s_waitcnt vmcnt(0)
1998-
; VI-NEXT: v_cvt_f32_f16_e32 v26, v2
1999-
; VI-NEXT: v_cvt_f32_f16_sdwa v27, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2000-
; VI-NEXT: v_cvt_f32_f16_sdwa v28, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1998+
; VI-NEXT: v_cvt_f32_f16_e32 v26, v0
1999+
; VI-NEXT: v_cvt_f32_f16_sdwa v27, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2000+
; VI-NEXT: v_cvt_f32_f16_sdwa v28, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
20012001
; VI-NEXT: flat_store_dwordx4 v[8:9], v[4:7]
2002-
; VI-NEXT: v_cvt_f32_f16_e32 v8, v3
2003-
; VI-NEXT: v_cvt_f32_f16_e32 v29, v0
2004-
; VI-NEXT: v_cvt_f32_f16_sdwa v30, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2005-
; VI-NEXT: v_cvt_f32_f16_e32 v31, v1
2006-
; VI-NEXT: v_cvt_f32_f16_sdwa v32, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2002+
; VI-NEXT: v_cvt_f32_f16_e32 v8, v1
2003+
; VI-NEXT: v_cvt_f32_f16_e32 v29, v2
2004+
; VI-NEXT: v_cvt_f32_f16_sdwa v30, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2005+
; VI-NEXT: v_cvt_f32_f16_e32 v31, v3
2006+
; VI-NEXT: v_cvt_f32_f16_sdwa v32, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
20072007
; VI-NEXT: v_cvt_f64_f32_e32 v[0:1], v20
20082008
; VI-NEXT: v_cvt_f64_f32_e32 v[2:3], v21
20092009
; VI-NEXT: v_cvt_f64_f32_e32 v[4:5], v22
@@ -2040,40 +2040,40 @@ define amdgpu_kernel void @global_extload_v16f16_to_v16f64(ptr addrspace(1) %out
20402040
; GFX11-NEXT: global_load_b128 v[0:3], v32, s[2:3]
20412041
; GFX11-NEXT: global_load_b128 v[4:7], v32, s[2:3] offset:16
20422042
; GFX11-NEXT: s_waitcnt vmcnt(1)
2043-
; GFX11-NEXT: v_cvt_f32_f16_e32 v10, v1
2044-
; GFX11-NEXT: s_waitcnt vmcnt(0)
2045-
; GFX11-NEXT: v_lshrrev_b32_e32 v23, 16, v5
20462043
; GFX11-NEXT: v_lshrrev_b32_e32 v11, 16, v1
2047-
; GFX11-NEXT: v_lshrrev_b32_e32 v19, 16, v4
2048-
; GFX11-NEXT: v_cvt_f32_f16_e32 v15, v7
2044+
; GFX11-NEXT: s_waitcnt vmcnt(0)
2045+
; GFX11-NEXT: v_cvt_f32_f16_e32 v23, v7
20492046
; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v7
2050-
; GFX11-NEXT: v_cvt_f32_f16_e32 v14, v6
2047+
; GFX11-NEXT: v_cvt_f32_f16_e32 v22, v6
20512048
; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v6
2049+
; GFX11-NEXT: v_lshrrev_b32_e32 v19, 16, v5
2050+
; GFX11-NEXT: v_cvt_f32_f16_e32 v10, v1
2051+
; GFX11-NEXT: v_lshrrev_b32_e32 v15, 16, v4
20522052
; GFX11-NEXT: v_cvt_f32_f16_e32 v13, v3
20532053
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v3
20542054
; GFX11-NEXT: v_cvt_f32_f16_e32 v12, v2
20552055
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v2
2056-
; GFX11-NEXT: v_cvt_f32_f16_e32 v18, v4
2057-
; GFX11-NEXT: v_cvt_f32_f16_e32 v22, v5
2058-
; GFX11-NEXT: v_cvt_f64_f32_e32 v[4:5], v10
2059-
; GFX11-NEXT: v_cvt_f32_f16_e32 v10, v23
2060-
; GFX11-NEXT: v_cvt_f32_f16_e32 v34, v11
2061-
; GFX11-NEXT: v_cvt_f32_f16_e32 v11, v19
2062-
; GFX11-NEXT: v_lshrrev_b32_e32 v9, 16, v0
20632056
; GFX11-NEXT: v_cvt_f32_f16_e32 v7, v7
20642057
; GFX11-NEXT: v_cvt_f32_f16_e32 v6, v6
2058+
; GFX11-NEXT: v_lshrrev_b32_e32 v9, 16, v0
2059+
; GFX11-NEXT: v_cvt_f32_f16_e32 v18, v5
2060+
; GFX11-NEXT: v_cvt_f32_f16_e32 v34, v11
2061+
; GFX11-NEXT: v_cvt_f32_f16_e32 v11, v19
2062+
; GFX11-NEXT: v_cvt_f32_f16_e32 v14, v4
2063+
; GFX11-NEXT: v_cvt_f64_f32_e32 v[4:5], v10
2064+
; GFX11-NEXT: v_cvt_f32_f16_e32 v10, v15
20652065
; GFX11-NEXT: v_cvt_f32_f16_e32 v8, v0
20662066
; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v3
20672067
; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v2
2068-
; GFX11-NEXT: v_cvt_f64_f32_e32 v[28:29], v22
2069-
; GFX11-NEXT: v_cvt_f64_f32_e32 v[30:31], v10
2070-
; GFX11-NEXT: v_cvt_f64_f32_e32 v[24:25], v18
2071-
; GFX11-NEXT: v_cvt_f64_f32_e32 v[26:27], v11
2068+
; GFX11-NEXT: v_cvt_f64_f32_e32 v[28:29], v23
2069+
; GFX11-NEXT: v_cvt_f64_f32_e32 v[30:31], v7
2070+
; GFX11-NEXT: v_cvt_f64_f32_e32 v[24:25], v22
2071+
; GFX11-NEXT: v_cvt_f64_f32_e32 v[26:27], v6
20722072
; GFX11-NEXT: v_cvt_f32_f16_e32 v33, v9
2073-
; GFX11-NEXT: v_cvt_f64_f32_e32 v[20:21], v15
2074-
; GFX11-NEXT: v_cvt_f64_f32_e32 v[22:23], v7
2073+
; GFX11-NEXT: v_cvt_f64_f32_e32 v[20:21], v18
2074+
; GFX11-NEXT: v_cvt_f64_f32_e32 v[22:23], v11
20752075
; GFX11-NEXT: v_cvt_f64_f32_e32 v[16:17], v14
2076-
; GFX11-NEXT: v_cvt_f64_f32_e32 v[18:19], v6
2076+
; GFX11-NEXT: v_cvt_f64_f32_e32 v[18:19], v10
20772077
; GFX11-NEXT: v_cvt_f64_f32_e32 v[0:1], v8
20782078
; GFX11-NEXT: v_cvt_f64_f32_e32 v[8:9], v12
20792079
; GFX11-NEXT: v_cvt_f64_f32_e32 v[12:13], v13
@@ -2082,10 +2082,10 @@ define amdgpu_kernel void @global_extload_v16f16_to_v16f64(ptr addrspace(1) %out
20822082
; GFX11-NEXT: v_cvt_f64_f32_e32 v[6:7], v34
20832083
; GFX11-NEXT: v_cvt_f64_f32_e32 v[2:3], v33
20842084
; GFX11-NEXT: s_clause 0x7
2085-
; GFX11-NEXT: global_store_b128 v32, v[28:31], s[0:1] offset:80
2086-
; GFX11-NEXT: global_store_b128 v32, v[24:27], s[0:1] offset:64
2087-
; GFX11-NEXT: global_store_b128 v32, v[20:23], s[0:1] offset:112
2088-
; GFX11-NEXT: global_store_b128 v32, v[16:19], s[0:1] offset:96
2085+
; GFX11-NEXT: global_store_b128 v32, v[28:31], s[0:1] offset:112
2086+
; GFX11-NEXT: global_store_b128 v32, v[24:27], s[0:1] offset:96
2087+
; GFX11-NEXT: global_store_b128 v32, v[20:23], s[0:1] offset:80
2088+
; GFX11-NEXT: global_store_b128 v32, v[16:19], s[0:1] offset:64
20892089
; GFX11-NEXT: global_store_b128 v32, v[12:15], s[0:1] offset:48
20902090
; GFX11-NEXT: global_store_b128 v32, v[8:11], s[0:1] offset:32
20912091
; GFX11-NEXT: global_store_b128 v32, v[4:7], s[0:1] offset:16

llvm/test/CodeGen/AMDGPU/load-constant-i1.ll

Lines changed: 50 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -5719,33 +5719,33 @@ define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %o
57195719
; GFX6-NEXT: s_mov_b32 s0, s4
57205720
; GFX6-NEXT: s_mov_b32 s1, s5
57215721
; GFX6-NEXT: s_waitcnt vmcnt(0)
5722-
; GFX6-NEXT: v_bfe_u32 v2, v29, 11, 1
5723-
; GFX6-NEXT: v_bfe_u32 v0, v29, 10, 1
5724-
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
5725-
; GFX6-NEXT: v_bfe_u32 v5, v29, 9, 1
5722+
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 15, v29
5723+
; GFX6-NEXT: v_bfe_u32 v0, v29, 14, 1
5724+
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
5725+
; GFX6-NEXT: v_bfe_u32 v5, v29, 13, 1
57265726
; GFX6-NEXT: s_waitcnt expcnt(0)
5727-
; GFX6-NEXT: v_bfe_u32 v3, v29, 8, 1
5728-
; GFX6-NEXT: buffer_store_dwordx4 v[3:6], off, s[0:3], 0 offset:64
5729-
; GFX6-NEXT: v_lshrrev_b32_e32 v8, 15, v29
5727+
; GFX6-NEXT: v_bfe_u32 v3, v29, 12, 1
5728+
; GFX6-NEXT: buffer_store_dwordx4 v[3:6], off, s[0:3], 0 offset:96
5729+
; GFX6-NEXT: v_bfe_u32 v8, v29, 11, 1
57305730
; GFX6-NEXT: s_waitcnt expcnt(0)
5731-
; GFX6-NEXT: v_bfe_u32 v6, v29, 14, 1
5732-
; GFX6-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:112
5733-
; GFX6-NEXT: v_bfe_u32 v27, v29, 5, 1
5734-
; GFX6-NEXT: v_bfe_u32 v23, v29, 7, 1
5735-
; GFX6-NEXT: v_bfe_u32 v19, v29, 1, 1
5736-
; GFX6-NEXT: v_bfe_u32 v15, v29, 3, 1
5737-
; GFX6-NEXT: v_bfe_u32 v11, v29, 13, 1
5738-
; GFX6-NEXT: v_bfe_u32 v25, v29, 4, 1
5739-
; GFX6-NEXT: v_bfe_u32 v21, v29, 6, 1
5740-
; GFX6-NEXT: v_and_b32_e32 v17, 1, v29
5741-
; GFX6-NEXT: v_bfe_u32 v13, v29, 2, 1
5731+
; GFX6-NEXT: v_bfe_u32 v6, v29, 10, 1
5732+
; GFX6-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:80
5733+
; GFX6-NEXT: v_bfe_u32 v27, v29, 1, 1
5734+
; GFX6-NEXT: v_bfe_u32 v23, v29, 3, 1
5735+
; GFX6-NEXT: v_bfe_u32 v19, v29, 5, 1
5736+
; GFX6-NEXT: v_bfe_u32 v15, v29, 7, 1
5737+
; GFX6-NEXT: v_bfe_u32 v11, v29, 9, 1
5738+
; GFX6-NEXT: v_and_b32_e32 v25, 1, v29
5739+
; GFX6-NEXT: v_bfe_u32 v21, v29, 2, 1
5740+
; GFX6-NEXT: v_bfe_u32 v17, v29, 4, 1
5741+
; GFX6-NEXT: v_bfe_u32 v13, v29, 6, 1
57425742
; GFX6-NEXT: s_waitcnt expcnt(0)
5743-
; GFX6-NEXT: v_bfe_u32 v9, v29, 12, 1
5744-
; GFX6-NEXT: buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:96
5745-
; GFX6-NEXT: buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:16
5746-
; GFX6-NEXT: buffer_store_dwordx4 v[17:20], off, s[0:3], 0
5747-
; GFX6-NEXT: buffer_store_dwordx4 v[21:24], off, s[0:3], 0 offset:48
5748-
; GFX6-NEXT: buffer_store_dwordx4 v[25:28], off, s[0:3], 0 offset:32
5743+
; GFX6-NEXT: v_bfe_u32 v9, v29, 8, 1
5744+
; GFX6-NEXT: buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:64
5745+
; GFX6-NEXT: buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:48
5746+
; GFX6-NEXT: buffer_store_dwordx4 v[17:20], off, s[0:3], 0 offset:32
5747+
; GFX6-NEXT: buffer_store_dwordx4 v[21:24], off, s[0:3], 0 offset:16
5748+
; GFX6-NEXT: buffer_store_dwordx4 v[25:28], off, s[0:3], 0
57495749
; GFX6-NEXT: s_endpgm
57505750
;
57515751
; GFX8-LABEL: constant_zextload_v16i1_to_v16i64:
@@ -5761,7 +5761,7 @@ define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %o
57615761
; GFX8-NEXT: flat_load_ushort v0, v[0:1]
57625762
; GFX8-NEXT: s_add_u32 s2, s0, 0x70
57635763
; GFX8-NEXT: s_addc_u32 s3, s1, 0
5764-
; GFX8-NEXT: s_add_u32 s4, s0, 0x50
5764+
; GFX8-NEXT: s_add_u32 s4, s0, 0x60
57655765
; GFX8-NEXT: s_addc_u32 s5, s1, 0
57665766
; GFX8-NEXT: v_mov_b32_e32 v23, s5
57675767
; GFX8-NEXT: v_mov_b32_e32 v22, s4
@@ -5775,9 +5775,9 @@ define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %o
57755775
; GFX8-NEXT: v_mov_b32_e32 v21, v2
57765776
; GFX8-NEXT: v_mov_b32_e32 v25, v2
57775777
; GFX8-NEXT: s_waitcnt vmcnt(0)
5778-
; GFX8-NEXT: v_lshrrev_b16_e32 v1, 10, v0
5778+
; GFX8-NEXT: v_lshrrev_b16_e32 v1, 12, v0
57795779
; GFX8-NEXT: v_and_b32_e32 v5, 1, v1
5780-
; GFX8-NEXT: v_lshrrev_b16_e32 v1, 11, v0
5780+
; GFX8-NEXT: v_lshrrev_b16_e32 v1, 13, v0
57815781
; GFX8-NEXT: v_and_b32_e32 v1, 1, v1
57825782
; GFX8-NEXT: v_and_b32_e32 v7, 0xffff, v1
57835783
; GFX8-NEXT: flat_store_dwordx4 v[22:23], v[5:8]
@@ -5786,31 +5786,31 @@ define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %o
57865786
; GFX8-NEXT: v_mov_b32_e32 v22, s2
57875787
; GFX8-NEXT: v_lshrrev_b16_e32 v3, 15, v0
57885788
; GFX8-NEXT: v_and_b32_e32 v1, 1, v1
5789-
; GFX8-NEXT: s_add_u32 s2, s0, 64
5789+
; GFX8-NEXT: flat_store_dwordx4 v[22:23], v[1:4]
57905790
; GFX8-NEXT: v_mov_b32_e32 v5, v2
5791+
; GFX8-NEXT: v_lshrrev_b16_e32 v1, 11, v0
57915792
; GFX8-NEXT: v_mov_b32_e32 v7, v2
5792-
; GFX8-NEXT: flat_store_dwordx4 v[22:23], v[1:4]
57935793
; GFX8-NEXT: v_mov_b32_e32 v23, v2
5794-
; GFX8-NEXT: v_mov_b32_e32 v3, 1
5794+
; GFX8-NEXT: s_add_u32 s2, s0, 0x50
5795+
; GFX8-NEXT: v_lshrrev_b16_e32 v2, 10, v0
5796+
; GFX8-NEXT: v_and_b32_e32 v1, 1, v1
57955797
; GFX8-NEXT: s_addc_u32 s3, s1, 0
5798+
; GFX8-NEXT: v_and_b32_e32 v8, 1, v2
5799+
; GFX8-NEXT: v_and_b32_e32 v10, 0xffff, v1
57965800
; GFX8-NEXT: v_mov_b32_e32 v1, s2
57975801
; GFX8-NEXT: v_mov_b32_e32 v2, s3
5798-
; GFX8-NEXT: v_and_b32_sdwa v8, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
5799-
; GFX8-NEXT: v_lshrrev_b16_e32 v3, 9, v0
5800-
; GFX8-NEXT: s_add_u32 s2, s0, 0x60
5801-
; GFX8-NEXT: v_and_b32_e32 v3, 1, v3
5802+
; GFX8-NEXT: s_add_u32 s2, s0, 64
58025803
; GFX8-NEXT: s_addc_u32 s3, s1, 0
5803-
; GFX8-NEXT: v_and_b32_e32 v10, 0xffff, v3
58045804
; GFX8-NEXT: v_mov_b32_e32 v4, s3
58055805
; GFX8-NEXT: v_mov_b32_e32 v3, s2
58065806
; GFX8-NEXT: s_add_u32 s2, s0, 48
5807+
; GFX8-NEXT: v_mov_b32_e32 v6, 1
58075808
; GFX8-NEXT: s_addc_u32 s3, s1, 0
5808-
; GFX8-NEXT: v_lshrrev_b16_e32 v6, 12, v0
58095809
; GFX8-NEXT: flat_store_dwordx4 v[1:2], v[8:11]
58105810
; GFX8-NEXT: v_mov_b32_e32 v2, s1
58115811
; GFX8-NEXT: v_mov_b32_e32 v9, s3
5812-
; GFX8-NEXT: v_and_b32_e32 v11, 1, v6
5813-
; GFX8-NEXT: v_lshrrev_b16_e32 v6, 13, v0
5812+
; GFX8-NEXT: v_and_b32_sdwa v11, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
5813+
; GFX8-NEXT: v_lshrrev_b16_e32 v6, 9, v0
58145814
; GFX8-NEXT: v_mov_b32_e32 v8, s2
58155815
; GFX8-NEXT: s_add_u32 s2, s0, 32
58165816
; GFX8-NEXT: v_and_b32_e32 v6, 1, v6
@@ -5940,35 +5940,33 @@ define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %o
59405940
; GFX12-NEXT: global_load_u16 v0, v1, s[2:3]
59415941
; GFX12-NEXT: s_wait_loadcnt 0x0
59425942
; GFX12-NEXT: v_and_b32_e32 v28, 1, v0
5943-
; GFX12-NEXT: v_lshrrev_b16 v4, 11, v0
5944-
; GFX12-NEXT: v_lshrrev_b16 v8, 9, v0
5945-
; GFX12-NEXT: v_lshrrev_b16 v12, 13, v0
5943+
; GFX12-NEXT: v_lshrrev_b16 v4, 13, v0
5944+
; GFX12-NEXT: v_lshrrev_b16 v8, 11, v0
5945+
; GFX12-NEXT: v_lshrrev_b16 v12, 9, v0
59465946
; GFX12-NEXT: v_lshrrev_b16 v16, 7, v0
59475947
; GFX12-NEXT: v_lshrrev_b16 v2, 15, v0
59485948
; GFX12-NEXT: v_lshrrev_b16 v6, 14, v0
5949-
; GFX12-NEXT: v_lshrrev_b16 v10, 10, v0
59505949
; GFX12-NEXT: v_lshrrev_b16 v20, 5, v0
59515950
; GFX12-NEXT: v_lshrrev_b16 v24, 3, v0
59525951
; GFX12-NEXT: v_lshrrev_b16 v32, 1, v0
5952+
; GFX12-NEXT: v_lshrrev_b16 v10, 12, v0
59535953
; GFX12-NEXT: v_and_b32_e32 v33, 1, v4
59545954
; GFX12-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_and_b32 v34, 1, v8
5955-
; GFX12-NEXT: v_lshrrev_b16 v14, 8, v0
5956-
; GFX12-NEXT: v_lshrrev_b16 v18, 12, v0
5955+
; GFX12-NEXT: v_lshrrev_b16 v14, 10, v0
5956+
; GFX12-NEXT: v_lshrrev_b16 v18, 8, v0
59575957
; GFX12-NEXT: v_and_b32_e32 v35, 1, v12
59585958
; GFX12-NEXT: v_dual_mov_b32 v5, v1 :: v_dual_and_b32 v36, 1, v16
59595959
; GFX12-NEXT: v_lshrrev_b16 v22, 6, v0
59605960
; GFX12-NEXT: v_dual_mov_b32 v7, v1 :: v_dual_and_b32 v38, 1, v24
59615961
; GFX12-NEXT: v_dual_mov_b32 v9, v1 :: v_dual_and_b32 v32, 1, v32
5962-
; GFX12-NEXT: v_dual_mov_b32 v21, v1 :: v_dual_and_b32 v4, 1, v10
59635962
; GFX12-NEXT: v_mov_b32_e32 v23, v1
59645963
; GFX12-NEXT: v_dual_mov_b32 v25, v1 :: v_dual_and_b32 v2, 0xffff, v2
5965-
; GFX12-NEXT: v_mov_b32_e32 v31, v1
59665964
; GFX12-NEXT: v_lshrrev_b16 v26, 4, v0
59675965
; GFX12-NEXT: v_lshrrev_b16 v30, 2, v0
59685966
; GFX12-NEXT: v_and_b32_e32 v37, 1, v20
5969-
; GFX12-NEXT: v_and_b32_e32 v0, 1, v6
5967+
; GFX12-NEXT: v_dual_mov_b32 v21, v1 :: v_dual_and_b32 v0, 1, v6
5968+
; GFX12-NEXT: v_and_b32_e32 v4, 1, v10
59705969
; GFX12-NEXT: v_dual_mov_b32 v11, v1 :: v_dual_and_b32 v6, 0xffff, v33
5971-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4)
59725970
; GFX12-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_and_b32 v24, 1, v30
59735971
; GFX12-NEXT: v_and_b32_e32 v8, 1, v14
59745972
; GFX12-NEXT: v_dual_mov_b32 v13, v1 :: v_dual_and_b32 v10, 0xffff, v34
@@ -5978,13 +5976,13 @@ define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %o
59785976
; GFX12-NEXT: v_and_b32_e32 v14, 0xffff, v35
59795977
; GFX12-NEXT: v_dual_mov_b32 v27, v1 :: v_dual_and_b32 v18, 0xffff, v36
59805978
; GFX12-NEXT: v_dual_mov_b32 v29, v1 :: v_dual_and_b32 v30, 0xffff, v32
5981-
; GFX12-NEXT: v_and_b32_e32 v26, 0xffff, v38
5979+
; GFX12-NEXT: v_dual_mov_b32 v31, v1 :: v_dual_and_b32 v26, 0xffff, v38
59825980
; GFX12-NEXT: v_and_b32_e32 v22, 0xffff, v37
59835981
; GFX12-NEXT: s_clause 0x7
5984-
; GFX12-NEXT: global_store_b128 v1, v[4:7], s[0:1] offset:80
5985-
; GFX12-NEXT: global_store_b128 v1, v[8:11], s[0:1] offset:64
59865982
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:112
5987-
; GFX12-NEXT: global_store_b128 v1, v[12:15], s[0:1] offset:96
5983+
; GFX12-NEXT: global_store_b128 v1, v[4:7], s[0:1] offset:96
5984+
; GFX12-NEXT: global_store_b128 v1, v[8:11], s[0:1] offset:80
5985+
; GFX12-NEXT: global_store_b128 v1, v[12:15], s[0:1] offset:64
59885986
; GFX12-NEXT: global_store_b128 v1, v[16:19], s[0:1] offset:48
59895987
; GFX12-NEXT: global_store_b128 v1, v[20:23], s[0:1] offset:32
59905988
; GFX12-NEXT: global_store_b128 v1, v[24:27], s[0:1] offset:16

0 commit comments

Comments
 (0)