@@ -21,11 +21,9 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
21
21
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
22
22
; GFX9-NEXT: s_lshl_b32 s1, s0, 2
23
23
; GFX9-NEXT: s_and_b32 s0, s0, 15
24
- ; GFX9-NEXT: s_add_i32 s1, s1, 0
25
24
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
26
25
; GFX9-NEXT: scratch_store_dword off, v0, s1
27
26
; GFX9-NEXT: s_waitcnt vmcnt(0)
28
- ; GFX9-NEXT: s_add_i32 s0, s0, 0
29
27
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
30
28
; GFX9-NEXT: s_waitcnt vmcnt(0)
31
29
; GFX9-NEXT: s_endpgm
@@ -42,8 +40,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
42
40
; GFX10-NEXT: s_and_b32 s1, s0, 15
43
41
; GFX10-NEXT: s_lshl_b32 s0, s0, 2
44
42
; GFX10-NEXT: s_lshl_b32 s1, s1, 2
45
- ; GFX10-NEXT: s_add_i32 s0, s0, 0
46
- ; GFX10-NEXT: s_add_i32 s1, s1, 0
47
43
; GFX10-NEXT: scratch_store_dword off, v0, s0
48
44
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
49
45
; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
@@ -57,7 +53,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
57
53
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
58
54
; GFX940-NEXT: s_lshl_b32 s1, s0, 2
59
55
; GFX940-NEXT: s_and_b32 s0, s0, 15
60
- ; GFX940-NEXT: s_add_i32 s1, s1, 0
61
56
; GFX940-NEXT: s_lshl_b32 s0, s0, 2
62
57
; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
63
58
; GFX940-NEXT: s_waitcnt vmcnt(0)
@@ -75,7 +70,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
75
70
; GFX11-NEXT: s_lshl_b32 s1, s1, 2
76
71
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
77
72
; GFX11-NEXT: v_dual_mov_b32 v0, 15 :: v_dual_mov_b32 v1, s1
78
- ; GFX11-NEXT: s_add_i32 s0, s0, 0
79
73
; GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc
80
74
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
81
75
; GFX11-NEXT: scratch_load_b32 v0, v1, off glc dlc
@@ -108,11 +102,9 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
108
102
; UNALIGNED_GFX9-NEXT: s_waitcnt lgkmcnt(0)
109
103
; UNALIGNED_GFX9-NEXT: s_lshl_b32 s1, s0, 2
110
104
; UNALIGNED_GFX9-NEXT: s_and_b32 s0, s0, 15
111
- ; UNALIGNED_GFX9-NEXT: s_add_i32 s1, s1, 0
112
105
; UNALIGNED_GFX9-NEXT: s_lshl_b32 s0, s0, 2
113
106
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s1
114
107
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
115
- ; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s0, 0
116
108
; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc
117
109
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
118
110
; UNALIGNED_GFX9-NEXT: s_endpgm
@@ -129,8 +121,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
129
121
; UNALIGNED_GFX10-NEXT: s_and_b32 s1, s0, 15
130
122
; UNALIGNED_GFX10-NEXT: s_lshl_b32 s0, s0, 2
131
123
; UNALIGNED_GFX10-NEXT: s_lshl_b32 s1, s1, 2
132
- ; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s0, 0
133
- ; UNALIGNED_GFX10-NEXT: s_add_i32 s1, s1, 0
134
124
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, s0
135
125
; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
136
126
; UNALIGNED_GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
@@ -144,7 +134,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
144
134
; UNALIGNED_GFX940-NEXT: s_waitcnt lgkmcnt(0)
145
135
; UNALIGNED_GFX940-NEXT: s_lshl_b32 s1, s0, 2
146
136
; UNALIGNED_GFX940-NEXT: s_and_b32 s0, s0, 15
147
- ; UNALIGNED_GFX940-NEXT: s_add_i32 s1, s1, 0
148
137
; UNALIGNED_GFX940-NEXT: s_lshl_b32 s0, s0, 2
149
138
; UNALIGNED_GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
150
139
; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0)
@@ -162,7 +151,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
162
151
; UNALIGNED_GFX11-NEXT: s_lshl_b32 s1, s1, 2
163
152
; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
164
153
; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v0, 15 :: v_dual_mov_b32 v1, s1
165
- ; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s0, 0
166
154
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc
167
155
; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
168
156
; UNALIGNED_GFX11-NEXT: scratch_load_b32 v0, v1, off glc dlc
@@ -1923,13 +1911,13 @@ define void @store_load_large_imm_offset_foo() {
1923
1911
; GFX9-LABEL: store_load_large_imm_offset_foo:
1924
1912
; GFX9: ; %bb.0: ; %bb
1925
1913
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1926
- ; GFX9-NEXT: v_mov_b32_e32 v0, 13
1927
1914
; GFX9-NEXT: s_movk_i32 s0, 0x3e80
1928
- ; GFX9-NEXT: s_add_i32 s1, s32, 4
1915
+ ; GFX9-NEXT: v_mov_b32_e32 v0, 13
1916
+ ; GFX9-NEXT: s_add_i32 s1, s32, s0
1929
1917
; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
1930
1918
; GFX9-NEXT: s_waitcnt vmcnt(0)
1931
1919
; GFX9-NEXT: v_mov_b32_e32 v0, 15
1932
- ; GFX9-NEXT: s_add_i32 s0, s0, s1
1920
+ ; GFX9-NEXT: s_add_i32 s0, s1, 4
1933
1921
; GFX9-NEXT: scratch_store_dword off, v0, s0
1934
1922
; GFX9-NEXT: s_waitcnt vmcnt(0)
1935
1923
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -1940,10 +1928,10 @@ define void @store_load_large_imm_offset_foo() {
1940
1928
; GFX10: ; %bb.0: ; %bb
1941
1929
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1942
1930
; GFX10-NEXT: v_mov_b32_e32 v0, 13
1943
- ; GFX10-NEXT: v_mov_b32_e32 v1, 15
1944
1931
; GFX10-NEXT: s_movk_i32 s0, 0x3e80
1945
- ; GFX10-NEXT: s_add_i32 s1, s32, 4
1946
- ; GFX10-NEXT: s_add_i32 s0, s0, s1
1932
+ ; GFX10-NEXT: v_mov_b32_e32 v1, 15
1933
+ ; GFX10-NEXT: s_add_i32 s1, s32, s0
1934
+ ; GFX10-NEXT: s_add_i32 s0, s1, 4
1947
1935
; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
1948
1936
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1949
1937
; GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -1999,13 +1987,13 @@ define void @store_load_large_imm_offset_foo() {
1999
1987
; UNALIGNED_GFX9-LABEL: store_load_large_imm_offset_foo:
2000
1988
; UNALIGNED_GFX9: ; %bb.0: ; %bb
2001
1989
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2002
- ; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 13
2003
1990
; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e80
2004
- ; UNALIGNED_GFX9-NEXT: s_add_i32 s1, s32, 4
1991
+ ; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 13
1992
+ ; UNALIGNED_GFX9-NEXT: s_add_i32 s1, s32, s0
2005
1993
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
2006
1994
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
2007
1995
; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 15
2008
- ; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s0, s1
1996
+ ; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s1, 4
2009
1997
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0
2010
1998
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
2011
1999
; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -2016,10 +2004,10 @@ define void @store_load_large_imm_offset_foo() {
2016
2004
; UNALIGNED_GFX10: ; %bb.0: ; %bb
2017
2005
; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2018
2006
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v0, 13
2019
- ; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
2020
2007
; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e80
2021
- ; UNALIGNED_GFX10-NEXT: s_add_i32 s1, s32, 4
2022
- ; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s0, s1
2008
+ ; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
2009
+ ; UNALIGNED_GFX10-NEXT: s_add_i32 s1, s32, s0
2010
+ ; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s1, 4
2023
2011
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
2024
2012
; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2025
2013
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v1, s0
0 commit comments