@@ -67,6 +67,8 @@ define amdgpu_kernel void @kernel_caller_byval() {
67
67
; MUBUF-NEXT: s_mov_b32 s3, 0xe00000
68
68
; MUBUF-NEXT: s_mov_b64 s[0:1], flat_scratch
69
69
; MUBUF-NEXT: v_mov_b32_e32 v0, 0
70
+ ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0
71
+ ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
70
72
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8
71
73
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:12
72
74
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:16
@@ -97,25 +99,23 @@ define amdgpu_kernel void @kernel_caller_byval() {
97
99
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:116
98
100
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:120
99
101
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:124
100
- ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:128
101
- ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:132
102
- ; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:8
102
+ ; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], 0
103
103
; MUBUF-NEXT: s_nop 0
104
- ; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:12
105
- ; MUBUF-NEXT: buffer_load_dword v2, off, s[0:3], 0 offset:16
106
- ; MUBUF-NEXT: buffer_load_dword v3, off, s[0:3], 0 offset:20
107
- ; MUBUF-NEXT: buffer_load_dword v4, off, s[0:3], 0 offset:24
108
- ; MUBUF-NEXT: buffer_load_dword v5, off, s[0:3], 0 offset:28
109
- ; MUBUF-NEXT: buffer_load_dword v6, off, s[0:3], 0 offset:32
110
- ; MUBUF-NEXT: buffer_load_dword v7, off, s[0:3], 0 offset:36
111
- ; MUBUF-NEXT: buffer_load_dword v8, off, s[0:3], 0 offset:40
112
- ; MUBUF-NEXT: buffer_load_dword v9, off, s[0:3], 0 offset:44
113
- ; MUBUF-NEXT: buffer_load_dword v10, off, s[0:3], 0 offset:48
114
- ; MUBUF-NEXT: buffer_load_dword v11, off, s[0:3], 0 offset:52
115
- ; MUBUF-NEXT: buffer_load_dword v12, off, s[0:3], 0 offset:56
116
- ; MUBUF-NEXT: buffer_load_dword v13, off, s[0:3], 0 offset:60
117
- ; MUBUF-NEXT: buffer_load_dword v14, off, s[0:3], 0 offset:64
118
- ; MUBUF-NEXT: buffer_load_dword v15, off, s[0:3], 0 offset:68
104
+ ; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4
105
+ ; MUBUF-NEXT: buffer_load_dword v2, off, s[0:3], 0 offset:8
106
+ ; MUBUF-NEXT: buffer_load_dword v3, off, s[0:3], 0 offset:12
107
+ ; MUBUF-NEXT: buffer_load_dword v4, off, s[0:3], 0 offset:16
108
+ ; MUBUF-NEXT: buffer_load_dword v5, off, s[0:3], 0 offset:20
109
+ ; MUBUF-NEXT: buffer_load_dword v6, off, s[0:3], 0 offset:24
110
+ ; MUBUF-NEXT: buffer_load_dword v7, off, s[0:3], 0 offset:28
111
+ ; MUBUF-NEXT: buffer_load_dword v8, off, s[0:3], 0 offset:32
112
+ ; MUBUF-NEXT: buffer_load_dword v9, off, s[0:3], 0 offset:36
113
+ ; MUBUF-NEXT: buffer_load_dword v10, off, s[0:3], 0 offset:40
114
+ ; MUBUF-NEXT: buffer_load_dword v11, off, s[0:3], 0 offset:44
115
+ ; MUBUF-NEXT: buffer_load_dword v12, off, s[0:3], 0 offset:48
116
+ ; MUBUF-NEXT: buffer_load_dword v13, off, s[0:3], 0 offset:52
117
+ ; MUBUF-NEXT: buffer_load_dword v14, off, s[0:3], 0 offset:56
118
+ ; MUBUF-NEXT: buffer_load_dword v15, off, s[0:3], 0 offset:60
119
119
; MUBUF-NEXT: s_movk_i32 s32, 0x1400
120
120
; MUBUF-NEXT: s_getpc_b64 s[4:5]
121
121
; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_byval@rel32@lo+4
@@ -162,6 +162,7 @@ define amdgpu_kernel void @kernel_caller_byval() {
162
162
; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
163
163
; FLATSCR-NEXT: v_mov_b32_e32 v1, 0
164
164
; FLATSCR-NEXT: s_mov_b32 s0, 0
165
+ ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0
165
166
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 offset:8
166
167
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 offset:16
167
168
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 offset:24
@@ -177,16 +178,15 @@ define amdgpu_kernel void @kernel_caller_byval() {
177
178
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 offset:104
178
179
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 offset:112
179
180
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 offset:120
180
- ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 offset:128
181
- ; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s0 offset:8
181
+ ; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s0
182
182
; FLATSCR-NEXT: s_nop 0
183
- ; FLATSCR-NEXT: scratch_load_dwordx2 v[2:3], off, s0 offset:16
184
- ; FLATSCR-NEXT: scratch_load_dwordx2 v[4:5], off, s0 offset:24
185
- ; FLATSCR-NEXT: scratch_load_dwordx2 v[6:7], off, s0 offset:32
186
- ; FLATSCR-NEXT: scratch_load_dwordx2 v[8:9], off, s0 offset:40
187
- ; FLATSCR-NEXT: scratch_load_dwordx2 v[10:11], off, s0 offset:48
188
- ; FLATSCR-NEXT: scratch_load_dwordx2 v[12:13], off, s0 offset:56
189
- ; FLATSCR-NEXT: scratch_load_dwordx2 v[14:15], off, s0 offset:64
183
+ ; FLATSCR-NEXT: scratch_load_dwordx2 v[2:3], off, s0 offset:8
184
+ ; FLATSCR-NEXT: scratch_load_dwordx2 v[4:5], off, s0 offset:16
185
+ ; FLATSCR-NEXT: scratch_load_dwordx2 v[6:7], off, s0 offset:24
186
+ ; FLATSCR-NEXT: scratch_load_dwordx2 v[8:9], off, s0 offset:32
187
+ ; FLATSCR-NEXT: scratch_load_dwordx2 v[10:11], off, s0 offset:40
188
+ ; FLATSCR-NEXT: scratch_load_dwordx2 v[12:13], off, s0 offset:48
189
+ ; FLATSCR-NEXT: scratch_load_dwordx2 v[14:15], off, s0 offset:56
190
190
; FLATSCR-NEXT: s_movk_i32 s32, 0x50
191
191
; FLATSCR-NEXT: s_getpc_b64 s[0:1]
192
192
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_byval@rel32@lo+4
0 commit comments