@@ -208,45 +208,36 @@ define void @mubuf_clause(ptr addrspace(5) noalias nocapture readonly %arg, ptr
208
208
; GCN-SCRATCH-NEXT: v_lshlrev_b32_e32 v2, 4, v31
209
209
; GCN-SCRATCH-NEXT: v_and_b32_e32 v18, 0x3ff0, v2
210
210
; GCN-SCRATCH-NEXT: v_add_nc_u32_e32 v0, v0, v18
211
- ; GCN-SCRATCH-NEXT: v_add_nc_u32_e32 v6, 16, v0
212
- ; GCN-SCRATCH-NEXT: v_add_nc_u32_e32 v10, 32, v0
213
- ; GCN-SCRATCH-NEXT: v_add_nc_u32_e32 v14, 48, v0
214
211
; GCN-SCRATCH-NEXT: s_clause 0x3
215
212
; GCN-SCRATCH-NEXT: scratch_load_dwordx4 v[2:5], v0, off
216
- ; GCN-SCRATCH-NEXT: scratch_load_dwordx4 v[6:9], v6 , off
217
- ; GCN-SCRATCH-NEXT: scratch_load_dwordx4 v[10:13], v10 , off
218
- ; GCN-SCRATCH-NEXT: scratch_load_dwordx4 v[14:17], v14 , off
213
+ ; GCN-SCRATCH-NEXT: scratch_load_dwordx4 v[6:9], v0 , off offset:16
214
+ ; GCN-SCRATCH-NEXT: scratch_load_dwordx4 v[10:13], v0 , off offset:32
215
+ ; GCN-SCRATCH-NEXT: scratch_load_dwordx4 v[14:17], v0 , off offset:48
219
216
; GCN-SCRATCH-NEXT: v_add_nc_u32_e32 v0, v1, v18
220
- ; GCN-SCRATCH-NEXT: v_add_nc_u32_e32 v1, 16, v0
221
- ; GCN-SCRATCH-NEXT: v_add_nc_u32_e32 v18, 32, v0
222
- ; GCN-SCRATCH-NEXT: v_add_nc_u32_e32 v19, 48, v0
223
217
; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(3)
224
218
; GCN-SCRATCH-NEXT: scratch_store_dwordx4 v0, v[2:5], off
225
219
; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(2)
226
- ; GCN-SCRATCH-NEXT: scratch_store_dwordx4 v1 , v[6:9], off
220
+ ; GCN-SCRATCH-NEXT: scratch_store_dwordx4 v0 , v[6:9], off offset:16
227
221
; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(1)
228
- ; GCN-SCRATCH-NEXT: scratch_store_dwordx4 v18 , v[10:13], off
222
+ ; GCN-SCRATCH-NEXT: scratch_store_dwordx4 v0 , v[10:13], off offset:32
229
223
; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(0)
230
- ; GCN-SCRATCH-NEXT: scratch_store_dwordx4 v19 , v[14:17], off
224
+ ; GCN-SCRATCH-NEXT: scratch_store_dwordx4 v0 , v[14:17], off offset:48
231
225
; GCN-SCRATCH-NEXT: s_setpc_b64 s[30:31]
232
226
bb:
233
227
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x ()
234
- %tmp2 = getelementptr inbounds <4 x i32 >, ptr addrspace (5 ) %arg , i32 %tmp
235
- %tmp3 = load <4 x i32 >, ptr addrspace (5 ) %tmp2 , align 16
236
- %tmp4 = getelementptr inbounds <4 x i32 >, ptr addrspace (5 ) %arg1 , i32 %tmp
237
- %tmp5 = add nuw nsw i32 %tmp , 1
238
- %tmp6 = getelementptr inbounds <4 x i32 >, ptr addrspace (5 ) %arg , i32 %tmp5
228
+ %base = getelementptr inbounds <4 x i32 >, ptr addrspace (5 ) %arg , i32 %tmp
229
+ %tmp3 = load <4 x i32 >, ptr addrspace (5 ) %base , align 16
230
+ %base1 = getelementptr inbounds <4 x i32 >, ptr addrspace (5 ) %arg1 , i32 %tmp
231
+ %tmp6 = getelementptr inbounds <4 x i32 >, ptr addrspace (5 ) %base , i32 1
239
232
%tmp7 = load <4 x i32 >, ptr addrspace (5 ) %tmp6 , align 16
240
- %tmp8 = getelementptr inbounds <4 x i32 >, ptr addrspace (5 ) %arg1 , i32 %tmp5
241
- %tmp9 = add nuw nsw i32 %tmp , 2
242
- %tmp10 = getelementptr inbounds <4 x i32 >, ptr addrspace (5 ) %arg , i32 %tmp9
233
+ %tmp8 = getelementptr inbounds <4 x i32 >, ptr addrspace (5 ) %base1 , i32 1
234
+ %tmp10 = getelementptr inbounds <4 x i32 >, ptr addrspace (5 ) %base , i32 2
243
235
%tmp11 = load <4 x i32 >, ptr addrspace (5 ) %tmp10 , align 16
244
- %tmp12 = getelementptr inbounds <4 x i32 >, ptr addrspace (5 ) %arg1 , i32 %tmp9
245
- %tmp13 = add nuw nsw i32 %tmp , 3
246
- %tmp14 = getelementptr inbounds <4 x i32 >, ptr addrspace (5 ) %arg , i32 %tmp13
236
+ %tmp12 = getelementptr inbounds <4 x i32 >, ptr addrspace (5 ) %base1 , i32 2
237
+ %tmp14 = getelementptr inbounds <4 x i32 >, ptr addrspace (5 ) %base , i32 3
247
238
%tmp15 = load <4 x i32 >, ptr addrspace (5 ) %tmp14 , align 16
248
- %tmp16 = getelementptr inbounds <4 x i32 >, ptr addrspace (5 ) %arg1 , i32 %tmp13
249
- store <4 x i32 > %tmp3 , ptr addrspace (5 ) %tmp4 , align 16
239
+ %tmp16 = getelementptr inbounds <4 x i32 >, ptr addrspace (5 ) %base1 , i32 3
240
+ store <4 x i32 > %tmp3 , ptr addrspace (5 ) %base1 , align 16
250
241
store <4 x i32 > %tmp7 , ptr addrspace (5 ) %tmp8 , align 16
251
242
store <4 x i32 > %tmp11 , ptr addrspace (5 ) %tmp12 , align 16
252
243
store <4 x i32 > %tmp15 , ptr addrspace (5 ) %tmp16 , align 16
0 commit comments