Skip to content

Commit aaf1629

Browse files
committed
Massage memory_clause.ll so that offsets are still generated
1 parent a42f28c commit aaf1629

File tree

1 file changed

+16 -25 lines changed

1 file changed

+16 -25 lines changed

llvm/test/CodeGen/AMDGPU/memory_clause.ll

Lines changed: 16 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -208,45 +208,36 @@ define void @mubuf_clause(ptr addrspace(5) noalias nocapture readonly %arg, ptr
208208
; GCN-SCRATCH-NEXT: v_lshlrev_b32_e32 v2, 4, v31
209209
; GCN-SCRATCH-NEXT: v_and_b32_e32 v18, 0x3ff0, v2
210210
; GCN-SCRATCH-NEXT: v_add_nc_u32_e32 v0, v0, v18
211-
; GCN-SCRATCH-NEXT: v_add_nc_u32_e32 v6, 16, v0
212-
; GCN-SCRATCH-NEXT: v_add_nc_u32_e32 v10, 32, v0
213-
; GCN-SCRATCH-NEXT: v_add_nc_u32_e32 v14, 48, v0
214211
; GCN-SCRATCH-NEXT: s_clause 0x3
215212
; GCN-SCRATCH-NEXT: scratch_load_dwordx4 v[2:5], v0, off
216-
; GCN-SCRATCH-NEXT: scratch_load_dwordx4 v[6:9], v6, off
217-
; GCN-SCRATCH-NEXT: scratch_load_dwordx4 v[10:13], v10, off
218-
; GCN-SCRATCH-NEXT: scratch_load_dwordx4 v[14:17], v14, off
213+
; GCN-SCRATCH-NEXT: scratch_load_dwordx4 v[6:9], v0, off offset:16
214+
; GCN-SCRATCH-NEXT: scratch_load_dwordx4 v[10:13], v0, off offset:32
215+
; GCN-SCRATCH-NEXT: scratch_load_dwordx4 v[14:17], v0, off offset:48
219216
; GCN-SCRATCH-NEXT: v_add_nc_u32_e32 v0, v1, v18
220-
; GCN-SCRATCH-NEXT: v_add_nc_u32_e32 v1, 16, v0
221-
; GCN-SCRATCH-NEXT: v_add_nc_u32_e32 v18, 32, v0
222-
; GCN-SCRATCH-NEXT: v_add_nc_u32_e32 v19, 48, v0
223217
; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(3)
224218
; GCN-SCRATCH-NEXT: scratch_store_dwordx4 v0, v[2:5], off
225219
; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(2)
226-
; GCN-SCRATCH-NEXT: scratch_store_dwordx4 v1, v[6:9], off
220+
; GCN-SCRATCH-NEXT: scratch_store_dwordx4 v0, v[6:9], off offset:16
227221
; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(1)
228-
; GCN-SCRATCH-NEXT: scratch_store_dwordx4 v18, v[10:13], off
222+
; GCN-SCRATCH-NEXT: scratch_store_dwordx4 v0, v[10:13], off offset:32
229223
; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(0)
230-
; GCN-SCRATCH-NEXT: scratch_store_dwordx4 v19, v[14:17], off
224+
; GCN-SCRATCH-NEXT: scratch_store_dwordx4 v0, v[14:17], off offset:48
231225
; GCN-SCRATCH-NEXT: s_setpc_b64 s[30:31]
232226
bb:
233227
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
234-
%tmp2 = getelementptr inbounds <4 x i32>, ptr addrspace(5) %arg, i32 %tmp
235-
%tmp3 = load <4 x i32>, ptr addrspace(5) %tmp2, align 16
236-
%tmp4 = getelementptr inbounds <4 x i32>, ptr addrspace(5) %arg1, i32 %tmp
237-
%tmp5 = add nuw nsw i32 %tmp, 1
238-
%tmp6 = getelementptr inbounds <4 x i32>, ptr addrspace(5) %arg, i32 %tmp5
228+
%base = getelementptr inbounds <4 x i32>, ptr addrspace(5) %arg, i32 %tmp
229+
%tmp3 = load <4 x i32>, ptr addrspace(5) %base, align 16
230+
%base1 = getelementptr inbounds <4 x i32>, ptr addrspace(5) %arg1, i32 %tmp
231+
%tmp6 = getelementptr inbounds <4 x i32>, ptr addrspace(5) %base, i32 1
239232
%tmp7 = load <4 x i32>, ptr addrspace(5) %tmp6, align 16
240-
%tmp8 = getelementptr inbounds <4 x i32>, ptr addrspace(5) %arg1, i32 %tmp5
241-
%tmp9 = add nuw nsw i32 %tmp, 2
242-
%tmp10 = getelementptr inbounds <4 x i32>, ptr addrspace(5) %arg, i32 %tmp9
233+
%tmp8 = getelementptr inbounds <4 x i32>, ptr addrspace(5) %base1, i32 1
234+
%tmp10 = getelementptr inbounds <4 x i32>, ptr addrspace(5) %base, i32 2
243235
%tmp11 = load <4 x i32>, ptr addrspace(5) %tmp10, align 16
244-
%tmp12 = getelementptr inbounds <4 x i32>, ptr addrspace(5) %arg1, i32 %tmp9
245-
%tmp13 = add nuw nsw i32 %tmp, 3
246-
%tmp14 = getelementptr inbounds <4 x i32>, ptr addrspace(5) %arg, i32 %tmp13
236+
%tmp12 = getelementptr inbounds <4 x i32>, ptr addrspace(5) %base1, i32 2
237+
%tmp14 = getelementptr inbounds <4 x i32>, ptr addrspace(5) %base, i32 3
247238
%tmp15 = load <4 x i32>, ptr addrspace(5) %tmp14, align 16
248-
%tmp16 = getelementptr inbounds <4 x i32>, ptr addrspace(5) %arg1, i32 %tmp13
249-
store <4 x i32> %tmp3, ptr addrspace(5) %tmp4, align 16
239+
%tmp16 = getelementptr inbounds <4 x i32>, ptr addrspace(5) %base1, i32 3
240+
store <4 x i32> %tmp3, ptr addrspace(5) %base1, align 16
250241
store <4 x i32> %tmp7, ptr addrspace(5) %tmp8, align 16
251242
store <4 x i32> %tmp11, ptr addrspace(5) %tmp12, align 16
252243
store <4 x i32> %tmp15, ptr addrspace(5) %tmp16, align 16

0 commit comments

Comments (0)