Skip to content

[AMDGPU] Generate checks for vector indexing. NFC. #105668

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 0 additions & 67 deletions llvm/test/CodeGen/AMDGPU/indirect-addressing-si-gfx9.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,70 +2,6 @@

; indexing of vectors.

; Subtest below moved from file test/CodeGen/AMDGPU/indirect-addressing-si.ll
; to avoid gfx9 scheduling-induced issues.


; GCN-LABEL: {{^}}insert_vgpr_offset_multiple_in_block:
; GCN-DAG: s_load_dwordx16 s[[[S_ELT0:[0-9]+]]:[[S_ELT15:[0-9]+]]]
; GCN-DAG: {{buffer|flat|global}}_load_dword [[IDX0:v[0-9]+]]
; GCN-DAG: v_mov_b32 [[INS0:v[0-9]+]], 62

; GCN-DAG: v_mov_b32_e32 v[[VEC_ELT15:[0-9]+]], s[[S_ELT15]]
; GCN-DAG: v_mov_b32_e32 v[[VEC_ELT0:[0-9]+]], s[[S_ELT0]]

; GCN: v_cmp_eq_u32_e32
; GCN-COUNT-32: v_cndmask_b32

; GCN-COUNT-4: buffer_store_dwordx4
; Kernel that performs two dynamically indexed insertelement operations on the
; same <16 x i32> vector inside one basic block, then conditionally reuses the
; inserted value in a second block.
;   %out0 - destination of the volatile store of the updated vector
;   %out1 - not referenced anywhere in the body
;   %in   - base of the i32 array the first insertion index is loaded from
;   %vec0 - vector that receives both insertions
define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %in, <16 x i32> %vec0) #0 {
entry:
; Address %in[workitem.id.x], so the index is loaded per work-item.
%id = call i32 @llvm.amdgcn.workitem.id.x() #1
%id.ext = zext i32 %id to i64
%gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %id.ext
; First insertion index (volatile load); the second index is the next lane up.
%idx0 = load volatile i32, ptr addrspace(1) %gep
%idx1 = add i32 %idx0, 1
; Side-effecting inline asm whose single output receives the immediate 62.
; NOTE(review): the "=v" constraint presumably requests a VGPR - confirm
; against the AMDGPU inline-asm constraint documentation.
%live.out.val = call i32 asm sideeffect "v_mov_b32 $0, 62", "=v"()
; Insert the asm result at %idx0, then the constant 63 at %idx0 + 1.
%vec1 = insertelement <16 x i32> %vec0, i32 %live.out.val, i32 %idx0
%vec2 = insertelement <16 x i32> %vec1, i32 63, i32 %idx1
store volatile <16 x i32> %vec2, ptr addrspace(1) %out0
; Only the work-item with id 0 takes the %bb1 path.
%cmp = icmp eq i32 %id, 0
br i1 %cmp, label %bb1, label %bb2

bb1:
; Uses %live.out.val outside the entry block; the store address is undef.
store volatile i32 %live.out.val, ptr addrspace(1) undef
br label %bb2

bb2:
ret void
}

; Avoid inserting extra v_mov from copies within the vgpr indexing sequence. The
; gpr_idx mode switching sequence is expanded late for this reason.

; GCN-LABEL: {{^}}insert_w_offset_multiple_in_block

; GCN: s_set_gpr_idx_on
; GCN-NEXT: v_mov_b32_e32
; GCN-NEXT: s_set_gpr_idx_off

; GCN: s_set_gpr_idx_on
; GCN-NEXT: v_mov_b32_e32
; GCN-NOT: v_mov_b32_e32
; GCN-NEXT: s_set_gpr_idx_off
; Kernel that inserts the constant 17.0 at two dynamic indices (%in + 1 and
; %in + 2) and stores both the intermediate and the final vector.
;   %out1 - destination of the first vector; the second vector is stored in
;           the <16 x float> slot immediately after it
;   %in   - base value from which both insertion indices are derived
define amdgpu_kernel void @insert_w_offset_multiple_in_block(ptr addrspace(1) %out1, i32 %in) #0 {
entry:
; First insertion: into the constant vector <1.0 .. 16.0> at index %in + 1.
%add1 = add i32 %in, 1
%ins1 = insertelement <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, float 17.0, i32 %add1
; Second insertion: builds on %ins1, at index %in + 2.
%add2 = add i32 %in, 2
%ins2 = insertelement <16 x float> %ins1, float 17.0, i32 %add2
; %ins1 is stored as well as %ins2, so both insertion results are used.
store <16 x float> %ins1, ptr addrspace(1) %out1
; %out2 addresses element 1 of a <16 x float> array based at %out1.
%out2 = getelementptr <16 x float>, ptr addrspace(1) %out1, i32 1
store <16 x float> %ins2, ptr addrspace(1) %out2

ret void
}

declare hidden void @foo()

; For functions with calls, we were not accounting for m0_lo16/m0_hi16
Expand All @@ -83,7 +19,4 @@ define amdgpu_kernel void @insertelement_with_call(ptr addrspace(1) %ptr, i32 %i
ret void
}

; Intrinsic declarations.
; NOTE(review): attribute groups #1 and #2 are referenced here but their
; definitions are not visible in this excerpt - confirm they exist in the file.
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare void @llvm.amdgcn.s.barrier() #2

; Attribute group #0: nounwind.
attributes #0 = { nounwind }
63 changes: 0 additions & 63 deletions llvm/test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll

This file was deleted.

53 changes: 0 additions & 53 deletions llvm/test/CodeGen/AMDGPU/indirect-addressing-si-pregfx9.ll

This file was deleted.

Loading
Loading