Skip to content

Commit c4c5fdd

Browse files
authored
[AMDGPU] Generate checks for vector indexing. NFC. (#105668)
This allows combining some test files that were only split because adding new RUN lines introduced too much churn in the checks.
1 parent ec5e585 commit c4c5fdd

File tree

4 files changed: +8066 -496 lines changed

llvm/test/CodeGen/AMDGPU/indirect-addressing-si-gfx9.ll

Lines changed: 0 additions & 67 deletions
Original file line number | Diff line number | Diff line change
@@ -2,70 +2,6 @@
2 2

3 3
; indexing of vectors.
4 4

5-
; Subtest below moved from file test/CodeGen/AMDGPU/indirect-addressing-si.ll
6-
; to avoid gfx9 scheduling-induced issues.
7-
8-
9-
; GCN-LABEL: {{^}}insert_vgpr_offset_multiple_in_block:
10-
; GCN-DAG: s_load_dwordx16 s[[[S_ELT0:[0-9]+]]:[[S_ELT15:[0-9]+]]]
11-
; GCN-DAG: {{buffer|flat|global}}_load_dword [[IDX0:v[0-9]+]]
12-
; GCN-DAG: v_mov_b32 [[INS0:v[0-9]+]], 62
13-
14-
; GCN-DAG: v_mov_b32_e32 v[[VEC_ELT15:[0-9]+]], s[[S_ELT15]]
15-
; GCN-DAG: v_mov_b32_e32 v[[VEC_ELT0:[0-9]+]], s[[S_ELT0]]
16-
17-
; GCN: v_cmp_eq_u32_e32
18-
; GCN-COUNT-32: v_cndmask_b32
19-
20-
; GCN-COUNT-4: buffer_store_dwordx4
21-
define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %in, <16 x i32> %vec0) #0 { ; Two dynamically indexed inserts into %vec0 within one block; %out1 is not referenced in the body.
22-
entry:
23-
%id = call i32 @llvm.amdgcn.workitem.id.x() #1 ; x work-item id; selects the element of %in to read and drives the branch below
24-
%id.ext = zext i32 %id to i64 ; widen the id for use as a 64-bit GEP index
25-
%gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %id.ext ; address of %in[%id]
26-
%idx0 = load volatile i32, ptr addrspace(1) %gep ; first insert index, loaded from memory (volatile)
27-
%idx1 = add i32 %idx0, 1 ; second insert index = first index + 1
28-
%live.out.val = call i32 asm sideeffect "v_mov_b32 $0, 62", "=v"() ; inline-asm result; inserted at %idx0 and stored again in bb1
29-
%vec1 = insertelement <16 x i32> %vec0, i32 %live.out.val, i32 %idx0 ; first insert: asm result at %idx0
30-
%vec2 = insertelement <16 x i32> %vec1, i32 63, i32 %idx1 ; second insert: constant 63 at %idx1
31-
store volatile <16 x i32> %vec2, ptr addrspace(1) %out0 ; write the fully updated vector to %out0
32-
%cmp = icmp eq i32 %id, 0 ; true only when the work-item id is 0
33-
br i1 %cmp, label %bb1, label %bb2 ; id 0 goes through bb1, everything else jumps straight to bb2
34-
35-
bb1:
36-
store volatile i32 %live.out.val, ptr addrspace(1) undef ; uses %live.out.val outside the entry block; the destination pointer is undef
37-
br label %bb2
38-
39-
bb2:
40-
ret void
41-
}
42-
43-
; Avoid inserting extra v_mov from copies within the vgpr indexing sequence. The
44-
; gpr_idx mode switching sequence is expanded late for this reason.
45-
46-
; GCN-LABEL: {{^}}insert_w_offset_multiple_in_block
47-
48-
; GCN: s_set_gpr_idx_on
49-
; GCN-NEXT: v_mov_b32_e32
50-
; GCN-NEXT: s_set_gpr_idx_off
51-
52-
; GCN: s_set_gpr_idx_on
53-
; GCN-NEXT: v_mov_b32_e32
54-
; GCN-NOT: v_mov_b32_e32
55-
; GCN-NEXT: s_set_gpr_idx_off
56-
define amdgpu_kernel void @insert_w_offset_multiple_in_block(ptr addrspace(1) %out1, i32 %in) #0 { ; Two dynamic inserts of 17.0 (at %in+1 and %in+2) starting from a constant vector; both results are stored.
57-
entry:
58-
%add1 = add i32 %in, 1 ; first insert index = %in + 1
59-
%ins1 = insertelement <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, float 17.0, i32 %add1 ; constant vector 1.0..16.0 with 17.0 written at %add1
60-
%add2 = add i32 %in, 2 ; second insert index = %in + 2
61-
%ins2 = insertelement <16 x float> %ins1, float 17.0, i32 %add2 ; builds on %ins1: 17.0 also written at %add2
62-
store <16 x float> %ins1, ptr addrspace(1) %out1 ; the intermediate vector is stored too, so both inserts have a use
63-
%out2 = getelementptr <16 x float>, ptr addrspace(1) %out1, i32 1 ; address of the next <16 x float> slot after %out1
64-
store <16 x float> %ins2, ptr addrspace(1) %out2 ; final vector goes to the second slot
65-
66-
ret void
67-
}
68-
69 5
declare hidden void @foo()
70 6

71 7
; For functions with calls, we were not accounting for m0_lo16/m0_hi16
@@ -83,7 +19,4 @@ define amdgpu_kernel void @insertelement_with_call(ptr addrspace(1) %ptr, i32 %i
83 19
ret void
84 20
}
85 21

86-
declare i32 @llvm.amdgcn.workitem.id.x() #1
87-
declare void @llvm.amdgcn.s.barrier() #2
88-
89 22
attributes #0 = { nounwind }

llvm/test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll

Lines changed: 0 additions & 63 deletions
This file was deleted.

llvm/test/CodeGen/AMDGPU/indirect-addressing-si-pregfx9.ll

Lines changed: 0 additions & 53 deletions
This file was deleted.

0 commit comments

Comments (0)