2
2
3
3
; indexing of vectors.
4
4
5
- ; Subtest below moved from file test/CodeGen/AMDGPU/indirect-addressing-si.ll
6
- ; to avoid gfx9 scheduling induced issues.
7
-
8
-
9
- ; GCN-LABEL: {{^}}insert_vgpr_offset_multiple_in_block:
10
- ; GCN-DAG: s_load_dwordx16 s[[[S_ELT0:[0-9]+]]:[[S_ELT15:[0-9]+]]]
11
- ; GCN-DAG: {{buffer|flat|global}}_load_dword [[IDX0:v[0-9]+]]
12
- ; GCN-DAG: v_mov_b32 [[INS0:v[0-9]+]], 62
13
-
14
- ; GCN-DAG: v_mov_b32_e32 v[[VEC_ELT15:[0-9]+]], s[[S_ELT15]]
15
- ; GCN-DAG: v_mov_b32_e32 v[[VEC_ELT0:[0-9]+]], s[[S_ELT0]]
16
-
17
- ; GCN: v_cmp_eq_u32_e32
18
- ; GCN-COUNT-32: v_cndmask_b32
19
-
20
- ; GCN-COUNT-4: buffer_store_dwordx4
21
- define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block (ptr addrspace (1 ) %out0 , ptr addrspace (1 ) %out1 , ptr addrspace (1 ) %in , <16 x i32 > %vec0 ) #0 {
22
- entry:
23
- %id = call i32 @llvm.amdgcn.workitem.id.x () #1
24
- %id.ext = zext i32 %id to i64
25
- %gep = getelementptr inbounds i32 , ptr addrspace (1 ) %in , i64 %id.ext
26
- %idx0 = load volatile i32 , ptr addrspace (1 ) %gep
27
- %idx1 = add i32 %idx0 , 1
28
- %live.out.val = call i32 asm sideeffect "v_mov_b32 $0, 62" , "=v" ()
29
- %vec1 = insertelement <16 x i32 > %vec0 , i32 %live.out.val , i32 %idx0
30
- %vec2 = insertelement <16 x i32 > %vec1 , i32 63 , i32 %idx1
31
- store volatile <16 x i32 > %vec2 , ptr addrspace (1 ) %out0
32
- %cmp = icmp eq i32 %id , 0
33
- br i1 %cmp , label %bb1 , label %bb2
34
-
35
- bb1:
36
- store volatile i32 %live.out.val , ptr addrspace (1 ) undef
37
- br label %bb2
38
-
39
- bb2:
40
- ret void
41
- }
42
-
43
- ; Avoid inserting extra v_mov from copies within the vgpr indexing sequence. The
44
- ; gpr_idx mode switching sequence is expanded late for this reason.
45
-
46
- ; GCN-LABEL: {{^}}insert_w_offset_multiple_in_block
47
-
48
- ; GCN: s_set_gpr_idx_on
49
- ; GCN-NEXT: v_mov_b32_e32
50
- ; GCN-NEXT: s_set_gpr_idx_off
51
-
52
- ; GCN: s_set_gpr_idx_on
53
- ; GCN-NEXT: v_mov_b32_e32
54
- ; GCN-NOT: v_mov_b32_e32
55
- ; GCN-NEXT: s_set_gpr_idx_off
56
- define amdgpu_kernel void @insert_w_offset_multiple_in_block (ptr addrspace (1 ) %out1 , i32 %in ) #0 {
57
- entry:
58
- %add1 = add i32 %in , 1
59
- %ins1 = insertelement <16 x float > <float 1 .0 , float 2 .0 , float 3 .0 , float 4 .0 , float 5 .0 , float 6 .0 , float 7 .0 , float 8 .0 , float 9 .0 , float 10 .0 , float 11 .0 , float 12 .0 , float 13 .0 , float 14 .0 , float 15 .0 , float 16 .0 >, float 17 .0 , i32 %add1
60
- %add2 = add i32 %in , 2
61
- %ins2 = insertelement <16 x float > %ins1 , float 17 .0 , i32 %add2
62
- store <16 x float > %ins1 , ptr addrspace (1 ) %out1
63
- %out2 = getelementptr <16 x float >, ptr addrspace (1 ) %out1 , i32 1
64
- store <16 x float > %ins2 , ptr addrspace (1 ) %out2
65
-
66
- ret void
67
- }
68
-
69
5
declare hidden void @foo ()
70
6
71
7
; For functions with calls, we were not accounting for m0_lo16/m0_hi16
@@ -83,7 +19,4 @@ define amdgpu_kernel void @insertelement_with_call(ptr addrspace(1) %ptr, i32 %i
83
19
ret void
84
20
}
85
21
86
- declare i32 @llvm.amdgcn.workitem.id.x () #1
87
- declare void @llvm.amdgcn.s.barrier () #2
88
-
89
22
attributes #0 = { nounwind }
0 commit comments