@@ -1,4 +1,4 @@
-; RUN: opt -opaque-pointers=0 -S -mtriple=amdgcn-- -mcpu=bonaire -loop-reduce < %s | FileCheck -check-prefix=OPT %s
+; RUN: opt -S -mtriple=amdgcn-- -mcpu=bonaire -loop-reduce < %s | FileCheck -check-prefix=OPT %s
 
 target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
 
@@ -7,18 +7,18 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
 ; OPT-LABEL: @test_local_atomicrmw_addressing_loop_uniform_index_max_offset_i32(
 
 ; OPT: .lr.ph.preheader:
-; OPT: %scevgep2 = getelementptr i32, i32 addrspace(3)* %arg1, i32 16383
+; OPT: %uglygep2 = getelementptr i8, ptr addrspace(3) %arg1, i32 65532
 ; OPT: br label %.lr.ph
 ; OPT: .lr.ph:
-; OPT: %lsr.iv3 = phi i32 addrspace(3)* [ %scevgep4, %.lr.ph ], [ %scevgep2, %.lr.ph.preheader ]
-; OPT: %lsr.iv1 = phi i32 addrspace(3)* [ %scevgep, %.lr.ph ], [ %arg0, %.lr.ph.preheader ]
+; OPT: %lsr.iv3 = phi ptr addrspace(3) [ %uglygep4, %.lr.ph ], [ %uglygep2, %.lr.ph.preheader ]
+; OPT: %lsr.iv1 = phi ptr addrspace(3) [ %uglygep, %.lr.ph ], [ %arg0, %.lr.ph.preheader ]
 ; OPT: %lsr.iv = phi i32 [ %lsr.iv.next, %.lr.ph ], [ %n, %.lr.ph.preheader ]
-; OPT: %tmp4 = atomicrmw add i32 addrspace(3)* %lsr.iv3, i32 undef seq_cst, align 4
-; OPT: %tmp7 = atomicrmw add i32 addrspace(3)* %lsr.iv1, i32 undef seq_cst, align 4
-; OPT: %0 = atomicrmw add i32 addrspace(3)* %lsr.iv1, i32 %tmp8 seq_cst, align 4
-; OPT: %scevgep4 = getelementptr i32, i32 addrspace(3)* %lsr.iv3, i32 1
+; OPT: %tmp4 = atomicrmw add ptr addrspace(3) %lsr.iv3, i32 undef seq_cst, align 4
+; OPT: %tmp7 = atomicrmw add ptr addrspace(3) %lsr.iv1, i32 undef seq_cst, align 4
+; OPT: %0 = atomicrmw add ptr addrspace(3) %lsr.iv1, i32 %tmp8 seq_cst, align 4
+; OPT: %uglygep4 = getelementptr i8, ptr addrspace(3) %lsr.iv3, i32 4
 ; OPT: br i1 %exitcond
-define amdgpu_kernel void @test_local_atomicrmw_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(3)* noalias nocapture %arg0, i32 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 {
+define amdgpu_kernel void @test_local_atomicrmw_addressing_loop_uniform_index_max_offset_i32(ptr addrspace(3) noalias nocapture %arg0, ptr addrspace(3) noalias nocapture readonly %arg1, i32 %n) #0 {
 bb:
   %tmp = icmp sgt i32 %n, 0
   br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge
@@ -35,12 +35,12 @@ bb:
 .lr.ph:                                           ; preds = %.lr.ph, %.lr.ph.preheader
   %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
   %tmp1 = add nuw nsw i32 %indvars.iv, 16383
-  %tmp3 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 %tmp1
-  %tmp4 = atomicrmw add i32 addrspace(3)* %tmp3, i32 undef seq_cst
-  %tmp6 = getelementptr inbounds i32, i32 addrspace(3)* %arg0, i32 %indvars.iv
-  %tmp7 = atomicrmw add i32 addrspace(3)* %tmp6, i32 undef seq_cst
+  %tmp3 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 %tmp1
+  %tmp4 = atomicrmw add ptr addrspace(3) %tmp3, i32 undef seq_cst
+  %tmp6 = getelementptr inbounds i32, ptr addrspace(3) %arg0, i32 %indvars.iv
+  %tmp7 = atomicrmw add ptr addrspace(3) %tmp6, i32 undef seq_cst
   %tmp8 = add nsw i32 %tmp7, %tmp4
-  atomicrmw add i32 addrspace(3)* %tmp6, i32 %tmp8 seq_cst
+  atomicrmw add ptr addrspace(3) %tmp6, i32 %tmp8 seq_cst
   %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
   %exitcond = icmp eq i32 %indvars.iv.next, %n
   br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
@@ -49,15 +49,15 @@ bb:
 ; OPT-LABEL: test_local_cmpxchg_addressing_loop_uniform_index_max_offset_i32(
 
 ; OPT: .lr.ph.preheader:
-; OPT: %scevgep2 = getelementptr i32, i32 addrspace(3)* %arg1, i32 16383
+; OPT: %uglygep2 = getelementptr i8, ptr addrspace(3) %arg1, i32 65532
 ; OPT: br label %.lr.ph
 ; OPT: .lr.ph:
-; OPT: %lsr.iv3 = phi i32 addrspace(3)* [ %scevgep4, %.lr.ph ], [ %scevgep2, %.lr.ph.preheader ]
-; OPT: %lsr.iv1 = phi i32 addrspace(3)* [ %scevgep, %.lr.ph ], [ %arg0, %.lr.ph.preheader ]
+; OPT: %lsr.iv3 = phi ptr addrspace(3) [ %uglygep4, %.lr.ph ], [ %uglygep2, %.lr.ph.preheader ]
+; OPT: %lsr.iv1 = phi ptr addrspace(3) [ %uglygep, %.lr.ph ], [ %arg0, %.lr.ph.preheader ]
 ; OPT: %lsr.iv = phi i32 [ %lsr.iv.next, %.lr.ph ], [ %n, %.lr.ph.preheader ]
-; OPT: %tmp4 = cmpxchg i32 addrspace(3)* %lsr.iv3, i32 undef, i32 undef seq_cst monotonic, align 4
-; OPT: %scevgep4 = getelementptr i32, i32 addrspace(3)* %lsr.iv3, i32 1
-define amdgpu_kernel void @test_local_cmpxchg_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(3)* noalias nocapture %arg0, i32 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 {
+; OPT: %tmp4 = cmpxchg ptr addrspace(3) %lsr.iv3, i32 undef, i32 undef seq_cst monotonic, align 4
+; OPT: %uglygep4 = getelementptr i8, ptr addrspace(3) %lsr.iv3, i32 4
+define amdgpu_kernel void @test_local_cmpxchg_addressing_loop_uniform_index_max_offset_i32(ptr addrspace(3) noalias nocapture %arg0, ptr addrspace(3) noalias nocapture readonly %arg1, i32 %n) #0 {
 bb:
   %tmp = icmp sgt i32 %n, 0
   br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge
@@ -74,14 +74,14 @@ bb:
 .lr.ph:                                           ; preds = %.lr.ph, %.lr.ph.preheader
   %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
   %tmp1 = add nuw nsw i32 %indvars.iv, 16383
-  %tmp3 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 %tmp1
-  %tmp4 = cmpxchg i32 addrspace(3)* %tmp3, i32 undef, i32 undef seq_cst monotonic
+  %tmp3 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 %tmp1
+  %tmp4 = cmpxchg ptr addrspace(3) %tmp3, i32 undef, i32 undef seq_cst monotonic
   %tmp4.0 = extractvalue { i32, i1 } %tmp4, 0
-  %tmp6 = getelementptr inbounds i32, i32 addrspace(3)* %arg0, i32 %indvars.iv
-  %tmp7 = cmpxchg i32 addrspace(3)* %tmp6, i32 undef, i32 undef seq_cst monotonic
+  %tmp6 = getelementptr inbounds i32, ptr addrspace(3) %arg0, i32 %indvars.iv
+  %tmp7 = cmpxchg ptr addrspace(3) %tmp6, i32 undef, i32 undef seq_cst monotonic
   %tmp7.0 = extractvalue { i32, i1 } %tmp7, 0
   %tmp8 = add nsw i32 %tmp7.0, %tmp4.0
-  atomicrmw add i32 addrspace(3)* %tmp6, i32 %tmp8 seq_cst
+  atomicrmw add ptr addrspace(3) %tmp6, i32 %tmp8 seq_cst
   %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
   %exitcond = icmp eq i32 %indvars.iv.next, %n
   br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
@@ -90,16 +90,16 @@ bb:
 ; OPT-LABEL: @test_local_atomicinc_addressing_loop_uniform_index_max_offset_i32(
 
 ; OPT: .lr.ph.preheader:
-; OPT: %scevgep2 = getelementptr i32, i32 addrspace(3)* %arg1, i32 16383
+; OPT: %uglygep2 = getelementptr i8, ptr addrspace(3) %arg1, i32 65532
 ; OPT: br label %.lr.ph
 ; OPT: .lr.ph:
-; OPT: %lsr.iv3 = phi i32 addrspace(3)* [ %scevgep4, %.lr.ph ], [ %scevgep2, %.lr.ph.preheader ]
-; OPT: %lsr.iv1 = phi i32 addrspace(3)* [ %scevgep, %.lr.ph ], [ %arg0, %.lr.ph.preheader ]
+; OPT: %lsr.iv3 = phi ptr addrspace(3) [ %uglygep4, %.lr.ph ], [ %uglygep2, %.lr.ph.preheader ]
+; OPT: %lsr.iv1 = phi ptr addrspace(3) [ %uglygep, %.lr.ph ], [ %arg0, %.lr.ph.preheader ]
 ; OPT: %lsr.iv = phi i32 [ %lsr.iv.next, %.lr.ph ], [ %n, %.lr.ph.preheader ]
-; OPT: %tmp4 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %lsr.iv3, i32 undef, i32 0, i32 0, i1 false)
-; OPT: %tmp7 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %lsr.iv1, i32 undef, i32 0, i32 0, i1 false)
-; OPT: %scevgep4 = getelementptr i32, i32 addrspace(3)* %lsr.iv3, i32 1
-define amdgpu_kernel void @test_local_atomicinc_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(3)* noalias nocapture %arg0, i32 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 {
+; OPT: %tmp4 = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %lsr.iv3, i32 undef, i32 0, i32 0, i1 false)
+; OPT: %tmp7 = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %lsr.iv1, i32 undef, i32 0, i32 0, i1 false)
+; OPT: %uglygep4 = getelementptr i8, ptr addrspace(3) %lsr.iv3, i32 4
+define amdgpu_kernel void @test_local_atomicinc_addressing_loop_uniform_index_max_offset_i32(ptr addrspace(3) noalias nocapture %arg0, ptr addrspace(3) noalias nocapture readonly %arg1, i32 %n) #0 {
 bb:
   %tmp = icmp sgt i32 %n, 0
   br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge
@@ -116,29 +116,29 @@ bb:
 .lr.ph:                                           ; preds = %.lr.ph, %.lr.ph.preheader
   %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
   %tmp1 = add nuw nsw i32 %indvars.iv, 16383
-  %tmp3 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 %tmp1
-  %tmp4 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %tmp3, i32 undef, i32 0, i32 0, i1 false)
-  %tmp6 = getelementptr inbounds i32, i32 addrspace(3)* %arg0, i32 %indvars.iv
-  %tmp7 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %tmp6, i32 undef, i32 0, i32 0, i1 false)
+  %tmp3 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 %tmp1
+  %tmp4 = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %tmp3, i32 undef, i32 0, i32 0, i1 false)
+  %tmp6 = getelementptr inbounds i32, ptr addrspace(3) %arg0, i32 %indvars.iv
+  %tmp7 = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %tmp6, i32 undef, i32 0, i32 0, i1 false)
   %tmp8 = add nsw i32 %tmp7, %tmp4
-  atomicrmw add i32 addrspace(3)* %tmp6, i32 %tmp8 seq_cst
+  atomicrmw add ptr addrspace(3) %tmp6, i32 %tmp8 seq_cst
   %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
   %exitcond = icmp eq i32 %indvars.iv.next, %n
   br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
 }
 
 ; OPT-LABEL: @test_local_atomicdec_addressing_loop_uniform_index_max_offset_i32(
 ; OPT: .lr.ph.preheader:
-; OPT: %scevgep2 = getelementptr i32, i32 addrspace(3)* %arg1, i32 16383
+; OPT: %uglygep2 = getelementptr i8, ptr addrspace(3) %arg1, i32 65532
 ; OPT: br label %.lr.ph
 ; OPT: .lr.ph:
-; OPT: %lsr.iv3 = phi i32 addrspace(3)* [ %scevgep4, %.lr.ph ], [ %scevgep2, %.lr.ph.preheader ]
-; OPT: %lsr.iv1 = phi i32 addrspace(3)* [ %scevgep, %.lr.ph ], [ %arg0, %.lr.ph.preheader ]
+; OPT: %lsr.iv3 = phi ptr addrspace(3) [ %uglygep4, %.lr.ph ], [ %uglygep2, %.lr.ph.preheader ]
+; OPT: %lsr.iv1 = phi ptr addrspace(3) [ %uglygep, %.lr.ph ], [ %arg0, %.lr.ph.preheader ]
 ; OPT: %lsr.iv = phi i32 [ %lsr.iv.next, %.lr.ph ], [ %n, %.lr.ph.preheader ]
-; OPT: %tmp4 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %lsr.iv3, i32 undef, i32 0, i32 0, i1 false)
-; OPT: %tmp7 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %lsr.iv1, i32 undef, i32 0, i32 0, i1 false)
-; OPT: %scevgep4 = getelementptr i32, i32 addrspace(3)* %lsr.iv3, i32 1
-define amdgpu_kernel void @test_local_atomicdec_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(3)* noalias nocapture %arg0, i32 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 {
+; OPT: %tmp4 = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) %lsr.iv3, i32 undef, i32 0, i32 0, i1 false)
+; OPT: %tmp7 = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) %lsr.iv1, i32 undef, i32 0, i32 0, i1 false)
+; OPT: %uglygep4 = getelementptr i8, ptr addrspace(3) %lsr.iv3, i32 4
+define amdgpu_kernel void @test_local_atomicdec_addressing_loop_uniform_index_max_offset_i32(ptr addrspace(3) noalias nocapture %arg0, ptr addrspace(3) noalias nocapture readonly %arg1, i32 %n) #0 {
 bb:
   %tmp = icmp sgt i32 %n, 0
   br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge
@@ -155,19 +155,19 @@ bb:
 .lr.ph:                                           ; preds = %.lr.ph, %.lr.ph.preheader
   %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
   %tmp1 = add nuw nsw i32 %indvars.iv, 16383
-  %tmp3 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 %tmp1
-  %tmp4 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %tmp3, i32 undef, i32 0, i32 0, i1 false)
-  %tmp6 = getelementptr inbounds i32, i32 addrspace(3)* %arg0, i32 %indvars.iv
-  %tmp7 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %tmp6, i32 undef, i32 0, i32 0, i1 false)
+  %tmp3 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 %tmp1
+  %tmp4 = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) %tmp3, i32 undef, i32 0, i32 0, i1 false)
+  %tmp6 = getelementptr inbounds i32, ptr addrspace(3) %arg0, i32 %indvars.iv
+  %tmp7 = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) %tmp6, i32 undef, i32 0, i32 0, i1 false)
   %tmp8 = add nsw i32 %tmp7, %tmp4
-  atomicrmw add i32 addrspace(3)* %tmp6, i32 %tmp8 seq_cst
+  atomicrmw add ptr addrspace(3) %tmp6, i32 %tmp8 seq_cst
   %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
   %exitcond = icmp eq i32 %indvars.iv.next, %n
   br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
 }
 
-declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #1
-declare i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #1
+declare i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) nocapture, i32, i32, i32, i1) #1
+declare i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) nocapture, i32, i32, i32, i1) #1
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind argmemonly }