Skip to content

Commit aa65dba

Browse files
committed
LoopStrengthReduce: Convert AMDGPU tests to opaque pointers
1 parent 93ec3fa commit aa65dba

File tree

4 files changed

+154
-159
lines changed

4 files changed

+154
-159
lines changed
Lines changed: 51 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt -opaque-pointers=0 -S -mtriple=amdgcn-- -mcpu=bonaire -loop-reduce < %s | FileCheck -check-prefix=OPT %s
1+
; RUN: opt -S -mtriple=amdgcn-- -mcpu=bonaire -loop-reduce < %s | FileCheck -check-prefix=OPT %s
22

33
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
44

@@ -7,18 +7,18 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
77
; OPT-LABEL: @test_local_atomicrmw_addressing_loop_uniform_index_max_offset_i32(
88

99
; OPT: .lr.ph.preheader:
10-
; OPT: %scevgep2 = getelementptr i32, i32 addrspace(3)* %arg1, i32 16383
10+
; OPT: %uglygep2 = getelementptr i8, ptr addrspace(3) %arg1, i32 65532
1111
; OPT: br label %.lr.ph
1212
; OPT: .lr.ph:
13-
; OPT: %lsr.iv3 = phi i32 addrspace(3)* [ %scevgep4, %.lr.ph ], [ %scevgep2, %.lr.ph.preheader ]
14-
; OPT: %lsr.iv1 = phi i32 addrspace(3)* [ %scevgep, %.lr.ph ], [ %arg0, %.lr.ph.preheader ]
13+
; OPT: %lsr.iv3 = phi ptr addrspace(3) [ %uglygep4, %.lr.ph ], [ %uglygep2, %.lr.ph.preheader ]
14+
; OPT: %lsr.iv1 = phi ptr addrspace(3) [ %uglygep, %.lr.ph ], [ %arg0, %.lr.ph.preheader ]
1515
; OPT: %lsr.iv = phi i32 [ %lsr.iv.next, %.lr.ph ], [ %n, %.lr.ph.preheader ]
16-
; OPT: %tmp4 = atomicrmw add i32 addrspace(3)* %lsr.iv3, i32 undef seq_cst, align 4
17-
; OPT: %tmp7 = atomicrmw add i32 addrspace(3)* %lsr.iv1, i32 undef seq_cst, align 4
18-
; OPT: %0 = atomicrmw add i32 addrspace(3)* %lsr.iv1, i32 %tmp8 seq_cst, align 4
19-
; OPT: %scevgep4 = getelementptr i32, i32 addrspace(3)* %lsr.iv3, i32 1
16+
; OPT: %tmp4 = atomicrmw add ptr addrspace(3) %lsr.iv3, i32 undef seq_cst, align 4
17+
; OPT: %tmp7 = atomicrmw add ptr addrspace(3) %lsr.iv1, i32 undef seq_cst, align 4
18+
; OPT: %0 = atomicrmw add ptr addrspace(3) %lsr.iv1, i32 %tmp8 seq_cst, align 4
19+
; OPT: %uglygep4 = getelementptr i8, ptr addrspace(3) %lsr.iv3, i32 4
2020
; OPT: br i1 %exitcond
21-
define amdgpu_kernel void @test_local_atomicrmw_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(3)* noalias nocapture %arg0, i32 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 {
21+
define amdgpu_kernel void @test_local_atomicrmw_addressing_loop_uniform_index_max_offset_i32(ptr addrspace(3) noalias nocapture %arg0, ptr addrspace(3) noalias nocapture readonly %arg1, i32 %n) #0 {
2222
bb:
2323
%tmp = icmp sgt i32 %n, 0
2424
br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge
@@ -35,12 +35,12 @@ bb:
3535
.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader
3636
%indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
3737
%tmp1 = add nuw nsw i32 %indvars.iv, 16383
38-
%tmp3 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 %tmp1
39-
%tmp4 = atomicrmw add i32 addrspace(3)* %tmp3, i32 undef seq_cst
40-
%tmp6 = getelementptr inbounds i32, i32 addrspace(3)* %arg0, i32 %indvars.iv
41-
%tmp7 = atomicrmw add i32 addrspace(3)* %tmp6, i32 undef seq_cst
38+
%tmp3 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 %tmp1
39+
%tmp4 = atomicrmw add ptr addrspace(3) %tmp3, i32 undef seq_cst
40+
%tmp6 = getelementptr inbounds i32, ptr addrspace(3) %arg0, i32 %indvars.iv
41+
%tmp7 = atomicrmw add ptr addrspace(3) %tmp6, i32 undef seq_cst
4242
%tmp8 = add nsw i32 %tmp7, %tmp4
43-
atomicrmw add i32 addrspace(3)* %tmp6, i32 %tmp8 seq_cst
43+
atomicrmw add ptr addrspace(3) %tmp6, i32 %tmp8 seq_cst
4444
%indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
4545
%exitcond = icmp eq i32 %indvars.iv.next, %n
4646
br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
@@ -49,15 +49,15 @@ bb:
4949
; OPT-LABEL: @test_local_cmpxchg_addressing_loop_uniform_index_max_offset_i32(
5050

5151
; OPT: .lr.ph.preheader:
52-
; OPT: %scevgep2 = getelementptr i32, i32 addrspace(3)* %arg1, i32 16383
52+
; OPT: %uglygep2 = getelementptr i8, ptr addrspace(3) %arg1, i32 65532
5353
; OPT: br label %.lr.ph
5454
; OPT: .lr.ph:
55-
; OPT: %lsr.iv3 = phi i32 addrspace(3)* [ %scevgep4, %.lr.ph ], [ %scevgep2, %.lr.ph.preheader ]
56-
; OPT: %lsr.iv1 = phi i32 addrspace(3)* [ %scevgep, %.lr.ph ], [ %arg0, %.lr.ph.preheader ]
55+
; OPT: %lsr.iv3 = phi ptr addrspace(3) [ %uglygep4, %.lr.ph ], [ %uglygep2, %.lr.ph.preheader ]
56+
; OPT: %lsr.iv1 = phi ptr addrspace(3) [ %uglygep, %.lr.ph ], [ %arg0, %.lr.ph.preheader ]
5757
; OPT: %lsr.iv = phi i32 [ %lsr.iv.next, %.lr.ph ], [ %n, %.lr.ph.preheader ]
58-
; OPT: %tmp4 = cmpxchg i32 addrspace(3)* %lsr.iv3, i32 undef, i32 undef seq_cst monotonic, align 4
59-
; OPT: %scevgep4 = getelementptr i32, i32 addrspace(3)* %lsr.iv3, i32 1
60-
define amdgpu_kernel void @test_local_cmpxchg_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(3)* noalias nocapture %arg0, i32 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 {
58+
; OPT: %tmp4 = cmpxchg ptr addrspace(3) %lsr.iv3, i32 undef, i32 undef seq_cst monotonic, align 4
59+
; OPT: %uglygep4 = getelementptr i8, ptr addrspace(3) %lsr.iv3, i32 4
60+
define amdgpu_kernel void @test_local_cmpxchg_addressing_loop_uniform_index_max_offset_i32(ptr addrspace(3) noalias nocapture %arg0, ptr addrspace(3) noalias nocapture readonly %arg1, i32 %n) #0 {
6161
bb:
6262
%tmp = icmp sgt i32 %n, 0
6363
br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge
@@ -74,14 +74,14 @@ bb:
7474
.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader
7575
%indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
7676
%tmp1 = add nuw nsw i32 %indvars.iv, 16383
77-
%tmp3 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 %tmp1
78-
%tmp4 = cmpxchg i32 addrspace(3)* %tmp3, i32 undef, i32 undef seq_cst monotonic
77+
%tmp3 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 %tmp1
78+
%tmp4 = cmpxchg ptr addrspace(3) %tmp3, i32 undef, i32 undef seq_cst monotonic
7979
%tmp4.0 = extractvalue { i32, i1 } %tmp4, 0
80-
%tmp6 = getelementptr inbounds i32, i32 addrspace(3)* %arg0, i32 %indvars.iv
81-
%tmp7 = cmpxchg i32 addrspace(3)* %tmp6, i32 undef, i32 undef seq_cst monotonic
80+
%tmp6 = getelementptr inbounds i32, ptr addrspace(3) %arg0, i32 %indvars.iv
81+
%tmp7 = cmpxchg ptr addrspace(3) %tmp6, i32 undef, i32 undef seq_cst monotonic
8282
%tmp7.0 = extractvalue { i32, i1 } %tmp7, 0
8383
%tmp8 = add nsw i32 %tmp7.0, %tmp4.0
84-
atomicrmw add i32 addrspace(3)* %tmp6, i32 %tmp8 seq_cst
84+
atomicrmw add ptr addrspace(3) %tmp6, i32 %tmp8 seq_cst
8585
%indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
8686
%exitcond = icmp eq i32 %indvars.iv.next, %n
8787
br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
@@ -90,16 +90,16 @@ bb:
9090
; OPT-LABEL: @test_local_atomicinc_addressing_loop_uniform_index_max_offset_i32(
9191

9292
; OPT: .lr.ph.preheader:
93-
; OPT: %scevgep2 = getelementptr i32, i32 addrspace(3)* %arg1, i32 16383
93+
; OPT: %uglygep2 = getelementptr i8, ptr addrspace(3) %arg1, i32 65532
9494
; OPT: br label %.lr.ph
9595
; OPT: .lr.ph:
96-
; OPT: %lsr.iv3 = phi i32 addrspace(3)* [ %scevgep4, %.lr.ph ], [ %scevgep2, %.lr.ph.preheader ]
97-
; OPT: %lsr.iv1 = phi i32 addrspace(3)* [ %scevgep, %.lr.ph ], [ %arg0, %.lr.ph.preheader ]
96+
; OPT: %lsr.iv3 = phi ptr addrspace(3) [ %uglygep4, %.lr.ph ], [ %uglygep2, %.lr.ph.preheader ]
97+
; OPT: %lsr.iv1 = phi ptr addrspace(3) [ %uglygep, %.lr.ph ], [ %arg0, %.lr.ph.preheader ]
9898
; OPT: %lsr.iv = phi i32 [ %lsr.iv.next, %.lr.ph ], [ %n, %.lr.ph.preheader ]
99-
; OPT: %tmp4 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %lsr.iv3, i32 undef, i32 0, i32 0, i1 false)
100-
; OPT: %tmp7 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %lsr.iv1, i32 undef, i32 0, i32 0, i1 false)
101-
; OPT: %scevgep4 = getelementptr i32, i32 addrspace(3)* %lsr.iv3, i32 1
102-
define amdgpu_kernel void @test_local_atomicinc_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(3)* noalias nocapture %arg0, i32 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 {
99+
; OPT: %tmp4 = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %lsr.iv3, i32 undef, i32 0, i32 0, i1 false)
100+
; OPT: %tmp7 = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %lsr.iv1, i32 undef, i32 0, i32 0, i1 false)
101+
; OPT: %uglygep4 = getelementptr i8, ptr addrspace(3) %lsr.iv3, i32 4
102+
define amdgpu_kernel void @test_local_atomicinc_addressing_loop_uniform_index_max_offset_i32(ptr addrspace(3) noalias nocapture %arg0, ptr addrspace(3) noalias nocapture readonly %arg1, i32 %n) #0 {
103103
bb:
104104
%tmp = icmp sgt i32 %n, 0
105105
br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge
@@ -116,29 +116,29 @@ bb:
116116
.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader
117117
%indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
118118
%tmp1 = add nuw nsw i32 %indvars.iv, 16383
119-
%tmp3 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 %tmp1
120-
%tmp4 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %tmp3, i32 undef, i32 0, i32 0, i1 false)
121-
%tmp6 = getelementptr inbounds i32, i32 addrspace(3)* %arg0, i32 %indvars.iv
122-
%tmp7 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %tmp6, i32 undef, i32 0, i32 0, i1 false)
119+
%tmp3 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 %tmp1
120+
%tmp4 = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %tmp3, i32 undef, i32 0, i32 0, i1 false)
121+
%tmp6 = getelementptr inbounds i32, ptr addrspace(3) %arg0, i32 %indvars.iv
122+
%tmp7 = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %tmp6, i32 undef, i32 0, i32 0, i1 false)
123123
%tmp8 = add nsw i32 %tmp7, %tmp4
124-
atomicrmw add i32 addrspace(3)* %tmp6, i32 %tmp8 seq_cst
124+
atomicrmw add ptr addrspace(3) %tmp6, i32 %tmp8 seq_cst
125125
%indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
126126
%exitcond = icmp eq i32 %indvars.iv.next, %n
127127
br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
128128
}
129129

130130
; OPT-LABEL: @test_local_atomicdec_addressing_loop_uniform_index_max_offset_i32(
131131
; OPT: .lr.ph.preheader:
132-
; OPT: %scevgep2 = getelementptr i32, i32 addrspace(3)* %arg1, i32 16383
132+
; OPT: %uglygep2 = getelementptr i8, ptr addrspace(3) %arg1, i32 65532
133133
; OPT: br label %.lr.ph
134134
; OPT: .lr.ph:
135-
; OPT: %lsr.iv3 = phi i32 addrspace(3)* [ %scevgep4, %.lr.ph ], [ %scevgep2, %.lr.ph.preheader ]
136-
; OPT: %lsr.iv1 = phi i32 addrspace(3)* [ %scevgep, %.lr.ph ], [ %arg0, %.lr.ph.preheader ]
135+
; OPT: %lsr.iv3 = phi ptr addrspace(3) [ %uglygep4, %.lr.ph ], [ %uglygep2, %.lr.ph.preheader ]
136+
; OPT: %lsr.iv1 = phi ptr addrspace(3) [ %uglygep, %.lr.ph ], [ %arg0, %.lr.ph.preheader ]
137137
; OPT: %lsr.iv = phi i32 [ %lsr.iv.next, %.lr.ph ], [ %n, %.lr.ph.preheader ]
138-
; OPT: %tmp4 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %lsr.iv3, i32 undef, i32 0, i32 0, i1 false)
139-
; OPT: %tmp7 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %lsr.iv1, i32 undef, i32 0, i32 0, i1 false)
140-
; OPT: %scevgep4 = getelementptr i32, i32 addrspace(3)* %lsr.iv3, i32 1
141-
define amdgpu_kernel void @test_local_atomicdec_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(3)* noalias nocapture %arg0, i32 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 {
138+
; OPT: %tmp4 = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) %lsr.iv3, i32 undef, i32 0, i32 0, i1 false)
139+
; OPT: %tmp7 = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) %lsr.iv1, i32 undef, i32 0, i32 0, i1 false)
140+
; OPT: %uglygep4 = getelementptr i8, ptr addrspace(3) %lsr.iv3, i32 4
141+
define amdgpu_kernel void @test_local_atomicdec_addressing_loop_uniform_index_max_offset_i32(ptr addrspace(3) noalias nocapture %arg0, ptr addrspace(3) noalias nocapture readonly %arg1, i32 %n) #0 {
142142
bb:
143143
%tmp = icmp sgt i32 %n, 0
144144
br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge
@@ -155,19 +155,19 @@ bb:
155155
.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader
156156
%indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
157157
%tmp1 = add nuw nsw i32 %indvars.iv, 16383
158-
%tmp3 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 %tmp1
159-
%tmp4 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %tmp3, i32 undef, i32 0, i32 0, i1 false)
160-
%tmp6 = getelementptr inbounds i32, i32 addrspace(3)* %arg0, i32 %indvars.iv
161-
%tmp7 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %tmp6, i32 undef, i32 0, i32 0, i1 false)
158+
%tmp3 = getelementptr inbounds i32, ptr addrspace(3) %arg1, i32 %tmp1
159+
%tmp4 = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) %tmp3, i32 undef, i32 0, i32 0, i1 false)
160+
%tmp6 = getelementptr inbounds i32, ptr addrspace(3) %arg0, i32 %indvars.iv
161+
%tmp7 = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) %tmp6, i32 undef, i32 0, i32 0, i1 false)
162162
%tmp8 = add nsw i32 %tmp7, %tmp4
163-
atomicrmw add i32 addrspace(3)* %tmp6, i32 %tmp8 seq_cst
163+
atomicrmw add ptr addrspace(3) %tmp6, i32 %tmp8 seq_cst
164164
%indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
165165
%exitcond = icmp eq i32 %indvars.iv.next, %n
166166
br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
167167
}
168168

169-
declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #1
170-
declare i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #1
169+
declare i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) nocapture, i32, i32, i32, i1) #1
170+
declare i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) nocapture, i32, i32, i32, i1) #1
171171

172172
attributes #0 = { nounwind }
173173
attributes #1 = { nounwind argmemonly }

0 commit comments

Comments
 (0)