1
- ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
2
- ; RUN: opt -S -mtriple=r600-unknown-unknown -mcpu=redwood -amdgpu-promote-alloca < %s | FileCheck -check-prefix=OPT %s
1
+ ; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
2
+ ; RUN: opt -S -mtriple=r600-unknown-unknown-amdgiz -mcpu=redwood -amdgpu-promote-alloca < %s | FileCheck -check-prefix=OPT %s
3
+ target datalayout = "A5"
3
4
4
5
declare i32 @llvm.r600.read.tidig.x () nounwind readnone
5
6
@@ -18,19 +19,19 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
18
19
19
20
define amdgpu_kernel void @mova_same_clause (i32 addrspace (1 )* nocapture %out , i32 addrspace (1 )* nocapture %in ) #0 {
20
21
entry:
21
- %stack = alloca [5 x i32 ], align 4
22
+ %stack = alloca [5 x i32 ], align 4 , addrspace ( 5 )
22
23
%0 = load i32 , i32 addrspace (1 )* %in , align 4
23
- %arrayidx1 = getelementptr inbounds [5 x i32 ], [5 x i32 ]* %stack , i32 0 , i32 %0
24
- store i32 4 , i32* %arrayidx1 , align 4
24
+ %arrayidx1 = getelementptr inbounds [5 x i32 ], [5 x i32 ] addrspace ( 5 ) * %stack , i32 0 , i32 %0
25
+ store i32 4 , i32 addrspace ( 5 ) * %arrayidx1 , align 4
25
26
%arrayidx2 = getelementptr inbounds i32 , i32 addrspace (1 )* %in , i32 1
26
27
%1 = load i32 , i32 addrspace (1 )* %arrayidx2 , align 4
27
- %arrayidx3 = getelementptr inbounds [5 x i32 ], [5 x i32 ]* %stack , i32 0 , i32 %1
28
- store i32 5 , i32* %arrayidx3 , align 4
29
- %arrayidx10 = getelementptr inbounds [5 x i32 ], [5 x i32 ]* %stack , i32 0 , i32 0
30
- %2 = load i32 , i32* %arrayidx10 , align 4
28
+ %arrayidx3 = getelementptr inbounds [5 x i32 ], [5 x i32 ] addrspace ( 5 ) * %stack , i32 0 , i32 %1
29
+ store i32 5 , i32 addrspace ( 5 ) * %arrayidx3 , align 4
30
+ %arrayidx10 = getelementptr inbounds [5 x i32 ], [5 x i32 ] addrspace ( 5 ) * %stack , i32 0 , i32 0
31
+ %2 = load i32 , i32 addrspace ( 5 ) * %arrayidx10 , align 4
31
32
store i32 %2 , i32 addrspace (1 )* %out , align 4
32
- %arrayidx12 = getelementptr inbounds [5 x i32 ], [5 x i32 ]* %stack , i32 0 , i32 1
33
- %3 = load i32 , i32* %arrayidx12
33
+ %arrayidx12 = getelementptr inbounds [5 x i32 ], [5 x i32 ] addrspace ( 5 ) * %stack , i32 0 , i32 1
34
+ %3 = load i32 , i32 addrspace ( 5 ) * %arrayidx12
34
35
%arrayidx13 = getelementptr inbounds i32 , i32 addrspace (1 )* %out , i32 1
35
36
store i32 %3 , i32 addrspace (1 )* %arrayidx13
36
37
ret void
@@ -49,20 +50,20 @@ entry:
49
50
50
51
define amdgpu_kernel void @multiple_structs (i32 addrspace (1 )* %out ) #0 {
51
52
entry:
52
- %a = alloca %struct.point
53
- %b = alloca %struct.point
54
- %a.x.ptr = getelementptr inbounds %struct.point , %struct.point* %a , i32 0 , i32 0
55
- %a.y.ptr = getelementptr inbounds %struct.point , %struct.point* %a , i32 0 , i32 1
56
- %b.x.ptr = getelementptr inbounds %struct.point , %struct.point* %b , i32 0 , i32 0
57
- %b.y.ptr = getelementptr inbounds %struct.point , %struct.point* %b , i32 0 , i32 1
58
- store i32 0 , i32* %a.x.ptr
59
- store i32 1 , i32* %a.y.ptr
60
- store i32 2 , i32* %b.x.ptr
61
- store i32 3 , i32* %b.y.ptr
62
- %a.indirect.ptr = getelementptr inbounds %struct.point , %struct.point* %a , i32 0 , i32 0
63
- %b.indirect.ptr = getelementptr inbounds %struct.point , %struct.point* %b , i32 0 , i32 0
64
- %a.indirect = load i32 , i32* %a.indirect.ptr
65
- %b.indirect = load i32 , i32* %b.indirect.ptr
53
+ %a = alloca %struct.point , addrspace ( 5 )
54
+ %b = alloca %struct.point , addrspace ( 5 )
55
+ %a.x.ptr = getelementptr inbounds %struct.point , %struct.point addrspace ( 5 ) * %a , i32 0 , i32 0
56
+ %a.y.ptr = getelementptr inbounds %struct.point , %struct.point addrspace ( 5 ) * %a , i32 0 , i32 1
57
+ %b.x.ptr = getelementptr inbounds %struct.point , %struct.point addrspace ( 5 ) * %b , i32 0 , i32 0
58
+ %b.y.ptr = getelementptr inbounds %struct.point , %struct.point addrspace ( 5 ) * %b , i32 0 , i32 1
59
+ store i32 0 , i32 addrspace ( 5 ) * %a.x.ptr
60
+ store i32 1 , i32 addrspace ( 5 ) * %a.y.ptr
61
+ store i32 2 , i32 addrspace ( 5 ) * %b.x.ptr
62
+ store i32 3 , i32 addrspace ( 5 ) * %b.y.ptr
63
+ %a.indirect.ptr = getelementptr inbounds %struct.point , %struct.point addrspace ( 5 ) * %a , i32 0 , i32 0
64
+ %b.indirect.ptr = getelementptr inbounds %struct.point , %struct.point addrspace ( 5 ) * %b , i32 0 , i32 0
65
+ %a.indirect = load i32 , i32 addrspace ( 5 ) * %a.indirect.ptr
66
+ %b.indirect = load i32 , i32 addrspace ( 5 ) * %b.indirect.ptr
66
67
%0 = add i32 %a.indirect , %b.indirect
67
68
store i32 %0 , i32 addrspace (1 )* %out
68
69
ret void
@@ -77,32 +78,32 @@ entry:
77
78
78
79
; direct_loop - kernel taking two addrspace(1) i32 pointers, %out and %in.
; In this diff view, lines marked '-' and '+' are the old and new spelling of
; the same instruction; the '+' spelling puts both allocas, and every pointer
; derived from them, in addrspace(5).
; Flow: the entry block copies in[0] and in[1] into %prv_array_const[0] and
; [1]; the loop body adds %prv_array_const[0] into %prv_array[0] on each pass;
; the exit block stores %prv_array[0] to %out.
; NOTE(review): no store to %prv_array is visible before its first load in the
; loop body - confirm the test intends to read it uninitialized.
define amdgpu_kernel void @direct_loop (i32 addrspace (1 )* %out , i32 addrspace (1 )* %in ) #0 {
79
80
entry:
80
- %prv_array_const = alloca [2 x i32 ]
81
- %prv_array = alloca [2 x i32 ]
81
+ %prv_array_const = alloca [2 x i32 ], addrspace ( 5 )
82
+ %prv_array = alloca [2 x i32 ], addrspace ( 5 )
82
83
%a = load i32 , i32 addrspace (1 )* %in
83
84
%b_src_ptr = getelementptr inbounds i32 , i32 addrspace (1 )* %in , i32 1
84
85
%b = load i32 , i32 addrspace (1 )* %b_src_ptr
85
- %a_dst_ptr = getelementptr inbounds [2 x i32 ], [2 x i32 ]* %prv_array_const , i32 0 , i32 0
86
- store i32 %a , i32* %a_dst_ptr
87
- %b_dst_ptr = getelementptr inbounds [2 x i32 ], [2 x i32 ]* %prv_array_const , i32 0 , i32 1
88
- store i32 %b , i32* %b_dst_ptr
86
+ %a_dst_ptr = getelementptr inbounds [2 x i32 ], [2 x i32 ] addrspace ( 5 ) * %prv_array_const , i32 0 , i32 0
87
+ store i32 %a , i32 addrspace ( 5 ) * %a_dst_ptr
88
+ %b_dst_ptr = getelementptr inbounds [2 x i32 ], [2 x i32 ] addrspace ( 5 ) * %prv_array_const , i32 0 , i32 1
89
+ store i32 %b , i32 addrspace ( 5 ) * %b_dst_ptr
89
90
br label %for.body
90
91
91
92
; Loop body: %inc starts at 0 and %count = %inc + 1; the loop exits once
; %count equals 4095, so the body runs 4095 times.
for.body:
92
93
%inc = phi i32 [0 , %entry ], [%count , %for.body ]
93
- %x_ptr = getelementptr inbounds [2 x i32 ], [2 x i32 ]* %prv_array_const , i32 0 , i32 0
94
- %x = load i32 , i32* %x_ptr
95
- %y_ptr = getelementptr inbounds [2 x i32 ], [2 x i32 ]* %prv_array , i32 0 , i32 0
96
- %y = load i32 , i32* %y_ptr
94
+ %x_ptr = getelementptr inbounds [2 x i32 ], [2 x i32 ] addrspace ( 5 ) * %prv_array_const , i32 0 , i32 0
95
+ %x = load i32 , i32 addrspace ( 5 ) * %x_ptr
96
+ %y_ptr = getelementptr inbounds [2 x i32 ], [2 x i32 ] addrspace ( 5 ) * %prv_array , i32 0 , i32 0
97
+ %y = load i32 , i32 addrspace ( 5 ) * %y_ptr
97
98
%xy = add i32 %x , %y
98
- store i32 %xy , i32* %y_ptr
99
+ store i32 %xy , i32 addrspace ( 5 ) * %y_ptr
99
100
%count = add i32 %inc , 1
100
101
%done = icmp eq i32 %count , 4095
101
102
br i1 %done , label %for.end , label %for.body
102
103
103
104
; Exit block: publish the accumulated %prv_array[0] through %out.
for.end:
104
- %value_ptr = getelementptr inbounds [2 x i32 ], [2 x i32 ]* %prv_array , i32 0 , i32 0
105
- %value = load i32 , i32* %value_ptr
105
+ %value_ptr = getelementptr inbounds [2 x i32 ], [2 x i32 ] addrspace ( 5 ) * %prv_array , i32 0 , i32 0
106
+ %value = load i32 , i32 addrspace ( 5 ) * %value_ptr
106
107
store i32 %value , i32 addrspace (1 )* %out
107
108
ret void
108
109
}
@@ -112,13 +113,13 @@ for.end:
112
113
; R600: MOVA_INT
113
114
define amdgpu_kernel void @short_array (i32 addrspace (1 )* %out , i32 %index ) #0 {
114
115
entry:
115
- %0 = alloca [2 x i16 ]
116
- %1 = getelementptr inbounds [2 x i16 ], [2 x i16 ]* %0 , i32 0 , i32 0
117
- %2 = getelementptr inbounds [2 x i16 ], [2 x i16 ]* %0 , i32 0 , i32 1
118
- store i16 0 , i16* %1
119
- store i16 1 , i16* %2
120
- %3 = getelementptr inbounds [2 x i16 ], [2 x i16 ]* %0 , i32 0 , i32 %index
121
- %4 = load i16 , i16* %3
116
+ %0 = alloca [2 x i16 ], addrspace ( 5 )
117
+ %1 = getelementptr inbounds [2 x i16 ], [2 x i16 ] addrspace ( 5 ) * %0 , i32 0 , i32 0
118
+ %2 = getelementptr inbounds [2 x i16 ], [2 x i16 ] addrspace ( 5 ) * %0 , i32 0 , i32 1
119
+ store i16 0 , i16 addrspace ( 5 ) * %1
120
+ store i16 1 , i16 addrspace ( 5 ) * %2
121
+ %3 = getelementptr inbounds [2 x i16 ], [2 x i16 ] addrspace ( 5 ) * %0 , i32 0 , i32 %index
122
+ %4 = load i16 , i16 addrspace ( 5 ) * %3
122
123
%5 = sext i16 %4 to i32
123
124
store i32 %5 , i32 addrspace (1 )* %out
124
125
ret void
@@ -129,13 +130,13 @@ entry:
129
130
; R600: MOVA_INT
130
131
define amdgpu_kernel void @char_array (i32 addrspace (1 )* %out , i32 %index ) #0 {
131
132
entry:
132
- %0 = alloca [2 x i8 ]
133
- %1 = getelementptr inbounds [2 x i8 ], [2 x i8 ]* %0 , i32 0 , i32 0
134
- %2 = getelementptr inbounds [2 x i8 ], [2 x i8 ]* %0 , i32 0 , i32 1
135
- store i8 0 , i8* %1
136
- store i8 1 , i8* %2
137
- %3 = getelementptr inbounds [2 x i8 ], [2 x i8 ]* %0 , i32 0 , i32 %index
138
- %4 = load i8 , i8* %3
133
+ %0 = alloca [2 x i8 ], addrspace ( 5 )
134
+ %1 = getelementptr inbounds [2 x i8 ], [2 x i8 ] addrspace ( 5 ) * %0 , i32 0 , i32 0
135
+ %2 = getelementptr inbounds [2 x i8 ], [2 x i8 ] addrspace ( 5 ) * %0 , i32 0 , i32 1
136
+ store i8 0 , i8 addrspace ( 5 ) * %1
137
+ store i8 1 , i8 addrspace ( 5 ) * %2
138
+ %3 = getelementptr inbounds [2 x i8 ], [2 x i8 ] addrspace ( 5 ) * %0 , i32 0 , i32 %index
139
+ %4 = load i8 , i8 addrspace ( 5 ) * %3
139
140
%5 = sext i8 %4 to i32
140
141
store i32 %5 , i32 addrspace (1 )* %out
141
142
ret void
@@ -150,13 +151,13 @@ entry:
150
151
; R600-NOT: MOV * TO.X
151
152
define amdgpu_kernel void @work_item_info (i32 addrspace (1 )* %out , i32 %in ) #0 {
152
153
entry:
153
- %0 = alloca [2 x i32 ]
154
- %1 = getelementptr inbounds [2 x i32 ], [2 x i32 ]* %0 , i32 0 , i32 0
155
- %2 = getelementptr inbounds [2 x i32 ], [2 x i32 ]* %0 , i32 0 , i32 1
156
- store i32 0 , i32* %1
157
- store i32 1 , i32* %2
158
- %3 = getelementptr inbounds [2 x i32 ], [2 x i32 ]* %0 , i32 0 , i32 %in
159
- %4 = load i32 , i32* %3
154
+ %0 = alloca [2 x i32 ], addrspace ( 5 )
155
+ %1 = getelementptr inbounds [2 x i32 ], [2 x i32 ] addrspace ( 5 ) * %0 , i32 0 , i32 0
156
+ %2 = getelementptr inbounds [2 x i32 ], [2 x i32 ] addrspace ( 5 ) * %0 , i32 0 , i32 1
157
+ store i32 0 , i32 addrspace ( 5 ) * %1
158
+ store i32 1 , i32 addrspace ( 5 ) * %2
159
+ %3 = getelementptr inbounds [2 x i32 ], [2 x i32 ] addrspace ( 5 ) * %0 , i32 0 , i32 %in
160
+ %4 = load i32 , i32 addrspace ( 5 ) * %3
160
161
%5 = call i32 @llvm.r600.read.tidig.x ()
161
162
%6 = add i32 %4 , %5
162
163
store i32 %6 , i32 addrspace (1 )* %out
@@ -171,22 +172,22 @@ entry:
171
172
; R600-NOT: [[CHAN]]+
172
173
define amdgpu_kernel void @no_overlap (i32 addrspace (1 )* %out , i32 %in ) #0 {
173
174
entry:
174
- %0 = alloca [3 x i8 ], align 1
175
- %1 = alloca [2 x i8 ], align 1
176
- %2 = getelementptr inbounds [3 x i8 ], [3 x i8 ]* %0 , i32 0 , i32 0
177
- %3 = getelementptr inbounds [3 x i8 ], [3 x i8 ]* %0 , i32 0 , i32 1
178
- %4 = getelementptr inbounds [3 x i8 ], [3 x i8 ]* %0 , i32 0 , i32 2
179
- %5 = getelementptr inbounds [2 x i8 ], [2 x i8 ]* %1 , i32 0 , i32 0
180
- %6 = getelementptr inbounds [2 x i8 ], [2 x i8 ]* %1 , i32 0 , i32 1
181
- store i8 0 , i8* %2
182
- store i8 1 , i8* %3
183
- store i8 2 , i8* %4
184
- store i8 1 , i8* %5
185
- store i8 0 , i8* %6
186
- %7 = getelementptr inbounds [3 x i8 ], [3 x i8 ]* %0 , i32 0 , i32 %in
187
- %8 = getelementptr inbounds [2 x i8 ], [2 x i8 ]* %1 , i32 0 , i32 %in
188
- %9 = load i8 , i8* %7
189
- %10 = load i8 , i8* %8
175
+ %0 = alloca [3 x i8 ], align 1 , addrspace ( 5 )
176
+ %1 = alloca [2 x i8 ], align 1 , addrspace ( 5 )
177
+ %2 = getelementptr inbounds [3 x i8 ], [3 x i8 ] addrspace ( 5 ) * %0 , i32 0 , i32 0
178
+ %3 = getelementptr inbounds [3 x i8 ], [3 x i8 ] addrspace ( 5 ) * %0 , i32 0 , i32 1
179
+ %4 = getelementptr inbounds [3 x i8 ], [3 x i8 ] addrspace ( 5 ) * %0 , i32 0 , i32 2
180
+ %5 = getelementptr inbounds [2 x i8 ], [2 x i8 ] addrspace ( 5 ) * %1 , i32 0 , i32 0
181
+ %6 = getelementptr inbounds [2 x i8 ], [2 x i8 ] addrspace ( 5 ) * %1 , i32 0 , i32 1
182
+ store i8 0 , i8 addrspace ( 5 ) * %2
183
+ store i8 1 , i8 addrspace ( 5 ) * %3
184
+ store i8 2 , i8 addrspace ( 5 ) * %4
185
+ store i8 1 , i8 addrspace ( 5 ) * %5
186
+ store i8 0 , i8 addrspace ( 5 ) * %6
187
+ %7 = getelementptr inbounds [3 x i8 ], [3 x i8 ] addrspace ( 5 ) * %0 , i32 0 , i32 %in
188
+ %8 = getelementptr inbounds [2 x i8 ], [2 x i8 ] addrspace ( 5 ) * %1 , i32 0 , i32 %in
189
+ %9 = load i8 , i8 addrspace ( 5 ) * %7
190
+ %10 = load i8 , i8 addrspace ( 5 ) * %8
190
191
%11 = add i8 %9 , %10
191
192
%12 = sext i8 %11 to i32
192
193
store i32 %12 , i32 addrspace (1 )* %out
@@ -195,40 +196,40 @@ entry:
195
196
196
197
; char_array_array - kernel over a stack-allocated [2 x [2 x i8]].
; Stores 0 to element [0][0] and 1 to element [0][1], loads element
; [0][%index], sign-extends the i8 to i32 and stores it to %out.
; Lines marked '-' and '+' are the old and new spelling of the same
; instruction; the '+' spelling uses addrspace(5) for the alloca and for all
; pointers derived from it.
define amdgpu_kernel void @char_array_array (i32 addrspace (1 )* %out , i32 %index ) #0 {
197
198
entry:
198
- %alloca = alloca [2 x [2 x i8 ]]
199
- %gep0 = getelementptr inbounds [2 x [2 x i8 ]], [2 x [2 x i8 ]]* %alloca , i32 0 , i32 0 , i32 0
200
- %gep1 = getelementptr inbounds [2 x [2 x i8 ]], [2 x [2 x i8 ]]* %alloca , i32 0 , i32 0 , i32 1
201
- store i8 0 , i8* %gep0
202
- store i8 1 , i8* %gep1
203
- %gep2 = getelementptr inbounds [2 x [2 x i8 ]], [2 x [2 x i8 ]]* %alloca , i32 0 , i32 0 , i32 %index
204
- %load = load i8 , i8* %gep2
199
+ %alloca = alloca [2 x [2 x i8 ]], addrspace ( 5 )
200
+ %gep0 = getelementptr inbounds [2 x [2 x i8 ]], [2 x [2 x i8 ]] addrspace ( 5 ) * %alloca , i32 0 , i32 0 , i32 0
201
+ %gep1 = getelementptr inbounds [2 x [2 x i8 ]], [2 x [2 x i8 ]] addrspace ( 5 ) * %alloca , i32 0 , i32 0 , i32 1
202
+ store i8 0 , i8 addrspace ( 5 ) * %gep0
203
+ store i8 1 , i8 addrspace ( 5 ) * %gep1
204
+ %gep2 = getelementptr inbounds [2 x [2 x i8 ]], [2 x [2 x i8 ]] addrspace ( 5 ) * %alloca , i32 0 , i32 0 , i32 %index
205
+ %load = load i8 , i8 addrspace ( 5 ) * %gep2
205
206
%sext = sext i8 %load to i32
206
207
store i32 %sext , i32 addrspace (1 )* %out
207
208
ret void
208
209
}
209
210
210
211
; i32_array_array - kernel over a stack-allocated [2 x [2 x i32]].
; Stores 0 to element [0][0] and 1 to element [0][1], then loads element
; [0][%index] and stores it to %out unchanged.
; Lines marked '-' and '+' are the old and new spelling of the same
; instruction; the '+' spelling uses addrspace(5) for the alloca and for all
; pointers derived from it.
define amdgpu_kernel void @i32_array_array (i32 addrspace (1 )* %out , i32 %index ) #0 {
211
212
entry:
212
- %alloca = alloca [2 x [2 x i32 ]]
213
- %gep0 = getelementptr inbounds [2 x [2 x i32 ]], [2 x [2 x i32 ]]* %alloca , i32 0 , i32 0 , i32 0
214
- %gep1 = getelementptr inbounds [2 x [2 x i32 ]], [2 x [2 x i32 ]]* %alloca , i32 0 , i32 0 , i32 1
215
- store i32 0 , i32* %gep0
216
- store i32 1 , i32* %gep1
217
- %gep2 = getelementptr inbounds [2 x [2 x i32 ]], [2 x [2 x i32 ]]* %alloca , i32 0 , i32 0 , i32 %index
218
- %load = load i32 , i32* %gep2
213
+ %alloca = alloca [2 x [2 x i32 ]], addrspace ( 5 )
214
+ %gep0 = getelementptr inbounds [2 x [2 x i32 ]], [2 x [2 x i32 ]] addrspace ( 5 ) * %alloca , i32 0 , i32 0 , i32 0
215
+ %gep1 = getelementptr inbounds [2 x [2 x i32 ]], [2 x [2 x i32 ]] addrspace ( 5 ) * %alloca , i32 0 , i32 0 , i32 1
216
+ store i32 0 , i32 addrspace ( 5 ) * %gep0
217
+ store i32 1 , i32 addrspace ( 5 ) * %gep1
218
+ %gep2 = getelementptr inbounds [2 x [2 x i32 ]], [2 x [2 x i32 ]] addrspace ( 5 ) * %alloca , i32 0 , i32 0 , i32 %index
219
+ %load = load i32 , i32 addrspace ( 5 ) * %gep2
219
220
store i32 %load , i32 addrspace (1 )* %out
220
221
ret void
221
222
}
222
223
223
224
; i64_array_array - kernel over a stack-allocated [2 x [2 x i64]]; here %out
; is an i64 addrspace(1) pointer.
; Stores 0 to element [0][0] and 1 to element [0][1], then loads element
; [0][%index] and stores it to %out unchanged.
; Lines marked '-' and '+' are the old and new spelling of the same
; instruction; the '+' spelling uses addrspace(5) for the alloca and for all
; pointers derived from it.
define amdgpu_kernel void @i64_array_array (i64 addrspace (1 )* %out , i32 %index ) #0 {
224
225
entry:
225
- %alloca = alloca [2 x [2 x i64 ]]
226
- %gep0 = getelementptr inbounds [2 x [2 x i64 ]], [2 x [2 x i64 ]]* %alloca , i32 0 , i32 0 , i32 0
227
- %gep1 = getelementptr inbounds [2 x [2 x i64 ]], [2 x [2 x i64 ]]* %alloca , i32 0 , i32 0 , i32 1
228
- store i64 0 , i64* %gep0
229
- store i64 1 , i64* %gep1
230
- %gep2 = getelementptr inbounds [2 x [2 x i64 ]], [2 x [2 x i64 ]]* %alloca , i32 0 , i32 0 , i32 %index
231
- %load = load i64 , i64* %gep2
226
+ %alloca = alloca [2 x [2 x i64 ]], addrspace ( 5 )
227
+ %gep0 = getelementptr inbounds [2 x [2 x i64 ]], [2 x [2 x i64 ]] addrspace ( 5 ) * %alloca , i32 0 , i32 0 , i32 0
228
+ %gep1 = getelementptr inbounds [2 x [2 x i64 ]], [2 x [2 x i64 ]] addrspace ( 5 ) * %alloca , i32 0 , i32 0 , i32 1
229
+ store i64 0 , i64 addrspace ( 5 ) * %gep0
230
+ store i64 1 , i64 addrspace ( 5 ) * %gep1
231
+ %gep2 = getelementptr inbounds [2 x [2 x i64 ]], [2 x [2 x i64 ]] addrspace ( 5 ) * %alloca , i32 0 , i32 0 , i32 %index
232
+ %load = load i64 , i64 addrspace ( 5 ) * %gep2
232
233
store i64 %load , i64 addrspace (1 )* %out
233
234
ret void
234
235
}
@@ -237,40 +238,40 @@ entry:
237
238
238
239
; struct_array_array - kernel over a stack-allocated
; [2 x [2 x %struct.pair32]] (%struct.pair32 is defined outside this view).
; Stores 0 to member index 1 of element [0][0] and 1 to member index 1 of
; element [0][1], then loads member index 0 of element [0][%index] and stores
; it to %out.
; NOTE(review): the two stores target member index 1 while the load reads
; member index 0, so the loaded slot is never written in the visible code -
; confirm that is intended.
; Lines marked '-' and '+' are the old and new spelling of the same
; instruction; the '+' spelling uses addrspace(5) for the alloca and for all
; pointers derived from it.
define amdgpu_kernel void @struct_array_array (i32 addrspace (1 )* %out , i32 %index ) #0 {
239
240
entry:
240
- %alloca = alloca [2 x [2 x %struct.pair32 ]]
241
- %gep0 = getelementptr inbounds [2 x [2 x %struct.pair32 ]], [2 x [2 x %struct.pair32 ]]* %alloca , i32 0 , i32 0 , i32 0 , i32 1
242
- %gep1 = getelementptr inbounds [2 x [2 x %struct.pair32 ]], [2 x [2 x %struct.pair32 ]]* %alloca , i32 0 , i32 0 , i32 1 , i32 1
243
- store i32 0 , i32* %gep0
244
- store i32 1 , i32* %gep1
245
- %gep2 = getelementptr inbounds [2 x [2 x %struct.pair32 ]], [2 x [2 x %struct.pair32 ]]* %alloca , i32 0 , i32 0 , i32 %index , i32 0
246
- %load = load i32 , i32* %gep2
241
+ %alloca = alloca [2 x [2 x %struct.pair32 ]], addrspace ( 5 )
242
+ %gep0 = getelementptr inbounds [2 x [2 x %struct.pair32 ]], [2 x [2 x %struct.pair32 ]] addrspace ( 5 ) * %alloca , i32 0 , i32 0 , i32 0 , i32 1
243
+ %gep1 = getelementptr inbounds [2 x [2 x %struct.pair32 ]], [2 x [2 x %struct.pair32 ]] addrspace ( 5 ) * %alloca , i32 0 , i32 0 , i32 1 , i32 1
244
+ store i32 0 , i32 addrspace ( 5 ) * %gep0
245
+ store i32 1 , i32 addrspace ( 5 ) * %gep1
246
+ %gep2 = getelementptr inbounds [2 x [2 x %struct.pair32 ]], [2 x [2 x %struct.pair32 ]] addrspace ( 5 ) * %alloca , i32 0 , i32 0 , i32 %index , i32 0
247
+ %load = load i32 , i32 addrspace ( 5 ) * %gep2
247
248
store i32 %load , i32 addrspace (1 )* %out
248
249
ret void
249
250
}
250
251
251
252
; struct_pair32_array - kernel over a stack-allocated [2 x %struct.pair32]
; (%struct.pair32 is defined outside this view).
; Stores 0 to member index 1 of element [0] and 1 to member index 0 of
; element [1], then loads member index 0 of element [%index] and stores it to
; %out.
; NOTE(review): member index 0 of element [0] is never written in the visible
; code, so the %index == 0 case reads an unwritten slot - confirm intended.
; Lines marked '-' and '+' are the old and new spelling of the same
; instruction; the '+' spelling uses addrspace(5) for the alloca and for all
; pointers derived from it.
define amdgpu_kernel void @struct_pair32_array (i32 addrspace (1 )* %out , i32 %index ) #0 {
252
253
entry:
253
- %alloca = alloca [2 x %struct.pair32 ]
254
- %gep0 = getelementptr inbounds [2 x %struct.pair32 ], [2 x %struct.pair32 ]* %alloca , i32 0 , i32 0 , i32 1
255
- %gep1 = getelementptr inbounds [2 x %struct.pair32 ], [2 x %struct.pair32 ]* %alloca , i32 0 , i32 1 , i32 0
256
- store i32 0 , i32* %gep0
257
- store i32 1 , i32* %gep1
258
- %gep2 = getelementptr inbounds [2 x %struct.pair32 ], [2 x %struct.pair32 ]* %alloca , i32 0 , i32 %index , i32 0
259
- %load = load i32 , i32* %gep2
254
+ %alloca = alloca [2 x %struct.pair32 ], addrspace ( 5 )
255
+ %gep0 = getelementptr inbounds [2 x %struct.pair32 ], [2 x %struct.pair32 ] addrspace ( 5 ) * %alloca , i32 0 , i32 0 , i32 1
256
+ %gep1 = getelementptr inbounds [2 x %struct.pair32 ], [2 x %struct.pair32 ] addrspace ( 5 ) * %alloca , i32 0 , i32 1 , i32 0
257
+ store i32 0 , i32 addrspace ( 5 ) * %gep0
258
+ store i32 1 , i32 addrspace ( 5 ) * %gep1
259
+ %gep2 = getelementptr inbounds [2 x %struct.pair32 ], [2 x %struct.pair32 ] addrspace ( 5 ) * %alloca , i32 0 , i32 %index , i32 0
260
+ %load = load i32 , i32 addrspace ( 5 ) * %gep2
260
261
store i32 %load , i32 addrspace (1 )* %out
261
262
ret void
262
263
}
263
264
264
265
; select_private - kernel that picks between two pointers into the same
; stack-allocated [2 x i32] with a select instruction.
; Element [0] is set to 0 and element [1] to 1; %sel is the pointer to
; element [0] when %in == 0 and to element [1] otherwise, so the value stored
; to %out is 0 for %in == 0 and 1 for any other %in.
; Lines marked '-' and '+' are the old and new spelling of the same
; instruction; the '+' spelling uses addrspace(5) for the alloca, both select
; operands and the load.
define amdgpu_kernel void @select_private (i32 addrspace (1 )* %out , i32 %in ) nounwind {
265
266
entry:
266
- %tmp = alloca [2 x i32 ]
267
- %tmp1 = getelementptr inbounds [2 x i32 ], [2 x i32 ]* %tmp , i32 0 , i32 0
268
- %tmp2 = getelementptr inbounds [2 x i32 ], [2 x i32 ]* %tmp , i32 0 , i32 1
269
- store i32 0 , i32* %tmp1
270
- store i32 1 , i32* %tmp2
267
+ %tmp = alloca [2 x i32 ], addrspace ( 5 )
268
+ %tmp1 = getelementptr inbounds [2 x i32 ], [2 x i32 ] addrspace ( 5 ) * %tmp , i32 0 , i32 0
269
+ %tmp2 = getelementptr inbounds [2 x i32 ], [2 x i32 ] addrspace ( 5 ) * %tmp , i32 0 , i32 1
270
+ store i32 0 , i32 addrspace ( 5 ) * %tmp1
271
+ store i32 1 , i32 addrspace ( 5 ) * %tmp2
271
272
%cmp = icmp eq i32 %in , 0
272
- %sel = select i1 %cmp , i32* %tmp1 , i32* %tmp2
273
- %load = load i32 , i32* %sel
273
+ %sel = select i1 %cmp , i32 addrspace ( 5 ) * %tmp1 , i32 addrspace ( 5 ) * %tmp2
274
+ %load = load i32 , i32 addrspace ( 5 ) * %sel
274
275
store i32 %load , i32 addrspace (1 )* %out
275
276
ret void
276
277
}
@@ -283,14 +284,14 @@ entry:
283
284
; SI: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
284
285
; SI: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ;
285
286
; ptrtoint - kernel that round-trips a stack address through an integer.
; Stores 5 to element [%a] of a stack-allocated [16 x i32], converts the
; alloca's address to i32, adds 5 to that integer, converts the sum back to an
; i32 pointer, indexes it by %b and stores the loaded value to %out.
; NOTE(review): the integer offset of 5 is not a multiple of the i32 size, so
; the reconstructed pointer looks deliberately unaligned relative to the
; array elements - confirm against the test's intent.
; Lines marked '-' and '+' are the old and new spelling of the same
; instruction; the '+' spelling uses addrspace(5) for the alloca, the ptrtoint
; operand and the inttoptr result.
define amdgpu_kernel void @ptrtoint (i32 addrspace (1 )* %out , i32 %a , i32 %b ) #0 {
286
- %alloca = alloca [16 x i32 ]
287
- %tmp0 = getelementptr inbounds [16 x i32 ], [16 x i32 ]* %alloca , i32 0 , i32 %a
288
- store i32 5 , i32* %tmp0
289
- %tmp1 = ptrtoint [16 x i32 ]* %alloca to i32
287
+ %alloca = alloca [16 x i32 ], addrspace ( 5 )
288
+ %tmp0 = getelementptr inbounds [16 x i32 ], [16 x i32 ] addrspace ( 5 ) * %alloca , i32 0 , i32 %a
289
+ store i32 5 , i32 addrspace ( 5 ) * %tmp0
290
+ %tmp1 = ptrtoint [16 x i32 ] addrspace ( 5 ) * %alloca to i32
290
291
%tmp2 = add i32 %tmp1 , 5
291
- %tmp3 = inttoptr i32 %tmp2 to i32*
292
- %tmp4 = getelementptr inbounds i32 , i32* %tmp3 , i32 %b
293
- %tmp5 = load i32 , i32* %tmp4
292
+ %tmp3 = inttoptr i32 %tmp2 to i32 addrspace ( 5 ) *
293
+ %tmp4 = getelementptr inbounds i32 , i32 addrspace ( 5 ) * %tmp3 , i32 %b
294
+ %tmp5 = load i32 , i32 addrspace ( 5 ) * %tmp4
294
295
store i32 %tmp5 , i32 addrspace (1 )* %out
295
296
ret void
296
297
}
0 commit comments