Skip to content
This repository was archived by the owner on Apr 23, 2020. It is now read-only.

Commit ec1f0cc

Browse files
committed
[AMDGPU] Change alloca addr space of r600 to 5 for amdgiz environment
Differential Revision: https://reviews.llvm.org/D39657
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@317479 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 044ea89 commit ec1f0cc

File tree

3 files changed, with 134 additions (+134) and 128 deletions (-128).

lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,10 @@ GCNMinRegSchedRegistry("gcn-minreg",
245245
static StringRef computeDataLayout(const Triple &TT) {
246246
if (TT.getArch() == Triple::r600) {
247247
// 32-bit pointers.
248+
if (TT.getEnvironmentName() == "amdgiz" ||
249+
TT.getEnvironmentName() == "amdgizcl")
250+
return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
251+
"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5";
248252
return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
249253
"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
250254
}

test/CodeGen/AMDGPU/private-memory-r600.ll

Lines changed: 125 additions & 124 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
2-
; RUN: opt -S -mtriple=r600-unknown-unknown -mcpu=redwood -amdgpu-promote-alloca < %s | FileCheck -check-prefix=OPT %s
1+
; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
2+
; RUN: opt -S -mtriple=r600-unknown-unknown-amdgiz -mcpu=redwood -amdgpu-promote-alloca < %s | FileCheck -check-prefix=OPT %s
3+
target datalayout = "A5"
34

45
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
56

@@ -18,19 +19,19 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
1819

1920
define amdgpu_kernel void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
2021
entry:
21-
%stack = alloca [5 x i32], align 4
22+
%stack = alloca [5 x i32], align 4, addrspace(5)
2223
%0 = load i32, i32 addrspace(1)* %in, align 4
23-
%arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0
24-
store i32 4, i32* %arrayidx1, align 4
24+
%arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %0
25+
store i32 4, i32 addrspace(5)* %arrayidx1, align 4
2526
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
2627
%1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
27-
%arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1
28-
store i32 5, i32* %arrayidx3, align 4
29-
%arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
30-
%2 = load i32, i32* %arrayidx10, align 4
28+
%arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %1
29+
store i32 5, i32 addrspace(5)* %arrayidx3, align 4
30+
%arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0
31+
%2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
3132
store i32 %2, i32 addrspace(1)* %out, align 4
32-
%arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
33-
%3 = load i32, i32* %arrayidx12
33+
%arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1
34+
%3 = load i32, i32 addrspace(5)* %arrayidx12
3435
%arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
3536
store i32 %3, i32 addrspace(1)* %arrayidx13
3637
ret void
@@ -49,20 +50,20 @@ entry:
4950

5051
define amdgpu_kernel void @multiple_structs(i32 addrspace(1)* %out) #0 {
5152
entry:
52-
%a = alloca %struct.point
53-
%b = alloca %struct.point
54-
%a.x.ptr = getelementptr inbounds %struct.point, %struct.point* %a, i32 0, i32 0
55-
%a.y.ptr = getelementptr inbounds %struct.point, %struct.point* %a, i32 0, i32 1
56-
%b.x.ptr = getelementptr inbounds %struct.point, %struct.point* %b, i32 0, i32 0
57-
%b.y.ptr = getelementptr inbounds %struct.point, %struct.point* %b, i32 0, i32 1
58-
store i32 0, i32* %a.x.ptr
59-
store i32 1, i32* %a.y.ptr
60-
store i32 2, i32* %b.x.ptr
61-
store i32 3, i32* %b.y.ptr
62-
%a.indirect.ptr = getelementptr inbounds %struct.point, %struct.point* %a, i32 0, i32 0
63-
%b.indirect.ptr = getelementptr inbounds %struct.point, %struct.point* %b, i32 0, i32 0
64-
%a.indirect = load i32, i32* %a.indirect.ptr
65-
%b.indirect = load i32, i32* %b.indirect.ptr
53+
%a = alloca %struct.point, addrspace(5)
54+
%b = alloca %struct.point, addrspace(5)
55+
%a.x.ptr = getelementptr inbounds %struct.point, %struct.point addrspace(5)* %a, i32 0, i32 0
56+
%a.y.ptr = getelementptr inbounds %struct.point, %struct.point addrspace(5)* %a, i32 0, i32 1
57+
%b.x.ptr = getelementptr inbounds %struct.point, %struct.point addrspace(5)* %b, i32 0, i32 0
58+
%b.y.ptr = getelementptr inbounds %struct.point, %struct.point addrspace(5)* %b, i32 0, i32 1
59+
store i32 0, i32 addrspace(5)* %a.x.ptr
60+
store i32 1, i32 addrspace(5)* %a.y.ptr
61+
store i32 2, i32 addrspace(5)* %b.x.ptr
62+
store i32 3, i32 addrspace(5)* %b.y.ptr
63+
%a.indirect.ptr = getelementptr inbounds %struct.point, %struct.point addrspace(5)* %a, i32 0, i32 0
64+
%b.indirect.ptr = getelementptr inbounds %struct.point, %struct.point addrspace(5)* %b, i32 0, i32 0
65+
%a.indirect = load i32, i32 addrspace(5)* %a.indirect.ptr
66+
%b.indirect = load i32, i32 addrspace(5)* %b.indirect.ptr
6667
%0 = add i32 %a.indirect, %b.indirect
6768
store i32 %0, i32 addrspace(1)* %out
6869
ret void
@@ -77,32 +78,32 @@ entry:
7778

7879
define amdgpu_kernel void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
7980
entry:
80-
%prv_array_const = alloca [2 x i32]
81-
%prv_array = alloca [2 x i32]
81+
%prv_array_const = alloca [2 x i32], addrspace(5)
82+
%prv_array = alloca [2 x i32], addrspace(5)
8283
%a = load i32, i32 addrspace(1)* %in
8384
%b_src_ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
8485
%b = load i32, i32 addrspace(1)* %b_src_ptr
85-
%a_dst_ptr = getelementptr inbounds [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 0
86-
store i32 %a, i32* %a_dst_ptr
87-
%b_dst_ptr = getelementptr inbounds [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 1
88-
store i32 %b, i32* %b_dst_ptr
86+
%a_dst_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array_const, i32 0, i32 0
87+
store i32 %a, i32 addrspace(5)* %a_dst_ptr
88+
%b_dst_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array_const, i32 0, i32 1
89+
store i32 %b, i32 addrspace(5)* %b_dst_ptr
8990
br label %for.body
9091

9192
for.body:
9293
%inc = phi i32 [0, %entry], [%count, %for.body]
93-
%x_ptr = getelementptr inbounds [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 0
94-
%x = load i32, i32* %x_ptr
95-
%y_ptr = getelementptr inbounds [2 x i32], [2 x i32]* %prv_array, i32 0, i32 0
96-
%y = load i32, i32* %y_ptr
94+
%x_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array_const, i32 0, i32 0
95+
%x = load i32, i32 addrspace(5)* %x_ptr
96+
%y_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array, i32 0, i32 0
97+
%y = load i32, i32 addrspace(5)* %y_ptr
9798
%xy = add i32 %x, %y
98-
store i32 %xy, i32* %y_ptr
99+
store i32 %xy, i32 addrspace(5)* %y_ptr
99100
%count = add i32 %inc, 1
100101
%done = icmp eq i32 %count, 4095
101102
br i1 %done, label %for.end, label %for.body
102103

103104
for.end:
104-
%value_ptr = getelementptr inbounds [2 x i32], [2 x i32]* %prv_array, i32 0, i32 0
105-
%value = load i32, i32* %value_ptr
105+
%value_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array, i32 0, i32 0
106+
%value = load i32, i32 addrspace(5)* %value_ptr
106107
store i32 %value, i32 addrspace(1)* %out
107108
ret void
108109
}
@@ -112,13 +113,13 @@ for.end:
112113
; R600: MOVA_INT
113114
define amdgpu_kernel void @short_array(i32 addrspace(1)* %out, i32 %index) #0 {
114115
entry:
115-
%0 = alloca [2 x i16]
116-
%1 = getelementptr inbounds [2 x i16], [2 x i16]* %0, i32 0, i32 0
117-
%2 = getelementptr inbounds [2 x i16], [2 x i16]* %0, i32 0, i32 1
118-
store i16 0, i16* %1
119-
store i16 1, i16* %2
120-
%3 = getelementptr inbounds [2 x i16], [2 x i16]* %0, i32 0, i32 %index
121-
%4 = load i16, i16* %3
116+
%0 = alloca [2 x i16], addrspace(5)
117+
%1 = getelementptr inbounds [2 x i16], [2 x i16] addrspace(5)* %0, i32 0, i32 0
118+
%2 = getelementptr inbounds [2 x i16], [2 x i16] addrspace(5)* %0, i32 0, i32 1
119+
store i16 0, i16 addrspace(5)* %1
120+
store i16 1, i16 addrspace(5)* %2
121+
%3 = getelementptr inbounds [2 x i16], [2 x i16] addrspace(5)* %0, i32 0, i32 %index
122+
%4 = load i16, i16 addrspace(5)* %3
122123
%5 = sext i16 %4 to i32
123124
store i32 %5, i32 addrspace(1)* %out
124125
ret void
@@ -129,13 +130,13 @@ entry:
129130
; R600: MOVA_INT
130131
define amdgpu_kernel void @char_array(i32 addrspace(1)* %out, i32 %index) #0 {
131132
entry:
132-
%0 = alloca [2 x i8]
133-
%1 = getelementptr inbounds [2 x i8], [2 x i8]* %0, i32 0, i32 0
134-
%2 = getelementptr inbounds [2 x i8], [2 x i8]* %0, i32 0, i32 1
135-
store i8 0, i8* %1
136-
store i8 1, i8* %2
137-
%3 = getelementptr inbounds [2 x i8], [2 x i8]* %0, i32 0, i32 %index
138-
%4 = load i8, i8* %3
133+
%0 = alloca [2 x i8], addrspace(5)
134+
%1 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(5)* %0, i32 0, i32 0
135+
%2 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(5)* %0, i32 0, i32 1
136+
store i8 0, i8 addrspace(5)* %1
137+
store i8 1, i8 addrspace(5)* %2
138+
%3 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(5)* %0, i32 0, i32 %index
139+
%4 = load i8, i8 addrspace(5)* %3
139140
%5 = sext i8 %4 to i32
140141
store i32 %5, i32 addrspace(1)* %out
141142
ret void
@@ -150,13 +151,13 @@ entry:
150151
; R600-NOT: MOV * TO.X
151152
define amdgpu_kernel void @work_item_info(i32 addrspace(1)* %out, i32 %in) #0 {
152153
entry:
153-
%0 = alloca [2 x i32]
154-
%1 = getelementptr inbounds [2 x i32], [2 x i32]* %0, i32 0, i32 0
155-
%2 = getelementptr inbounds [2 x i32], [2 x i32]* %0, i32 0, i32 1
156-
store i32 0, i32* %1
157-
store i32 1, i32* %2
158-
%3 = getelementptr inbounds [2 x i32], [2 x i32]* %0, i32 0, i32 %in
159-
%4 = load i32, i32* %3
154+
%0 = alloca [2 x i32], addrspace(5)
155+
%1 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 0
156+
%2 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 1
157+
store i32 0, i32 addrspace(5)* %1
158+
store i32 1, i32 addrspace(5)* %2
159+
%3 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 %in
160+
%4 = load i32, i32 addrspace(5)* %3
160161
%5 = call i32 @llvm.r600.read.tidig.x()
161162
%6 = add i32 %4, %5
162163
store i32 %6, i32 addrspace(1)* %out
@@ -171,22 +172,22 @@ entry:
171172
; R600-NOT: [[CHAN]]+
172173
define amdgpu_kernel void @no_overlap(i32 addrspace(1)* %out, i32 %in) #0 {
173174
entry:
174-
%0 = alloca [3 x i8], align 1
175-
%1 = alloca [2 x i8], align 1
176-
%2 = getelementptr inbounds [3 x i8], [3 x i8]* %0, i32 0, i32 0
177-
%3 = getelementptr inbounds [3 x i8], [3 x i8]* %0, i32 0, i32 1
178-
%4 = getelementptr inbounds [3 x i8], [3 x i8]* %0, i32 0, i32 2
179-
%5 = getelementptr inbounds [2 x i8], [2 x i8]* %1, i32 0, i32 0
180-
%6 = getelementptr inbounds [2 x i8], [2 x i8]* %1, i32 0, i32 1
181-
store i8 0, i8* %2
182-
store i8 1, i8* %3
183-
store i8 2, i8* %4
184-
store i8 1, i8* %5
185-
store i8 0, i8* %6
186-
%7 = getelementptr inbounds [3 x i8], [3 x i8]* %0, i32 0, i32 %in
187-
%8 = getelementptr inbounds [2 x i8], [2 x i8]* %1, i32 0, i32 %in
188-
%9 = load i8, i8* %7
189-
%10 = load i8, i8* %8
175+
%0 = alloca [3 x i8], align 1, addrspace(5)
176+
%1 = alloca [2 x i8], align 1, addrspace(5)
177+
%2 = getelementptr inbounds [3 x i8], [3 x i8] addrspace(5)* %0, i32 0, i32 0
178+
%3 = getelementptr inbounds [3 x i8], [3 x i8] addrspace(5)* %0, i32 0, i32 1
179+
%4 = getelementptr inbounds [3 x i8], [3 x i8] addrspace(5)* %0, i32 0, i32 2
180+
%5 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(5)* %1, i32 0, i32 0
181+
%6 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(5)* %1, i32 0, i32 1
182+
store i8 0, i8 addrspace(5)* %2
183+
store i8 1, i8 addrspace(5)* %3
184+
store i8 2, i8 addrspace(5)* %4
185+
store i8 1, i8 addrspace(5)* %5
186+
store i8 0, i8 addrspace(5)* %6
187+
%7 = getelementptr inbounds [3 x i8], [3 x i8] addrspace(5)* %0, i32 0, i32 %in
188+
%8 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(5)* %1, i32 0, i32 %in
189+
%9 = load i8, i8 addrspace(5)* %7
190+
%10 = load i8, i8 addrspace(5)* %8
190191
%11 = add i8 %9, %10
191192
%12 = sext i8 %11 to i32
192193
store i32 %12, i32 addrspace(1)* %out
@@ -195,40 +196,40 @@ entry:
195196

196197
define amdgpu_kernel void @char_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
197198
entry:
198-
%alloca = alloca [2 x [2 x i8]]
199-
%gep0 = getelementptr inbounds [2 x [2 x i8]], [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 0
200-
%gep1 = getelementptr inbounds [2 x [2 x i8]], [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 1
201-
store i8 0, i8* %gep0
202-
store i8 1, i8* %gep1
203-
%gep2 = getelementptr inbounds [2 x [2 x i8]], [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 %index
204-
%load = load i8, i8* %gep2
199+
%alloca = alloca [2 x [2 x i8]], addrspace(5)
200+
%gep0 = getelementptr inbounds [2 x [2 x i8]], [2 x [2 x i8]] addrspace(5)* %alloca, i32 0, i32 0, i32 0
201+
%gep1 = getelementptr inbounds [2 x [2 x i8]], [2 x [2 x i8]] addrspace(5)* %alloca, i32 0, i32 0, i32 1
202+
store i8 0, i8 addrspace(5)* %gep0
203+
store i8 1, i8 addrspace(5)* %gep1
204+
%gep2 = getelementptr inbounds [2 x [2 x i8]], [2 x [2 x i8]] addrspace(5)* %alloca, i32 0, i32 0, i32 %index
205+
%load = load i8, i8 addrspace(5)* %gep2
205206
%sext = sext i8 %load to i32
206207
store i32 %sext, i32 addrspace(1)* %out
207208
ret void
208209
}
209210

210211
define amdgpu_kernel void @i32_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
211212
entry:
212-
%alloca = alloca [2 x [2 x i32]]
213-
%gep0 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 0
214-
%gep1 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 1
215-
store i32 0, i32* %gep0
216-
store i32 1, i32* %gep1
217-
%gep2 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 %index
218-
%load = load i32, i32* %gep2
213+
%alloca = alloca [2 x [2 x i32]], addrspace(5)
214+
%gep0 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]] addrspace(5)* %alloca, i32 0, i32 0, i32 0
215+
%gep1 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]] addrspace(5)* %alloca, i32 0, i32 0, i32 1
216+
store i32 0, i32 addrspace(5)* %gep0
217+
store i32 1, i32 addrspace(5)* %gep1
218+
%gep2 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]] addrspace(5)* %alloca, i32 0, i32 0, i32 %index
219+
%load = load i32, i32 addrspace(5)* %gep2
219220
store i32 %load, i32 addrspace(1)* %out
220221
ret void
221222
}
222223

223224
define amdgpu_kernel void @i64_array_array(i64 addrspace(1)* %out, i32 %index) #0 {
224225
entry:
225-
%alloca = alloca [2 x [2 x i64]]
226-
%gep0 = getelementptr inbounds [2 x [2 x i64]], [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 0
227-
%gep1 = getelementptr inbounds [2 x [2 x i64]], [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 1
228-
store i64 0, i64* %gep0
229-
store i64 1, i64* %gep1
230-
%gep2 = getelementptr inbounds [2 x [2 x i64]], [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 %index
231-
%load = load i64, i64* %gep2
226+
%alloca = alloca [2 x [2 x i64]], addrspace(5)
227+
%gep0 = getelementptr inbounds [2 x [2 x i64]], [2 x [2 x i64]] addrspace(5)* %alloca, i32 0, i32 0, i32 0
228+
%gep1 = getelementptr inbounds [2 x [2 x i64]], [2 x [2 x i64]] addrspace(5)* %alloca, i32 0, i32 0, i32 1
229+
store i64 0, i64 addrspace(5)* %gep0
230+
store i64 1, i64 addrspace(5)* %gep1
231+
%gep2 = getelementptr inbounds [2 x [2 x i64]], [2 x [2 x i64]] addrspace(5)* %alloca, i32 0, i32 0, i32 %index
232+
%load = load i64, i64 addrspace(5)* %gep2
232233
store i64 %load, i64 addrspace(1)* %out
233234
ret void
234235
}
@@ -237,40 +238,40 @@ entry:
237238

238239
define amdgpu_kernel void @struct_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
239240
entry:
240-
%alloca = alloca [2 x [2 x %struct.pair32]]
241-
%gep0 = getelementptr inbounds [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 0, i32 1
242-
%gep1 = getelementptr inbounds [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 1, i32 1
243-
store i32 0, i32* %gep0
244-
store i32 1, i32* %gep1
245-
%gep2 = getelementptr inbounds [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 %index, i32 0
246-
%load = load i32, i32* %gep2
241+
%alloca = alloca [2 x [2 x %struct.pair32]], addrspace(5)
242+
%gep0 = getelementptr inbounds [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]] addrspace(5)* %alloca, i32 0, i32 0, i32 0, i32 1
243+
%gep1 = getelementptr inbounds [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]] addrspace(5)* %alloca, i32 0, i32 0, i32 1, i32 1
244+
store i32 0, i32 addrspace(5)* %gep0
245+
store i32 1, i32 addrspace(5)* %gep1
246+
%gep2 = getelementptr inbounds [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]] addrspace(5)* %alloca, i32 0, i32 0, i32 %index, i32 0
247+
%load = load i32, i32 addrspace(5)* %gep2
247248
store i32 %load, i32 addrspace(1)* %out
248249
ret void
249250
}
250251

251252
define amdgpu_kernel void @struct_pair32_array(i32 addrspace(1)* %out, i32 %index) #0 {
252253
entry:
253-
%alloca = alloca [2 x %struct.pair32]
254-
%gep0 = getelementptr inbounds [2 x %struct.pair32], [2 x %struct.pair32]* %alloca, i32 0, i32 0, i32 1
255-
%gep1 = getelementptr inbounds [2 x %struct.pair32], [2 x %struct.pair32]* %alloca, i32 0, i32 1, i32 0
256-
store i32 0, i32* %gep0
257-
store i32 1, i32* %gep1
258-
%gep2 = getelementptr inbounds [2 x %struct.pair32], [2 x %struct.pair32]* %alloca, i32 0, i32 %index, i32 0
259-
%load = load i32, i32* %gep2
254+
%alloca = alloca [2 x %struct.pair32], addrspace(5)
255+
%gep0 = getelementptr inbounds [2 x %struct.pair32], [2 x %struct.pair32] addrspace(5)* %alloca, i32 0, i32 0, i32 1
256+
%gep1 = getelementptr inbounds [2 x %struct.pair32], [2 x %struct.pair32] addrspace(5)* %alloca, i32 0, i32 1, i32 0
257+
store i32 0, i32 addrspace(5)* %gep0
258+
store i32 1, i32 addrspace(5)* %gep1
259+
%gep2 = getelementptr inbounds [2 x %struct.pair32], [2 x %struct.pair32] addrspace(5)* %alloca, i32 0, i32 %index, i32 0
260+
%load = load i32, i32 addrspace(5)* %gep2
260261
store i32 %load, i32 addrspace(1)* %out
261262
ret void
262263
}
263264

264265
define amdgpu_kernel void @select_private(i32 addrspace(1)* %out, i32 %in) nounwind {
265266
entry:
266-
%tmp = alloca [2 x i32]
267-
%tmp1 = getelementptr inbounds [2 x i32], [2 x i32]* %tmp, i32 0, i32 0
268-
%tmp2 = getelementptr inbounds [2 x i32], [2 x i32]* %tmp, i32 0, i32 1
269-
store i32 0, i32* %tmp1
270-
store i32 1, i32* %tmp2
267+
%tmp = alloca [2 x i32], addrspace(5)
268+
%tmp1 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 0
269+
%tmp2 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 1
270+
store i32 0, i32 addrspace(5)* %tmp1
271+
store i32 1, i32 addrspace(5)* %tmp2
271272
%cmp = icmp eq i32 %in, 0
272-
%sel = select i1 %cmp, i32* %tmp1, i32* %tmp2
273-
%load = load i32, i32* %sel
273+
%sel = select i1 %cmp, i32 addrspace(5)* %tmp1, i32 addrspace(5)* %tmp2
274+
%load = load i32, i32 addrspace(5)* %sel
274275
store i32 %load, i32 addrspace(1)* %out
275276
ret void
276277
}
@@ -283,14 +284,14 @@ entry:
283284
; SI: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
284285
; SI: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ;
285286
define amdgpu_kernel void @ptrtoint(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
286-
%alloca = alloca [16 x i32]
287-
%tmp0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
288-
store i32 5, i32* %tmp0
289-
%tmp1 = ptrtoint [16 x i32]* %alloca to i32
287+
%alloca = alloca [16 x i32], addrspace(5)
288+
%tmp0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %a
289+
store i32 5, i32 addrspace(5)* %tmp0
290+
%tmp1 = ptrtoint [16 x i32] addrspace(5)* %alloca to i32
290291
%tmp2 = add i32 %tmp1, 5
291-
%tmp3 = inttoptr i32 %tmp2 to i32*
292-
%tmp4 = getelementptr inbounds i32, i32* %tmp3, i32 %b
293-
%tmp5 = load i32, i32* %tmp4
292+
%tmp3 = inttoptr i32 %tmp2 to i32 addrspace(5)*
293+
%tmp4 = getelementptr inbounds i32, i32 addrspace(5)* %tmp3, i32 %b
294+
%tmp5 = load i32, i32 addrspace(5)* %tmp4
294295
store i32 %tmp5, i32 addrspace(1)* %out
295296
ret void
296297
}

test/CodeGen/AMDGPU/unknown-processor.ll

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
; RUN: llc -march=amdgcn -mcpu=unknown -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR -check-prefix=GCN %s
2-
; RUN: llc -march=r600 -mcpu=unknown -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR -check-prefix=R600 %s
1+
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=unknown -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR -check-prefix=GCN %s
2+
; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=unknown -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR -check-prefix=R600 %s
3+
target datalayout = "A5"
34

45
; Should not crash when the processor is not recognized and the
56
; wavefront size feature not set.
@@ -14,7 +15,7 @@
1415

1516
; R600: MOV
1617
define amdgpu_kernel void @foo() {
17-
%alloca = alloca i32, align 4
18-
store volatile i32 0, i32* %alloca
18+
%alloca = alloca i32, align 4, addrspace(5)
19+
store volatile i32 0, i32 addrspace(5)* %alloca
1920
ret void
2021
}

0 commit comments

Comments
 (0)