Skip to content

Commit e3c1b93

Browse files
committed
[merge] regen 9 opencl lit tests, match trunk now
clang/test/CodeGenOpenCL/addr-space-struct-arg.cl clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl clang/test/CodeGenOpenCL/atomic-ops.cl clang/test/CodeGenOpenCL/blocks.cl clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl clang/test/CodeGenOpenCL/builtins-amdgcn-global-load-lds.cl clang/test/CodeGenOpenCL/implicit-addrspacecast-function-parameter.cl
1 parent ab9b255 commit e3c1b93

9 files changed

+435
-298
lines changed

clang/test/CodeGenOpenCL/addr-space-struct-arg.cl

Lines changed: 122 additions & 87 deletions
Large diffs are not rendered by default.

clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,9 @@
1515
// CL20-SAME: ptr noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] {
1616
// CL20-NEXT: [[ENTRY:.*:]]
1717
// CL20-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
18-
// CL20-NEXT: store ptr [[X]], ptr addrspace(5) [[X_ADDR]], align 8
19-
// CL20-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[X_ADDR]], align 8
18+
// CL20-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
19+
// CL20-NEXT: store ptr [[X]], ptr [[X_ADDR_ASCAST]], align 8
20+
// CL20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8
2021
// CL20-NEXT: store i32 1, ptr [[TMP0]], align 4
2122
// CL20-NEXT: ret void
2223
//

clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl

Lines changed: 122 additions & 87 deletions
Large diffs are not rendered by default.

clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl

Lines changed: 83 additions & 52 deletions
Large diffs are not rendered by default.

clang/test/CodeGenOpenCL/atomic-ops.cl

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -344,11 +344,14 @@ int test_volatile(volatile atomic_int *i) {
344344
// CHECK-LABEL: @test_volatile
345345
// CHECK: %[[i_addr:.*]] = alloca ptr
346346
// CHECK-NEXT: %[[atomicdst:.*]] = alloca i32
347-
// CHECK-NEXT: store ptr %i, ptr addrspace(5) %[[i_addr]]
348-
// CHECK-NEXT: %[[addr:.*]] = load ptr, ptr addrspace(5) %[[i_addr]]
347+
// CHECK-NEXT: %[[retval_ascast:.*]] = addrspacecast ptr addrspace(5) {{.*}} to ptr
348+
// CHECK-NEXT: %[[i_addr_ascast:.*]] = addrspacecast ptr addrspace(5) %[[i_addr]] to ptr
349+
// CHECK-NEXT: %[[atomicdst_ascast:.*]] = addrspacecast ptr addrspace(5) %[[atomicdst]] to ptr
350+
// CHECK-NEXT: store ptr %i, ptr %[[i_addr_ascast]]
351+
// CHECK-NEXT: %[[addr:.*]] = load ptr, ptr %[[i_addr_ascast]]
349352
// CHECK-NEXT: %[[res:.*]] = load atomic volatile i32, ptr %[[addr]] syncscope("workgroup") seq_cst, align 4{{$}}
350-
// CHECK-NEXT: store i32 %[[res]], ptr addrspace(5) %[[atomicdst]]
351-
// CHECK-NEXT: %[[retval:.*]] = load i32, ptr addrspace(5) %[[atomicdst]]
353+
// CHECK-NEXT: store i32 %[[res]], ptr %[[atomicdst_ascast]]
354+
// CHECK-NEXT: %[[retval:.*]] = load i32, ptr %[[atomicdst_ascast]]
352355
// CHECK-NEXT: ret i32 %[[retval]]
353356
return __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
354357
}

clang/test/CodeGenOpenCL/blocks.cl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,13 @@ void foo(){
2525
// COMMON-NOT: %block.reserved
2626
// COMMON-NOT: %block.descriptor
2727
// SPIR: %[[block_size:.*]] = getelementptr inbounds nuw <{ i32, i32, ptr addrspace(4), i32 }>, ptr %block, i32 0, i32 0
28-
// AMDGCN: %[[block_size:.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i32 }>, ptr addrspace(5) %block, i32 0, i32 0
28+
// AMDGCN: %[[block_size:.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i32 }>, ptr %block{{.*}}, i32 0, i32 0
2929
// SPIR: store i32 16, ptr %[[block_size]]
30-
// AMDGCN: store i32 20, ptr addrspace(5) %[[block_size]]
30+
// AMDGCN: store i32 20, ptr %[[block_size]]
3131
// SPIR: %[[block_align:.*]] = getelementptr inbounds nuw <{ i32, i32, ptr addrspace(4), i32 }>, ptr %block, i32 0, i32 1
32-
// AMDGCN: %[[block_align:.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i32 }>, ptr addrspace(5) %block, i32 0, i32 1
32+
// AMDGCN: %[[block_align:.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i32 }>, ptr %block{{.*}}, i32 0, i32 1
3333
// SPIR: store i32 4, ptr %[[block_align]]
34-
// AMDGCN: store i32 8, ptr addrspace(5) %[[block_align]]
34+
// AMDGCN: store i32 8, ptr %[[block_align]]
3535
// SPIR: %[[block_invoke:.*]] = getelementptr inbounds nuw <{ i32, i32, ptr addrspace(4), i32 }>, ptr %[[block:.*]], i32 0, i32 2
3636
// SPIR: store ptr addrspace(4) addrspacecast (ptr @__foo_block_invoke to ptr addrspace(4)), ptr %[[block_invoke]]
3737
// SPIR: %[[block_captured:.*]] = getelementptr inbounds nuw <{ i32, i32, ptr addrspace(4), i32 }>, ptr %[[block]], i32 0, i32 3

clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl

Lines changed: 73 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,9 @@ typedef unsigned int uint;
88
// CHECK-LABEL: @test_s_sleep_var(
99
// CHECK-NEXT: entry:
1010
// CHECK-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
11-
// CHECK-NEXT: store i32 [[D:%.*]], ptr addrspace(5) [[D_ADDR]], align 4
12-
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[D_ADDR]], align 4
11+
// CHECK-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
12+
// CHECK-NEXT: store i32 [[D:%.*]], ptr [[D_ADDR_ASCAST]], align 4
13+
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[D_ADDR_ASCAST]], align 4
1314
// CHECK-NEXT: call void @llvm.amdgcn.s.sleep.var(i32 [[TMP0]])
1415
// CHECK-NEXT: call void @llvm.amdgcn.s.sleep.var(i32 15)
1516
// CHECK-NEXT: ret void
@@ -26,15 +27,19 @@ void test_s_sleep_var(int d)
2627
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
2728
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
2829
// CHECK-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
29-
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr addrspace(5) [[OUT_ADDR]], align 8
30-
// CHECK-NEXT: store i32 [[A:%.*]], ptr addrspace(5) [[A_ADDR]], align 4
31-
// CHECK-NEXT: store i32 [[B:%.*]], ptr addrspace(5) [[B_ADDR]], align 4
32-
// CHECK-NEXT: store i32 [[C:%.*]], ptr addrspace(5) [[C_ADDR]], align 4
33-
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4
34-
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[B_ADDR]], align 4
35-
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[C_ADDR]], align 4
30+
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
31+
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
32+
// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
33+
// CHECK-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
34+
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
35+
// CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
36+
// CHECK-NEXT: store i32 [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 4
37+
// CHECK-NEXT: store i32 [[C:%.*]], ptr [[C_ADDR_ASCAST]], align 4
38+
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
39+
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR_ASCAST]], align 4
40+
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[C_ADDR_ASCAST]], align 4
3641
// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.permlane16.var(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i1 false, i1 false)
37-
// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
42+
// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
3843
// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
3944
// CHECK-NEXT: ret void
4045
//
@@ -48,15 +53,19 @@ void test_permlane16_var(global uint* out, uint a, uint b, uint c) {
4853
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
4954
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
5055
// CHECK-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
51-
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr addrspace(5) [[OUT_ADDR]], align 8
52-
// CHECK-NEXT: store i32 [[A:%.*]], ptr addrspace(5) [[A_ADDR]], align 4
53-
// CHECK-NEXT: store i32 [[B:%.*]], ptr addrspace(5) [[B_ADDR]], align 4
54-
// CHECK-NEXT: store i32 [[C:%.*]], ptr addrspace(5) [[C_ADDR]], align 4
55-
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4
56-
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[B_ADDR]], align 4
57-
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[C_ADDR]], align 4
56+
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
57+
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
58+
// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
59+
// CHECK-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
60+
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
61+
// CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
62+
// CHECK-NEXT: store i32 [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 4
63+
// CHECK-NEXT: store i32 [[C:%.*]], ptr [[C_ADDR_ASCAST]], align 4
64+
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
65+
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR_ASCAST]], align 4
66+
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[C_ADDR_ASCAST]], align 4
5867
// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.permlanex16.var(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i1 false, i1 false)
59-
// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
68+
// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
6069
// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
6170
// CHECK-NEXT: ret void
6271
//
@@ -80,11 +89,13 @@ void test_s_barrier_signal()
8089
// CHECK-NEXT: entry:
8190
// CHECK-NEXT: [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
8291
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
83-
// CHECK-NEXT: store ptr [[BAR:%.*]], ptr addrspace(5) [[BAR_ADDR]], align 8
84-
// CHECK-NEXT: store i32 [[A:%.*]], ptr addrspace(5) [[A_ADDR]], align 4
85-
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[BAR_ADDR]], align 8
92+
// CHECK-NEXT: [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr
93+
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
94+
// CHECK-NEXT: store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8
95+
// CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
96+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8
8697
// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
87-
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4
98+
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
8899
// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) [[TMP1]], i32 [[TMP2]])
89100
// CHECK-NEXT: ret void
90101
//
@@ -98,18 +109,21 @@ void test_s_barrier_signal_var(void *bar, int a)
98109
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
99110
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
100111
// CHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
101-
// CHECK-NEXT: store ptr [[A:%.*]], ptr addrspace(5) [[A_ADDR]], align 8
102-
// CHECK-NEXT: store ptr [[B:%.*]], ptr addrspace(5) [[B_ADDR]], align 8
103-
// CHECK-NEXT: store ptr [[C:%.*]], ptr addrspace(5) [[C_ADDR]], align 8
112+
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
113+
// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
114+
// CHECK-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
115+
// CHECK-NEXT: store ptr [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 8
116+
// CHECK-NEXT: store ptr [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 8
117+
// CHECK-NEXT: store ptr [[C:%.*]], ptr [[C_ADDR_ASCAST]], align 8
104118
// CHECK-NEXT: [[TMP0:%.*]] = call i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32 1)
105119
// CHECK-NEXT: br i1 [[TMP0]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
106120
// CHECK: if.then:
107-
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr addrspace(5) [[B_ADDR]], align 8
108-
// CHECK-NEXT: store ptr [[TMP1]], ptr addrspace(5) [[A_ADDR]], align 8
121+
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
122+
// CHECK-NEXT: store ptr [[TMP1]], ptr [[A_ADDR_ASCAST]], align 8
109123
// CHECK-NEXT: br label [[IF_END:%.*]]
110124
// CHECK: if.else:
111-
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[C_ADDR]], align 8
112-
// CHECK-NEXT: store ptr [[TMP2]], ptr addrspace(5) [[A_ADDR]], align 8
125+
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8
126+
// CHECK-NEXT: store ptr [[TMP2]], ptr [[A_ADDR_ASCAST]], align 8
113127
// CHECK-NEXT: br label [[IF_END]]
114128
// CHECK: if.end:
115129
// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.wait(i16 1)
@@ -127,6 +141,7 @@ void test_s_barrier_signal_isfirst(int* a, int* b, int *c)
127141

128142
// CHECK-LABEL: @test_s_get_barrier_state(
129143
// CHECK-NEXT: entry:
144+
// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
130145
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
131146
// CHECK-NEXT: [[STATE:%.*]] = alloca i32, align 4, addrspace(5)
132147
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
@@ -146,6 +161,7 @@ unsigned test_s_get_barrier_state(int a)
146161

147162
// CHECK-LABEL: @test_s_get_named_barrier_state(
148163
// CHECK-NEXT: entry:
164+
// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
149165
// CHECK-NEXT: [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
150166
// CHECK-NEXT: [[STATE:%.*]] = alloca i32, align 4, addrspace(5)
151167
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
@@ -190,16 +206,20 @@ void test_s_ttracedata_imm()
190206
// CHECK-NEXT: [[GP_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
191207
// CHECK-NEXT: [[CP_ADDR:%.*]] = alloca ptr addrspace(4), align 8, addrspace(5)
192208
// CHECK-NEXT: [[LEN_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
193-
// CHECK-NEXT: store ptr [[FP:%.*]], ptr addrspace(5) [[FP_ADDR]], align 8
194-
// CHECK-NEXT: store ptr addrspace(1) [[GP:%.*]], ptr addrspace(5) [[GP_ADDR]], align 8
195-
// CHECK-NEXT: store ptr addrspace(4) [[CP:%.*]], ptr addrspace(5) [[CP_ADDR]], align 8
196-
// CHECK-NEXT: store i32 [[LEN:%.*]], ptr addrspace(5) [[LEN_ADDR]], align 4
197-
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[FP_ADDR]], align 8
209+
// CHECK-NEXT: [[FP_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FP_ADDR]] to ptr
210+
// CHECK-NEXT: [[GP_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GP_ADDR]] to ptr
211+
// CHECK-NEXT: [[CP_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CP_ADDR]] to ptr
212+
// CHECK-NEXT: [[LEN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LEN_ADDR]] to ptr
213+
// CHECK-NEXT: store ptr [[FP:%.*]], ptr [[FP_ADDR_ASCAST]], align 8
214+
// CHECK-NEXT: store ptr addrspace(1) [[GP:%.*]], ptr [[GP_ADDR_ASCAST]], align 8
215+
// CHECK-NEXT: store ptr addrspace(4) [[CP:%.*]], ptr [[CP_ADDR_ASCAST]], align 8
216+
// CHECK-NEXT: store i32 [[LEN:%.*]], ptr [[LEN_ADDR_ASCAST]], align 4
217+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[FP_ADDR_ASCAST]], align 8
198218
// CHECK-NEXT: call void @llvm.amdgcn.s.prefetch.data.p0(ptr [[TMP0]], i32 0)
199-
// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[GP_ADDR]], align 8
200-
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[LEN_ADDR]], align 4
219+
// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[GP_ADDR_ASCAST]], align 8
220+
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[LEN_ADDR_ASCAST]], align 4
201221
// CHECK-NEXT: call void @llvm.amdgcn.s.prefetch.data.p1(ptr addrspace(1) [[TMP1]], i32 [[TMP2]])
202-
// CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(5) [[CP_ADDR]], align 8
222+
// CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(4), ptr [[CP_ADDR_ASCAST]], align 8
203223
// CHECK-NEXT: call void @llvm.amdgcn.s.prefetch.data.p4(ptr addrspace(4) [[TMP3]], i32 31)
204224
// CHECK-NEXT: ret void
205225
//
@@ -214,12 +234,14 @@ void test_s_prefetch_data(int *fp, global float *gp, constant char *cp, unsigned
214234
// CHECK-NEXT: entry:
215235
// CHECK-NEXT: [[RSRC_ADDR:%.*]] = alloca ptr addrspace(8), align 16, addrspace(5)
216236
// CHECK-NEXT: [[LEN_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
217-
// CHECK-NEXT: store ptr addrspace(8) [[RSRC:%.*]], ptr addrspace(5) [[RSRC_ADDR]], align 16
218-
// CHECK-NEXT: store i32 [[LEN:%.*]], ptr addrspace(5) [[LEN_ADDR]], align 4
219-
// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(8), ptr addrspace(5) [[RSRC_ADDR]], align 16
220-
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[LEN_ADDR]], align 4
237+
// CHECK-NEXT: [[RSRC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RSRC_ADDR]] to ptr
238+
// CHECK-NEXT: [[LEN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LEN_ADDR]] to ptr
239+
// CHECK-NEXT: store ptr addrspace(8) [[RSRC:%.*]], ptr [[RSRC_ADDR_ASCAST]], align 16
240+
// CHECK-NEXT: store i32 [[LEN:%.*]], ptr [[LEN_ADDR_ASCAST]], align 4
241+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(8), ptr [[RSRC_ADDR_ASCAST]], align 16
242+
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[LEN_ADDR_ASCAST]], align 4
221243
// CHECK-NEXT: call void @llvm.amdgcn.s.buffer.prefetch.data(ptr addrspace(8) [[TMP0]], i32 128, i32 [[TMP1]])
222-
// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(8), ptr addrspace(5) [[RSRC_ADDR]], align 16
244+
// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(8), ptr [[RSRC_ADDR_ASCAST]], align 16
223245
// CHECK-NEXT: call void @llvm.amdgcn.s.buffer.prefetch.data(ptr addrspace(8) [[TMP2]], i32 0, i32 31)
224246
// CHECK-NEXT: ret void
225247
//
@@ -234,13 +256,16 @@ void test_s_buffer_prefetch_data(__amdgpu_buffer_rsrc_t rsrc, unsigned int len)
234256
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
235257
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
236258
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
237-
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr addrspace(5) [[OUT_ADDR]], align 8
238-
// CHECK-NEXT: store i32 [[A:%.*]], ptr addrspace(5) [[A_ADDR]], align 4
239-
// CHECK-NEXT: store i32 [[B:%.*]], ptr addrspace(5) [[B_ADDR]], align 4
240-
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4
241-
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[B_ADDR]], align 4
259+
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
260+
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
261+
// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
262+
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
263+
// CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
264+
// CHECK-NEXT: store i32 [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 4
265+
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
266+
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR_ASCAST]], align 4
242267
// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.ds.bpermute.fi.b32(i32 [[TMP0]], i32 [[TMP1]])
243-
// CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
268+
// CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
244269
// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
245270
// CHECK-NEXT: ret void
246271
//

0 commit comments

Comments
 (0)