Skip to content

Commit 2fc0601

Browse files
committed
[AMDGPU] Add vector processing support to AMDGPU printf
1 parent ba0d4e5 commit 2fc0601

File tree

2 files changed

+288
-52
lines changed

2 files changed

+288
-52
lines changed

clang/test/CodeGenOpenCL/amdgpu-printf.cl

Lines changed: 204 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,68 @@
11
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2-
// RUN: %clang_cc1 -cl-std=CL1.2 -triple amdgcn-amd-amdhsa -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
2+
// RUN: %clang_cc1 -cl-std=CL1.2 -triple amdgcn-amd-amdhsa -mprintf-kind=buffered -disable-llvm-passes -emit-llvm -o - %s | FileCheck --check-prefix=CHECK_BUFFERED %s
3+
// RUN: %clang_cc1 -cl-std=CL1.2 -triple amdgcn-amd-amdhsa -mprintf-kind=hostcall -disable-llvm-passes -emit-llvm -o - %s | FileCheck --check-prefix=CHECK_HOSTCALL %s
34

45
int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2)));
56

67
// CHECK-LABEL: @test_printf_noargs(
78
// CHECK-NEXT: entry:
89
// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr addrspace(4) noundef @.str) #[[ATTR4:[0-9]+]]
910
// CHECK-NEXT: ret void
11+
// CHECK_BUFFERED-LABEL: @test_printf_noargs(
12+
// CHECK_BUFFERED-NEXT: entry:
13+
// CHECK_BUFFERED-NEXT: br i1 icmp eq (ptr addrspacecast (ptr addrspace(4) @.str to ptr), ptr null), label [[STRLEN_JOIN:%.*]], label [[STRLEN_WHILE:%.*]]
14+
// CHECK_BUFFERED: strlen.while:
15+
// CHECK_BUFFERED-NEXT: [[TMP0:%.*]] = phi ptr [ addrspacecast (ptr addrspace(4) @.str to ptr), [[ENTRY:%.*]] ], [ [[TMP1:%.*]], [[STRLEN_WHILE]] ]
16+
// CHECK_BUFFERED-NEXT: [[TMP1]] = getelementptr i8, ptr [[TMP0]], i64 1
17+
// CHECK_BUFFERED-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP0]], align 1
18+
// CHECK_BUFFERED-NEXT: [[TMP3:%.*]] = icmp eq i8 [[TMP2]], 0
19+
// CHECK_BUFFERED-NEXT: br i1 [[TMP3]], label [[STRLEN_WHILE_DONE:%.*]], label [[STRLEN_WHILE]]
20+
// CHECK_BUFFERED: strlen.while.done:
21+
// CHECK_BUFFERED-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP0]] to i64
22+
// CHECK_BUFFERED-NEXT: [[TMP5:%.*]] = sub i64 [[TMP4]], ptrtoint (ptr addrspacecast (ptr addrspace(4) @.str to ptr) to i64)
23+
// CHECK_BUFFERED-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 1
24+
// CHECK_BUFFERED-NEXT: br label [[STRLEN_JOIN]]
25+
// CHECK_BUFFERED: strlen.join:
26+
// CHECK_BUFFERED-NEXT: [[TMP7:%.*]] = phi i64 [ [[TMP6]], [[STRLEN_WHILE_DONE]] ], [ 0, [[ENTRY]] ]
27+
// CHECK_BUFFERED-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], 7
28+
// CHECK_BUFFERED-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 4294967288
29+
// CHECK_BUFFERED-NEXT: [[TMP10:%.*]] = add i64 [[TMP9]], 4
30+
// CHECK_BUFFERED-NEXT: [[TMP11:%.*]] = trunc i64 [[TMP10]] to i32
31+
// CHECK_BUFFERED-NEXT: [[PRINTF_ALLOC_FN:%.*]] = call ptr addrspace(1) @__printf_alloc(i32 [[TMP11]])
32+
// CHECK_BUFFERED-NEXT: [[TMP12:%.*]] = icmp ne ptr addrspace(1) [[PRINTF_ALLOC_FN]], null
33+
// CHECK_BUFFERED-NEXT: br i1 [[TMP12]], label [[ARGPUSH_BLOCK:%.*]], label [[END_BLOCK:%.*]]
34+
// CHECK_BUFFERED: end.block:
35+
// CHECK_BUFFERED-NEXT: [[TMP13:%.*]] = xor i1 [[TMP12]], true
36+
// CHECK_BUFFERED-NEXT: [[PRINTF_RESULT:%.*]] = sext i1 [[TMP13]] to i32
37+
// CHECK_BUFFERED-NEXT: ret void
38+
// CHECK_BUFFERED: argpush.block:
39+
// CHECK_BUFFERED-NEXT: [[TMP14:%.*]] = shl i32 [[TMP11]], 2
40+
// CHECK_BUFFERED-NEXT: store i32 [[TMP14]], ptr addrspace(1) [[PRINTF_ALLOC_FN]], align 4
41+
// CHECK_BUFFERED-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PRINTF_ALLOC_FN]], i32 4
42+
// CHECK_BUFFERED-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 1 [[TMP15]], ptr align 1 addrspacecast (ptr addrspace(4) @.str to ptr), i64 [[TMP7]], i1 false)
43+
// CHECK_BUFFERED-NEXT: [[PRINTBUFFNEXTPTR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP15]], i64 [[TMP9]]
44+
// CHECK_BUFFERED-NEXT: br label [[END_BLOCK]]
45+
//
46+
// CHECK_HOSTCALL-LABEL: @test_printf_noargs(
47+
// CHECK_HOSTCALL-NEXT: entry:
48+
// CHECK_HOSTCALL-NEXT: [[TMP0:%.*]] = call i64 @__ockl_printf_begin(i64 0)
49+
// CHECK_HOSTCALL-NEXT: br i1 icmp eq (ptr addrspacecast (ptr addrspace(4) @.str to ptr), ptr null), label [[STRLEN_JOIN:%.*]], label [[STRLEN_WHILE:%.*]]
50+
// CHECK_HOSTCALL: strlen.while:
51+
// CHECK_HOSTCALL-NEXT: [[TMP1:%.*]] = phi ptr [ addrspacecast (ptr addrspace(4) @.str to ptr), [[ENTRY:%.*]] ], [ [[TMP2:%.*]], [[STRLEN_WHILE]] ]
52+
// CHECK_HOSTCALL-NEXT: [[TMP2]] = getelementptr i8, ptr [[TMP1]], i64 1
53+
// CHECK_HOSTCALL-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP1]], align 1
54+
// CHECK_HOSTCALL-NEXT: [[TMP4:%.*]] = icmp eq i8 [[TMP3]], 0
55+
// CHECK_HOSTCALL-NEXT: br i1 [[TMP4]], label [[STRLEN_WHILE_DONE:%.*]], label [[STRLEN_WHILE]]
56+
// CHECK_HOSTCALL: strlen.while.done:
57+
// CHECK_HOSTCALL-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[TMP1]] to i64
58+
// CHECK_HOSTCALL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], ptrtoint (ptr addrspacecast (ptr addrspace(4) @.str to ptr) to i64)
59+
// CHECK_HOSTCALL-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 1
60+
// CHECK_HOSTCALL-NEXT: br label [[STRLEN_JOIN]]
61+
// CHECK_HOSTCALL: strlen.join:
62+
// CHECK_HOSTCALL-NEXT: [[TMP8:%.*]] = phi i64 [ [[TMP7]], [[STRLEN_WHILE_DONE]] ], [ 0, [[ENTRY]] ]
63+
// CHECK_HOSTCALL-NEXT: [[TMP9:%.*]] = call i64 @__ockl_printf_append_string_n(i64 [[TMP0]], ptr addrspacecast (ptr addrspace(4) @.str to ptr), i64 [[TMP8]], i32 1)
64+
// CHECK_HOSTCALL-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
65+
// CHECK_HOSTCALL-NEXT: ret void
1066
//
1167
__kernel void test_printf_noargs() {
1268
printf("");
@@ -19,6 +75,53 @@ __kernel void test_printf_noargs() {
1975
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA8]]
2076
// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr addrspace(4) noundef @.str.1, i32 noundef [[TMP0]]) #[[ATTR4]]
2177
// CHECK-NEXT: ret void
78+
// CHECK_BUFFERED-LABEL: @test_printf_int(
79+
// CHECK_BUFFERED-NEXT: entry:
80+
// CHECK_BUFFERED-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
81+
// CHECK_BUFFERED-NEXT: store i32 [[I:%.*]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA12:![0-9]+]]
82+
// CHECK_BUFFERED-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA12]]
83+
// CHECK_BUFFERED-NEXT: [[PRINTF_ALLOC_FN:%.*]] = call ptr addrspace(1) @__printf_alloc(i32 20)
84+
// CHECK_BUFFERED-NEXT: [[TMP1:%.*]] = icmp ne ptr addrspace(1) [[PRINTF_ALLOC_FN]], null
85+
// CHECK_BUFFERED-NEXT: br i1 [[TMP1]], label [[ARGPUSH_BLOCK:%.*]], label [[END_BLOCK:%.*]]
86+
// CHECK_BUFFERED: end.block:
87+
// CHECK_BUFFERED-NEXT: [[TMP2:%.*]] = xor i1 [[TMP1]], true
88+
// CHECK_BUFFERED-NEXT: [[PRINTF_RESULT:%.*]] = sext i1 [[TMP2]] to i32
89+
// CHECK_BUFFERED-NEXT: ret void
90+
// CHECK_BUFFERED: argpush.block:
91+
// CHECK_BUFFERED-NEXT: store i32 82, ptr addrspace(1) [[PRINTF_ALLOC_FN]], align 4
92+
// CHECK_BUFFERED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PRINTF_ALLOC_FN]], i32 4
93+
// CHECK_BUFFERED-NEXT: store i64 -2582314622382785113, ptr addrspace(1) [[TMP3]], align 8
94+
// CHECK_BUFFERED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP3]], i32 8
95+
// CHECK_BUFFERED-NEXT: [[TMP5:%.*]] = zext i32 [[TMP0]] to i64
96+
// CHECK_BUFFERED-NEXT: store i64 [[TMP5]], ptr addrspace(1) [[TMP4]], align 8
97+
// CHECK_BUFFERED-NEXT: [[PRINTBUFFNEXTPTR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP4]], i32 8
98+
// CHECK_BUFFERED-NEXT: br label [[END_BLOCK]]
99+
//
100+
// CHECK_HOSTCALL-LABEL: @test_printf_int(
101+
// CHECK_HOSTCALL-NEXT: entry:
102+
// CHECK_HOSTCALL-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
103+
// CHECK_HOSTCALL-NEXT: store i32 [[I:%.*]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9:![0-9]+]]
104+
// CHECK_HOSTCALL-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]]
105+
// CHECK_HOSTCALL-NEXT: [[TMP1:%.*]] = call i64 @__ockl_printf_begin(i64 0)
106+
// CHECK_HOSTCALL-NEXT: br i1 icmp eq (ptr addrspacecast (ptr addrspace(4) @.str.1 to ptr), ptr null), label [[STRLEN_JOIN:%.*]], label [[STRLEN_WHILE:%.*]]
107+
// CHECK_HOSTCALL: strlen.while:
108+
// CHECK_HOSTCALL-NEXT: [[TMP2:%.*]] = phi ptr [ addrspacecast (ptr addrspace(4) @.str.1 to ptr), [[ENTRY:%.*]] ], [ [[TMP3:%.*]], [[STRLEN_WHILE]] ]
109+
// CHECK_HOSTCALL-NEXT: [[TMP3]] = getelementptr i8, ptr [[TMP2]], i64 1
110+
// CHECK_HOSTCALL-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP2]], align 1
111+
// CHECK_HOSTCALL-NEXT: [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 0
112+
// CHECK_HOSTCALL-NEXT: br i1 [[TMP5]], label [[STRLEN_WHILE_DONE:%.*]], label [[STRLEN_WHILE]]
113+
// CHECK_HOSTCALL: strlen.while.done:
114+
// CHECK_HOSTCALL-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP2]] to i64
115+
// CHECK_HOSTCALL-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], ptrtoint (ptr addrspacecast (ptr addrspace(4) @.str.1 to ptr) to i64)
116+
// CHECK_HOSTCALL-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], 1
117+
// CHECK_HOSTCALL-NEXT: br label [[STRLEN_JOIN]]
118+
// CHECK_HOSTCALL: strlen.join:
119+
// CHECK_HOSTCALL-NEXT: [[TMP9:%.*]] = phi i64 [ [[TMP8]], [[STRLEN_WHILE_DONE]] ], [ 0, [[ENTRY]] ]
120+
// CHECK_HOSTCALL-NEXT: [[TMP10:%.*]] = call i64 @__ockl_printf_append_string_n(i64 [[TMP1]], ptr addrspacecast (ptr addrspace(4) @.str.1 to ptr), i64 [[TMP9]], i32 0)
121+
// CHECK_HOSTCALL-NEXT: [[TMP11:%.*]] = zext i32 [[TMP0]] to i64
122+
// CHECK_HOSTCALL-NEXT: [[TMP12:%.*]] = call i64 @__ockl_printf_append_args(i64 [[TMP10]], i32 1, i64 [[TMP11]], i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i32 1)
123+
// CHECK_HOSTCALL-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i32
124+
// CHECK_HOSTCALL-NEXT: ret void
22125
//
23126
__kernel void test_printf_int(int i) {
24127
printf("%d", i);
@@ -43,6 +146,106 @@ __kernel void test_printf_int(int i) {
43146
// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr addrspace(4) noundef @.str.2, ptr addrspace(5) noundef [[ARRAYDECAY]], i32 noundef [[TMP2]]) #[[ATTR4]]
44147
// CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[S]]) #[[ATTR5]]
45148
// CHECK-NEXT: ret void
149+
// CHECK_BUFFERED-LABEL: @test_printf_str_int(
150+
// CHECK_BUFFERED-NEXT: entry:
151+
// CHECK_BUFFERED-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
152+
// CHECK_BUFFERED-NEXT: [[S:%.*]] = alloca [4 x i8], align 1, addrspace(5)
153+
// CHECK_BUFFERED-NEXT: store i32 [[I:%.*]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA12]]
154+
// CHECK_BUFFERED-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[S]]) #[[ATTR1:[0-9]+]]
155+
// CHECK_BUFFERED-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 1 [[S]], ptr addrspace(4) align 1 @__const.test_printf_str_int.s, i64 4, i1 false)
156+
// CHECK_BUFFERED-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr addrspace(5) [[S]], i64 0, i64 0
157+
// CHECK_BUFFERED-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA12]]
158+
// CHECK_BUFFERED-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[ARRAYDECAY]] to ptr
159+
// CHECK_BUFFERED-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP1]], null
160+
// CHECK_BUFFERED-NEXT: br i1 [[TMP2]], label [[STRLEN_JOIN:%.*]], label [[STRLEN_WHILE:%.*]]
161+
// CHECK_BUFFERED: strlen.while:
162+
// CHECK_BUFFERED-NEXT: [[TMP3:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[TMP4:%.*]], [[STRLEN_WHILE]] ]
163+
// CHECK_BUFFERED-NEXT: [[TMP4]] = getelementptr i8, ptr [[TMP3]], i64 1
164+
// CHECK_BUFFERED-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP3]], align 1
165+
// CHECK_BUFFERED-NEXT: [[TMP6:%.*]] = icmp eq i8 [[TMP5]], 0
166+
// CHECK_BUFFERED-NEXT: br i1 [[TMP6]], label [[STRLEN_WHILE_DONE:%.*]], label [[STRLEN_WHILE]]
167+
// CHECK_BUFFERED: strlen.while.done:
168+
// CHECK_BUFFERED-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP1]] to i64
169+
// CHECK_BUFFERED-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP3]] to i64
170+
// CHECK_BUFFERED-NEXT: [[TMP9:%.*]] = sub i64 [[TMP8]], [[TMP7]]
171+
// CHECK_BUFFERED-NEXT: [[TMP10:%.*]] = add i64 [[TMP9]], 1
172+
// CHECK_BUFFERED-NEXT: br label [[STRLEN_JOIN]]
173+
// CHECK_BUFFERED: strlen.join:
174+
// CHECK_BUFFERED-NEXT: [[TMP11:%.*]] = phi i64 [ [[TMP10]], [[STRLEN_WHILE_DONE]] ], [ 0, [[ENTRY]] ]
175+
// CHECK_BUFFERED-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], 7
176+
// CHECK_BUFFERED-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 4294967288
177+
// CHECK_BUFFERED-NEXT: [[TMP14:%.*]] = add i64 [[TMP13]], 20
178+
// CHECK_BUFFERED-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32
179+
// CHECK_BUFFERED-NEXT: [[PRINTF_ALLOC_FN:%.*]] = call ptr addrspace(1) @__printf_alloc(i32 [[TMP15]])
180+
// CHECK_BUFFERED-NEXT: [[TMP16:%.*]] = icmp ne ptr addrspace(1) [[PRINTF_ALLOC_FN]], null
181+
// CHECK_BUFFERED-NEXT: br i1 [[TMP16]], label [[ARGPUSH_BLOCK:%.*]], label [[END_BLOCK:%.*]]
182+
// CHECK_BUFFERED: end.block:
183+
// CHECK_BUFFERED-NEXT: [[TMP17:%.*]] = xor i1 [[TMP16]], true
184+
// CHECK_BUFFERED-NEXT: [[PRINTF_RESULT:%.*]] = sext i1 [[TMP17]] to i32
185+
// CHECK_BUFFERED-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[S]]) #[[ATTR1]]
186+
// CHECK_BUFFERED-NEXT: ret void
187+
// CHECK_BUFFERED: argpush.block:
188+
// CHECK_BUFFERED-NEXT: [[TMP18:%.*]] = shl i32 [[TMP15]], 2
189+
// CHECK_BUFFERED-NEXT: [[TMP19:%.*]] = or i32 [[TMP18]], 2
190+
// CHECK_BUFFERED-NEXT: store i32 [[TMP19]], ptr addrspace(1) [[PRINTF_ALLOC_FN]], align 4
191+
// CHECK_BUFFERED-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PRINTF_ALLOC_FN]], i32 4
192+
// CHECK_BUFFERED-NEXT: store i64 -2942283388077972797, ptr addrspace(1) [[TMP20]], align 8
193+
// CHECK_BUFFERED-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i32 8
194+
// CHECK_BUFFERED-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 1 [[TMP21]], ptr align 1 [[TMP1]], i64 [[TMP11]], i1 false)
195+
// CHECK_BUFFERED-NEXT: [[PRINTBUFFNEXTPTR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP21]], i64 [[TMP13]]
196+
// CHECK_BUFFERED-NEXT: [[TMP22:%.*]] = zext i32 [[TMP0]] to i64
197+
// CHECK_BUFFERED-NEXT: store i64 [[TMP22]], ptr addrspace(1) [[PRINTBUFFNEXTPTR]], align 8
198+
// CHECK_BUFFERED-NEXT: [[PRINTBUFFNEXTPTR1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR]], i32 8
199+
// CHECK_BUFFERED-NEXT: br label [[END_BLOCK]]
200+
//
201+
// CHECK_HOSTCALL-LABEL: @test_printf_str_int(
202+
// CHECK_HOSTCALL-NEXT: entry:
203+
// CHECK_HOSTCALL-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
204+
// CHECK_HOSTCALL-NEXT: [[S:%.*]] = alloca [4 x i8], align 1, addrspace(5)
205+
// CHECK_HOSTCALL-NEXT: store i32 [[I:%.*]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]]
206+
// CHECK_HOSTCALL-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[S]]) #[[ATTR3:[0-9]+]]
207+
// CHECK_HOSTCALL-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 1 [[S]], ptr addrspace(4) align 1 @__const.test_printf_str_int.s, i64 4, i1 false)
208+
// CHECK_HOSTCALL-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr addrspace(5) [[S]], i64 0, i64 0
209+
// CHECK_HOSTCALL-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]]
210+
// CHECK_HOSTCALL-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[ARRAYDECAY]] to ptr
211+
// CHECK_HOSTCALL-NEXT: [[TMP2:%.*]] = call i64 @__ockl_printf_begin(i64 0)
212+
// CHECK_HOSTCALL-NEXT: br i1 icmp eq (ptr addrspacecast (ptr addrspace(4) @.str.2 to ptr), ptr null), label [[STRLEN_JOIN:%.*]], label [[STRLEN_WHILE:%.*]]
213+
// CHECK_HOSTCALL: strlen.while:
214+
// CHECK_HOSTCALL-NEXT: [[TMP3:%.*]] = phi ptr [ addrspacecast (ptr addrspace(4) @.str.2 to ptr), [[ENTRY:%.*]] ], [ [[TMP4:%.*]], [[STRLEN_WHILE]] ]
215+
// CHECK_HOSTCALL-NEXT: [[TMP4]] = getelementptr i8, ptr [[TMP3]], i64 1
216+
// CHECK_HOSTCALL-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP3]], align 1
217+
// CHECK_HOSTCALL-NEXT: [[TMP6:%.*]] = icmp eq i8 [[TMP5]], 0
218+
// CHECK_HOSTCALL-NEXT: br i1 [[TMP6]], label [[STRLEN_WHILE_DONE:%.*]], label [[STRLEN_WHILE]]
219+
// CHECK_HOSTCALL: strlen.while.done:
220+
// CHECK_HOSTCALL-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP3]] to i64
221+
// CHECK_HOSTCALL-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], ptrtoint (ptr addrspacecast (ptr addrspace(4) @.str.2 to ptr) to i64)
222+
// CHECK_HOSTCALL-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 1
223+
// CHECK_HOSTCALL-NEXT: br label [[STRLEN_JOIN]]
224+
// CHECK_HOSTCALL: strlen.join:
225+
// CHECK_HOSTCALL-NEXT: [[TMP10:%.*]] = phi i64 [ [[TMP9]], [[STRLEN_WHILE_DONE]] ], [ 0, [[ENTRY]] ]
226+
// CHECK_HOSTCALL-NEXT: [[TMP11:%.*]] = call i64 @__ockl_printf_append_string_n(i64 [[TMP2]], ptr addrspacecast (ptr addrspace(4) @.str.2 to ptr), i64 [[TMP10]], i32 0)
227+
// CHECK_HOSTCALL-NEXT: [[TMP12:%.*]] = icmp eq ptr [[TMP1]], null
228+
// CHECK_HOSTCALL-NEXT: br i1 [[TMP12]], label [[STRLEN_JOIN1:%.*]], label [[STRLEN_WHILE2:%.*]]
229+
// CHECK_HOSTCALL: strlen.while2:
230+
// CHECK_HOSTCALL-NEXT: [[TMP13:%.*]] = phi ptr [ [[TMP1]], [[STRLEN_JOIN]] ], [ [[TMP14:%.*]], [[STRLEN_WHILE2]] ]
231+
// CHECK_HOSTCALL-NEXT: [[TMP14]] = getelementptr i8, ptr [[TMP13]], i64 1
232+
// CHECK_HOSTCALL-NEXT: [[TMP15:%.*]] = load i8, ptr [[TMP13]], align 1
233+
// CHECK_HOSTCALL-NEXT: [[TMP16:%.*]] = icmp eq i8 [[TMP15]], 0
234+
// CHECK_HOSTCALL-NEXT: br i1 [[TMP16]], label [[STRLEN_WHILE_DONE3:%.*]], label [[STRLEN_WHILE2]]
235+
// CHECK_HOSTCALL: strlen.while.done3:
236+
// CHECK_HOSTCALL-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP1]] to i64
237+
// CHECK_HOSTCALL-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP13]] to i64
238+
// CHECK_HOSTCALL-NEXT: [[TMP19:%.*]] = sub i64 [[TMP18]], [[TMP17]]
239+
// CHECK_HOSTCALL-NEXT: [[TMP20:%.*]] = add i64 [[TMP19]], 1
240+
// CHECK_HOSTCALL-NEXT: br label [[STRLEN_JOIN1]]
241+
// CHECK_HOSTCALL: strlen.join1:
242+
// CHECK_HOSTCALL-NEXT: [[TMP21:%.*]] = phi i64 [ [[TMP20]], [[STRLEN_WHILE_DONE3]] ], [ 0, [[STRLEN_JOIN]] ]
243+
// CHECK_HOSTCALL-NEXT: [[TMP22:%.*]] = call i64 @__ockl_printf_append_string_n(i64 [[TMP11]], ptr [[TMP1]], i64 [[TMP21]], i32 0)
244+
// CHECK_HOSTCALL-NEXT: [[TMP23:%.*]] = zext i32 [[TMP0]] to i64
245+
// CHECK_HOSTCALL-NEXT: [[TMP24:%.*]] = call i64 @__ockl_printf_append_args(i64 [[TMP22]], i32 1, i64 [[TMP23]], i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i32 1)
246+
// CHECK_HOSTCALL-NEXT: [[TMP25:%.*]] = trunc i64 [[TMP24]] to i32
247+
// CHECK_HOSTCALL-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[S]]) #[[ATTR3]]
248+
// CHECK_HOSTCALL-NEXT: ret void
46249
//
47250
__kernel void test_printf_str_int(int i) {
48251
char s[] = "foo";

0 commit comments

Comments
 (0)