2
2
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -fno-ident -fcuda-is-device \
3
3
// RUN: -emit-llvm -o - %s | FileCheck -check-prefix=OPTNONE %s
4
4
5
+ // RUN: %clang_cc1 -O3 -triple amdgcn-amd-amdhsa -x hip -fno-ident -fcuda-is-device \
6
+ // RUN: -emit-llvm -o - %s | FileCheck -check-prefix=OPT %s
7
+
5
8
// Spell the HIP function qualifiers used below directly as Clang attributes:
// __device__ expands to the `device` attribute and __global__ to the `global`
// attribute.
#define __device__ __attribute__ ((device))
6
9
#define __global__ __attribute__ ((global))
7
10
10
13
// OPTNONE: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata"
11
14
// OPTNONE: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
12
15
// .
16
+ // OPT: @__hip_cuid_ = addrspace(1) global i8 0
17
+ // OPT: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
18
+ // OPT: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata"
19
+ // .
13
20
// Device function that is only declared here (no body is visible in this
// file). Both func() and kernel() call it, and the check lines expect that
// call to remain in the emitted IR as a call to @_Z11extern_funcv.
__device__ void extern_func();
14
21
15
22
// OPTNONE: Function Attrs: convergent mustprogress noinline nounwind optnone
@@ -19,6 +26,13 @@ __device__ void extern_func();
19
26
// OPTNONE-NEXT: call void @_Z11extern_funcv() #[[ATTR3:[0-9]+]]
20
27
// OPTNONE-NEXT: ret void
21
28
//
29
+ // OPT: Function Attrs: convergent mustprogress nounwind
30
+ // OPT-LABEL: define {{[^@]+}}@_Z4funcv
31
+ // OPT-SAME: () local_unnamed_addr #[[ATTR0:[0-9]+]] {
32
+ // OPT-NEXT: entry:
33
+ // OPT-NEXT: tail call void @_Z11extern_funcv() #[[ATTR3:[0-9]+]]
34
+ // OPT-NEXT: ret void
35
+ //
22
36
// Plain device function whose whole body is one call to extern_func().
// The check lines preceding it pin the IR emitted for @_Z4funcv under each
// RUN configuration: a `call` in the default run and a `tail call` in the
// -O3 run, each followed by `ret void`.
__device__ void func () {
23
37
extern_func ();
24
38
}
@@ -30,6 +44,13 @@ __device__ void func() {
30
44
// OPTNONE-NEXT: call void @_Z11extern_funcv() #[[ATTR3]]
31
45
// OPTNONE-NEXT: ret void
32
46
//
47
+ // OPT: Function Attrs: convergent mustprogress norecurse nounwind
48
+ // OPT-LABEL: define {{[^@]+}}@_Z6kernelv
49
+ // OPT-SAME: () local_unnamed_addr #[[ATTR2:[0-9]+]] {
50
+ // OPT-NEXT: entry:
51
+ // OPT-NEXT: tail call void @_Z11extern_funcv() #[[ATTR3]]
52
+ // OPT-NEXT: ret void
53
+ //
33
54
// Kernel (declared with the `global` attribute) whose whole body is one call
// to extern_func(). The check lines preceding it pin the IR emitted for
// @_Z6kernelv; in the -O3 run they bind it to an attribute group that carries
// "amdgpu-flat-work-group-size"="1,1024" and "uniform-work-group-size"="true".
__global__ void kernel () {
34
55
extern_func ();
35
56
}
@@ -39,7 +60,16 @@ __global__ void kernel() {
39
60
// OPTNONE: attributes #[[ATTR2]] = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
40
61
// OPTNONE: attributes #[[ATTR3]] = { convergent nounwind }
41
62
// .
63
+ // OPT: attributes #[[ATTR0]] = { convergent mustprogress nounwind "amdgpu-waves-per-eu"="4,10" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" }
64
+ // OPT: attributes #[[ATTR1:[0-9]+]] = { convergent nounwind "amdgpu-waves-per-eu"="4,10" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" }
65
+ // OPT: attributes #[[ATTR2]] = { convergent mustprogress norecurse nounwind "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
66
+ // OPT: attributes #[[ATTR3]] = { convergent nounwind }
67
+ // .
42
68
// OPTNONE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
43
69
// OPTNONE: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
44
70
// OPTNONE: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
45
71
// .
72
+ // OPT: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
73
+ // OPT: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
74
+ // OPT: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
75
+ // .
0 commit comments