2
2
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -fno-ident -fcuda-is-device \
3
3
// RUN: -emit-llvm -o - %s | FileCheck -check-prefix=OPTNONE %s
4
4
5
- // RUN: %clang_cc1 -O3 -triple amdgcn-amd-amdhsa -x hip -fno-ident -fcuda-is-device \
6
- // RUN: -emit-llvm -o - %s | FileCheck -check-prefix=OPT %s
7
-
8
5
// Spell the device/global function qualifiers directly as attributes.
// NOTE(review): presumably done so the test needs no HIP headers; no #include
// is visible in this view -- confirm.
#define __device__ __attribute__ ((device))
9
6
#define __global__ __attribute__ ((global))
10
7
13
10
// OPTNONE: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata"
14
11
// OPTNONE: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
15
12
// .
16
- // OPT: @__hip_cuid_ = addrspace(1) global i8 0
17
- // OPT: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
18
- // OPT: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata"
19
- // .
20
13
// Device function that is declared here but has no visible definition;
// func() and kernel() below both call it.
__device__ void extern_func();
21
14
22
15
// OPTNONE: Function Attrs: convergent mustprogress noinline nounwind optnone
@@ -26,13 +19,6 @@ __device__ void extern_func();
26
19
// OPTNONE-NEXT: call void @_Z11extern_funcv() #[[ATTR3:[0-9]+]]
27
20
// OPTNONE-NEXT: ret void
28
21
//
29
- // OPT: Function Attrs: convergent mustprogress nounwind
30
- // OPT-LABEL: define {{[^@]+}}@_Z4funcv
31
- // OPT-SAME: () local_unnamed_addr #[[ATTR0:[0-9]+]] {
32
- // OPT-NEXT: entry:
33
- // OPT-NEXT: tail call void @_Z11extern_funcv() #[[ATTR3:[0-9]+]]
34
- // OPT-NEXT: ret void
35
- //
36
22
// Device function whose only statement is a call to extern_func().
// The FileCheck lines preceding it expect exactly one call to
// @_Z11extern_funcv followed by `ret void`, so the body must stay as written.
__device__ void func () {
37
23
extern_func ();
38
24
}
@@ -44,13 +30,6 @@ __device__ void func() {
44
30
// OPTNONE-NEXT: call void @_Z11extern_funcv() #[[ATTR3]]
45
31
// OPTNONE-NEXT: ret void
46
32
//
47
- // OPT: Function Attrs: convergent mustprogress norecurse nounwind
48
- // OPT-LABEL: define {{[^@]+}}@_Z6kernelv
49
- // OPT-SAME: () local_unnamed_addr #[[ATTR2:[0-9]+]] {
50
- // OPT-NEXT: entry:
51
- // OPT-NEXT: tail call void @_Z11extern_funcv() #[[ATTR3]]
52
- // OPT-NEXT: ret void
53
- //
54
33
// Function carrying the `global` attribute (via the __global__ macro) whose
// only statement is the same call to extern_func() that func() makes.
// The FileCheck lines preceding it expect one call followed by `ret void`.
__global__ void kernel () {
55
34
extern_func ();
56
35
}
@@ -60,16 +39,7 @@ __global__ void kernel() {
60
39
// OPTNONE: attributes #[[ATTR2]] = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
61
40
// OPTNONE: attributes #[[ATTR3]] = { convergent nounwind }
62
41
// .
63
- // OPT: attributes #[[ATTR0]] = { convergent mustprogress nounwind "amdgpu-waves-per-eu"="4,10" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" }
64
- // OPT: attributes #[[ATTR1:[0-9]+]] = { convergent nounwind "amdgpu-waves-per-eu"="4,10" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" }
65
- // OPT: attributes #[[ATTR2]] = { convergent mustprogress norecurse nounwind "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
66
- // OPT: attributes #[[ATTR3]] = { convergent nounwind }
67
- // .
68
42
// OPTNONE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
69
43
// OPTNONE: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
70
44
// OPTNONE: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
71
45
// .
72
- // OPT: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
73
- // OPT: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
74
- // OPT: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
75
- // .
0 commit comments