Skip to content

Commit 889d677

Browse files
authored
clang/AMDGPU: Restore O3 checks in default-attributes.hip (#115238)
These were dropped in b1bcb7c to avoid some bot failures.
1 parent 1c8fca8 commit 889d677

File tree

1 file changed: +30 additions, -0 deletions

clang/test/CodeGenHIP/default-attributes.hip

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,9 @@
22
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -fno-ident -fcuda-is-device \
33
// RUN: -emit-llvm -o - %s | FileCheck -check-prefix=OPTNONE %s
44

5+
// RUN: %clang_cc1 -O3 -triple amdgcn-amd-amdhsa -x hip -fno-ident -fcuda-is-device \
6+
// RUN: -emit-llvm -o - %s | FileCheck -check-prefix=OPT %s
7+
58
#define __device__ __attribute__((device))
69
#define __global__ __attribute__((global))
710

@@ -10,6 +13,10 @@
1013
// OPTNONE: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata"
1114
// OPTNONE: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
1215
//.
16+
// OPT: @__hip_cuid_ = addrspace(1) global i8 0
17+
// OPT: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
18+
// OPT: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata"
19+
//.
1320
__device__ void extern_func();
1421

1522
// OPTNONE: Function Attrs: convergent mustprogress noinline nounwind optnone
@@ -19,6 +26,13 @@ __device__ void extern_func();
1926
// OPTNONE-NEXT: call void @_Z11extern_funcv() #[[ATTR3:[0-9]+]]
2027
// OPTNONE-NEXT: ret void
2128
//
29+
// OPT: Function Attrs: convergent mustprogress nounwind
30+
// OPT-LABEL: define {{[^@]+}}@_Z4funcv
31+
// OPT-SAME: () local_unnamed_addr #[[ATTR0:[0-9]+]] {
32+
// OPT-NEXT: entry:
33+
// OPT-NEXT: tail call void @_Z11extern_funcv() #[[ATTR3:[0-9]+]]
34+
// OPT-NEXT: ret void
35+
//
2236
__device__ void func() {
2337
extern_func();
2438
}
@@ -30,6 +44,13 @@ __device__ void func() {
3044
// OPTNONE-NEXT: call void @_Z11extern_funcv() #[[ATTR3]]
3145
// OPTNONE-NEXT: ret void
3246
//
47+
// OPT: Function Attrs: convergent mustprogress norecurse nounwind
48+
// OPT-LABEL: define {{[^@]+}}@_Z6kernelv
49+
// OPT-SAME: () local_unnamed_addr #[[ATTR2:[0-9]+]] {
50+
// OPT-NEXT: entry:
51+
// OPT-NEXT: tail call void @_Z11extern_funcv() #[[ATTR3]]
52+
// OPT-NEXT: ret void
53+
//
3354
__global__ void kernel() {
3455
extern_func();
3556
}
@@ -39,7 +60,16 @@ __global__ void kernel() {
3960
// OPTNONE: attributes #[[ATTR2]] = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
4061
// OPTNONE: attributes #[[ATTR3]] = { convergent nounwind }
4162
//.
63+
// OPT: attributes #[[ATTR0]] = { convergent mustprogress nounwind "amdgpu-waves-per-eu"="4,10" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" }
64+
// OPT: attributes #[[ATTR1:[0-9]+]] = { convergent nounwind "amdgpu-waves-per-eu"="4,10" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" }
65+
// OPT: attributes #[[ATTR2]] = { convergent mustprogress norecurse nounwind "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
66+
// OPT: attributes #[[ATTR3]] = { convergent nounwind }
67+
//.
4268
// OPTNONE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
4369
// OPTNONE: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
4470
// OPTNONE: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
4571
//.
72+
// OPT: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
73+
// OPT: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
74+
// OPT: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
75+
//.

0 commit comments

Comments
 (0)