Skip to content

clang/AMDGPU: Restore O3 checks in default-attributes.hip #115238

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions clang/test/CodeGenHIP/default-attributes.hip
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -fno-ident -fcuda-is-device \
// RUN: -emit-llvm -o - %s | FileCheck -check-prefix=OPTNONE %s

// RUN: %clang_cc1 -O3 -triple amdgcn-amd-amdhsa -x hip -fno-ident -fcuda-is-device \
// RUN: -emit-llvm -o - %s | FileCheck -check-prefix=OPT %s

#define __device__ __attribute__((device))
#define __global__ __attribute__((global))

Expand All @@ -10,6 +13,10 @@
// OPTNONE: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata"
// OPTNONE: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
//.
// OPT: @__hip_cuid_ = addrspace(1) global i8 0
// OPT: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
// OPT: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata"
//.
__device__ void extern_func();

// OPTNONE: Function Attrs: convergent mustprogress noinline nounwind optnone
Expand All @@ -19,6 +26,13 @@ __device__ void extern_func();
// OPTNONE-NEXT: call void @_Z11extern_funcv() #[[ATTR3:[0-9]+]]
// OPTNONE-NEXT: ret void
//
// OPT: Function Attrs: convergent mustprogress nounwind
// OPT-LABEL: define {{[^@]+}}@_Z4funcv
// OPT-SAME: () local_unnamed_addr #[[ATTR0:[0-9]+]] {
// OPT-NEXT: entry:
// OPT-NEXT: tail call void @_Z11extern_funcv() #[[ATTR3:[0-9]+]]
// OPT-NEXT: ret void
//
__device__ void func() {
extern_func();
}
Expand All @@ -30,6 +44,13 @@ __device__ void func() {
// OPTNONE-NEXT: call void @_Z11extern_funcv() #[[ATTR3]]
// OPTNONE-NEXT: ret void
//
// OPT: Function Attrs: convergent mustprogress norecurse nounwind
// OPT-LABEL: define {{[^@]+}}@_Z6kernelv
// OPT-SAME: () local_unnamed_addr #[[ATTR2:[0-9]+]] {
// OPT-NEXT: entry:
// OPT-NEXT: tail call void @_Z11extern_funcv() #[[ATTR3]]
// OPT-NEXT: ret void
//
__global__ void kernel() {
extern_func();
}
Expand All @@ -39,7 +60,16 @@ __global__ void kernel() {
// OPTNONE: attributes #[[ATTR2]] = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
// OPTNONE: attributes #[[ATTR3]] = { convergent nounwind }
//.
// OPT: attributes #[[ATTR0]] = { convergent mustprogress nounwind "amdgpu-waves-per-eu"="4,10" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" }
// OPT: attributes #[[ATTR1:[0-9]+]] = { convergent nounwind "amdgpu-waves-per-eu"="4,10" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" }
// OPT: attributes #[[ATTR2]] = { convergent mustprogress norecurse nounwind "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
// OPT: attributes #[[ATTR3]] = { convergent nounwind }
//.
// OPTNONE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
// OPTNONE: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
// OPTNONE: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
//.
// OPT: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
// OPT: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
// OPT: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
//.
Loading