Skip to content

Commit 137dc54

Browse files
committed
[Clang][AMDGPU] Remove special handling for COV4 libraries
Summary: When we were first porting to COV5, this lead to some ABI issues due to a change in how we looked up the work group size. Bitcode libraries relied on the builtins to emit code, but this was changed between versions. This prevented the bitcode libraries, like OpenMP or libc, from being used for both COV4 and COV5. The solution was to have this 'none' functionality which effectively emitted code that branched off of a global to resolve to either version. This isn't a great solution because it forced every TU to have this variable in it. The patch in COV4 from OpenMP, which was the only consumer of this functionality. Other users like HIP and OpenCL did not use this because they linked the ROCm Device Library directly which has its own handling (The name was borrowed from it after all). So, now that we don't need to worry about backward compatibility with COV4, we can remove this special handling. Users can still emit COV4 code, this simply removes the special handling used to make the OpenMP device runtime bitcode version agnostic. Author: Joseph Huber PR: llvm#132870
1 parent aa68ef2 commit 137dc54

File tree

18 files changed

+7
-277
lines changed

18 files changed

+7
-277
lines changed

amd/comgr/test/mangled_names_test.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -166,14 +166,14 @@ int main(int argc, char *argv[]) {
166166
Status = amd_comgr_populate_mangled_names(DataBc, &NumNames);
167167
checkError(Status, "amd_comgr_populate_mangled_names");
168168

169-
if (NumNames != 3) {
169+
if (NumNames != 2) {
170170
printf("amd_populate_mangled_names Failed: "
171171
"produced %zu bitcode names (expected 2)\n",
172172
NumNames);
173173
exit(1);
174174
}
175175

176-
const char *BcNames[] = {"__oclc_ABI_version", "source1", "source2"};
176+
const char *BcNames[] = {"source1", "source2"};
177177

178178
for (size_t I = 0; I < NumNames; ++I) {
179179
size_t Size;

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1439,8 +1439,6 @@ void CodeGenModule::Release() {
14391439
getModule().addModuleFlag(llvm::Module::Error, "MaxTLSAlign",
14401440
getContext().getTargetInfo().getMaxTLSAlign());
14411441

1442-
getTargetCodeGenInfo().emitTargetGlobals(*this);
1443-
14441442
getTargetCodeGenInfo().emitTargetMetadata(*this, MangledDeclNames);
14451443

14461444
EmitBackendOptionsMetadata(getCodeGenOpts());

clang/lib/CodeGen/TargetInfo.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,6 @@ class TargetCodeGenInfo {
8282
CodeGen::CodeGenModule &CGM,
8383
const llvm::MapVector<GlobalDecl, StringRef> &MangledDeclNames) const {}
8484

85-
/// Provides a convenient hook to handle extra target-specific globals.
86-
virtual void emitTargetGlobals(CodeGen::CodeGenModule &CGM) const {}
87-
8885
/// Any further codegen related checks that need to be done on a function
8986
/// signature in a target specific manner.
9087
virtual void checkFunctionABI(CodeGenModule &CGM,

clang/lib/CodeGen/Targets/AMDGPU.cpp

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -305,8 +305,6 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
305305
void setFunctionDeclAttributes(const FunctionDecl *FD, llvm::Function *F,
306306
CodeGenModule &CGM) const;
307307

308-
void emitTargetGlobals(CodeGen::CodeGenModule &CGM) const override;
309-
310308
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
311309
CodeGen::CodeGenModule &M) const override;
312310
unsigned getOpenCLKernelCallingConv() const override;
@@ -414,34 +412,6 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
414412
}
415413
}
416414

417-
/// Emits control constants used to change per-architecture behaviour in the
418-
/// AMDGPU ROCm device libraries.
419-
void AMDGPUTargetCodeGenInfo::emitTargetGlobals(
420-
CodeGen::CodeGenModule &CGM) const {
421-
StringRef Name = "__oclc_ABI_version";
422-
llvm::GlobalVariable *OriginalGV = CGM.getModule().getNamedGlobal(Name);
423-
if (OriginalGV && !llvm::GlobalVariable::isExternalLinkage(OriginalGV->getLinkage()))
424-
return;
425-
426-
if (CGM.getTarget().getTargetOpts().CodeObjectVersion ==
427-
llvm::CodeObjectVersionKind::COV_None)
428-
return;
429-
430-
auto *Type = llvm::IntegerType::getIntNTy(CGM.getModule().getContext(), 32);
431-
llvm::Constant *COV = llvm::ConstantInt::get(
432-
Type, CGM.getTarget().getTargetOpts().CodeObjectVersion);
433-
434-
// It needs to be constant weak_odr without externally_initialized so that
435-
// the load instuction can be eliminated by the IPSCCP.
436-
auto *GV = new llvm::GlobalVariable(
437-
CGM.getModule(), Type, true, llvm::GlobalValue::WeakODRLinkage, COV, Name,
438-
nullptr, llvm::GlobalValue::ThreadLocalMode::NotThreadLocal,
439-
CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
440-
441-
GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
442-
GV->setVisibility(llvm::GlobalValue::VisibilityTypes::HiddenVisibility);
443-
}
444-
445415
void AMDGPUTargetCodeGenInfo::setTargetAttributes(
446416
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
447417
if (requiresAMDGPUProtectedVisibility(D, GV)) {

clang/test/CodeGen/amdgpu-abi-version.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --version 3
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --version 5
22
// RUN: %clang_cc1 -cc1 -triple amdgcn-amd-amdhsa -emit-llvm -mcode-object-version=none %s -o - | FileCheck %s
33

44
//.
55
// CHECK: @__oclc_ABI_version = external addrspace(4) global i32
66
//.
77
// CHECK-LABEL: define dso_local i32 @foo(
88
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
9-
// CHECK-NEXT: entry:
9+
// CHECK-NEXT: [[ENTRY:.*:]]
1010
// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
1111
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
1212
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) @__oclc_ABI_version, align 4

clang/test/CodeGen/amdgpu-address-spaces.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ int [[clang::address_space(999)]] bbb = 1234;
2929
// CHECK: @u = addrspace(5) global i32 undef, align 4
3030
// CHECK: @aaa = addrspace(6) global i32 1000, align 4
3131
// CHECK: @bbb = addrspace(999) global i32 1234, align 4
32-
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
3332
//.
3433
// CHECK-LABEL: define dso_local amdgpu_kernel void @foo(
3534
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {

clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu

Lines changed: 0 additions & 133 deletions
This file was deleted.

clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu

Lines changed: 0 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,6 @@
1111
// RUN: -fcuda-is-device -mcode-object-version=6 -emit-llvm -o - -x hip %s \
1212
// RUN: | FileCheck -check-prefix=COV5 %s
1313

14-
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
15-
// RUN: -fcuda-is-device -mcode-object-version=none -emit-llvm -o - -x hip %s \
16-
// RUN: | FileCheck -check-prefix=COVNONE %s
17-
1814
#include "Inputs/cuda.h"
1915

2016
// PRECOV5-LABEL: test_get_workgroup_size
@@ -36,34 +32,6 @@
3632
// COV5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
3733

3834

39-
// COVNONE-LABEL: test_get_workgroup_size
40-
// COVNONE: load i32, ptr addrspace(4) @__oclc_ABI_version
41-
// COVNONE: [[ABI5_X:%.*]] = icmp sge i32 %{{.*}}, 500
42-
// COVNONE: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
43-
// COVNONE: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12
44-
// COVNONE: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
45-
// COVNONE: [[GEP_4_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4
46-
// COVNONE: select i1 [[ABI5_X]], ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]]
47-
// COVNONE: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
48-
49-
// COVNONE: load i32, ptr addrspace(4) @__oclc_ABI_version
50-
// COVNONE: [[ABI5_Y:%.*]] = icmp sge i32 %{{.*}}, 500
51-
// COVNONE: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
52-
// COVNONE: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14
53-
// COVNONE: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
54-
// COVNONE: [[GEP_4_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6
55-
// COVNONE: select i1 [[ABI5_Y]], ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]]
56-
// COVNONE: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
57-
58-
// COVNONE: load i32, ptr addrspace(4) @__oclc_ABI_version
59-
// COVNONE: [[ABI5_Z:%.*]] = icmp sge i32 %{{.*}}, 500
60-
// COVNONE: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
61-
// COVNONE: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16
62-
// COVNONE: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
63-
// COVNONE: [[GEP_4_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8
64-
// COVNONE: select i1 [[ABI5_Z]], ptr addrspace(4) [[GEP_5_Z]], ptr addrspace(4) [[GEP_4_Z]]
65-
// COVNONE: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
66-
6735
__device__ void test_get_workgroup_size(int d, int *out)
6836
{
6937
switch (d) {

clang/test/CodeGenCXX/dynamic-cast-address-space.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ B fail;
1313
// CHECK: @_ZTI1B = linkonce_odr addrspace(1) constant { ptr addrspace(1), ptr addrspace(1), ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds (ptr addrspace(1), ptr addrspace(1) @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2), ptr addrspace(1) @_ZTS1B, ptr addrspace(1) @_ZTI1A }, comdat, align 8
1414
// CHECK: @_ZTVN10__cxxabiv120__si_class_type_infoE = external addrspace(1) global [0 x ptr addrspace(1)]
1515
// CHECK: @_ZTS1B = linkonce_odr addrspace(1) constant [3 x i8] c"1B\00", comdat, align 1
16-
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
1716
//.
1817
// WITH-NONZERO-DEFAULT-AS: @_ZTV1B = linkonce_odr unnamed_addr addrspace(1) constant { [3 x ptr addrspace(1)] } { [3 x ptr addrspace(1)] [ptr addrspace(1) null, ptr addrspace(1) @_ZTI1B, ptr addrspace(1) addrspacecast (ptr addrspace(4) @_ZN1A1fEv to ptr addrspace(1))] }, comdat, align 8
1918
// WITH-NONZERO-DEFAULT-AS: @fail = addrspace(1) global { ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds inrange(-16, 8) ({ [3 x ptr addrspace(1)] }, ptr addrspace(1) @_ZTV1B, i32 0, i32 0, i32 2) }, align 8

clang/test/CodeGenHIP/default-attributes.hip

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
//.
99
// OPTNONE: @__hip_cuid_ = addrspace(1) global i8 0
1010
// OPTNONE: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata"
11-
// OPTNONE: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
1211
//.
1312
__device__ void extern_func();
1413

clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,6 @@ kernel void test_target_features_kernel(global int *i) {
6868
// CHECK: @__block_literal_global = internal addrspace(1) constant { i32, i32, ptr } { i32 16, i32 8, ptr @__test_target_features_kernel_block_invoke }, align 8 #0
6969
// CHECK: @__test_target_features_kernel_block_invoke_kernel.runtime.handle = internal addrspace(1) externally_initialized constant %block.runtime.handle.t.3 zeroinitializer, section ".amdgpu.kernel.runtime.handle"
7070
// CHECK: @llvm.used = appending addrspace(1) global [10 x ptr] [ptr @__test_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_kernel.runtime.handle to ptr), ptr @__test_block_invoke_2_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_2_kernel.runtime.handle to ptr), ptr @__test_block_invoke_3_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle to ptr), ptr @__test_block_invoke_4_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_4_kernel.runtime.handle to ptr), ptr @__test_target_features_kernel_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__test_target_features_kernel_block_invoke_kernel.runtime.handle to ptr)], section "llvm.metadata"
71-
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
7271
//.
7372
// NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone
7473
// NOCPU-LABEL: define {{[^@]+}}@callee

clang/test/OpenMP/amdgcn_target_global_constructor.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ S A;
2929
// CHECK: @A = addrspace(1) global %struct.S zeroinitializer, align 4
3030
// CHECK: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @_GLOBAL__sub_I_amdgcn_target_global_constructor.cpp, ptr null }]
3131
// CHECK: @llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @__dtor_A, ptr null }]
32-
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
3332
//.
3433
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init
3534
// CHECK-SAME: () #[[ATTR0:[0-9]+]] {

compiler-rt/cmake/builtin-config-ix.cmake

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ builtin_check_c_compiler_flag(-Wno-pedantic COMPILER_RT_HAS_WNO_PEDANTIC
2222
builtin_check_c_compiler_flag(-nogpulib COMPILER_RT_HAS_NOGPULIB_FLAG)
2323
builtin_check_c_compiler_flag(-flto COMPILER_RT_HAS_FLTO_FLAG)
2424
builtin_check_c_compiler_flag(-fconvergent-functions COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG)
25-
builtin_check_c_compiler_flag("-Xclang -mcode-object-version=none" COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG)
2625
builtin_check_c_compiler_flag(-Wbuiltin-declaration-mismatch COMPILER_RT_HAS_WBUILTIN_DECLARATION_MISMATCH_FLAG)
2726
builtin_check_c_compiler_flag(/Zl COMPILER_RT_HAS_ZL_FLAG)
2827
builtin_check_c_compiler_flag(-fcf-protection=full COMPILER_RT_HAS_FCF_PROTECTION_FLAG)

0 commit comments

Comments
 (0)