Skip to content

[Clang][AMDGPU] Remove special handling for COV4 libraries #132870

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,6 @@ Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
/// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
/// and use its value for COV_4 or COV_5+ approach. It is used for
/// compiling device libraries in an ABI-agnostic way.
///
/// Note: "__oclc_ABI_version" is supposed to be emitted and intialized by
/// clang during compilation of user code.
Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
llvm::LoadInst *LD;

Expand Down
36 changes: 0 additions & 36 deletions clang/lib/CodeGen/Targets/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -305,8 +305,6 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
void setFunctionDeclAttributes(const FunctionDecl *FD, llvm::Function *F,
CodeGenModule &CGM) const;

void emitTargetGlobals(CodeGen::CodeGenModule &CGM) const override;

void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &M) const override;
unsigned getOpenCLKernelCallingConv() const override;
Expand Down Expand Up @@ -414,40 +412,6 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
}
}

/// Emits control constants used to change per-architecture behaviour in the
/// AMDGPU ROCm device libraries.
void AMDGPUTargetCodeGenInfo::emitTargetGlobals(
CodeGen::CodeGenModule &CGM) const {
StringRef Name = "__oclc_ABI_version";
llvm::GlobalVariable *OriginalGV = CGM.getModule().getNamedGlobal(Name);
if (OriginalGV && !llvm::GlobalVariable::isExternalLinkage(OriginalGV->getLinkage()))
return;

if (CGM.getTarget().getTargetOpts().CodeObjectVersion ==
llvm::CodeObjectVersionKind::COV_None)
return;

auto *Type = llvm::IntegerType::getIntNTy(CGM.getModule().getContext(), 32);
llvm::Constant *COV = llvm::ConstantInt::get(
Type, CGM.getTarget().getTargetOpts().CodeObjectVersion);

// It needs to be constant weak_odr without externally_initialized so that
// the load instuction can be eliminated by the IPSCCP.
auto *GV = new llvm::GlobalVariable(
CGM.getModule(), Type, true, llvm::GlobalValue::WeakODRLinkage, COV, Name,
nullptr, llvm::GlobalValue::ThreadLocalMode::NotThreadLocal,
CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
GV->setVisibility(llvm::GlobalValue::VisibilityTypes::HiddenVisibility);

// Replace any external references to this variable with the new global.
if (OriginalGV) {
OriginalGV->replaceAllUsesWith(GV);
GV->takeName(OriginalGV);
OriginalGV->eraseFromParent();
}
}

void AMDGPUTargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
if (requiresAMDGPUProtectedVisibility(D, GV)) {
Expand Down
15 changes: 12 additions & 3 deletions clang/test/CodeGen/amdgpu-abi-version.c
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --version 3
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 5
// RUN: %clang_cc1 -cc1 -triple amdgcn-amd-amdhsa -emit-llvm -mcode-object-version=none %s -o - | FileCheck %s

//.
// CHECK: @__oclc_ABI_version = external addrspace(4) global i32
//.
// CHECK-LABEL: define dso_local i32 @foo(
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) @__oclc_ABI_version, align 4
Expand All @@ -16,8 +16,17 @@
// CHECK-NEXT: [[TMP4:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP4]], i32 4
// CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP1]], ptr addrspace(4) [[TMP3]], ptr addrspace(4) [[TMP5]]
// CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[TMP6]], align 2, !range [[RNG2:![0-9]+]], !invariant.load !3, !noundef !3
// CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[TMP6]], align 2, !range [[RNG2:![0-9]+]], !invariant.load [[META3:![0-9]+]], !noundef [[META3]]
// CHECK-NEXT: [[CONV:%.*]] = zext i16 [[TMP7]] to i32
// CHECK-NEXT: ret i32 [[CONV]]
//
int foo() { return __builtin_amdgcn_workgroup_size_x(); }
//.
// CHECK: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
//.
// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
// CHECK: [[RNG2]] = !{i16 1, i16 1025}
// CHECK: [[META3]] = !{}
//.
8 changes: 7 additions & 1 deletion clang/test/CodeGen/amdgpu-address-spaces.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ int [[clang::address_space(999)]] bbb = 1234;
// CHECK: @u = addrspace(5) global i32 undef, align 4
// CHECK: @aaa = addrspace(6) global i32 1000, align 4
// CHECK: @bbb = addrspace(999) global i32 1234, align 4
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
//.
// CHECK-LABEL: define dso_local amdgpu_kernel void @foo(
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
Expand Down Expand Up @@ -60,3 +59,10 @@ extern "C" [[clang::amdgpu_kernel]] void foo() {
aaa = 0;
bbb = 0;
}
//.
// CHECK: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
//.
// CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
// CHECK: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
// CHECK: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
//.
133 changes: 0 additions & 133 deletions clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu

This file was deleted.

34 changes: 0 additions & 34 deletions clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
// RUN: -fcuda-is-device -mcode-object-version=4 -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefix=PRECOV5 %s


// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefix=COV5 %s
Expand All @@ -11,10 +10,6 @@
// RUN: -fcuda-is-device -mcode-object-version=6 -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefix=COV5 %s

// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -mcode-object-version=none -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefix=COVNONE %s

#include "Inputs/cuda.h"

// PRECOV5-LABEL: test_get_workgroup_size
Expand All @@ -35,35 +30,6 @@
// COV5: getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16
// COV5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef


// COVNONE-LABEL: test_get_workgroup_size
// COVNONE: load i32, ptr addrspace(4) @__oclc_ABI_version
// COVNONE: [[ABI5_X:%.*]] = icmp sge i32 %{{.*}}, 500
// COVNONE: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
// COVNONE: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12
// COVNONE: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// COVNONE: [[GEP_4_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4
// COVNONE: select i1 [[ABI5_X]], ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]]
// COVNONE: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef

// COVNONE: load i32, ptr addrspace(4) @__oclc_ABI_version
// COVNONE: [[ABI5_Y:%.*]] = icmp sge i32 %{{.*}}, 500
// COVNONE: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
// COVNONE: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14
// COVNONE: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// COVNONE: [[GEP_4_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6
// COVNONE: select i1 [[ABI5_Y]], ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]]
// COVNONE: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef

// COVNONE: load i32, ptr addrspace(4) @__oclc_ABI_version
// COVNONE: [[ABI5_Z:%.*]] = icmp sge i32 %{{.*}}, 500
// COVNONE: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
// COVNONE: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16
// COVNONE: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// COVNONE: [[GEP_4_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8
// COVNONE: select i1 [[ABI5_Z]], ptr addrspace(4) [[GEP_5_Z]], ptr addrspace(4) [[GEP_4_Z]]
// COVNONE: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef

__device__ void test_get_workgroup_size(int d, int *out)
{
switch (d) {
Expand Down
1 change: 0 additions & 1 deletion clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ B fail;
// CHECK: @_ZTI1B = linkonce_odr addrspace(1) constant { ptr addrspace(1), ptr addrspace(1), ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds (ptr addrspace(1), ptr addrspace(1) @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2), ptr addrspace(1) @_ZTS1B, ptr addrspace(1) @_ZTI1A }, comdat, align 8
// CHECK: @_ZTVN10__cxxabiv120__si_class_type_infoE = external addrspace(1) global [0 x ptr addrspace(1)]
// CHECK: @_ZTS1B = linkonce_odr addrspace(1) constant [3 x i8] c"1B\00", comdat, align 1
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
//.
// WITH-NONZERO-DEFAULT-AS: @_ZTV1B = linkonce_odr unnamed_addr addrspace(1) constant { [3 x ptr addrspace(1)] } { [3 x ptr addrspace(1)] [ptr addrspace(1) null, ptr addrspace(1) @_ZTI1B, ptr addrspace(1) addrspacecast (ptr addrspace(4) @_ZN1A1fEv to ptr addrspace(1))] }, comdat, align 8
// WITH-NONZERO-DEFAULT-AS: @fail = addrspace(1) global { ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds inrange(-16, 8) ({ [3 x ptr addrspace(1)] }, ptr addrspace(1) @_ZTV1B, i32 0, i32 0, i32 2) }, align 8
Expand Down
1 change: 0 additions & 1 deletion clang/test/CodeGenHIP/default-attributes.hip
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
//.
// OPTNONE: @__hip_cuid_ = addrspace(1) global i8 0
// OPTNONE: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata"
// OPTNONE: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
//.
__device__ void extern_func();

Expand Down
1 change: 0 additions & 1 deletion clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ kernel void test_target_features_kernel(global int *i) {
// CHECK: @__block_literal_global = internal addrspace(1) constant { i32, i32, ptr } { i32 16, i32 8, ptr @__test_target_features_kernel_block_invoke }, align 8 #0
// CHECK: @__test_target_features_kernel_block_invoke_kernel.runtime.handle = internal addrspace(1) externally_initialized constant %block.runtime.handle.t.3 zeroinitializer, section ".amdgpu.kernel.runtime.handle"
// CHECK: @llvm.used = appending addrspace(1) global [10 x ptr] [ptr @__test_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_kernel.runtime.handle to ptr), ptr @__test_block_invoke_2_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_2_kernel.runtime.handle to ptr), ptr @__test_block_invoke_3_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle to ptr), ptr @__test_block_invoke_4_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_4_kernel.runtime.handle to ptr), ptr @__test_target_features_kernel_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__test_target_features_kernel_block_invoke_kernel.runtime.handle to ptr)], section "llvm.metadata"
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
//.
// NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone
// NOCPU-LABEL: define {{[^@]+}}@callee
Expand Down
Loading
Loading