Skip to content

Commit 6cd42cd

Browse files
committed
Revert "Revert "[Clang][AMDGPU] Remove special handling for COV4 libraries (llvm#132870)""
This reverts commit be5d122.
1 parent 5b2de35 commit 6cd42cd

File tree

15 files changed

+21
-278
lines changed

15 files changed

+21
-278
lines changed

clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,6 @@ Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
5757
/// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
5858
/// and use its value for COV_4 or COV_5+ approach. It is used for
5959
/// compiling device libraries in an ABI-agnostic way.
60-
///
61-
/// Note: "__oclc_ABI_version" is supposed to be emitted and intialized by
62-
/// clang during compilation of user code.
6360
Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
6461
llvm::LoadInst *LD;
6562

clang/lib/CodeGen/Targets/AMDGPU.cpp

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -305,8 +305,6 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
305305
void setFunctionDeclAttributes(const FunctionDecl *FD, llvm::Function *F,
306306
CodeGenModule &CGM) const;
307307

308-
void emitTargetGlobals(CodeGen::CodeGenModule &CGM) const override;
309-
310308
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
311309
CodeGen::CodeGenModule &M) const override;
312310
unsigned getOpenCLKernelCallingConv() const override;
@@ -414,34 +412,6 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
414412
}
415413
}
416414

417-
/// Emits control constants used to change per-architecture behaviour in the
418-
/// AMDGPU ROCm device libraries.
419-
void AMDGPUTargetCodeGenInfo::emitTargetGlobals(
420-
CodeGen::CodeGenModule &CGM) const {
421-
StringRef Name = "__oclc_ABI_version";
422-
llvm::GlobalVariable *OriginalGV = CGM.getModule().getNamedGlobal(Name);
423-
if (OriginalGV && !llvm::GlobalVariable::isExternalLinkage(OriginalGV->getLinkage()))
424-
return;
425-
426-
if (CGM.getTarget().getTargetOpts().CodeObjectVersion ==
427-
llvm::CodeObjectVersionKind::COV_None)
428-
return;
429-
430-
auto *Type = llvm::IntegerType::getIntNTy(CGM.getModule().getContext(), 32);
431-
llvm::Constant *COV = llvm::ConstantInt::get(
432-
Type, CGM.getTarget().getTargetOpts().CodeObjectVersion);
433-
434-
// It needs to be constant weak_odr without externally_initialized so that
435-
// the load instuction can be eliminated by the IPSCCP.
436-
auto *GV = new llvm::GlobalVariable(
437-
CGM.getModule(), Type, true, llvm::GlobalValue::WeakODRLinkage, COV, Name,
438-
nullptr, llvm::GlobalValue::ThreadLocalMode::NotThreadLocal,
439-
CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
440-
441-
GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
442-
GV->setVisibility(llvm::GlobalValue::VisibilityTypes::HiddenVisibility);
443-
}
444-
445415
void AMDGPUTargetCodeGenInfo::setTargetAttributes(
446416
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
447417
if (requiresAMDGPUProtectedVisibility(D, GV)) {
Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --version 3
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 5
22
// RUN: %clang_cc1 -cc1 -triple amdgcn-amd-amdhsa -emit-llvm -mcode-object-version=none %s -o - | FileCheck %s
33

44
//.
55
// CHECK: @__oclc_ABI_version = external addrspace(4) global i32
66
//.
77
// CHECK-LABEL: define dso_local i32 @foo(
88
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
9-
// CHECK-NEXT: entry:
9+
// CHECK-NEXT: [[ENTRY:.*:]]
1010
// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
1111
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
1212
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) @__oclc_ABI_version, align 4
@@ -16,8 +16,17 @@
1616
// CHECK-NEXT: [[TMP4:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
1717
// CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP4]], i32 4
1818
// CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP1]], ptr addrspace(4) [[TMP3]], ptr addrspace(4) [[TMP5]]
19-
// CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[TMP6]], align 2, !range [[RNG2:![0-9]+]], !invariant.load !3, !noundef !3
19+
// CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[TMP6]], align 2, !range [[RNG2:![0-9]+]], !invariant.load [[META3:![0-9]+]], !noundef [[META3]]
2020
// CHECK-NEXT: [[CONV:%.*]] = zext i16 [[TMP7]] to i32
2121
// CHECK-NEXT: ret i32 [[CONV]]
2222
//
2323
int foo() { return __builtin_amdgcn_workgroup_size_x(); }
24+
//.
25+
// CHECK: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
26+
// CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
27+
//.
28+
// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
29+
// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
30+
// CHECK: [[RNG2]] = !{i16 1, i16 1025}
31+
// CHECK: [[META3]] = !{}
32+
//.

clang/test/CodeGen/amdgpu-address-spaces.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ int [[clang::address_space(999)]] bbb = 1234;
2929
// CHECK: @u = addrspace(5) global i32 undef, align 4
3030
// CHECK: @aaa = addrspace(6) global i32 1000, align 4
3131
// CHECK: @bbb = addrspace(999) global i32 1234, align 4
32-
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
3332
//.
3433
// CHECK-LABEL: define dso_local amdgpu_kernel void @foo(
3534
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
@@ -60,3 +59,10 @@ extern "C" [[clang::amdgpu_kernel]] void foo() {
6059
aaa = 0;
6160
bbb = 0;
6261
}
62+
//.
63+
// CHECK: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
64+
//.
65+
// CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
66+
// CHECK: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
67+
// CHECK: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
68+
//.

clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu

Lines changed: 0 additions & 133 deletions
This file was deleted.

clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu

Lines changed: 0 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
// RUN: -fcuda-is-device -mcode-object-version=4 -emit-llvm -o - -x hip %s \
33
// RUN: | FileCheck -check-prefix=PRECOV5 %s
44

5-
65
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
76
// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \
87
// RUN: | FileCheck -check-prefix=COV5 %s
@@ -11,10 +10,6 @@
1110
// RUN: -fcuda-is-device -mcode-object-version=6 -emit-llvm -o - -x hip %s \
1211
// RUN: | FileCheck -check-prefix=COV5 %s
1312

14-
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
15-
// RUN: -fcuda-is-device -mcode-object-version=none -emit-llvm -o - -x hip %s \
16-
// RUN: | FileCheck -check-prefix=COVNONE %s
17-
1813
#include "Inputs/cuda.h"
1914

2015
// PRECOV5-LABEL: test_get_workgroup_size
@@ -35,35 +30,6 @@
3530
// COV5: getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16
3631
// COV5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
3732

38-
39-
// COVNONE-LABEL: test_get_workgroup_size
40-
// COVNONE: load i32, ptr addrspace(4) @__oclc_ABI_version
41-
// COVNONE: [[ABI5_X:%.*]] = icmp sge i32 %{{.*}}, 500
42-
// COVNONE: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
43-
// COVNONE: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12
44-
// COVNONE: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
45-
// COVNONE: [[GEP_4_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4
46-
// COVNONE: select i1 [[ABI5_X]], ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]]
47-
// COVNONE: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
48-
49-
// COVNONE: load i32, ptr addrspace(4) @__oclc_ABI_version
50-
// COVNONE: [[ABI5_Y:%.*]] = icmp sge i32 %{{.*}}, 500
51-
// COVNONE: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
52-
// COVNONE: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14
53-
// COVNONE: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
54-
// COVNONE: [[GEP_4_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6
55-
// COVNONE: select i1 [[ABI5_Y]], ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]]
56-
// COVNONE: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
57-
58-
// COVNONE: load i32, ptr addrspace(4) @__oclc_ABI_version
59-
// COVNONE: [[ABI5_Z:%.*]] = icmp sge i32 %{{.*}}, 500
60-
// COVNONE: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
61-
// COVNONE: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16
62-
// COVNONE: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
63-
// COVNONE: [[GEP_4_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8
64-
// COVNONE: select i1 [[ABI5_Z]], ptr addrspace(4) [[GEP_5_Z]], ptr addrspace(4) [[GEP_4_Z]]
65-
// COVNONE: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
66-
6733
__device__ void test_get_workgroup_size(int d, int *out)
6834
{
6935
switch (d) {

clang/test/CodeGenCXX/dynamic-cast-address-space.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ B fail;
1313
// CHECK: @_ZTI1B = linkonce_odr addrspace(1) constant { ptr addrspace(1), ptr addrspace(1), ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds (ptr addrspace(1), ptr addrspace(1) @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2), ptr addrspace(1) @_ZTS1B, ptr addrspace(1) @_ZTI1A }, comdat, align 8
1414
// CHECK: @_ZTVN10__cxxabiv120__si_class_type_infoE = external addrspace(1) global [0 x ptr addrspace(1)]
1515
// CHECK: @_ZTS1B = linkonce_odr addrspace(1) constant [3 x i8] c"1B\00", comdat, align 1
16-
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
1716
//.
1817
// WITH-NONZERO-DEFAULT-AS: @_ZTV1B = linkonce_odr unnamed_addr addrspace(1) constant { [3 x ptr addrspace(1)] } { [3 x ptr addrspace(1)] [ptr addrspace(1) null, ptr addrspace(1) @_ZTI1B, ptr addrspace(1) addrspacecast (ptr addrspace(4) @_ZN1A1fEv to ptr addrspace(1))] }, comdat, align 8
1918
// WITH-NONZERO-DEFAULT-AS: @fail = addrspace(1) global { ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds inrange(-16, 8) ({ [3 x ptr addrspace(1)] }, ptr addrspace(1) @_ZTV1B, i32 0, i32 0, i32 2) }, align 8

clang/test/CodeGenHIP/default-attributes.hip

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
//.
99
// OPTNONE: @__hip_cuid_ = addrspace(1) global i8 0
1010
// OPTNONE: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata"
11-
// OPTNONE: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
1211
//.
1312
__device__ void extern_func();
1413

clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,6 @@ kernel void test_target_features_kernel(global int *i) {
6868
// CHECK: @__block_literal_global = internal addrspace(1) constant { i32, i32, ptr } { i32 16, i32 8, ptr @__test_target_features_kernel_block_invoke }, align 8 #0
6969
// CHECK: @__test_target_features_kernel_block_invoke_kernel.runtime.handle = internal addrspace(1) externally_initialized constant %block.runtime.handle.t.3 zeroinitializer, section ".amdgpu.kernel.runtime.handle"
7070
// CHECK: @llvm.used = appending addrspace(1) global [10 x ptr] [ptr @__test_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_kernel.runtime.handle to ptr), ptr @__test_block_invoke_2_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_2_kernel.runtime.handle to ptr), ptr @__test_block_invoke_3_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle to ptr), ptr @__test_block_invoke_4_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_4_kernel.runtime.handle to ptr), ptr @__test_target_features_kernel_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__test_target_features_kernel_block_invoke_kernel.runtime.handle to ptr)], section "llvm.metadata"
71-
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
7271
//.
7372
// NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone
7473
// NOCPU-LABEL: define dso_local void @callee(

0 commit comments

Comments
 (0)