Skip to content

Commit 02c015f

Browse files
authored
Amd/dev/rlieberm/revert 2 commits (llvm#1392)
2 parents 67b946b + 3a31170 commit 02c015f

File tree

19 files changed

+290
-15
lines changed

19 files changed

+290
-15
lines changed

amd/comgr/test/mangled_names_test.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -166,14 +166,14 @@ int main(int argc, char *argv[]) {
166166
Status = amd_comgr_populate_mangled_names(DataBc, &NumNames);
167167
checkError(Status, "amd_comgr_populate_mangled_names");
168168

169-
if (NumNames != 2) {
169+
if (NumNames != 3) {
170170
printf("amd_populate_mangled_names Failed: "
171171
"produced %zu bitcode names (expected 2)\n",
172172
NumNames);
173173
exit(1);
174174
}
175175

176-
const char *BcNames[] = {"source1", "source2"};
176+
const char *BcNames[] = {"__oclc_ABI_version", "source1", "source2"};
177177

178178
for (size_t I = 0; I < NumNames; ++I) {
179179
size_t Size;

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1439,6 +1439,8 @@ void CodeGenModule::Release() {
14391439
getModule().addModuleFlag(llvm::Module::Error, "MaxTLSAlign",
14401440
getContext().getTargetInfo().getMaxTLSAlign());
14411441

1442+
getTargetCodeGenInfo().emitTargetGlobals(*this);
1443+
14421444
getTargetCodeGenInfo().emitTargetMetadata(*this, MangledDeclNames);
14431445

14441446
EmitBackendOptionsMetadata(getCodeGenOpts());

clang/lib/CodeGen/TargetInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,9 @@ class TargetCodeGenInfo {
8282
CodeGen::CodeGenModule &CGM,
8383
const llvm::MapVector<GlobalDecl, StringRef> &MangledDeclNames) const {}
8484

85+
/// Provides a convenient hook to handle extra target-specific globals.
86+
virtual void emitTargetGlobals(CodeGen::CodeGenModule &CGM) const {}
87+
8588
/// Any further codegen related checks that need to be done on a function
8689
/// signature in a target specific manner.
8790
virtual void checkFunctionABI(CodeGenModule &CGM,

clang/lib/CodeGen/Targets/AMDGPU.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,8 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
305305
void setFunctionDeclAttributes(const FunctionDecl *FD, llvm::Function *F,
306306
CodeGenModule &CGM) const;
307307

308+
void emitTargetGlobals(CodeGen::CodeGenModule &CGM) const override;
309+
308310
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
309311
CodeGen::CodeGenModule &M) const override;
310312
unsigned getOpenCLKernelCallingConv() const override;
@@ -412,6 +414,34 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
412414
}
413415
}
414416

417+
/// Emits control constants used to change per-architecture behaviour in the
418+
/// AMDGPU ROCm device libraries.
419+
void AMDGPUTargetCodeGenInfo::emitTargetGlobals(
420+
CodeGen::CodeGenModule &CGM) const {
421+
StringRef Name = "__oclc_ABI_version";
422+
llvm::GlobalVariable *OriginalGV = CGM.getModule().getNamedGlobal(Name);
423+
if (OriginalGV && !llvm::GlobalVariable::isExternalLinkage(OriginalGV->getLinkage()))
424+
return;
425+
426+
if (CGM.getTarget().getTargetOpts().CodeObjectVersion ==
427+
llvm::CodeObjectVersionKind::COV_None)
428+
return;
429+
430+
auto *Type = llvm::IntegerType::getIntNTy(CGM.getModule().getContext(), 32);
431+
llvm::Constant *COV = llvm::ConstantInt::get(
432+
Type, CGM.getTarget().getTargetOpts().CodeObjectVersion);
433+
434+
// It needs to be constant weak_odr without externally_initialized so that
435+
// the load instuction can be eliminated by the IPSCCP.
436+
auto *GV = new llvm::GlobalVariable(
437+
CGM.getModule(), Type, true, llvm::GlobalValue::WeakODRLinkage, COV, Name,
438+
nullptr, llvm::GlobalValue::ThreadLocalMode::NotThreadLocal,
439+
CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
440+
441+
GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
442+
GV->setVisibility(llvm::GlobalValue::VisibilityTypes::HiddenVisibility);
443+
}
444+
415445
void AMDGPUTargetCodeGenInfo::setTargetAttributes(
416446
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
417447
if (requiresAMDGPUProtectedVisibility(D, GV)) {

clang/test/CodeGen/amdgpu-abi-version.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --version 5
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --version 3
22
// RUN: %clang_cc1 -cc1 -triple amdgcn-amd-amdhsa -emit-llvm -mcode-object-version=none %s -o - | FileCheck %s
33

44
//.
55
// CHECK: @__oclc_ABI_version = external addrspace(4) global i32
66
//.
77
// CHECK-LABEL: define dso_local i32 @foo(
88
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
9-
// CHECK-NEXT: [[ENTRY:.*:]]
9+
// CHECK-NEXT: entry:
1010
// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
1111
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
1212
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) @__oclc_ABI_version, align 4

clang/test/CodeGen/amdgpu-address-spaces.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ int [[clang::address_space(999)]] bbb = 1234;
2929
// CHECK: @u = addrspace(5) global i32 undef, align 4
3030
// CHECK: @aaa = addrspace(6) global i32 1000, align 4
3131
// CHECK: @bbb = addrspace(999) global i32 1234, align 4
32+
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
3233
//.
3334
// CHECK-LABEL: define dso_local amdgpu_kernel void @foo(
3435
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \
2+
// RUN: -mcode-object-version=4 -DUSER -x hip -o %t_4.bc %s
3+
4+
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \
5+
// RUN: -mcode-object-version=5 -DUSER -x hip -o %t_5.bc %s
6+
7+
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \
8+
// RUN: -mcode-object-version=6 -DUSER -x hip -o %t_6.bc %s
9+
10+
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \
11+
// RUN: -mcode-object-version=none -DDEVICELIB -x hip -o %t_0.bc %s
12+
13+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -O3 \
14+
// RUN: %t_4.bc -mlink-builtin-bitcode %t_0.bc -o - |\
15+
// RUN: FileCheck -check-prefix=LINKED4 %s
16+
17+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -O3 \
18+
// RUN: %t_5.bc -mlink-builtin-bitcode %t_0.bc -o - |\
19+
// RUN: FileCheck -check-prefix=LINKED5 %s
20+
21+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -O3 \
22+
// RUN: %t_6.bc -mlink-builtin-bitcode %t_0.bc -o - |\
23+
// RUN: FileCheck -check-prefix=LINKED6 %s
24+
25+
#include "Inputs/cuda.h"
26+
27+
// LINKED4: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 400
28+
// LINKED4-LABEL: bar
29+
// LINKED4-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
30+
// LINKED4-NOT: icmp sge i32 %{{.*}}, 500
31+
// LINKED4: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
32+
// LINKED4: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12
33+
// LINKED4: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
34+
// LINKED4: [[GEP_4_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4
35+
// LINKED4: select i1 false, ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]]
36+
// LINKED4: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
37+
38+
// LINKED4-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
39+
// LINKED4-NOT: icmp sge i32 %{{.*}}, 500
40+
// LINKED4: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
41+
// LINKED4: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14
42+
// LINKED4: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
43+
// LINKED4: [[GEP_4_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6
44+
// LINKED4: select i1 false, ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]]
45+
// LINKED4: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
46+
47+
// LINKED4-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
48+
// LINKED4-NOT: icmp sge i32 %{{.*}}, 500
49+
// LINKED4: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
50+
// LINKED4: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16
51+
// LINKED4: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
52+
// LINKED4: [[GEP_4_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8
53+
// LINKED4: select i1 false, ptr addrspace(4) [[GEP_5_Z]], ptr addrspace(4) [[GEP_4_Z]]
54+
// LINKED4: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
55+
// LINKED4: "amdhsa_code_object_version", i32 400
56+
57+
// LINKED5: __oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
58+
// LINKED5-LABEL: bar
59+
// LINKED5-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
60+
// LINKED5-NOT: icmp sge i32 %{{.*}}, 500
61+
// LINKED5: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
62+
// LINKED5: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12
63+
// LINKED5: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
64+
// LINKED5: [[GEP_4_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4
65+
// LINKED5: select i1 true, ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]]
66+
// LINKED5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
67+
68+
// LINKED5-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
69+
// LINKED5-NOT: icmp sge i32 %{{.*}}, 500
70+
// LINKED5: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
71+
// LINKED5: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14
72+
// LINKED5: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
73+
// LINKED5: [[GEP_4_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6
74+
// LINKED5: select i1 true, ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]]
75+
// LINKED5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
76+
77+
// LINKED5-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
78+
// LINKED5-NOT: icmp sge i32 %{{.*}}, 500
79+
// LINKED5: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
80+
// LINKED5: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16
81+
// LINKED5: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
82+
// LINKED5: [[GEP_4_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8
83+
// LINKED5: select i1 true, ptr addrspace(4) [[GEP_5_Z]], ptr addrspace(4) [[GEP_4_Z]]
84+
// LINKED5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
85+
// LINKED5: "amdhsa_code_object_version", i32 500
86+
87+
// LINKED6: __oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
88+
// LINKED6-LABEL: bar
89+
// LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
90+
// LINKED6-NOT: icmp sge i32 %{{.*}}, 500
91+
// LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
92+
// LINKED6: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12
93+
// LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
94+
// LINKED6: [[GEP_4_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4
95+
// LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]]
96+
// LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
97+
98+
// LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
99+
// LINKED6-NOT: icmp sge i32 %{{.*}}, 500
100+
// LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
101+
// LINKED6: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14
102+
// LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
103+
// LINKED6: [[GEP_4_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6
104+
// LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]]
105+
// LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
106+
107+
// LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
108+
// LINKED6-NOT: icmp sge i32 %{{.*}}, 500
109+
// LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
110+
// LINKED6: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16
111+
// LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
112+
// LINKED6: [[GEP_4_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8
113+
// LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_Z]], ptr addrspace(4) [[GEP_4_Z]]
114+
// LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
115+
// LINKED6: "amdhsa_code_object_version", i32 600
116+
117+
#ifdef DEVICELIB
118+
__device__ void bar(int *x, int *y, int *z)
119+
{
120+
*x = __builtin_amdgcn_workgroup_size_x();
121+
*y = __builtin_amdgcn_workgroup_size_y();
122+
*z = __builtin_amdgcn_workgroup_size_z();
123+
}
124+
#endif
125+
126+
#ifdef USER
127+
__device__ void bar(int *x, int *y, int *z);
128+
__device__ void foo()
129+
{
130+
int *x, *y, *z;
131+
bar(x, y, z);
132+
}
133+
#endif

clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@
1111
// RUN: -fcuda-is-device -mcode-object-version=6 -emit-llvm -o - -x hip %s \
1212
// RUN: | FileCheck -check-prefix=COV5 %s
1313

14+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
15+
// RUN: -fcuda-is-device -mcode-object-version=none -emit-llvm -o - -x hip %s \
16+
// RUN: | FileCheck -check-prefix=COVNONE %s
17+
1418
#include "Inputs/cuda.h"
1519

1620
// PRECOV5-LABEL: test_get_workgroup_size
@@ -32,6 +36,34 @@
3236
// COV5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
3337

3438

39+
// COVNONE-LABEL: test_get_workgroup_size
40+
// COVNONE: load i32, ptr addrspace(4) @__oclc_ABI_version
41+
// COVNONE: [[ABI5_X:%.*]] = icmp sge i32 %{{.*}}, 500
42+
// COVNONE: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
43+
// COVNONE: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12
44+
// COVNONE: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
45+
// COVNONE: [[GEP_4_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4
46+
// COVNONE: select i1 [[ABI5_X]], ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]]
47+
// COVNONE: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
48+
49+
// COVNONE: load i32, ptr addrspace(4) @__oclc_ABI_version
50+
// COVNONE: [[ABI5_Y:%.*]] = icmp sge i32 %{{.*}}, 500
51+
// COVNONE: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
52+
// COVNONE: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14
53+
// COVNONE: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
54+
// COVNONE: [[GEP_4_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6
55+
// COVNONE: select i1 [[ABI5_Y]], ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]]
56+
// COVNONE: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
57+
58+
// COVNONE: load i32, ptr addrspace(4) @__oclc_ABI_version
59+
// COVNONE: [[ABI5_Z:%.*]] = icmp sge i32 %{{.*}}, 500
60+
// COVNONE: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
61+
// COVNONE: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16
62+
// COVNONE: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
63+
// COVNONE: [[GEP_4_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8
64+
// COVNONE: select i1 [[ABI5_Z]], ptr addrspace(4) [[GEP_5_Z]], ptr addrspace(4) [[GEP_4_Z]]
65+
// COVNONE: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
66+
3567
__device__ void test_get_workgroup_size(int d, int *out)
3668
{
3769
switch (d) {

clang/test/CodeGenCXX/dynamic-cast-address-space.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ B fail;
1313
// CHECK: @_ZTI1B = linkonce_odr addrspace(1) constant { ptr addrspace(1), ptr addrspace(1), ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds (ptr addrspace(1), ptr addrspace(1) @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2), ptr addrspace(1) @_ZTS1B, ptr addrspace(1) @_ZTI1A }, comdat, align 8
1414
// CHECK: @_ZTVN10__cxxabiv120__si_class_type_infoE = external addrspace(1) global [0 x ptr addrspace(1)]
1515
// CHECK: @_ZTS1B = linkonce_odr addrspace(1) constant [3 x i8] c"1B\00", comdat, align 1
16+
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
1617
//.
1718
// WITH-NONZERO-DEFAULT-AS: @_ZTV1B = linkonce_odr unnamed_addr addrspace(1) constant { [3 x ptr addrspace(1)] } { [3 x ptr addrspace(1)] [ptr addrspace(1) null, ptr addrspace(1) @_ZTI1B, ptr addrspace(1) addrspacecast (ptr addrspace(4) @_ZN1A1fEv to ptr addrspace(1))] }, comdat, align 8
1819
// WITH-NONZERO-DEFAULT-AS: @fail = addrspace(1) global { ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds inrange(-16, 8) ({ [3 x ptr addrspace(1)] }, ptr addrspace(1) @_ZTV1B, i32 0, i32 0, i32 2) }, align 8

clang/test/CodeGenHIP/default-attributes.hip

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
//.
99
// OPTNONE: @__hip_cuid_ = addrspace(1) global i8 0
1010
// OPTNONE: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata"
11+
// OPTNONE: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
1112
//.
1213
__device__ void extern_func();
1314

clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ kernel void test_target_features_kernel(global int *i) {
6868
// CHECK: @__block_literal_global = internal addrspace(1) constant { i32, i32, ptr } { i32 16, i32 8, ptr @__test_target_features_kernel_block_invoke }, align 8 #0
6969
// CHECK: @__test_target_features_kernel_block_invoke_kernel.runtime.handle = internal addrspace(1) externally_initialized constant %block.runtime.handle.t.3 zeroinitializer, section ".amdgpu.kernel.runtime.handle"
7070
// CHECK: @llvm.used = appending addrspace(1) global [10 x ptr] [ptr @__test_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_kernel.runtime.handle to ptr), ptr @__test_block_invoke_2_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_2_kernel.runtime.handle to ptr), ptr @__test_block_invoke_3_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle to ptr), ptr @__test_block_invoke_4_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_4_kernel.runtime.handle to ptr), ptr @__test_target_features_kernel_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__test_target_features_kernel_block_invoke_kernel.runtime.handle to ptr)], section "llvm.metadata"
71+
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
7172
//.
7273
// NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone
7374
// NOCPU-LABEL: define {{[^@]+}}@callee

clang/test/OpenMP/amdgcn_target_global_constructor.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ S A;
2929
// CHECK: @A = addrspace(1) global %struct.S zeroinitializer, align 4
3030
// CHECK: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @_GLOBAL__sub_I_amdgcn_target_global_constructor.cpp, ptr null }]
3131
// CHECK: @llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @__dtor_A, ptr null }]
32+
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
3233
//.
3334
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init
3435
// CHECK-SAME: () #[[ATTR0:[0-9]+]] {

compiler-rt/cmake/builtin-config-ix.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ builtin_check_c_compiler_flag(-Wno-pedantic COMPILER_RT_HAS_WNO_PEDANTIC
2222
builtin_check_c_compiler_flag(-nogpulib COMPILER_RT_HAS_NOGPULIB_FLAG)
2323
builtin_check_c_compiler_flag(-flto COMPILER_RT_HAS_FLTO_FLAG)
2424
builtin_check_c_compiler_flag(-fconvergent-functions COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG)
25+
builtin_check_c_compiler_flag("-Xclang -mcode-object-version=none" COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG)
2526
builtin_check_c_compiler_flag(-Wbuiltin-declaration-mismatch COMPILER_RT_HAS_WBUILTIN_DECLARATION_MISMATCH_FLAG)
2627
builtin_check_c_compiler_flag(/Zl COMPILER_RT_HAS_ZL_FLAG)
2728
builtin_check_c_compiler_flag(-fcf-protection=full COMPILER_RT_HAS_FCF_PROTECTION_FLAG)

0 commit comments

Comments
 (0)