Skip to content

Commit b873223

Browse files
committed
Stop using undefined global now
1 parent e419859 commit b873223

File tree

2 files changed

+14
-59
lines changed

2 files changed

+14
-59
lines changed

clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp

Lines changed: 10 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -54,63 +54,26 @@ Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
5454
/// Emit code based on Code Object ABI version.
5555
/// COV_4 : Emit code to use dispatch ptr
5656
/// COV_5+ : Emit code to use implicitarg ptr
57-
/// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
58-
/// and use its value for COV_4 or COV_5+ approach. It is used for
59-
/// compiling device libraries in an ABI-agnostic way.
6057
Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
6158
llvm::LoadInst *LD;
6259

6360
auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
64-
65-
if (Cov == CodeObjectVersionKind::COV_None) {
66-
StringRef Name = "__oclc_ABI_version";
67-
auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
68-
if (!ABIVersionC)
69-
ABIVersionC = new llvm::GlobalVariable(
70-
CGF.CGM.getModule(), CGF.Int32Ty, false,
71-
llvm::GlobalValue::ExternalLinkage, nullptr, Name, nullptr,
72-
llvm::GlobalVariable::NotThreadLocal,
73-
CGF.CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
74-
75-
// This load will be eliminated by the IPSCCP because it is constant
76-
// weak_odr without externally_initialized. Either changing it to weak or
77-
// adding externally_initialized will keep the load.
78-
Value *ABIVersion = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, ABIVersionC,
79-
CGF.CGM.getIntAlign());
80-
81-
Value *IsCOV5 = CGF.Builder.CreateICmpSGE(
82-
ABIVersion,
83-
llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5));
84-
61+
Value *GEP = nullptr;
62+
if (Cov >= CodeObjectVersionKind::COV_5) {
8563
// Indexing the implicit kernarg segment.
86-
Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32(
64+
GEP = CGF.Builder.CreateConstGEP1_32(
8765
CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
88-
89-
// Indexing the HSA kernel_dispatch_packet struct.
90-
Value *DispatchGEP = CGF.Builder.CreateConstGEP1_32(
91-
CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
92-
93-
auto Result = CGF.Builder.CreateSelect(IsCOV5, ImplicitGEP, DispatchGEP);
94-
LD = CGF.Builder.CreateLoad(
95-
Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2)));
9666
} else {
97-
Value *GEP = nullptr;
98-
if (Cov >= CodeObjectVersionKind::COV_5) {
99-
// Indexing the implicit kernarg segment.
100-
GEP = CGF.Builder.CreateConstGEP1_32(
101-
CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
102-
} else {
103-
// Indexing the HSA kernel_dispatch_packet struct.
104-
GEP = CGF.Builder.CreateConstGEP1_32(
105-
CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
106-
}
107-
LD = CGF.Builder.CreateLoad(
108-
Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
67+
// Indexing the HSA kernel_dispatch_packet struct.
68+
GEP = CGF.Builder.CreateConstGEP1_32(CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF),
69+
4 + Index * 2);
10970
}
71+
LD = CGF.Builder.CreateLoad(
72+
Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
11073

11174
llvm::MDBuilder MDHelper(CGF.getLLVMContext());
112-
llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1),
113-
APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
75+
llvm::MDNode *RNode = MDHelper.createRange(
76+
APInt(16, 1), APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
11477
LD->setMetadata(llvm::LLVMContext::MD_range, RNode);
11578
LD->setMetadata(llvm::LLVMContext::MD_noundef,
11679
llvm::MDNode::get(CGF.getLLVMContext(), {}));

clang/test/CodeGen/amdgpu-abi-version.c

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,15 @@
11
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 5
22
// RUN: %clang_cc1 -cc1 -triple amdgcn-amd-amdhsa -emit-llvm -mcode-object-version=none %s -o - | FileCheck %s
33

4-
//.
5-
// CHECK: @__oclc_ABI_version = external addrspace(4) global i32
6-
//.
74
// CHECK-LABEL: define dso_local i32 @foo(
85
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
96
// CHECK-NEXT: [[ENTRY:.*:]]
107
// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
118
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
12-
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) @__oclc_ABI_version, align 4
13-
// CHECK-NEXT: [[TMP1:%.*]] = icmp sge i32 [[TMP0]], 500
14-
// CHECK-NEXT: [[TMP2:%.*]] = call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
15-
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP2]], i32 12
16-
// CHECK-NEXT: [[TMP4:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
17-
// CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP4]], i32 4
18-
// CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP1]], ptr addrspace(4) [[TMP3]], ptr addrspace(4) [[TMP5]]
19-
// CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[TMP6]], align 2, !range [[RNG2:![0-9]+]], !invariant.load [[META3:![0-9]+]], !noundef [[META3]]
20-
// CHECK-NEXT: [[CONV:%.*]] = zext i16 [[TMP7]] to i32
9+
// CHECK-NEXT: [[TMP0:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
10+
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP0]], i32 4
11+
// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 2, !range [[RNG2:![0-9]+]], !invariant.load [[META3:![0-9]+]], !noundef [[META3]]
12+
// CHECK-NEXT: [[CONV:%.*]] = zext i16 [[TMP2]] to i32
2113
// CHECK-NEXT: ret i32 [[CONV]]
2214
//
2315
int foo() { return __builtin_amdgcn_workgroup_size_x(); }

0 commit comments

Comments
 (0)