@@ -54,63 +54,26 @@ Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
 /// Emit code based on Code Object ABI version.
 /// COV_4    : Emit code to use dispatch ptr
 /// COV_5+   : Emit code to use implicitarg ptr
-/// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
-///            and use its value for COV_4 or COV_5+ approach. It is used for
-///            compiling device libraries in an ABI-agnostic way.
 Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
   llvm::LoadInst *LD;

   auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
-
-  if (Cov == CodeObjectVersionKind::COV_None) {
-    StringRef Name = "__oclc_ABI_version";
-    auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
-    if (!ABIVersionC)
-      ABIVersionC = new llvm::GlobalVariable(
-          CGF.CGM.getModule(), CGF.Int32Ty, false,
-          llvm::GlobalValue::ExternalLinkage, nullptr, Name, nullptr,
-          llvm::GlobalVariable::NotThreadLocal,
-          CGF.CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
-
-    // This load will be eliminated by the IPSCCP because it is constant
-    // weak_odr without externally_initialized. Either changing it to weak or
-    // adding externally_initialized will keep the load.
-    Value *ABIVersion = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, ABIVersionC,
-                                                      CGF.CGM.getIntAlign());
-
-    Value *IsCOV5 = CGF.Builder.CreateICmpSGE(
-        ABIVersion,
-        llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5));
-
+  Value *GEP = nullptr;
+  if (Cov >= CodeObjectVersionKind::COV_5) {
     // Indexing the implicit kernarg segment.
-    Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32(
+    GEP = CGF.Builder.CreateConstGEP1_32(
         CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
-
-    // Indexing the HSA kernel_dispatch_packet struct.
-    Value *DispatchGEP = CGF.Builder.CreateConstGEP1_32(
-        CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
-
-    auto Result = CGF.Builder.CreateSelect(IsCOV5, ImplicitGEP, DispatchGEP);
-    LD = CGF.Builder.CreateLoad(
-        Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2)));
   } else {
-    Value *GEP = nullptr;
-    if (Cov >= CodeObjectVersionKind::COV_5) {
-      // Indexing the implicit kernarg segment.
-      GEP = CGF.Builder.CreateConstGEP1_32(
-          CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
-    } else {
-      // Indexing the HSA kernel_dispatch_packet struct.
-      GEP = CGF.Builder.CreateConstGEP1_32(
-          CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
-    }
-    LD = CGF.Builder.CreateLoad(
-        Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
+    // Indexing the HSA kernel_dispatch_packet struct.
+    GEP = CGF.Builder.CreateConstGEP1_32(CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF),
+                                         4 + Index * 2);
   }
+  LD = CGF.Builder.CreateLoad(
+      Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));

   llvm::MDBuilder MDHelper(CGF.getLLVMContext());
-  llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1),
-      APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
+  llvm::MDNode *RNode = MDHelper.createRange(
+      APInt(16, 1), APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
   LD->setMetadata(llvm::LLVMContext::MD_range, RNode);
   LD->setMetadata(llvm::LLVMContext::MD_noundef,
                   llvm::MDNode::get(CGF.getLLVMContext(), {}));
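For context on the constant offsets above, here is a minimal C++ sketch of the layouts the two remaining paths index into. The struct names are hypothetical and only illustrate byte offsets: the COV_4 path reads a 16-bit field of the HSA kernel_dispatch_packet (workgroup_size_x/y/z at byte offsets 4, 6, 8), while the COV_5+ path reads the hidden group-size fields of the implicit kernarg segment (byte offsets 12, 14, 16, after three 32-bit block counts), per the HSA and AMDGPU code-object documentation.

#include <cstdint>

// Hypothetical sketch of the leading fields of the HSA kernel_dispatch_packet;
// the COV_4 path computes dispatch_ptr + 4 + Index * 2 to reach these fields.
struct DispatchPacketPrefix {
  uint16_t header;           // byte offset 0
  uint16_t setup;            // byte offset 2
  uint16_t workgroup_size_x; // byte offset 4  (Index == 0)
  uint16_t workgroup_size_y; // byte offset 6  (Index == 1)
  uint16_t workgroup_size_z; // byte offset 8  (Index == 2)
  // ... remaining packet fields elided
};

// Hypothetical sketch of the leading COV_5 implicit kernel arguments; the
// COV_5+ path computes implicitarg_ptr + 12 + Index * 2 to reach the 16-bit
// group-size fields that follow the three 32-bit block counts.
struct ImplicitArgsPrefixCOV5 {
  uint32_t block_count_x; // byte offset 0
  uint32_t block_count_y; // byte offset 4
  uint32_t block_count_z; // byte offset 8
  uint16_t group_size_x;  // byte offset 12 (Index == 0)
  uint16_t group_size_y;  // byte offset 14 (Index == 1)
  uint16_t group_size_z;  // byte offset 16 (Index == 2)
  // ... remaining implicit arguments elided
};

Either way, the value is loaded as an i16 with alignment 2, and the attached range metadata bounds it to [1, getMaxOpenCLWorkGroupSize() + 1).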