@@ -71,7 +71,7 @@ PreservedAnalyses GlobalOffsetPass::run(Module &M, ModuleAnalysisManager &) {
71
71
return PreservedAnalyses::all ();
72
72
73
73
AT = TargetHelpers::getArchType (M);
74
- llvm:: Function *ImplicitOffsetIntrinsic = M.getFunction (Intrinsic::getName (
74
+ Function *ImplicitOffsetIntrinsic = M.getFunction (Intrinsic::getName (
75
75
AT == ArchType::Cuda
76
76
? static_cast <unsigned >(Intrinsic::nvvm_implicit_offset)
77
77
: static_cast <unsigned >(Intrinsic::amdgcn_implicit_offset)));
@@ -82,12 +82,13 @@ PreservedAnalyses GlobalOffsetPass::run(Module &M, ModuleAnalysisManager &) {
82
82
// For AMD allocas and pointers have to be to CONSTANT_PRIVATE (5), NVVM is
83
83
// happy with ADDRESS_SPACE_GENERIC (0).
84
84
TargetAS = AT == ArchType::Cuda ? 0 : 5 ;
85
+ /// The value for NVVM's ADDRESS_SPACE_SHARED and AMD's LOCAL_ADDRESS happens
86
+ /// to be 3, use it for the implicit argument pointer type.
85
87
KernelImplicitArgumentType =
86
88
ArrayType::get (Type::getInt32Ty (M.getContext ()), 3 );
87
89
ImplicitOffsetPtrType =
88
90
Type::getInt32Ty (M.getContext ())->getPointerTo (TargetAS);
89
- assert ((!ImplicitOffsetIntrinsic ||
90
- ImplicitOffsetIntrinsic->getReturnType () == ImplicitOffsetPtrType) &&
91
+ assert ((ImplicitOffsetIntrinsic->getReturnType () == ImplicitOffsetPtrType) &&
91
92
" Implicit offset intrinsic does not return the expected type" );
92
93
93
94
SmallVector<KernelPayload, 4 > KernelPayloads;
@@ -108,10 +109,11 @@ PreservedAnalyses GlobalOffsetPass::run(Module &M, ModuleAnalysisManager &) {
108
109
return PreservedAnalyses::none ();
109
110
}
110
111
111
- void GlobalOffsetPass::processKernelEntryPoint (Module &M, Function *Func) {
112
+ void GlobalOffsetPass::processKernelEntryPoint (Function *Func) {
112
113
assert (EntryPointMetadata.count (Func) != 0 &&
113
114
" Function must be an entry point" );
114
115
116
+ auto &M = *Func->getParent ();
115
117
LLVMContext &Ctx = M.getContext ();
116
118
MDNode *FuncMetadata = EntryPointMetadata[Func];
117
119
@@ -128,7 +130,7 @@ void GlobalOffsetPass::processKernelEntryPoint(Module &M, Function *Func) {
128
130
M, Func, KernelImplicitArgumentType->getPointerTo (),
129
131
/* KeepOriginal=*/ true )
130
132
.first ;
131
- Argument *NewArgument = NewFunc-> arg_begin () + ( NewFunc->arg_size () - 1 );
133
+ Argument *NewArgument = std::prev ( NewFunc->arg_end () );
132
134
// Pass byval to the kernel for NVIDIA, AMD's calling convention disallows
133
135
// byval args, use byref.
134
136
auto Attr =
@@ -161,26 +163,6 @@ void GlobalOffsetPass::processKernelEntryPoint(Module &M, Function *Func) {
161
163
ImplicitOffsetType, ImplicitOffset, 0 , 0 );
162
164
}
163
165
164
- // This function adds an implicit parameter to the function containing a call
165
- // instruction to the implicit offset intrinsic or another function (which
166
- // eventually calls the intrinsic). If the call instruction is to the
167
- // implicit offset intrinsic, then the intrinsic is replaced with the
168
- // parameter that was added.
169
- //
170
- // `Callee` is the function (to which this transformation has already been
171
- // applied), or to the implicit offset intrinsic. `CalleeWithImplicitParam`
172
- // indicates whether Callee is to the implicit intrinsic (when `nullptr`) or
173
- // to another function (not `nullptr`) - this is used to know whether calls to
174
- // it need to have the implicit parameter added to it or replaced with the
175
- // implicit parameter.
176
- //
177
- // Once the function, say `F`, containing a call to `Callee` has the implicit
178
- // parameter added, callers of `F` are processed by recursively calling this
179
- // function, passing `F` to `CalleeWithImplicitParam`.
180
- //
181
- // Since the cloning of entry points may alter the users of a function, the
182
- // cloning must be done as early as possible, as to ensure that no users are
183
- // added to previous callees in the call-tree.
184
166
void GlobalOffsetPass::addImplicitParameterToCallers (
185
167
Module &M, Value *Callee, Function *CalleeWithImplicitParam) {
186
168
@@ -193,7 +175,7 @@ void GlobalOffsetPass::addImplicitParameterToCallers(
193
175
194
176
Function *Caller = Call->getFunction ();
195
177
if (EntryPointMetadata.count (Caller) != 0 ) {
196
- processKernelEntryPoint (M, Caller);
178
+ processKernelEntryPoint (Caller);
197
179
}
198
180
}
199
181
@@ -223,40 +205,41 @@ void GlobalOffsetPass::addImplicitParameterToCallers(
223
205
CallToOld->replaceAllUsesWith (ImplicitOffset);
224
206
} else {
225
207
// Build up a list of arguments to call the modified function using.
226
- llvm:: SmallVector<Value *, 8 > ImplicitOffsets;
208
+ SmallVector<Value *, 8 > ImplicitOffsets;
227
209
for (Use &U : CallToOld->args ()) {
228
210
ImplicitOffsets.push_back (U);
229
211
}
230
212
ImplicitOffsets.push_back (ImplicitOffset);
231
213
232
214
// Replace call to other function (which now has a new parameter),
233
215
// with a call including the new parameter to that same function.
234
- auto *NewCaller = CallInst::Create (
216
+ auto *NewCallInst = CallInst::Create (
235
217
/* Ty= */ CalleeWithImplicitParam->getFunctionType (),
236
218
/* Func= */ CalleeWithImplicitParam,
237
219
/* Args= */ ImplicitOffsets,
238
220
/* NameStr= */ Twine (),
239
221
/* InsertBefore= */ CallToOld);
240
- NewCaller ->setTailCallKind (CallToOld->getTailCallKind ());
241
- NewCaller ->copyMetadata (*CallToOld);
242
- CallToOld->replaceAllUsesWith (NewCaller );
222
+ NewCallInst ->setTailCallKind (CallToOld->getTailCallKind ());
223
+ NewCallInst ->copyMetadata (*CallToOld);
224
+ CallToOld->replaceAllUsesWith (NewCallInst );
243
225
244
226
if (CallToOld->hasName ()) {
245
- NewCaller ->takeName (CallToOld);
227
+ NewCallInst ->takeName (CallToOld);
246
228
}
247
229
}
248
230
249
231
// Remove the caller now that it has been replaced.
250
232
CallToOld->eraseFromParent ();
251
233
252
- if (!AlreadyProcessed) {
253
- // Process callers of the old function.
254
- addImplicitParameterToCallers (M, Caller, NewFunc);
234
+ if (AlreadyProcessed)
235
+ continue ;
255
236
256
- // Now that the old function is dead, delete it.
257
- Caller->dropAllReferences ();
258
- Caller->eraseFromParent ();
259
- }
237
+ // Process callers of the old function.
238
+ addImplicitParameterToCallers (M, Caller, NewFunc);
239
+
240
+ // Now that the old function is dead, delete it.
241
+ Caller->dropAllReferences ();
242
+ Caller->eraseFromParent ();
260
243
}
261
244
}
262
245
@@ -270,13 +253,9 @@ std::pair<Function *, Value *> GlobalOffsetPass::addOffsetArgumentToFunction(
270
253
// Construct an argument list containing all of the previous arguments.
271
254
SmallVector<Type *, 8 > Arguments;
272
255
SmallVector<AttributeSet, 8 > ArgumentAttributes;
273
-
274
- unsigned i = 0 ;
275
- for (Function::arg_iterator FuncArg = Func->arg_begin (),
276
- FuncEnd = Func->arg_end ();
277
- FuncArg != FuncEnd; ++FuncArg, ++i) {
278
- Arguments.push_back (FuncArg->getType ());
279
- ArgumentAttributes.push_back (FuncAttrs.getParamAttrs (i));
256
+ for (const auto &I : enumerate(Func->args ())) {
257
+ Arguments.push_back (I.value ().getType ());
258
+ ArgumentAttributes.push_back (FuncAttrs.getParamAttrs (I.index ()));
280
259
}
281
260
282
261
// Add the offset argument. Must be the same type as returned by
@@ -325,8 +304,7 @@ std::pair<Function *, Value *> GlobalOffsetPass::addOffsetArgumentToFunction(
325
304
IRBuilder<> Builder (EntryBlock, EntryBlock->getFirstInsertionPt ());
326
305
Type *ImplicitOffsetType =
327
306
ArrayType::get (Type::getInt32Ty (M.getContext ()), 3 );
328
- Value *OrigImplicitOffset =
329
- NewFunc->arg_begin () + (NewFunc->arg_size () - 1 );
307
+ Value *OrigImplicitOffset = std::prev (NewFunc->arg_end ());
330
308
AllocaInst *ImplicitOffsetAlloca =
331
309
Builder.CreateAlloca (ImplicitOffsetType, TargetAS);
332
310
auto DL = M.getDataLayout ();
@@ -346,7 +324,7 @@ std::pair<Function *, Value *> GlobalOffsetPass::addOffsetArgumentToFunction(
346
324
ImplicitArgumentType = ImplicitOffset->getType ();
347
325
ImplicitOffsetAllocaInserted = true ;
348
326
} else {
349
- ImplicitOffset = NewFunc-> arg_begin () + ( NewFunc->arg_size () - 1 );
327
+ ImplicitOffset = std::prev ( NewFunc->arg_end () );
350
328
}
351
329
} else {
352
330
NewFunc->copyAttributesFrom (Func);
@@ -371,7 +349,7 @@ std::pair<Function *, Value *> GlobalOffsetPass::addOffsetArgumentToFunction(
371
349
for (auto MD : MDs)
372
350
NewFunc->addMetadata (MD.first , *MD.second );
373
351
374
- ImplicitOffset = NewFunc-> arg_begin () + ( NewFunc->arg_size () - 1 );
352
+ ImplicitOffset = std::prev ( NewFunc->arg_end () );
375
353
}
376
354
assert (ImplicitOffset && " Value of implicit offset must be set." );
377
355
@@ -381,7 +359,7 @@ std::pair<Function *, Value *> GlobalOffsetPass::addOffsetArgumentToFunction(
381
359
// Make sure bitcast is inserted after alloca, if present.
382
360
BasicBlock::iterator InsertionPt =
383
361
ImplicitOffsetAllocaInserted
384
- ? std::next ((( AllocaInst *) ImplicitOffset)->getIterator ())
362
+ ? std::next (cast< AllocaInst>( ImplicitOffset)->getIterator ())
385
363
: EntryBlock->getFirstInsertionPt ();
386
364
IRBuilder<> Builder (EntryBlock, InsertionPt);
387
365
ImplicitOffset = Builder.CreateBitCast (
@@ -409,7 +387,7 @@ DenseMap<Function *, MDNode *> GlobalOffsetPass::validateKernels(
409
387
return !GV->hasOneUse () || !Used.count (GV);
410
388
};
411
389
412
- llvm:: DenseMap<Function *, MDNode *> EntryPointMetadata;
390
+ DenseMap<Function *, MDNode *> EntryPointMetadata;
413
391
for (auto &KP : KernelPayloads) {
414
392
if (HasUseOtherThanLLVMUsed (KP.Kernel ))
415
393
llvm_unreachable (" Kernel entry point can't have uses." );
0 commit comments