214
214
#define DEBUG_TYPE " amdgpu-lower-module-lds"
215
215
216
216
using namespace llvm ;
217
+ using namespace AMDGPU ;
217
218
218
219
namespace {
219
220
@@ -236,17 +237,6 @@ cl::opt<LoweringKind> LoweringKindLoc(
236
237
clEnumValN(LoweringKind::hybrid, " hybrid" ,
237
238
" Lower via mixture of above strategies" )));
238
239
239
- bool isKernelLDS (const Function *F) {
240
- // Some weirdness here. AMDGPU::isKernelCC does not call into
241
- // AMDGPU::isKernel with the calling conv, it instead calls into
242
- // isModuleEntryFunction which returns true for more calling conventions
243
- // than AMDGPU::isKernel does. There's a FIXME on AMDGPU::isKernel.
244
- // There's also a test that checks that the LDS lowering does not hit on
245
- // a graphics shader, denoted amdgpu_ps, so stay with the limited case.
246
- // Putting LDS in the name of the function to draw attention to this.
247
- return AMDGPU::isKernel (F->getCallingConv ());
248
- }
249
-
250
240
template <typename T> std::vector<T> sortByName (std::vector<T> &&V) {
251
241
llvm::sort (V.begin (), V.end (), [](const auto *L, const auto *R) {
252
242
return L->getName () < R->getName ();
@@ -307,163 +297,9 @@ class AMDGPULowerModuleLDS {
307
297
Decl, {}, {OperandBundleDefT<Value *>(" ExplicitUse" , UseInstance)});
308
298
}
309
299
310
- static bool eliminateConstantExprUsesOfLDSFromAllInstructions (Module &M) {
311
- // Constants are uniqued within LLVM. A ConstantExpr referring to a LDS
312
- // global may have uses from multiple different functions as a result.
313
- // This pass specialises LDS variables with respect to the kernel that
314
- // allocates them.
315
-
316
- // This is semantically equivalent to (the unimplemented as slow):
317
- // for (auto &F : M.functions())
318
- // for (auto &BB : F)
319
- // for (auto &I : BB)
320
- // for (Use &Op : I.operands())
321
- // if (constantExprUsesLDS(Op))
322
- // replaceConstantExprInFunction(I, Op);
323
-
324
- SmallVector<Constant *> LDSGlobals;
325
- for (auto &GV : M.globals ())
326
- if (AMDGPU::isLDSVariableToLower (GV))
327
- LDSGlobals.push_back (&GV);
328
-
329
- return convertUsersOfConstantsToInstructions (LDSGlobals);
330
- }
331
-
332
300
public:
333
301
AMDGPULowerModuleLDS (const AMDGPUTargetMachine &TM_) : TM(TM_) {}
334
302
335
- using FunctionVariableMap = DenseMap<Function *, DenseSet<GlobalVariable *>>;
336
-
337
- using VariableFunctionMap = DenseMap<GlobalVariable *, DenseSet<Function *>>;
338
-
339
- static void getUsesOfLDSByFunction (CallGraph const &CG, Module &M,
340
- FunctionVariableMap &kernels,
341
- FunctionVariableMap &functions) {
342
-
343
- // Get uses from the current function, excluding uses by called functions
344
- // Two output variables to avoid walking the globals list twice
345
- for (auto &GV : M.globals ()) {
346
- if (!AMDGPU::isLDSVariableToLower (GV)) {
347
- continue ;
348
- }
349
-
350
- if (GV.isAbsoluteSymbolRef ()) {
351
- report_fatal_error (
352
- " LDS variables with absolute addresses are unimplemented." );
353
- }
354
-
355
- for (User *V : GV.users ()) {
356
- if (auto *I = dyn_cast<Instruction>(V)) {
357
- Function *F = I->getFunction ();
358
- if (isKernelLDS (F)) {
359
- kernels[F].insert (&GV);
360
- } else {
361
- functions[F].insert (&GV);
362
- }
363
- }
364
- }
365
- }
366
- }
367
-
368
- struct LDSUsesInfoTy {
369
- FunctionVariableMap direct_access;
370
- FunctionVariableMap indirect_access;
371
- };
372
-
373
- static LDSUsesInfoTy getTransitiveUsesOfLDS (CallGraph const &CG, Module &M) {
374
-
375
- FunctionVariableMap direct_map_kernel;
376
- FunctionVariableMap direct_map_function;
377
- getUsesOfLDSByFunction (CG, M, direct_map_kernel, direct_map_function);
378
-
379
- // Collect variables that are used by functions whose address has escaped
380
- DenseSet<GlobalVariable *> VariablesReachableThroughFunctionPointer;
381
- for (Function &F : M.functions ()) {
382
- if (!isKernelLDS (&F))
383
- if (F.hasAddressTaken (nullptr ,
384
- /* IgnoreCallbackUses */ false ,
385
- /* IgnoreAssumeLikeCalls */ false ,
386
- /* IgnoreLLVMUsed */ true ,
387
- /* IgnoreArcAttachedCall */ false )) {
388
- set_union (VariablesReachableThroughFunctionPointer,
389
- direct_map_function[&F]);
390
- }
391
- }
392
-
393
- auto functionMakesUnknownCall = [&](const Function *F) -> bool {
394
- assert (!F->isDeclaration ());
395
- for (const CallGraphNode::CallRecord &R : *CG[F]) {
396
- if (!R.second ->getFunction ()) {
397
- return true ;
398
- }
399
- }
400
- return false ;
401
- };
402
-
403
- // Work out which variables are reachable through function calls
404
- FunctionVariableMap transitive_map_function = direct_map_function;
405
-
406
- // If the function makes any unknown call, assume the worst case that it can
407
- // access all variables accessed by functions whose address escaped
408
- for (Function &F : M.functions ()) {
409
- if (!F.isDeclaration () && functionMakesUnknownCall (&F)) {
410
- if (!isKernelLDS (&F)) {
411
- set_union (transitive_map_function[&F],
412
- VariablesReachableThroughFunctionPointer);
413
- }
414
- }
415
- }
416
-
417
- // Direct implementation of collecting all variables reachable from each
418
- // function
419
- for (Function &Func : M.functions ()) {
420
- if (Func.isDeclaration () || isKernelLDS (&Func))
421
- continue ;
422
-
423
- DenseSet<Function *> seen; // catches cycles
424
- SmallVector<Function *, 4 > wip{&Func};
425
-
426
- while (!wip.empty ()) {
427
- Function *F = wip.pop_back_val ();
428
-
429
- // Can accelerate this by referring to transitive map for functions that
430
- // have already been computed, with more care than this
431
- set_union (transitive_map_function[&Func], direct_map_function[F]);
432
-
433
- for (const CallGraphNode::CallRecord &R : *CG[F]) {
434
- Function *ith = R.second ->getFunction ();
435
- if (ith) {
436
- if (!seen.contains (ith)) {
437
- seen.insert (ith);
438
- wip.push_back (ith);
439
- }
440
- }
441
- }
442
- }
443
- }
444
-
445
- // direct_map_kernel lists which variables are used by the kernel
446
- // find the variables which are used through a function call
447
- FunctionVariableMap indirect_map_kernel;
448
-
449
- for (Function &Func : M.functions ()) {
450
- if (Func.isDeclaration () || !isKernelLDS (&Func))
451
- continue ;
452
-
453
- for (const CallGraphNode::CallRecord &R : *CG[&Func]) {
454
- Function *ith = R.second ->getFunction ();
455
- if (ith) {
456
- set_union (indirect_map_kernel[&Func], transitive_map_function[ith]);
457
- } else {
458
- set_union (indirect_map_kernel[&Func],
459
- VariablesReachableThroughFunctionPointer);
460
- }
461
- }
462
- }
463
-
464
- return {std::move (direct_map_kernel), std::move (indirect_map_kernel)};
465
- }
466
-
467
303
struct LDSVariableReplacement {
468
304
GlobalVariable *SGV = nullptr ;
469
305
DenseMap<GlobalVariable *, Constant *> LDSVarsToConstantGEP;
0 commit comments