43
43
#include " llvm/Analysis/CallGraph.h"
44
44
#include " llvm/Analysis/TargetTransformInfo.h"
45
45
#include " llvm/IR/Function.h"
46
+ #include " llvm/IR/InstIterator.h"
46
47
#include " llvm/IR/Instruction.h"
47
48
#include " llvm/IR/Module.h"
48
49
#include " llvm/IR/User.h"
@@ -103,6 +104,15 @@ static cl::opt<bool> NoExternalizeGlobals(
103
104
cl::desc (" disables externalization of global variable with local linkage; "
104
105
" may cause globals to be duplicated which increases binary size" ));
105
106
107
// Hidden boolean command-line flag
// (amdgpu-module-splitting-no-externalize-address-taken).
// It is read in splitAMDGPUModule: the loop that calls externalize(Fn) on
// every function for which Fn.hasAddressTaken() is true only runs under
// `if (!NoExternalizeOnAddrTaken)`, so setting this flag skips that whole
// externalization step.
// NOTE(review): no explicit initial value is given here; presumably the flag
// is off unless passed on the command line - confirm against cl::opt docs.
// NOTE(review): the leading '+' markers and bare numbers between the lines
// below look like diff-view gutter residue rather than C++ tokens.
+ static cl::opt<bool > NoExternalizeOnAddrTaken (
108
+ " amdgpu-module-splitting-no-externalize-address-taken" , cl::Hidden,
109
+ cl::desc (
110
+ " disables externalization of functions whose addresses are taken" ));
111
+
112
// Hidden boolean command-line flag
// (amdgpu-module-splitting-inline-asm-is-indirect-call).
// It is read in SplitGraph::buildGraph, in the instruction scan that runs
// when a function's call-graph entry reaches CG.getCallsExternalNode().
// For a call with no direct callee where CB->isInlineAsm() is true:
//   - flag set:   the function is recorded as containing an indirect call;
//   - flag unset: the inline assembly is skipped (only a debug message).
// Any other call without a direct callee is treated as an indirect call
// regardless of this flag.
// NOTE(review): no explicit initial value is given here; presumably the flag
// is off unless passed on the command line - confirm against cl::opt docs.
// NOTE(review): the leading '+' markers and bare numbers between the lines
// below look like diff-view gutter residue rather than C++ tokens.
+ static cl::opt<bool > InlineAsmIsIndirectCall (
113
+ " amdgpu-module-splitting-inline-asm-is-indirect-call" , cl::Hidden,
114
+ cl::desc (" consider inline assembly as an indirect call" ));
115
+
106
116
static cl::opt<std::string>
107
117
ModuleDotCfgOutput (" amdgpu-module-splitting-print-module-dotcfg" ,
108
118
cl::Hidden,
@@ -482,6 +492,9 @@ void SplitGraph::buildGraph(CallGraph &CG) {
482
492
dbgs ()
483
493
<< " [build graph] constructing graph representation of the input\n " );
484
494
495
+ // FIXME(?): Is the callgraph really worth using if we have to iterate the
496
+ // function again whenever it fails to give us enough information?
497
+
485
498
// We build the graph by just iterating all functions in the module and
486
499
// working on their direct callees. At the end, all nodes should be linked
487
500
// together as expected.
@@ -492,29 +505,52 @@ void SplitGraph::buildGraph(CallGraph &CG) {
492
505
continue ;
493
506
494
507
// Look at direct callees and create the necessary edges in the graph.
495
- bool HasIndirectCall = false ;
496
- Node &N = getNode (Cache, Fn) ;
508
+ SetVector< const Function *> DirectCallees ;
509
+ bool CallsExternal = false ;
497
510
for (auto &CGEntry : *CG[&Fn]) {
498
511
auto *CGNode = CGEntry.second ;
499
- auto *Callee = CGNode->getFunction ();
500
- if (!Callee) {
501
- // TODO: Don't consider inline assembly as indirect calls.
502
- if (CGNode == CG.getCallsExternalNode ())
503
- HasIndirectCall = true ;
504
- continue ;
505
- }
506
-
507
- if (!Callee->isDeclaration ())
508
- createEdge (N, getNode (Cache, *Callee), EdgeKind::DirectCall);
512
+ if (auto *Callee = CGNode->getFunction ()) {
513
+ if (!Callee->isDeclaration ())
514
+ DirectCallees.insert (Callee);
515
+ } else if (CGNode == CG.getCallsExternalNode ())
516
+ CallsExternal = true ;
509
517
}
510
518
511
519
// Keep track of this function if it contains an indirect call and/or if it
512
520
// can be indirectly called.
513
- if (HasIndirectCall) {
514
- LLVM_DEBUG (dbgs () << " indirect call found in " << Fn.getName () << " \n " );
515
- FnsWithIndirectCalls.push_back (&Fn);
521
+ if (CallsExternal) {
522
+ LLVM_DEBUG (dbgs () << " [!] callgraph is incomplete for " << Fn.getName ()
523
+ << " - analyzing function\n " );
524
+
525
+ bool HasIndirectCall = false ;
526
+ for (const auto &Inst : instructions (Fn)) {
527
+ // look at all calls without a direct callee.
528
+ if (const auto *CB = dyn_cast<CallBase>(&Inst);
529
+ CB && !CB->getCalledFunction ()) {
530
+ // inline assembly can be ignored, unless InlineAsmIsIndirectCall is
531
+ // true.
532
+ if (CB->isInlineAsm ()) {
533
+ if (InlineAsmIsIndirectCall)
534
+ HasIndirectCall = true ;
535
+ LLVM_DEBUG (dbgs () << " found inline assembly\n " );
536
+ continue ;
537
+ }
538
+
539
+ // everything else is handled conservatively.
540
+ HasIndirectCall = true ;
541
+ }
542
+ }
543
+
544
+ if (HasIndirectCall) {
545
+ LLVM_DEBUG (dbgs () << " indirect call found\n " );
546
+ FnsWithIndirectCalls.push_back (&Fn);
547
+ }
516
548
}
517
549
550
+ Node &N = getNode (Cache, Fn);
551
+ for (const auto *Callee : DirectCallees)
552
+ createEdge (N, getNode (Cache, *Callee), EdgeKind::DirectCall);
553
+
518
554
if (canBeIndirectlyCalled (Fn))
519
555
IndirectlyCallableFns.push_back (&Fn);
520
556
}
@@ -1326,13 +1362,23 @@ static void splitAMDGPUModule(
1326
1362
//
1327
1363
// Additionally, it guides partitioning to not duplicate this function if it's
1328
1364
// called directly at some point.
1329
- for (auto &Fn : M) {
1330
- if (Fn.hasAddressTaken ()) {
1331
- if (Fn.hasLocalLinkage ()) {
1332
- LLVM_DEBUG (dbgs () << " [externalize] " << Fn.getName ()
1333
- << " because its address is taken\n " );
1365
+ //
1366
+ // TODO: Could we be smarter about this? This makes all functions whose
1367
+ // addresses are taken non-copyable. We should probably model this type of
1368
+ // constraint in the graph and use it to guide splitting, instead of
1369
+ // externalizing like this. Maybe non-copyable should really mean "keep one
1370
+ // visible copy, then internalize all other copies" for some functions?
1371
+ if (!NoExternalizeOnAddrTaken) {
1372
+ for (auto &Fn : M) {
1373
+ // TODO: Should aliases count? Probably not but they're so rare I'm not
1374
+ // sure it's worth fixing.
1375
+ if (Fn.hasAddressTaken ()) {
1376
+ if (Fn.hasLocalLinkage ()) {
1377
+ LLVM_DEBUG (dbgs () << " [externalize] " << Fn.getName ()
1378
+ << " because its address is taken\n " );
1379
+ }
1380
+ externalize (Fn);
1334
1381
}
1335
- externalize (Fn);
1336
1382
}
1337
1383
}
1338
1384
@@ -1368,7 +1414,8 @@ static void splitAMDGPUModule(
1368
1414
dbgs () << " [graph] nodes:\n " ;
1369
1415
for (const SplitGraph::Node *N : SG.nodes ()) {
1370
1416
dbgs () << " - [" << N->getID () << " ]: " << N->getName () << " "
1371
- << (N->isGraphEntryPoint () ? " (entry)" : " " ) << " \n " ;
1417
+ << (N->isGraphEntryPoint () ? " (entry)" : " " ) << " "
1418
+ << (N->isNonCopyable () ? " (noncopyable)" : " " ) << " \n " ;
1372
1419
}
1373
1420
});
1374
1421
0 commit comments