Skip to content

Commit 3668534

Browse files
committed
[AMDGPU][SplitModule] Cleanup CallsExternal Handling
- Don't treat inline ASM as indirect calls - Remove alias testing, which was broken (only working by pure luck right now) and isn't needed anyway. GlobalOpt should take care of them for us.
1 parent cf2122c commit 3668534

File tree

4 files changed

+110
-75
lines changed

4 files changed

+110
-75
lines changed

llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp

Lines changed: 69 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
#include "llvm/Analysis/CallGraph.h"
4444
#include "llvm/Analysis/TargetTransformInfo.h"
4545
#include "llvm/IR/Function.h"
46+
#include "llvm/IR/InstIterator.h"
4647
#include "llvm/IR/Instruction.h"
4748
#include "llvm/IR/Module.h"
4849
#include "llvm/IR/User.h"
@@ -103,6 +104,15 @@ static cl::opt<bool> NoExternalizeGlobals(
103104
cl::desc("disables externalization of global variable with local linkage; "
104105
"may cause globals to be duplicated which increases binary size"));
105106

107+
static cl::opt<bool> NoExternalizeOnAddrTaken(
108+
"amdgpu-module-splitting-no-externalize-address-taken", cl::Hidden,
109+
cl::desc(
110+
"disables externalization of functions whose addresses are taken"));
111+
112+
static cl::opt<bool> InlineAsmIsIndirectCall(
113+
"amdgpu-module-splitting-inline-asm-is-indirect-call", cl::Hidden,
114+
cl::desc("consider inline assembly as an indirect call"));
115+
106116
static cl::opt<std::string>
107117
ModuleDotCfgOutput("amdgpu-module-splitting-print-module-dotcfg",
108118
cl::Hidden,
@@ -482,6 +492,9 @@ void SplitGraph::buildGraph(CallGraph &CG) {
482492
dbgs()
483493
<< "[build graph] constructing graph representation of the input\n");
484494

495+
// FIXME(?): Is the callgraph really worth using if we have to iterate the
496+
// function again whenever it fails to give us enough information?
497+
485498
// We build the graph by just iterating all functions in the module and
486499
// working on their direct callees. At the end, all nodes should be linked
487500
// together as expected.
@@ -492,29 +505,52 @@ void SplitGraph::buildGraph(CallGraph &CG) {
492505
continue;
493506

494507
// Look at direct callees and create the necessary edges in the graph.
495-
bool HasIndirectCall = false;
496-
Node &N = getNode(Cache, Fn);
508+
SetVector<const Function *> DirectCallees;
509+
bool CallsExternal = false;
497510
for (auto &CGEntry : *CG[&Fn]) {
498511
auto *CGNode = CGEntry.second;
499-
auto *Callee = CGNode->getFunction();
500-
if (!Callee) {
501-
// TODO: Don't consider inline assembly as indirect calls.
502-
if (CGNode == CG.getCallsExternalNode())
503-
HasIndirectCall = true;
504-
continue;
505-
}
506-
507-
if (!Callee->isDeclaration())
508-
createEdge(N, getNode(Cache, *Callee), EdgeKind::DirectCall);
512+
if (auto *Callee = CGNode->getFunction()) {
513+
if (!Callee->isDeclaration())
514+
DirectCallees.insert(Callee);
515+
} else if (CGNode == CG.getCallsExternalNode())
516+
CallsExternal = true;
509517
}
510518

511519
// Keep track of this function if it contains an indirect call and/or if it
512520
// can be indirectly called.
513-
if (HasIndirectCall) {
514-
LLVM_DEBUG(dbgs() << "indirect call found in " << Fn.getName() << "\n");
515-
FnsWithIndirectCalls.push_back(&Fn);
521+
if (CallsExternal) {
522+
LLVM_DEBUG(dbgs() << " [!] callgraph is incomplete for " << Fn.getName()
523+
<< " - analyzing function\n");
524+
525+
bool HasIndirectCall = false;
526+
for (const auto &Inst : instructions(Fn)) {
527+
// look at all calls without a direct callee.
528+
if (const auto *CB = dyn_cast<CallBase>(&Inst);
529+
CB && !CB->getCalledFunction()) {
530+
// inline assembly can be ignored, unless InlineAsmIsIndirectCall is
531+
// true.
532+
if (CB->isInlineAsm()) {
533+
if (InlineAsmIsIndirectCall)
534+
HasIndirectCall = true;
535+
LLVM_DEBUG(dbgs() << " found inline assembly\n");
536+
continue;
537+
}
538+
539+
// everything else is handled conservatively.
540+
HasIndirectCall = true;
541+
}
542+
}
543+
544+
if (HasIndirectCall) {
545+
LLVM_DEBUG(dbgs() << " indirect call found\n");
546+
FnsWithIndirectCalls.push_back(&Fn);
547+
}
516548
}
517549

550+
Node &N = getNode(Cache, Fn);
551+
for (const auto *Callee : DirectCallees)
552+
createEdge(N, getNode(Cache, *Callee), EdgeKind::DirectCall);
553+
518554
if (canBeIndirectlyCalled(Fn))
519555
IndirectlyCallableFns.push_back(&Fn);
520556
}
@@ -1326,13 +1362,23 @@ static void splitAMDGPUModule(
13261362
//
13271363
// Additionally, it guides partitioning to not duplicate this function if it's
13281364
// called directly at some point.
1329-
for (auto &Fn : M) {
1330-
if (Fn.hasAddressTaken()) {
1331-
if (Fn.hasLocalLinkage()) {
1332-
LLVM_DEBUG(dbgs() << "[externalize] " << Fn.getName()
1333-
<< " because its address is taken\n");
1365+
//
1366+
// TODO: Could we be smarter about this ? This makes all functions whose
1367+
// addresses are taken non-copyable. We should probably model this type of
1368+
// constraint in the graph and use it to guide splitting, instead of
1369+
// externalizing like this. Maybe non-copyable should really mean "keep one
1370+
// visible copy, then internalize all other copies" for some functions?
1371+
if (!NoExternalizeOnAddrTaken) {
1372+
for (auto &Fn : M) {
1373+
// TODO: Should aliases count? Probably not but they're so rare I'm not
1374+
// sure it's worth fixing.
1375+
if (Fn.hasAddressTaken()) {
1376+
if (Fn.hasLocalLinkage()) {
1377+
LLVM_DEBUG(dbgs() << "[externalize] " << Fn.getName()
1378+
<< " because its address is taken\n");
1379+
}
1380+
externalize(Fn);
13341381
}
1335-
externalize(Fn);
13361382
}
13371383
}
13381384

@@ -1368,7 +1414,8 @@ static void splitAMDGPUModule(
13681414
dbgs() << "[graph] nodes:\n";
13691415
for (const SplitGraph::Node *N : SG.nodes()) {
13701416
dbgs() << " - [" << N->getID() << "]: " << N->getName() << " "
1371-
<< (N->isGraphEntryPoint() ? "(entry)" : "") << "\n";
1417+
<< (N->isGraphEntryPoint() ? "(entry)" : "") << " "
1418+
<< (N->isNonCopyable() ? "(noncopyable)" : "") << "\n";
13721419
}
13731420
});
13741421

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
; RUN: llvm-split -o %t %s -j 2 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-no-externalize-address-taken
2+
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
3+
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
4+
5+
; RUN: llvm-split -o %t_as_indirect %s -j 2 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-no-externalize-address-taken -amdgpu-module-splitting-inline-asm-is-indirect-call
6+
; RUN: llvm-dis -o - %t_as_indirect0 | FileCheck --check-prefix=CHECK-INDIRECT0 --implicit-check-not=define %s
7+
; RUN: llvm-dis -o - %t_as_indirect1 | FileCheck --check-prefix=CHECK-INDIRECT1 --implicit-check-not=define %s
8+
9+
; CHECK0: define internal void @HelperB
10+
; CHECK0: define amdgpu_kernel void @B
11+
12+
; CHECK1: define internal void @HelperA()
13+
; CHECK1: define amdgpu_kernel void @A()
14+
15+
; CHECK-INDIRECT0: define internal void @HelperB
16+
; CHECK-INDIRECT0: define amdgpu_kernel void @B
17+
18+
; CHECK-INDIRECT1: define internal void @HelperA()
19+
; CHECK-INDIRECT1: define internal void @HelperB()
20+
; CHECK-INDIRECT1: define amdgpu_kernel void @A()
21+
22+
@addrthief = global [2 x ptr] [ptr @HelperA, ptr @HelperB]
23+
24+
define internal void @HelperA() {
25+
ret void
26+
}
27+
28+
define internal void @HelperB() {
29+
ret void
30+
}
31+
32+
define amdgpu_kernel void @A() {
33+
call void asm sideeffect "v_mov_b32 v0, 7", "~{v0}"()
34+
call void @HelperA()
35+
ret void
36+
}
37+
38+
define amdgpu_kernel void @B(ptr %out) {
39+
call void @HelperB()
40+
ret void
41+
}

llvm/test/tools/llvm-split/AMDGPU/kernels-alias-dependencies.ll

Lines changed: 0 additions & 41 deletions
This file was deleted.

llvm/test/tools/llvm-split/AMDGPU/kernels-dependency-indirect.ll

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,6 @@
33
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
44
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s
55

6-
; We have 4 kernels:
7-
; - Each kernel has an internal helper
8-
; - @A and @B's helpers does an indirect call.
9-
;
10-
; We default to putting A/B in P0, alongside a copy
11-
; of all helpers who have their address taken.
12-
; The other kernels can still go into separate partitions.
13-
;
14-
; Note that dependency discovery shouldn't stop upon finding an
15-
; indirect call. HelperC/D should also end up in P0 as they
16-
; are dependencies of HelperB.
17-
186
; CHECK0: define internal void @HelperD
197
; CHECK0: define amdgpu_kernel void @D
208

0 commit comments

Comments
 (0)