Skip to content

Commit c2ea211

Browse files
committed
[AMDGPU][SplitModule] Handle !callees metadata
1 parent b5e322f commit c2ea211

File tree

2 files changed

+97
-1
lines changed

2 files changed

+97
-1
lines changed

llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -482,6 +482,27 @@ void SplitGraph::Node::visitAllDependencies(
482482
}
483483
}
484484

485+
/// Checks if \p I has MD_callees and if it does, parse it and put the function
486+
/// in \p Callees.
487+
///
488+
/// \returns true if there was metadata and it was parsed correctly. false if
489+
/// there was no MD or if it contained unknown entries.
490+
static bool handleCalleesMD(const Instruction &I,
491+
SmallVector<Function *> &Callees) {
492+
auto *MD = I.getMetadata(LLVMContext::MD_callees);
493+
if (!MD)
494+
return false;
495+
496+
for (const auto &Op : MD->operands()) {
497+
Function *Callee = mdconst::extract_or_null<Function>(Op);
498+
if (!Callee)
499+
return false;
500+
Callees.push_back(Callee);
501+
}
502+
503+
return true;
504+
}
505+
485506
void SplitGraph::buildGraph(CallGraph &CG) {
486507
SplitModuleTimer SMT("buildGraph", "graph construction");
487508
LLVM_DEBUG(
@@ -519,6 +540,8 @@ void SplitGraph::buildGraph(CallGraph &CG) {
519540
Fn.printAsOperand(dbgs());
520541
dbgs() << " - analyzing function\n");
521542

543+
SmallVector<Function *> KnownCallees;
544+
522545
bool HasIndirectCall = false;
523546
for (const auto &Inst : instructions(Fn)) {
524547
// look at all calls without a direct callee.
@@ -531,6 +554,9 @@ void SplitGraph::buildGraph(CallGraph &CG) {
531554
continue;
532555
}
533556

557+
if (handleCalleesMD(Inst, KnownCallees))
558+
continue;
559+
534560
// everything else is handled conservatively.
535561
HasIndirectCall = true;
536562
break;
@@ -540,7 +566,8 @@ void SplitGraph::buildGraph(CallGraph &CG) {
540566
if (HasIndirectCall) {
541567
LLVM_DEBUG(dbgs() << " indirect call found\n");
542568
FnsWithIndirectCalls.push_back(&Fn);
543-
}
569+
} else if (!KnownCallees.empty())
570+
DirectCallees.insert(KnownCallees.begin(), KnownCallees.end());
544571
}
545572

546573
Node &N = getNode(Cache, Fn);
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
; RUN: sed -s 's/_MD_/, !callees !{ptr @CallCandidate0}/' %s | llvm-split -o %t -j 3 -mtriple amdgcn-amd-amdhsa
2+
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
3+
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
4+
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s
5+
6+
; RUN: sed -s 's/_MD_//g' %s | llvm-split -o %t-nomd -j 3 -mtriple amdgcn-amd-amdhsa
7+
; RUN: llvm-dis -o - %t-nomd0 | FileCheck --check-prefix=CHECK-NOMD0 --implicit-check-not=define %s
8+
; RUN: llvm-dis -o - %t-nomd1 | FileCheck --check-prefix=CHECK-NOMD1 --implicit-check-not=define %s
9+
; RUN: llvm-dis -o - %t-nomd2 | FileCheck --check-prefix=CHECK-NOMD2 --implicit-check-not=define %s
10+
11+
; CHECK0: define internal void @HelperC
12+
; CHECK0: define amdgpu_kernel void @C
13+
14+
; CHECK1: define hidden void @CallCandidate1
15+
; CHECK1: define internal void @HelperB
16+
; CHECK1: define amdgpu_kernel void @B
17+
18+
; CHECK2: define internal void @HelperA
19+
; CHECK2: define hidden void @CallCandidate0
20+
; CHECK2: define amdgpu_kernel void @A
21+
22+
; CHECK-NOMD0: define internal void @HelperC
23+
; CHECK-NOMD0: define amdgpu_kernel void @C
24+
25+
; CHECK-NOMD1: define internal void @HelperB
26+
; CHECK-NOMD1: define amdgpu_kernel void @B
27+
28+
; CHECK-NOMD2: define internal void @HelperA
29+
; CHECK-NOMD2: define hidden void @CallCandidate0
30+
; CHECK-NOMD2: define hidden void @CallCandidate1
31+
; CHECK-NOMD2: define amdgpu_kernel void @A
32+
33+
@addrthief = global [2 x ptr] [ptr @CallCandidate0, ptr @CallCandidate1]
34+
35+
; Makes an indirect call through %call. The call carries a placeholder token
; that the sed commands in the RUN lines rewrite: the first RUN line turns it
; into `!callees` metadata naming @CallCandidate0, the second deletes it so the
; call has no metadata at all.
define internal void @HelperA(ptr %call) {
36+
call void %call() _MD_
37+
ret void
38+
}
39+
40+
; Empty function whose address is stored in @addrthief. It is the only
; function named in the `!callees` list that the first RUN line attaches to
; the indirect call in @HelperA.
define internal void @CallCandidate0() {
41+
ret void
42+
}
43+
44+
; Empty function whose address is stored in @addrthief. It is NOT named in the
; `!callees` list that the first RUN line attaches to the indirect call in
; @HelperA.
define internal void @CallCandidate1() {
45+
ret void
46+
}
47+
48+
; Empty helper; its only caller in this file is kernel @B (direct call).
define internal void @HelperB() {
49+
ret void
50+
}
51+
52+
; Empty helper; its only caller in this file is kernel @C (direct call).
define internal void @HelperC() {
53+
ret void
54+
}
55+
56+
; Kernel that forwards its pointer argument to @HelperA, which performs the
; indirect call through it.
define amdgpu_kernel void @A(ptr %call) {
57+
call void @HelperA(ptr %call)
58+
ret void
59+
}
60+
61+
; Kernel containing only a direct call to @HelperB.
define amdgpu_kernel void @B() {
62+
call void @HelperB()
63+
ret void
64+
}
65+
66+
; Kernel containing only a direct call to @HelperC.
define amdgpu_kernel void @C() {
67+
call void @HelperC()
68+
ret void
69+
}

0 commit comments

Comments
 (0)