Skip to content

Commit 9c7a0e9

Browse files
committed
Comments
1 parent ba993e9 commit 9c7a0e9

File tree

2 files changed

+70
-21
lines changed

2 files changed

+70
-21
lines changed

llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
#include "llvm/Support/Casting.h"
5151
#include "llvm/Support/Debug.h"
5252
#include "llvm/Support/FileSystem.h"
53+
#include "llvm/Support/Path.h"
5354
#include "llvm/Support/Process.h"
5455
#include "llvm/Support/SHA256.h"
5556
#include "llvm/Support/raw_ostream.h"
@@ -68,15 +69,16 @@ using namespace llvm;
6869
namespace {
6970

7071
static cl::opt<float> LargeKernelFactor(
71-
"amdgpu-module-splitting-large-kernel-threshold", cl::init(2.0), cl::Hidden,
72+
"amdgpu-module-splitting-large-kernel-threshold", cl::init(2.0f),
73+
cl::Hidden,
7274
cl::desc(
7375
"consider a kernel as large and needing special treatment when it "
7476
"exceeds the average cost of a partition by this factor; e;g. 2.0 "
7577
"means if the kernel and its dependencies is 2 times bigger than "
7678
"an average partition; 0 disables large kernels handling entirely"));
7779

7880
static cl::opt<float> LargeKernelOverlapForMerge(
79-
"amdgpu-module-splitting-large-kernel-merge-overlap", cl::init(0.8),
81+
"amdgpu-module-splitting-large-kernel-merge-overlap", cl::init(0.8f),
8082
cl::Hidden,
8183
cl::desc("defines how much overlap between two large kernel's dependencies "
8284
"is needed to put them in the same partition"));
@@ -112,7 +114,7 @@ static std::string getName(const Value &V) {
112114
if (!*HideNames)
113115
return V.getName().str();
114116
return toHex(SHA256::hash(arrayRefFromStringRef(V.getName())),
115-
/*LowerCase*/ true);
117+
/*LowerCase=*/true);
116118
}
117119

118120
/// Main logging helper.
@@ -168,18 +170,19 @@ class SplitModuleLogger {
168170

169171
// If a log directory is specified, create a new file with a unique name in
170172
// that directory.
171-
SmallString<0> FilePath;
172173
int Fd;
173-
std::string LogFile = (LogDir + "/" + "Module-%%-%%-%%-%%-%%-%%-%%.txt");
174-
if (auto Err = sys::fs::createUniqueFile(LogFile, Fd, FilePath)) {
175-
dbgs() << LogFile << "\n";
174+
SmallString<0> PathTemplate;
175+
SmallString<0> RealPath;
176+
sys::path::append(PathTemplate, LogDir, "Module-%%-%%-%%-%%-%%-%%-%%.txt");
177+
if (auto Err =
178+
sys::fs::createUniqueFile(PathTemplate.str(), Fd, RealPath)) {
176179
std::string Msg =
177180
"Failed to create log file at '" + LogDir + "': " + Err.message();
178181
report_fatal_error(StringRef(Msg),
179182
/*CrashDiag=*/false);
180183
}
181184

182-
FileOS = std::make_unique<raw_fd_ostream>(Fd, /*shouldClose*/ true);
185+
FileOS = std::make_unique<raw_fd_ostream>(Fd, /*shouldClose=*/true);
183186
}
184187

185188
bool hasLogFile() const { return FileOS != nullptr; }
@@ -304,15 +307,23 @@ static void addAllDependencies(SplitModuleLogger &SML, const CallGraph &CG,
304307
SmallVector<const Function *> WorkList({&Fn});
305308
while (!WorkList.empty()) {
306309
const auto &CurFn = *WorkList.pop_back_val();
310+
assert(!CurFn.isDeclaration());
307311

308312
// Scan for an indirect call. If such a call is found, we have to
309313
// conservatively assume this can call all non-entrypoint functions in the
310314
// module.
311-
for (const auto &BB : CurFn) {
312-
for (const auto &I : BB) {
313-
const auto *CB = dyn_cast<CallBase>(&I);
314-
if (!CB || !CB->isIndirectCall())
315-
continue;
315+
316+
for (auto &CGEntry : *CG[&CurFn]) {
317+
auto *CGNode = CGEntry.second;
318+
auto *Callee = CGNode->getFunction();
319+
if (!Callee) {
320+
// Functions have an edge towards CallsExternalNode if they're external
321+
// declarations, or if they do an indirect call. As we only process
322+
// definitions here, we know this means the function has an indirect
323+
// call. We then have to conservatively assume this can call all
324+
// non-entrypoint functions in the module.
325+
if (CGNode != CG.getCallsExternalNode())
326+
continue; // this is another function-less node we don't care about.
316327

317328
SML << "Indirect call detected in " << getName(CurFn)
318329
<< " - treating all non-entrypoint functions as "
@@ -323,15 +334,8 @@ static void addAllDependencies(SplitModuleLogger &SML, const CallGraph &CG,
323334
HadIndirectCall = true;
324335
return;
325336
}
326-
}
327-
328-
for (auto &CGEntry : *CG[&CurFn]) {
329-
auto *Callee = CGEntry.second->getFunction();
330-
if (!Callee)
331-
continue;
332337

333338
assert(!AMDGPU::isKernelCC(Callee));
334-
335339
if (Callee->isDeclaration())
336340
continue;
337341

@@ -399,7 +403,7 @@ static float calculateOverlap(const DenseSet<const Function *> &A,
399403
++NumCommon;
400404
}
401405

402-
return float(NumCommon) / Total.size();
406+
return static_cast<float>(NumCommon) / Total.size();
403407
}
404408

405409
/// Performs all of the partitioning work on \p M.
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
; RUN: llvm-split -o %t %s -j 2 -mtriple amdgcn-amd-amdhsa
2+
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
3+
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
4+
5+
; 3 kernels:
6+
; - A calls nothing
7+
; - B calls @PerryThePlatypus
8+
; - C calls @Perry, an alias of @PerryThePlatypus
9+
;
10+
; We should see through the alias and put B/C in the same
11+
; partition.
12+
;
13+
; Additionally, @PerryThePlatypus gets externalized as
14+
; the alias counts as taking its address.
15+
16+
; CHECK0-NOT: define
17+
; CHECK0: @Perry = internal alias ptr (), ptr @PerryThePlatypus
18+
; CHECK0: define hidden void @PerryThePlatypus()
19+
; CHECK0: define amdgpu_kernel void @B
20+
; CHECK0: define amdgpu_kernel void @C
21+
; CHECK0-NOT: define
22+
23+
; CHECK1-NOT: define
24+
; CHECK1: define amdgpu_kernel void @A
25+
; CHECK1-NOT: define
26+
27+
@Perry = internal alias ptr(), ptr @PerryThePlatypus
28+
29+
define internal void @PerryThePlatypus() {
30+
ret void
31+
}
32+
33+
define amdgpu_kernel void @A() {
34+
ret void
35+
}
36+
37+
define amdgpu_kernel void @B() {
38+
call void @PerryThePlatypus()
39+
ret void
40+
}
41+
42+
define amdgpu_kernel void @C() {
43+
call void @Perry()
44+
ret void
45+
}

0 commit comments

Comments
 (0)