Skip to content

Commit 83ad90d

Browse files
committed
[AMDGPU] Fix module split's assumption on kernels
Module split assumes that a kernel function must have external linkage; however, that isn't the case. For example, a static kernel function will have weak_odr linkage. Change-Id: I1e5dee0de1fd866b365f4090a574e1b2961f8dca
1 parent 3c47e63 commit 83ad90d

File tree

2 files changed

+99
-6
lines changed

2 files changed

+99
-6
lines changed

llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -158,13 +158,12 @@ static auto formatRatioOf(CostType Num, CostType Dem) {
158158
/// Non-copyable functions cannot be cloned into multiple partitions, and only
159159
/// one copy of the function can be present across all partitions.
160160
///
161-
/// External functions fall into this category. If we were to clone them, we
162-
/// would end up with multiple symbol definitions and a very unhappy linker.
161+
/// Kernel functions and external functions fall into this category. If we were
162+
/// to clone them, we would end up with multiple symbol definitions and a very
163+
/// unhappy linker.
163164
static bool isNonCopyable(const Function &F) {
164-
assert(AMDGPU::isEntryFunctionCC(F.getCallingConv())
165-
? F.hasExternalLinkage()
166-
: true && "Kernel w/o external linkage?");
167-
return F.hasExternalLinkage() || !F.isDefinitionExact();
165+
return F.hasExternalLinkage() || !F.isDefinitionExact() ||
166+
AMDGPU::isEntryFunctionCC(F.getCallingConv());
168167
}
169168

170169
/// If \p GV has local linkage, make it external + hidden.
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-max-depth=0 -amdgpu-module-splitting-large-threshold=1.2 -amdgpu-module-splitting-merge-threshold=0.5
2+
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
3+
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
4+
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s
5+
6+
; RUN: llvm-split -o %t.nolarge %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-threshold=0 -amdgpu-module-splitting-max-depth=0
7+
; RUN: llvm-dis -o - %t.nolarge0 | FileCheck --check-prefix=NOLARGEKERNELS-CHECK0 --implicit-check-not=define %s
8+
; RUN: llvm-dis -o - %t.nolarge1 | FileCheck --check-prefix=NOLARGEKERNELS-CHECK1 --implicit-check-not=define %s
9+
; RUN: llvm-dis -o - %t.nolarge2 | FileCheck --check-prefix=NOLARGEKERNELS-CHECK2 --implicit-check-not=define %s
10+
11+
; 2 kernels (A/B) are large and share all their dependencies.
12+
; They should go in the same partition, the remaining kernel should
13+
; go somewhere else, and one partition should be empty.
14+
;
15+
; Also check w/o large kernels processing to verify they are indeed handled
16+
; differently.
17+
18+
; P0 is empty
19+
; CHECK0: declare
20+
21+
; CHECK1: define internal void @HelperC()
22+
; CHECK1: define weak_odr amdgpu_kernel void @C
23+
24+
; CHECK2: define internal void @large2()
25+
; CHECK2: define internal void @large1()
26+
; CHECK2: define internal void @large0()
27+
; CHECK2: define internal void @HelperA()
28+
; CHECK2: define internal void @HelperB()
29+
; CHECK2: define amdgpu_kernel void @A
30+
; CHECK2: define weak_odr amdgpu_kernel void @B
31+
32+
; NOLARGEKERNELS-CHECK0: define internal void @HelperC()
33+
; NOLARGEKERNELS-CHECK0: define weak_odr amdgpu_kernel void @C
34+
35+
; NOLARGEKERNELS-CHECK1: define internal void @large2()
36+
; NOLARGEKERNELS-CHECK1: define internal void @large1()
37+
; NOLARGEKERNELS-CHECK1: define internal void @large0()
38+
; NOLARGEKERNELS-CHECK1: define internal void @HelperB()
39+
; NOLARGEKERNELS-CHECK1: define weak_odr amdgpu_kernel void @B
40+
41+
; NOLARGEKERNELS-CHECK2: define internal void @large2()
42+
; NOLARGEKERNELS-CHECK2: define internal void @large1()
43+
; NOLARGEKERNELS-CHECK2: define internal void @large0()
44+
; NOLARGEKERNELS-CHECK2: define internal void @HelperA()
45+
; NOLARGEKERNELS-CHECK2: define amdgpu_kernel void @A
46+
47+
48+
define internal void @large2() {
49+
store volatile i32 42, ptr null
50+
call void @large2()
51+
ret void
52+
}
53+
54+
define internal void @large1() {
55+
call void @large1()
56+
call void @large2()
57+
ret void
58+
}
59+
60+
define internal void @large0() {
61+
call void @large0()
62+
call void @large1()
63+
call void @large2()
64+
ret void
65+
}
66+
67+
define internal void @HelperA() {
68+
call void @large0()
69+
ret void
70+
}
71+
72+
define internal void @HelperB() {
73+
call void @large0()
74+
ret void
75+
}
76+
77+
define amdgpu_kernel void @A() {
78+
call void @HelperA()
79+
ret void
80+
}
81+
82+
define weak_odr amdgpu_kernel void @B() {
83+
call void @HelperB()
84+
ret void
85+
}
86+
87+
define internal void @HelperC() {
88+
ret void
89+
}
90+
91+
define weak_odr amdgpu_kernel void @C() {
92+
call void @HelperC()
93+
ret void
94+
}

0 commit comments

Comments
 (0)