Skip to content

[AMDGPU] Graph-based Module Splitting Rewrite #104763

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,802 changes: 1,272 additions & 530 deletions llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,30 +1,24 @@
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-function-threshold=0
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 %s
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-threshold=0
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s

; 3 kernels:
; - A does a direct call to HelperA
; - B is storing @HelperA
; - C does a direct call to HelperA
;
; The helper functions will get externalized, which will force A and C into P0 as
; external functions cannot be duplicated.

; CHECK0: define hidden void @HelperA()
; CHECK0: define amdgpu_kernel void @A()
; CHECK0: declare amdgpu_kernel void @B(ptr)
; CHECK0: define amdgpu_kernel void @C()

; CHECK1: declare hidden void @HelperA()
; CHECK1: declare amdgpu_kernel void @A()
; CHECK1: declare amdgpu_kernel void @B(ptr)
; CHECK1: declare amdgpu_kernel void @C()

; CHECK2: declare hidden void @HelperA()
; CHECK2: declare amdgpu_kernel void @A()
; CHECK2: define amdgpu_kernel void @B(ptr %dst)
; CHECK2: declare amdgpu_kernel void @C()
; The helper functions will get externalized, so C/A will end up
; in the same partition.

; P0 is empty.
; CHECK0: declare

; CHECK1: define amdgpu_kernel void @B(ptr %dst)

; CHECK2: define hidden void @HelperA()
; CHECK2: define amdgpu_kernel void @A()
; CHECK2: define amdgpu_kernel void @C()

define internal void @HelperA() {
ret void
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; RUN: llvm-split -o %t %s -j 2 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-function-threshold=0
; RUN: llvm-split -o %t %s -j 2 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-threshold=0
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s

Expand Down
20 changes: 0 additions & 20 deletions llvm/test/tools/llvm-split/AMDGPU/debug-name-hiding.ll

This file was deleted.

36 changes: 0 additions & 36 deletions llvm/test/tools/llvm-split/AMDGPU/debug-non-kernel-root.ll

This file was deleted.

9 changes: 3 additions & 6 deletions llvm/test/tools/llvm-split/AMDGPU/declarations.ll
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
; RUN: llvm-split -o %t %s -j 2 -mtriple amdgcn-amd-amdhsa
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
; RUN: not llvm-dis -o - %t1

; Check that all declarations are put into each partition.
; Empty module without any defs should result in a single output module that is
; an exact copy of the input.

; CHECK0: declare void @A
; CHECK0: declare void @B

; CHECK1: declare void @A
; CHECK1: declare void @B

declare void @A()

declare void @B()
18 changes: 7 additions & 11 deletions llvm/test/tools/llvm-split/AMDGPU/kernels-alias-dependencies.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
; RUN: llvm-split -o %t %s -j 2 -mtriple amdgcn-amd-amdhsa
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s

; 3 kernels:
; - A calls nothing
Expand All @@ -13,16 +13,12 @@
; Additionally, @PerryThePlatypus gets externalized as
; the alias counts as taking its address.

; CHECK0-NOT: define
; CHECK0: @Perry = internal alias ptr (), ptr @PerryThePlatypus
; CHECK0: define hidden void @PerryThePlatypus()
; CHECK0: define amdgpu_kernel void @B
; CHECK0: define amdgpu_kernel void @C
; CHECK0-NOT: define
; CHECK0: define amdgpu_kernel void @A

; CHECK1-NOT: define
; CHECK1: define amdgpu_kernel void @A
; CHECK1-NOT: define
; CHECK1: @Perry = internal alias ptr (), ptr @PerryThePlatypus
; CHECK1: define hidden void @PerryThePlatypus()
; CHECK1: define amdgpu_kernel void @B
; CHECK1: define amdgpu_kernel void @C

@Perry = internal alias ptr(), ptr @PerryThePlatypus

Expand Down
12 changes: 3 additions & 9 deletions llvm/test/tools/llvm-split/AMDGPU/kernels-cost-ranking.ll
Original file line number Diff line number Diff line change
@@ -1,27 +1,21 @@
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 %s
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s

; 3 kernels, each with their own dependencies, should go into 3
; distinct partitions. The most expensive kernel should be
; seen first and go into the last partition.

; CHECK0-NOT: define
; CHECK0: define amdgpu_kernel void @C
; CHECK0: define internal void @HelperC
; CHECK0-NOT: define

; CHECK1-NOT: define
; CHECK1: define amdgpu_kernel void @A
; CHECK1: define internal void @HelperA
; CHECK1-NOT: define

; CHECK2-NOT: define
; CHECK2: define amdgpu_kernel void @B
; CHECK2: define internal void @HelperB
; CHECK2-NOT: define


define amdgpu_kernel void @A() {
call void @HelperA()
Expand Down
33 changes: 12 additions & 21 deletions llvm/test/tools/llvm-split/AMDGPU/kernels-dependency-external.ll
Original file line number Diff line number Diff line change
@@ -1,29 +1,20 @@
; RUN: llvm-split -o %t %s -j 4 -mtriple amdgcn-amd-amdhsa
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 %s
; RUN: llvm-dis -o - %t3 | FileCheck --check-prefix=CHECK3 %s
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t3 | FileCheck --check-prefix=CHECK3 --implicit-check-not=define %s

; Both overridable helpers should go in P0.
; CHECK0: define internal void @PrivateHelper1()
; CHECK0: define amdgpu_kernel void @D

; CHECK0-NOT: define
; CHECK0: define available_externally void @OverridableHelper0()
; CHECK0: define internal void @OverridableHelper1()
; CHECK0: define amdgpu_kernel void @A
; CHECK0: define amdgpu_kernel void @B
; CHECK0-NOT: define
; CHECK1: define internal void @PrivateHelper0()
; CHECK1: define amdgpu_kernel void @C

; CHECK1-NOT: define
; CHECK2: define internal void @OverridableHelper1()
; CHECK2: define amdgpu_kernel void @B

; CHECK2-NOT: define
; CHECK2: define internal void @PrivateHelper1()
; CHECK2: define amdgpu_kernel void @D
; CHECK2-NOT: define

; CHECK3-NOT: define
; CHECK3: define internal void @PrivateHelper0()
; CHECK3: define amdgpu_kernel void @C
; CHECK3-NOT: define
; CHECK3: define available_externally void @OverridableHelper0()
; CHECK3: define amdgpu_kernel void @A

define available_externally void @OverridableHelper0() {
ret void
Expand Down
30 changes: 12 additions & 18 deletions llvm/test/tools/llvm-split/AMDGPU/kernels-dependency-indirect.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 %s
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s

; We have 4 kernels:
; - Each kernel has an internal helper
Expand All @@ -15,25 +15,19 @@
; indirect call. HelperC/D should also end up in P0 as they
; are dependencies of HelperB.

; CHECK0-NOT: define
; CHECK0: define hidden void @HelperA
; CHECK0: define hidden void @HelperB
; CHECK0: define hidden void @CallCandidate
; CHECK0: define internal void @HelperC
; CHECK0: define internal void @HelperD
; CHECK0: define amdgpu_kernel void @A
; CHECK0: define amdgpu_kernel void @B
; CHECK0-NOT: define
; CHECK0: define amdgpu_kernel void @D

; CHECK1-NOT: define
; CHECK1: define internal void @HelperD
; CHECK1: define amdgpu_kernel void @D
; CHECK1-NOT: define
; CHECK1: define internal void @HelperC
; CHECK1: define amdgpu_kernel void @C

; CHECK2-NOT: define
; CHECK2: define hidden void @HelperA
; CHECK2: define hidden void @HelperB
; CHECK2: define hidden void @CallCandidate
; CHECK2: define internal void @HelperC
; CHECK2: define amdgpu_kernel void @C
; CHECK2-NOT: define
; CHECK2: define internal void @HelperD
; CHECK2: define amdgpu_kernel void @A
; CHECK2: define amdgpu_kernel void @B

@addrthief = global [3 x ptr] [ptr @HelperA, ptr @HelperB, ptr @CallCandidate]

Expand Down
28 changes: 11 additions & 17 deletions llvm/test/tools/llvm-split/AMDGPU/kernels-dependency-overridable.ll
Original file line number Diff line number Diff line change
@@ -1,21 +1,15 @@
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 %s

; CHECK0-NOT: define
; CHECK0: define void @ExternalHelper
; CHECK0: define amdgpu_kernel void @A
; CHECK0: define amdgpu_kernel void @B
; CHECK0-NOT: define

; CHECK1-NOT: define
; CHECK1: define amdgpu_kernel void @D
; CHECK1-NOT: define

; CHECK2-NOT: define
; CHECK2: define amdgpu_kernel void @C
; CHECK2-NOT: define
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s

; CHECK0: define amdgpu_kernel void @D

; CHECK1: define amdgpu_kernel void @C

; CHECK2: define void @ExternalHelper
; CHECK2: define amdgpu_kernel void @A
; CHECK2: define amdgpu_kernel void @B

define void @ExternalHelper() {
ret void
Expand Down
Original file line number Diff line number Diff line change
@@ -1,26 +1,20 @@
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-no-externalize-globals
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 %s
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s

; 3 kernels use private/internal global variables.
; The GVs should be copied in each partition as needed.

; CHECK0-NOT: define
; CHECK0: @bar = internal constant ptr
; CHECK0: define amdgpu_kernel void @C
; CHECK0-NOT: define

; CHECK1-NOT: define
; CHECK1: @foo = private constant ptr
; CHECK1: define amdgpu_kernel void @A
; CHECK1-NOT: define

; CHECK2-NOT: define
; CHECK2: @foo = private constant ptr
; CHECK2: @bar = internal constant ptr
; CHECK2: define amdgpu_kernel void @B
; CHECK2-NOT: define

@foo = private constant ptr poison
@bar = internal constant ptr poison
Expand Down
12 changes: 3 additions & 9 deletions llvm/test/tools/llvm-split/AMDGPU/kernels-global-variables.ll
Original file line number Diff line number Diff line change
@@ -1,28 +1,22 @@
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 %s
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s

; 3 kernels use private/internal global variables.
; The GVs should be copied in each partition as needed.

; CHECK0-NOT: define
; CHECK0: @foo = hidden constant ptr poison
; CHECK0: @bar = hidden constant ptr poison
; CHECK0: define amdgpu_kernel void @C
; CHECK0-NOT: define

; CHECK1-NOT: define
; CHECK1: @foo = external hidden constant ptr{{$}}
; CHECK1: @bar = external hidden constant ptr{{$}}
; CHECK1: define amdgpu_kernel void @A
; CHECK1-NOT: define

; CHECK2-NOT: define
; CHECK2: @foo = external hidden constant ptr{{$}}
; CHECK2: @bar = external hidden constant ptr{{$}}
; CHECK2: define amdgpu_kernel void @B
; CHECK2-NOT: define

@foo = private constant ptr poison
@bar = internal constant ptr poison
Expand Down
Loading
Loading