Skip to content

Revert: [AMDGPU] Graph-based Module Splitting Rewrite (llvm#104763) #106707

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,803 changes: 530 additions & 1,273 deletions llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,24 +1,30 @@
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-threshold=0
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-function-threshold=0
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 %s

; 3 kernels:
; - A does a direct call to HelperA
; - B is storing @HelperA
; - C does a direct call to HelperA
;
; The helper functions will get externalized, so C/A will end up
; in the same partition.

; P0 is empty.
; CHECK0: declare

; CHECK1: define amdgpu_kernel void @B(ptr %dst)

; CHECK2: define hidden void @HelperA()
; CHECK2: define amdgpu_kernel void @A()
; CHECK2: define amdgpu_kernel void @C()
; The helper functions will get externalized, which will force A and C into P0 as
; external functions cannot be duplicated.

; CHECK0: define hidden void @HelperA()
; CHECK0: define amdgpu_kernel void @A()
; CHECK0: declare amdgpu_kernel void @B(ptr)
; CHECK0: define amdgpu_kernel void @C()

; CHECK1: declare hidden void @HelperA()
; CHECK1: declare amdgpu_kernel void @A()
; CHECK1: declare amdgpu_kernel void @B(ptr)
; CHECK1: declare amdgpu_kernel void @C()

; CHECK2: declare hidden void @HelperA()
; CHECK2: declare amdgpu_kernel void @A()
; CHECK2: define amdgpu_kernel void @B(ptr %dst)
; CHECK2: declare amdgpu_kernel void @C()

define internal void @HelperA() {
ret void
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; RUN: llvm-split -o %t %s -j 2 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-threshold=0
; RUN: llvm-split -o %t %s -j 2 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-function-threshold=0
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s

Expand Down
20 changes: 20 additions & 0 deletions llvm/test/tools/llvm-split/AMDGPU/debug-name-hiding.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -debug -amdgpu-module-splitting-log-private 2>&1 | FileCheck %s --implicit-check-not=MyCustomKernel
; REQUIRES: asserts

; SHA256 of the kernel names.

; CHECK: a097723d21cf9f35d90e6fb7881995ac8c398b3366a6c97efc657404f9fe301c
; CHECK: 626bc23242de8fcfda7f0e66318d29455c081df6b5380e64d14703c95fcbcd59
; CHECK: c38d90a7ca71dc5d694bb9e093dadcdedfc4cb4adf7ed7e46d42fe95a0b4ef55

; First of three empty kernels. Only its name matters: the test expects the
; debug output to contain a hash instead of the literal kernel name.
define amdgpu_kernel void @MyCustomKernel0() {
ret void
}

; Second empty kernel; like the others, its literal name must not show up in
; the tool's debug output.
define amdgpu_kernel void @MyCustomKernel1() {
ret void
}

; Third empty kernel; with -j 3 there is one kernel per requested partition.
define amdgpu_kernel void @MyCustomKernel2() {
ret void
}
36 changes: 36 additions & 0 deletions llvm/test/tools/llvm-split/AMDGPU/debug-non-kernel-root.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
; RUN: llvm-split -o %t %s -j 2 -mtriple amdgcn-amd-amdhsa -debug 2>&1 | FileCheck %s --implicit-check-not="[root]"
; REQUIRES: asserts

; func_3 is never directly called, so it needs to be considered
; as a root to handle this module correctly.

; CHECK: [root] kernel_1
; CHECK-NEXT: [dependency] func_1
; CHECK-NEXT: [dependency] func_2
; CHECK-NEXT: [root] func_3
; CHECK-NEXT: [dependency] func_2

; The only kernel in the module. It calls @func_1 directly, and @func_1 in turn
; calls @func_2, so both are expected to be listed as its dependencies.
define amdgpu_kernel void @kernel_1() {
entry:
call void @func_1()
ret void
}

; Helper called from @kernel_1. It calls @func_2 and ignores the returned value.
define linkonce_odr hidden void @func_1() {
entry:
%call = call i32 @func_2()
ret void
}

; Leaf function that returns 0. It is called by both @func_1 and @func_3, and
; uses attribute group #0 (noinline optnone, defined at the end of the file).
define linkonce_odr hidden i32 @func_2() #0 {
entry:
ret i32 0
}
; Non-kernel function with no callers in this module. The test expects it to be
; reported as a root of its own, with @func_2 as its dependency.
define void @func_3() {
entry:
%call = call i32 @func_2()
ret void
}

attributes #0 = { noinline optnone }
9 changes: 6 additions & 3 deletions llvm/test/tools/llvm-split/AMDGPU/declarations.ll
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
; RUN: rm -rf %t0 %t1
; RUN: llvm-split -o %t %s -j 2 -mtriple amdgcn-amd-amdhsa
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
; RUN: not llvm-dis -o - %t1
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s

; Empty module without any defs should result in a single output module that is
; an exact copy of the input.
; Check that all declarations are put into each partition.

; CHECK0: declare void @A
; CHECK0: declare void @B

; CHECK1: declare void @A
; CHECK1: declare void @B

declare void @A()

declare void @B()
18 changes: 11 additions & 7 deletions llvm/test/tools/llvm-split/AMDGPU/kernels-alias-dependencies.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
; RUN: llvm-split -o %t %s -j 2 -mtriple amdgcn-amd-amdhsa
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s

; 3 kernels:
; - A calls nothing
Expand All @@ -13,12 +13,16 @@
; Additionally, @PerryThePlatypus gets externalized as
; the alias counts as taking its address.

; CHECK0: define amdgpu_kernel void @A
; CHECK0-NOT: define
; CHECK0: @Perry = internal alias ptr (), ptr @PerryThePlatypus
; CHECK0: define hidden void @PerryThePlatypus()
; CHECK0: define amdgpu_kernel void @B
; CHECK0: define amdgpu_kernel void @C
; CHECK0-NOT: define

; CHECK1: @Perry = internal alias ptr (), ptr @PerryThePlatypus
; CHECK1: define hidden void @PerryThePlatypus()
; CHECK1: define amdgpu_kernel void @B
; CHECK1: define amdgpu_kernel void @C
; CHECK1-NOT: define
; CHECK1: define amdgpu_kernel void @A
; CHECK1-NOT: define

@Perry = internal alias ptr(), ptr @PerryThePlatypus

Expand Down
12 changes: 9 additions & 3 deletions llvm/test/tools/llvm-split/AMDGPU/kernels-cost-ranking.ll
Original file line number Diff line number Diff line change
@@ -1,21 +1,27 @@
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 %s

; 3 kernels, each with its own dependencies, should go into 3
; distinct partitions. The most expensive kernel should be
; seen first and go into the last partition.

; CHECK0-NOT: define
; CHECK0: define amdgpu_kernel void @C
; CHECK0: define internal void @HelperC
; CHECK0-NOT: define

; CHECK1-NOT: define
; CHECK1: define amdgpu_kernel void @A
; CHECK1: define internal void @HelperA
; CHECK1-NOT: define

; CHECK2-NOT: define
; CHECK2: define amdgpu_kernel void @B
; CHECK2: define internal void @HelperB
; CHECK2-NOT: define


define amdgpu_kernel void @A() {
call void @HelperA()
Expand Down
33 changes: 21 additions & 12 deletions llvm/test/tools/llvm-split/AMDGPU/kernels-dependency-external.ll
Original file line number Diff line number Diff line change
@@ -1,20 +1,29 @@
; RUN: llvm-split -o %t %s -j 4 -mtriple amdgcn-amd-amdhsa
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t3 | FileCheck --check-prefix=CHECK3 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 %s
; RUN: llvm-dis -o - %t3 | FileCheck --check-prefix=CHECK3 %s

; CHECK0: define internal void @PrivateHelper1()
; CHECK0: define amdgpu_kernel void @D
; Both overridable helpers should go in P0.

; CHECK1: define internal void @PrivateHelper0()
; CHECK1: define amdgpu_kernel void @C
; CHECK0-NOT: define
; CHECK0: define available_externally void @OverridableHelper0()
; CHECK0: define internal void @OverridableHelper1()
; CHECK0: define amdgpu_kernel void @A
; CHECK0: define amdgpu_kernel void @B
; CHECK0-NOT: define

; CHECK2: define internal void @OverridableHelper1()
; CHECK2: define amdgpu_kernel void @B
; CHECK1-NOT: define

; CHECK3: define available_externally void @OverridableHelper0()
; CHECK3: define amdgpu_kernel void @A
; CHECK2-NOT: define
; CHECK2: define internal void @PrivateHelper1()
; CHECK2: define amdgpu_kernel void @D
; CHECK2-NOT: define

; CHECK3-NOT: define
; CHECK3: define internal void @PrivateHelper0()
; CHECK3: define amdgpu_kernel void @C
; CHECK3-NOT: define

define available_externally void @OverridableHelper0() {
ret void
Expand Down
30 changes: 18 additions & 12 deletions llvm/test/tools/llvm-split/AMDGPU/kernels-dependency-indirect.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 %s

; We have 4 kernels:
; - Each kernel has an internal helper
Expand All @@ -15,19 +15,25 @@
; indirect call. HelperC/D should also end up in P0 as they
; are dependencies of HelperB.

; CHECK0-NOT: define
; CHECK0: define hidden void @HelperA
; CHECK0: define hidden void @HelperB
; CHECK0: define hidden void @CallCandidate
; CHECK0: define internal void @HelperC
; CHECK0: define internal void @HelperD
; CHECK0: define amdgpu_kernel void @D
; CHECK0: define amdgpu_kernel void @A
; CHECK0: define amdgpu_kernel void @B
; CHECK0-NOT: define

; CHECK1: define internal void @HelperC
; CHECK1: define amdgpu_kernel void @C
; CHECK1-NOT: define
; CHECK1: define internal void @HelperD
; CHECK1: define amdgpu_kernel void @D
; CHECK1-NOT: define

; CHECK2: define hidden void @HelperA
; CHECK2: define hidden void @HelperB
; CHECK2: define hidden void @CallCandidate
; CHECK2-NOT: define
; CHECK2: define internal void @HelperC
; CHECK2: define internal void @HelperD
; CHECK2: define amdgpu_kernel void @A
; CHECK2: define amdgpu_kernel void @B
; CHECK2: define amdgpu_kernel void @C
; CHECK2-NOT: define

@addrthief = global [3 x ptr] [ptr @HelperA, ptr @HelperB, ptr @CallCandidate]

Expand Down
28 changes: 17 additions & 11 deletions llvm/test/tools/llvm-split/AMDGPU/kernels-dependency-overridable.ll
Original file line number Diff line number Diff line change
@@ -1,15 +1,21 @@
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s

; CHECK0: define amdgpu_kernel void @D

; CHECK1: define amdgpu_kernel void @C

; CHECK2: define void @ExternalHelper
; CHECK2: define amdgpu_kernel void @A
; CHECK2: define amdgpu_kernel void @B
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 %s

; CHECK0-NOT: define
; CHECK0: define void @ExternalHelper
; CHECK0: define amdgpu_kernel void @A
; CHECK0: define amdgpu_kernel void @B
; CHECK0-NOT: define

; CHECK1-NOT: define
; CHECK1: define amdgpu_kernel void @D
; CHECK1-NOT: define

; CHECK2-NOT: define
; CHECK2: define amdgpu_kernel void @C
; CHECK2-NOT: define

define void @ExternalHelper() {
ret void
Expand Down
Original file line number Diff line number Diff line change
@@ -1,20 +1,26 @@
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-no-externalize-globals
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 %s

; 3 kernels use private/internal global variables.
; The GVs should be copied in each partition as needed.

; CHECK0-NOT: define
; CHECK0: @bar = internal constant ptr
; CHECK0: define amdgpu_kernel void @C
; CHECK0-NOT: define

; CHECK1-NOT: define
; CHECK1: @foo = private constant ptr
; CHECK1: define amdgpu_kernel void @A
; CHECK1-NOT: define

; CHECK2-NOT: define
; CHECK2: @foo = private constant ptr
; CHECK2: @bar = internal constant ptr
; CHECK2: define amdgpu_kernel void @B
; CHECK2-NOT: define

@foo = private constant ptr poison
@bar = internal constant ptr poison
Expand Down
12 changes: 9 additions & 3 deletions llvm/test/tools/llvm-split/AMDGPU/kernels-global-variables.ll
Original file line number Diff line number Diff line change
@@ -1,22 +1,28 @@
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 %s

; 3 kernels use private/internal global variables.
; The GVs should be copied in each partition as needed.

; CHECK0-NOT: define
; CHECK0: @foo = hidden constant ptr poison
; CHECK0: @bar = hidden constant ptr poison
; CHECK0: define amdgpu_kernel void @C
; CHECK0-NOT: define

; CHECK1-NOT: define
; CHECK1: @foo = external hidden constant ptr{{$}}
; CHECK1: @bar = external hidden constant ptr{{$}}
; CHECK1: define amdgpu_kernel void @A
; CHECK1-NOT: define

; CHECK2-NOT: define
; CHECK2: @foo = external hidden constant ptr{{$}}
; CHECK2: @bar = external hidden constant ptr{{$}}
; CHECK2: define amdgpu_kernel void @B
; CHECK2-NOT: define

@foo = private constant ptr poison
@bar = internal constant ptr poison
Expand Down
Loading
Loading