|
| 1 | +; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-max-depth=0 -amdgpu-module-splitting-large-threshold=1.2 -amdgpu-module-splitting-merge-threshold=0.5 |
| 2 | +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s |
| 3 | +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s |
| 4 | +; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s |
| 5 | + |
| 6 | +; RUN: llvm-split -o %t.nolarge %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-threshold=0 -amdgpu-module-splitting-max-depth=0 |
| 7 | +; RUN: llvm-dis -o - %t.nolarge0 | FileCheck --check-prefix=NOLARGEKERNELS-CHECK0 --implicit-check-not=define %s |
| 8 | +; RUN: llvm-dis -o - %t.nolarge1 | FileCheck --check-prefix=NOLARGEKERNELS-CHECK1 --implicit-check-not=define %s |
| 9 | +; RUN: llvm-dis -o - %t.nolarge2 | FileCheck --check-prefix=NOLARGEKERNELS-CHECK2 --implicit-check-not=define %s |
| 10 | + |
| 11 | +; 2 kernels (A/B) are large and share most of their dependencies (large0-2). |
| 12 | +; They should go in the same partition, the remaining kernel should |
| 13 | +; go somewhere else, and one partition should be empty. |
| 14 | +; |
| 15 | +; Also check without large-kernel processing (large-threshold=0) to verify |
| 16 | +; that large kernels are indeed handled differently. |
| 17 | + |
| 18 | +; P0 is empty (declarations only, no definitions). |
| 19 | +; CHECK0: declare |
| 20 | + |
| 21 | +; CHECK1: define internal void @HelperC() |
| 22 | +; CHECK1: define weak_odr amdgpu_kernel void @C |
| 23 | + |
| 24 | +; CHECK2: define internal void @large2() |
| 25 | +; CHECK2: define internal void @large1() |
| 26 | +; CHECK2: define internal void @large0() |
| 27 | +; CHECK2: define internal void @HelperA() |
| 28 | +; CHECK2: define internal void @HelperB() |
| 29 | +; CHECK2: define amdgpu_kernel void @A |
| 30 | +; CHECK2: define weak_odr amdgpu_kernel void @B |
| 31 | + |
| 32 | +; NOLARGEKERNELS-CHECK0: define internal void @HelperC() |
| 33 | +; NOLARGEKERNELS-CHECK0: define weak_odr amdgpu_kernel void @C |
| 34 | + |
| 35 | +; NOLARGEKERNELS-CHECK1: define internal void @large2() |
| 36 | +; NOLARGEKERNELS-CHECK1: define internal void @large1() |
| 37 | +; NOLARGEKERNELS-CHECK1: define internal void @large0() |
| 38 | +; NOLARGEKERNELS-CHECK1: define internal void @HelperB() |
| 39 | +; NOLARGEKERNELS-CHECK1: define weak_odr amdgpu_kernel void @B |
| 40 | + |
| 41 | +; NOLARGEKERNELS-CHECK2: define internal void @large2() |
| 42 | +; NOLARGEKERNELS-CHECK2: define internal void @large1() |
| 43 | +; NOLARGEKERNELS-CHECK2: define internal void @large0() |
| 44 | +; NOLARGEKERNELS-CHECK2: define internal void @HelperA() |
| 45 | +; NOLARGEKERNELS-CHECK2: define amdgpu_kernel void @A |
| 46 | + |
| 47 | + |
| 48 | +define internal void @large2() { ; Bottom of the large0 -> large1 -> large2 chain: one volatile store plus a self-call, no other callees. Presumably exists only to add size to the kernels that reach it.
| 49 | + store volatile i32 42, ptr null |
| 50 | + call void @large2() |
| 51 | + ret void |
| 52 | +} |
| 53 | + |
| 54 | +define internal void @large1() { ; Middle of the chain: calls itself and @large2; called from @large0.
| 55 | + call void @large1() |
| 56 | + call void @large2() |
| 57 | + ret void |
| 58 | +} |
| 59 | + |
| 60 | +define internal void @large0() { ; Top of the shared chain: calls itself, @large1 and @large2; called from both @HelperA and @HelperB, so kernels @A and @B both depend on it.
| 61 | + call void @large0() |
| 62 | + call void @large1() |
| 63 | + call void @large2() |
| 64 | + ret void |
| 65 | +} |
| 66 | + |
| 67 | +define internal void @HelperA() { ; Helper whose only caller in this file is kernel @A; its single call pulls in @large0 and everything @large0 calls.
| 68 | + call void @large0() |
| 69 | + ret void |
| 70 | +} |
| 71 | + |
| 72 | +define internal void @HelperB() { ; Helper whose only caller in this file is kernel @B; its single call pulls in @large0 and everything @large0 calls.
| 73 | + call void @large0() |
| 74 | + ret void |
| 75 | +} |
| 76 | + |
| 77 | +define amdgpu_kernel void @A() { ; Kernel that reaches the large0/large1/large2 chain through @HelperA. The checks above expect it in the same partition as @B when large kernels are merged, and in a separate partition otherwise.
| 78 | + call void @HelperA() |
| 79 | + ret void |
| 80 | +} |
| 81 | + |
| 82 | +define weak_odr amdgpu_kernel void @B() { ; weak_odr kernel that reaches the large0/large1/large2 chain through @HelperB. The checks above expect it in the same partition as @A when large kernels are merged, and in a separate partition otherwise.
| 83 | + call void @HelperB() |
| 84 | + ret void |
| 85 | +} |
| 86 | + |
| 87 | +define internal void @HelperC() { ; Empty helper (returns immediately); its only caller in this file is kernel @C.
| 88 | + ret void |
| 89 | +} |
| 90 | + |
| 91 | +define weak_odr amdgpu_kernel void @C() { ; Small weak_odr kernel: its only callee is the empty @HelperC, so it shares no dependencies with @A/@B. The checks above expect it in a partition of its own, together with @HelperC.
| 92 | + call void @HelperC() |
| 93 | + ret void |
| 94 | +} |
0 commit comments