Skip to content

Commit dcc27ea

Browse files
authored
[LinkerWrapper] Always pass -flto if the linker supports it (#102972)
Summary; Now that we use the linker to do LTO / device linking, we need to inform the `clang` invocation to use `-flto` so it forwards arguments like `-On` correctly.
1 parent afd42fb commit dcc27ea

File tree

6 files changed

+21
-18
lines changed

6 files changed

+21
-18
lines changed

clang/test/Driver/linker-wrapper.c

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ __attribute__((visibility("protected"), used)) int x;
2121
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
2222
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK
2323

24-
// NVPTX-LINK: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 {{.*}}.o {{.*}}.o
24+
// NVPTX-LINK: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 -flto {{.*}}.o {{.*}}.o
2525

2626
// RUN: clang-offload-packager -o %t.out \
2727
// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
@@ -30,7 +30,7 @@ __attribute__((visibility("protected"), used)) int x;
3030
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --device-debug -O0 \
3131
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK-DEBUG
3232

33-
// NVPTX-LINK-DEBUG: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 {{.*}}.o {{.*}}.o -g
33+
// NVPTX-LINK-DEBUG: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 -flto {{.*}}.o {{.*}}.o -g
3434

3535
// RUN: clang-offload-packager -o %t.out \
3636
// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \
@@ -39,7 +39,7 @@ __attribute__((visibility("protected"), used)) int x;
3939
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
4040
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LINK
4141

42-
// AMDGPU-LINK: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o
42+
// AMDGPU-LINK: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
4343

4444
// RUN: clang-offload-packager -o %t.out \
4545
// RUN: --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 \
@@ -48,7 +48,7 @@ __attribute__((visibility("protected"), used)) int x;
4848
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --save-temps -O2 \
4949
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LTO-TEMPS
5050

51-
// AMDGPU-LTO-TEMPS: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -O2 -Wl,--no-undefined {{.*}}.o -save-temps
51+
// AMDGPU-LTO-TEMPS: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -O2 -flto -Wl,--no-undefined {{.*}}.o -save-temps
5252

5353
// RUN: clang-offload-packager -o %t.out \
5454
// RUN: --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu \
@@ -59,7 +59,7 @@ __attribute__((visibility("protected"), used)) int x;
5959
// RUN: --linker-path=/usr/bin/ld.lld --whole-archive %t.a --no-whole-archive \
6060
// RUN: %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CPU-LINK
6161

62-
// CPU-LINK: clang{{.*}} -o {{.*}}.img --target=x86_64-unknown-linux-gnu -march=native -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o -Wl,-Bsymbolic -shared -Wl,--whole-archive {{.*}}.a -Wl,--no-whole-archive
62+
// CPU-LINK: clang{{.*}} -o {{.*}}.img --target=x86_64-unknown-linux-gnu -march=native -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o -Wl,-Bsymbolic -shared -Wl,--whole-archive {{.*}}.a -Wl,--no-whole-archive
6363

6464
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o
6565
// RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu -mllvm -openmp-opt-disable \
@@ -148,7 +148,7 @@ __attribute__((visibility("protected"), used)) int x;
148148
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --clang-backend \
149149
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CLANG-BACKEND
150150

151-
// CLANG-BACKEND: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -Wl,--no-undefined {{.*}}.o
151+
// CLANG-BACKEND: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -flto -Wl,--no-undefined {{.*}}.o
152152

153153
// RUN: clang-offload-packager -o %t.out \
154154
// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70
@@ -171,8 +171,8 @@ __attribute__((visibility("protected"), used)) int x;
171171
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
172172
// RUN: --linker-path=/usr/bin/ld %t-on.o %t-off.o %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=AMD-TARGET-ID
173173

174-
// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+ -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o
175-
// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack- -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o
174+
// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+ -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
175+
// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack- -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
176176

177177
// RUN: clang-offload-packager -o %t-lib.out \
178178
// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=generic
@@ -187,8 +187,8 @@ __attribute__((visibility("protected"), used)) int x;
187187
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
188188
// RUN: --linker-path=/usr/bin/ld %t1.o %t2.o %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=ARCH-ALL
189189

190-
// ARCH-ALL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o
191-
// ARCH-ALL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o
190+
// ARCH-ALL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
191+
// ARCH-ALL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
192192

193193
// RUN: clang-offload-packager -o %t.out \
194194
// RUN: --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu \

clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,7 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
527527

528528
// Forward all of the `--offload-opt` and similar options to the device.
529529
if (linkerSupportsLTO(Args)) {
530+
CmdArgs.push_back("-flto");
530531
for (auto &Arg : Args.filtered(OPT_offload_opt_eq_minus, OPT_mllvm))
531532
CmdArgs.append(
532533
{"-Xlinker",

offload/test/api/omp_dynamic_shared_memory_amdgpu.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
// RUN: %libomptarget-compile-amdgcn-amd-amdhsa -O1 -mllvm -openmp-opt-inline-device
1+
// RUN: %libomptarget-compile-amdgcn-amd-amdhsa -O2 -mllvm \
2+
// RUN: -openmp-opt-inline-device
23
// RUN: env LIBOMPTARGET_SHARED_MEMORY_SIZE=256 \
34
// RUN: %libomptarget-run-amdgcn-amd-amdhsa | %fcheck-amdgcn-amd-amdhsa
45
// REQUIRES: amdgcn-amd-amdhsa

offload/test/api/omp_dynamic_shared_memory_mixed_amdgpu.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
// RUN: %libomptarget-compile-amdgcn-amd-amdhsa -O1 -mllvm -openmp-opt-inline-device -I %S
1+
// RUN: %libomptarget-compile-amdgcn-amd-amdhsa -O2 -mllvm \
2+
// RUN: -openmp-opt-inline-device -I %S
23
// RUN: env LIBOMPTARGET_NEXTGEN_PLUGINS=1 \
34
// RUN: %libomptarget-run-amdgcn-amd-amdhsa | %fcheck-amdgcn-amd-amdhsa
45
// REQUIRES: amdgcn-amd-amdhsa

offload/test/offloading/bug51781.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
// SPMDize. There is no main thread, so there's no issue.
77
//
8-
// RUN: %libomptarget-compile-generic -O1 -Rpass=openmp-opt > %t.spmd 2>&1
8+
// RUN: %libomptarget-compile-generic -O2 -Rpass=openmp-opt > %t.spmd 2>&1
99
// RUN: %fcheck-nvptx64-nvidia-cuda -check-prefix=SPMD -input-file=%t.spmd
1010
// RUN: %fcheck-amdgcn-amd-amdhsa -check-prefix=SPMD -input-file=%t.spmd
1111
// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic
@@ -15,7 +15,7 @@
1515
// Use the custom state machine, which must avoid the same barrier problem as
1616
// the generic state machine.
1717
//
18-
// RUN: %libomptarget-compile-generic -O1 -Rpass=openmp-opt \
18+
// RUN: %libomptarget-compile-generic -O2 -Rpass=openmp-opt \
1919
// RUN: -mllvm -openmp-opt-disable-spmdization > %t.custom 2>&1
2020
// RUN: %fcheck-nvptx64-nvidia-cuda -check-prefix=CUSTOM -input-file=%t.custom
2121
// RUN: %fcheck-amdgcn-amd-amdhsa -check-prefix=CUSTOM -input-file=%t.custom
@@ -24,7 +24,7 @@
2424
// Repeat with reduction clause, which has managed to break the custom state
2525
// machine in the past.
2626
//
27-
// RUN: %libomptarget-compile-generic -O1 -Rpass=openmp-opt -DADD_REDUCTION \
27+
// RUN: %libomptarget-compile-generic -O2 -Rpass=openmp-opt -DADD_REDUCTION \
2828
// RUN: -mllvm -openmp-opt-disable-spmdization > %t.custom 2>&1
2929
// RUN: %fcheck-nvptx64-nvidia-cuda -check-prefix=CUSTOM -input-file=%t.custom
3030
// RUN: %fcheck-amdgcn-amd-amdhsa -check-prefix=CUSTOM -input-file=%t.custom

offload/test/offloading/bug51982.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
// RUN: %libomptarget-compile-generic -O1 && %libomptarget-run-generic
2-
// -O1 to run openmp-opt
3-
// RUN: %libomptarget-compileopt-generic -O1 && %libomptarget-run-generic
1+
// RUN: %libomptarget-compile-generic -O2 && %libomptarget-run-generic
2+
// -O2 to run openmp-opt
3+
// RUN: %libomptarget-compileopt-generic -O2 && %libomptarget-run-generic
44

55
int main(void) {
66
long int aa = 0;

0 commit comments

Comments
 (0)