Skip to content

Commit 2e18c94

Browse files
authored
[Clang] Forward arguments to the device compiler better (#125957)
Summary: Currently, a subset of arguments is handled specially to keep them consistent between host and device compiles. However, this is extremely hacky, as it only works on a few predetermined options. This is a holdover from the days before the linker wrapper shuttled all of its arguments through `clang`. Now that we just use clang, all we need to do is use the `--device-compiler=` option to forward arguments there and let the normal toolchain handle them. For example,
```console
clang -fopenmp --offload-arch=gfx1030,sm_89 -Xarch_nvptx64 -O3 -foffload-lto
```
will forward the `-O3` to the LTO compilation only for the NVPTX compilation.
1 parent 17952b3 commit 2e18c94

File tree

4 files changed

+68
-79
lines changed

4 files changed

+68
-79
lines changed

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 62 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -9151,81 +9151,78 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
91519151
const InputInfoList &Inputs,
91529152
const ArgList &Args,
91539153
const char *LinkingOutput) const {
9154-
const Driver &D = getToolChain().getDriver();
9155-
const llvm::Triple TheTriple = getToolChain().getTriple();
9156-
ArgStringList CmdArgs;
9154+
using namespace options;
9155+
9156+
// A list of permitted options that will be forwarded to the embedded device
9157+
// compilation job.
9158+
const llvm::DenseSet<unsigned> CompilerOptions{
9159+
OPT_v,
9160+
OPT_cuda_path_EQ,
9161+
OPT_rocm_path_EQ,
9162+
OPT_O_Group,
9163+
OPT_g_Group,
9164+
OPT_g_flags_Group,
9165+
OPT_R_value_Group,
9166+
OPT_R_Group,
9167+
OPT_Xcuda_ptxas,
9168+
OPT_ftime_report,
9169+
OPT_ftime_trace,
9170+
OPT_ftime_trace_EQ,
9171+
OPT_ftime_trace_granularity_EQ,
9172+
OPT_ftime_trace_verbose,
9173+
OPT_opt_record_file,
9174+
OPT_opt_record_format,
9175+
OPT_opt_record_passes,
9176+
OPT_fsave_optimization_record,
9177+
OPT_fsave_optimization_record_EQ,
9178+
OPT_fno_save_optimization_record,
9179+
OPT_foptimization_record_file_EQ,
9180+
OPT_foptimization_record_passes_EQ,
9181+
OPT_save_temps,
9182+
OPT_mcode_object_version_EQ,
9183+
OPT_load,
9184+
OPT_fno_lto,
9185+
OPT_flto,
9186+
OPT_flto_EQ};
9187+
const llvm::DenseSet<unsigned> LinkerOptions{OPT_mllvm};
9188+
auto ShouldForward = [&](const llvm::DenseSet<unsigned> &Set, Arg *A) {
9189+
return Set.contains(A->getOption().getID()) ||
9190+
(A->getOption().getGroup().isValid() &&
9191+
Set.contains(A->getOption().getGroup().getID()));
9192+
};
91579193

9158-
// Pass the CUDA path to the linker wrapper tool.
9194+
ArgStringList CmdArgs;
91599195
for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_OpenMP}) {
91609196
auto TCRange = C.getOffloadToolChains(Kind);
91619197
for (auto &I : llvm::make_range(TCRange)) {
91629198
const ToolChain *TC = I.second;
9163-
if (TC->getTriple().isNVPTX()) {
9164-
CudaInstallationDetector CudaInstallation(D, TheTriple, Args);
9165-
if (CudaInstallation.isValid())
9166-
CmdArgs.push_back(Args.MakeArgString(
9167-
"--cuda-path=" + CudaInstallation.getInstallPath()));
9168-
break;
9199+
9200+
// We do not use a bound architecture here so options passed only to a
9201+
// specific architecture via -Xarch_<cpu> will not be forwarded.
9202+
ArgStringList CompilerArgs;
9203+
ArgStringList LinkerArgs;
9204+
for (Arg *A : C.getArgsForToolChain(TC, /*BoundArch=*/"", Kind)) {
9205+
if (ShouldForward(CompilerOptions, A))
9206+
A->render(Args, CompilerArgs);
9207+
else if (ShouldForward(LinkerOptions, A))
9208+
A->render(Args, LinkerArgs);
91699209
}
9170-
}
9171-
}
91729210

9173-
// Pass in the optimization level to use for LTO.
9174-
if (const Arg *A = Args.getLastArg(options::OPT_O_Group)) {
9175-
StringRef OOpt;
9176-
if (A->getOption().matches(options::OPT_O4) ||
9177-
A->getOption().matches(options::OPT_Ofast))
9178-
OOpt = "3";
9179-
else if (A->getOption().matches(options::OPT_O)) {
9180-
OOpt = A->getValue();
9181-
if (OOpt == "g")
9182-
OOpt = "1";
9183-
else if (OOpt == "s" || OOpt == "z")
9184-
OOpt = "2";
9185-
} else if (A->getOption().matches(options::OPT_O0))
9186-
OOpt = "0";
9187-
if (!OOpt.empty())
9188-
CmdArgs.push_back(Args.MakeArgString(Twine("--opt-level=O") + OOpt));
9211+
// Forward all of these to the appropriate toolchain.
9212+
for (StringRef Arg : CompilerArgs)
9213+
CmdArgs.push_back(Args.MakeArgString(
9214+
"--device-compiler=" + TC->getTripleString() + "=" + Arg));
9215+
for (StringRef Arg : LinkerArgs)
9216+
CmdArgs.push_back(Args.MakeArgString(
9217+
"--device-linker=" + TC->getTripleString() + "=" + Arg));
9218+
}
91899219
}
91909220

91919221
CmdArgs.push_back(
9192-
Args.MakeArgString("--host-triple=" + TheTriple.getTriple()));
9222+
Args.MakeArgString("--host-triple=" + getToolChain().getTripleString()));
91939223
if (Args.hasArg(options::OPT_v))
91949224
CmdArgs.push_back("--wrapper-verbose");
91959225

9196-
if (const Arg *A = Args.getLastArg(options::OPT_g_Group)) {
9197-
if (!A->getOption().matches(options::OPT_g0))
9198-
CmdArgs.push_back("--device-debug");
9199-
}
9200-
9201-
// code-object-version=X needs to be passed to clang-linker-wrapper to ensure
9202-
// that it is used by lld.
9203-
if (const Arg *A = Args.getLastArg(options::OPT_mcode_object_version_EQ)) {
9204-
CmdArgs.push_back(Args.MakeArgString("-mllvm"));
9205-
CmdArgs.push_back(Args.MakeArgString(
9206-
Twine("--amdhsa-code-object-version=") + A->getValue()));
9207-
}
9208-
9209-
for (const auto &A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
9210-
CmdArgs.push_back(Args.MakeArgString("--ptxas-arg=" + A));
9211-
9212-
// Forward remarks passes to the LLVM backend in the wrapper.
9213-
if (const Arg *A = Args.getLastArg(options::OPT_Rpass_EQ))
9214-
CmdArgs.push_back(Args.MakeArgString(Twine("--offload-opt=-pass-remarks=") +
9215-
A->getValue()));
9216-
if (const Arg *A = Args.getLastArg(options::OPT_Rpass_missed_EQ))
9217-
CmdArgs.push_back(Args.MakeArgString(
9218-
Twine("--offload-opt=-pass-remarks-missed=") + A->getValue()));
9219-
if (const Arg *A = Args.getLastArg(options::OPT_Rpass_analysis_EQ))
9220-
CmdArgs.push_back(Args.MakeArgString(
9221-
Twine("--offload-opt=-pass-remarks-analysis=") + A->getValue()));
9222-
9223-
if (Args.getLastArg(options::OPT_ftime_report))
9224-
CmdArgs.push_back("--device-compiler=-ftime-report");
9225-
9226-
if (Args.getLastArg(options::OPT_save_temps_EQ))
9227-
CmdArgs.push_back("--save-temps");
9228-
92299226
// Construct the link job so we can wrap around it.
92309227
Linker->ConstructJob(C, JA, Output, Inputs, Args, LinkingOutput);
92319228
const auto &LinkCommand = C.getJobs().getJobs().back();
@@ -9249,12 +9246,9 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
92499246
options::OPT_fno_openmp_target_jit, false))
92509247
CmdArgs.push_back("--embed-bitcode");
92519248

9252-
// Forward `-mllvm` arguments to the LLVM invocations if present.
9253-
for (Arg *A : Args.filtered(options::OPT_mllvm)) {
9254-
CmdArgs.push_back("-mllvm");
9255-
CmdArgs.push_back(A->getValue());
9256-
A->claim();
9257-
}
9249+
// Save temporary files created by the linker wrapper.
9250+
if (Args.hasArg(options::OPT_save_temps))
9251+
CmdArgs.push_back("--save-temps");
92589252

92599253
// Pass in the C library for GPUs if present and not disabled.
92609254
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_r, options::OPT_nogpulib,
@@ -9281,11 +9275,6 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
92819275
});
92829276
}
92839277

9284-
// If we disable the GPU C library support it needs to be forwarded to the
9285-
// link job.
9286-
if (!Args.hasFlag(options::OPT_gpulibc, options::OPT_nogpulibc, true))
9287-
CmdArgs.push_back("--device-compiler=-nolibc");
9288-
92899278
// Add the linker arguments to be forwarded by the wrapper.
92909279
CmdArgs.push_back(Args.MakeArgString(Twine("--linker-path=") +
92919280
LinkCommand->getExecutable()));

clang/test/Driver/amdgpu-openmp-sanitize-options.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,10 @@
5656
// GPUSAN: {{"[^"]*clang[^"]*" "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu".* "-emit-llvm-bc".* "-target-cpu" "(gfx908|gfx900)".* "-fopenmp".* "-fsanitize=address".* "-x" "c".*}}
5757
// GPUSAN: {{"[^"]*clang-offload-packager[^"]*" "-o".* "--image=file=.*.bc,triple=amdgcn-amd-amdhsa,arch=gfx908(:xnack\-|:xnack\+)?,kind=openmp(,feature=(\-xnack|\+xnack))?"}}
5858
// GPUSAN: {{"[^"]*clang[^"]*" "-cc1" "-triple" "x86_64-unknown-linux-gnu".* "-fopenmp".* "-fsanitize=address".* "-fopenmp-targets=amdgcn-amd-amdhsa".* "-x" "ir".*}}
59-
// GPUSAN: {{"[^"]*clang-linker-wrapper[^"]*" "--host-triple=x86_64-unknown-linux-gnu" "--linker-path=[^"]*".* "--whole-archive" "[^"]*(libclang_rt.asan_static.a|libclang_rt.asan_static-x86_64.a)".* "--whole-archive" "[^"]*(libclang_rt.asan.a|libclang_rt.asan-x86_64.a)".*}}
59+
// GPUSAN: {{"[^"]*clang-linker-wrapper[^"]*".* "--host-triple=x86_64-unknown-linux-gnu".* "--linker-path=[^"]*".* "--whole-archive" "[^"]*(libclang_rt.asan_static.a|libclang_rt.asan_static-x86_64.a)".* "--whole-archive" "[^"]*(libclang_rt.asan.a|libclang_rt.asan-x86_64.a)".*}}
6060

6161
// NOGPUSAN: {{"[^"]*clang[^"]*" "-cc1" "-triple" "x86_64-unknown-linux-gnu".* "-fopenmp".* "-fsanitize=address".* "-fopenmp-targets=amdgcn-amd-amdhsa".* "-x" "c".*}}
6262
// NOGPUSAN: {{"[^"]*clang[^"]*" "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu".* "-emit-llvm-bc".* "-target-cpu" "(gfx908|gfx900)".* "-fopenmp".* "-x" "c".*}}
6363
// NOGPUSAN: {{"[^"]*clang-offload-packager[^"]*" "-o".* "--image=file=.*.bc,triple=amdgcn-amd-amdhsa,arch=gfx908(:xnack\-|:xnack\+)?,kind=openmp(,feature=(\-xnack|\+xnack))?"}}
6464
// NOGPUSAN: {{"[^"]*clang[^"]*" "-cc1" "-triple" "x86_64-unknown-linux-gnu".* "-fopenmp".* "-fsanitize=address".* "-fopenmp-targets=amdgcn-amd-amdhsa".* "-x" "ir".*}}
65-
// NOGPUSAN: {{"[^"]*clang-linker-wrapper[^"]*" "--host-triple=x86_64-unknown-linux-gnu" "--linker-path=[^"]*".* "--whole-archive" "[^"]*(libclang_rt.asan_static.a|libclang_rt.asan_static-x86_64.a)".* "--whole-archive" "[^"]*(libclang_rt.asan.a|libclang_rt.asan-x86_64.a)".*}}
65+
// NOGPUSAN: {{"[^"]*clang-linker-wrapper[^"]*".* "--host-triple=x86_64-unknown-linux-gnu".* "--linker-path=[^"]*".* "--whole-archive" "[^"]*(libclang_rt.asan_static.a|libclang_rt.asan_static-x86_64.a)".* "--whole-archive" "[^"]*(libclang_rt.asan.a|libclang_rt.asan-x86_64.a)".*}}

clang/test/Driver/amdgpu-openmp-toolchain.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@
7272

7373
// RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx90a \
7474
// RUN: -O3 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-OPT
75-
// CHECK-OPT: clang-linker-wrapper{{.*}}"--opt-level=O3"
75+
// CHECK-OPT: clang-linker-wrapper{{.*}}"--device-compiler=amdgcn-amd-amdhsa=-O3"
7676

7777
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-WARN-ATOMIC
7878
// CHECK-WARN-ATOMIC-NOT: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-Werror=atomic-alignment"
@@ -84,4 +84,4 @@
8484

8585
// RUN: %clang -### -target x86_64-pc-linux-gnu -nogpulib -fopenmp --offload-arch=gfx90a \
8686
// RUN: -ftime-report %s 2>&1 | FileCheck %s --check-prefix=CHECK-TIME-REPORT
87-
// CHECK-TIME-REPORT: clang-linker-wrapper{{.*}}"--device-compiler=-ftime-report"
87+
// CHECK-TIME-REPORT: clang-linker-wrapper{{.*}}"--device-compiler=amdgcn-amd-amdhsa=-ftime-report"

clang/test/Driver/openmp-offload.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,13 +184,13 @@
184184
// RUN: %clang -### --target=powerpc64le-linux -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu -g %s 2>&1 \
185185
// RUN: | FileCheck -check-prefix=CHK-NEW-DRIVER-DEBUG %s
186186

187-
// CHK-NEW-DRIVER-DEBUG: clang-linker-wrapper{{.*}} "--device-debug"
187+
// CHK-NEW-DRIVER-DEBUG: clang-linker-wrapper{{.*}} "--device-compiler=powerpc64le-ibm-linux-gnu=-g"
188188

189189
/// Check arguments to the linker wrapper
190190
// RUN: %clang -### --target=powerpc64le-linux -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu \
191191
// RUN: -mllvm -abc %s 2>&1 | FileCheck -check-prefix=CHK-NEW-DRIVER-MLLVM %s
192192

193-
// CHK-NEW-DRIVER-MLLVM: clang-linker-wrapper{{.*}} "-abc"
193+
// CHK-NEW-DRIVER-MLLVM: clang-linker-wrapper{{.*}} "--device-linker=powerpc64le-ibm-linux-gnu=-mllvm" "--device-linker=powerpc64le-ibm-linux-gnu=-abc"
194194

195195
//
196196
// Ensure that we generate the correct bindings for '-fsyntax-only' for OpenMP.

0 commit comments

Comments
 (0)