Skip to content

[Clang] Forward arguments to the device compiler better #125957

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 62 additions & 73 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9151,81 +9151,78 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
const Driver &D = getToolChain().getDriver();
const llvm::Triple TheTriple = getToolChain().getTriple();
ArgStringList CmdArgs;
using namespace options;

// A list of permitted options that will be forwarded to the embedded device
// compilation job.
const llvm::DenseSet<unsigned> CompilerOptions{
OPT_v,
OPT_cuda_path_EQ,
OPT_rocm_path_EQ,
OPT_O_Group,
OPT_g_Group,
OPT_g_flags_Group,
OPT_R_value_Group,
OPT_R_Group,
OPT_Xcuda_ptxas,
OPT_ftime_report,
OPT_ftime_trace,
OPT_ftime_trace_EQ,
OPT_ftime_trace_granularity_EQ,
OPT_ftime_trace_verbose,
OPT_opt_record_file,
OPT_opt_record_format,
OPT_opt_record_passes,
OPT_fsave_optimization_record,
OPT_fsave_optimization_record_EQ,
OPT_fno_save_optimization_record,
OPT_foptimization_record_file_EQ,
OPT_foptimization_record_passes_EQ,
OPT_save_temps,
OPT_mcode_object_version_EQ,
OPT_load,
OPT_fno_lto,
OPT_flto,
OPT_flto_EQ};
const llvm::DenseSet<unsigned> LinkerOptions{OPT_mllvm};
auto ShouldForward = [&](const llvm::DenseSet<unsigned> &Set, Arg *A) {
return Set.contains(A->getOption().getID()) ||
(A->getOption().getGroup().isValid() &&
Set.contains(A->getOption().getGroup().getID()));
};

// Pass the CUDA path to the linker wrapper tool.
ArgStringList CmdArgs;
for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_OpenMP}) {
auto TCRange = C.getOffloadToolChains(Kind);
for (auto &I : llvm::make_range(TCRange)) {
const ToolChain *TC = I.second;
if (TC->getTriple().isNVPTX()) {
CudaInstallationDetector CudaInstallation(D, TheTriple, Args);
if (CudaInstallation.isValid())
CmdArgs.push_back(Args.MakeArgString(
"--cuda-path=" + CudaInstallation.getInstallPath()));
break;

// We do not use a bound architecture here so options passed only to a
// specific architecture via -Xarch_<cpu> will not be forwarded.
ArgStringList CompilerArgs;
ArgStringList LinkerArgs;
for (Arg *A : C.getArgsForToolChain(TC, /*BoundArch=*/"", Kind)) {
if (ShouldForward(CompilerOptions, A))
A->render(Args, CompilerArgs);
else if (ShouldForward(LinkerOptions, A))
A->render(Args, LinkerArgs);
}
}
}

// Pass in the optimization level to use for LTO.
if (const Arg *A = Args.getLastArg(options::OPT_O_Group)) {
StringRef OOpt;
if (A->getOption().matches(options::OPT_O4) ||
A->getOption().matches(options::OPT_Ofast))
OOpt = "3";
else if (A->getOption().matches(options::OPT_O)) {
OOpt = A->getValue();
if (OOpt == "g")
OOpt = "1";
else if (OOpt == "s" || OOpt == "z")
OOpt = "2";
} else if (A->getOption().matches(options::OPT_O0))
OOpt = "0";
if (!OOpt.empty())
CmdArgs.push_back(Args.MakeArgString(Twine("--opt-level=O") + OOpt));
// Forward all of these to the appropriate toolchain.
for (StringRef Arg : CompilerArgs)
CmdArgs.push_back(Args.MakeArgString(
"--device-compiler=" + TC->getTripleString() + "=" + Arg));
for (StringRef Arg : LinkerArgs)
CmdArgs.push_back(Args.MakeArgString(
"--device-linker=" + TC->getTripleString() + "=" + Arg));
}
}

CmdArgs.push_back(
Args.MakeArgString("--host-triple=" + TheTriple.getTriple()));
Args.MakeArgString("--host-triple=" + getToolChain().getTripleString()));
if (Args.hasArg(options::OPT_v))
CmdArgs.push_back("--wrapper-verbose");

if (const Arg *A = Args.getLastArg(options::OPT_g_Group)) {
if (!A->getOption().matches(options::OPT_g0))
CmdArgs.push_back("--device-debug");
}

// code-object-version=X needs to be passed to clang-linker-wrapper to ensure
// that it is used by lld.
if (const Arg *A = Args.getLastArg(options::OPT_mcode_object_version_EQ)) {
CmdArgs.push_back(Args.MakeArgString("-mllvm"));
CmdArgs.push_back(Args.MakeArgString(
Twine("--amdhsa-code-object-version=") + A->getValue()));
}

for (const auto &A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
CmdArgs.push_back(Args.MakeArgString("--ptxas-arg=" + A));

// Forward remarks passes to the LLVM backend in the wrapper.
if (const Arg *A = Args.getLastArg(options::OPT_Rpass_EQ))
CmdArgs.push_back(Args.MakeArgString(Twine("--offload-opt=-pass-remarks=") +
A->getValue()));
if (const Arg *A = Args.getLastArg(options::OPT_Rpass_missed_EQ))
CmdArgs.push_back(Args.MakeArgString(
Twine("--offload-opt=-pass-remarks-missed=") + A->getValue()));
if (const Arg *A = Args.getLastArg(options::OPT_Rpass_analysis_EQ))
CmdArgs.push_back(Args.MakeArgString(
Twine("--offload-opt=-pass-remarks-analysis=") + A->getValue()));

if (Args.getLastArg(options::OPT_ftime_report))
CmdArgs.push_back("--device-compiler=-ftime-report");

if (Args.getLastArg(options::OPT_save_temps_EQ))
CmdArgs.push_back("--save-temps");

// Construct the link job so we can wrap around it.
Linker->ConstructJob(C, JA, Output, Inputs, Args, LinkingOutput);
const auto &LinkCommand = C.getJobs().getJobs().back();
Expand All @@ -9249,12 +9246,9 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
options::OPT_fno_openmp_target_jit, false))
CmdArgs.push_back("--embed-bitcode");

// Forward `-mllvm` arguments to the LLVM invocations if present.
for (Arg *A : Args.filtered(options::OPT_mllvm)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(A->getValue());
A->claim();
}
// Save temporary files created by the linker wrapper.
if (Args.hasArg(options::OPT_save_temps))
CmdArgs.push_back("--save-temps");

// Pass in the C library for GPUs if present and not disabled.
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_r, options::OPT_nogpulib,
Expand All @@ -9281,11 +9275,6 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
});
}

// If we disable the GPU C library support it needs to be forwarded to the
// link job.
if (!Args.hasFlag(options::OPT_gpulibc, options::OPT_nogpulibc, true))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see these options in either of the new option sets. Is this code no longer needed, or is it already handled somewhere else?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is leftover from an ill-conceived attempt at making the AMDGPU target link it automatically if it was present. It's useless now so I just deleted it.

CmdArgs.push_back("--device-compiler=-nolibc");

// Add the linker arguments to be forwarded by the wrapper.
CmdArgs.push_back(Args.MakeArgString(Twine("--linker-path=") +
LinkCommand->getExecutable()));
Expand Down
4 changes: 2 additions & 2 deletions clang/test/Driver/amdgpu-openmp-sanitize-options.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,10 @@
// GPUSAN: {{"[^"]*clang[^"]*" "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu".* "-emit-llvm-bc".* "-target-cpu" "(gfx908|gfx900)".* "-fopenmp".* "-fsanitize=address".* "-x" "c".*}}
// GPUSAN: {{"[^"]*clang-offload-packager[^"]*" "-o".* "--image=file=.*.bc,triple=amdgcn-amd-amdhsa,arch=gfx908(:xnack\-|:xnack\+)?,kind=openmp(,feature=(\-xnack|\+xnack))?"}}
// GPUSAN: {{"[^"]*clang[^"]*" "-cc1" "-triple" "x86_64-unknown-linux-gnu".* "-fopenmp".* "-fsanitize=address".* "-fopenmp-targets=amdgcn-amd-amdhsa".* "-x" "ir".*}}
// GPUSAN: {{"[^"]*clang-linker-wrapper[^"]*" "--host-triple=x86_64-unknown-linux-gnu" "--linker-path=[^"]*".* "--whole-archive" "[^"]*(libclang_rt.asan_static.a|libclang_rt.asan_static-x86_64.a)".* "--whole-archive" "[^"]*(libclang_rt.asan.a|libclang_rt.asan-x86_64.a)".*}}
// GPUSAN: {{"[^"]*clang-linker-wrapper[^"]*".* "--host-triple=x86_64-unknown-linux-gnu".* "--linker-path=[^"]*".* "--whole-archive" "[^"]*(libclang_rt.asan_static.a|libclang_rt.asan_static-x86_64.a)".* "--whole-archive" "[^"]*(libclang_rt.asan.a|libclang_rt.asan-x86_64.a)".*}}

// NOGPUSAN: {{"[^"]*clang[^"]*" "-cc1" "-triple" "x86_64-unknown-linux-gnu".* "-fopenmp".* "-fsanitize=address".* "-fopenmp-targets=amdgcn-amd-amdhsa".* "-x" "c".*}}
// NOGPUSAN: {{"[^"]*clang[^"]*" "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu".* "-emit-llvm-bc".* "-target-cpu" "(gfx908|gfx900)".* "-fopenmp".* "-x" "c".*}}
// NOGPUSAN: {{"[^"]*clang-offload-packager[^"]*" "-o".* "--image=file=.*.bc,triple=amdgcn-amd-amdhsa,arch=gfx908(:xnack\-|:xnack\+)?,kind=openmp(,feature=(\-xnack|\+xnack))?"}}
// NOGPUSAN: {{"[^"]*clang[^"]*" "-cc1" "-triple" "x86_64-unknown-linux-gnu".* "-fopenmp".* "-fsanitize=address".* "-fopenmp-targets=amdgcn-amd-amdhsa".* "-x" "ir".*}}
// NOGPUSAN: {{"[^"]*clang-linker-wrapper[^"]*" "--host-triple=x86_64-unknown-linux-gnu" "--linker-path=[^"]*".* "--whole-archive" "[^"]*(libclang_rt.asan_static.a|libclang_rt.asan_static-x86_64.a)".* "--whole-archive" "[^"]*(libclang_rt.asan.a|libclang_rt.asan-x86_64.a)".*}}
// NOGPUSAN: {{"[^"]*clang-linker-wrapper[^"]*".* "--host-triple=x86_64-unknown-linux-gnu".* "--linker-path=[^"]*".* "--whole-archive" "[^"]*(libclang_rt.asan_static.a|libclang_rt.asan_static-x86_64.a)".* "--whole-archive" "[^"]*(libclang_rt.asan.a|libclang_rt.asan-x86_64.a)".*}}
4 changes: 2 additions & 2 deletions clang/test/Driver/amdgpu-openmp-toolchain.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@

// RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx90a \
// RUN: -O3 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-OPT
// CHECK-OPT: clang-linker-wrapper{{.*}}"--opt-level=O3"
// CHECK-OPT: clang-linker-wrapper{{.*}}"--device-compiler=amdgcn-amd-amdhsa=-O3"

// RUN: %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-WARN-ATOMIC
// CHECK-WARN-ATOMIC-NOT: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-Werror=atomic-alignment"
Expand All @@ -84,4 +84,4 @@

// RUN: %clang -### -target x86_64-pc-linux-gnu -nogpulib -fopenmp --offload-arch=gfx90a \
// RUN: -ftime-report %s 2>&1 | FileCheck %s --check-prefix=CHECK-TIME-REPORT
// CHECK-TIME-REPORT: clang-linker-wrapper{{.*}}"--device-compiler=-ftime-report"
// CHECK-TIME-REPORT: clang-linker-wrapper{{.*}}"--device-compiler=amdgcn-amd-amdhsa=-ftime-report"
4 changes: 2 additions & 2 deletions clang/test/Driver/openmp-offload.c
Original file line number Diff line number Diff line change
Expand Up @@ -184,13 +184,13 @@
// RUN: %clang -### --target=powerpc64le-linux -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu -g %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-NEW-DRIVER-DEBUG %s

// CHK-NEW-DRIVER-DEBUG: clang-linker-wrapper{{.*}} "--device-debug"
// CHK-NEW-DRIVER-DEBUG: clang-linker-wrapper{{.*}} "--device-compiler=powerpc64le-ibm-linux-gnu=-g"

/// Check arguments to the linker wrapper
// RUN: %clang -### --target=powerpc64le-linux -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu \
// RUN: -mllvm -abc %s 2>&1 | FileCheck -check-prefix=CHK-NEW-DRIVER-MLLVM %s

// CHK-NEW-DRIVER-MLLVM: clang-linker-wrapper{{.*}} "-abc"
// CHK-NEW-DRIVER-MLLVM: clang-linker-wrapper{{.*}} "--device-linker=powerpc64le-ibm-linux-gnu=-mllvm" "--device-linker=powerpc64le-ibm-linux-gnu=-abc"

//
// Ensure that we generate the correct bindings for '-fsyntax-only' for OpenMP.
Expand Down