Skip to content

[Clang] Make -Xarch_ handling generic for all toolchains #125421

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -932,7 +932,9 @@ def W_Joined : Joined<["-"], "W">, Group<W_Group>,
def Xanalyzer : Separate<["-"], "Xanalyzer">,
HelpText<"Pass <arg> to the static analyzer">, MetaVarName<"<arg>">,
Group<StaticAnalyzer_Group>;
def Xarch__ : JoinedAndSeparate<["-"], "Xarch_">, Flags<[NoXarchOption]>;
def Xarch__ : JoinedAndSeparate<["-"], "Xarch_">, Flags<[NoXarchOption]>,
HelpText<"Pass <arg> to the compiliation if the target matches <arch>">,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We do need a better documentation. It's not obvious that could be a GPU name or an arch name from the target triple.

MetaVarName<"<arch> <arg>">;
def Xarch_host : Separate<["-"], "Xarch_host">, Flags<[NoXarchOption]>,
HelpText<"Pass <arg> to the CUDA/HIP host compilation">, MetaVarName<"<arg>">;
def Xarch_device : Separate<["-"], "Xarch_device">, Flags<[NoXarchOption]>,
Expand Down Expand Up @@ -1115,8 +1117,8 @@ def fno_convergent_functions : Flag<["-"], "fno-convergent-functions">,

// Common offloading options
let Group = offload_Group in {
def offload_arch_EQ : Joined<["--"], "offload-arch=">, Flags<[NoXarchOption]>,
Visibility<[ClangOption, FlangOption]>,
def offload_arch_EQ : Joined<["--"], "offload-arch=">,
Visibility<[ClangOption, FlangOption]>, Flags<[NoXarchOption]>,
HelpText<"Specify an offloading device architecture for CUDA, HIP, or OpenMP. (e.g. sm_35). "
"If 'native' is used the compiler will detect locally installed architectures. "
"For HIP offloading, the device architecture can be followed by target ID features "
Expand Down
5 changes: 3 additions & 2 deletions clang/lib/Driver/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3409,7 +3409,9 @@ class OffloadingActionBuilder final {
// Collect all offload arch parameters, removing duplicates.
std::set<StringRef> GpuArchs;
bool Error = false;
for (Arg *A : Args) {
const ToolChain &TC = *ToolChains.front();
for (Arg *A : C.getArgsForToolChain(&TC, /*BoundArch=*/"",
AssociatedOffloadKind)) {
if (!(A->getOption().matches(options::OPT_offload_arch_EQ) ||
A->getOption().matches(options::OPT_no_offload_arch_EQ)))
continue;
Expand All @@ -3420,7 +3422,6 @@ class OffloadingActionBuilder final {
ArchStr == "all") {
GpuArchs.clear();
} else if (ArchStr == "native") {
const ToolChain &TC = *ToolChains.front();
auto GPUsOrErr = ToolChains.front()->getSystemGPUArchs(Args);
if (!GPUsOrErr) {
TC.getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
Expand Down
42 changes: 32 additions & 10 deletions clang/lib/Driver/ToolChain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1648,7 +1648,8 @@ void ToolChain::TranslateXarchArgs(
A->getOption().matches(options::OPT_Xarch_host))
ValuePos = 0;

unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(ValuePos));
const InputArgList &BaseArgs = Args.getBaseArgs();
unsigned Index = BaseArgs.MakeIndex(A->getValue(ValuePos));
unsigned Prev = Index;
std::unique_ptr<llvm::opt::Arg> XarchArg(Opts.ParseOneArg(Args, Index));

Expand All @@ -1672,8 +1673,31 @@ void ToolChain::TranslateXarchArgs(
Diags.Report(DiagID) << A->getAsString(Args);
return;
}

XarchArg->setBaseArg(A);
A = XarchArg.release();

// Linker input arguments require custom handling. The problem is that we
// have already constructed the phase actions, so we can not treat them as
// "input arguments".
if (A->getOption().hasFlag(options::LinkerInput)) {
// Convert the argument into individual Zlinker_input_args. Need to do this
// manually to avoid memory leaks with the allocated arguments.
for (const char *Value : A->getValues()) {
auto Opt = Opts.getOption(options::OPT_Zlinker_input);
unsigned Index = BaseArgs.MakeIndex(Opt.getName(), Value);
auto NewArg =
new Arg(Opt, BaseArgs.MakeArgString(Opt.getPrefix() + Opt.getName()),
Index, BaseArgs.getArgString(Index + 1), A);

DAL->append(NewArg);
if (!AllocatedArgs)
DAL->AddSynthesizedArg(NewArg);
else
AllocatedArgs->push_back(NewArg);
}
}

if (!AllocatedArgs)
DAL->AddSynthesizedArg(A);
else
Expand All @@ -1697,19 +1721,17 @@ llvm::opt::DerivedArgList *ToolChain::TranslateXarchArgs(
} else if (A->getOption().matches(options::OPT_Xarch_host)) {
NeedTrans = !IsDevice;
Skip = IsDevice;
} else if (A->getOption().matches(options::OPT_Xarch__) && IsDevice) {
// Do not translate -Xarch_ options for non CUDA/HIP toolchain since
// they may need special translation.
// Skip this argument unless the architecture matches BoundArch
if (BoundArch.empty() || A->getValue(0) != BoundArch)
Skip = true;
else
NeedTrans = true;
} else if (A->getOption().matches(options::OPT_Xarch__)) {
NeedTrans = A->getValue() == getArchName() ||
(!BoundArch.empty() && A->getValue() == BoundArch);
Skip = !NeedTrans;
}
if (NeedTrans || Skip)
Modified = true;
if (NeedTrans)
if (NeedTrans) {
A->claim();
TranslateXarchArgs(Args, A, DAL, AllocatedArgs);
}
if (!Skip)
DAL->append(A);
}
Expand Down
24 changes: 0 additions & 24 deletions clang/lib/Driver/ToolChains/Darwin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2777,30 +2777,6 @@ DerivedArgList *MachO::TranslateArgs(const DerivedArgList &Args,
// and try to push it down into tool specific logic.

for (Arg *A : Args) {
if (A->getOption().matches(options::OPT_Xarch__)) {
// Skip this argument unless the architecture matches either the toolchain
// triple arch, or the arch being bound.
StringRef XarchArch = A->getValue(0);
if (!(XarchArch == getArchName() ||
(!BoundArch.empty() && XarchArch == BoundArch)))
continue;

Arg *OriginalArg = A;
TranslateXarchArgs(Args, A, DAL);

// Linker input arguments require custom handling. The problem is that we
// have already constructed the phase actions, so we can not treat them as
// "input arguments".
if (A->getOption().hasFlag(options::LinkerInput)) {
// Convert the argument into individual Zlinker_input_args.
for (const char *Value : A->getValues()) {
DAL->AddSeparateArg(
OriginalArg, Opts.getOption(options::OPT_Zlinker_input), Value);
}
continue;
}
}

// Sob. These is strictly gcc compatible for the time being. Apple
// gcc translates options twice, which means that self-expanding
// options add duplicates.
Expand Down
8 changes: 8 additions & 0 deletions clang/test/Driver/Xarch.c
Original file line number Diff line number Diff line change
@@ -1,12 +1,20 @@
// RUN: %clang -target i386-apple-darwin11 -m32 -Xarch_i386 -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3ONCE %s
// RUN: %clang -target x86_64-unknown-linux-gnu -Xarch_x86_64 -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3ONCE %s
// RUN: %clang -target x86_64-unknown-windows-msvc -Xarch_x86_64 -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3ONCE %s
// RUN: %clang -target aarch64-unknown-linux-gnu -Xarch_aarch64 -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3ONCE %s
// RUN: %clang -target powerpc64le-unknown-linux-gnu -Xarch_powerpc64le -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3ONCE %s
// O3ONCE: "-O3"
// O3ONCE-NOT: "-O3"

// RUN: %clang -target i386-apple-darwin11 -m64 -Xarch_i386 -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3NONE %s
// RUN: %clang -target x86_64-unknown-linux-gnu -m64 -Xarch_i386 -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3NONE %s
// O3NONE-NOT: "-O3"
// O3NONE: argument unused during compilation: '-Xarch_i386 -O3'

// RUN: not %clang -target i386-apple-darwin11 -m32 -Xarch_i386 -o -Xarch_i386 -S %s -S -Xarch_i386 -o 2>&1 | FileCheck -check-prefix=INVALID %s
// INVALID: error: invalid Xarch argument: '-Xarch_i386 -o'
// INVALID: error: invalid Xarch argument: '-Xarch_i386 -S'
// INVALID: error: invalid Xarch argument: '-Xarch_i386 -o'

// RUN: %clang -target x86_64-unknown-linux-gnu -Xarch_x86_64 -Wl,foo %s -### 2>&1 | FileCheck -check-prefix=LINKER %s
// LINKER: "foo"
34 changes: 34 additions & 0 deletions clang/test/Driver/offload-Xarch.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// RUN: %clang -x cuda %s -Xarch_nvptx64 -O3 -S -nogpulib -nogpuinc -### 2>&1 | FileCheck -check-prefix=O3ONCE %s
// RUN: %clang -x cuda %s -Xarch_device -O3 -S -nogpulib -nogpuinc -### 2>&1 | FileCheck -check-prefix=O3ONCE %s
// RUN: %clang -x hip %s -Xarch_amdgcn -O3 -S -nogpulib -nogpuinc -### 2>&1 | FileCheck -check-prefix=O3ONCE %s
// RUN: %clang -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib -nogpuinc \
// RUN: -Xarch_amdgcn -march=gfx90a -Xarch_amdgcn -O3 -S -### %s 2>&1 \
// RUN: | FileCheck -check-prefix=O3ONCE %s
// RUN: %clang -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -nogpulib -nogpuinc \
// RUN: -Xarch_nvptx64 -march=sm_52 -Xarch_nvptx64 -O3 -S -### %s 2>&1 \
// RUN: | FileCheck -check-prefix=O3ONCE %s
// O3ONCE: "-O3"
// O3ONCE-NOT: "-O3"
Comment on lines +10 to +11
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: I'd also add the check that -O3 is present in a compilation with fcuda-is-device. Otherwise the tests would succeed somewhere else.


// RUN: %clang -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -nogpulib \
// RUN: --target=x86_64-unknown-linux-gnu -Xopenmp-target=nvptx64-nvidia-cuda --offload-arch=sm_52,sm_60 -nogpuinc \
// RUN: -Xopenmp-target=amdgcn-amd-amdhsa --offload-arch=gfx90a,gfx1030 -ccc-print-bindings -### %s 2>&1 \
// RUN: | FileCheck -check-prefix=OPENMP %s
//
// OPENMP: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[HOST_BC:.+]]"
// OPENMP: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[GFX1030_BC:.+]]"
// OPENMP: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[GFX90A_BC:.+]]"
// OPENMP: # "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[SM52_PTX:.+]]"
// OPENMP: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[SM52_PTX]]"], output: "[[SM52_CUBIN:.+]]"
// OPENMP: # "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[SM60_PTX:.+]]"
// OPENMP: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[SM60_PTX]]"], output: "[[SM60_CUBIN:.+]]"
// OPENMP: # "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[GFX1030_BC]]", "[[GFX90A_BC]]", "[[SM52_CUBIN]]", "[[SM60_CUBIN]]"], output: "[[BINARY:.+]]"
// OPENMP: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[BINARY]]"], output: "[[HOST_OBJ:.+]]"
// OPENMP: # "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"

// RUN: %clang -x cuda %s --offload-arch=sm_52,sm_60 -Xarch_sm_52 -O3 -Xarch_sm_60 -O0 \
// RUN: --target=x86_64-unknown-linux-gnu -Xarch_host -O3 -S -nogpulib -nogpuinc -### 2>&1 \
// RUN: | FileCheck -check-prefix=CUDA %s
// CUDA: "-cc1" "-triple" "nvptx64-nvidia-cuda" {{.*}}"-target-cpu" "sm_52" {{.*}}"-O3"
// CUDA: "-cc1" "-triple" "nvptx64-nvidia-cuda" {{.*}}"-target-cpu" "sm_60" {{.*}}"-O0"
// CUDA: "-cc1" "-triple" "x86_64-unknown-linux-gnu" {{.*}}"-O3"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Using -D<something readable> as an argument to pass may let you unambiguously mark each subcompilation, and avoid triggering that -O1/-O2 problem.