Skip to content

Commit b37be0e

Browse files
jhuber6 and sarnex authored
[Offload] Handle BoundArchitecture for non-GPU targets (#132037)
Summary: Offloading tends to have a bound architecture that directly corresponds to the `-mcpu` argument for that embedded job. This is currently handled by the GPU offloading toolchains, but is ignored by the CPU ones. This is problematic for languages like SYCL or OpenMP which permit 'offloading' to non-GPU targets. This patch handles this by putting generic handling in the GCC toolchain for those languages. I would've made this fully generic but it regressed some HIP sanitizer tests because their use-case is really weird. This also only goes for the languages that inherit from `generic_gcc`. I could've made it in the base class, but I felt like it wasn't necessary as we only support offloading based off of this toolchain. In the future if we need it we can move it around. --------- Co-authored-by: Nick Sarnie <[email protected]>
1 parent 6b9716b commit b37be0e

File tree

2 files changed

+44
-33
lines changed

2 files changed

+44
-33
lines changed

clang/lib/Driver/ToolChains/Gnu.cpp

Lines changed: 31 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -3388,46 +3388,44 @@ Generic_GCC::addLibStdCxxIncludePaths(const llvm::opt::ArgList &DriverArgs,
33883388
}
33893389

33903390
llvm::opt::DerivedArgList *
3391-
Generic_GCC::TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef,
3391+
Generic_GCC::TranslateArgs(const llvm::opt::DerivedArgList &Args,
3392+
StringRef BoundArch,
33923393
Action::OffloadKind DeviceOffloadKind) const {
3394+
if (DeviceOffloadKind != Action::OFK_SYCL &&
3395+
DeviceOffloadKind != Action::OFK_OpenMP)
3396+
return nullptr;
33933397

3394-
// If this tool chain is used for an OpenMP offloading device we have to make
3395-
// sure we always generate a shared library regardless of the commands the
3396-
// user passed to the host. This is required because the runtime library
3397-
// is required to load the device image dynamically at run time.
3398+
DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs());
3399+
3400+
// Filter all the arguments we don't care passing to the offloading
3401+
// toolchain as they can mess up with the creation of a shared library.
3402+
const llvm::DenseSet<unsigned> OpenMPFiltered{
3403+
options::OPT_shared, options::OPT_dynamic, options::OPT_static,
3404+
options::OPT_fPIE, options::OPT_fno_PIE, options::OPT_fpie,
3405+
options::OPT_fno_pie};
3406+
for (auto *A : Args)
3407+
if (DeviceOffloadKind != Action::OFK_OpenMP ||
3408+
!OpenMPFiltered.contains(A->getOption().getID()))
3409+
DAL->append(A);
3410+
3411+
// Request a shared library for CPU offloading. Given that these options
3412+
// are decided implicitly, they do not refer to any base argument.
3413+
const OptTable &Opts = getDriver().getOpts();
33983414
if (DeviceOffloadKind == Action::OFK_OpenMP) {
3399-
DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs());
3400-
const OptTable &Opts = getDriver().getOpts();
3401-
3402-
// Request the shared library. Given that these options are decided
3403-
// implicitly, they do not refer to any base argument.
34043415
DAL->AddFlagArg(/*BaseArg=*/nullptr, Opts.getOption(options::OPT_shared));
34053416
DAL->AddFlagArg(/*BaseArg=*/nullptr, Opts.getOption(options::OPT_fPIC));
3417+
}
34063418

3407-
// Filter all the arguments we don't care passing to the offloading
3408-
// toolchain as they can mess up with the creation of a shared library.
3409-
for (auto *A : Args) {
3410-
switch ((options::ID)A->getOption().getID()) {
3411-
default:
3412-
DAL->append(A);
3413-
break;
3414-
case options::OPT_shared:
3415-
case options::OPT_dynamic:
3416-
case options::OPT_static:
3417-
case options::OPT_fPIC:
3418-
case options::OPT_fno_PIC:
3419-
case options::OPT_fpic:
3420-
case options::OPT_fno_pic:
3421-
case options::OPT_fPIE:
3422-
case options::OPT_fno_PIE:
3423-
case options::OPT_fpie:
3424-
case options::OPT_fno_pie:
3425-
break;
3426-
}
3427-
}
3428-
return DAL;
3419+
// Add the bound architecture to the arguments list if present.
3420+
if (!BoundArch.empty()) {
3421+
options::ID Opt =
3422+
getTriple().isARM() || getTriple().isPPC() || getTriple().isAArch64()
3423+
? options::OPT_mcpu_EQ
3424+
: options::OPT_march_EQ;
3425+
DAL->eraseArg(Opt);
3426+
DAL->AddJoinedArg(nullptr, Opts.getOption(Opt), BoundArch);
34293427
}
3430-
return nullptr;
3428+
return DAL;
34313429
}
34323430

34333431
void Generic_ELF::anchor() {}

clang/test/Driver/openmp-offload.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,3 +218,16 @@
218218
// RUN: %clang -### --target=powerpc64le-linux -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu \
219219
// RUN: -foffload-lto=thin %s 2>&1 | FileCheck -check-prefix=CHK-DEVICE-LTO-THIN %s
220220
// CHK-DEVICE-LTO-THIN: clang-linker-wrapper{{.*}} "--device-compiler=powerpc64le-ibm-linux-gnu=-flto=thin"
221+
222+
//
223+
// Check forwarding architectures to non-GPU targets
224+
//
225+
// RUN: %clang -### --target=aarch64-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=aarch64-unknown-linux-gnu \
226+
// RUN: --offload-arch=a64fx %s 2>&1 | FileCheck -check-prefix=CHK-CPU-ARCH-A %s
227+
// CHK-CPU-ARCH-A: "-cc1" "-triple" "aarch64-unknown-linux-gnu" {{.*}} "-target-cpu" "generic"
228+
// CHK-CPU-ARCH-A: "-cc1" "-triple" "aarch64-unknown-linux-gnu" {{.*}} "-target-cpu" "a64fx"
229+
//
230+
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=x86_64-unknown-linux-gnu \
231+
// RUN: --offload-arch=znver4 %s 2>&1 | FileCheck -check-prefix=CHK-CPU-ARCH-X %s
232+
// CHK-CPU-ARCH-X: "-cc1" "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-target-cpu" "x86-64"
233+
// CHK-CPU-ARCH-X: "-cc1" "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-target-cpu" "znver4"

0 commit comments

Comments
 (0)