Skip to content

Link devicelib during device code compilation #15114

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions clang/include/clang/Basic/DiagnosticDriverKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,9 @@ def err_drv_cuda_host_arch : Error<
def err_drv_no_sycl_libspirv : Error<
"cannot find '%0'; provide path to libspirv library via '-fsycl-libspirv-path', or "
"pass '-fno-sycl-libspirv' to build without linking with libspirv">;
def err_drv_no_sycl_libdevice : Error<
"cannot find '%0'"
"pass '-fno-sycl-libdevice' to build without linking with sycl libdevice">;
def warn_flag_no_sycl_libspirv
: Warning<"'-fno-sycl-libspirv' should not be used with target '%0'; "
"libspirv is required for correct behavior">,
Expand Down
2 changes: 2 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -6881,6 +6881,8 @@ def fsycl_libspirv_path_EQ : Joined<["-"], "fsycl-libspirv-path=">,
HelpText<"Path to libspirv library">;
def fno_sycl_libspirv : Flag<["-"], "fno-sycl-libspirv">,
HelpText<"Disable check for libspirv">;
def fno_sycl_libdevice : Flag<["-"], "fno-sycl-libdevice">,
HelpText<"Disable check for sycl libdevice">;
def fsycl_use_spirv_backend_for_spirv_gen : Flag<["-"], "fsycl-use-spirv-backend-for-spirv-gen">,
Flags<[HelpHidden]>,
HelpText<"Use the SPIR-V backend for SPIR-V code generation. "
Expand Down
33 changes: 7 additions & 26 deletions clang/lib/Driver/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5533,7 +5533,7 @@ class OffloadingActionBuilder final {
// AOT compilation.
bool SYCLDeviceLibLinked = false;
Action *NativeCPULib = nullptr;
if (IsSPIR || IsNVPTX || IsSYCLNativeCPU) {
if (IsSPIR || IsSYCLNativeCPU) {
bool UseJitLink =
IsSPIR &&
Args.hasFlag(options::OPT_fsycl_device_lib_jit_link,
Expand Down Expand Up @@ -5848,10 +5848,9 @@ class OffloadingActionBuilder final {
++NumOfDeviceLibLinked;
Arg *InputArg = MakeInputArg(Args, C.getDriver().getOpts(),
Args.MakeArgString(LibName));
if (TC->getTriple().isNVPTX() ||
(TC->getTriple().isSPIR() &&
TC->getTriple().getSubArch() ==
llvm::Triple::SPIRSubArch_fpga)) {
if (TC->getTriple().isSPIR() &&
TC->getTriple().getSubArch() ==
llvm::Triple::SPIRSubArch_fpga) {
auto *SYCLDeviceLibsInputAction =
C.MakeAction<InputAction>(*InputArg, types::TY_Object);
auto *SYCLDeviceLibsUnbundleAction =
Expand Down Expand Up @@ -5892,10 +5891,9 @@ class OffloadingActionBuilder final {
}
}

// For NVPTX backend we need to also link libclc and CUDA libdevice
// at the same stage that we link all of the unbundled SYCL libdevice
// objects together.
if ((TC->getTriple().isNVPTX() || isNativeCPU) && NumOfDeviceLibLinked) {
// For Natice CPU backend we need to also link libclc at the same stage
// that we link all of the unbundled SYCL libdevice objects together.
if (isNativeCPU && NumOfDeviceLibLinked) {
std::string LibSpirvFile;
if (Args.hasArg(options::OPT_fsycl_libspirv_path_EQ)) {
auto ProvidedPath =
Expand Down Expand Up @@ -5950,23 +5948,6 @@ class OffloadingActionBuilder final {
// return here to not generate cuda actions
return NumOfDeviceLibLinked != 0;
}

const toolchains::CudaToolChain *CudaTC =
static_cast<const toolchains::CudaToolChain *>(TC);
for (const auto &LinkInputEnum : enumerate(DeviceLinkerInputs)) {
const char *BoundArch =
SYCLTargetInfoList[LinkInputEnum.index()].BoundArch;
std::string LibDeviceFile =
CudaTC->CudaInstallation.getLibDeviceFile(BoundArch);
if (!LibDeviceFile.empty()) {
Arg *CudaDeviceLibInputArg =
MakeInputArg(Args, C.getDriver().getOpts(),
Args.MakeArgString(LibDeviceFile));
auto *SYCLDeviceLibInputAction = C.MakeAction<InputAction>(
*CudaDeviceLibInputArg, types::TY_LLVM_BC);
DeviceLinkObjects.push_back(SYCLDeviceLibInputAction);
}
}
}
return NumOfDeviceLibLinked != 0;
}
Expand Down
38 changes: 38 additions & 0 deletions clang/lib/Driver/ToolChains/Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -875,6 +875,9 @@ static const char *getLibSpirvTargetName(const ToolChain &HostTC) {
return "remangled-l64-signed_char.libspirv-nvptx64-nvidia-cuda.bc";
}

// Select DeviceLib file.
static const char *getDeviceLibTargetName() { return "devicelib--cuda.bc"; }

void NVPTXToolChain::addClangTargetOptions(
const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
Action::OffloadKind DeviceOffloadingKind) const {
Expand Down Expand Up @@ -1033,6 +1036,41 @@ void CudaToolChain::addClangTargetOptions(
CC1Args.push_back(DriverArgs.MakeArgString(LibSpirvFile));
}

auto NoDeviceLib = DriverArgs.hasArg(options::OPT_fno_sycl_libdevice) ||
getDriver().offloadDeviceOnly();

if (DeviceOffloadingKind == Action::OFK_SYCL && !NoDeviceLib) {
std::string DeviceLibFile;

SmallVector<StringRef, 8> LibraryPaths;

// Expected path w/out install.
SmallString<256> WithoutInstallPath(getDriver().ResourceDir);
llvm::sys::path::append(WithoutInstallPath, Twine("../../"));
LibraryPaths.emplace_back(WithoutInstallPath.c_str());

// Select remangled libclc variant
std::string DeviceLibTargetName = getDeviceLibTargetName();

for (StringRef LibraryPath : LibraryPaths) {
SmallString<128> DeviceLibTargetFile(LibraryPath);
llvm::sys::path::append(DeviceLibTargetFile, DeviceLibTargetName);
if (llvm::sys::fs::exists(DeviceLibTargetFile) ||
DriverArgs.hasArg(options::OPT__HASH_HASH_HASH)) {
DeviceLibFile = std::string(DeviceLibTargetFile.str());
break;
}
}

if (DeviceLibFile.empty()) {
getDriver().Diag(diag::err_drv_no_sycl_libdevice)
<< getDeviceLibTargetName();
return;
}

CC1Args.push_back("-mlink-builtin-bitcode");
CC1Args.push_back(DriverArgs.MakeArgString(DeviceLibFile));
}
if (DriverArgs.hasArg(options::OPT_nogpulib))
return;

Expand Down
30 changes: 22 additions & 8 deletions clang/lib/Driver/ToolChains/SYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,10 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
SmallVector<std::string, 8> LibraryList;
const llvm::opt::ArgList &Args = C.getArgs();

// For NVPTX we only use one single bitcode library and ignore
// manually specified SYCL device libraries.
bool IgnoreSingleLibs = TargetTriple.isNVPTX();

struct DeviceLibOptInfo {
StringRef DeviceLibName;
StringRef DeviceLibOption;
Expand All @@ -233,10 +237,13 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
if (A->getOption().matches(options::OPT_fno_sycl_device_lib_EQ))
NoDeviceLibs = true;

bool PrintUnusedLibWarning = false;
for (StringRef Val : A->getValues()) {
if (Val == "all") {
for (const auto &K : DeviceLibLinkInfo.keys())
DeviceLibLinkInfo[K] = true && (!NoDeviceLibs || K == "internal");
DeviceLibLinkInfo[K] = (!IgnoreSingleLibs && !NoDeviceLibs) ||
(K == "internal" && NoDeviceLibs);
PrintUnusedLibWarning = false;
break;
}
auto LinkInfoIter = DeviceLibLinkInfo.find(Val);
Expand All @@ -247,10 +254,18 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
C.getDriver().Diag(diag::err_drv_unsupported_option_argument)
<< A->getSpelling() << Val;
}
DeviceLibLinkInfo[Val] = true && !NoDeviceLibs;
DeviceLibLinkInfo[Val] = !NoDeviceLibs && !IgnoreSingleLibs;
PrintUnusedLibWarning = IgnoreSingleLibs && !NoDeviceLibs;
}
if (PrintUnusedLibWarning)
C.getDriver().Diag(diag::warn_ignored_clang_option)
<< A->getSpelling() << A->getAsString(Args);
}
}

if (IgnoreSingleLibs && !NoDeviceLibs)
return LibraryList;

using SYCLDeviceLibsList = SmallVector<DeviceLibOptInfo, 5>;

const SYCLDeviceLibsList SYCLDeviceWrapperLibs = {
Expand Down Expand Up @@ -304,10 +319,9 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment();
bool IsNewOffload = C.getDriver().getUseNewOffloadingDriver();
StringRef LibSuffix = ".bc";
if (TargetTriple.isNVPTX() ||
(TargetTriple.isSPIR() &&
TargetTriple.getSubArch() == llvm::Triple::SPIRSubArch_fpga))
// For NVidia or FPGA, we are unbundling objects.
if (TargetTriple.isSPIR() &&
TargetTriple.getSubArch() == llvm::Triple::SPIRSubArch_fpga)
// For FPGA, we are unbundling objects.
LibSuffix = IsWindowsMSVCEnv ? ".obj" : ".o";
if (IsNewOffload)
// For new offload model, we use packaged .bc files.
Expand All @@ -323,7 +337,7 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
};

addLibraries(SYCLDeviceWrapperLibs);
if (IsSpirvAOT || TargetTriple.isNVPTX())
if (IsSpirvAOT)
addLibraries(SYCLDeviceFallbackLibs);

bool NativeBfloatLibs;
Expand Down Expand Up @@ -551,7 +565,7 @@ const char *SYCL::Linker::constructLLVMLinkCommand(
this->getToolChain().getTriple().getSubArch() ==
llvm::Triple::SPIRSubArch_fpga;
StringRef LibPostfix = ".bc";
if (IsNVPTX || IsFPGA) {
if (IsFPGA) {
LibPostfix = ".o";
if (HostTC->getTriple().isWindowsMSVCEnvironment() &&
C.getDriver().IsCLMode())
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
// intrinsics. This allows the driver to link in the libdevice definitions for
// cosf etc. later in the driver flow.

// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -ffast-math -emit-llvm -o - | FileCheck %s

#include "Inputs/sycl.hpp"
Expand Down
76 changes: 32 additions & 44 deletions clang/test/Driver/sycl-offload-nvptx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,28 +53,22 @@
// CHK-PHASES-NO-CC: 7: backend, {6}, assembler, (host-sycl)
// CHK-PHASES-NO-CC: 8: assembler, {7}, object, (host-sycl)
// CHK-PHASES-NO-CC: 9: linker, {4}, ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 10: input, "{{.*}}libsycl-itt-user-wrappers.o{{.*}}", object
// CHK-PHASES-NO-CC: 11: clang-offload-unbundler, {10}, object
// CHK-PHASES-NO-CC: 12: offload, " (nvptx64-nvidia-cuda)" {11}, object
// CHK-PHASES-NO-CC: 13: input, "{{.*}}libsycl-itt-compiler-wrappers.o{{.*}}", object
// CHK-PHASES-NO-CC: 14: clang-offload-unbundler, {13}, object
// CHK-PHASES-NO-CC: 15: offload, " (nvptx64-nvidia-cuda)" {14}, object
// CHK-PHASES-NO-CC: 16: input, "{{.*}}libsycl-itt-stubs.o{{.*}}", object
// CHK-PHASES-NO-CC: 17: clang-offload-unbundler, {16}, object
// CHK-PHASES-NO-CC: 18: offload, " (nvptx64-nvidia-cuda)" {17}, object
// CHK-PHASES-NO-CC: 19: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 20: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 21: linker, {9, 12, 15, 18, 19, 20}, ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 22: sycl-post-link, {21}, ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 23: file-table-tform, {22}, ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 24: backend, {23}, assembler, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 25: assembler, {24}, object, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 26: linker, {24, 25}, cuda-fatbin, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 27: foreach, {23, 26}, cuda-fatbin, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 28: file-table-tform, {22, 27}, tempfiletable, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 29: clang-offload-wrapper, {28}, object, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 30: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {29}, object
// CHK-PHASES-NO-CC: 31: linker, {8, 30}, image, (host-sycl)
// CHK-PHASES-NO-CC: 10: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 11: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 12: input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 13: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 14: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 15: linker, {9, 10, 11, 12, 13, 14}, ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 16: sycl-post-link, {15}, ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 17: file-table-tform, {16}, ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 18: backend, {17}, assembler, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 19: assembler, {18}, object, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 20: linker, {18, 19}, cuda-fatbin, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 21: foreach, {17, 20}, cuda-fatbin, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 22: file-table-tform, {16, 21}, tempfiletable, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 23: clang-offload-wrapper, {22}, object, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 24: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {23}, object
// CHK-PHASES-NO-CC: 25: linker, {8, 24}, image, (host-sycl)
//
/// Check phases specifying a compute capability.
// RUN: %clangxx -ccc-print-phases --sysroot=%S/Inputs/SYCL -std=c++11 \
Expand All @@ -97,28 +91,22 @@
// CHK-PHASES: 7: backend, {6}, assembler, (host-sycl)
// CHK-PHASES: 8: assembler, {7}, object, (host-sycl)
// CHK-PHASES: 9: linker, {4}, ir, (device-sycl, sm_35)
// CHK-PHASES: 10: input, "{{.*}}libsycl-itt-user-wrappers.o", object
// CHK-PHASES: 11: clang-offload-unbundler, {10}, object
// CHK-PHASES: 12: offload, " (nvptx64-nvidia-cuda)" {11}, object
// CHK-PHASES: 13: input, "{{.*}}libsycl-itt-compiler-wrappers.o", object
// CHK-PHASES: 14: clang-offload-unbundler, {13}, object
// CHK-PHASES: 15: offload, " (nvptx64-nvidia-cuda)" {14}, object
// CHK-PHASES: 16: input, "{{.*}}libsycl-itt-stubs.o", object
// CHK-PHASES: 17: clang-offload-unbundler, {16}, object
// CHK-PHASES: 18: offload, " (nvptx64-nvidia-cuda)" {17}, object
// CHK-PHASES: 19: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_35)
// CHK-PHASES: 20: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_35)
// CHK-PHASES: 21: linker, {9, 12, 15, 18, 19, 20}, ir, (device-sycl, sm_35)
// CHK-PHASES: 22: sycl-post-link, {21}, ir, (device-sycl, sm_35)
// CHK-PHASES: 23: file-table-tform, {22}, ir, (device-sycl, sm_35)
// CHK-PHASES: 24: backend, {23}, assembler, (device-sycl, sm_35)
// CHK-PHASES: 25: assembler, {24}, object, (device-sycl, sm_35)
// CHK-PHASES: 26: linker, {24, 25}, cuda-fatbin, (device-sycl, sm_35)
// CHK-PHASES: 27: foreach, {23, 26}, cuda-fatbin, (device-sycl, sm_35)
// CHK-PHASES: 28: file-table-tform, {22, 27}, tempfiletable, (device-sycl, sm_35)
// CHK-PHASES: 29: clang-offload-wrapper, {28}, object, (device-sycl, sm_35)
// CHK-PHASES: 30: offload, "device-sycl (nvptx64-nvidia-cuda:sm_35)" {29}, object
// CHK-PHASES: 31: linker, {8, 30}, image, (host-sycl)
// CHK-PHASES: 10: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, sm_35)
// CHK-PHASES: 11: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, sm_35)
// CHK-PHASES: 12: input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, sm_35)
// CHK-PHASES: 13: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_35)
// CHK-PHASES: 14: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_35)
// CHK-PHASES: 15: linker, {9, 10, 11, 12, 13, 14}, ir, (device-sycl, sm_35)
// CHK-PHASES: 16: sycl-post-link, {15}, ir, (device-sycl, sm_35)
// CHK-PHASES: 17: file-table-tform, {16}, ir, (device-sycl, sm_35)
// CHK-PHASES: 18: backend, {17}, assembler, (device-sycl, sm_35)
// CHK-PHASES: 19: assembler, {18}, object, (device-sycl, sm_35)
// CHK-PHASES: 20: linker, {18, 19}, cuda-fatbin, (device-sycl, sm_35)
// CHK-PHASES: 21: foreach, {17, 20}, cuda-fatbin, (device-sycl, sm_35)
// CHK-PHASES: 22: file-table-tform, {16, 21}, tempfiletable, (device-sycl, sm_35)
// CHK-PHASES: 23: clang-offload-wrapper, {22}, object, (device-sycl, sm_35)
// CHK-PHASES: 24: offload, "device-sycl (nvptx64-nvidia-cuda:sm_35)" {23}, object
// CHK-PHASES: 25: linker, {8, 24}, image, (host-sycl)

/// Check calling preprocessor only
// RUN: %clangxx -E -fsycl -fsycl-targets=nvptx64-nvidia-cuda -ccc-print-phases %s 2>&1 \
Expand Down
2 changes: 1 addition & 1 deletion libclc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ if( ENABLE_RUNTIME_SUBNORMAL )
foreach( file subnormal_use_default subnormal_disable )
link_bc(
TARGET ${file}
RSP_DIR ${LIBCLC_ARCH_OBJFILE_DIR}
INPUTS ${CMAKE_CURRENT_SOURCE_DIR}/generic/lib/${file}.ll
)
install( FILES $<TARGET_PROPERTY:${file},TARGET_FILE> ARCHIVE
Expand Down Expand Up @@ -405,7 +406,6 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
# Enable SPIR-V builtin function declarations, so they don't
# have to be explicity declared in the soruce.
list( APPEND flags -Xclang -fdeclare-spirv-builtins)

set( LIBCLC_ARCH_OBJFILE_DIR "${LIBCLC_OBJFILE_DIR}/${arch_suffix}" )
file( MAKE_DIRECTORY ${LIBCLC_ARCH_OBJFILE_DIR} )

Expand Down
Loading
Loading