Skip to content

[LinkerWrapper] Pass all files to the device linker #97573

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions clang/test/Driver/linker-wrapper-libs.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ int bar() { return weak; }
// RUN: --linker-path=/usr/bin/ld %t.a %t.o -o a.out 2>&1 \
// RUN: | FileCheck %s --check-prefix=LIBRARY-RESOLVES

// LIBRARY-RESOLVES: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.s {{.*}}.o
// LIBRARY-RESOLVES: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o
// LIBRARY-RESOLVES: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o

//
Expand All @@ -72,7 +72,7 @@ int bar() { return weak; }
// RUN: --linker-path=/usr/bin/ld %t.a %t.o -o a.out 2>&1 \
// RUN: | FileCheck %s --check-prefix=LIBRARY-GLOBAL

// LIBRARY-GLOBAL: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.s {{.*}}.o
// LIBRARY-GLOBAL: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o
// LIBRARY-GLOBAL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o

//
Expand All @@ -96,7 +96,7 @@ int bar() { return weak; }
// RUN: | FileCheck %s --check-prefix=LIBRARY-GLOBAL-NONE

// LIBRARY-GLOBAL-NONE-NOT: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o
// LIBRARY-GLOBAL-NONE-NOT: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.s {{.*}}.o
// LIBRARY-GLOBAL-NONE-NOT: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o

//
// Check that we do not extract an external weak symbol.
Expand Down Expand Up @@ -161,7 +161,7 @@ int bar() { return weak; }
// RUN: --linker-path=/usr/bin/ld %t.o %t.a %t.a -o a.out 2>&1 \
// RUN: | FileCheck %s --check-prefix=LIBRARY-GLOBAL-DEFINED

// LIBRARY-GLOBAL-DEFINED: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.s {{.*}}.o
// LIBRARY-GLOBAL-DEFINED: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o
// LIBRARY-GLOBAL-DEFINED-NOT: {{.*}}gfx1030{{.*}}.o
// LIBRARY-GLOBAL-DEFINED: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o

Expand All @@ -185,7 +185,7 @@ int bar() { return weak; }
// RUN: --linker-path=/usr/bin/ld %t.o --whole-archive %t.a -o a.out 2>&1 \
// RUN: | FileCheck %s --check-prefix=LIBRARY-WHOLE-ARCHIVE

// LIBRARY-WHOLE-ARCHIVE: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.s {{.*}}.o
// LIBRARY-WHOLE-ARCHIVE: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o
// LIBRARY-WHOLE-ARCHIVE: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o
// LIBRARY-WHOLE-ARCHIVE: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_52 {{.*}}.s
// LIBRARY-WHOLE-ARCHIVE: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_52 {{.*}}.o
// LIBRARY-WHOLE-ARCHIVE: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a {{.*}}.o
4 changes: 2 additions & 2 deletions clang/test/Driver/linker-wrapper.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ __attribute__((visibility("protected"), used)) int x;
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --save-temps -O2 \
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LTO-TEMPS

// AMDGPU-LTO-TEMPS: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -O2 -Wl,--no-undefined {{.*}}.s -save-temps
// AMDGPU-LTO-TEMPS: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -O2 -Wl,--no-undefined {{.*}}.o -save-temps

// RUN: clang-offload-packager -o %t.out \
// RUN: --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu \
Expand Down Expand Up @@ -147,7 +147,7 @@ __attribute__((visibility("protected"), used)) int x;
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --clang-backend \
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CLANG-BACKEND

// CLANG-BACKEND: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -Wl,--no-undefined {{.*}}.bc
// CLANG-BACKEND: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -Wl,--no-undefined {{.*}}.o

// RUN: clang-offload-packager -o %t.out \
// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70
Expand Down
98 changes: 60 additions & 38 deletions clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,13 @@ Expected<std::string> findProgram(StringRef Name, ArrayRef<StringRef> Paths) {
return *Path;
}

/// We will defer LTO to the target's linker if we are not doing JIT and it is
/// supported by the toolchain.
bool linkerSupportsLTO(const ArgList &Args) {
llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
return Triple.isNVPTX() || Triple.isAMDGPU();
}

/// Returns the hashed value for a constant string.
std::string getHash(StringRef Str) {
llvm::MD5 Hasher;
Expand Down Expand Up @@ -555,18 +562,23 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
llvm::copy(LinkerArgs, std::back_inserter(CmdArgs));
}

// Pass on -mllvm options to the clang invocation.
for (const opt::Arg *Arg : Args.filtered(OPT_mllvm)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(Arg->getValue());
}
// Pass on -mllvm options to the linker invocation.
for (const opt::Arg *Arg : Args.filtered(OPT_mllvm))
CmdArgs.push_back(
Args.MakeArgString("-Wl,-mllvm=" + StringRef(Arg->getValue())));

if (Args.hasArg(OPT_debug))
CmdArgs.push_back("-g");

if (SaveTemps)
CmdArgs.push_back("-save-temps");

if (SaveTemps && linkerSupportsLTO(Args))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is -save-temps propagated from the top-level compilation?
If so, how do we handle --save-temps=obj -o /some/other/dir/foo.o ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this logic we only have the link job remaining, so we pass -Wl,--save-temps to it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My question is the origin of that option. E.g. is the user uses --save-temps=obj -o /some/other/dir/foo.o at the top-level clang invocation, what do we get if the offloading w/ the new driver and this patch are in effect?

Will the intermediate files end up in the current directory? In the object file directory? Both? E.g clang's files end up in the object file directory, but linker's intermediate files end up in the current dir? Or, perhaps not even created, if clangs--save-temps are not propagated to the linker-wrapper.

CmdArgs.push_back("-Wl,--save-temps");

if (Args.hasArg(OPT_embed_bitcode))
CmdArgs.push_back("-Wl,--lto-emit-llvm");

if (Verbose)
CmdArgs.push_back("-v");

Expand All @@ -587,8 +599,8 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
Args.MakeArgString(Arg.split('=').second)});
}

// The OpenMPOpt pass can introduce new calls and is expensive, we do not want
// this when running CodeGen through clang.
// The OpenMPOpt pass can introduce new calls and is expensive, we do
// not want this when running CodeGen through clang.
if (Args.hasArg(OPT_clang_backend) || Args.hasArg(OPT_builtin_bitcode_EQ))
CmdArgs.append({"-mllvm", "-openmp-opt-disable"});

Expand Down Expand Up @@ -772,8 +784,9 @@ Error linkBitcodeFiles(SmallVectorImpl<OffloadFile> &InputFiles,
BumpPtrAllocator Alloc;
StringSaver Saver(Alloc);

// Search for bitcode files in the input and create an LTO input file. If it
// is not a bitcode file, scan its symbol table for symbols we need to save.
// Search for bitcode files in the input and create an LTO input file. If
// it is not a bitcode file, scan its symbol table for symbols we need to
// save.
for (OffloadFile &File : InputFiles) {
MemoryBufferRef Buffer = MemoryBufferRef(File.getBinary()->getImage(), "");

Expand Down Expand Up @@ -807,7 +820,8 @@ Error linkBitcodeFiles(SmallVectorImpl<OffloadFile> &InputFiles,
if (!Name)
return Name.takeError();

// Record if we've seen these symbols in any object or shared libraries.
// Record if we've seen these symbols in any object or shared
// libraries.
if ((*ObjFile)->isRelocatableObject())
UsedInRegularObj.insert(Saver.save(*Name));
else
Expand Down Expand Up @@ -844,17 +858,18 @@ Error linkBitcodeFiles(SmallVectorImpl<OffloadFile> &InputFiles,
return false;
};

// We assume visibility of the whole program if every input file was bitcode.
// We assume visibility of the whole program if every input file was
// bitcode.
auto Features = getTargetFeatures(BitcodeInputFiles);
auto LTOBackend = Args.hasArg(OPT_embed_bitcode) ||
Args.hasArg(OPT_builtin_bitcode_EQ) ||
Args.hasArg(OPT_clang_backend)
? createLTO(Args, Features, OutputBitcode)
: createLTO(Args, Features);

// We need to resolve the symbols so the LTO backend knows which symbols need
// to be kept or can be internalized. This is a simplified symbol resolution
// scheme to approximate the full resolution a linker would do.
// We need to resolve the symbols so the LTO backend knows which symbols
// need to be kept or can be internalized. This is a simplified symbol
// resolution scheme to approximate the full resolution a linker would do.
uint64_t Idx = 0;
DenseSet<StringRef> PrevailingSymbols;
for (auto &BitcodeInput : BitcodeInputFiles) {
Expand Down Expand Up @@ -886,7 +901,8 @@ Error linkBitcodeFiles(SmallVectorImpl<OffloadFile> &InputFiles,
// We need LTO to preseve the following global symbols:
// 1) Symbols used in regular objects.
// 2) Sections that will be given a __start/__stop symbol.
// 3) Prevailing symbols that are needed visible to external libraries.
// 3) Prevailing symbols that are needed visible to external
// libraries.
Res.VisibleToRegularObj =
UsedInRegularObj.contains(Sym.getName()) ||
isValidCIdentifier(Sym.getSectionName()) ||
Expand All @@ -901,9 +917,9 @@ Error linkBitcodeFiles(SmallVectorImpl<OffloadFile> &InputFiles,
(UsedInSharedLib.contains(Sym.getName()) ||
!Sym.canBeOmittedFromSymbolTable());

// The final definition will reside in this linkage unit if the symbol is
// defined and local to the module. This only checks for bitcode files,
// full assertion will require complete symbol resolution.
// The final definition will reside in this linkage unit if the symbol
// is defined and local to the module. This only checks for bitcode
// files, full assertion will require complete symbol resolution.
Res.FinalDefinitionInLinkageUnit =
Sym.getVisibility() != GlobalValue::DefaultVisibility &&
(!Sym.isUndefined() && !Sym.isCommon());
Expand Down Expand Up @@ -956,8 +972,8 @@ Error linkBitcodeFiles(SmallVectorImpl<OffloadFile> &InputFiles,
return Error::success();
}

// Append the new inputs to the device linker input. If the user requested an
// internalizing link we need to pass the bitcode to clang.
// Append the new inputs to the device linker input. If the user requested
// an internalizing link we need to pass the bitcode to clang.
for (StringRef File :
Args.hasArg(OPT_clang_backend) || Args.hasArg(OPT_builtin_bitcode_EQ)
? BitcodeOutput
Expand All @@ -972,10 +988,9 @@ Expected<StringRef> writeOffloadFile(const OffloadFile &File) {

StringRef Prefix =
sys::path::stem(Binary.getMemoryBufferRef().getBufferIdentifier());
StringRef Suffix = getImageKindName(Binary.getImageKind());

auto TempFileOrErr = createOutputFile(
Prefix + "-" + Binary.getTriple() + "-" + Binary.getArch(), Suffix);
Prefix + "-" + Binary.getTriple() + "-" + Binary.getArch(), "o");
if (!TempFileOrErr)
return TempFileOrErr.takeError();

Expand Down Expand Up @@ -1188,8 +1203,8 @@ DerivedArgList getLinkerArgs(ArrayRef<OffloadFile> Input,
DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_triple_EQ),
Args.MakeArgString(Input.front().getBinary()->getTriple()));

// If every input file is bitcode we have whole program visibility as we do
// only support static linking with bitcode.
// If every input file is bitcode we have whole program visibility as we
// do only support static linking with bitcode.
auto ContainsBitcode = [](const OffloadFile &F) {
return identify_magic(F.getBinary()->getImage()) == file_magic::bitcode;
};
Expand Down Expand Up @@ -1277,12 +1292,15 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
if (File.getBinary()->getOffloadKind() != OFK_None)
ActiveOffloadKinds.insert(File.getBinary()->getOffloadKind());

// First link and remove all the input files containing bitcode.
// First link and remove all the input files containing bitcode if
// the target linker does not support it natively.
SmallVector<StringRef> InputFiles;
if (Error Err = linkBitcodeFiles(Input, InputFiles, LinkerArgs))
return Err;
if (!linkerSupportsLTO(LinkerArgs))
if (Error Err = linkBitcodeFiles(Input, InputFiles, LinkerArgs))
return Err;

// Write any remaining device inputs to an output file for the linker.
// Write any remaining device inputs to an output file for the
// linker.
for (const OffloadFile &File : Input) {
auto FileNameOrErr = writeOffloadFile(File);
if (!FileNameOrErr)
Expand All @@ -1291,9 +1309,10 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
}

// Link the remaining device files using the device linker.
auto OutputOrErr = !Args.hasArg(OPT_embed_bitcode)
? linkDevice(InputFiles, LinkerArgs)
: InputFiles.front();
auto OutputOrErr =
!Args.hasArg(OPT_embed_bitcode) || linkerSupportsLTO(LinkerArgs)
? linkDevice(InputFiles, LinkerArgs)
: InputFiles.front();
if (!OutputOrErr)
return OutputOrErr.takeError();

Expand Down Expand Up @@ -1420,12 +1439,14 @@ Expected<bool> getSymbolsFromBitcode(MemoryBufferRef Buffer, OffloadKind Kind,
bool NewSymbol = Syms.count(Sym.getName()) == 0;
auto OldSym = NewSymbol ? Sym_None : Syms[Sym.getName()];

// We will extract if it defines a currenlty undefined non-weak symbol.
// We will extract if it defines a currenlty undefined non-weak
// symbol.
bool ResolvesStrongReference =
((OldSym & Sym_Undefined && !(OldSym & Sym_Weak)) &&
!Sym.isUndefined());
// We will extract if it defines a new global symbol visible to the host.
// This is only necessary for code targeting an offloading language.
// We will extract if it defines a new global symbol visible to the
// host. This is only necessary for code targeting an offloading
// language.
bool NewGlobalSymbol =
((NewSymbol || (OldSym & Sym_Undefined)) && !Sym.isUndefined() &&
!Sym.canBeOmittedFromSymbolTable() && Kind != object::OFK_None &&
Expand Down Expand Up @@ -1480,8 +1501,9 @@ Expected<bool> getSymbolsFromObject(const ObjectFile &Obj, OffloadKind Kind,
!(OldSym & Sym_Weak) &&
!(*FlagsOrErr & SymbolRef::SF_Undefined);

// We will extract if it defines a new global symbol visible to the host.
// This is only necessary for code targeting an offloading language.
// We will extract if it defines a new global symbol visible to the
// host. This is only necessary for code targeting an offloading
// language.
bool NewGlobalSymbol =
((NewSymbol || (OldSym & Sym_Undefined)) &&
!(*FlagsOrErr & SymbolRef::SF_Undefined) && Kind != object::OFK_None &&
Expand Down Expand Up @@ -1648,8 +1670,8 @@ getDeviceInput(const ArgList &Args) {

Expected<bool> ExtractOrErr =
getSymbols(Binary.getBinary()->getImage(),
Binary.getBinary()->getOffloadKind(), /*IsArchive=*/true,
Saver, Syms[ID]);
Binary.getBinary()->getOffloadKind(),
/*IsArchive=*/true, Saver, Syms[ID]);
if (!ExtractOrErr)
return ExtractOrErr.takeError();

Expand Down
Loading