Skip to content

[LinkerWrapper] Extract device archives if symbol is used on the host #111921

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 0 additions & 47 deletions clang/test/Driver/linker-wrapper-libs.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,30 +51,6 @@ int bar() { return weak; }
// LIBRARY-RESOLVES: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o
// LIBRARY-RESOLVES: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o

//
// Check that we extract a static library that defines a global visibile to the
// host.
//
// RUN: %clang -cc1 %s -triple nvptx64-nvidia-cuda -emit-llvm-bc -DGLOBAL -o %t.nvptx.global.bc
// RUN: %clang -cc1 %s -triple amdgcn-amd-amdhsa -emit-llvm-bc -DGLOBAL -o %t.amdgpu.global.bc
// RUN: clang-offload-packager -o %t-lib.out \
// RUN: --image=file=%t.nvptx.global.bc,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
// RUN: --image=file=%t.amdgpu.global.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t-lib.out
// RUN: llvm-ar rcs %t.a %t.o
// RUN: clang-offload-packager -o %t.out \
// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
// RUN: --linker-path=/usr/bin/ld %t.o %t.a -o a.out 2>&1 \
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
// RUN: --linker-path=/usr/bin/ld %t.a %t.o -o a.out 2>&1 \
// RUN: | FileCheck %s --check-prefix=LIBRARY-GLOBAL

// LIBRARY-GLOBAL: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o
// LIBRARY-GLOBAL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o

//
// Check that we do not extract a global symbol if the source file was not
// created by an offloading language that expects there to be a host version of
Expand Down Expand Up @@ -142,29 +118,6 @@ int bar() { return weak; }
// LIBRARY-HIDDEN-NOT: {{.*}}.o {{.*}}.o
// LIBRARY-HIDDEN: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030

//
// Check that we do not extract a static library that defines a global visibile
// to the host that is already defined.
//
// RUN: %clang -cc1 %s -triple nvptx64-nvidia-cuda -emit-llvm-bc -DGLOBAL -o %t.nvptx.global.bc
// RUN: %clang -cc1 %s -triple amdgcn-amd-amdhsa -emit-llvm-bc -DGLOBAL -o %t.amdgpu.global.bc
// RUN: clang-offload-packager -o %t-lib.out \
// RUN: --image=file=%t.nvptx.global.bc,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
// RUN: --image=file=%t.amdgpu.global.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t-lib.out
// RUN: llvm-ar rcs %t.a %t.o
// RUN: clang-offload-packager -o %t.out \
// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
// RUN: --linker-path=/usr/bin/ld %t.o %t.a %t.a -o a.out 2>&1 \
// RUN: | FileCheck %s --check-prefix=LIBRARY-GLOBAL-DEFINED

// LIBRARY-GLOBAL-DEFINED: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o
// LIBRARY-GLOBAL-DEFINED-NOT: {{.*}}gfx1030{{.*}}.o
// LIBRARY-GLOBAL-DEFINED: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o

//
// Check that we can use --[no-]whole-archive to control extraction.
//
Expand Down
7 changes: 2 additions & 5 deletions clang/test/Driver/linker-wrapper.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@

// REQUIRES: system-linux

// An externally visible variable so static libraries extract.
__attribute__((visibility("protected"), used)) int x;

// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.elf.o
// RUN: %clang -cc1 %s -triple nvptx64-nvidia-cuda -emit-llvm-bc -o %t.nvptx.bc
// RUN: %clang -cc1 %s -triple amdgcn-amd-amdhsa -emit-llvm-bc -o %t.amdgpu.bc
Expand Down Expand Up @@ -169,7 +166,7 @@ __attribute__((visibility("protected"), used)) int x;
// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx90a:xnack-
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t-off.o -fembed-offload-object=%t-off.out
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
// RUN: --linker-path=/usr/bin/ld %t-on.o %t-off.o %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=AMD-TARGET-ID
// RUN: --linker-path=/usr/bin/ld %t-on.o %t-off.o --whole-archive %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=AMD-TARGET-ID

// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+ -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack- -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
Expand All @@ -185,7 +182,7 @@ __attribute__((visibility("protected"), used)) int x;
// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t2.o -fembed-offload-object=%t2.out
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
// RUN: --linker-path=/usr/bin/ld %t1.o %t2.o %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=ARCH-ALL
// RUN: --linker-path=/usr/bin/ld %t1.o %t2.o --whole-archive %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=ARCH-ALL

// ARCH-ALL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
// ARCH-ALL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
Expand Down
123 changes: 105 additions & 18 deletions clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1479,14 +1479,7 @@ Expected<bool> getSymbolsFromBitcode(MemoryBufferRef Buffer, OffloadKind Kind,
bool ResolvesStrongReference =
((OldSym & Sym_Undefined && !(OldSym & Sym_Weak)) &&
!Sym.isUndefined());
// We will extract if it defines a new global symbol visible to the
// host. This is only necessary for code targeting an offloading
// language.
bool NewGlobalSymbol =
((NewSymbol || (OldSym & Sym_Undefined)) && !Sym.isUndefined() &&
!Sym.canBeOmittedFromSymbolTable() && Kind != object::OFK_None &&
(Sym.getVisibility() != GlobalValue::HiddenVisibility));
ShouldExtract |= ResolvesStrongReference | NewGlobalSymbol;
ShouldExtract |= ResolvesStrongReference;

// Update this symbol in the "table" with the new information.
if (OldSym & Sym_Undefined && !Sym.isUndefined())
Expand Down Expand Up @@ -1535,15 +1528,7 @@ Expected<bool> getSymbolsFromObject(const ObjectFile &Obj, OffloadKind Kind,
bool ResolvesStrongReference = (OldSym & Sym_Undefined) &&
!(OldSym & Sym_Weak) &&
!(*FlagsOrErr & SymbolRef::SF_Undefined);

// We will extract if it defines a new global symbol visible to the
// host. This is only necessary for code targeting an offloading
// language.
bool NewGlobalSymbol =
((NewSymbol || (OldSym & Sym_Undefined)) &&
!(*FlagsOrErr & SymbolRef::SF_Undefined) && Kind != object::OFK_None &&
!(*FlagsOrErr & SymbolRef::SF_Hidden));
ShouldExtract |= ResolvesStrongReference | NewGlobalSymbol;
ShouldExtract |= ResolvesStrongReference;

// Update this symbol in the "table" with the new information.
if (OldSym & Sym_Undefined && !(*FlagsOrErr & SymbolRef::SF_Undefined))
Expand Down Expand Up @@ -1588,10 +1573,101 @@ Expected<bool> getSymbols(StringRef Image, OffloadKind Kind, bool IsArchive,
}
}

Error getNeededDeviceGlobalsFromBitcode(MemoryBufferRef Buffer,
SmallVectorImpl<std::string> &Symbols) {

LLVMContext Context;
SMDiagnostic Err;
std::unique_ptr<Module> M = getLazyIRModule(
MemoryBuffer::getMemBuffer(Buffer, /*RequiresNullTerminator=*/false), Err,
Context);
if (!M)
return createStringError(inconvertibleErrorCode(),
"Failed to create module");

// Extract offloading data from globals referenced by the
// `llvm.embedded.object` metadata with the `.llvm.offloading` section.
auto *MD = M->getNamedMetadata("llvm.offloading.symbols");
if (!MD)
return Error::success();

for (const MDNode *Op : MD->operands()) {
GlobalVariable *GV =
mdconst::dyn_extract_or_null<GlobalVariable>(Op->getOperand(0));
if (!GV)
continue;

auto *CDS = dyn_cast<ConstantDataSequential>(GV->getInitializer());
if (!CDS)
continue;

Symbols.emplace_back(CDS->getAsCString().str());
}

return Error::success();
}

Error getNeededDeviceGlobalsFromObject(const ObjectFile &Obj,
SmallVectorImpl<std::string> &Symbols) {
for (const auto &Sec : Obj.sections()) {
auto NameOrErr = Sec.getName();
if (!NameOrErr)
return NameOrErr.takeError();

if (*NameOrErr != ".llvm.rodata.offloading")
continue;

auto ContentsOrErr = Sec.getContents();
if (!ContentsOrErr)
return ContentsOrErr.takeError();
llvm::transform(llvm::split(ContentsOrErr->drop_back(), '\0'),
std::back_inserter(Symbols),
[](StringRef Str) { return Str.str(); });
}
return Error::success();
}

Error getNeededDeviceGlobals(MemoryBufferRef Buffer,
SmallVectorImpl<std::string> &Symbols) {
file_magic Type = identify_magic(Buffer.getBuffer());
switch (Type) {
case file_magic::bitcode:
return getNeededDeviceGlobalsFromBitcode(Buffer, Symbols);
case file_magic::elf_relocatable:
case file_magic::coff_object: {
Expected<std::unique_ptr<ObjectFile>> ObjFile =
ObjectFile::createObjectFile(Buffer, Type);
if (!ObjFile)
return ObjFile.takeError();
return getNeededDeviceGlobalsFromObject(*ObjFile->get(), Symbols);
}
case file_magic::archive: {
Expected<std::unique_ptr<llvm::object::Archive>> LibFile =
object::Archive::create(Buffer);
if (!LibFile)
return LibFile.takeError();
Error Err = Error::success();
for (auto Child : (*LibFile)->children(Err)) {
auto ChildBufferOrErr = Child.getMemoryBufferRef();
if (!ChildBufferOrErr)
return ChildBufferOrErr.takeError();
if (Error Err = getNeededDeviceGlobals(*ChildBufferOrErr, Symbols))
return Err;
}
if (Err)
return Err;
return Error::success();
}
default:
return Error::success();
}
}

/// Search the input files and libraries for embedded device offloading code
/// and add it to the list of files to be linked. Files coming from static
/// libraries are only added to the input if they are used by an existing
/// input file. Returns a list of input files intended for a single linking job.
/// input file. Returns a list of input files intended for a single linking
/// job.
Expected<SmallVector<SmallVector<OffloadFile>>>
getDeviceInput(const ArgList &Args) {
llvm::TimeTraceScope TimeScope("ExtractDeviceCode");
Expand All @@ -1612,6 +1688,7 @@ getDeviceInput(const ArgList &Args) {
bool WholeArchive = Args.hasArg(OPT_wholearchive_flag) ? true : false;
SmallVector<OffloadFile> ObjectFilesToExtract;
SmallVector<OffloadFile> ArchiveFilesToExtract;
SmallVector<std::string> NeededSymbols;
for (const opt::Arg *Arg : Args.filtered(
OPT_INPUT, OPT_library, OPT_whole_archive, OPT_no_whole_archive)) {
if (Arg->getOption().matches(OPT_whole_archive) ||
Expand Down Expand Up @@ -1642,6 +1719,9 @@ getDeviceInput(const ArgList &Args) {
if (identify_magic(Buffer.getBuffer()) == file_magic::elf_shared_object)
continue;

if (Error Err = getNeededDeviceGlobals(Buffer, NeededSymbols))
return std::move(Err);

SmallVector<OffloadFile> Binaries;
if (Error Err = extractOffloadBinaries(Buffer, Binaries))
return std::move(Err);
Expand All @@ -1668,6 +1748,13 @@ getDeviceInput(const ArgList &Args) {
CompatibleTargets.emplace_back(ID);

for (const auto &[Index, ID] : llvm::enumerate(CompatibleTargets)) {
// Initialize the symbol table with the device globals that host needs.
// This will force them to be extracted if they are defined in a library.
if (!Syms.contains(ID)) {
for (auto &Symbol : NeededSymbols)
auto &Val = Syms[ID][Symbol] = Sym_Undefined;
}

Expected<bool> ExtractOrErr = getSymbols(
Binary.getBinary()->getImage(), Binary.getBinary()->getOffloadKind(),
/*IsArchive=*/false, Saver, Syms[ID]);
Expand Down
Loading