Skip to content

[BOLT] Use symbol table info in registerFragment #89648

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Apr 29, 2024
Merged
3 changes: 3 additions & 0 deletions bolt/include/bolt/Rewrite/RewriteInstance.h
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,9 @@ class RewriteInstance {
/// Store all non-zero symbols in this map for a quick address lookup.
std::map<uint64_t, llvm::object::SymbolRef> FileSymRefs;

/// FILE symbols used for disambiguating split function parents.
std::vector<ELFSymbolRef> FileSymbols;

std::unique_ptr<DWARFRewriter> DebugInfoRewriter;

std::unique_ptr<BoltAddressTranslation> BAT;
Expand Down
17 changes: 15 additions & 2 deletions bolt/include/bolt/Utils/NameResolver.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,23 @@ class NameResolver {
static constexpr char Sep = '/';

public:
/// Return unique version of the \p Name in the form "Name<Sep><Number>".
/// Return the number of uniquified versions of a given \p Name.
uint64_t getUniquifiedNameCount(StringRef Name) const {
if (Counters.contains(Name))
return Counters.at(Name);
return 0;
}

/// Return unique version of the \p Name in the form "Name<Sep><ID>".
std::string getUniqueName(StringRef Name, const uint64_t ID) const {
return (Name + Twine(Sep) + Twine(ID)).str();
}

/// Register new version of \p Name and return unique version in the form
/// "Name<Sep><Number>".
std::string uniquify(StringRef Name) {
const uint64_t ID = ++Counters[Name];
return (Name + Twine(Sep) + Twine(ID)).str();
return getUniqueName(Name, ID);
}

/// For uniquified \p Name, return the original form (that may no longer be
Expand Down
122 changes: 95 additions & 27 deletions bolt/lib/Rewrite/RewriteInstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -840,6 +840,7 @@ void RewriteInstance::discoverFileObjects() {
continue;

if (cantFail(Symbol.getType()) == SymbolRef::ST_File) {
FileSymbols.emplace_back(Symbol);
StringRef Name =
cantFail(std::move(NameOrError), "cannot get symbol name for file");
// Ignore Clang LTO artificial FILE symbol as it is not always generated,
Expand Down Expand Up @@ -1340,6 +1341,7 @@ void RewriteInstance::discoverFileObjects() {
}

registerFragments();
FileSymbols.clear();
}

Error RewriteInstance::discoverRtFiniAddress() {
Expand Down Expand Up @@ -1417,50 +1419,116 @@ void RewriteInstance::registerFragments() {
if (!BC->HasSplitFunctions)
return;

// Process fragments with ambiguous parents separately as they are typically a
// vanishing minority of cases and require expensive symbol table lookups.
std::vector<std::pair<StringRef, BinaryFunction *>> AmbiguousFragments;
for (auto &BFI : BC->getBinaryFunctions()) {
BinaryFunction &Function = BFI.second;
if (!Function.isFragment())
continue;
unsigned ParentsFound = 0;
for (StringRef Name : Function.getNames()) {
StringRef BaseName, Suffix;
std::tie(BaseName, Suffix) = Name.split('/');
StringRef BaseName = NR.restore(Name);
const bool IsGlobal = BaseName == Name;
const size_t ColdSuffixPos = BaseName.find(".cold");
if (ColdSuffixPos == StringRef::npos)
continue;
// For cold function with local (foo.cold/1) symbol, prefer a parent with
// local symbol as well (foo/1) over global symbol (foo).
std::string ParentName = BaseName.substr(0, ColdSuffixPos).str();
StringRef ParentName = BaseName.substr(0, ColdSuffixPos);
const BinaryData *BD = BC->getBinaryDataByName(ParentName);
if (Suffix != "") {
ParentName.append(Twine("/", Suffix).str());
const BinaryData *BDLocal = BC->getBinaryDataByName(ParentName);
if (BDLocal || !BD)
BD = BDLocal;
}
if (!BD) {
if (opts::Verbosity >= 1)
BC->outs() << "BOLT-INFO: parent function not found for " << Name
<< "\n";
const uint64_t NumPossibleLocalParents =
NR.getUniquifiedNameCount(ParentName);
// The most common case: single local parent fragment.
if (!BD && NumPossibleLocalParents == 1) {
BD = BC->getBinaryDataByName(NR.getUniqueName(ParentName, 1));
} else if (BD && (!NumPossibleLocalParents || IsGlobal)) {
// Global parent and either no local candidates (second most common), or
// the fragment is global as well (uncommon).
} else {
// Any other case: need to disambiguate using FILE symbols.
AmbiguousFragments.emplace_back(ParentName, &Function);
continue;
}
const uint64_t Address = BD->getAddress();
BinaryFunction *BF = BC->getBinaryFunctionAtAddress(Address);
if (!BF) {
if (opts::Verbosity >= 1)
BC->outs() << formatv(
"BOLT-INFO: parent function not found at {0:x}\n", Address);
continue;
if (BD) {
BinaryFunction *BF = BC->getFunctionForSymbol(BD->getSymbol());
if (BF) {
BC->registerFragment(Function, *BF);
continue;
}
}
BC->registerFragment(Function, *BF);
++ParentsFound;
}
if (!ParentsFound) {
BC->errs() << "BOLT-ERROR: parent function not found for " << Function
<< '\n';
exit(1);
}
}

if (AmbiguousFragments.empty())
return;

if (!BC->hasSymbolsWithFileName()) {
BC->errs() << "BOLT-ERROR: input file has split functions but does not "
"have FILE symbols. If the binary was stripped, preserve "
"FILE symbols with --keep-file-symbols strip option";
exit(1);
}

// The first global symbol is identified by the symbol table sh_info value.
// Used as local symbol search stopping point.
auto *ELF64LEFile = cast<ELF64LEObjectFile>(InputFile);
const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();
auto *SymTab = llvm::find_if(cantFail(Obj.sections()), [](const auto &Sec) {
return Sec.sh_type == ELF::SHT_SYMTAB;
});
assert(SymTab);
// Symtab sh_info contains the value one greater than the symbol table index
// of the last local symbol.
ELFSymbolRef LocalSymEnd = ELF64LEFile->toSymbolRef(SymTab, SymTab->sh_info);

for (auto &[ParentName, BF] : AmbiguousFragments) {
const uint64_t Address = BF->getAddress();

// Get fragment's own symbol
const auto SymIt = FileSymRefs.find(Address);
if (SymIt == FileSymRefs.end()) {
BC->errs()
<< "BOLT-ERROR: symbol lookup failed for function at address 0x"
<< Twine::utohexstr(Address) << '\n';
exit(1);
}

// Find containing FILE symbol
ELFSymbolRef Symbol = SymIt->second;
auto FSI = llvm::upper_bound(FileSymbols, Symbol);
if (FSI == FileSymbols.begin()) {
BC->errs() << "BOLT-ERROR: owning FILE symbol not found for symbol "
<< cantFail(Symbol.getName()) << '\n';
exit(1);
}

ELFSymbolRef StopSymbol = LocalSymEnd;
if (FSI != FileSymbols.end())
StopSymbol = *FSI;

uint64_t ParentAddress{0};
// Iterate over local file symbols and check symbol names to match parent.
for (ELFSymbolRef Symbol(FSI[-1]); Symbol < StopSymbol; Symbol.moveNext()) {
if (cantFail(Symbol.getName()) == ParentName) {
ParentAddress = cantFail(Symbol.getAddress());
break;
}
}

// No local parent is found, use global parent function.
if (!ParentAddress)
if (BinaryData *ParentBD = BC->getBinaryDataByName(ParentName))
ParentAddress = ParentBD->getAddress();

if (BinaryFunction *ParentBF =
BC->getBinaryFunctionAtAddress(ParentAddress)) {
BC->registerFragment(*BF, *ParentBF);
continue;
}
BC->errs() << "BOLT-ERROR: parent function not found for " << *BF << '\n';
exit(1);
}
}

void RewriteInstance::createPLTBinaryFunction(uint64_t TargetAddress,
Expand Down
54 changes: 44 additions & 10 deletions bolt/test/X86/fragment-lite.s
Original file line number Diff line number Diff line change
Expand Up @@ -3,35 +3,42 @@
# RUN: split-file %s %t
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/baz.s -o %t.baz.o
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/baz2.s -o %t.baz2.o
# RUN: link_fdata %s %t.o %t.main.fdata
# RUN: link_fdata %s %t.baz.o %t.baz.fdata
# RUN: merge-fdata %t.main.fdata %t.baz.fdata > %t.fdata
# RUN: %clang %cflags %t.o %t.baz.o -o %t.exe -Wl,-q
# RUN: link_fdata %s %t.baz2.o %t.baz2.fdata
# RUN: merge-fdata %t.main.fdata %t.baz.fdata %t.baz2.fdata > %t.fdata
# RUN: %clang %cflags %t.o %t.baz.o %t.baz2.o -o %t.exe -Wl,-q
# RUN: llvm-bolt %t.exe -o %t.out --lite=1 --data %t.fdata -v=1 -print-cfg \
# RUN: 2>&1 | FileCheck %s

# CHECK: BOLT-INFO: processing main.cold.1 as a sibling of non-ignored function
# CHECK: BOLT-INFO: processing foo.cold.1/1 as a sibling of non-ignored function
# CHECK: BOLT-INFO: processing bar.cold.1/1 as a sibling of non-ignored function
# CHECK: BOLT-INFO: processing foo.cold.1/1(*2) as a sibling of non-ignored function
# CHECK: BOLT-INFO: processing bar.cold.1/1(*2) as a sibling of non-ignored function
# CHECK: BOLT-INFO: processing baz.cold.1 as a sibling of non-ignored function
# CHECK: BOLT-INFO: processing baz.cold.1/1 as a sibling of non-ignored function
# CHECK: BOLT-INFO: processing baz.cold.1/1(*2) as a sibling of non-ignored function
# CHECK: BOLT-INFO: processing baz.cold.1/2(*2) as a sibling of non-ignored function

# CHECK: Binary Function "main.cold.1" after building cfg
# CHECK: Parent : main

# CHECK: Binary Function "foo.cold.1/1" after building cfg
# CHECK: Binary Function "foo.cold.1/1(*2)" after building cfg
# CHECK: Parent : foo

# CHECK: Binary Function "bar.cold.1/1" after building cfg
# CHECK: Parent : bar/1
# CHECK: Binary Function "bar.cold.1/1(*2)" after building cfg
# CHECK: Parent : bar/1(*2)

# CHECK: Binary Function "baz.cold.1" after building cfg
# CHECK: Parent : baz{{$}}

# CHECK: Binary Function "baz.cold.1/1" after building cfg
# CHECK: Parent : baz/1
# CHECK: Binary Function "baz.cold.1/1(*2)" after building cfg
# CHECK: Parent : baz/1(*2)

# CHECK: Binary Function "baz.cold.1/2(*2)" after building cfg
# CHECK: Parent : baz/2(*2)

#--- main.s
.file "main.s"
.globl main
.type main, %function
main:
Expand Down Expand Up @@ -126,6 +133,7 @@ baz.cold.1:
.size baz.cold.1, .-baz.cold.1

#--- baz.s
.file "baz.s"
.local baz
.type baz, %function
baz:
Expand All @@ -149,3 +157,29 @@ baz.cold.1:
retq
.cfi_endproc
.size baz.cold.1, .-baz.cold.1

#--- baz2.s
.file "baz2.s"
.local baz
.type baz, %function
baz:
.cfi_startproc
# FDATA: 0 [unknown] 0 1 baz/2 0 1 0
cmpl $0x0, %eax
je baz.cold.1
retq
.cfi_endproc
.size baz, .-baz

.section .text.cold
.local baz.cold.1
.type baz.cold.1, %function
baz.cold.1:
.cfi_startproc
pushq %rbp
movq %rsp, %rbp
movl $0x0, %eax
popq %rbp
retq
.cfi_endproc
.size baz.cold.1, .-baz.cold.1