Skip to content

Commit a1e9608

Browse files
authored
[BOLT] Use symbol table info in registerFragment (#89648)
Fragment matching relies on symbol names to identify and register split function fragments. However, as split fragments are often local symbols, name aliasing is possible. For such cases, use symbol table to resolve ambiguities. This requires the presence of FILE symbols in the input binary. As BOLT requires non-stripped binary, this is a reasonable assumption. Note that `strip -g` removes FILE symbols by default, but `--keep-file-symbols` can be used to preserve them. Depends on: #89861 Test Plan: Updated X86/fragment-lite.s
1 parent 7e2eeb5 commit a1e9608

File tree

4 files changed

+157
-39
lines changed

4 files changed

+157
-39
lines changed

bolt/include/bolt/Rewrite/RewriteInstance.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -494,6 +494,9 @@ class RewriteInstance {
494494
/// Store all non-zero symbols in this map for a quick address lookup.
495495
std::map<uint64_t, llvm::object::SymbolRef> FileSymRefs;
496496

497+
/// FILE symbols used for disambiguating split function parents.
498+
std::vector<ELFSymbolRef> FileSymbols;
499+
497500
std::unique_ptr<DWARFRewriter> DebugInfoRewriter;
498501

499502
std::unique_ptr<BoltAddressTranslation> BAT;

bolt/include/bolt/Utils/NameResolver.h

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,23 @@ class NameResolver {
2828
static constexpr char Sep = '/';
2929

3030
public:
31-
/// Return unique version of the \p Name in the form "Name<Sep><Number>".
31+
/// Return the number of uniquified versions of a given \p Name.
32+
uint64_t getUniquifiedNameCount(StringRef Name) const {
33+
if (Counters.contains(Name))
34+
return Counters.at(Name);
35+
return 0;
36+
}
37+
38+
/// Return unique version of the \p Name in the form "Name<Sep><ID>".
39+
std::string getUniqueName(StringRef Name, const uint64_t ID) const {
40+
return (Name + Twine(Sep) + Twine(ID)).str();
41+
}
42+
43+
/// Register new version of \p Name and return unique version in the form
44+
/// "Name<Sep><Number>".
3245
std::string uniquify(StringRef Name) {
3346
const uint64_t ID = ++Counters[Name];
34-
return (Name + Twine(Sep) + Twine(ID)).str();
47+
return getUniqueName(Name, ID);
3548
}
3649

3750
/// For uniquified \p Name, return the original form (that may no longer be

bolt/lib/Rewrite/RewriteInstance.cpp

Lines changed: 95 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -840,6 +840,7 @@ void RewriteInstance::discoverFileObjects() {
840840
continue;
841841

842842
if (cantFail(Symbol.getType()) == SymbolRef::ST_File) {
843+
FileSymbols.emplace_back(Symbol);
843844
StringRef Name =
844845
cantFail(std::move(NameOrError), "cannot get symbol name for file");
845846
// Ignore Clang LTO artificial FILE symbol as it is not always generated,
@@ -1340,6 +1341,7 @@ void RewriteInstance::discoverFileObjects() {
13401341
}
13411342

13421343
registerFragments();
1344+
FileSymbols.clear();
13431345
}
13441346

13451347
Error RewriteInstance::discoverRtFiniAddress() {
@@ -1417,50 +1419,116 @@ void RewriteInstance::registerFragments() {
14171419
if (!BC->HasSplitFunctions)
14181420
return;
14191421

1422+
// Process fragments with ambiguous parents separately as they are typically a
1423+
// vanishing minority of cases and require expensive symbol table lookups.
1424+
std::vector<std::pair<StringRef, BinaryFunction *>> AmbiguousFragments;
14201425
for (auto &BFI : BC->getBinaryFunctions()) {
14211426
BinaryFunction &Function = BFI.second;
14221427
if (!Function.isFragment())
14231428
continue;
1424-
unsigned ParentsFound = 0;
14251429
for (StringRef Name : Function.getNames()) {
1426-
StringRef BaseName, Suffix;
1427-
std::tie(BaseName, Suffix) = Name.split('/');
1430+
StringRef BaseName = NR.restore(Name);
1431+
const bool IsGlobal = BaseName == Name;
14281432
const size_t ColdSuffixPos = BaseName.find(".cold");
14291433
if (ColdSuffixPos == StringRef::npos)
14301434
continue;
1431-
// For cold function with local (foo.cold/1) symbol, prefer a parent with
1432-
// local symbol as well (foo/1) over global symbol (foo).
1433-
std::string ParentName = BaseName.substr(0, ColdSuffixPos).str();
1435+
StringRef ParentName = BaseName.substr(0, ColdSuffixPos);
14341436
const BinaryData *BD = BC->getBinaryDataByName(ParentName);
1435-
if (Suffix != "") {
1436-
ParentName.append(Twine("/", Suffix).str());
1437-
const BinaryData *BDLocal = BC->getBinaryDataByName(ParentName);
1438-
if (BDLocal || !BD)
1439-
BD = BDLocal;
1440-
}
1441-
if (!BD) {
1442-
if (opts::Verbosity >= 1)
1443-
BC->outs() << "BOLT-INFO: parent function not found for " << Name
1444-
<< "\n";
1437+
const uint64_t NumPossibleLocalParents =
1438+
NR.getUniquifiedNameCount(ParentName);
1439+
// The most common case: single local parent fragment.
1440+
if (!BD && NumPossibleLocalParents == 1) {
1441+
BD = BC->getBinaryDataByName(NR.getUniqueName(ParentName, 1));
1442+
} else if (BD && (!NumPossibleLocalParents || IsGlobal)) {
1443+
// Global parent and either no local candidates (second most common), or
1444+
// the fragment is global as well (uncommon).
1445+
} else {
1446+
// Any other case: need to disambiguate using FILE symbols.
1447+
AmbiguousFragments.emplace_back(ParentName, &Function);
14451448
continue;
14461449
}
1447-
const uint64_t Address = BD->getAddress();
1448-
BinaryFunction *BF = BC->getBinaryFunctionAtAddress(Address);
1449-
if (!BF) {
1450-
if (opts::Verbosity >= 1)
1451-
BC->outs() << formatv(
1452-
"BOLT-INFO: parent function not found at {0:x}\n", Address);
1453-
continue;
1450+
if (BD) {
1451+
BinaryFunction *BF = BC->getFunctionForSymbol(BD->getSymbol());
1452+
if (BF) {
1453+
BC->registerFragment(Function, *BF);
1454+
continue;
1455+
}
14541456
}
1455-
BC->registerFragment(Function, *BF);
1456-
++ParentsFound;
1457-
}
1458-
if (!ParentsFound) {
14591457
BC->errs() << "BOLT-ERROR: parent function not found for " << Function
14601458
<< '\n';
14611459
exit(1);
14621460
}
14631461
}
1462+
1463+
if (AmbiguousFragments.empty())
1464+
return;
1465+
1466+
if (!BC->hasSymbolsWithFileName()) {
1467+
BC->errs() << "BOLT-ERROR: input file has split functions but does not "
1468+
"have FILE symbols. If the binary was stripped, preserve "
1469+
"FILE symbols with --keep-file-symbols strip option";
1470+
exit(1);
1471+
}
1472+
1473+
// The first global symbol is identified by the symbol table sh_info value.
1474+
// Used as local symbol search stopping point.
1475+
auto *ELF64LEFile = cast<ELF64LEObjectFile>(InputFile);
1476+
const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();
1477+
auto *SymTab = llvm::find_if(cantFail(Obj.sections()), [](const auto &Sec) {
1478+
return Sec.sh_type == ELF::SHT_SYMTAB;
1479+
});
1480+
assert(SymTab);
1481+
// Symtab sh_info contains the value one greater than the symbol table index
1482+
// of the last local symbol.
1483+
ELFSymbolRef LocalSymEnd = ELF64LEFile->toSymbolRef(SymTab, SymTab->sh_info);
1484+
1485+
for (auto &[ParentName, BF] : AmbiguousFragments) {
1486+
const uint64_t Address = BF->getAddress();
1487+
1488+
// Get fragment's own symbol
1489+
const auto SymIt = FileSymRefs.find(Address);
1490+
if (SymIt == FileSymRefs.end()) {
1491+
BC->errs()
1492+
<< "BOLT-ERROR: symbol lookup failed for function at address 0x"
1493+
<< Twine::utohexstr(Address) << '\n';
1494+
exit(1);
1495+
}
1496+
1497+
// Find containing FILE symbol
1498+
ELFSymbolRef Symbol = SymIt->second;
1499+
auto FSI = llvm::upper_bound(FileSymbols, Symbol);
1500+
if (FSI == FileSymbols.begin()) {
1501+
BC->errs() << "BOLT-ERROR: owning FILE symbol not found for symbol "
1502+
<< cantFail(Symbol.getName()) << '\n';
1503+
exit(1);
1504+
}
1505+
1506+
ELFSymbolRef StopSymbol = LocalSymEnd;
1507+
if (FSI != FileSymbols.end())
1508+
StopSymbol = *FSI;
1509+
1510+
uint64_t ParentAddress{0};
1511+
// Iterate over local file symbols and check symbol names to match parent.
1512+
for (ELFSymbolRef Symbol(FSI[-1]); Symbol < StopSymbol; Symbol.moveNext()) {
1513+
if (cantFail(Symbol.getName()) == ParentName) {
1514+
ParentAddress = cantFail(Symbol.getAddress());
1515+
break;
1516+
}
1517+
}
1518+
1519+
// No local parent is found, use global parent function.
1520+
if (!ParentAddress)
1521+
if (BinaryData *ParentBD = BC->getBinaryDataByName(ParentName))
1522+
ParentAddress = ParentBD->getAddress();
1523+
1524+
if (BinaryFunction *ParentBF =
1525+
BC->getBinaryFunctionAtAddress(ParentAddress)) {
1526+
BC->registerFragment(*BF, *ParentBF);
1527+
continue;
1528+
}
1529+
BC->errs() << "BOLT-ERROR: parent function not found for " << *BF << '\n';
1530+
exit(1);
1531+
}
14641532
}
14651533

14661534
void RewriteInstance::createPLTBinaryFunction(uint64_t TargetAddress,

bolt/test/X86/fragment-lite.s

Lines changed: 44 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,35 +3,42 @@
33
# RUN: split-file %s %t
44
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o
55
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/baz.s -o %t.baz.o
6+
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/baz2.s -o %t.baz2.o
67
# RUN: link_fdata %s %t.o %t.main.fdata
78
# RUN: link_fdata %s %t.baz.o %t.baz.fdata
8-
# RUN: merge-fdata %t.main.fdata %t.baz.fdata > %t.fdata
9-
# RUN: %clang %cflags %t.o %t.baz.o -o %t.exe -Wl,-q
9+
# RUN: link_fdata %s %t.baz2.o %t.baz2.fdata
10+
# RUN: merge-fdata %t.main.fdata %t.baz.fdata %t.baz2.fdata > %t.fdata
11+
# RUN: %clang %cflags %t.o %t.baz.o %t.baz2.o -o %t.exe -Wl,-q
1012
# RUN: llvm-bolt %t.exe -o %t.out --lite=1 --data %t.fdata -v=1 -print-cfg \
1113
# RUN: 2>&1 | FileCheck %s
1214

1315
# CHECK: BOLT-INFO: processing main.cold.1 as a sibling of non-ignored function
14-
# CHECK: BOLT-INFO: processing foo.cold.1/1 as a sibling of non-ignored function
15-
# CHECK: BOLT-INFO: processing bar.cold.1/1 as a sibling of non-ignored function
16+
# CHECK: BOLT-INFO: processing foo.cold.1/1(*2) as a sibling of non-ignored function
17+
# CHECK: BOLT-INFO: processing bar.cold.1/1(*2) as a sibling of non-ignored function
1618
# CHECK: BOLT-INFO: processing baz.cold.1 as a sibling of non-ignored function
17-
# CHECK: BOLT-INFO: processing baz.cold.1/1 as a sibling of non-ignored function
19+
# CHECK: BOLT-INFO: processing baz.cold.1/1(*2) as a sibling of non-ignored function
20+
# CHECK: BOLT-INFO: processing baz.cold.1/2(*2) as a sibling of non-ignored function
1821

1922
# CHECK: Binary Function "main.cold.1" after building cfg
2023
# CHECK: Parent : main
2124

22-
# CHECK: Binary Function "foo.cold.1/1" after building cfg
25+
# CHECK: Binary Function "foo.cold.1/1(*2)" after building cfg
2326
# CHECK: Parent : foo
2427

25-
# CHECK: Binary Function "bar.cold.1/1" after building cfg
26-
# CHECK: Parent : bar/1
28+
# CHECK: Binary Function "bar.cold.1/1(*2)" after building cfg
29+
# CHECK: Parent : bar/1(*2)
2730

2831
# CHECK: Binary Function "baz.cold.1" after building cfg
2932
# CHECK: Parent : baz{{$}}
3033

31-
# CHECK: Binary Function "baz.cold.1/1" after building cfg
32-
# CHECK: Parent : baz/1
34+
# CHECK: Binary Function "baz.cold.1/1(*2)" after building cfg
35+
# CHECK: Parent : baz/1(*2)
36+
37+
# CHECK: Binary Function "baz.cold.1/2(*2)" after building cfg
38+
# CHECK: Parent : baz/2(*2)
3339

3440
#--- main.s
41+
.file "main.s"
3542
.globl main
3643
.type main, %function
3744
main:
@@ -126,6 +133,7 @@ baz.cold.1:
126133
.size baz.cold.1, .-baz.cold.1
127134

128135
#--- baz.s
136+
.file "baz.s"
129137
.local baz
130138
.type baz, %function
131139
baz:
@@ -149,3 +157,29 @@ baz.cold.1:
149157
retq
150158
.cfi_endproc
151159
.size baz.cold.1, .-baz.cold.1
160+
161+
#--- baz2.s
162+
.file "baz2.s"
163+
.local baz
164+
.type baz, %function
165+
baz:
166+
.cfi_startproc
167+
# FDATA: 0 [unknown] 0 1 baz/2 0 1 0
168+
cmpl $0x0, %eax
169+
je baz.cold.1
170+
retq
171+
.cfi_endproc
172+
.size baz, .-baz
173+
174+
.section .text.cold
175+
.local baz.cold.1
176+
.type baz.cold.1, %function
177+
baz.cold.1:
178+
.cfi_startproc
179+
pushq %rbp
180+
movq %rsp, %rbp
181+
movl $0x0, %eax
182+
popq %rbp
183+
retq
184+
.cfi_endproc
185+
.size baz.cold.1, .-baz.cold.1

0 commit comments

Comments
 (0)