Skip to content

Commit 0f1b043

Browse files
committed
[lld][ELF] Add --why-live flag (inspired by Mach-O)
This prints the stack of reasons that symbols that match the given glob(s) survived GC. It has no effect unless section GC occurs. A symbol may be live intrinsically, because it is referenced by another symbol or section, or because it is part of a live section. Sections have similar reasons. This implementation does not require -ffunction-sections or -fdata-sections to produce readable results, although it does tend to work better (as does GC).
1 parent 8942d5e commit 0f1b043

File tree

5 files changed

+260
-13
lines changed

5 files changed

+260
-13
lines changed

lld/ELF/Config.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,7 @@ struct Config {
223223
llvm::StringRef thinLTOCacheDir;
224224
llvm::StringRef thinLTOIndexOnlyArg;
225225
llvm::StringRef whyExtract;
226+
llvm::SmallVector<llvm::GlobPattern, 0> whyLive;
226227
llvm::StringRef cmseInputLib;
227228
llvm::StringRef cmseOutputLib;
228229
StringRef zBtiReport = "none";

lld/ELF/Driver.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1472,6 +1472,15 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) {
14721472
ctx.arg.warnSymbolOrdering =
14731473
args.hasFlag(OPT_warn_symbol_ordering, OPT_no_warn_symbol_ordering, true);
14741474
ctx.arg.whyExtract = args.getLastArgValue(OPT_why_extract);
1475+
for (opt::Arg *arg : args.filtered(OPT_why_live)) {
1476+
StringRef value(arg->getValue());
1477+
if (Expected<GlobPattern> pat = GlobPattern::create(arg->getValue())) {
1478+
ctx.arg.whyLive.emplace_back(std::move(*pat));
1479+
} else {
1480+
ErrAlways(ctx) << arg->getSpelling() << ": " << pat.takeError();
1481+
continue;
1482+
}
1483+
}
14751484
ctx.arg.zCombreloc = getZFlag(args, "combreloc", "nocombreloc", true);
14761485
ctx.arg.zCopyreloc = getZFlag(args, "copyreloc", "nocopyreloc", true);
14771486
ctx.arg.zForceBti = hasZOption(args, "force-bti");

lld/ELF/MarkLive.cpp

Lines changed: 112 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,11 @@
2929
#include "Target.h"
3030
#include "lld/Common/CommonLinkerContext.h"
3131
#include "lld/Common/Strings.h"
32+
#include "llvm/ADT/DenseMapInfoVariant.h"
3233
#include "llvm/ADT/STLExtras.h"
3334
#include "llvm/Object/ELF.h"
3435
#include "llvm/Support/TimeProfiler.h"
36+
#include <variant>
3537
#include <vector>
3638

3739
using namespace llvm;
@@ -42,6 +44,10 @@ using namespace lld;
4244
using namespace lld::elf;
4345

4446
namespace {
47+
48+
// Something that can be the most proximate reason that something else is alive.
49+
typedef std::variant<InputSectionBase *, Symbol *> LiveReason;
50+
4551
template <class ELFT> class MarkLive {
4652
public:
4753
MarkLive(Ctx &ctx, unsigned partition) : ctx(ctx), partition(partition) {}
@@ -50,7 +56,10 @@ template <class ELFT> class MarkLive {
5056
void moveToMain();
5157

5258
private:
53-
void enqueue(InputSectionBase *sec, uint64_t offset);
59+
void enqueue(InputSectionBase *sec, uint64_t offset = 0,
60+
Symbol *sym = nullptr,
61+
std::optional<LiveReason> reason = std::nullopt);
62+
void printWhyLive(Symbol *s) const;
5463
void markSymbol(Symbol *sym);
5564
void mark();
5665

@@ -70,6 +79,12 @@ template <class ELFT> class MarkLive {
7079
// There are normally few input sections whose names are valid C
7180
// identifiers, so we just store a SmallVector instead of a multimap.
7281
DenseMap<StringRef, SmallVector<InputSectionBase *, 0>> cNamedSections;
82+
83+
// The most proximate reason that something is live. If something doesn't have
84+
// a recorded reason, it is either dead, intrinsically live, or an
85+
// unreferenced symbol in a live section. (These cases are trivially
86+
// detectable and need not be stored.)
87+
DenseMap<LiveReason, LiveReason> whyLive;
7388
};
7489
} // namespace
7590

@@ -101,6 +116,12 @@ void MarkLive<ELFT>::resolveReloc(InputSectionBase &sec, RelTy &rel,
101116
Symbol &sym = sec.file->getRelocTargetSym(rel);
102117
sym.used = true;
103118

119+
LiveReason reason;
120+
if (!ctx.arg.whyLive.empty()) {
121+
Defined *reasonSym = sec.getEnclosingSymbol(rel.r_offset);
122+
reason = reasonSym ? LiveReason(reasonSym) : LiveReason(&sec);
123+
}
124+
104125
if (auto *d = dyn_cast<Defined>(&sym)) {
105126
auto *relSec = dyn_cast_or_null<InputSectionBase>(d->section);
106127
if (!relSec)
@@ -119,17 +140,29 @@ void MarkLive<ELFT>::resolveReloc(InputSectionBase &sec, RelTy &rel,
119140
// group/SHF_LINK_ORDER rules (b) if the associated text section should be
120141
// discarded, marking the LSDA will unnecessarily retain the text section.
121142
if (!(fromFDE && ((relSec->flags & (SHF_EXECINSTR | SHF_LINK_ORDER)) ||
122-
relSec->nextInSectionGroup)))
123-
enqueue(relSec, offset);
143+
relSec->nextInSectionGroup))) {
144+
Symbol *canonicalSym = d;
145+
if (!ctx.arg.whyLive.empty() && d->isSection()) {
146+
if (Symbol *s = relSec->getEnclosingSymbol(offset))
147+
canonicalSym = s;
148+
else
149+
canonicalSym = nullptr;
150+
}
151+
enqueue(relSec, offset, canonicalSym, reason);
152+
}
124153
return;
125154
}
126155

127-
if (auto *ss = dyn_cast<SharedSymbol>(&sym))
128-
if (!ss->isWeak())
156+
if (auto *ss = dyn_cast<SharedSymbol>(&sym)) {
157+
if (!ss->isWeak()) {
129158
cast<SharedFile>(ss->file)->isNeeded = true;
159+
if (!ctx.arg.whyLive.empty())
160+
whyLive.try_emplace(&sym, reason);
161+
}
162+
}
130163

131164
for (InputSectionBase *sec : cNamedSections.lookup(sym.getName()))
132-
enqueue(sec, 0);
165+
enqueue(sec, 0, nullptr, reason);
133166
}
134167

135168
// The .eh_frame section is an unfortunate special case.
@@ -187,7 +220,8 @@ static bool isReserved(InputSectionBase *sec) {
187220
}
188221

189222
template <class ELFT>
190-
void MarkLive<ELFT>::enqueue(InputSectionBase *sec, uint64_t offset) {
223+
void MarkLive<ELFT>::enqueue(InputSectionBase *sec, uint64_t offset,
224+
Symbol *sym, std::optional<LiveReason> reason) {
191225
// Usually, a whole section is marked as live or dead, but in mergeable
192226
// (splittable) sections, each piece of data has independent liveness bit.
193227
// So we explicitly tell it which offset is in use.
@@ -201,15 +235,71 @@ void MarkLive<ELFT>::enqueue(InputSectionBase *sec, uint64_t offset) {
201235
return;
202236
sec->partition = sec->partition ? 1 : partition;
203237

238+
if (!ctx.arg.whyLive.empty() && reason) {
239+
if (sym) {
240+
// If a specific symbol is referenced, that makes it alive. It may in turn
241+
// make its section alive.
242+
whyLive.try_emplace(sym, *reason);
243+
whyLive.try_emplace(sec, sym);
244+
} else {
245+
// Otherwise, the reference generically makes the section live.
246+
whyLive.try_emplace(sec, *reason);
247+
}
248+
}
249+
204250
// Add input section to the queue.
205251
if (InputSection *s = dyn_cast<InputSection>(sec))
206252
queue.push_back(s);
207253
}
208254

255+
// Print the stack of reasons that the given symbol is live.
256+
template <class ELFT> void MarkLive<ELFT>::printWhyLive(Symbol *s) const {
257+
// Skip dead symbols. A symbol is dead if it belongs to a dead section.
258+
if (auto *d = dyn_cast<Defined>(s)) {
259+
auto *reason = dyn_cast_or_null<InputSectionBase>(d->section);
260+
if (reason && !reason->isLive())
261+
return;
262+
}
263+
264+
auto msg = Msg(ctx);
265+
msg << "live symbol: " << toStr(ctx, *s);
266+
267+
LiveReason cur = s;
268+
while (true) {
269+
auto it = whyLive.find(cur);
270+
// If there is a specific reason this object is live...
271+
if (it != whyLive.end()) {
272+
cur = it->second;
273+
} else {
274+
// This object is live, but it has no tracked reason. It is either
275+
// intrinsically live or an unreferenced symbol in a live section. Return
276+
// in the first case.
277+
if (!std::holds_alternative<Symbol *>(cur))
278+
return;
279+
auto *d = dyn_cast<Defined>(std::get<Symbol *>(cur));
280+
if (!d)
281+
return;
282+
auto *reason = dyn_cast_or_null<InputSectionBase>(d->section);
283+
if (!reason)
284+
return;
285+
cur = LiveReason{reason};
286+
}
287+
288+
msg << "\n>>> kept live by ";
289+
if (std::holds_alternative<Symbol *>(cur)) {
290+
auto *s = std::get<Symbol *>(cur);
291+
msg << toStr(ctx, *s);
292+
} else {
293+
auto *s = std::get<InputSectionBase *>(cur);
294+
msg << toStr(ctx, s);
295+
}
296+
}
297+
}
298+
209299
template <class ELFT> void MarkLive<ELFT>::markSymbol(Symbol *sym) {
210300
if (auto *d = dyn_cast_or_null<Defined>(sym))
211301
if (auto *isec = dyn_cast_or_null<InputSectionBase>(d->section))
212-
enqueue(isec, d->value);
302+
enqueue(isec, d->value, sym);
213303
}
214304

215305
// This is the main function of the garbage collector.
@@ -256,7 +346,7 @@ template <class ELFT> void MarkLive<ELFT>::run() {
256346
}
257347
for (InputSectionBase *sec : ctx.inputSections) {
258348
if (sec->flags & SHF_GNU_RETAIN) {
259-
enqueue(sec, 0);
349+
enqueue(sec, 0, nullptr, std::nullopt);
260350
continue;
261351
}
262352
if (sec->flags & SHF_LINK_ORDER)
@@ -295,7 +385,7 @@ template <class ELFT> void MarkLive<ELFT>::run() {
295385
// Preserve special sections and those which are specified in linker
296386
// script KEEP command.
297387
if (isReserved(sec) || ctx.script->shouldKeep(sec)) {
298-
enqueue(sec, 0);
388+
enqueue(sec);
299389
} else if ((!ctx.arg.zStartStopGC || sec->name.starts_with("__libc_")) &&
300390
isValidCIdentifier(sec->name)) {
301391
// As a workaround for glibc libc.a before 2.34
@@ -323,11 +413,20 @@ template <class ELFT> void MarkLive<ELFT>::mark() {
323413
resolveReloc(sec, rel, false);
324414

325415
for (InputSectionBase *isec : sec.dependentSections)
326-
enqueue(isec, 0);
416+
enqueue(isec, 0, nullptr, &sec);
327417

328418
// Mark the next group member.
329419
if (sec.nextInSectionGroup)
330-
enqueue(sec.nextInSectionGroup, 0);
420+
enqueue(sec.nextInSectionGroup, 0, nullptr, &sec);
421+
}
422+
423+
if (!ctx.arg.whyLive.empty()) {
424+
for (Symbol *sym : ctx.symtab->getSymbols()) {
425+
if (llvm::any_of(ctx.arg.whyLive, [sym](const llvm::GlobPattern &pat) {
426+
return pat.match(sym->getName());
427+
}))
428+
printWhyLive(sym);
429+
}
331430
}
332431
}
333432

@@ -353,7 +452,7 @@ template <class ELFT> void MarkLive<ELFT>::moveToMain() {
353452
continue;
354453
if (ctx.symtab->find(("__start_" + sec->name).str()) ||
355454
ctx.symtab->find(("__stop_" + sec->name).str()))
356-
enqueue(sec, 0);
455+
enqueue(sec);
357456
}
358457

359458
mark();

lld/ELF/Options.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -559,6 +559,12 @@ defm wrap : Eq<"wrap", "Redirect symbol references to __wrap_symbol and "
559559
"__real_symbol references to symbol">,
560560
MetaVarName<"<symbol>">;
561561

562+
defm why_live
563+
: EEq<"why-live",
564+
"Report a chain of references preventing garbage collection for "
565+
"each symbol matching <glob>">,
566+
MetaVarName<"<glob>">;
567+
562568
def z: JoinedOrSeparate<["-"], "z">, MetaVarName<"<option>">,
563569
HelpText<"Linker option extensions">;
564570

lld/test/ELF/why-live.s

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
# REQUIRES: x86
2+
3+
# RUN: llvm-mc -n -filetype=obj -triple=x86_64 %s -o %t.o
4+
# RUN: echo -e ".globl test_shared\n .section .test_shared,\"ax\",@progbits\n test_shared: jmp test_shared" |\
5+
# RUN: llvm-mc -n -filetype=obj -triple=x86_64 -o %t.shared.o
6+
# RUN: ld.lld -shared %t.shared.o -o %t.so
7+
8+
## Simple live section
9+
.globl _start
10+
.section ._start,"ax",@progbits
11+
_start:
12+
jmp test_simple
13+
jmp .Llocal
14+
jmp .Llocal_within_symbol
15+
jmp test_shared
16+
.size _start, .-_start
17+
18+
.globl test_simple
19+
.section .test_simple,"ax",@progbits
20+
test_simple:
21+
jmp test_simple
22+
jmp test_from_unsized
23+
24+
# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections --why-live=test_simple | FileCheck %s --check-prefix=SIMPLE
25+
26+
# SIMPLE: live symbol: test_simple
27+
# SIMPLE-NEXT: >>> kept live by _start
28+
29+
## Live only by being a member of .test_simple
30+
.globl test_incidental
31+
test_incidental:
32+
jmp test_incidental
33+
34+
# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections --why-live=test_incidental | FileCheck %s --check-prefix=INCIDENTAL
35+
36+
# INCIDENTAL: live symbol: test_incidental
37+
# INCIDENTAL-NEXT: >>> kept live by {{.*}}.o:(.test_simple)
38+
# INCIDENTAL-NEXT: >>> kept live by test_simple
39+
# INCIDENTAL-NEXT: >>> kept live by _start
40+
41+
## Reached from a reference in section .test_simple directly, since test_simple is an unsized symbol.
42+
.globl test_from_unsized
43+
.section .test_from_unsized,"ax",@progbits
44+
test_from_unsized:
45+
jmp test_from_unsized
46+
47+
# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections --why-live=test_from_unsized | FileCheck %s --check-prefix=FROM-UNSIZED
48+
49+
# FROM-UNSIZED: live symbol: test_from_unsized
50+
# FROM-UNSIZED-NEXT: >>> kept live by {{.*}}.o:(.test_simple)
51+
# FROM-UNSIZED-NEXT: >>> kept live by test_simple
52+
# FROM-UNSIZED-NEXT: >>> kept live by _start
53+
54+
## Symbols in dead sections are dead and not reported.
55+
.globl test_dead
56+
.section .test_dead,"ax",@progbits
57+
test_dead:
58+
jmp test_dead
59+
60+
# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections --why-live=test_dead | count 0
61+
62+
## Undefined symbols are considered live, since they are not in dead sections.
63+
64+
# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections --why-live=test_undef -u test_undef | FileCheck %s --check-prefix=UNDEFINED
65+
66+
# UNDEFINED: live symbol: test_undef
67+
# UNDEFINED-NOT: >>>
68+
69+
## Defined symbols without input section parents are live.
70+
.globl test_absolute
71+
test_absolute = 1234
72+
73+
# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections --why-live=test_absolute | FileCheck %s --check-prefix=ABSOLUTE
74+
75+
# ABSOLUTE: live symbol: test_absolute
76+
# ABSOLUTE-NOT: >>>
77+
78+
## Retained sections are intrinsically live, and they make contained symbols live.
79+
.globl test_retained
80+
.section .test_retained,"axR",@progbits
81+
test_retained:
82+
jmp test_retained
83+
84+
# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections --why-live=test_retained | FileCheck %s --check-prefix=RETAINED
85+
86+
# RETAINED: live symbol: test_retained
87+
# RETAINED-NEXT: >>> kept live by {{.*}}:(.test_retained)
88+
89+
## Relocs that reference offsets from sections (e.g., from local symbols) are considered to point to the section if no enclosing symbol exists.
90+
91+
.globl test_section_offset
92+
.section .test_section_offset,"ax",@progbits
93+
test_section_offset:
94+
jmp test_section_offset
95+
.Llocal:
96+
jmp test_section_offset
97+
98+
# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections --why-live=test_section_offset | FileCheck %s --check-prefix=SECTION-OFFSET
99+
100+
# SECTION-OFFSET: live symbol: test_section_offset
101+
# SECTION-OFFSET-NEXT: >>> kept live by {{.*}}:(.test_section_offset)
102+
# SECTION-OFFSET-NEXT: >>> kept live by _start
103+
104+
## Relocs that reference offsets from sections (e.g., from local symbols) are considered to point to the enclosing symbol if one exists.
105+
106+
.globl test_section_offset_within_symbol
107+
.section .test_section_offset_within_symbol,"ax",@progbits
108+
test_section_offset_within_symbol:
109+
jmp test_section_offset_within_symbol
110+
.Llocal_within_symbol:
111+
jmp test_section_offset_within_symbol
112+
.size test_section_offset_within_symbol, .-test_section_offset_within_symbol
113+
114+
# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections --why-live=test_section_offset_within_symbol | FileCheck %s --check-prefix=SECTION-OFFSET-WITHIN-SYMBOL
115+
116+
# SECTION-OFFSET-WITHIN-SYMBOL: live symbol: test_section_offset_within_symbol
117+
# SECTION-OFFSET-WITHIN-SYMBOL-NEXT: >>> kept live by _start
118+
119+
## Shared symbols
120+
121+
# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections %t.so --why-live=test_shared | FileCheck %s --check-prefix=SHARED
122+
123+
# SHARED: live symbol: test_shared
124+
# SHARED-NEXT: >>> kept live by _start
125+
126+
## Globs match multiple cases. Multiple --why-live flags union.
127+
128+
# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections %t.so --why-live=test_s* | FileCheck %s --check-prefix=MULTIPLE
129+
# RUN: ld.lld %t.o %t.so -o /dev/null --gc-sections %t.so --why-live=test_simple --why-live=test_shared | FileCheck %s --check-prefix=MULTIPLE
130+
131+
# MULTIPLE-DAG: live symbol: test_simple
132+
# MULTIPLE-DAG: live symbol: test_shared

0 commit comments

Comments
 (0)