Skip to content

Commit 535ab47

Browse files
committed
[ELF] Fix unnecessary inclusion of unreferenced provide symbols
Previously, the linker unnecessarily included a PROVIDE symbol that was referenced by another, unused PROVIDE symbol. For example, if a linker script contained the code below and the PROVIDE symbol 'not_used_sym' was not included in the link, the linker still included the PROVIDE symbol 'foo' because it was referenced by 'not_used_sym':

  PROVIDE(not_used_sym = foo)
  PROVIDE(foo = 0x1000)

This commit fixes this behavior by using a DFS-like algorithm to find all the symbols referenced in the provide expressions of the included PROVIDE symbols. It also fixes the issue of an unused section not being garbage-collected when a symbol of that section is referenced by an unused PROVIDE symbol.

Closes #74771
Closes #84730
1 parent 8e5de66 commit 535ab47

File tree

8 files changed

+197
-31
lines changed

8 files changed

+197
-31
lines changed

lld/ELF/Driver.cpp

Lines changed: 5 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2362,12 +2362,6 @@ static void readSymbolPartitionSection(InputSectionBase *s) {
23622362
sym->partition = newPart.getNumber();
23632363
}
23642364

2365-
// File-local helper shown on the removed side of this diff; the same logic is
// added as SymbolTable::addUnusedUndefined in lld/ELF/SymbolTable.cpp.
// Adds an Undefined symbol called `name`, attributed to ctx.internalFile, with
// the given binding (STB_GLOBAL by default) and STV_DEFAULT, and returns the
// Symbol handed back by symtab.addSymbol().
static Symbol *addUnusedUndefined(StringRef name,
2366-
uint8_t binding = STB_GLOBAL) {
2367-
return symtab.addSymbol(
2368-
Undefined{ctx.internalFile, name, binding, STV_DEFAULT, 0});
2369-
}
2370-
23712365
static void markBuffersAsDontNeed(bool skipLinkedOutput) {
23722366
// With --thinlto-index-only, all buffers are nearly unused from now on
23732367
// (except symbol/section names used by infrequent passes). Mark input file
@@ -2454,15 +2448,15 @@ static std::vector<WrappedSymbol> addWrappedSymbols(opt::InputArgList &args) {
24542448
continue;
24552449

24562450
Symbol *wrap =
2457-
addUnusedUndefined(saver().save("__wrap_" + name), sym->binding);
2451+
symtab.addUnusedUndefined(saver().save("__wrap_" + name), sym->binding);
24582452

24592453
// If __real_ is referenced, pull in the symbol if it is lazy. Do this after
24602454
// processing __wrap_ as that may have referenced __real_.
24612455
StringRef realName = saver().save("__real_" + name);
24622456
if (symtab.find(realName))
2463-
addUnusedUndefined(name, sym->binding);
2457+
symtab.addUnusedUndefined(name, sym->binding);
24642458

2465-
Symbol *real = addUnusedUndefined(realName);
2459+
Symbol *real = symtab.addUnusedUndefined(realName);
24662460
v.push_back({sym, real, wrap});
24672461

24682462
// We want to tell LTO not to inline symbols to be overwritten
@@ -2729,7 +2723,7 @@ void LinkerDriver::link(opt::InputArgList &args) {
27292723
// Handle -u/--undefined before input files. If both a.a and b.so define foo,
27302724
// -u foo a.a b.so will extract a.a.
27312725
for (StringRef name : config->undefined)
2732-
addUnusedUndefined(name)->referenced = true;
2726+
symtab.addUnusedUndefined(name)->referenced = true;
27332727

27342728
// Add all files to the symbol table. This will add almost all
27352729
// symbols that we need to the symbol table. This process might
@@ -2754,13 +2748,7 @@ void LinkerDriver::link(opt::InputArgList &args) {
27542748
config->hasDynSymTab =
27552749
!ctx.sharedFiles.empty() || config->isPic || config->exportDynamic;
27562750

2757-
// Some symbols (such as __ehdr_start) are defined lazily only when there
2758-
// are undefined symbols for them, so we add these to trigger that logic.
2759-
for (StringRef name : script->referencedSymbols) {
2760-
Symbol *sym = addUnusedUndefined(name);
2761-
sym->isUsedInRegularObj = true;
2762-
sym->referenced = true;
2763-
}
2751+
script->addScriptReferencedSymbolsToSymTable();
27642752

27652753
// Prevent LTO from removing any definition referenced by -u.
27662754
for (StringRef name : config->undefined)

lld/ELF/LinkerScript.cpp

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -201,12 +201,7 @@ static bool shouldDefineSym(SymbolAssignment *cmd) {
201201
if (!cmd->provide)
202202
return true;
203203

204-
// If a symbol was in PROVIDE(), we need to define it only
205-
// when it is a referenced undefined symbol.
206-
Symbol *b = symtab.find(cmd->name);
207-
if (b && !b->isDefined() && !b->isCommon())
208-
return true;
209-
return false;
204+
return LinkerScript::shouldAddProvideSym(cmd->name);
210205
}
211206

212207
// Called by processSymbolAssignments() to assign definitions to
@@ -1518,3 +1513,50 @@ void LinkerScript::checkFinalScriptConditions() const {
15181513
checkMemoryRegion(lmaRegion, sec, sec->getLMA());
15191514
}
15201515
}
1516+
1517+
// Adds the symbols referenced by the expression of the PROVIDE symbol
// `provideSym` to the symbol table, marks them as referenced, and recurses
// into each of them that is itself a PROVIDE symbol that should be added.
// `addedRefsFromProvideSym` holds the PROVIDE symbols already processed.
1518+
// This function must only be called for provide symbols that should be added
1519+
// to the link, i.e. those for which LinkerScript::shouldAddProvideSym is true.
1520+
static void
1521+
addProvideSymReferences(StringRef provideSym,
1522+
llvm::StringSet<> &addedRefsFromProvideSym) {
1523+
assert(LinkerScript::shouldAddProvideSym(provideSym) &&
1524+
"This function must only be called for provide symbols that should be "
1525+
"added to the link.");
1526+
// Mark provideSym as processed before walking its references so that cyclic
// PROVIDE chains (e.g. f1 -> f2 -> f1) do not recurse forever.
addedRefsFromProvideSym.insert(provideSym);
1527+
for (StringRef name : script->provideMap[provideSym]) {
1528+
// Enter the referenced name into the symbol table as an undefined symbol
// and flag it as used/referenced.
Symbol *sym = symtab.addUnusedUndefined(name);
1529+
sym->isUsedInRegularObj = true;
1530+
sym->referenced = true;
1531+
// Also record it in the script's list of referenced symbols.
script->referencedSymbols.push_back(name);
1532+
// Follow the reference only if it names a PROVIDE symbol that should be
// added and has not been processed yet.
if (script->provideMap.count(name) &&
1533+
LinkerScript::shouldAddProvideSym(name) &&
1534+
!addedRefsFromProvideSym.count(name))
1535+
addProvideSymReferences(name, addedRefsFromProvideSym);
1536+
}
1537+
}
1538+
1539+
// Adds the symbols referenced by the linker script to the symbol table.
// Symbols in `referencedSymbols` are added unconditionally; symbols referenced
// from PROVIDE expressions (recorded in `provideMap`) are added only for
// PROVIDE symbols for which shouldAddProvideSym() returns true, following
// chains of such PROVIDE symbols via addProvideSymReferences().
void LinkerScript::addScriptReferencedSymbolsToSymTable() {
1540+
// Some symbols (such as __ehdr_start) are defined lazily only when there
1541+
// are undefined symbols for them, so we add these to trigger that logic.
1542+
for (StringRef name : referencedSymbols) {
1543+
Symbol *sym = symtab.addUnusedUndefined(name);
1544+
sym->isUsedInRegularObj = true;
1545+
sym->referenced = true;
1546+
}
1547+
1548+
// Tracks the PROVIDE symbols whose referenced symbols have already been added
1549+
// to the symbol table, so that each one is processed at most once.
1550+
llvm::StringSet<> addedRefsFromProvideSym;
1551+
for (const auto &provideEntry : provideMap) {
1552+
StringRef provideSym = provideEntry.first;
1553+
// Skip PROVIDE symbols that should not be added, and those already handled
// while following the references of an earlier entry.
if (LinkerScript::shouldAddProvideSym(provideSym) &&
1554+
!addedRefsFromProvideSym.count(provideSym))
1555+
addProvideSymReferences(provideSym, addedRefsFromProvideSym);
1556+
}
1557+
}
1558+
1559+
// Returns true if `symName` is present in the symbol table and is neither
// defined nor common; returns false if no symbol with that name is found.
bool LinkerScript::shouldAddProvideSym(StringRef symName) {
1560+
Symbol *sym = symtab.find(symName);
1561+
return sym && !sym->isDefined() && !sym->isCommon();
1562+
}

lld/ELF/LinkerScript.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "llvm/ADT/ArrayRef.h"
1717
#include "llvm/ADT/DenseMap.h"
1818
#include "llvm/ADT/MapVector.h"
19+
#include "llvm/ADT/SmallVector.h"
1920
#include "llvm/ADT/StringRef.h"
2021
#include "llvm/Support/Compiler.h"
2122
#include <cstddef>
@@ -348,6 +349,18 @@ class LinkerScript final {
348349
// Check backward location counter assignment and memory region/LMA overflows.
349350
void checkFinalScriptConditions() const;
350351

352+
// Add symbols that are referenced in the linker script to the symbol table.
353+
// Symbols referenced in a PROVIDE command are only added to the symbol table
354+
// if the PROVIDE command actually provides the symbol.
355+
// It also adds the symbols referenced by the used provide symbols to the
356+
// linker script referenced symbols list.
357+
void addScriptReferencedSymbolsToSymTable();
358+
359+
// Returns true if the PROVIDE symbol should be added to the link.
360+
// A PROVIDE symbol is added to the link only if it satisfies an
361+
// undefined reference.
362+
static bool shouldAddProvideSym(StringRef symName);
363+
351364
// SECTIONS command list.
352365
SmallVector<SectionCommand *, 0> sectionCommands;
353366

@@ -379,6 +392,14 @@ class LinkerScript final {
379392

380393
// Sections that will be warned/errored by --orphan-handling.
381394
SmallVector<const InputSectionBase *, 0> orphanSections;
395+
396+
// Maps each PROVIDE symbol to the symbols referenced in its provide
397+
// expression. For example, if the PROVIDE command is:
398+
//
399+
// PROVIDE(v = a + b + c);
400+
//
401+
// then provideMap should contain the mapping: 'v' -> ['a', 'b', 'c']
402+
llvm::MapVector<StringRef, SmallVector<StringRef, 0>> provideMap;
382403
};
383404

384405
LLVM_LIBRARY_VISIBILITY extern std::unique_ptr<LinkerScript> script;

lld/ELF/ScriptParser.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include "llvm/Support/TimeProfiler.h"
3737
#include <cassert>
3838
#include <limits>
39+
#include <optional>
3940
#include <vector>
4041

4142
using namespace llvm;
@@ -138,6 +139,10 @@ class ScriptParser final : ScriptLexer {
138139

139140
// A set to detect an INCLUDE() cycle.
140141
StringSet<> seen;
142+
143+
// If we are currently parsing a PROVIDE|PROVIDE_HIDDEN command,
144+
// then this member is set to the provide symbol name.
145+
std::optional<llvm::StringRef> activeProvideSym;
141146
};
142147
} // namespace
143148

@@ -1055,6 +1060,9 @@ SymbolAssignment *ScriptParser::readProvideHidden(bool provide, bool hidden) {
10551060
;
10561061
return nullptr;
10571062
}
1063+
llvm::SaveAndRestore saveActiveProvideSym(activeProvideSym);
1064+
if (provide)
1065+
activeProvideSym = name;
10581066
SymbolAssignment *cmd = readSymbolAssignment(name);
10591067
cmd->provide = provide;
10601068
cmd->hidden = hidden;
@@ -1570,7 +1578,10 @@ Expr ScriptParser::readPrimary() {
15701578
tok = unquote(tok);
15711579
else if (!isValidSymbolName(tok))
15721580
setError("malformed number: " + tok);
1573-
script->referencedSymbols.push_back(tok);
1581+
if (activeProvideSym)
1582+
script->provideMap[activeProvideSym.value()].push_back(tok);
1583+
else
1584+
script->referencedSymbols.push_back(tok);
15741585
return [=] { return script->getSymbolValue(tok, location); };
15751586
}
15761587

lld/ELF/SymbolTable.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,3 +333,7 @@ void SymbolTable::scanVersionScript() {
333333
// --dynamic-list.
334334
handleDynamicList();
335335
}
336+
337+
// Adds an Undefined symbol called `name`, attributed to ctx.internalFile, with
// the given binding and STV_DEFAULT, and returns the Symbol handed back by
// addSymbol().
Symbol *SymbolTable::addUnusedUndefined(StringRef name, uint8_t binding) {
338+
return addSymbol(Undefined{ctx.internalFile, name, binding, STV_DEFAULT, 0});
339+
}

lld/ELF/SymbolTable.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,9 @@ class SymbolTable {
5757

5858
void handleDynamicList();
5959

60+
Symbol *addUnusedUndefined(StringRef name,
61+
uint8_t binding = llvm::ELF::STB_GLOBAL);
62+
6063
// Set of .so files to not link the same shared object file more than once.
6164
llvm::DenseMap<llvm::CachedHashStringRef, SharedFile *> soNames;
6265

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# REQUIRES: x86
2+
3+
# This test verifies that --gc-sections correctly discards an unused
4+
# section when a symbol of that section is referenced only by
5+
# an unused PROVIDE symbol.
6+
7+
# RUN: rm -rf %t && split-file %s %t && cd %t
8+
# RUN: llvm-mc -filetype=obj -triple=x86_64 a.s -o a.o
9+
# RUN: ld.lld -o a_nogc a.o -T script.t
10+
# RUN: llvm-readobj --sections --symbols a_nogc | FileCheck -check-prefix=NOGC %s
11+
# RUN: ld.lld -o a_gc a.o --gc-sections --print-gc-sections -T script.t | FileCheck --check-prefix=GC_LINK %s
12+
# RUN: llvm-readobj --sections --symbols a_gc | FileCheck -check-prefix=GC %s
13+
14+
NOGC: Name: foo
15+
NOGC: Name: used
16+
NOGC: Name: bar
17+
NOGC: Name: baz
18+
NOGC: Name: another_used
19+
NOGC: Name: baz_ref
20+
NOGC-NOT: unused
21+
NOGC-NOT: another_unused
22+
23+
GC_LINK: removing unused section a.o:(.text.bar)
24+
25+
GC: Name: foo
26+
GC: Name: used
27+
GC: Name: baz
28+
GC: Name: another_used
29+
GC: Name: baz_ref
30+
GC-NOT: bar
31+
GC-NOT: unused
32+
GC-NOT: another_unused
33+
34+
#--- a.s
35+
.global _start
36+
_start:
37+
call foo
38+
call used
39+
40+
.section .text.foo,"ax",@progbits
41+
foo:
42+
nop
43+
44+
.section .text.bar,"ax",@progbits
45+
.global bar
46+
bar:
47+
nop
48+
49+
.section .text.baz,"ax",@progbits
50+
.global baz
51+
baz:
52+
nop
53+
54+
55+
#--- script.t
56+
PROVIDE(unused = bar + used);
57+
PROVIDE(used = another_used);
58+
PROVIDE(baz_ref = baz);
59+
PROVIDE(another_used = baz_ref);
60+
PROVIDE(another_unused = unused + bar + 0x1);

lld/test/ELF/linkerscript/symbolreferenced.s

Lines changed: 44 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,31 @@
2121
# RUN: ld.lld -o chain -T chain.t a.o
2222
# RUN: llvm-nm chain | FileCheck %s
2323

24-
# CHECK: 0000000000001000 a f1
25-
# CHECK-NEXT: 0000000000001000 A f2
26-
# CHECK-NEXT: 0000000000001000 a g1
27-
# CHECK-NEXT: 0000000000001000 A g2
28-
# CHECK-NEXT: 0000000000001000 A newsym
24+
# CHECK: 0000000000007000 a f1
25+
# CHECK-NEXT: 0000000000007000 A f2
26+
# CHECK-NEXT: 0000000000007000 A f3
27+
# CHECK-NEXT: 0000000000007000 A f4
28+
# CHECK-NEXT: 0000000000006000 A f5
29+
# CHECK-NEXT: 0000000000003000 A f6
30+
# CHECK-NEXT: 0000000000001000 A f7
31+
# CHECK-NEXT: 0000000000007500 A newsym
32+
# CHECK: 0000000000002000 A u
33+
# CHECK-NEXT: 0000000000002000 A v
34+
# CHECK-NEXT: 0000000000002000 A w
35+
36+
# CHECK-NOT: g1
37+
# CHECK-NOT: g2
38+
# CHECK-NOT: unused
39+
# CHECK-NOT: another_unused
40+
41+
# RUN: ld.lld -o chain_with_cycle -T chain_with_cycle.t a.o
42+
# RUN: llvm-nm chain_with_cycle | FileCheck %s --check-prefix=CHAIN_WITH_CYCLE
43+
44+
# CHAIN_WITH_CYCLE: 000 A f1
45+
# CHAIN_WITH_CYCLE: 000 A f2
46+
# CHAIN_WITH_CYCLE: 000 A f3
47+
# CHAIN_WITH_CYCLE: 000 A f4
48+
# CHAIN_WITH_CYCLE: 000 A newsym
2949

3050
# RUN: not ld.lld -T chain2.t a.o 2>&1 | FileCheck %s --check-prefix=ERR --implicit-check-not=error:
3151
# ERR-COUNT-3: error: chain2.t:1: symbol not found: undef
@@ -40,13 +60,30 @@ patatino:
4060
movl newsym, %eax
4161

4262
#--- chain.t
43-
PROVIDE(f2 = 0x1000);
63+
PROVIDE(f7 = 0x1000);
64+
PROVIDE(f5 = f6 + 0x3000);
65+
PROVIDE(f6 = f7 + 0x2000);
66+
PROVIDE(f4 = f5 + 0x1000);
67+
PROVIDE(f3 = f4);
68+
PROVIDE(f2 = f3);
4469
PROVIDE_HIDDEN(f1 = f2);
45-
PROVIDE(newsym = f1);
70+
PROVIDE(newsym = f1 + 0x500);
71+
72+
u = v;
73+
PROVIDE(w = 0x2000);
74+
PROVIDE(v = w);
4675

4776
PROVIDE(g2 = 0x1000);
4877
PROVIDE_HIDDEN(g1 = g2);
4978
PROVIDE(unused = g1);
79+
PROVIDE_HIDDEN(another_unused = g1);
80+
81+
#--- chain_with_cycle.t
82+
PROVIDE(f1 = f2 + f3);
83+
PROVIDE(f2 = f3 + f4);
84+
PROVIDE(f3 = f4);
85+
PROVIDE(f4 = f1);
86+
PROVIDE(newsym = f1);
5087

5188
#--- chain2.t
5289
PROVIDE(f2 = undef);

0 commit comments

Comments
 (0)