Skip to content

Commit 4254840

Browse files
committed
Speed up --start-lib and --end-lib.
--{start,end}-lib give the files grouped by these options archive file semantics. That is, each object file between them acts as if it were in an archive file whose sole member is the file. Therefore, files between --{start,end}-lib are linked to the final output only if they are needed to resolve some undefined symbols. Previously, the feature was implemented this way: 1. We read a symbol table and insert defined symbols to the symbol table as lazy symbols. 2. If an undefined symbol is resolved to a lazy symbol, that lazy symbol instantiates the ObjFile class for that symbol, which re-inserts all defined symbols to the symbol table. So, if an ObjFile is instantiated, defined symbols are inserted to the symbol table twice. Since inserting long symbol names is not cheap, there's room to optimize here. This patch optimizes it. Now, LazyObjFile remembers symbol handles and passes them over to a new ObjFile instance, so that the ObjFile doesn't insert the same strings. Here is a quick benchmark to link clang. "Original" is the original lld with unmodified command line options. For "Case 1" and "Case 2", I extracted all files from archive files and replaced .a's in a command line with .o's wrapped with --{start,end}-lib. I used the original lld for "Case 1" and used this patch for "Case 2". Original: 5.892 Case 1: 6.001 (+1.8%) Case 2: 5.701 (-3.2%) So, interestingly, --{start,end}-lib are now faster than the regular linking scheme with archive files. That's perhaps not too surprising, though, because for regular archive files, we look up the symbol table with the same string twice. Differential Revision: https://reviews.llvm.org/D62188 llvm-svn: 361473
1 parent 77b4f0a commit 4254840

File tree

3 files changed

+108
-59
lines changed

3 files changed

+108
-59
lines changed

lld/ELF/InputFiles.cpp

Lines changed: 105 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -913,62 +913,91 @@ StringRef ObjFile<ELFT>::getSectionName(const Elf_Shdr &Sec) {
913913
return CHECK(getObj().getSectionName(&Sec, SectionStringTable), this);
914914
}
915915

916+
// Initialize this->Symbols. this->Symbols is a parallel array to
917+
// its corresponding ELF symbol table.
916918
template <class ELFT> void ObjFile<ELFT>::initializeSymbols() {
917-
this->Symbols.reserve(this->getELFSyms<ELFT>().size());
918-
for (const Elf_Sym &Sym : this->getELFSyms<ELFT>())
919-
this->Symbols.push_back(createSymbol(&Sym));
920-
}
921-
922-
template <class ELFT> Symbol *ObjFile<ELFT>::createSymbol(const Elf_Sym *Sym) {
923-
uint32_t SecIdx = getSectionIndex(*Sym);
924-
if (SecIdx >= this->Sections.size())
925-
fatal(toString(this) + ": invalid section index: " + Twine(SecIdx));
926-
927-
InputSectionBase *Sec = this->Sections[SecIdx];
928-
uint8_t Binding = Sym->getBinding();
929-
uint8_t StOther = Sym->st_other;
930-
uint8_t Type = Sym->getType();
931-
uint64_t Value = Sym->st_value;
932-
uint64_t Size = Sym->st_size;
933-
934-
if (Binding == STB_LOCAL) {
935-
if (Sym->getType() == STT_FILE)
936-
SourceFile = CHECK(Sym->getName(this->StringTable), this);
937-
938-
if (this->StringTable.size() <= Sym->st_name)
939-
fatal(toString(this) + ": invalid symbol name offset");
940-
941-
StringRefZ Name = this->StringTable.data() + Sym->st_name;
942-
if (Sym->st_shndx == SHN_UNDEF)
943-
return make<Undefined>(this, Name, Binding, StOther, Type);
944-
return make<Defined>(this, Name, Binding, StOther, Type, Value, Size, Sec);
945-
}
919+
ArrayRef<Elf_Sym> ESyms = this->getELFSyms<ELFT>();
920+
this->Symbols.resize(ESyms.size());
921+
922+
// Our symbol table may have already been partially initialized
923+
// because of LazyObjFile.
924+
for (size_t I = 0, End = ESyms.size(); I != End; ++I)
925+
if (!this->Symbols[I] && ESyms[I].getBinding() != STB_LOCAL)
926+
this->Symbols[I] =
927+
Symtab->insert(CHECK(ESyms[I].getName(this->StringTable), this));
928+
929+
// Fill this->Symbols. A symbol is either local or global.
930+
for (size_t I = 0, End = ESyms.size(); I != End; ++I) {
931+
const Elf_Sym &ESym = ESyms[I];
932+
933+
// Read symbol attributes.
934+
uint32_t SecIdx = getSectionIndex(ESym);
935+
if (SecIdx >= this->Sections.size())
936+
fatal(toString(this) + ": invalid section index: " + Twine(SecIdx));
937+
938+
InputSectionBase *Sec = this->Sections[SecIdx];
939+
uint8_t Binding = ESym.getBinding();
940+
uint8_t StOther = ESym.st_other;
941+
uint8_t Type = ESym.getType();
942+
uint64_t Value = ESym.st_value;
943+
uint64_t Size = ESym.st_size;
944+
StringRefZ Name = this->StringTable.data() + ESym.st_name;
945+
946+
// Handle local symbols. Local symbols are not added to the symbol
947+
// table because they are not visible from other object files. We
948+
// allocate symbol instances and add their pointers to Symbols.
949+
if (Binding == STB_LOCAL) {
950+
if (ESym.getType() == STT_FILE)
951+
SourceFile = CHECK(ESym.getName(this->StringTable), this);
952+
953+
if (this->StringTable.size() <= ESym.st_name)
954+
fatal(toString(this) + ": invalid symbol name offset");
955+
956+
if (ESym.st_shndx == SHN_UNDEF)
957+
this->Symbols[I] = make<Undefined>(this, Name, Binding, StOther, Type);
958+
else
959+
this->Symbols[I] =
960+
make<Defined>(this, Name, Binding, StOther, Type, Value, Size, Sec);
961+
continue;
962+
}
946963

947-
StringRef Name = CHECK(Sym->getName(this->StringTable), this);
964+
// Handle global undefined symbols.
965+
if (ESym.st_shndx == SHN_UNDEF) {
966+
resolveSymbol(this->Symbols[I],
967+
Undefined{this, Name, Binding, StOther, Type});
968+
continue;
969+
}
948970

949-
if (Sym->st_shndx == SHN_UNDEF)
950-
return Symtab->addSymbol(Undefined{this, Name, Binding, StOther, Type});
971+
// Handle global common symbols.
972+
if (ESym.st_shndx == SHN_COMMON) {
973+
if (Value == 0 || Value >= UINT32_MAX)
974+
fatal(toString(this) + ": common symbol '" + StringRef(Name.Data) +
975+
"' has invalid alignment: " + Twine(Value));
976+
resolveSymbol(this->Symbols[I], CommonSymbol{this, Name, Binding, StOther,
977+
Type, Value, Size});
978+
continue;
979+
}
951980

952-
if (Sec == &InputSection::Discarded)
953-
return Symtab->addSymbol(Undefined{this, Name, Binding, StOther, Type,
954-
/*DiscardedSecIdx=*/SecIdx});
981+
// If a defined symbol is in a discarded section, handle it as if it
982+
// were an undefined symbol. Such a symbol doesn't comply with the
983+
// standard, but in practice, a .eh_frame often directly refers to
984+
// COMDAT member sections, and if a comdat group is discarded, some
985+
// defined symbols in a .eh_frame become dangling symbols.
986+
if (Sec == &InputSection::Discarded) {
987+
resolveSymbol(this->Symbols[I],
988+
Undefined{this, Name, Binding, StOther, Type, SecIdx});
989+
continue;
990+
}
955991

956-
if (Sym->st_shndx == SHN_COMMON) {
957-
if (Value == 0 || Value >= UINT32_MAX)
958-
fatal(toString(this) + ": common symbol '" + Name +
959-
"' has invalid alignment: " + Twine(Value));
960-
return Symtab->addSymbol(
961-
CommonSymbol{this, Name, Binding, StOther, Type, Value, Size});
962-
}
992+
// Handle global defined symbols.
993+
if (Binding == STB_GLOBAL || Binding == STB_WEAK ||
994+
Binding == STB_GNU_UNIQUE) {
995+
resolveSymbol(this->Symbols[I], Defined{this, Name, Binding, StOther,
996+
Type, Value, Size, Sec});
997+
continue;
998+
}
963999

964-
switch (Binding) {
965-
default:
9661000
fatal(toString(this) + ": unexpected binding: " + Twine((int)Binding));
967-
case STB_GLOBAL:
968-
case STB_WEAK:
969-
case STB_GNU_UNIQUE:
970-
return Symtab->addSymbol(
971-
Defined{this, Name, Binding, StOther, Type, Value, Size, Sec});
9721001
}
9731002
}
9741003

@@ -1455,10 +1484,16 @@ InputFile *LazyObjFile::fetch() {
14551484

14561485
InputFile *File = createObjectFile(MBRef, ArchiveName, OffsetInArchive);
14571486
File->GroupId = GroupId;
1487+
1488+
// Copy symbol vector so that the new InputFile doesn't have to
1489+
// insert the same defined symbols to the symbol table again.
1490+
File->Symbols = std::move(Symbols);
14581491
return File;
14591492
}
14601493

14611494
template <class ELFT> void LazyObjFile::parse() {
1495+
using Elf_Sym = typename ELFT::Sym;
1496+
14621497
// A lazy object file wraps either a bitcode file or an ELF file.
14631498
if (isBitcode(this->MB)) {
14641499
std::unique_ptr<lto::InputFile> Obj =
@@ -1476,23 +1511,36 @@ template <class ELFT> void LazyObjFile::parse() {
14761511
return;
14771512
}
14781513

1514+
// Find a symbol table.
14791515
ELFFile<ELFT> Obj = check(ELFFile<ELFT>::create(MB.getBuffer()));
14801516
ArrayRef<typename ELFT::Shdr> Sections = CHECK(Obj.sections(), this);
14811517

14821518
for (const typename ELFT::Shdr &Sec : Sections) {
14831519
if (Sec.sh_type != SHT_SYMTAB)
14841520
continue;
14851521

1486-
typename ELFT::SymRange Syms = CHECK(Obj.symbols(&Sec), this);
1522+
// A symbol table is found.
1523+
ArrayRef<Elf_Sym> ESyms = CHECK(Obj.symbols(&Sec), this);
14871524
uint32_t FirstGlobal = Sec.sh_info;
1488-
StringRef StringTable =
1489-
CHECK(Obj.getStringTableForSymtab(Sec, Sections), this);
1490-
1491-
for (const typename ELFT::Sym &Sym : Syms.slice(FirstGlobal)) {
1492-
if (Sym.st_shndx == SHN_UNDEF)
1525+
StringRef Strtab = CHECK(Obj.getStringTableForSymtab(Sec, Sections), this);
1526+
this->Symbols.resize(ESyms.size());
1527+
1528+
// Get existing symbols or insert placeholder symbols.
1529+
for (size_t I = FirstGlobal, End = ESyms.size(); I != End; ++I)
1530+
if (ESyms[I].st_shndx != SHN_UNDEF)
1531+
this->Symbols[I] = Symtab->insert(CHECK(ESyms[I].getName(Strtab), this));
1532+
1533+
// Replace existing symbols with LazyObject symbols.
1534+
//
1535+
// resolveSymbol() may trigger this->fetch() if an existing symbol
1536+
// is an undefined symbol. If that happens, this LazyObjFile has
1537+
// served its purpose, and we can exit from the loop early.
1538+
for (Symbol *Sym : this->Symbols) {
1539+
if (!Sym)
14931540
continue;
1494-
Symtab->addSymbol(
1495-
LazyObject{*this, CHECK(Sym.getName(StringTable), this)});
1541+
resolveSymbol(Sym, LazyObject{*this, Sym->getName()});
1542+
if (AddedToLink)
1543+
return;
14961544
}
14971545
return;
14981546
}

lld/ELF/InputFiles.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,10 +139,11 @@ class InputFile {
139139
// Index of MIPS GOT built for this file.
140140
llvm::Optional<size_t> MipsGotIndex;
141141

142+
std::vector<Symbol *> Symbols;
143+
142144
protected:
143145
InputFile(Kind K, MemoryBufferRef M);
144146
std::vector<InputSectionBase *> Sections;
145-
std::vector<Symbol *> Symbols;
146147

147148
private:
148149
const Kind FileKind;
@@ -255,7 +256,6 @@ template <class ELFT> class ObjFile : public ELFFileBase {
255256
StringRef getSectionName(const Elf_Shdr &Sec);
256257

257258
bool shouldMerge(const Elf_Shdr &Sec);
258-
Symbol *createSymbol(const Elf_Sym *Sym);
259259

260260
// Each ELF symbol contains a section index which the symbol belongs to.
261261
// However, because the number of bits dedicated for that is limited, a

lld/ELF/SymbolTable.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ Symbol *SymbolTable::insert(StringRef Name) {
9292
Symbol *Sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
9393
SymVector.push_back(Sym);
9494

95+
Sym->setName(Name);
9596
Sym->SymbolKind = Symbol::PlaceholderKind;
9697
Sym->VersionId = Config->DefaultSymbolVersion;
9798
Sym->Visibility = STV_DEFAULT;

0 commit comments

Comments
 (0)