Skip to content

Commit 64498c5

Browse files
[LTO][ELF][lld] Use unique string saver in ELF bitcode symbol parsing (#106670)
lld ELF [BitcodeFile](https://github.com/llvm/llvm-project/blob/a527248a3c2d638b0c92a06992f3f1c1f80842ad/lld/ELF/InputFiles.h#L328) uses a [string saver](https://github.com/llvm/llvm-project/blob/a527248a3c2d638b0c92a06992f3f1c1f80842ad/lld/include/lld/Common/CommonLinkerContext.h#L57) to keep copies of bitcode symbol names. Symbol duplication is very common when compiling application binaries. This change introduces a UniqueStringSaver in the lld context and uses it for bitcode symbol parsing. The implementation covers ELF only; similar opportunities should exist for the other formats (COFF, MachO, wasm). For an internal production binary where LTO indexing originally takes ~10GiB, this change saves ~800MiB (~7.8%), as measured by https://github.com/google/pprof. A flame graph that breaks down memory usage by call stack agrees with this measurement.
1 parent f00c946 commit 64498c5

File tree

2 files changed

+13
-3
lines changed

2 files changed

+13
-3
lines changed

lld/ELF/InputFiles.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1744,8 +1744,10 @@ createBitcodeSymbol(Symbol *&sym, const std::vector<bool> &keptComdats,
17441744
uint8_t type = objSym.isTLS() ? STT_TLS : STT_NOTYPE;
17451745
uint8_t visibility = mapVisibility(objSym.getVisibility());
17461746

1747+
// Symbols can be duplicated in bitcode files because of '#include' and
1748+
// linkonce_odr. Use unique_saver to save symbol names for de-duplication.
17471749
if (!sym)
1748-
sym = symtab.insert(saver().save(objSym.getName()));
1750+
sym = symtab.insert(unique_saver().save(objSym.getName()));
17491751

17501752
int c = objSym.getComdatIndex();
17511753
if (objSym.isUndefined() || (c != -1 && !keptComdats[c])) {
@@ -1797,7 +1799,9 @@ void BitcodeFile::parseLazy() {
17971799
symbols = std::make_unique<Symbol *[]>(numSymbols);
17981800
for (auto [i, irSym] : llvm::enumerate(obj->symbols()))
17991801
if (!irSym.isUndefined()) {
1800-
auto *sym = symtab.insert(saver().save(irSym.getName()));
1802+
// Symbols can be duplicated in bitcode files because of '#include' and
1803+
// linkonce_odr. Use unique_saver to save symbol names for de-duplication.
1804+
auto *sym = symtab.insert(unique_saver().save(irSym.getName()));
18011805
sym->resolve(LazySymbol{*this});
18021806
symbols[i] = sym;
18031807
}

lld/include/lld/Common/CommonLinkerContext.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ class CommonLinkerContext {
3838

3939
llvm::BumpPtrAllocator bAlloc;
4040
llvm::StringSaver saver{bAlloc};
41+
llvm::UniqueStringSaver unique_saver{bAlloc};
4142
llvm::DenseMap<void *, SpecificAllocBase *> instances;
4243

4344
ErrorHandler e;
@@ -54,8 +55,13 @@ template <typename T = CommonLinkerContext> T &context() {
5455

5556
bool hasContext();
5657

57-
inline llvm::StringSaver &saver() { return context().saver; }
5858
inline llvm::BumpPtrAllocator &bAlloc() { return context().bAlloc; }
59+
inline llvm::StringSaver &saver() { return context().saver; }
60+
inline llvm::UniqueStringSaver &unique_saver() {
61+
// FIXME: Look into other places where duplications are common in saved
62+
// strings and a unique saver makes sense.
63+
return context().unique_saver;
64+
}
5965
} // namespace lld
6066

6167
#endif

0 commit comments

Comments
 (0)