Skip to content

Commit 9ade4e2

Browse files
[NFCI][LTO][lld] Optimize away symbol copies within LTO global resolution in ELF (#106193)
`StringMap<T>` creates a [copy of the string](https://github.com/llvm/llvm-project/blob/d4c519e7b2ac21350ec08b23eda44bf4a2d3c974/llvm/include/llvm/ADT/StringMapEntry.h#L55-L58) for entry insertions and intentionally keep copies [since the implementation optimizes string memory usage](https://github.com/llvm/llvm-project/blob/d4c519e7b2ac21350ec08b23eda44bf4a2d3c974/llvm/include/llvm/ADT/StringMap.h#L124). On the other hand, linker keeps copies of symbol names [1] in `lld::elf::parseFiles` [2] before invoking `compileBitcodeFiles` [3]. This change proposes to optimize away string copies inside [LTO::GlobalResolutions](https://github.com/llvm/llvm-project/blob/24e791b4164986a1ca7776e3ae0292ef20d20c47/llvm/include/llvm/LTO/LTO.h#L409), which will make LTO indexing more memory efficient for ELF. There are similar opportunities for other (COFF, wasm, MachO) formats. The optimization takes place for lld (ELF) only. For the rest of use cases (gold plugin, `llvm-lto2`, etc), LTO owns a string saver to keep copies and use global resolution key for de-duplication. Together with @kazutakahirata's work to make `ComputeCrossModuleImport` more memory efficient, we see a ~20% peak memory usage reduction in a binary where peak memory usage needs to go down. Thanks to the optimization in 329ba52, the max (as opposed to the sum) of `ComputeCrossModuleImport` or `GlobalResolution` shows up in peak memory usage. * Regarding correctness, the set of [resolved](https://github.com/llvm/llvm-project/blob/80c47ad3aec9d7f22e1b1bdc88960a91b66f89f1/llvm/lib/LTO/LTO.cpp#L739) [per-module symbols](https://github.com/llvm/llvm-project/blob/80c47ad3aec9d7f22e1b1bdc88960a91b66f89f1/llvm/include/llvm/LTO/LTO.h#L188-L191) is a subset of [llvm::lto::InputFile::Symbols](https://github.com/llvm/llvm-project/blob/80c47ad3aec9d7f22e1b1bdc88960a91b66f89f1/llvm/include/llvm/LTO/LTO.h#L120). And bitcode symbol parsing saves symbol name when iterating `obj->symbols` in `BitcodeFile::parse` already. This change updates `BitcodeFile::parseLazy` to keep copies of per-module undefined symbols. * Presumably the undefined symbols in a LTO unit (copied in this patch in linker unique saver) is a small set compared with the set of symbols in global-resolution (copied before this patch), making this a worthwhile trade-off. Benchmarking this change alone shows measurable memory savings across various benchmarks. [1] ELF https://github.com/llvm/llvm-project/blob/1cea5c2138bef3d8fec75508df6dbb858e6e3560/lld/ELF/InputFiles.cpp#L1748 [2] https://github.com/llvm/llvm-project/blob/ef7b18a53c0d186dcda1e322be6035407fdedb55/lld/ELF/Driver.cpp#L2863 [3] https://github.com/llvm/llvm-project/blob/ef7b18a53c0d186dcda1e322be6035407fdedb55/lld/ELF/Driver.cpp#L2995
1 parent 80c47ad commit 9ade4e2

File tree

5 files changed

+52
-6
lines changed

5 files changed

+52
-6
lines changed

lld/ELF/InputFiles.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1804,6 +1804,10 @@ void BitcodeFile::parseLazy() {
18041804
auto *sym = symtab.insert(unique_saver().save(irSym.getName()));
18051805
sym->resolve(LazySymbol{*this});
18061806
symbols[i] = sym;
1807+
} else {
1808+
// Keep copies of per-module undefined symbols for LTO::GlobalResolutions
1809+
// usage.
1810+
unique_saver().save(irSym.getName());
18071811
}
18081812
}
18091813

lld/ELF/LTO.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ static lto::Config createConfig() {
135135
config->ltoValidateAllVtablesHaveTypeInfos;
136136
c.AllVtablesHaveTypeInfos = ctx.ltoAllVtablesHaveTypeInfos;
137137
c.AlwaysEmitRegularLTOObj = !config->ltoObjPath.empty();
138+
c.KeepSymbolNameCopies = false;
138139

139140
for (const llvm::StringRef &name : config->thinLTOModulesToCompile)
140141
c.ThinLTOModulesToCompile.emplace_back(name);

llvm/include/llvm/LTO/Config.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,11 @@ struct Config {
8888
/// want to know a priori all possible output files.
8989
bool AlwaysEmitRegularLTOObj = false;
9090

91+
/// If true, the LTO instance creates copies of the symbol names for LTO::run.
92+
/// The lld linker uses string saver to keep symbol names alive and doesn't
93+
/// need to create copies, so it can set this field to false.
94+
bool KeepSymbolNameCopies = true;
95+
9196
/// Allows non-imported definitions to get the potentially more constraining
9297
/// visibility from the prevailing definition. FromPrevailing is the default
9398
/// because it works for many binary formats. ELF can use the more optimized

llvm/include/llvm/LTO/LTO.h

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@
1515
#ifndef LLVM_LTO_LTO_H
1616
#define LLVM_LTO_LTO_H
1717

18+
#include <memory>
19+
20+
#include "llvm/ADT/DenseMap.h"
1821
#include "llvm/ADT/MapVector.h"
1922
#include "llvm/ADT/StringMap.h"
2023
#include "llvm/Bitcode/BitcodeReader.h"
@@ -23,6 +26,7 @@
2326
#include "llvm/Object/IRSymtab.h"
2427
#include "llvm/Support/Caching.h"
2528
#include "llvm/Support/Error.h"
29+
#include "llvm/Support/StringSaver.h"
2630
#include "llvm/Support/thread.h"
2731
#include "llvm/Transforms/IPO/FunctionAttrs.h"
2832
#include "llvm/Transforms/IPO/FunctionImport.h"
@@ -403,10 +407,19 @@ class LTO {
403407
};
404408
};
405409

410+
// GlobalResolutionSymbolSaver allocator.
411+
std::unique_ptr<llvm::BumpPtrAllocator> Alloc;
412+
413+
// Symbol saver for global resolution map.
414+
std::unique_ptr<llvm::StringSaver> GlobalResolutionSymbolSaver;
415+
406416
// Global mapping from mangled symbol names to resolutions.
407-
// Make this an optional to guard against accessing after it has been reset
417+
// Make this an unique_ptr to guard against accessing after it has been reset
408418
// (to reduce memory after we're done with it).
409-
std::optional<StringMap<GlobalResolution>> GlobalResolutions;
419+
std::unique_ptr<llvm::DenseMap<StringRef, GlobalResolution>>
420+
GlobalResolutions;
421+
422+
void releaseGlobalResolutionsMemory();
410423

411424
void addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms,
412425
ArrayRef<SymbolResolution> Res, unsigned Partition,

llvm/lib/LTO/LTO.cpp

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,10 @@ cl::opt<bool> EnableLTOInternalization(
7777
"enable-lto-internalization", cl::init(true), cl::Hidden,
7878
cl::desc("Enable global value internalization in LTO"));
7979

80+
static cl::opt<bool>
81+
LTOKeepSymbolCopies("lto-keep-symbol-copies", cl::init(false), cl::Hidden,
82+
cl::desc("Keep copies of symbols in LTO indexing"));
83+
8084
/// Indicate we are linking with an allocator that supports hot/cold operator
8185
/// new interfaces.
8286
extern cl::opt<bool> SupportsHotColdNew;
@@ -587,8 +591,14 @@ LTO::LTO(Config Conf, ThinBackend Backend,
587591
: Conf(std::move(Conf)),
588592
RegularLTO(ParallelCodeGenParallelismLevel, this->Conf),
589593
ThinLTO(std::move(Backend)),
590-
GlobalResolutions(std::make_optional<StringMap<GlobalResolution>>()),
591-
LTOMode(LTOMode) {}
594+
GlobalResolutions(
595+
std::make_unique<DenseMap<StringRef, GlobalResolution>>()),
596+
LTOMode(LTOMode) {
597+
if (Conf.KeepSymbolNameCopies || LTOKeepSymbolCopies) {
598+
Alloc = std::make_unique<BumpPtrAllocator>();
599+
GlobalResolutionSymbolSaver = std::make_unique<llvm::StringSaver>(*Alloc);
600+
}
601+
}
592602

593603
// Requires a destructor for MapVector<BitcodeModule>.
594604
LTO::~LTO() = default;
@@ -606,7 +616,12 @@ void LTO::addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms,
606616
assert(ResI != ResE);
607617
SymbolResolution Res = *ResI++;
608618

609-
auto &GlobalRes = (*GlobalResolutions)[Sym.getName()];
619+
StringRef SymbolName = Sym.getName();
620+
// Keep copies of symbols if the client of LTO says so.
621+
if (GlobalResolutionSymbolSaver && !GlobalResolutions->contains(SymbolName))
622+
SymbolName = GlobalResolutionSymbolSaver->save(SymbolName);
623+
624+
auto &GlobalRes = (*GlobalResolutions)[SymbolName];
610625
GlobalRes.UnnamedAddr &= Sym.isUnnamedAddr();
611626
if (Res.Prevailing) {
612627
assert(!GlobalRes.Prevailing &&
@@ -660,6 +675,14 @@ void LTO::addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms,
660675
}
661676
}
662677

678+
void LTO::releaseGlobalResolutionsMemory() {
679+
// Release GlobalResolutions dense-map itself.
680+
GlobalResolutions.reset();
681+
// Release the string saver memory.
682+
GlobalResolutionSymbolSaver.reset();
683+
Alloc.reset();
684+
}
685+
663686
static void writeToResolutionFile(raw_ostream &OS, InputFile *Input,
664687
ArrayRef<SymbolResolution> Res) {
665688
StringRef Path = Input->getName();
@@ -1771,7 +1794,7 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
17711794
// are no further accesses. We specifically want to do this before computing
17721795
// cross module importing, which adds to peak memory via the computed import
17731796
// and export lists.
1774-
GlobalResolutions.reset();
1797+
releaseGlobalResolutionsMemory();
17751798

17761799
if (Conf.OptLevel > 0)
17771800
ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,

0 commit comments

Comments
 (0)