Skip to content

[lld-macho] Reduce memory usage of printing thunks in map file #122785

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 27 additions & 26 deletions lld/MachO/MapFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,20 +161,6 @@ static uint64_t getSymSizeForMap(Defined *sym) {
return sym->size;
}

// Produces one vector containing every section from `inputs1` and `inputs2`,
// arranged by ascending outSecOff. Both ranges are expected to be ordered by
// outSecOff already, as std::merge requires; on equal offsets the sections
// from `inputs1` are placed ahead of those from `inputs2`.
// The result is a newly allocated temporary vector. That is not ideal, but the
// allocation-free alternative leads to a lot of duplicated code.
static std::vector<ConcatInputSection *>
mergeOrderedInputs(ArrayRef<ConcatInputSection *> inputs1,
                   ArrayRef<ConcatInputSection *> inputs2) {
  const auto byOutSecOff = [](const ConcatInputSection *lhs,
                              const ConcatInputSection *rhs) {
    return lhs->outSecOff < rhs->outSecOff;
  };

  // Pre-size the destination so std::merge can write through a plain iterator.
  std::vector<ConcatInputSection *> merged(inputs1.size() + inputs2.size());
  std::merge(inputs1.begin(), inputs1.end(), inputs2.begin(), inputs2.end(),
             merged.begin(), byOutSecOff);
  return merged;
}

void macho::writeMapFile() {
if (config->mapFile.empty())
return;
Expand Down Expand Up @@ -217,15 +203,32 @@ void macho::writeMapFile() {
seg->name.str().c_str(), osec->name.str().c_str());
}

// Shared function to print an array of symbols.
auto printIsecArrSyms = [&](const std::vector<ConcatInputSection *> &arr) {
for (const ConcatInputSection *isec : arr) {
for (Defined *sym : isec->symbols) {
if (!(isPrivateLabel(sym->getName()) && getSymSizeForMap(sym) == 0))
os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(),
getSymSizeForMap(sym),
readerToFileOrdinal[sym->getFile()],
sym->getName().str().data());
// Writes one map-file line for each symbol attached to `isec`. A symbol is
// left out when isPrivateLabel() accepts its name and its map size is zero.
auto printOne = [&](const ConcatInputSection *isec) {
  for (Defined *sym : isec->symbols) {
    if (isPrivateLabel(sym->getName()) && getSymSizeForMap(sym) == 0)
      continue;
    // Keep format() inside the stream expression: the name pointer refers to
    // the temporary returned by str(), which must stay alive until the write.
    os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(),
                 getSymSizeForMap(sym),
                 readerToFileOrdinal.lookup(sym->getFile()),
                 sym->getName().str().data());
  }
};
// Shared function to print one or two arrays of ConcatInputSection in
// ascending outSecOff order. The second array is optional; if provided, we
// interleave the printing in sorted order without allocating a merged temp
// array.
auto printIsecArrSyms = [&](ArrayRef<ConcatInputSection *> arr1,
ArrayRef<ConcatInputSection *> arr2 = {}) {
// Print both arrays in sorted order, interleaving as necessary.
while (!arr1.empty() || !arr2.empty()) {
if (!arr1.empty() && (arr2.empty() || arr1.front()->outSecOff <=
arr2.front()->outSecOff)) {
printOne(arr1.front());
arr1 = arr1.drop_front();
} else if (!arr2.empty()) {
printOne(arr2.front());
arr2 = arr2.drop_front();
}
}
};
Expand All @@ -235,9 +238,7 @@ void macho::writeMapFile() {
for (const OutputSegment *seg : outputSegments) {
for (const OutputSection *osec : seg->getSections()) {
if (auto *textOsec = dyn_cast<TextOutputSection>(osec)) {
auto inputsAndThunks =
mergeOrderedInputs(textOsec->inputs, textOsec->getThunks());
printIsecArrSyms(inputsAndThunks);
printIsecArrSyms(textOsec->inputs, textOsec->getThunks());
} else if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) {
printIsecArrSyms(concatOsec->inputs);
} else if (osec == in.cStringSection || osec == in.objcMethnameSection) {
Expand Down
15 changes: 7 additions & 8 deletions lld/test/MachO/arm64-thunks.s
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,7 @@
# RUN: %lld -arch arm64 -dead_strip -lSystem -U _extern_sym -map %t/thunk.map -o %t/thunk %t/input.o
# RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t/thunk | FileCheck %s

## Check that the thunks appear in the map file and that everything is sorted by address
# Because of the `.space` instructions, there will end up being a lot of dead symbols in the
# linker map (linker map will be ~2.7GB). So to avoid the test trying to (slowly) match regex
# across all the ~2.7GB of the linker map - generate a version of the linker map without dead symbols.
# RUN: awk '/# Dead Stripped Symbols:/ {exit} {print}' %t/thunk.map > %t/thunk_no_dead_syms.map

# RUN: FileCheck %s --input-file %t/thunk_no_dead_syms.map --check-prefix=MAP
# RUN: FileCheck %s --input-file %t/thunk.map --check-prefix=MAP

# MAP: 0x{{[[:xdigit:]]+}} {{.*}} _b
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _c
Expand Down Expand Up @@ -339,7 +333,12 @@ _main:
ret

.section __TEXT,__cstring
.space 0x4000000
# The .space below has to be filled with non-zero bytes. Otherwise, the
# linker will create a symbol for every zero (NUL) byte in the section, leading
# to dramatic memory usage and a huge linker map file.
.space 0x4000000, 'A'
.byte 0


.section __TEXT,__lcxx_override,regular,pure_instructions

Expand Down
Loading