Skip to content

Commit 95d21f6

Browse files
authored
[lld-macho] Reduce memory usage of printing thunks in map file (#122785)
This commit improves the memory efficiency of the lld-macho linker by optimizing how thunks are printed in the map file. Previously, the two vectors of input sections (regular inputs and thunks) were merged into a temporary vector before printing, which increased memory usage and, in some cases, caused the linker to run out of memory, as reported in comments on #120496. The new approach interleaves the printing of the two arrays of ConcatInputSection in sorted order without allocating additional memory for a merged array.
1 parent f9a8006 commit 95d21f6

File tree

2 files changed

+34
-34
lines changed

2 files changed

+34
-34
lines changed

lld/MachO/MapFile.cpp

Lines changed: 27 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -161,20 +161,6 @@ static uint64_t getSymSizeForMap(Defined *sym) {
161161
return sym->size;
162162
}
163163

164-
// Merges two vectors of input sections in order of their outSecOff values.
165-
// This approach creates a new (temporary) vector which is not ideal but the
166-
// ideal approach leads to a lot of code duplication.
167-
static std::vector<ConcatInputSection *>
168-
mergeOrderedInputs(ArrayRef<ConcatInputSection *> inputs1,
169-
ArrayRef<ConcatInputSection *> inputs2) {
170-
std::vector<ConcatInputSection *> vec(inputs1.size() + inputs2.size());
171-
std::merge(inputs1.begin(), inputs1.end(), inputs2.begin(), inputs2.end(),
172-
vec.begin(), [](ConcatInputSection *a, ConcatInputSection *b) {
173-
return a->outSecOff < b->outSecOff;
174-
});
175-
return vec;
176-
}
177-
178164
void macho::writeMapFile() {
179165
if (config->mapFile.empty())
180166
return;
@@ -217,15 +203,32 @@ void macho::writeMapFile() {
217203
seg->name.str().c_str(), osec->name.str().c_str());
218204
}
219205

220-
// Shared function to print an array of symbols.
221-
auto printIsecArrSyms = [&](const std::vector<ConcatInputSection *> &arr) {
222-
for (const ConcatInputSection *isec : arr) {
223-
for (Defined *sym : isec->symbols) {
224-
if (!(isPrivateLabel(sym->getName()) && getSymSizeForMap(sym) == 0))
225-
os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(),
226-
getSymSizeForMap(sym),
227-
readerToFileOrdinal[sym->getFile()],
228-
sym->getName().str().data());
206+
// Helper lambda that prints all symbols from one ConcatInputSection.
207+
auto printOne = [&](const ConcatInputSection *isec) {
208+
for (Defined *sym : isec->symbols) {
209+
if (!(isPrivateLabel(sym->getName()) && getSymSizeForMap(sym) == 0)) {
210+
os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(),
211+
getSymSizeForMap(sym),
212+
readerToFileOrdinal.lookup(sym->getFile()),
213+
sym->getName().str().data());
214+
}
215+
}
216+
};
217+
// Shared function to print one or two arrays of ConcatInputSection in
218+
// ascending outSecOff order. The second array is optional; if provided, we
219+
// interleave the printing in sorted order without allocating a merged temp
220+
// array.
221+
auto printIsecArrSyms = [&](ArrayRef<ConcatInputSection *> arr1,
222+
ArrayRef<ConcatInputSection *> arr2 = {}) {
223+
// Print both arrays in sorted order, interleaving as necessary.
224+
while (!arr1.empty() || !arr2.empty()) {
225+
if (!arr1.empty() && (arr2.empty() || arr1.front()->outSecOff <=
226+
arr2.front()->outSecOff)) {
227+
printOne(arr1.front());
228+
arr1 = arr1.drop_front();
229+
} else if (!arr2.empty()) {
230+
printOne(arr2.front());
231+
arr2 = arr2.drop_front();
229232
}
230233
}
231234
};
@@ -235,9 +238,7 @@ void macho::writeMapFile() {
235238
for (const OutputSegment *seg : outputSegments) {
236239
for (const OutputSection *osec : seg->getSections()) {
237240
if (auto *textOsec = dyn_cast<TextOutputSection>(osec)) {
238-
auto inputsAndThunks =
239-
mergeOrderedInputs(textOsec->inputs, textOsec->getThunks());
240-
printIsecArrSyms(inputsAndThunks);
241+
printIsecArrSyms(textOsec->inputs, textOsec->getThunks());
241242
} else if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) {
242243
printIsecArrSyms(concatOsec->inputs);
243244
} else if (osec == in.cStringSection || osec == in.objcMethnameSection) {

lld/test/MachO/arm64-thunks.s

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,7 @@
1717
# RUN: %lld -arch arm64 -dead_strip -lSystem -U _extern_sym -map %t/thunk.map -o %t/thunk %t/input.o
1818
# RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t/thunk | FileCheck %s
1919

20-
## Check that the thunks appear in the map file and that everything is sorted by address
21-
# Because of the `.space` instructions, there will end up being a lot of dead symbols in the
22-
# linker map (linker map will be ~2.7GB). So to avoid the test trying to (slowly) match regex
23-
# across all the ~2.7GB of the linker map - generate a version of the linker map without dead symbols.
24-
# RUN: awk '/# Dead Stripped Symbols:/ {exit} {print}' %t/thunk.map > %t/thunk_no_dead_syms.map
25-
26-
# RUN: FileCheck %s --input-file %t/thunk_no_dead_syms.map --check-prefix=MAP
20+
# RUN: FileCheck %s --input-file %t/thunk.map --check-prefix=MAP
2721

2822
# MAP: 0x{{[[:xdigit:]]+}} {{.*}} _b
2923
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _c
@@ -339,7 +333,12 @@ _main:
339333
ret
340334

341335
.section __TEXT,__cstring
342-
.space 0x4000000
336+
# The .space below has to be composed of non-zero characters. Otherwise, the
337+
# linker will create a symbol for every zero (NUL) byte in the section, leading to
338+
# excessive memory usage and a huge linker map file.
339+
.space 0x4000000, 'A'
340+
.byte 0
341+
343342

344343
.section __TEXT,__lcxx_override,regular,pure_instructions
345344

0 commit comments

Comments
 (0)