Skip to content

Commit 428a7c1

Browse files
committed
[lld-macho] Have ICF operate on all sections at once
ICF previously operated only within a given OutputSection. We would merge all CFStrings first, then merge all regular code sections in a second phase. This worked fine since CFStrings would never reference regular `__text` sections. However, I would like to expand ICF to merge functions that reference unwind info. Unwind info references the LSDA section, which can in turn reference the `__text` section, so we cannot perform ICF in phases. In order to have ICF operate on InputSections spanning multiple OutputSections, we need a way to distinguish InputSections that are destined for different OutputSections, so that we don't fold across section boundaries. We achieve this by creating OutputSections early, and setting `InputSection::parent` to point to them. This is what LLD-ELF does. (This change should also make it easier to implement the `section$start$` symbols.) This diff also folds InputSections w/o checking their flags, which I think is the right behavior -- if they are destined for the same OutputSection, they will have the same flags in the output (even if their input flags differ). I.e. the `parent` pointer check subsumes the `flags` check. In practice this has nearly no effect (ICF did not become any more effective on chromium_framework). I've also updated ICF.cpp's block comment to better reflect its current status. Reviewed By: #lld-macho, smeenai Differential Revision: https://reviews.llvm.org/D105641
1 parent 182ba8a commit 428a7c1

File tree

11 files changed

+99
-56
lines changed

11 files changed

+99
-56
lines changed

lld/MachO/ConcatOutputSection.cpp

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,10 @@ using namespace llvm::MachO;
2424
using namespace lld;
2525
using namespace lld::macho;
2626

27+
MapVector<NamePair, ConcatOutputSection *> macho::concatOutputSections;
28+
2729
void ConcatOutputSection::addInput(ConcatInputSection *input) {
30+
assert(input->parent == this);
2831
if (inputs.empty()) {
2932
align = input->align;
3033
flags = input->getFlags();
@@ -33,7 +36,6 @@ void ConcatOutputSection::addInput(ConcatInputSection *input) {
3336
finalizeFlags(input);
3437
}
3538
inputs.push_back(input);
36-
input->parent = this;
3739
}
3840

3941
// Branch-range extension can be implemented in two ways, either through ...
@@ -357,3 +359,22 @@ void ConcatOutputSection::finalizeFlags(InputSection *input) {
357359
break;
358360
}
359361
}
362+
363+
ConcatOutputSection *
364+
ConcatOutputSection::getOrCreateForInput(const InputSection *isec) {
365+
NamePair names = maybeRenameSection({isec->getSegName(), isec->getName()});
366+
ConcatOutputSection *&osec = concatOutputSections[names];
367+
if (!osec)
368+
osec = make<ConcatOutputSection>(names.second);
369+
return osec;
370+
}
371+
372+
NamePair macho::maybeRenameSection(NamePair key) {
373+
auto newNames = config->sectionRenameMap.find(key);
374+
if (newNames != config->sectionRenameMap.end())
375+
return newNames->second;
376+
auto newName = config->segmentRenameMap.find(key.first);
377+
if (newName != config->segmentRenameMap.end())
378+
return std::make_pair(newName->second, key.second);
379+
return key;
380+
}

lld/MachO/ConcatOutputSection.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class ConcatOutputSection final : public OutputSection {
3131

3232
const ConcatInputSection *firstSection() const { return inputs.front(); }
3333
const ConcatInputSection *lastSection() const { return inputs.back(); }
34+
bool isNeeded() const override { return !inputs.empty(); }
3435

3536
// These accessors will only be valid after finalizing the section
3637
uint64_t getSize() const override { return size; }
@@ -50,6 +51,8 @@ class ConcatOutputSection final : public OutputSection {
5051
return sec->kind() == ConcatKind;
5152
}
5253

54+
static ConcatOutputSection *getOrCreateForInput(const InputSection *);
55+
5356
private:
5457
void finalizeFlags(InputSection *input);
5558

@@ -79,6 +82,12 @@ struct ThunkInfo {
7982
uint8_t sequence = 0; // how many thunks created so-far?
8083
};
8184

85+
NamePair maybeRenameSection(NamePair key);
86+
87+
// Output sections are added to output segments in iteration order
88+
// of ConcatOutputSection, so must have deterministic iteration order.
89+
extern llvm::MapVector<NamePair, ConcatOutputSection *> concatOutputSections;
90+
8291
extern llvm::DenseMap<Symbol *, ThunkInfo> thunkMap;
8392

8493
} // namespace macho

lld/MachO/Driver.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,7 @@ static void compileBitcodeFiles() {
577577
// any CommonSymbols.
578578
static void replaceCommonSymbols() {
579579
TimeTraceScope timeScope("Replace common symbols");
580+
ConcatOutputSection *osec = nullptr;
580581
for (Symbol *sym : symtab->getSymbols()) {
581582
auto *common = dyn_cast<CommonSymbol>(sym);
582583
if (common == nullptr)
@@ -589,6 +590,9 @@ static void replaceCommonSymbols() {
589590
auto *isec = make<ConcatInputSection>(
590591
segment_names::data, section_names::common, common->getFile(), data,
591592
common->align, S_ZEROFILL);
593+
if (!osec)
594+
osec = ConcatOutputSection::getOrCreateForInput(isec);
595+
isec->parent = osec;
592596
inputSections.push_back(isec);
593597

594598
// FIXME: CommonSymbol should store isReferencedDynamically, noDeadStrip
@@ -1037,6 +1041,7 @@ static void gatherInputSections() {
10371041
int inputOrder = 0;
10381042
for (const InputFile *file : inputFiles) {
10391043
for (const SubsectionMap &map : file->subsections) {
1044+
ConcatOutputSection *osec = nullptr;
10401045
for (const SubsectionEntry &entry : map) {
10411046
if (auto *isec = dyn_cast<ConcatInputSection>(entry.isec)) {
10421047
if (isec->isCoalescedWeak())
@@ -1047,6 +1052,9 @@ static void gatherInputSections() {
10471052
continue;
10481053
}
10491054
isec->outSecOff = inputOrder++;
1055+
if (!osec)
1056+
osec = ConcatOutputSection::getOrCreateForInput(isec);
1057+
isec->parent = osec;
10501058
inputSections.push_back(isec);
10511059
} else if (auto *isec = dyn_cast<CStringInputSection>(entry.isec)) {
10521060
if (in.cStringSection->inputOrder == UnspecifiedInputOrder)

lld/MachO/ICF.cpp

Lines changed: 13 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -52,22 +52,18 @@ ICF::ICF(std::vector<ConcatInputSection *> &inputs) {
5252
//
5353
// Summary of segments & sections:
5454
//
55-
// Since folding never occurs across output-section boundaries,
56-
// ConcatOutputSection is the natural input for ICF.
57-
//
5855
// The __TEXT segment is readonly at the MMU. Some sections are already
5956
// deduplicated elsewhere (__TEXT,__cstring & __TEXT,__literal*) and some are
6057
// synthetic and inherently free of duplicates (__TEXT,__stubs &
61-
// __TEXT,__unwind_info). We only run ICF on __TEXT,__text. One might hope ICF
62-
// could work on __TEXT,__concat, but doing so induces many test failures.
58+
// __TEXT,__unwind_info). Note that we don't yet run ICF on __TEXT,__const,
59+
// because doing so induces many test failures.
6360
//
6461
// The __LINKEDIT segment is readonly at the MMU, yet entirely synthetic, and
6562
// thus ineligible for ICF.
6663
//
6764
// The __DATA_CONST segment is read/write at the MMU, but is logically const to
68-
// the application after dyld applies fixups to pointer data. Some sections are
69-
// deduplicated elsewhere (__DATA_CONST,__cfstring), and some are synthetic
70-
// (__DATA_CONST,__got). There are no ICF opportunities here.
65+
// the application after dyld applies fixups to pointer data. We currently
66+
// fold only the __DATA_CONST,__cfstring section.
7167
//
7268
// The __DATA segment is read/write at the MMU, and as application-writeable
7369
// data, none of its sections are eligible for ICF.
@@ -84,12 +80,13 @@ static std::atomic<bool> icfRepeat{false};
8480
// Compare everything except the relocation referents
8581
static bool equalsConstant(const ConcatInputSection *ia,
8682
const ConcatInputSection *ib) {
83+
// We can only fold within the same OutputSection.
84+
if (ia->parent != ib->parent)
85+
return false;
8786
if (ia->data.size() != ib->data.size())
8887
return false;
8988
if (ia->data != ib->data)
9089
return false;
91-
if (ia->getFlags() != ib->getFlags())
92-
return false;
9390
if (ia->relocs.size() != ib->relocs.size())
9491
return false;
9592
auto f = [&](const Reloc &ra, const Reloc &rb) {
@@ -323,8 +320,6 @@ void macho::foldIdenticalSections() {
323320
// relocs to find every referenced InputSection, but that precludes easy
324321
// parallelization. Therefore, we hash every InputSection here where we have
325322
// them all accessible as simple vectors.
326-
std::vector<ConcatInputSection *> codeSections;
327-
std::vector<ConcatInputSection *> cfStringSections;
328323

329324
// ICF can't fold functions with unwind info
330325
DenseSet<const InputSection *> functionsWithUnwindInfo =
@@ -338,32 +333,22 @@ void macho::foldIdenticalSections() {
338333
// coexist with equivalence-class IDs, this is not necessary, but might help
339334
// someone keep the numbers straight in case we ever need to debug the
340335
// ICF::segregate()
336+
std::vector<ConcatInputSection *> hashable;
341337
uint64_t icfUniqueID = inputSections.size();
342338
for (ConcatInputSection *isec : inputSections) {
339+
// FIXME: consider non-code __text sections as hashable?
343340
bool isHashable = (isCodeSection(isec) || isCfStringSection(isec)) &&
344341
!isec->shouldOmitFromOutput() &&
345342
!functionsWithUnwindInfo.contains(isec) &&
346343
isec->isHashableForICF();
347-
if (isHashable) {
348-
if (isCodeSection(isec))
349-
codeSections.push_back(isec);
350-
else {
351-
assert(isCfStringSection(isec));
352-
cfStringSections.push_back(isec);
353-
}
354-
} else {
344+
if (isHashable)
345+
hashable.push_back(isec);
346+
else
355347
isec->icfEqClass[0] = ++icfUniqueID;
356-
}
357348
}
358-
std::vector<ConcatInputSection *> hashable(codeSections);
359-
hashable.insert(hashable.end(), cfStringSections.begin(),
360-
cfStringSections.end());
361349
parallelForEach(hashable,
362350
[](ConcatInputSection *isec) { isec->hashForICF(); });
363351
// Now that every input section is either hashed or marked as unique, run the
364352
// segregation algorithm to detect foldable subsections.
365-
// We dedup cfStringSections first since code sections may refer to them, but
366-
// not vice-versa.
367-
ICF(cfStringSections).run();
368-
ICF(codeSections).run();
353+
ICF(hashable).run();
369354
}

lld/MachO/InputSection.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
//===----------------------------------------------------------------------===//
88

99
#include "InputSection.h"
10+
#include "ConcatOutputSection.h"
1011
#include "Config.h"
1112
#include "InputFiles.h"
1213
#include "OutputSegment.h"

lld/MachO/SyntheticSections.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
#include "OutputSegment.h"
1616
#include "SymbolTable.h"
1717
#include "Symbols.h"
18-
#include "Writer.h"
1918

2019
#include "lld/Common/ErrorHandler.h"
2120
#include "lld/Common/Memory.h"
@@ -596,6 +595,8 @@ void StubHelperSection::setup() {
596595

597596
in.got->addEntry(stubBinder);
598597

598+
in.imageLoaderCache->parent =
599+
ConcatOutputSection::getOrCreateForInput(in.imageLoaderCache);
599600
inputSections.push_back(in.imageLoaderCache);
600601
// Since this isn't in the symbol table or in any input file, the noDeadStrip
601602
// argument doesn't matter. It's kept alive by ImageLoaderCacheSection()

lld/MachO/SyntheticSections.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "OutputSection.h"
1616
#include "OutputSegment.h"
1717
#include "Target.h"
18+
#include "Writer.h"
1819

1920
#include "llvm/ADT/DenseMap.h"
2021
#include "llvm/ADT/Hashing.h"

lld/MachO/UnwindInfoSection.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ template <class Ptr>
144144
void UnwindInfoSectionImpl<Ptr>::addInput(ConcatInputSection *isec) {
145145
assert(isec->getSegName() == segment_names::ld &&
146146
isec->getName() == section_names::compactUnwind);
147+
isec->parent = compactUnwindSection;
147148
compactUnwindSection->addInput(isec);
148149
}
149150

lld/MachO/Writer.cpp

Lines changed: 3 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -76,10 +76,6 @@ class Writer {
7676

7777
LCUuid *uuidCommand = nullptr;
7878
OutputSegment *linkEditSegment = nullptr;
79-
80-
// Output sections are added to output segments in iteration order
81-
// of ConcatOutputSection, so must have deterministic iteration order.
82-
MapVector<NamePair, ConcatOutputSection *> concatOutputSections;
8379
};
8480

8581
// LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information.
@@ -879,16 +875,6 @@ static void sortSegmentsAndSections() {
879875
}
880876
}
881877

882-
NamePair macho::maybeRenameSection(NamePair key) {
883-
auto newNames = config->sectionRenameMap.find(key);
884-
if (newNames != config->sectionRenameMap.end())
885-
return newNames->second;
886-
auto newName = config->segmentRenameMap.find(key.first);
887-
if (newName != config->segmentRenameMap.end())
888-
return std::make_pair(newName->second, key.second);
889-
return key;
890-
}
891-
892878
template <class LP> void Writer::createOutputSections() {
893879
TimeTraceScope timeScope("Create output sections");
894880
// First, create hidden sections
@@ -919,10 +905,7 @@ template <class LP> void Writer::createOutputSections() {
919905
for (ConcatInputSection *isec : inputSections) {
920906
if (isec->shouldOmitFromOutput())
921907
continue;
922-
NamePair names = maybeRenameSection({isec->getSegName(), isec->getName()});
923-
ConcatOutputSection *&osec = concatOutputSections[names];
924-
if (!osec)
925-
osec = make<ConcatOutputSection>(names.second);
908+
ConcatOutputSection *osec = cast<ConcatOutputSection>(isec->parent);
926909
osec->addInput(isec);
927910
osec->inputOrder =
928911
std::min(osec->inputOrder, static_cast<int>(isec->outSecOff));
@@ -934,7 +917,8 @@ template <class LP> void Writer::createOutputSections() {
934917
StringRef segname = it.first.first;
935918
ConcatOutputSection *osec = it.second;
936919
assert(segname != segment_names::ld);
937-
getOrCreateOutputSegment(segname)->addOutputSection(osec);
920+
if (osec->isNeeded())
921+
getOrCreateOutputSegment(segname)->addOutputSection(osec);
938922
}
939923

940924
for (SyntheticSection *ssec : syntheticSections) {

lld/MachO/Writer.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@
99
#ifndef LLD_MACHO_WRITER_H
1010
#define LLD_MACHO_WRITER_H
1111

12-
#include "Config.h"
13-
1412
#include <cstdint>
1513

1614
namespace lld {
@@ -29,8 +27,6 @@ class LoadCommand {
2927

3028
template <class LP> void writeResult();
3129

32-
NamePair maybeRenameSection(NamePair key);
33-
3430
void createSyntheticSections();
3531

3632
// Add bindings for symbols that need weak or non-lazy bindings.

lld/test/MachO/icf.s

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,15 +25,17 @@
2525
# CHECK: [[#%x,SR]] g F __TEXT,__text _sr2
2626
# CHECK: [[#%x,MR:]] g F __TEXT,__text _mr1
2727
# CHECK: [[#%x,MR]] g F __TEXT,__text _mr2
28+
# CHECK: [[#%x,K1:]] g O __TEXT,__foo _k1
29+
# CHECK: [[#%x,A:]] g F __TEXT,__text _k2
2830
### FIXME: Mutually-recursive functions with identical bodies (see below)
2931
# COM: [[#%x,XR:]] g F __TEXT,__text _xr1
3032
# COM: [[#%x,XR]] g F __TEXT,__text _xr2
3133

3234
# CHECK-LABEL: Disassembly of section __TEXT,__text:
3335
# CHECK: [[#%x,MAIN]] <_main>:
34-
# CHECK-NEXT: callq 0x[[#%x,A]] <_a3>
35-
# CHECK-NEXT: callq 0x[[#%x,A]] <_a3>
36-
# CHECK-NEXT: callq 0x[[#%x,A]] <_a3>
36+
# CHECK-NEXT: callq 0x[[#%x,A]] <_k2>
37+
# CHECK-NEXT: callq 0x[[#%x,A]] <_k2>
38+
# CHECK-NEXT: callq 0x[[#%x,A]] <_k2>
3739
# CHECK-NEXT: callq 0x[[#%x,B]] <_b>
3840
# CHECK-NEXT: callq 0x[[#%x,B2]] <_b2>
3941
# CHECK-NEXT: callq 0x[[#%x,C]] <_c>
@@ -44,6 +46,8 @@
4446
# CHECK-NEXT: callq 0x[[#%x,H]] <_h>
4547
# CHECK-NEXT: callq 0x[[#%x,I]] <_i>
4648
# CHECK-NEXT: callq 0x[[#%x,J]] <_j>
49+
# CHECK-NEXT: callq 0x[[#%x,K1]] <_k1>
50+
# CHECK-NEXT: callq 0x[[#%x,A]] <_k2>
4751
# CHECK-NEXT: callq 0x[[#%x,SR]] <_sr2>
4852
# CHECK-NEXT: callq 0x[[#%x,SR]] <_sr2>
4953
# CHECK-NEXT: callq 0x[[#%x,MR]] <_mr2>
@@ -232,8 +236,38 @@ _j:
232236
ret
233237
.cfi_endproc
234238

239+
### No fold: _k1 is in a different section from _a1
240+
.section __TEXT,__foo
241+
.globl _k1
242+
.p2align 2
243+
_k1:
244+
callq _d
245+
mov ___nan@GOTPCREL(%rip), %rax
246+
callq ___isnan
247+
movabs $_abs1a, %rdx
248+
movl $0, %eax
249+
ret
250+
nopl (%rax)
251+
252+
### Fold: _k2 is in a section that gets renamed and output as __text
253+
.section __TEXT,__StaticInit
254+
.globl _k2
255+
.p2align 2
256+
_k2:
257+
callq _d
258+
mov ___nan@GOTPCREL(%rip), %rax
259+
callq ___isnan
260+
movabs $_abs1a, %rdx
261+
movl $0, %eax
262+
ret
263+
## For some reason, llvm-mc generates different nop encodings when adding
264+
## padding for __StaticInit vs __text functions. So we explicitly specify the
265+
## nop here to make sure this function can be folded with _a1.
266+
nopl (%rax)
267+
235268
### Fold: Simple recursion
236269

270+
.text
237271
.globl _sr1
238272
.p2align 2
239273
_sr1:
@@ -311,6 +345,8 @@ _main:
311345
callq _h
312346
callq _i
313347
callq _j
348+
callq _k1
349+
callq _k2
314350
callq _sr1
315351
callq _sr2
316352
callq _mr1

0 commit comments

Comments
 (0)