Skip to content

Commit 1abc844

Browse files
author
Alex B
committed
[lld-macho] Implement ObjC category merging (-merge-objc-categories)
This change adds a flag to lld to enable category merging for Mach-O + ObjC. If multiple categories in the same link unit extend the same class, they get merged into a single category. Notes on implementation decisions made in this diff: 1. It is possible to improve the current implementation further by merging the category data directly into the base class (if the base class is present in the link unit) - this may be done as a follow-up. 2. We do the merging as early as possible, on the raw inputSections. 3. We add a new flag for ObjFile (isLinkerGenerated) and create such an ObjFile, to which all new linker-generated data belongs. 4. We add a new flag (linkerOptimizeReason) to ConcatInputSection and StringPiece to mark that this data has been optimized away. Another way to do it would have been to just mark the pieces as not 'live', but this would require some work-arounds in the actual live symbol determination logic and would also cause symbols to incorrectly show up as 'dead-stripped' when that is not the reason they are not present.
1 parent cb6c0f1 commit 1abc844

File tree

11 files changed

+1803
-33
lines changed

11 files changed

+1803
-33
lines changed

lld/MachO/Driver.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1973,15 +1973,20 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
19731973
}
19741974

19751975
gatherInputSections();
1976+
1977+
// Run category checking & merging before anything else, it operates
1978+
// directly on inputSections.
1979+
if (args.hasArg(OPT_check_category_conflicts))
1980+
objc::checkCategories();
1981+
1982+
if (args.hasArg(OPT_merge_objc_categories))
1983+
objc::mergeCategories();
1984+
19761985
if (config->callGraphProfileSort)
19771986
priorityBuilder.extractCallGraphProfile();
19781987

19791988
if (config->deadStrip)
19801989
markLive();
1981-
1982-
if (args.hasArg(OPT_check_category_conflicts))
1983-
objc::checkCategories();
1984-
19851990
// ICF assumes that all literals have been folded already, so we must run
19861991
// foldIdenticalLiterals before foldIdenticalSections.
19871992
foldIdenticalLiterals();

lld/MachO/InputFiles.cpp

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -965,21 +965,23 @@ void ObjFile::parseLinkerOptions(SmallVectorImpl<StringRef> &LCLinkerOptions) {
965965
SmallVector<StringRef> macho::unprocessedLCLinkerOptions;
966966
ObjFile::ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
967967
bool lazy, bool forceHidden, bool compatArch,
968-
bool builtFromBitcode)
968+
bool builtFromBitcode, bool isLinkerGenerated)
969969
: InputFile(ObjKind, mb, lazy), modTime(modTime), forceHidden(forceHidden),
970-
builtFromBitcode(builtFromBitcode) {
970+
builtFromBitcode(builtFromBitcode), isLinkerGenerated(isLinkerGenerated) {
971971
this->archiveName = std::string(archiveName);
972972
this->compatArch = compatArch;
973-
if (lazy) {
974-
if (target->wordSize == 8)
975-
parseLazy<LP64>();
976-
else
977-
parseLazy<ILP32>();
978-
} else {
979-
if (target->wordSize == 8)
980-
parse<LP64>();
981-
else
982-
parse<ILP32>();
973+
if (!isLinkerGenerated) {
974+
if (lazy) {
975+
if (target->wordSize == 8)
976+
parseLazy<LP64>();
977+
else
978+
parseLazy<ILP32>();
979+
} else {
980+
if (target->wordSize == 8)
981+
parse<LP64>();
982+
else
983+
parse<ILP32>();
984+
}
983985
}
984986
}
985987

@@ -1103,6 +1105,8 @@ void ObjFile::parseDebugInfo() {
11031105
}
11041106

11051107
ArrayRef<data_in_code_entry> ObjFile::getDataInCode() const {
1108+
if (!mb.getBufferSize())
1109+
return {};
11061110
const auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
11071111
const load_command *cmd = findCommand(buf, LC_DATA_IN_CODE);
11081112
if (!cmd)
@@ -1113,6 +1117,8 @@ ArrayRef<data_in_code_entry> ObjFile::getDataInCode() const {
11131117
}
11141118

11151119
ArrayRef<uint8_t> ObjFile::getOptimizationHints() const {
1120+
if (!mb.getBufferSize())
1121+
return {};
11161122
const auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
11171123
if (auto *cmd =
11181124
findCommand<linkedit_data_command>(buf, LC_LINKER_OPTIMIZATION_HINT))

lld/MachO/InputFiles.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ class ObjFile final : public InputFile {
161161
public:
162162
ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
163163
bool lazy = false, bool forceHidden = false, bool compatArch = true,
164-
bool builtFromBitcode = false);
164+
bool builtFromBitcode = false, bool isLinkerGenerated = false);
165165
ArrayRef<llvm::MachO::data_in_code_entry> getDataInCode() const;
166166
ArrayRef<uint8_t> getOptimizationHints() const;
167167
template <class LP> void parse();
@@ -181,6 +181,7 @@ class ObjFile final : public InputFile {
181181
const uint32_t modTime;
182182
bool forceHidden;
183183
bool builtFromBitcode;
184+
bool isLinkerGenerated;
184185
std::vector<ConcatInputSection *> debugSections;
185186
std::vector<CallGraphEntry> callGraph;
186187
llvm::DenseMap<ConcatInputSection *, FDE> fdes;

lld/MachO/InputSection.h

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@
2424
namespace lld {
2525
namespace macho {
2626

27+
enum LinkerOptReason : uint8_t {
28+
NotOptimized,
29+
CategoryMerging,
30+
};
31+
2732
class InputFile;
2833
class OutputSection;
2934

@@ -60,6 +65,7 @@ class InputSection {
6065
// Whether the data at \p off in this InputSection is live.
6166
virtual bool isLive(uint64_t off) const = 0;
6267
virtual void markLive(uint64_t off) = 0;
68+
virtual bool isLinkOptimizedAway() const { return false; }
6369
virtual InputSection *canonical() { return this; }
6470
virtual const InputSection *canonical() const { return this; }
6571

@@ -93,9 +99,9 @@ class InputSection {
9399
// .subsections_via_symbols, there is typically only one element here.
94100
llvm::TinyPtrVector<Defined *> symbols;
95101

96-
protected:
97102
const Section &section;
98103

104+
protected:
99105
const Defined *getContainingSymbol(uint64_t off) const;
100106
};
101107

@@ -114,7 +120,12 @@ class ConcatInputSection final : public InputSection {
114120
bool isLive(uint64_t off) const override { return live; }
115121
void markLive(uint64_t off) override { live = true; }
116122
bool isCoalescedWeak() const { return wasCoalesced && symbols.empty(); }
117-
bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); }
123+
bool isLinkOptimizedAway() const override {
124+
return linkerOptimizeReason != LinkerOptReason::NotOptimized;
125+
}
126+
bool shouldOmitFromOutput() const {
127+
return isLinkOptimizedAway() || !live || isCoalescedWeak();
128+
}
118129
void writeTo(uint8_t *buf);
119130

120131
void foldIdentical(ConcatInputSection *redundant);
@@ -141,6 +152,11 @@ class ConcatInputSection final : public InputSection {
141152
// first and not copied to the output.
142153
bool wasCoalesced = false;
143154
bool live = !config->deadStrip;
155+
// Flag to specify if a linker optimization flagged this section to be
156+
// discarded. Need a separate flag from live as live specifically means
157+
// 'dead-stripped' which is relevant in contexts such as linker map
158+
// generation
159+
LinkerOptReason linkerOptimizeReason = LinkerOptReason::NotOptimized;
144160
bool hasCallSites = false;
145161
// This variable has two usages. Initially, it represents the input order.
146162
// After assignAddresses is called, it represents the offset from the
@@ -176,10 +192,18 @@ struct StringPiece {
176192
// Only set if deduplicating literals
177193
uint32_t hash : 31;
178194
// Offset from the start of the containing output section.
179-
uint64_t outSecOff = 0;
195+
uint64_t outSecOff : 56;
196+
LinkerOptReason linkerOptimizeReason : 8;
197+
198+
bool shouldOmitFromOutput() const {
199+
return !live || linkerOptimizeReason != LinkerOptReason::NotOptimized;
200+
}
180201

181202
StringPiece(uint64_t off, uint32_t hash)
182-
: inSecOff(off), live(!config->deadStrip), hash(hash) {}
203+
: inSecOff(off), live(!config->deadStrip), hash(hash) {
204+
outSecOff = 0;
205+
linkerOptimizeReason = LinkerOptReason::NotOptimized;
206+
}
183207
};
184208

185209
static_assert(sizeof(StringPiece) == 16, "StringPiece is too big!");

lld/MachO/MapFile.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ static MapInfo gatherMapInfo() {
8080
if (d->isec && d->getFile() == file &&
8181
!isa<CStringInputSection>(d->isec)) {
8282
isReferencedFile = true;
83-
if (!d->isLive())
83+
if (!d->isLive() && (!d->isec || !d->isec->isLinkOptimizedAway()))
8484
info.deadSymbols.push_back(d);
8585
}
8686
}
@@ -93,6 +93,8 @@ static MapInfo gatherMapInfo() {
9393
if (auto isec = dyn_cast<CStringInputSection>(subsec.isec)) {
9494
auto &liveCStrings = info.liveCStringsForSection[isec->parent];
9595
for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) {
96+
if (piece.linkerOptimizeReason != LinkerOptReason::NotOptimized)
97+
continue;
9698
if (piece.live)
9799
liveCStrings.push_back({isec->parent->addr + piece.outSecOff,
98100
{fileIndex, isec->getStringRef(i)}});
@@ -203,6 +205,8 @@ void macho::writeMapFile() {
203205
for (const OutputSection *osec : seg->getSections()) {
204206
if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) {
205207
for (const InputSection *isec : concatOsec->inputs) {
208+
if (isec->isLinkOptimizedAway())
209+
continue;
206210
for (Defined *sym : isec->symbols)
207211
if (!(isPrivateLabel(sym->getName()) && sym->size == 0))
208212
os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(),

lld/MachO/MarkLive.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,8 @@ void markLive() {
259259
dyn_cast_or_null<DylibSymbol>(symtab->find("dyld_stub_binder")))
260260
marker->addSym(stubBinder);
261261
for (ConcatInputSection *isec : inputSections) {
262+
if (isec->isLinkOptimizedAway())
263+
continue;
262264
// Sections marked no_dead_strip
263265
if (isec->getFlags() & S_ATTR_NO_DEAD_STRIP) {
264266
marker->enqueue(isec, 0);

0 commit comments

Comments
 (0)