Skip to content

Commit 742a82a

Browse files
authored
[lld-macho] Implement support for ObjC relative method lists (#86231)
The MachO format supports relative offsets for ObjC method lists. This support is already present in ld64; with this change we implement it in lld as well. Relative method lists are identified by a specific flag (0x80000000) in the method list header. When this flag is present, the method list contains 32-bit offsets instead of absolute pointers; each offset is relative to the address of the offset field itself (i.e. PC-relative style). Additionally, when relative method lists are used, the offset for the selector name is relative as well and points to the selector reference (selref) instead of the name itself.
1 parent 552c8eb commit 742a82a

File tree

11 files changed

+583
-7
lines changed

11 files changed

+583
-7
lines changed

lld/MachO/Config.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ struct Configuration {
135135
bool emitEncryptionInfo = false;
136136
bool emitInitOffsets = false;
137137
bool emitChainedFixups = false;
138+
bool emitRelativeMethodLists = false;
138139
bool thinLTOEmitImportsFiles;
139140
bool thinLTOEmitIndexFiles;
140141
bool thinLTOIndexOnly;

lld/MachO/Driver.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1086,6 +1086,22 @@ static bool shouldEmitChainedFixups(const InputArgList &args) {
10861086
return isRequested;
10871087
}
10881088

1089+
static bool shouldEmitRelativeMethodLists(const InputArgList &args) {
1090+
const Arg *arg = args.getLastArg(OPT_objc_relative_method_lists,
1091+
OPT_no_objc_relative_method_lists);
1092+
if (arg && arg->getOption().getID() == OPT_objc_relative_method_lists)
1093+
return true;
1094+
if (arg && arg->getOption().getID() == OPT_no_objc_relative_method_lists)
1095+
return false;
1096+
1097+
// TODO: If no flag is specified, don't default to false, but instead:
1098+
// - default false on < ios14
1099+
// - default true on >= ios14
1100+
// For now, until this feature is confirmed stable, default to false if no
1101+
// flag is explicitly specified
1102+
return false;
1103+
}
1104+
10891105
void SymbolPatterns::clear() {
10901106
literals.clear();
10911107
globs.clear();
@@ -1630,6 +1646,7 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
16301646
config->emitChainedFixups = shouldEmitChainedFixups(args);
16311647
config->emitInitOffsets =
16321648
config->emitChainedFixups || args.hasArg(OPT_init_offsets);
1649+
config->emitRelativeMethodLists = shouldEmitRelativeMethodLists(args);
16331650
config->icfLevel = getICFLevel(args);
16341651
config->dedupStrings =
16351652
args.hasFlag(OPT_deduplicate_strings, OPT_no_deduplicate_strings, true);

lld/MachO/InputSection.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,14 @@ void lld::macho::addInputSection(InputSection *inputSection) {
4646
if (auto *isec = dyn_cast<ConcatInputSection>(inputSection)) {
4747
if (isec->isCoalescedWeak())
4848
return;
49+
if (config->emitRelativeMethodLists &&
50+
ObjCMethListSection::isMethodList(isec)) {
51+
if (in.objcMethList->inputOrder == UnspecifiedInputOrder)
52+
in.objcMethList->inputOrder = inputSectionsOrder++;
53+
in.objcMethList->addInput(isec);
54+
isec->parent = in.objcMethList;
55+
return;
56+
}
4957
if (config->emitInitOffsets &&
5058
sectionType(isec->getFlags()) == S_MOD_INIT_FUNC_POINTERS) {
5159
in.initOffsets->addInput(isec);

lld/MachO/InputSection.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,7 @@ constexpr const char moduleTermFunc[] = "__mod_term_func";
342342
constexpr const char nonLazySymbolPtr[] = "__nl_symbol_ptr";
343343
constexpr const char objcCatList[] = "__objc_catlist";
344344
constexpr const char objcClassList[] = "__objc_classlist";
345+
constexpr const char objcMethList[] = "__objc_methlist";
345346
constexpr const char objcClassRefs[] = "__objc_classrefs";
346347
constexpr const char objcConst[] = "__objc_const";
347348
constexpr const char objCImageInfo[] = "__objc_imageinfo";

lld/MachO/MapFile.cpp

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -197,18 +197,24 @@ void macho::writeMapFile() {
197197
seg->name.str().c_str(), osec->name.str().c_str());
198198
}
199199

200+
// Shared function to print an array of symbols.
201+
auto printIsecArrSyms = [&](const std::vector<ConcatInputSection *> &arr) {
202+
for (const ConcatInputSection *isec : arr) {
203+
for (Defined *sym : isec->symbols) {
204+
if (!(isPrivateLabel(sym->getName()) && sym->size == 0))
205+
os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(),
206+
sym->size, readerToFileOrdinal[sym->getFile()],
207+
sym->getName().str().data());
208+
}
209+
}
210+
};
211+
200212
os << "# Symbols:\n";
201213
os << "# Address\tSize \tFile Name\n";
202214
for (const OutputSegment *seg : outputSegments) {
203215
for (const OutputSection *osec : seg->getSections()) {
204216
if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) {
205-
for (const InputSection *isec : concatOsec->inputs) {
206-
for (Defined *sym : isec->symbols)
207-
if (!(isPrivateLabel(sym->getName()) && sym->size == 0))
208-
os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(),
209-
sym->size, readerToFileOrdinal[sym->getFile()],
210-
sym->getName().str().data());
211-
}
217+
printIsecArrSyms(concatOsec->inputs);
212218
} else if (osec == in.cStringSection || osec == in.objcMethnameSection) {
213219
const auto &liveCStrings = info.liveCStringsForSection.lookup(osec);
214220
uint64_t lastAddr = 0; // strings will never start at address 0, so this
@@ -237,6 +243,8 @@ void macho::writeMapFile() {
237243
printNonLazyPointerSection(os, in.got);
238244
} else if (osec == in.tlvPointers) {
239245
printNonLazyPointerSection(os, in.tlvPointers);
246+
} else if (osec == in.objcMethList) {
247+
printIsecArrSyms(in.objcMethList->getInputs());
240248
}
241249
// TODO print other synthetic sections
242250
}

lld/MachO/ObjC.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ constexpr const char klassPropList[] = "__OBJC_$_CLASS_PROP_LIST_";
2222
constexpr const char metaclass[] = "_OBJC_METACLASS_$_";
2323
constexpr const char ehtype[] = "_OBJC_EHTYPE_$_";
2424
constexpr const char ivar[] = "_OBJC_IVAR_$_";
25+
constexpr const char instanceMethods[] = "__OBJC_$_INSTANCE_METHODS_";
26+
constexpr const char classMethods[] = "__OBJC_$_CLASS_METHODS_";
2527
constexpr const char listProprieties[] = "__OBJC_$_PROP_LIST_";
2628

2729
constexpr const char category[] = "__OBJC_$_CATEGORY_";

lld/MachO/Options.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1284,6 +1284,12 @@ def fixup_chains_section : Flag<["-"], "fixup_chains_section">,
12841284
HelpText<"This option is undocumented in ld64">,
12851285
Flags<[HelpHidden]>,
12861286
Group<grp_undocumented>;
1287+
def objc_relative_method_lists : Flag<["-"], "objc_relative_method_lists">,
1288+
HelpText<"Emit relative method lists (more compact representation)">,
1289+
Group<grp_undocumented>;
1290+
def no_objc_relative_method_lists : Flag<["-"], "no_objc_relative_method_lists">,
1291+
HelpText<"Don't emit relative method lists (use traditional representation)">,
1292+
Group<grp_undocumented>;
12871293
def flto_codegen_only : Flag<["-"], "flto-codegen-only">,
12881294
HelpText<"This option is undocumented in ld64">,
12891295
Flags<[HelpHidden]>,

lld/MachO/SyntheticSections.cpp

Lines changed: 236 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "ExportTrie.h"
1313
#include "InputFiles.h"
1414
#include "MachOStructs.h"
15+
#include "ObjC.h"
1516
#include "OutputSegment.h"
1617
#include "SymbolTable.h"
1718
#include "Symbols.h"
@@ -1975,6 +1976,241 @@ void InitOffsetsSection::setUp() {
19751976
}
19761977
}
19771978

1979+
// Creates the synthetic section named section_names::objcMethList inside the
// segment named segment_names::text.
ObjCMethListSection::ObjCMethListSection()
    : SyntheticSection(segment_names::text, section_names::objcMethList) {
  // Entries written into this section are relativeOffsetSize wide, so that is
  // the alignment requested for the section.
  align = relativeOffsetSize;
  // Tag the section with the Mach-O "no dead strip" attribute.
  flags = S_ATTR_NO_DEAD_STRIP;
}
1984+
1985+
// Go through all input method lists and ensure that we have selrefs for all
1986+
// their method names. The selrefs will be needed later by ::writeTo. We need to
1987+
// create them early on here to ensure they are processed correctly by the lld
1988+
// pipeline.
1989+
void ObjCMethListSection::setUp() {
1990+
for (const ConcatInputSection *isec : inputs) {
1991+
uint32_t structSizeAndFlags = 0, structCount = 0;
1992+
readMethodListHeader(isec->data.data(), structSizeAndFlags, structCount);
1993+
uint32_t originalStructSize = structSizeAndFlags & structSizeMask;
1994+
// Method name is immediately after header
1995+
uint32_t methodNameOff = methodListHeaderSize;
1996+
1997+
// Loop through all methods, and ensure a selref for each of them exists.
1998+
while (methodNameOff < isec->data.size()) {
1999+
const Reloc *reloc = isec->getRelocAt(methodNameOff);
2000+
assert(reloc && "Relocation expected at method list name slot");
2001+
auto *def = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
2002+
assert(def && "Expected valid Defined at method list name slot");
2003+
auto *cisec = cast<CStringInputSection>(def->isec);
2004+
assert(cisec && "Expected method name to be in a CStringInputSection");
2005+
auto methname = cisec->getStringRefAtOffset(def->value);
2006+
if (!ObjCSelRefsHelper::getSelRef(methname))
2007+
ObjCSelRefsHelper::makeSelRef(methname);
2008+
2009+
// Jump to method name offset in next struct
2010+
methodNameOff += originalStructSize;
2011+
}
2012+
}
2013+
}
2014+
2015+
// Calculate section size and final offsets for where InputSection's need to be
2016+
// written.
2017+
void ObjCMethListSection::finalize() {
2018+
// sectionSize will be the total size of the __objc_methlist section
2019+
sectionSize = 0;
2020+
for (ConcatInputSection *isec : inputs) {
2021+
// We can also use sectionSize as write offset for isec
2022+
assert(sectionSize == alignToPowerOf2(sectionSize, relativeOffsetSize) &&
2023+
"expected __objc_methlist to be aligned by default with the "
2024+
"required section alignment");
2025+
isec->outSecOff = sectionSize;
2026+
2027+
isec->isFinal = true;
2028+
uint32_t relativeListSize =
2029+
computeRelativeMethodListSize(isec->data.size());
2030+
sectionSize += relativeListSize;
2031+
2032+
// If encoding the method list in relative offset format shrinks the size,
2033+
// then we also need to adjust symbol sizes to match the new size. Note that
2034+
// on 32bit platforms the size of the method list will remain the same when
2035+
// encoded in relative offset format.
2036+
if (relativeListSize != isec->data.size()) {
2037+
for (Symbol *sym : isec->symbols) {
2038+
assert(isa<Defined>(sym) &&
2039+
"Unexpected undefined symbol in ObjC method list");
2040+
auto *def = cast<Defined>(sym);
2041+
// There can be 0-size symbols, check if this is the case and ignore
2042+
// them.
2043+
if (def->size) {
2044+
assert(
2045+
def->size == isec->data.size() &&
2046+
"Invalid ObjC method list symbol size: expected symbol size to "
2047+
"match isec size");
2048+
def->size = relativeListSize;
2049+
}
2050+
}
2051+
}
2052+
}
2053+
}
2054+
2055+
void ObjCMethListSection::writeTo(uint8_t *bufStart) const {
2056+
uint8_t *buf = bufStart;
2057+
for (const ConcatInputSection *isec : inputs) {
2058+
assert(buf - bufStart == long(isec->outSecOff) &&
2059+
"Writing at unexpected offset");
2060+
uint32_t writtenSize = writeRelativeMethodList(isec, buf);
2061+
buf += writtenSize;
2062+
}
2063+
assert(buf - bufStart == sectionSize &&
2064+
"Written size does not match expected section size");
2065+
}
2066+
2067+
// Check if an InputSection is a method list. To do this we scan the
2068+
// InputSection for any symbols who's names match the patterns we expect clang
2069+
// to generate for method lists.
2070+
bool ObjCMethListSection::isMethodList(const ConcatInputSection *isec) {
2071+
const char *symPrefixes[] = {objc::symbol_names::classMethods,
2072+
objc::symbol_names::instanceMethods,
2073+
objc::symbol_names::categoryInstanceMethods,
2074+
objc::symbol_names::categoryClassMethods};
2075+
if (!isec)
2076+
return false;
2077+
for (const Symbol *sym : isec->symbols) {
2078+
auto *def = dyn_cast_or_null<Defined>(sym);
2079+
if (!def)
2080+
continue;
2081+
for (const char *prefix : symPrefixes) {
2082+
if (def->getName().starts_with(prefix)) {
2083+
assert(def->size == isec->data.size() &&
2084+
"Invalid ObjC method list symbol size: expected symbol size to "
2085+
"match isec size");
2086+
assert(def->value == 0 &&
2087+
"Offset of ObjC method list symbol must be 0");
2088+
return true;
2089+
}
2090+
}
2091+
}
2092+
2093+
return false;
2094+
}
2095+
2096+
// Encode a single relative offset value. The input is the data/symbol at
2097+
// (&isec->data[inSecOff]). The output is written to (&buf[outSecOff]).
2098+
// 'createSelRef' indicates that we should not directly use the specified
2099+
// symbol, but instead get the selRef for the symbol and use that instead.
2100+
void ObjCMethListSection::writeRelativeOffsetForIsec(
2101+
const ConcatInputSection *isec, uint8_t *buf, uint32_t &inSecOff,
2102+
uint32_t &outSecOff, bool useSelRef) const {
2103+
const Reloc *reloc = isec->getRelocAt(inSecOff);
2104+
assert(reloc && "Relocation expected at __objc_methlist Offset");
2105+
auto *def = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
2106+
assert(def && "Expected all syms in __objc_methlist to be defined");
2107+
uint32_t symVA = def->getVA();
2108+
2109+
if (useSelRef) {
2110+
auto *cisec = cast<CStringInputSection>(def->isec);
2111+
auto methname = cisec->getStringRefAtOffset(def->value);
2112+
ConcatInputSection *selRef = ObjCSelRefsHelper::getSelRef(methname);
2113+
assert(selRef && "Expected all selector names to already be already be "
2114+
"present in __objc_selrefs");
2115+
symVA = selRef->getVA();
2116+
assert(selRef->data.size() == sizeof(target->wordSize) &&
2117+
"Expected one selref per ConcatInputSection");
2118+
}
2119+
2120+
uint32_t currentVA = isec->getVA() + outSecOff;
2121+
uint32_t delta = symVA - currentVA;
2122+
write32le(buf + outSecOff, delta);
2123+
2124+
// Move one pointer forward in the absolute method list
2125+
inSecOff += target->wordSize;
2126+
// Move one relative offset forward in the relative method list (32 bits)
2127+
outSecOff += relativeOffsetSize;
2128+
}
2129+
2130+
// Write a relative method list to buf, return the size of the written
2131+
// information
2132+
uint32_t
2133+
ObjCMethListSection::writeRelativeMethodList(const ConcatInputSection *isec,
2134+
uint8_t *buf) const {
2135+
// Copy over the header, and add the "this is a relative method list" magic
2136+
// value flag
2137+
uint32_t structSizeAndFlags = 0, structCount = 0;
2138+
readMethodListHeader(isec->data.data(), structSizeAndFlags, structCount);
2139+
// Set the struct size for the relative method list
2140+
uint32_t relativeStructSizeAndFlags =
2141+
(relativeOffsetSize * pointersPerStruct) & structSizeMask;
2142+
// Carry over the old flags from the input struct
2143+
relativeStructSizeAndFlags |= structSizeAndFlags & structFlagsMask;
2144+
// Set the relative method list flag
2145+
relativeStructSizeAndFlags |= relMethodHeaderFlag;
2146+
2147+
writeMethodListHeader(buf, relativeStructSizeAndFlags, structCount);
2148+
2149+
assert(methodListHeaderSize +
2150+
(structCount * pointersPerStruct * target->wordSize) ==
2151+
isec->data.size() &&
2152+
"Invalid computed ObjC method list size");
2153+
2154+
uint32_t inSecOff = methodListHeaderSize;
2155+
uint32_t outSecOff = methodListHeaderSize;
2156+
2157+
// Go through the method list and encode input absolute pointers as relative
2158+
// offsets. writeRelativeOffsetForIsec will be incrementing inSecOff and
2159+
// outSecOff
2160+
for (uint32_t i = 0; i < structCount; i++) {
2161+
// Write the name of the method
2162+
writeRelativeOffsetForIsec(isec, buf, inSecOff, outSecOff, true);
2163+
// Write the type of the method
2164+
writeRelativeOffsetForIsec(isec, buf, inSecOff, outSecOff, false);
2165+
// Write reference to the selector of the method
2166+
writeRelativeOffsetForIsec(isec, buf, inSecOff, outSecOff, false);
2167+
}
2168+
2169+
// Expecting to have read all the data in the isec
2170+
assert(inSecOff == isec->data.size() &&
2171+
"Invalid actual ObjC method list size");
2172+
assert(
2173+
outSecOff == computeRelativeMethodListSize(inSecOff) &&
2174+
"Mismatch between input & output size when writing relative method list");
2175+
return outSecOff;
2176+
}
2177+
2178+
// Given the size of an ObjC method list InputSection, return the size of the
2179+
// method list when encoded in relative offsets format. We can do this without
2180+
// decoding the actual data, as it can be directly inferred from the size of the
2181+
// isec.
2182+
uint32_t ObjCMethListSection::computeRelativeMethodListSize(
2183+
uint32_t absoluteMethodListSize) const {
2184+
uint32_t oldPointersSize = absoluteMethodListSize - methodListHeaderSize;
2185+
uint32_t pointerCount = oldPointersSize / target->wordSize;
2186+
assert(((pointerCount % pointersPerStruct) == 0) &&
2187+
"__objc_methlist expects method lists to have multiple-of-3 pointers");
2188+
2189+
uint32_t newPointersSize = pointerCount * relativeOffsetSize;
2190+
uint32_t newTotalSize = methodListHeaderSize + newPointersSize;
2191+
2192+
assert((newTotalSize <= absoluteMethodListSize) &&
2193+
"Expected relative method list size to be smaller or equal than "
2194+
"original size");
2195+
return newTotalSize;
2196+
}
2197+
2198+
// Read a method list header from buf
2199+
void ObjCMethListSection::readMethodListHeader(const uint8_t *buf,
2200+
uint32_t &structSizeAndFlags,
2201+
uint32_t &structCount) const {
2202+
structSizeAndFlags = read32le(buf);
2203+
structCount = read32le(buf + sizeof(uint32_t));
2204+
}
2205+
2206+
// Write a method list header to buf
2207+
void ObjCMethListSection::writeMethodListHeader(uint8_t *buf,
2208+
uint32_t structSizeAndFlags,
2209+
uint32_t structCount) const {
2210+
write32le(buf, structSizeAndFlags);
2211+
write32le(buf + sizeof(structSizeAndFlags), structCount);
2212+
}
2213+
19782214
void macho::createSyntheticSymbols() {
19792215
auto addHeaderSymbol = [](const char *name) {
19802216
symtab->addSynthetic(name, in.header->isec, /*value=*/0,

0 commit comments

Comments
 (0)