Skip to content

[lld-macho] Implement support for ObjC relative method lists #86231

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Mar 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lld/MachO/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ struct Configuration {
bool emitEncryptionInfo = false;
bool emitInitOffsets = false;
bool emitChainedFixups = false;
bool emitRelativeMethodLists = false;
bool thinLTOEmitImportsFiles;
bool thinLTOEmitIndexFiles;
bool thinLTOIndexOnly;
Expand Down
17 changes: 17 additions & 0 deletions lld/MachO/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1086,6 +1086,22 @@ static bool shouldEmitChainedFixups(const InputArgList &args) {
return isRequested;
}

static bool shouldEmitRelativeMethodLists(const InputArgList &args) {
const Arg *arg = args.getLastArg(OPT_objc_relative_method_lists,
OPT_no_objc_relative_method_lists);
if (arg && arg->getOption().getID() == OPT_objc_relative_method_lists)
return true;
if (arg && arg->getOption().getID() == OPT_no_objc_relative_method_lists)
return false;

// TODO: If no flag is specified, don't default to false, but instead:
// - default false on < ios14
// - default true on >= ios14
// For now, until this feature is confirmed stable, default to false if no
// flag is explicitly specified
return false;
}

void SymbolPatterns::clear() {
literals.clear();
globs.clear();
Expand Down Expand Up @@ -1630,6 +1646,7 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
config->emitChainedFixups = shouldEmitChainedFixups(args);
config->emitInitOffsets =
config->emitChainedFixups || args.hasArg(OPT_init_offsets);
config->emitRelativeMethodLists = shouldEmitRelativeMethodLists(args);
config->icfLevel = getICFLevel(args);
config->dedupStrings =
args.hasFlag(OPT_deduplicate_strings, OPT_no_deduplicate_strings, true);
Expand Down
8 changes: 8 additions & 0 deletions lld/MachO/InputSection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,14 @@ void lld::macho::addInputSection(InputSection *inputSection) {
if (auto *isec = dyn_cast<ConcatInputSection>(inputSection)) {
if (isec->isCoalescedWeak())
return;
if (config->emitRelativeMethodLists &&
ObjCMethListSection::isMethodList(isec)) {
if (in.objcMethList->inputOrder == UnspecifiedInputOrder)
in.objcMethList->inputOrder = inputSectionsOrder++;
in.objcMethList->addInput(isec);
isec->parent = in.objcMethList;
return;
}
if (config->emitInitOffsets &&
sectionType(isec->getFlags()) == S_MOD_INIT_FUNC_POINTERS) {
in.initOffsets->addInput(isec);
Expand Down
1 change: 1 addition & 0 deletions lld/MachO/InputSection.h
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,7 @@ constexpr const char moduleTermFunc[] = "__mod_term_func";
constexpr const char nonLazySymbolPtr[] = "__nl_symbol_ptr";
constexpr const char objcCatList[] = "__objc_catlist";
constexpr const char objcClassList[] = "__objc_classlist";
constexpr const char objcMethList[] = "__objc_methlist";
constexpr const char objcClassRefs[] = "__objc_classrefs";
constexpr const char objcConst[] = "__objc_const";
constexpr const char objCImageInfo[] = "__objc_imageinfo";
Expand Down
22 changes: 15 additions & 7 deletions lld/MachO/MapFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,18 +197,24 @@ void macho::writeMapFile() {
seg->name.str().c_str(), osec->name.str().c_str());
}

// Shared helper: for every symbol of every input section in `arr`, emit one
// map-file line holding its address, size, file ordinal and name.
auto printIsecArrSyms = [&](const std::vector<ConcatInputSection *> &arr) {
  for (const ConcatInputSection *isec : arr)
    for (Defined *sym : isec->symbols) {
      // Zero-sized private labels are left out of the listing.
      if (isPrivateLabel(sym->getName()) && sym->size == 0)
        continue;
      os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(), sym->size,
                   readerToFileOrdinal[sym->getFile()],
                   sym->getName().str().data());
    }
};

os << "# Symbols:\n";
os << "# Address\tSize \tFile Name\n";
for (const OutputSegment *seg : outputSegments) {
for (const OutputSection *osec : seg->getSections()) {
if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) {
for (const InputSection *isec : concatOsec->inputs) {
for (Defined *sym : isec->symbols)
if (!(isPrivateLabel(sym->getName()) && sym->size == 0))
os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(),
sym->size, readerToFileOrdinal[sym->getFile()],
sym->getName().str().data());
}
printIsecArrSyms(concatOsec->inputs);
} else if (osec == in.cStringSection || osec == in.objcMethnameSection) {
const auto &liveCStrings = info.liveCStringsForSection.lookup(osec);
uint64_t lastAddr = 0; // strings will never start at address 0, so this
Expand Down Expand Up @@ -237,6 +243,8 @@ void macho::writeMapFile() {
printNonLazyPointerSection(os, in.got);
} else if (osec == in.tlvPointers) {
printNonLazyPointerSection(os, in.tlvPointers);
} else if (osec == in.objcMethList) {
printIsecArrSyms(in.objcMethList->getInputs());
}
// TODO print other synthetic sections
}
Expand Down
2 changes: 2 additions & 0 deletions lld/MachO/ObjC.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ constexpr const char klassPropList[] = "__OBJC_$_CLASS_PROP_LIST_";
constexpr const char metaclass[] = "_OBJC_METACLASS_$_";
constexpr const char ehtype[] = "_OBJC_EHTYPE_$_";
constexpr const char ivar[] = "_OBJC_IVAR_$_";
constexpr const char instanceMethods[] = "__OBJC_$_INSTANCE_METHODS_";
constexpr const char classMethods[] = "__OBJC_$_CLASS_METHODS_";
constexpr const char listProprieties[] = "__OBJC_$_PROP_LIST_";

constexpr const char category[] = "__OBJC_$_CATEGORY_";
Expand Down
6 changes: 6 additions & 0 deletions lld/MachO/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -1284,6 +1284,12 @@ def fixup_chains_section : Flag<["-"], "fixup_chains_section">,
HelpText<"This option is undocumented in ld64">,
Flags<[HelpHidden]>,
Group<grp_undocumented>;
def objc_relative_method_lists : Flag<["-"], "objc_relative_method_lists">,
HelpText<"Emit relative method lists (more compact representation)">,
Group<grp_undocumented>;
def no_objc_relative_method_lists : Flag<["-"], "no_objc_relative_method_lists">,
HelpText<"Don't emit relative method lists (use traditional representation)">,
Group<grp_undocumented>;
def flto_codegen_only : Flag<["-"], "flto-codegen-only">,
HelpText<"This option is undocumented in ld64">,
Flags<[HelpHidden]>,
Expand Down
236 changes: 236 additions & 0 deletions lld/MachO/SyntheticSections.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "ExportTrie.h"
#include "InputFiles.h"
#include "MachOStructs.h"
#include "ObjC.h"
#include "OutputSegment.h"
#include "SymbolTable.h"
#include "Symbols.h"
Expand Down Expand Up @@ -1975,6 +1976,241 @@ void InitOffsetsSection::setUp() {
}
}

// Construct the synthetic section named section_names::objcMethList inside
// segment_names::text.
ObjCMethListSection::ObjCMethListSection()
    : SyntheticSection(segment_names::text, section_names::objcMethList) {
  // Section alignment equals the size of one relative offset entry.
  align = relativeOffsetSize;
  // Mark the section so that it is not dead-stripped.
  flags = S_ATTR_NO_DEAD_STRIP;
}

// Go through all input method lists and ensure that we have selrefs for all
// their method names. The selrefs will be needed later by ::writeTo. We need to
// create them early on here to ensure they are processed correctly by the lld
// pipeline.
//
// Each input section is expected to start with a method list header, followed
// by fixed-size method entries whose first slot carries a relocation pointing
// at the method name string.
void ObjCMethListSection::setUp() {
  for (const ConcatInputSection *isec : inputs) {
    assert(isec->data.size() >= methodListHeaderSize &&
           "ObjC method list is too small to contain a header");
    uint32_t structSizeAndFlags = 0, structCount = 0;
    readMethodListHeader(isec->data.data(), structSizeAndFlags, structCount);
    uint32_t originalStructSize = structSizeAndFlags & structSizeMask;
    // The walk below advances by the entry size taken from the header. A zero
    // entry size would never advance and the loop would spin forever when
    // asserts are compiled out, so such a (malformed) list is skipped.
    assert(originalStructSize != 0 &&
           "ObjC method list header declares a zero-sized entry");
    if (originalStructSize == 0)
      continue;
    // Method name is immediately after header
    uint32_t methodNameOff = methodListHeaderSize;

    // Loop through all methods, and ensure a selref for each of them exists.
    while (methodNameOff < isec->data.size()) {
      const Reloc *reloc = isec->getRelocAt(methodNameOff);
      assert(reloc && "Relocation expected at method list name slot");
      auto *def = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
      assert(def && "Expected valid Defined at method list name slot");
      // dyn_cast_or_null (rather than cast) so that the assert below is the
      // check that actually fires when the name lives in another section kind.
      auto *cisec = dyn_cast_or_null<CStringInputSection>(def->isec);
      assert(cisec && "Expected method name to be in a CStringInputSection");
      auto methname = cisec->getStringRefAtOffset(def->value);
      if (!ObjCSelRefsHelper::getSelRef(methname))
        ObjCSelRefsHelper::makeSelRef(methname);

      // Jump to method name offset in next struct
      methodNameOff += originalStructSize;
    }
  }
}

// Lay out the section: assign every input method list its output offset and
// accumulate the total size of the relative-encoded __objc_methlist section.
void ObjCMethListSection::finalize() {
  // Running total; doubles as the write offset of the next input section.
  sectionSize = 0;
  for (ConcatInputSection *isec : inputs) {
    assert(sectionSize == alignToPowerOf2(sectionSize, relativeOffsetSize) &&
           "expected __objc_methlist to be aligned by default with the "
           "required section alignment");
    isec->outSecOff = sectionSize;
    isec->isFinal = true;

    const uint32_t shrunkSize =
        computeRelativeMethodListSize(isec->data.size());
    sectionSize += shrunkSize;

    // When the relative encoding has the same size as the input (the original
    // note says this is the case on 32bit platforms) the symbol sizes are
    // already correct and nothing needs patching.
    if (shrunkSize == isec->data.size())
      continue;

    // Otherwise the symbols covering the list must shrink along with it.
    for (Symbol *sym : isec->symbols) {
      assert(isa<Defined>(sym) &&
             "Unexpected undefined symbol in ObjC method list");
      auto *def = cast<Defined>(sym);
      // 0-size symbols can exist; they are left untouched.
      if (!def->size)
        continue;
      assert(def->size == isec->data.size() &&
             "Invalid ObjC method list symbol size: expected symbol size to "
             "match isec size");
      def->size = shrunkSize;
    }
  }
}

// Emit every input method list, in order, in relative format starting at
// bufStart. Each list must land at the offset recorded in its outSecOff, and
// the total written must equal sectionSize.
void ObjCMethListSection::writeTo(uint8_t *bufStart) const {
  uint8_t *cursor = bufStart;
  for (const ConcatInputSection *isec : inputs) {
    assert(cursor - bufStart == long(isec->outSecOff) &&
           "Writing at unexpected offset");
    // writeRelativeMethodList reports how many bytes it produced.
    cursor += writeRelativeMethodList(isec, cursor);
  }
  assert(cursor - bufStart == sectionSize &&
         "Written size does not match expected section size");
}

// Check if an InputSection is a method list. To do this we scan the
// InputSection for any symbols whose names match the patterns we expect clang
// to generate for method lists. Returns false for a null isec.
bool ObjCMethListSection::isMethodList(const ConcatInputSection *isec) {
  if (!isec)
    return false;

  // Symbol name prefixes that identify a method list.
  const char *symPrefixes[] = {objc::symbol_names::classMethods,
                               objc::symbol_names::instanceMethods,
                               objc::symbol_names::categoryInstanceMethods,
                               objc::symbol_names::categoryClassMethods};

  for (const Symbol *sym : isec->symbols) {
    // Only Defined symbols are considered; anything else is skipped.
    if (auto *def = dyn_cast_or_null<Defined>(sym)) {
      const auto symName = def->getName();
      for (const char *prefix : symPrefixes) {
        if (!symName.starts_with(prefix))
          continue;
        // A method list symbol is expected to span the whole section.
        assert(def->size == isec->data.size() &&
               "Invalid ObjC method list symbol size: expected symbol size to "
               "match isec size");
        assert(def->value == 0 &&
               "Offset of ObjC method list symbol must be 0");
        return true;
      }
    }
  }

  return false;
}

// Encode a single relative offset value. The input is the data/symbol at
// (&isec->data[inSecOff]). The output is written to (&buf[outSecOff]).
// 'useSelRef' indicates that we should not directly use the specified
// symbol, but instead get the selRef for the symbol and use that instead.
//
// On return, inSecOff has been advanced by one target word (one absolute
// pointer of the input list) and outSecOff by relativeOffsetSize (one entry of
// the output list).
void ObjCMethListSection::writeRelativeOffsetForIsec(
    const ConcatInputSection *isec, uint8_t *buf, uint32_t &inSecOff,
    uint32_t &outSecOff, bool useSelRef) const {
  const Reloc *reloc = isec->getRelocAt(inSecOff);
  assert(reloc && "Relocation expected at __objc_methlist Offset");
  auto *def = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
  assert(def && "Expected all syms in __objc_methlist to be defined");
  // Addresses are kept in 32 bits: the unsigned subtraction below wraps, so
  // the stored value is the low 32 bits of the real distance.
  uint32_t symVA = def->getVA();

  if (useSelRef) {
    auto *cisec = cast<CStringInputSection>(def->isec);
    auto methname = cisec->getStringRefAtOffset(def->value);
    ConcatInputSection *selRef = ObjCSelRefsHelper::getSelRef(methname);
    assert(selRef && "Expected all selector names to already be already be "
                     "present in __objc_selrefs");
    symVA = selRef->getVA();
    // A selref section holds exactly one pointer, i.e. one target word. This
    // must compare against the target's word size itself, not against
    // sizeof() of the wordSize field, which is a property of the host build.
    assert(selRef->data.size() == target->wordSize &&
           "Expected one selref per ConcatInputSection");
  }

  // The offset is relative to the address of the slot being written.
  uint32_t currentVA = isec->getVA() + outSecOff;
  uint32_t delta = symVA - currentVA;
  write32le(buf + outSecOff, delta);

  // Move one pointer forward in the absolute method list
  inSecOff += target->wordSize;
  // Move one relative offset forward in the relative method list (32 bits)
  outSecOff += relativeOffsetSize;
}

// Write a relative method list to buf, return the size of the written
// information
uint32_t
ObjCMethListSection::writeRelativeMethodList(const ConcatInputSection *isec,
uint8_t *buf) const {
// Copy over the header, and add the "this is a relative method list" magic
// value flag
uint32_t structSizeAndFlags = 0, structCount = 0;
readMethodListHeader(isec->data.data(), structSizeAndFlags, structCount);
// Set the struct size for the relative method list
uint32_t relativeStructSizeAndFlags =
(relativeOffsetSize * pointersPerStruct) & structSizeMask;
// Carry over the old flags from the input struct
relativeStructSizeAndFlags |= structSizeAndFlags & structFlagsMask;
// Set the relative method list flag
relativeStructSizeAndFlags |= relMethodHeaderFlag;

writeMethodListHeader(buf, relativeStructSizeAndFlags, structCount);

assert(methodListHeaderSize +
(structCount * pointersPerStruct * target->wordSize) ==
isec->data.size() &&
"Invalid computed ObjC method list size");

uint32_t inSecOff = methodListHeaderSize;
uint32_t outSecOff = methodListHeaderSize;

// Go through the method list and encode input absolute pointers as relative
// offsets. writeRelativeOffsetForIsec will be incrementing inSecOff and
// outSecOff
for (uint32_t i = 0; i < structCount; i++) {
// Write the name of the method
writeRelativeOffsetForIsec(isec, buf, inSecOff, outSecOff, true);
// Write the type of the method
writeRelativeOffsetForIsec(isec, buf, inSecOff, outSecOff, false);
// Write reference to the selector of the method
writeRelativeOffsetForIsec(isec, buf, inSecOff, outSecOff, false);
}

// Expecting to have read all the data in the isec
assert(inSecOff == isec->data.size() &&
"Invalid actual ObjC method list size");
assert(
outSecOff == computeRelativeMethodListSize(inSecOff) &&
"Mismatch between input & output size when writing relative method list");
return outSecOff;
}

// Return the size a method list of absoluteMethodListSize bytes will have once
// encoded with relative offsets. Only the input size is needed: the header is
// kept as is and every word-sized pointer after it becomes one relative offset.
uint32_t ObjCMethListSection::computeRelativeMethodListSize(
    uint32_t absoluteMethodListSize) const {
  // Bytes occupied by pointers in the absolute encoding.
  uint32_t absolutePayloadSize = absoluteMethodListSize - methodListHeaderSize;
  uint32_t numPointers = absolutePayloadSize / target->wordSize;
  assert(((numPointers % pointersPerStruct) == 0) &&
         "__objc_methlist expects method lists to have multiple-of-3 pointers");

  // Same number of slots, each relativeOffsetSize bytes wide.
  uint32_t relativePayloadSize = numPointers * relativeOffsetSize;
  uint32_t relativeTotalSize = methodListHeaderSize + relativePayloadSize;

  assert((relativeTotalSize <= absoluteMethodListSize) &&
         "Expected relative method list size to be smaller or equal than "
         "original size");
  return relativeTotalSize;
}

// Decode a method list header from buf: two consecutive little-endian 32-bit
// words, the first returned in structSizeAndFlags, the second in structCount.
void ObjCMethListSection::readMethodListHeader(const uint8_t *buf,
                                               uint32_t &structSizeAndFlags,
                                               uint32_t &structCount) const {
  const uint8_t *sizeAndFlagsField = buf;
  const uint8_t *countField = buf + sizeof(uint32_t);
  structSizeAndFlags = read32le(sizeAndFlagsField);
  structCount = read32le(countField);
}

// Encode a method list header into buf as two consecutive little-endian 32-bit
// words: structSizeAndFlags first, then structCount.
void ObjCMethListSection::writeMethodListHeader(uint8_t *buf,
                                                uint32_t structSizeAndFlags,
                                                uint32_t structCount) const {
  uint8_t *sizeAndFlagsField = buf;
  uint8_t *countField = buf + sizeof(structSizeAndFlags);
  write32le(sizeAndFlagsField, structSizeAndFlags);
  write32le(countField, structCount);
}

void macho::createSyntheticSymbols() {
auto addHeaderSymbol = [](const char *name) {
symtab->addSynthetic(name, in.header->isec, /*value=*/0,
Expand Down
Loading