Skip to content

[lld-macho][arm64] Enhance safe ICF with thunk-based deduplication #106573

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Sep 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions lld/MachO/Arch/ARM64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ struct ARM64 : ARM64Common {
Symbol *objcMsgSend) const override;
void populateThunk(InputSection *thunk, Symbol *funcSym) override;
void applyOptimizationHints(uint8_t *, const ObjFile &) const override;

void initICFSafeThunkBody(InputSection *thunk,
InputSection *branchTarget) const override;
uint32_t getICFSafeThunkSize() const override;
};

} // namespace
Expand Down Expand Up @@ -175,6 +179,25 @@ void ARM64::populateThunk(InputSection *thunk, Symbol *funcSym) {
/*offset=*/0, /*addend=*/0,
/*referent=*/funcSym);
}
// Just a single direct branch to the target function.
static constexpr uint32_t icfSafeThunkCode[] = {
0x14000000, // 00: b target
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When a binary is large, we may not reach the target using this direct branch. What's the plan for this case?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LLD needs generic support for such scenarios - not only for this feature, but for code generation in general. So this is a general problem - and yes, we don't introduce any special mitigation for it in this situation. Once generic support is added for this scenario, this feature will inherit it.

};

void ARM64::initICFSafeThunkBody(InputSection *thunk,
                                 InputSection *branchTarget) const {
  // Nothing in this function patches the thunk bytes in place; only a
  // relocation is attached below. The section's data can therefore point
  // straight at the shared constant instruction array.
  const uint8_t *thunkBytes =
      reinterpret_cast<const uint8_t *>(icfSafeThunkCode);
  thunk->data = {thunkBytes, sizeof(icfSafeThunkCode)};

  // Attach a pc-relative BRANCH26 relocation on the branch instruction at
  // offset 0, referring to the section that the thunk must jump to.
  thunk->relocs.emplace_back(/*type=*/ARM64_RELOC_BRANCH26,
                             /*pcrel=*/true, /*length=*/2,
                             /*offset=*/0, /*addend=*/0,
                             /*referent=*/branchTarget);
}

// Returns the size in bytes of an ICF safe thunk on arm64, i.e. the byte size
// of the icfSafeThunkCode instruction array.
uint32_t ARM64::getICFSafeThunkSize() const { return sizeof(icfSafeThunkCode); }

ARM64::ARM64() : ARM64Common(LP64()) {
cpuType = CPU_TYPE_ARM64;
Expand Down
1 change: 1 addition & 0 deletions lld/MachO/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ enum class ICFLevel {
unknown,
none,
safe,
safe_thunks,
all,
};

Expand Down
9 changes: 8 additions & 1 deletion lld/MachO/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -847,8 +847,14 @@ static ICFLevel getICFLevel(const ArgList &args) {
auto icfLevel = StringSwitch<ICFLevel>(icfLevelStr)
.Cases("none", "", ICFLevel::none)
.Case("safe", ICFLevel::safe)
.Case("safe_thunks", ICFLevel::safe_thunks)
.Case("all", ICFLevel::all)
.Default(ICFLevel::unknown);

if ((icfLevel == ICFLevel::safe_thunks) && (config->arch() != AK_arm64)) {
error("--icf=safe_thunks is only supported on arm64 targets");
}

if (icfLevel == ICFLevel::unknown) {
warn(Twine("unknown --icf=OPTION `") + icfLevelStr +
"', defaulting to `none'");
Expand Down Expand Up @@ -2104,7 +2110,8 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
// foldIdenticalLiterals before foldIdenticalSections.
foldIdenticalLiterals();
if (config->icfLevel != ICFLevel::none) {
if (config->icfLevel == ICFLevel::safe)
if (config->icfLevel == ICFLevel::safe ||
config->icfLevel == ICFLevel::safe_thunks)
markAddrSigSymbols();
foldIdenticalSections(/*onlyCfStrings=*/false);
} else if (config->dedupStrings) {
Expand Down
92 changes: 89 additions & 3 deletions lld/MachO/ICF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ class ICF {
const ConcatInputSection *ib);
bool equalsVariable(const ConcatInputSection *ia,
const ConcatInputSection *ib);
void applySafeThunksToRange(size_t begin, size_t end);

// ICF needs a copy of the inputs vector because its equivalence-class
// segregation algorithm destroys the proper sequence.
Expand Down Expand Up @@ -251,6 +252,50 @@ void ICF::forEachClassRange(size_t begin, size_t end,
}
}

// Given a range of identical icfInputs, replace address significant functions
// with a thunk that is just a direct branch to the first function in the
// series. This way we keep only one main body of the function but we still
// retain the address uniqueness of relevant functions by having them be a
// direct branch thunk rather than containing a full copy of the actual function
// body.
void ICF::applySafeThunksToRange(size_t begin, size_t end) {
// If the functions we're dealing with are smaller than the thunk size, then
// just leave them all as-is - creating thunks would be a net loss.
uint32_t thunkSize = target->getICFSafeThunkSize();
if (icfInputs[begin]->data.size() <= thunkSize)
return;

// When creating a unique ICF thunk, use the first section as the section that
// all thunks will branch to.
ConcatInputSection *masterIsec = icfInputs[begin];

for (size_t i = begin + 1; i < end; ++i) {
ConcatInputSection *isec = icfInputs[i];
// When we're done processing keepUnique entries, we can stop. Sorting
// guaratees that all keepUnique will be at the front.
if (!isec->keepUnique)
break;

ConcatInputSection *thunk =
makeSyntheticInputSection(isec->getSegName(), isec->getName());
addInputSection(thunk);

target->initICFSafeThunkBody(thunk, masterIsec);
thunk->foldIdentical(isec, Symbol::ICFFoldKind::Thunk);

// Since we're folding the target function into a thunk, we need to adjust
// the symbols that now got relocated from the target function to the thunk.
// Since the thunk is only one branch, we move all symbols to offset 0 and
// make sure that the size of all non-zero-size symbols is equal to the size
// of the branch.
for (auto *sym : thunk->symbols) {
sym->value = 0;
if (sym->size != 0)
sym->size = thunkSize;
}
}
}

// Split icfInputs into shards, then parallelize invocation of FUNC on subranges
// with matching equivalence class
void ICF::forEachClass(llvm::function_ref<void(size_t, size_t)> func) {
Expand Down Expand Up @@ -312,6 +357,12 @@ void ICF::run() {

llvm::stable_sort(
icfInputs, [](const ConcatInputSection *a, const ConcatInputSection *b) {
// When using safe_thunks, ensure that we first sort by icfEqClass and
// then by keepUnique (descending). This guarantees that within an
// equivalence class, the keepUnique inputs are always first.
if (config->icfLevel == ICFLevel::safe_thunks)
if (a->icfEqClass[0] == b->icfEqClass[0])
return a->keepUnique > b->keepUnique;
return a->icfEqClass[0] < b->icfEqClass[0];
});
forEachClass([&](size_t begin, size_t end) {
Expand All @@ -331,13 +382,37 @@ void ICF::run() {
log("equalsVariable() called " + Twine(equalsVariableCount) + " times");
}

// When using safe_thunks, we need to create thunks for all keepUnique
// functions that can be deduplicated. Since we're creating / adding new
// InputSections, we can't parallelize this.
if (config->icfLevel == ICFLevel::safe_thunks)
forEachClassRange(0, icfInputs.size(), [&](size_t begin, size_t end) {
applySafeThunksToRange(begin, end);
});

// Fold sections within equivalence classes
forEachClass([&](size_t begin, size_t end) {
if (end - begin < 2)
return;
bool useSafeThunks = config->icfLevel == ICFLevel::safe_thunks;

// For ICF level safe_thunks, replace keepUnique function bodies with
// thunks. For all other ICF levels, directly merge the functions.

ConcatInputSection *beginIsec = icfInputs[begin];
for (size_t i = begin + 1; i < end; ++i)
for (size_t i = begin + 1; i < end; ++i) {
// Skip keepUnique inputs when using safe_thunks (already handled above)
if (useSafeThunks && icfInputs[i]->keepUnique) {
// Assert keepUnique sections are either small or replaced with thunks.
assert(!icfInputs[i]->live ||
icfInputs[i]->data.size() <= target->getICFSafeThunkSize());
assert(!icfInputs[i]->replacement ||
icfInputs[i]->replacement->data.size() ==
target->getICFSafeThunkSize());
continue;
}
beginIsec->foldIdentical(icfInputs[i]);
}
});
}

Expand Down Expand Up @@ -421,11 +496,22 @@ void macho::foldIdenticalSections(bool onlyCfStrings) {
// can still fold it.
bool hasFoldableFlags = (isSelRefsSection(isec) ||
sectionType(isec->getFlags()) == MachO::S_REGULAR);

bool isCodeSec = isCodeSection(isec);

// When keepUnique is true, the section is not foldable. Unless we are at
// icf level safe_thunks, in which case we still want to fold code sections.
// When using safe_thunks we'll apply the safe_thunks logic at merge time
// based on the 'keepUnique' flag.
bool noUniqueRequirement =
!isec->keepUnique ||
((config->icfLevel == ICFLevel::safe_thunks) && isCodeSec);

// FIXME: consider non-code __text sections as foldable?
bool isFoldable = (!onlyCfStrings || isCfStringSection(isec)) &&
(isCodeSection(isec) || isFoldableWithAddendsRemoved ||
(isCodeSec || isFoldableWithAddendsRemoved ||
isGccExceptTabSection(isec)) &&
!isec->keepUnique && !isec->hasAltEntry &&
noUniqueRequirement && !isec->hasAltEntry &&
!isec->shouldOmitFromOutput() && hasFoldableFlags;
if (isFoldable) {
foldable.push_back(isec);
Expand Down
5 changes: 3 additions & 2 deletions lld/MachO/InputSection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,13 +190,14 @@ const Reloc *InputSection::getRelocAt(uint32_t off) const {
return &*it;
}

void ConcatInputSection::foldIdentical(ConcatInputSection *copy) {
void ConcatInputSection::foldIdentical(ConcatInputSection *copy,
Symbol::ICFFoldKind foldKind) {
align = std::max(align, copy->align);
copy->live = false;
copy->wasCoalesced = true;
copy->replacement = this;
for (auto &copySym : copy->symbols)
copySym->wasIdenticalCodeFolded = true;
copySym->identicalCodeFoldingKind = foldKind;

symbols.insert(symbols.end(), copy->symbols.begin(), copy->symbols.end());
copy->symbols.clear();
Expand Down
3 changes: 2 additions & 1 deletion lld/MachO/InputSection.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,8 @@ class ConcatInputSection final : public InputSection {
bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); }
void writeTo(uint8_t *buf);

void foldIdentical(ConcatInputSection *redundant);
void foldIdentical(ConcatInputSection *redundant,
Symbol::ICFFoldKind foldKind = Symbol::ICFFoldKind::Body);
ConcatInputSection *canonical() override {
return replacement ? replacement : this;
}
Expand Down
2 changes: 1 addition & 1 deletion lld/MachO/MapFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ static void printNonLazyPointerSection(raw_fd_ostream &os,
}

static uint64_t getSymSizeForMap(Defined *sym) {
if (sym->wasIdenticalCodeFolded)
if (sym->identicalCodeFoldingKind == Symbol::ICFFoldKind::Body)
return 0;
return sym->size;
}
Expand Down
2 changes: 1 addition & 1 deletion lld/MachO/Symbols.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ Defined::Defined(StringRefZ name, InputFile *file, InputSection *isec,
bool interposable)
: Symbol(DefinedKind, name, file), overridesWeakDef(canOverrideWeakDef),
privateExtern(isPrivateExtern), includeInSymtab(includeInSymtab),
wasIdenticalCodeFolded(false),
identicalCodeFoldingKind(ICFFoldKind::None),
referencedDynamically(isReferencedDynamically), noDeadStrip(noDeadStrip),
interposable(interposable), weakDefCanBeHidden(isWeakDefCanBeHidden),
weakDef(isWeakDef), external(isExternal), originalIsec(isec),
Expand Down
13 changes: 11 additions & 2 deletions lld/MachO/Symbols.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,15 @@ class Symbol {
AliasKind,
};

// Records how Identical Code Folding (ICF) affected a symbol, if at all.
// Consumers such as the map-file size computation and stabs emission treat
// Body-folded symbols differently from the other kinds, so the distinction
// between Body and Thunk must be preserved.
enum ICFFoldKind {
  // No folding is applied.
  None = 0,
  // The entire body (function or data) is folded.
  Body = 1,
  // The function body is folded into a single branch thunk.
  Thunk = 2,
};

virtual ~Symbol() {}

Kind kind() const { return symbolKind; }
Expand Down Expand Up @@ -154,8 +163,8 @@ class Defined : public Symbol {
bool privateExtern : 1;
// Whether this symbol should appear in the output symbol table.
bool includeInSymtab : 1;
// Whether this symbol was folded into a different symbol during ICF.
bool wasIdenticalCodeFolded : 1;
// The ICF folding kind of this symbol: None / Body / Thunk.
ICFFoldKind identicalCodeFoldingKind : 2;
// Symbols marked referencedDynamically won't be removed from the output's
// symbol table by tools like strip. In theory, this could be set on arbitrary
// symbols in input object files. In practice, it's used solely for the
Expand Down
3 changes: 2 additions & 1 deletion lld/MachO/SyntheticSections.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1231,7 +1231,8 @@ void SymtabSection::emitStabs() {

// Constant-folded symbols go in the executable's symbol table, but don't
// get a stabs entry unless --keep-icf-stabs flag is specified
if (!config->keepICFStabs && defined->wasIdenticalCodeFolded)
if (!config->keepICFStabs &&
defined->identicalCodeFoldingKind == Symbol::ICFFoldKind::Body)
continue;

ObjFile *file = defined->getObjectFile();
Expand Down
10 changes: 10 additions & 0 deletions lld/MachO/Target.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,16 @@ class TargetInfo {
uint64_t selrefVA,
Symbol *objcMsgSend) const = 0;

// Initializes 'thunk' so that it is a direct jump to 'branchTarget'.
// This default body is for targets without ICF safe thunk support and must
// never be reached; targets that do support safe thunks override it.
virtual void initICFSafeThunkBody(InputSection *thunk,
InputSection *branchTarget) const {
llvm_unreachable("target does not support ICF safe thunks");
}

// Returns the size of an ICF safe thunk for this target.
// This default body is for targets without ICF safe thunk support and must
// never be reached; targets that do support safe thunks override it.
virtual uint32_t getICFSafeThunkSize() const {
llvm_unreachable("target does not support ICF safe thunks");
}

// Symbols may be referenced via either the GOT or the stubs section,
// depending on the relocation type. prepareSymbolRelocation() will set up the
// GOT/stubs entries, and resolveSymbolVA() will return the addresses of those
Expand Down
Loading
Loading