-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[lld][elf] add safe-thunks in ELF #126695
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[lld][elf] add safe-thunks in ELF #126695
Conversation
@llvm/pr-subscribers-lld-elf @llvm/pr-subscribers-lld Author: Pengying Xu (Colibrow) ChangesSee https://discourse.llvm.org/t/rfc-add-icf-safe-thunks-in-elf-like-macho/84584 for more detail. Patch is 28.58 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/126695.diff 8 Files Affected:
diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp
index 9538dd4a70baeb4..2c6b225963ae6c6 100644
--- a/lld/ELF/Arch/AArch64.cpp
+++ b/lld/ELF/Arch/AArch64.cpp
@@ -83,6 +83,8 @@ class AArch64 : public TargetInfo {
uint64_t val) const override;
RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
+ void initICFSafeThunkBody(InputSection *thunk, Symbol *target) const override;
+ uint32_t getICFSafeThunkSize() const override;
private:
void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
@@ -926,6 +928,18 @@ static bool needsGotForMemtag(const Relocation &rel) {
return rel.sym->isTagged() && needsGot(rel.expr);
}
+static constexpr uint8_t icfSafeThunkCode[] = {0x00, 0x00, 0x00, 0x14};
+
+void AArch64::initICFSafeThunkBody(InputSection *thunk, Symbol *target) const {
+ thunk->content_ = icfSafeThunkCode;
+ thunk->size = sizeof(icfSafeThunkCode);
+ thunk->relocations.push_back({R_PC, R_AARCH64_JUMP26, 0, 0, target});
+}
+
+uint32_t AArch64::getICFSafeThunkSize() const {
+ return sizeof(icfSafeThunkCode);
+}
+
void AArch64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
uint64_t secAddr = sec.getOutputSection()->addr;
if (auto *s = dyn_cast<InputSection>(&sec))
@@ -1060,7 +1074,7 @@ AArch64BtiPac::AArch64BtiPac(Ctx &ctx) : AArch64(ctx) {
}
void AArch64BtiPac::writePltHeader(uint8_t *buf) const {
- const uint8_t btiData[] = { 0x5f, 0x24, 0x03, 0xd5 }; // bti c
+ const uint8_t btiData[] = {0x5f, 0x24, 0x03, 0xd5}; // bti c
const uint8_t pltData[] = {
0xf0, 0x7b, 0xbf, 0xa9, // stp x16, x30, [sp,#-16]!
0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[2]))
@@ -1070,7 +1084,7 @@ void AArch64BtiPac::writePltHeader(uint8_t *buf) const {
0x1f, 0x20, 0x03, 0xd5, // nop
0x1f, 0x20, 0x03, 0xd5 // nop
};
- const uint8_t nopData[] = { 0x1f, 0x20, 0x03, 0xd5 }; // nop
+ const uint8_t nopData[] = {0x1f, 0x20, 0x03, 0xd5}; // nop
uint64_t got = ctx.in.gotPlt->getVA();
uint64_t plt = ctx.in.plt->getVA();
@@ -1097,11 +1111,11 @@ void AArch64BtiPac::writePlt(uint8_t *buf, const Symbol &sym,
uint64_t pltEntryAddr) const {
// The PLT entry is of the form:
// [btiData] addrInst (pacBr | stdBr) [nopData]
- const uint8_t btiData[] = { 0x5f, 0x24, 0x03, 0xd5 }; // bti c
+ const uint8_t btiData[] = {0x5f, 0x24, 0x03, 0xd5}; // bti c
const uint8_t addrInst[] = {
- 0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[n]))
- 0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.got.plt[n]))]
- 0x10, 0x02, 0x00, 0x91 // add x16, x16, Offset(&(.got.plt[n]))
+ 0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[n]))
+ 0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.got.plt[n]))]
+ 0x10, 0x02, 0x00, 0x91 // add x16, x16, Offset(&(.got.plt[n]))
};
const uint8_t pacHintBr[] = {
0x9f, 0x21, 0x03, 0xd5, // autia1716
@@ -1112,10 +1126,10 @@ void AArch64BtiPac::writePlt(uint8_t *buf, const Symbol &sym,
0x1f, 0x20, 0x03, 0xd5 // nop
};
const uint8_t stdBr[] = {
- 0x20, 0x02, 0x1f, 0xd6, // br x17
- 0x1f, 0x20, 0x03, 0xd5 // nop
+ 0x20, 0x02, 0x1f, 0xd6, // br x17
+ 0x1f, 0x20, 0x03, 0xd5 // nop
};
- const uint8_t nopData[] = { 0x1f, 0x20, 0x03, 0xd5 }; // nop
+ const uint8_t nopData[] = {0x1f, 0x20, 0x03, 0xd5}; // nop
// NEEDS_COPY indicates a non-ifunc canonical PLT entry whose address may
// escape to shared objects. isInIplt indicates a non-preemptible ifunc. Its
@@ -1222,11 +1236,11 @@ void elf::createTaggedSymbols(Ctx &ctx) {
for (Symbol *symbol : file->getSymbols()) {
// See `addTaggedSymbolReferences` for more details.
- if (symbol->type != STT_OBJECT ||
- symbol->binding == STB_LOCAL)
+ if (symbol->type != STT_OBJECT || symbol->binding == STB_LOCAL)
continue;
auto it = taggedSymbolReferenceCount.find(symbol);
- if (it == taggedSymbolReferenceCount.end()) continue;
+ if (it == taggedSymbolReferenceCount.end())
+ continue;
unsigned &remainingAllowedTaggedRefs = it->second;
if (remainingAllowedTaggedRefs == 0) {
taggedSymbolReferenceCount.erase(it);
@@ -1247,7 +1261,7 @@ void elf::createTaggedSymbols(Ctx &ctx) {
// uses are tagged.
for (auto &[symbol, remainingTaggedRefs] : taggedSymbolReferenceCount) {
assert(remainingTaggedRefs == 0 &&
- "Symbol is defined as tagged more times than it's used");
+ "Symbol is defined as tagged more times than it's used");
symbol->setIsTagged(true);
}
}
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index f132b11b20c631e..6cb72de29caa750 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -97,7 +97,7 @@ enum class CGProfileSortKind { None, Hfsort, Cdsort };
enum class DiscardPolicy { Default, All, Locals, None };
// For --icf={none,safe,all}.
-enum class ICFLevel { None, Safe, All };
+enum class ICFLevel { None, Safe, SafeThunks, All };
// For --strip-{all,debug}.
enum class StripPolicy { None, All, Debug };
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 7d14180a4992622..af653ce37550c2a 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -808,11 +808,14 @@ static int getMemtagMode(Ctx &ctx, opt::InputArgList &args) {
}
static ICFLevel getICF(opt::InputArgList &args) {
- auto *arg = args.getLastArg(OPT_icf_none, OPT_icf_safe, OPT_icf_all);
+ auto *arg = args.getLastArg(OPT_icf_none, OPT_icf_safe, OPT_icf_safe_thunks,
+ OPT_icf_all);
if (!arg || arg->getOption().getID() == OPT_icf_none)
return ICFLevel::None;
if (arg->getOption().getID() == OPT_icf_safe)
return ICFLevel::Safe;
+ if (arg->getOption().getID() == OPT_icf_safe_thunks)
+ return ICFLevel::SafeThunks;
return ICFLevel::All;
}
@@ -1813,7 +1816,7 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) {
getPackDynRelocs(ctx, args);
}
- if (auto *arg = args.getLastArg(OPT_symbol_ordering_file)){
+ if (auto *arg = args.getLastArg(OPT_symbol_ordering_file)) {
if (args.hasArg(OPT_call_graph_ordering_file))
ErrAlways(ctx) << "--symbol-ordering-file and --call-graph-order-file "
"may not be used together";
@@ -2474,7 +2477,8 @@ static void findKeepUniqueSections(Ctx &ctx, opt::InputArgList &args) {
// Symbols in the dynsym could be address-significant in other executables
// or DSOs, so we conservatively mark them as address-significant.
- bool icfSafe = ctx.arg.icf == ICFLevel::Safe;
+ bool icfSafe =
+ (ctx.arg.icf == ICFLevel::Safe || ctx.arg.icf == ICFLevel::SafeThunks);
for (Symbol *sym : ctx.symtab->getSymbols())
if (sym->isExported)
markAddrsig(icfSafe, sym);
diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp
index 1cdcf6be9d8a93d..0c9e2161ed4eb61 100644
--- a/lld/ELF/ICF.cpp
+++ b/lld/ELF/ICF.cpp
@@ -77,9 +77,11 @@
#include "InputFiles.h"
#include "LinkerScript.h"
#include "OutputSections.h"
+#include "Relocations.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "SyntheticSections.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Object/ELF.h"
#include "llvm/Support/Parallel.h"
@@ -121,6 +123,8 @@ template <class ELFT> class ICF {
void forEachClass(llvm::function_ref<void(size_t, size_t)> fn);
+ void applySafeThunksToRange(size_t begin, size_t end);
+
Ctx &ctx;
SmallVector<InputSection *, 0> sections;
@@ -160,10 +164,14 @@ template <class ELFT> class ICF {
}
// Returns true if section S is subject of ICF.
-static bool isEligible(InputSection *s) {
- if (!s->isLive() || s->keepUnique || !(s->flags & SHF_ALLOC))
+static bool isEligible(InputSection *s, bool safeThunksMode) {
+ if (!s->isLive() || (s->keepUnique && !safeThunksMode) ||
+ !(s->flags & SHF_ALLOC))
return false;
+ if (s->keepUnique)
+ return safeThunksMode && (s->flags & ELF::SHF_EXECINSTR);
+
// Don't merge writable sections. .data.rel.ro sections are marked as writable
// but are semantically read-only.
if ((s->flags & SHF_WRITE) && s->name != ".data.rel.ro" &&
@@ -459,6 +467,58 @@ static void combineRelocHashes(unsigned cnt, InputSection *isec,
isec->eqClass[(cnt + 1) % 2] = hash | (1U << 31);
}
+// Given a range of identical icfInputs, replace address significant functions
+// with a thunk that is just a direct branch to the first function in the
+// series. This way we keep only one main body of the function but we still
+// retain the address uniqueness of relevant functions by having them be a
+// direct branch thunk rather than containing a full copy of the actual function
+// body.
+template <class ELFT>
+void ICF<ELFT>::applySafeThunksToRange(size_t begin, size_t end) {
+ InputSection *masterIsec = sections[begin];
+
+ uint32_t thunkSize = ctx.target->getICFSafeThunkSize();
+ // If the functions we're dealing with are smaller than the thunk size, then
+ // just leave them all as-is - creating thunks would be a net loss.
+ if (masterIsec->getSize() <= thunkSize)
+ return;
+
+ // Find the symbol to create the thunk for.
+ Symbol *masterSym = nullptr;
+ for (Symbol *sym : masterIsec->file->getSymbols()) {
+ if (auto *d = dyn_cast<Defined>(sym)) {
+ if (d->section == masterIsec) {
+ masterSym = sym;
+ break;
+ }
+ }
+ }
+
+ if (!masterSym)
+ return;
+
+ for (size_t i = begin + 1; i < end; ++i) {
+ InputSection *isec = sections[i];
+ if (!isec->keepUnique)
+ break;
+
+ auto *thunk = make<InputSection>(*isec);
+ ctx.target->initICFSafeThunkBody(thunk, masterSym);
+ thunk->markLive();
+ auto *osec = isec->getParent();
+ auto *isd = cast<InputSectionDescription>(osec->commands.back());
+ isd->sections.push_back(thunk);
+ osec->commitSection(thunk);
+ isec->repl = thunk;
+ isec->markDead();
+
+ for (Symbol *sym : thunk->file->getSymbols())
+ if (auto *d = dyn_cast<Defined>(sym))
+ if (d->section == isec)
+ d->size = thunkSize;
+ }
+}
+
// The main function of ICF.
template <class ELFT> void ICF<ELFT>::run() {
// Two text sections may have identical content and relocations but different
@@ -475,10 +535,11 @@ template <class ELFT> void ICF<ELFT>::run() {
[&](InputSection &s) { s.eqClass[0] = s.eqClass[1] = ++uniqueId; });
// Collect sections to merge.
+ bool safeThunksMode = ctx.arg.icf == ICFLevel::SafeThunks;
for (InputSectionBase *sec : ctx.inputSections) {
auto *s = dyn_cast<InputSection>(sec);
if (s && s->eqClass[0] == 0) {
- if (isEligible(s))
+ if (isEligible(s, safeThunksMode))
sections.push_back(s);
else
// Ineligible sections are assigned unique IDs, i.e. each section
@@ -510,9 +571,13 @@ template <class ELFT> void ICF<ELFT>::run() {
// From now on, sections in Sections vector are ordered so that sections
// in the same equivalence class are consecutive in the vector.
- llvm::stable_sort(sections, [](const InputSection *a, const InputSection *b) {
- return a->eqClass[0] < b->eqClass[0];
- });
+ llvm::stable_sort(
+ sections, [safeThunksMode](const InputSection *a, const InputSection *b) {
+ if (safeThunksMode)
+ if (a->eqClass[0] == b->eqClass[0])
+ return a->keepUnique > b->keepUnique;
+ return a->eqClass[0] < b->eqClass[0];
+ });
// Compare static contents and assign unique equivalence class IDs for each
// static content. Use a base offset for these IDs to ensure no overlap with
@@ -535,12 +600,19 @@ template <class ELFT> void ICF<ELFT>::run() {
auto print = [&ctx = ctx]() -> ELFSyncStream {
return {ctx, ctx.arg.printIcfSections ? DiagLevel::Msg : DiagLevel::None};
};
+ if (safeThunksMode)
+ forEachClassRange(0, sections.size(), [&](size_t begin, size_t end) {
+ applySafeThunksToRange(begin, end);
+ });
+
// Merge sections by the equivalence class.
forEachClassRange(0, sections.size(), [&](size_t begin, size_t end) {
if (end - begin == 1)
return;
print() << "selected section " << sections[begin];
for (size_t i = begin + 1; i < end; ++i) {
+ if (safeThunksMode && sections[i]->keepUnique)
+ continue;
print() << " removing identical section " << sections[i];
sections[begin]->replace(sections[i]);
diff --git a/lld/ELF/ICF.h b/lld/ELF/ICF.h
index b126c889ea8636d..1f4b96ca1de2f64 100644
--- a/lld/ELF/ICF.h
+++ b/lld/ELF/ICF.h
@@ -9,8 +9,10 @@
#ifndef LLD_ELF_ICF_H
#define LLD_ELF_ICF_H
+#include "Target.h"
namespace lld::elf {
struct Ctx;
+class TargetInfo;
template <class ELFT> void doIcf(Ctx &);
}
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index b3b12a064687580..9f9eeb87656e98a 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -293,6 +293,8 @@ def icf_all: F<"icf=all">, HelpText<"Enable identical code folding">;
def icf_safe: F<"icf=safe">, HelpText<"Enable safe identical code folding">;
+def icf_safe_thunks: F<"icf=safe_thunks">, HelpText<"Enable safe identical code folding with thunks">;
+
def icf_none: F<"icf=none">, HelpText<"Disable identical code folding (default)">;
def ignore_function_address_equality: FF<"ignore-function-address-equality">,
diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h
index fd1e5d33c438af6..150357a17f20e2f 100644
--- a/lld/ELF/Target.h
+++ b/lld/ELF/Target.h
@@ -16,6 +16,7 @@
#include "llvm/Object/ELF.h"
#include "llvm/Object/ELFTypes.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <array>
@@ -102,6 +103,13 @@ class TargetInfo {
virtual void applyJumpInstrMod(uint8_t *loc, JumpModType type,
JumpModType val) const {}
+ // Initialize the body of the safe thunk in ICF for the target.
+ virtual void initICFSafeThunkBody(InputSection *thunk, Symbol *target) const {
+ llvm_unreachable("target does not support ICF safe thunks");
+ }
+ // Returns the size of the safe thunk in ICF for the target.
+ virtual uint32_t getICFSafeThunkSize() const { return 0; }
+
virtual ~TargetInfo();
// This deletes a jump insn at the end of the section if it is a fall thru to
diff --git a/lld/test/ELF/icf-safe-thunks.s b/lld/test/ELF/icf-safe-thunks.s
new file mode 100644
index 000000000000000..12edf4ae76e2a7b
--- /dev/null
+++ b/lld/test/ELF/icf-safe-thunks.s
@@ -0,0 +1,397 @@
+# REQUIRES: aarch64
+# RUN: rm -rf %t && split-file %s %t && cd %t
+
+# RUN: llvm-mc -filetype=obj -triple=aarch64 a.s -o a.o
+# RUN: ld.lld a.o -o a.so --icf=safe_thunks --print-icf-sections | FileCheck %s
+# RUN: llvm-objdump a.so -d | FileCheck %s --check-prefixes=CHECK-ARM64
+
+# CHECK: selected section a.o:(.text.func_3identical_v1_canmerge)
+# CHECK: selected section a.o:(.text.func_call_thunked_1_nomerge)
+# CHECK: selected section a.o:(.text.func_unique_2_canmerge)
+# CHECK: selected section a.o:(.text.func_3identical_v1)
+
+# CHECK-ARM64: func_unique_1
+# CHECK-ARM64-NEXT: adrp x8, {{.*}}
+# CHECK-ARM64-NEXT: mov w9, #0x1
+
+# CHECK-ARM64: func_unique_2_canmerge
+# CHECK-ARM64-NEXT: adrp x8, {{.*}}
+# CHECK-ARM64-NEXT: mov w9, #0x2
+
+# CHECK-ARM64: func_3identical_v1
+# CHECK-ARM64-NEXT: adrp x8, {{.*}}
+# CHECK-ARM64-NEXT: mov w9, #0x3
+
+# CHECK-ARM64: func_3identical_v1_canmerge
+# CHECK-ARM64-NEXT: adrp x8, {{.*}}
+
+# CHECK-ARM64: func_call_thunked_1_nomerge
+# CHECK-ARM64-NEXT: stp x29, x30, [sp, #-0x10]!
+
+# CHECK-ARM64: <func_3identical_v2_canmerge>:
+# CHECK-ARM64-NEXT: b 0x[[#%.6x,]] <func_3identical_v1_canmerge>
+# CHECK-ARM64: <func_3identical_v3_canmerge>:
+# CHECK-ARM64-NEXT: b 0x[[#%.6x,]] <func_3identical_v1_canmerge>
+# CHECK-ARM64: <func_call_thunked_2_nomerge>:
+# CHECK-ARM64-NEXT: b 0x[[#%.6x,]] <func_call_thunked_1_nomerge>
+# CHECK-ARM64: <func_call_thunked_2_merge>:
+# CHECK-ARM64-NEXT: b 0x[[#%.6x,]] <func_call_thunked_1_nomerge>
+# CHECK-ARM64: <func_2identical_v1>:
+# CHECK-ARM64-NEXT: b 0x[[#%.6x,]] <func_unique_2_canmerge>
+# CHECK-ARM64: <func_2identical_v2>:
+# CHECK-ARM64-NEXT: b 0x[[#%.6x,]] <func_unique_2_canmerge>
+# CHECK-ARM64: <func_3identical_v2>:
+# CHECK-ARM64-NEXT: b 0x[[#%.6x,]] <func_3identical_v1>
+# CHECK-ARM64: <func_3identical_v3>:
+# CHECK-ARM64-NEXT: b 0x[[#%.6x,]] <func_3identical_v1>
+
+;--- a.cc
+#define ATTR __attribute__((noinline,used,retain)) extern "C"
+typedef unsigned long long ULL;
+
+volatile char g_val = 0;
+void *volatile g_ptr = 0;
+
+ATTR void func_unique_1() { g_val = 1; }
+
+ATTR void func_unique_2_canmerge() { g_val = 2; }
+
+ATTR void func_2identical_v1() { g_val = 2; }
+
+ATTR void func_2identical_v2() { g_val = 2; }
+
+ATTR void func_3identical_v1() { g_val = 3; }
+
+ATTR void func_3identical_v2() { g_val = 3; }
+
+ATTR void func_3identical_v3() { g_val = 3; }
+
+ATTR void func_3identical_v1_canmerge() { g_val = 33; }
+
+ATTR void func_3identical_v2_canmerge() { g_val = 33; }
+
+ATTR void func_3identical_v3_canmerge() { g_val = 33; }
+
+ATTR void func_call_thunked_1_nomerge() {
+ func_2identical_v1();
+ g_val = 77;
+}
+
+ATTR void func_call_thunked_2_nomerge() {
+ func_2identical_v2();
+ g_val = 77;
+}
+
+ATTR void func_call_thunked_2_merge() {
+ func_2identical_v2();
+ g_val = 77;
+}
+
+ATTR void call_all_funcs() {
+ func_unique_1();
+ func_unique_2_canmerge();
+ func_2identical_v1();
+ func_2identical_v2();
+ func_3identical_v1();
+ func_3identical_v2();
+ func_3identical_v3();
+ func_3identical_v1_canmerge();
+ func_3identical_v2_canmerge();
+ func_3identical_v3_canmerge();
+}
+
+ATTR void take_func_addr() {
+ g_ptr = (void*)func_unique_1;
+ g_ptr = (void*)func_2identical_v1;
+ g_ptr = (void*)func_2identical_v2;
+ g_ptr = (void*)func_3identical_v1;
+ g_ptr = (void*)func_3identical_v2;
+ g_ptr = (void*)func_3identical_v3;
+}
+
+ATTR int _start() { return 0; }
+
+;--- gen
+clang --target=aarch64-linux-gnu -O3 -ffunction-sections -fdata-sections -fno-asynchronous-unwind-tables -S a.cc -o -
+
+;--- a.s
+ .file "a.cc"
+ .section .text.func_unique_1,"axR",@progbits
+ .globl func_unique_1 // -- Begin function func_unique_1
+ .p2align 2
+ .type func_unique_1,@function
+func_unique_1: // @func_unique_1
+// %bb.0: // %entry
+ adrp x8, g_val
+ mov w9, #1 // =0x1
+ strb w9, [x8, :lo12:g_val]
+ ret
+.Lfunc_end0:
+ .size func_unique_1, .Lfunc_end0-func_unique_1
+ // -- End function
+ .section .text.func_unique_2_canmerge,"axR",@progbits
+ .globl func_unique_2_canmerge // -- Begin function func_unique_2_canmerge
+ .p2align 2
+ .type func_unique_2_canmerge,@function
+func_unique_2_canmerge: // @func_unique_2_canmerge
+// %bb.0: // %entry
+ adrp x8, g_val
+ mov w9, #2 // =0x2
+ strb w9, [x8, :lo12:g_val]
+ ret
+.Lfunc_end1:
+ .size func_unique_2_canmerge, .Lfunc_end1-func_unique_2_canmerge
+ // -- End function
+ .section .text.func_2identical_v1,"axR",@progbits
+ .globl func_2identical_v1 // -- Begin function func_2identical_v1
+ .p2align 2
+ .type func_2identical_v1,@function
+func_2identical_v1: // @func_2identical_v1
+// %bb.0: // %entry
+ adrp x8, g_val
+ mov w9, #2 // =0x2
+ strb w9, [x8, :lo12:g_val]
+ ret
+.Lfunc_end2:
+ .size func_2identical_v1, .Lfunc_end2-func_2identical_v1
+ // -- End function
+ .section .text.func_2identical_v2,"axR",@progbits
+ .globl func_2identical_v2 // -- Begin function func_2identical_v2
+ .p2align 2
+ .type func_2identical_v2,@function
+func_2identical_v2: // @func_2identical_v2
+// %bb.0: // %entry
+ adrp x8, g_val
+ mov w9, #2 // =0x2
+ strb w9, [x8, :lo12:g_val]
+ ret
+.Lfunc_end3:
+ .size func_2identical_v2, .Lfunc_end3-func_2identical_v2
+ // -- End function
...
[truncated]
|
40efd49
to
af4668c
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've not gone over the code changes in detail. Mainly wanted to offer the perspective that this may get better results if it is integrated with the existing thunk generation, or at least delayed until addresses have been assigned.
Right now if you have a particularly large binary, for example a fully instrumented chrome can get close to 200 MiB in size, then it is possible for the icf thunk to be out of range of the preserved function. This will cause another thunk to be generated using an indirect branch. We may have replaced bl foo
in the original to bl foo_icf
; b foo_icf_longbranch
, adrp; add; br
. Which is 16 extra bytes rather than 4, as well as an indirect branch (which may require a BTI to be inserted for another 4-bytes).
If handled later when addresses are known, then we would have more information on whether it is profitable to do the transformation or not. There's also the possibility of using the existing thunk generation code.
I think this would require something like ICF to label candidates that could be handled with thunks, but not removing them early, then handling them after addresses had been generated.
That's definitely more work to do, but I think it would get better results for the largest of binaries. An alternative perspective is that we advise that the option is not used on binaries over 128-MiB in size.
ATTR int _start() { return 0; } | ||
|
||
;--- gen | ||
clang --target=aarch64-linux-gnu -O3 -ffunction-sections -fdata-sections -fno-asynchronous-unwind-tables -S a.cc -o - |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The lld tests can't use clang as the tests can be run without building clang. Essentially anything that is built as a result of -DLLVM_ENABLE_PROJECTS="lld"
(and no runtimes) can be used.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, the safe_thunks mechanism has limitations on jump range handling in large binary builds (e.g., #106573 (comment), #106573 (comment)). A more general implementation approach might be preferable – since the primary goal is mobile build size reduction, suggesting a limit not exceeding 128MB could be reasonable.
The mentioned clang command relates to LLVM's elaborated testing framework and isn't part of actual lld test deployments.
lld/ELF/ICF.cpp
Outdated
|
||
// Find the symbol to create the thunk for. | ||
Symbol *masterSym = nullptr; | ||
for (Symbol *sym : masterIsec->file->getSymbols()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks sslllooowwww - especially if creating lots of thunks. Would be curious on how much this slows down processing.
lld/ELF/ICF.cpp
Outdated
Symbol *masterSym = nullptr; | ||
for (Symbol *sym : masterIsec->file->getSymbols()) { | ||
if (auto *d = dyn_cast<Defined>(sym)) { | ||
if (d->section == masterIsec) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think this is enough, we also want to check that the offset is 0 within the section - we don't want the thunk to be pointing to the middle of the function. Can we just use getEnclosingSymbol
?
lld/ELF/ICF.cpp
Outdated
for (Symbol *sym : thunk->file->getSymbols()) | ||
if (auto *d = dyn_cast<Defined>(sym)) | ||
if (d->section == isec) | ||
d->size = thunkSize; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we also want to set offset to 0.
lld/ELF/ICF.h
Outdated
@@ -9,8 +9,10 @@ | |||
#ifndef LLD_ELF_ICF_H | |||
#define LLD_ELF_ICF_H | |||
|
|||
#include "Target.h" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do we need to touch this file ?
lld/ELF/Target.h
Outdated
llvm_unreachable("target does not support ICF safe thunks"); | ||
} | ||
// Returns the size of the safe thunk in ICF for the target. | ||
virtual uint32_t getICFSafeThunkSize() const { return 0; } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this one should also be llvm_unreachable
The safe_thunks mode (a more canonical spelling should be Regarding the range extension thunk framework: The current implementation, using an R_AARCH64_JUMP26 relocation within With address information, we could replace Frankly, I am not convinced that adding this safe_thunks feature is the right move. ( Debug information associated to folded functions is essentially redirected. safe_thunks might be worse as the debugger will not stop at the first instruction (the branch). (Pasted to https://discourse.llvm.org/t/rfc-add-icf-safe-thunks-in-elf-like-macho/84584) |
Yes. I understand the concerns about the limitations of Update: (Pasted to https://discourse.llvm.org/t/rfc-add-icf-safe-thunks-in-elf-like-macho/84584) |
See https://discourse.llvm.org/t/rfc-add-icf-safe-thunks-in-elf-like-macho/84584 for more detail.
Port from #106573.