Skip to content

Commit 27084e6

Browse files
smithp35 authored and Sterling-Augustine committed
[LLD][ELF][AArch64] Add BTI Aware long branch thunks (llvm#108989)
When Branch Target Identification (BTI) is enabled all indirect branches must target a BTI instruction. A long branch thunk is a source of indirect branches. To date LLD has been assuming that the object producer is responsible for putting a BTI instruction at all places the linker might generate an indirect branch to. This is true for clang, but not for GCC. GCC will elide the BTI instruction when it can prove that there are no indirect branches from outside the translation unit(s). GNU ld was fixed to generate a landing pad stub (GNU ld speak for thunk) for the destination when a long range stub was needed [1]. This means that using GCC compiled objects with LLD may lead to LLD generating an indirect branch to a location without a BTI. The ABI [2] has also been clarified to say that it is a static linker's responsibility to generate a landing pad when the target does not have a BTI.

This patch implements the same mechanism as GNU ld. When the output ELF file is setting the GNU_PROPERTY_AARCH64_FEATURE_1_BTI property, then we check the destination to see if it has a BTI instruction. If it does not, we generate a landing pad consisting of:

    BTI c
    B <destination>

The B <destination> can be elided if the thunk can be placed so that control flow drops through. For example:

    BTI c
    <destination>:

This will be common when -ffunction-sections is used. The landing pad thunks are effectively alternative entry points for the function. Direct branches are unaffected but any linker generated indirect branch needs to use the alternative. We place these as close as possible to the destination section.

There is some further optimization possible. Consider the case:

    .text
    fn1
    ...
    fn2
    ...

If we need landing pad thunks for both fn1 and fn2 we could order them so that the thunk for fn1 immediately precedes fn1. This could save a single branch. However I didn't think that would be worth the additional complexity.
[1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106671 [2] ARM-software/abi-aa#196
1 parent 1072611 commit 27084e6

File tree

7 files changed

+669
-12
lines changed

7 files changed

+669
-12
lines changed

lld/ELF/Arch/AArch64.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,38 @@ uint64_t elf::getAArch64Page(uint64_t expr) {
2828
return expr & ~static_cast<uint64_t>(0xFFF);
2929
}
3030

31+
// A BTI landing pad is a valid target for an indirect branch when the Branch
32+
// Target Identification has been enabled. As linker generated branches are
33+
// via x16 the BTI landing pads are defined as: BTI C, BTI J, BTI JC, PACIASP,
34+
// PACIBSP.
35+
bool elf::isAArch64BTILandingPad(Symbol &s, int64_t a) {
36+
// PLT entries accessed indirectly have a BTI c.
37+
if (s.isInPlt())
38+
return true;
39+
Defined *d = dyn_cast<Defined>(&s);
40+
if (!isa_and_nonnull<InputSection>(d->section))
41+
// All places that we cannot disassemble are responsible for making
42+
// the target a BTI landing pad.
43+
return true;
44+
InputSection *isec = cast<InputSection>(d->section);
45+
uint64_t off = d->value + a;
46+
// Likely user error, but protect ourselves against out of bounds
47+
// access.
48+
if (off >= isec->getSize())
49+
return true;
50+
const uint8_t *buf = isec->content().begin();
51+
const uint32_t instr = read32le(buf + off);
52+
// All BTI instructions are HINT instructions which all have same encoding
53+
// apart from bits [11:5]
54+
if ((instr & 0xd503201f) == 0xd503201f &&
55+
is_contained({/*PACIASP*/ 0xd503233f, /*PACIBSP*/ 0xd503237f,
56+
/*BTI C*/ 0xd503245f, /*BTI J*/ 0xd503249f,
57+
/*BTI JC*/ 0xd50324df},
58+
instr))
59+
return true;
60+
return false;
61+
}
62+
3163
namespace {
3264
class AArch64 : public TargetInfo {
3365
public:

lld/ELF/Relocations.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2265,6 +2265,15 @@ std::pair<Thunk *, bool> ThunkCreator::getThunk(InputSection *isec,
22652265
return std::make_pair(t, true);
22662266
}
22672267

2268+
// Look up the landing pad thunk recorded for the destination identified by
// (section, value) of d together with addend a, creating one on first use.
// The bool in the returned pair is true only when the thunk was created by
// this call.
std::pair<Thunk *, bool> ThunkCreator::getSyntheticLandingPad(Defined &d,
                                                              int64_t a) {
  const auto key = std::make_pair(std::make_pair(d.section, d.value), a);
  auto inserted = landingPadsBySectionAndAddend.try_emplace(key, nullptr);
  Thunk *&slot = inserted.first->second;
  const bool created = inserted.second;
  // A fresh map entry starts out as nullptr; fill it in with a new thunk.
  if (created)
    slot = addLandingPadThunk(ctx, d, a);
  return std::make_pair(slot, created);
}
2276+
22682277
// Return true if the relocation target is an in range Thunk.
22692278
// Return false if the relocation is not to a Thunk. If the relocation target
22702279
// was originally to a Thunk, but is no longer in range we revert the
@@ -2348,6 +2357,20 @@ bool ThunkCreator::createThunks(uint32_t pass,
23482357
ts = getISDThunkSec(os, isec, isd, rel, src);
23492358
ts->addThunk(t);
23502359
thunks[t->getThunkTargetSym()] = t;
2360+
2361+
// When indirect branches are restricted, such as AArch64 BTI
2362+
// Thunks may need to target a linker generated landing pad
2363+
// instead of the target.
2364+
if (t->needsSyntheticLandingPad()) {
2365+
Thunk *lpt;
2366+
auto &dr = cast<Defined>(t->destination);
2367+
std::tie(lpt, isNew) = getSyntheticLandingPad(dr, t->addend);
2368+
if (isNew) {
2369+
ts = getISThunkSec(cast<InputSection>(dr.section));
2370+
ts->addThunk(lpt);
2371+
}
2372+
t->landingPad = lpt->getThunkTargetSym();
2373+
}
23512374
}
23522375

23532376
// Redirect relocation to Thunk, we never go via the PLT to a Thunk

lld/ELF/Relocations.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
namespace lld::elf {
1919
struct Ctx;
20+
class Defined;
2021
class Symbol;
2122
class InputSection;
2223
class InputSectionBase;
@@ -175,6 +176,8 @@ class ThunkCreator {
175176
std::pair<Thunk *, bool> getThunk(InputSection *isec, Relocation &rel,
176177
uint64_t src);
177178

179+
std::pair<Thunk *, bool> getSyntheticLandingPad(Defined &d, int64_t a);
180+
178181
ThunkSection *addThunkSection(OutputSection *os, InputSectionDescription *,
179182
uint64_t off);
180183

@@ -201,9 +204,18 @@ class ThunkCreator {
201204
// Track InputSections that have an inline ThunkSection placed in front
202205
// an inline ThunkSection may have control fall through to the section below
203206
// so we need to make sure that there is only one of them.
204-
// The Mips LA25 Thunk is an example of an inline ThunkSection.
207+
// The Mips LA25 Thunk is an example of an inline ThunkSection, as is
208+
// the AArch64BTILandingPadThunk.
205209
llvm::DenseMap<InputSection *, ThunkSection *> thunkedSections;
206210

211+
// Record landing pads, generated for a section + offset destination.
212+
// Landing pads are alternative entry points for destinations that need
213+
// to be reached via thunks that use indirect branches. A destination
214+
// needs at most one landing pad as that can be reused by all callers.
215+
llvm::DenseMap<std::pair<std::pair<SectionBase *, uint64_t>, int64_t>,
216+
Thunk *>
217+
landingPadsBySectionAndAddend;
218+
207219
// The number of completed passes of createThunks this permits us
208220
// to do one time initialization on Pass 0 and put a limit on the
209221
// number of times it can be called to prevent infinite loops.

lld/ELF/Target.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,7 @@ void writePrefixedInstruction(uint8_t *loc, uint64_t insn);
232232
void addPPC64SaveRestore();
233233
uint64_t getPPC64TocBase();
234234
uint64_t getAArch64Page(uint64_t expr);
235+
bool isAArch64BTILandingPad(Symbol &s, int64_t a);
235236
template <typename ELFT> void writeARMCmseImportLib();
236237
uint64_t getLoongArchPageDelta(uint64_t dest, uint64_t pc, RelType type);
237238
void riscvFinalizeRelax(int passes);

lld/ELF/Thunks.cpp

Lines changed: 107 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -51,13 +51,20 @@ namespace {
5151
// distance from the thunk to the target is less than 128MB. Long thunks can
5252
// branch to any virtual address and they are implemented in the derived
5353
// classes. This class tries to create a short thunk if the target is in range,
54-
// otherwise it creates a long thunk.
54+
// otherwise it creates a long thunk. When BTI is enabled indirect branches
55+
// must land on a BTI instruction. If the destination does not have a BTI
56+
// instruction mayNeedLandingPad is set to true and Thunk::landingPad points
57+
// to an alternative entry point with a BTI.
5558
class AArch64Thunk : public Thunk {
5659
public:
57-
AArch64Thunk(Ctx &ctx, Symbol &dest, int64_t addend)
58-
: Thunk(ctx, dest, addend) {}
60+
AArch64Thunk(Ctx &ctx, Symbol &dest, int64_t addend, bool mayNeedLandingPad)
61+
: Thunk(ctx, dest, addend), mayNeedLandingPad(mayNeedLandingPad) {}
5962
bool getMayUseShortThunk();
6063
void writeTo(uint8_t *buf) override;
64+
bool needsSyntheticLandingPad() override;
65+
66+
protected:
67+
bool mayNeedLandingPad;
6168

6269
private:
6370
bool mayUseShortThunk = true;
@@ -67,8 +74,9 @@ class AArch64Thunk : public Thunk {
6774
// AArch64 long range Thunks.
6875
class AArch64ABSLongThunk final : public AArch64Thunk {
6976
public:
70-
AArch64ABSLongThunk(Ctx &ctx, Symbol &dest, int64_t addend)
71-
: AArch64Thunk(ctx, dest, addend) {}
77+
AArch64ABSLongThunk(Ctx &ctx, Symbol &dest, int64_t addend,
78+
bool mayNeedLandingPad)
79+
: AArch64Thunk(ctx, dest, addend, mayNeedLandingPad) {}
7280
uint32_t size() override { return getMayUseShortThunk() ? 4 : 16; }
7381
void addSymbols(ThunkSection &isec) override;
7482

@@ -78,15 +86,36 @@ class AArch64ABSLongThunk final : public AArch64Thunk {
7886

7987
class AArch64ADRPThunk final : public AArch64Thunk {
8088
public:
81-
AArch64ADRPThunk(Ctx &ctx, Symbol &dest, int64_t addend)
82-
: AArch64Thunk(ctx, dest, addend) {}
89+
AArch64ADRPThunk(Ctx &ctx, Symbol &dest, int64_t addend,
90+
bool mayNeedLandingPad)
91+
: AArch64Thunk(ctx, dest, addend, mayNeedLandingPad) {}
8392
uint32_t size() override { return getMayUseShortThunk() ? 4 : 12; }
8493
void addSymbols(ThunkSection &isec) override;
8594

8695
private:
8796
void writeLong(uint8_t *buf) override;
8897
};
8998

99+
// AArch64 BTI Landing Pad
100+
// When BTI is enabled indirect branches must land on a BTI
101+
// compatible instruction. When the destination does not have a
102+
// BTI compatible instruction a Thunk doing an indirect branch
103+
// targets a Landing Pad Thunk that direct branches to the target.
104+
class AArch64BTILandingPadThunk final : public Thunk {
105+
public:
106+
AArch64BTILandingPadThunk(Ctx &ctx, Symbol &dest, int64_t addend)
107+
: Thunk(ctx, dest, addend) {}
108+
109+
uint32_t size() override { return getMayUseShortThunk() ? 4 : 8; }
110+
void addSymbols(ThunkSection &isec) override;
111+
void writeTo(uint8_t *buf) override;
112+
113+
private:
114+
bool getMayUseShortThunk();
115+
void writeLong(uint8_t *buf);
116+
bool mayUseShortThunk = true;
117+
};
118+
90119
// Base class for ARM thunks.
91120
//
92121
// An ARM thunk may be either short or long. A short thunk is simply a branch
@@ -545,6 +574,12 @@ void AArch64Thunk::writeTo(uint8_t *buf) {
545574
ctx.target->relocateNoSym(buf, R_AARCH64_CALL26, s - p);
546575
}
547576

577+
bool AArch64Thunk::needsSyntheticLandingPad() {
578+
// Short Thunks use a direct branch, no synthetic landing pad
579+
// required.
580+
return mayNeedLandingPad && !getMayUseShortThunk();
581+
}
582+
548583
// AArch64 long range Thunks.
549584
void AArch64ABSLongThunk::writeLong(uint8_t *buf) {
550585
const uint8_t data[] = {
@@ -553,7 +588,11 @@ void AArch64ABSLongThunk::writeLong(uint8_t *buf) {
553588
0x00, 0x00, 0x00, 0x00, // L0: .xword S
554589
0x00, 0x00, 0x00, 0x00,
555590
};
556-
uint64_t s = getAArch64ThunkDestVA(destination, addend);
591+
// If mayNeedLandingPad is true then destination is an
592+
// AArch64BTILandingPadThunk that defines landingPad.
593+
assert(!mayNeedLandingPad || landingPad != nullptr);
594+
uint64_t s = mayNeedLandingPad ? landingPad->getVA(0)
595+
: getAArch64ThunkDestVA(destination, addend);
557596
memcpy(buf, data, sizeof(data));
558597
ctx.target->relocateNoSym(buf + 8, R_AARCH64_ABS64, s);
559598
}
@@ -577,7 +616,11 @@ void AArch64ADRPThunk::writeLong(uint8_t *buf) {
577616
0x10, 0x02, 0x00, 0x91, // add x16, x16, R_AARCH64_ADD_ABS_LO12_NC(Dest)
578617
0x00, 0x02, 0x1f, 0xd6, // br x16
579618
};
580-
uint64_t s = getAArch64ThunkDestVA(destination, addend);
619+
// if mayNeedLandingPad is true then destination is an
620+
// AArch64BTILandingPadThunk that defines landingPad.
621+
assert(!mayNeedLandingPad || landingPad != nullptr);
622+
uint64_t s = mayNeedLandingPad ? landingPad->getVA(0)
623+
: getAArch64ThunkDestVA(destination, addend);
581624
uint64_t p = getThunkTargetSym()->getVA();
582625
memcpy(buf, data, sizeof(data));
583626
ctx.target->relocateNoSym(buf, R_AARCH64_ADR_PREL_PG_HI21,
@@ -591,6 +634,47 @@ void AArch64ADRPThunk::addSymbols(ThunkSection &isec) {
591634
addSymbol("$x", STT_NOTYPE, 0, isec);
592635
}
593636

637+
// Define the symbols for this landing pad at offset 0 of the thunk: a
// function symbol named after the destination, followed by the "$x" symbol.
void AArch64BTILandingPadThunk::addSymbols(ThunkSection &isec) {
  StringRef thunkName =
      saver().save("__AArch64BTIThunk_" + destination.getName());
  addSymbol(thunkName, STT_FUNC, 0, isec);
  addSymbol("$x", STT_NOTYPE, 0, isec);
}
642+
643+
void AArch64BTILandingPadThunk::writeTo(uint8_t *buf) {
644+
if (!getMayUseShortThunk()) {
645+
writeLong(buf);
646+
return;
647+
}
648+
write32(buf, 0xd503245f); // BTI c
649+
// Control falls through to target in following section.
650+
}
651+
652+
bool AArch64BTILandingPadThunk::getMayUseShortThunk() {
653+
if (!mayUseShortThunk)
654+
return false;
655+
// If the target is the following instruction then we can fall
656+
// through without the indirect branch.
657+
uint64_t s = destination.getVA(addend);
658+
uint64_t p = getThunkTargetSym()->getVA();
659+
// This function is called before addresses are stable. We need to
660+
// work out the range from the thunk to the next section but the
661+
// address of the start of the next section depends on the size of
662+
// the thunks in the previous pass. s - p + offset == 0 represents
663+
// the first pass where the Thunk and following section are assigned
664+
// the same offset. s - p <= 4 is the last Thunk in the Thunk
665+
// Section.
666+
mayUseShortThunk = (s - p + offset == 0 || s - p <= 4);
667+
return mayUseShortThunk;
668+
}
669+
670+
void AArch64BTILandingPadThunk::writeLong(uint8_t *buf) {
671+
uint64_t s = destination.getVA(addend);
672+
uint64_t p = getThunkTargetSym()->getVA() + 4;
673+
write32(buf, 0xd503245f); // BTI c
674+
write32(buf + 4, 0x14000000); // B S
675+
ctx.target->relocateNoSym(buf + 4, R_AARCH64_CALL26, s - p);
676+
}
677+
594678
// ARM Target Thunks
595679
static uint64_t getARMThunkDestVA(const Symbol &s) {
596680
uint64_t v = s.isInPlt() ? s.getPltVA() : s.getVA();
@@ -1279,9 +1363,12 @@ static Thunk *addThunkAArch64(Ctx &ctx, RelType type, Symbol &s, int64_t a) {
12791363
if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26 &&
12801364
type != R_AARCH64_PLT32)
12811365
fatal("unrecognized relocation type");
1366+
bool mayNeedLandingPad =
1367+
(ctx.arg.andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_BTI) &&
1368+
!isAArch64BTILandingPad(s, a);
12821369
if (ctx.arg.picThunk)
1283-
return make<AArch64ADRPThunk>(ctx, s, a);
1284-
return make<AArch64ABSLongThunk>(ctx, s, a);
1370+
return make<AArch64ADRPThunk>(ctx, s, a, mayNeedLandingPad);
1371+
return make<AArch64ABSLongThunk>(ctx, s, a, mayNeedLandingPad);
12851372
}
12861373

12871374
// Creates a thunk for long branches or Thumb-ARM interworking.
@@ -1495,3 +1582,12 @@ Thunk *elf::addThunk(Ctx &ctx, const InputSection &isec, Relocation &rel) {
14951582
llvm_unreachable("add Thunk only supported for ARM, AVR, Mips and PowerPC");
14961583
}
14971584
}
1585+
1586+
// Create a landing pad Thunk for symbol s plus addend a. Only AArch64 has a
// landing pad implementation; any other machine type is a programming error.
Thunk *elf::addLandingPadThunk(Ctx &ctx, Symbol &s, int64_t a) {
  if (ctx.arg.emachine == EM_AARCH64)
    return make<AArch64BTILandingPadThunk>(ctx, s, a);
  llvm_unreachable("add landing pad only supported for AArch64");
}

lld/ELF/Thunks.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,18 @@ class Thunk {
5555
return true;
5656
}
5757

58+
// Thunks that indirectly branch to targets may need a synthetic landing
59+
// pad generated close to the target. For example AArch64 when BTI is
60+
// enabled.
61+
virtual bool needsSyntheticLandingPad() { return false; }
62+
5863
Defined *getThunkTargetSym() const { return syms[0]; }
5964

6065
Ctx &ctx;
6166
Symbol &destination;
6267
int64_t addend;
68+
// Alternative target when indirect branch to destination can't be used.
69+
Symbol *landingPad = nullptr;
6370
llvm::SmallVector<Defined *, 3> syms;
6471
uint64_t offset = 0;
6572
// The alignment requirement for this Thunk, defaults to the size of the
@@ -71,6 +78,10 @@ class Thunk {
7178
// ThunkSection.
7279
Thunk *addThunk(Ctx &, const InputSection &isec, Relocation &rel);
7380

81+
// Create a landing pad Thunk for use when indirect branches from Thunks
82+
// are restricted.
83+
Thunk *addLandingPadThunk(Ctx &, Symbol &s, int64_t a);
84+
7485
void writePPC32PltCallStub(Ctx &, uint8_t *buf, uint64_t gotPltVA,
7586
const InputFile *file, int64_t addend);
7687
void writePPC64LoadAndBranch(uint8_t *buf, int64_t offset);

0 commit comments

Comments
 (0)