Skip to content

Commit cc53b95

Browse files
authored
[AArch64] When hardening against SLS, only create called thunks (#97472)
In preparation for implementing hardening of BLRA* instructions, restrict thunk function generation to only those thunks that are actually called from some function. As described in the existing comments, emitting all possible thunks for the BLRAA and BLRAB instructions would mean adding about 1800 functions in total, most of which would likely never be called. This commit merges the AArch64SLSHardening class into SLSBLRThunkInserter (the merged class is named SLSHardeningInserter), so that thunks can be created as needed while rewriting a machine function. Uses of the TII, TRI and ST fields of the AArch64SLSHardening class are replaced by obtaining these values in place, because ThunkInserter has multiple "entry points", in contrast to the single runOnMachineFunction method of AArch64SLSHardening. The body of the former runOnMachineFunction method essentially replaces the pre-existing insertThunks implementation, as there is no longer any need to insert all possible thunks unconditionally. Instead, thunks are created on first use from inside the insertThunks method.
1 parent 6fbd26b commit cc53b95

File tree

8 files changed

+123
-200
lines changed

8 files changed

+123
-200
lines changed

llvm/lib/Target/AArch64/AArch64.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ FunctionPass *createAArch64ISelDag(AArch64TargetMachine &TM,
4040
FunctionPass *createAArch64StorePairSuppressPass();
4141
FunctionPass *createAArch64ExpandPseudoPass();
4242
FunctionPass *createAArch64SLSHardeningPass();
43-
FunctionPass *createAArch64IndirectThunks();
4443
FunctionPass *createAArch64SpeculationHardeningPass();
4544
FunctionPass *createAArch64LoadStoreOptimizationPass();
4645
ModulePass *createAArch64LowerHomogeneousPrologEpilogPass();

llvm/lib/Target/AArch64/AArch64SLSHardening.cpp

Lines changed: 105 additions & 151 deletions
Original file line numberDiff line numberDiff line change
@@ -13,20 +13,16 @@
1313

1414
#include "AArch64InstrInfo.h"
1515
#include "AArch64Subtarget.h"
16-
#include "Utils/AArch64BaseInfo.h"
1716
#include "llvm/CodeGen/IndirectThunks.h"
1817
#include "llvm/CodeGen/MachineBasicBlock.h"
1918
#include "llvm/CodeGen/MachineFunction.h"
20-
#include "llvm/CodeGen/MachineFunctionPass.h"
2119
#include "llvm/CodeGen/MachineInstr.h"
2220
#include "llvm/CodeGen/MachineInstrBuilder.h"
2321
#include "llvm/CodeGen/MachineOperand.h"
24-
#include "llvm/CodeGen/MachineRegisterInfo.h"
2522
#include "llvm/CodeGen/RegisterScavenging.h"
2623
#include "llvm/IR/DebugLoc.h"
2724
#include "llvm/Pass.h"
28-
#include "llvm/Support/CodeGen.h"
29-
#include "llvm/Support/Debug.h"
25+
#include "llvm/Support/ErrorHandling.h"
3026
#include "llvm/Target/TargetMachine.h"
3127
#include <cassert>
3228

@@ -36,38 +32,42 @@ using namespace llvm;
3632

3733
#define AARCH64_SLS_HARDENING_NAME "AArch64 sls hardening pass"
3834

35+
static const char SLSBLRNamePrefix[] = "__llvm_slsblr_thunk_";
36+
3937
namespace {
4038

41-
class AArch64SLSHardening : public MachineFunctionPass {
42-
public:
43-
const TargetInstrInfo *TII;
44-
const TargetRegisterInfo *TRI;
45-
const AArch64Subtarget *ST;
39+
// Set of inserted thunks: bitmask with bits corresponding to
40+
// indexes in SLSBLRThunks array.
41+
typedef uint32_t ThunksSet;
4642

47-
static char ID;
48-
49-
AArch64SLSHardening() : MachineFunctionPass(ID) {
50-
initializeAArch64SLSHardeningPass(*PassRegistry::getPassRegistry());
43+
struct SLSHardeningInserter : ThunkInserter<SLSHardeningInserter, ThunksSet> {
44+
public:
45+
const char *getThunkPrefix() { return SLSBLRNamePrefix; }
46+
bool mayUseThunk(const MachineFunction &MF) {
47+
ComdatThunks &= !MF.getSubtarget<AArch64Subtarget>().hardenSlsNoComdat();
48+
// We are inserting barriers aside from thunk calls, so
49+
// check hardenSlsRetBr() as well.
50+
return MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr() ||
51+
MF.getSubtarget<AArch64Subtarget>().hardenSlsRetBr();
5152
}
53+
ThunksSet insertThunks(MachineModuleInfo &MMI, MachineFunction &MF,
54+
ThunksSet ExistingThunks);
55+
void populateThunk(MachineFunction &MF);
5256

53-
bool runOnMachineFunction(MachineFunction &Fn) override;
57+
private:
58+
bool ComdatThunks = true;
5459

55-
StringRef getPassName() const override { return AARCH64_SLS_HARDENING_NAME; }
60+
bool hardenReturnsAndBRs(MachineModuleInfo &MMI, MachineBasicBlock &MBB);
61+
bool hardenBLRs(MachineModuleInfo &MMI, MachineBasicBlock &MBB,
62+
ThunksSet &Thunks);
5663

57-
private:
58-
bool hardenReturnsAndBRs(MachineBasicBlock &MBB) const;
59-
bool hardenBLRs(MachineBasicBlock &MBB) const;
60-
MachineBasicBlock &ConvertBLRToBL(MachineBasicBlock &MBB,
61-
MachineBasicBlock::instr_iterator) const;
64+
void convertBLRToBL(MachineModuleInfo &MMI, MachineBasicBlock &MBB,
65+
MachineBasicBlock::instr_iterator MBBI,
66+
ThunksSet &Thunks);
6267
};
6368

6469
} // end anonymous namespace
6570

66-
char AArch64SLSHardening::ID = 0;
67-
68-
INITIALIZE_PASS(AArch64SLSHardening, "aarch64-sls-hardening",
69-
AARCH64_SLS_HARDENING_NAME, false, false)
70-
7171
static void insertSpeculationBarrier(const AArch64Subtarget *ST,
7272
MachineBasicBlock &MBB,
7373
MachineBasicBlock::iterator MBBI,
@@ -90,18 +90,18 @@ static void insertSpeculationBarrier(const AArch64Subtarget *ST,
9090
BuildMI(MBB, MBBI, DL, TII->get(BarrierOpc));
9191
}
9292

93-
bool AArch64SLSHardening::runOnMachineFunction(MachineFunction &MF) {
94-
ST = &MF.getSubtarget<AArch64Subtarget>();
95-
TII = MF.getSubtarget().getInstrInfo();
96-
TRI = MF.getSubtarget().getRegisterInfo();
93+
ThunksSet SLSHardeningInserter::insertThunks(MachineModuleInfo &MMI,
94+
MachineFunction &MF,
95+
ThunksSet ExistingThunks) {
96+
const AArch64Subtarget *ST = &MF.getSubtarget<AArch64Subtarget>();
9797

98-
bool Modified = false;
9998
for (auto &MBB : MF) {
100-
Modified |= hardenReturnsAndBRs(MBB);
101-
Modified |= hardenBLRs(MBB);
99+
if (ST->hardenSlsRetBr())
100+
hardenReturnsAndBRs(MMI, MBB);
101+
if (ST->hardenSlsBlr())
102+
hardenBLRs(MMI, MBB, ExistingThunks);
102103
}
103-
104-
return Modified;
104+
return ExistingThunks;
105105
}
106106

107107
static bool isBLR(const MachineInstr &MI) {
@@ -120,9 +120,10 @@ static bool isBLR(const MachineInstr &MI) {
120120
return false;
121121
}
122122

123-
bool AArch64SLSHardening::hardenReturnsAndBRs(MachineBasicBlock &MBB) const {
124-
if (!ST->hardenSlsRetBr())
125-
return false;
123+
bool SLSHardeningInserter::hardenReturnsAndBRs(MachineModuleInfo &MMI,
124+
MachineBasicBlock &MBB) {
125+
const AArch64Subtarget *ST =
126+
&MBB.getParent()->getSubtarget<AArch64Subtarget>();
126127
bool Modified = false;
127128
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(), E = MBB.end();
128129
MachineBasicBlock::iterator NextMBBI;
@@ -138,78 +139,55 @@ bool AArch64SLSHardening::hardenReturnsAndBRs(MachineBasicBlock &MBB) const {
138139
return Modified;
139140
}
140141

141-
static const char SLSBLRNamePrefix[] = "__llvm_slsblr_thunk_";
142-
142+
static const unsigned NumPermittedRegs = 29;
143143
static const struct ThunkNameAndReg {
144144
const char* Name;
145145
Register Reg;
146-
} SLSBLRThunks[] = {
147-
{ "__llvm_slsblr_thunk_x0", AArch64::X0},
148-
{ "__llvm_slsblr_thunk_x1", AArch64::X1},
149-
{ "__llvm_slsblr_thunk_x2", AArch64::X2},
150-
{ "__llvm_slsblr_thunk_x3", AArch64::X3},
151-
{ "__llvm_slsblr_thunk_x4", AArch64::X4},
152-
{ "__llvm_slsblr_thunk_x5", AArch64::X5},
153-
{ "__llvm_slsblr_thunk_x6", AArch64::X6},
154-
{ "__llvm_slsblr_thunk_x7", AArch64::X7},
155-
{ "__llvm_slsblr_thunk_x8", AArch64::X8},
156-
{ "__llvm_slsblr_thunk_x9", AArch64::X9},
157-
{ "__llvm_slsblr_thunk_x10", AArch64::X10},
158-
{ "__llvm_slsblr_thunk_x11", AArch64::X11},
159-
{ "__llvm_slsblr_thunk_x12", AArch64::X12},
160-
{ "__llvm_slsblr_thunk_x13", AArch64::X13},
161-
{ "__llvm_slsblr_thunk_x14", AArch64::X14},
162-
{ "__llvm_slsblr_thunk_x15", AArch64::X15},
163-
// X16 and X17 are deliberately missing, as the mitigation requires those
164-
// register to not be used in BLR. See comment in ConvertBLRToBL for more
165-
// details.
166-
{ "__llvm_slsblr_thunk_x18", AArch64::X18},
167-
{ "__llvm_slsblr_thunk_x19", AArch64::X19},
168-
{ "__llvm_slsblr_thunk_x20", AArch64::X20},
169-
{ "__llvm_slsblr_thunk_x21", AArch64::X21},
170-
{ "__llvm_slsblr_thunk_x22", AArch64::X22},
171-
{ "__llvm_slsblr_thunk_x23", AArch64::X23},
172-
{ "__llvm_slsblr_thunk_x24", AArch64::X24},
173-
{ "__llvm_slsblr_thunk_x25", AArch64::X25},
174-
{ "__llvm_slsblr_thunk_x26", AArch64::X26},
175-
{ "__llvm_slsblr_thunk_x27", AArch64::X27},
176-
{ "__llvm_slsblr_thunk_x28", AArch64::X28},
177-
{ "__llvm_slsblr_thunk_x29", AArch64::FP},
178-
// X30 is deliberately missing, for similar reasons as X16 and X17 are
179-
// missing.
180-
{ "__llvm_slsblr_thunk_x31", AArch64::XZR},
146+
} SLSBLRThunks[NumPermittedRegs] = {
147+
{"__llvm_slsblr_thunk_x0", AArch64::X0},
148+
{"__llvm_slsblr_thunk_x1", AArch64::X1},
149+
{"__llvm_slsblr_thunk_x2", AArch64::X2},
150+
{"__llvm_slsblr_thunk_x3", AArch64::X3},
151+
{"__llvm_slsblr_thunk_x4", AArch64::X4},
152+
{"__llvm_slsblr_thunk_x5", AArch64::X5},
153+
{"__llvm_slsblr_thunk_x6", AArch64::X6},
154+
{"__llvm_slsblr_thunk_x7", AArch64::X7},
155+
{"__llvm_slsblr_thunk_x8", AArch64::X8},
156+
{"__llvm_slsblr_thunk_x9", AArch64::X9},
157+
{"__llvm_slsblr_thunk_x10", AArch64::X10},
158+
{"__llvm_slsblr_thunk_x11", AArch64::X11},
159+
{"__llvm_slsblr_thunk_x12", AArch64::X12},
160+
{"__llvm_slsblr_thunk_x13", AArch64::X13},
161+
{"__llvm_slsblr_thunk_x14", AArch64::X14},
162+
{"__llvm_slsblr_thunk_x15", AArch64::X15},
163+
// X16 and X17 are deliberately missing, as the mitigation requires those
164+
// register to not be used in BLR. See comment in ConvertBLRToBL for more
165+
// details.
166+
{"__llvm_slsblr_thunk_x18", AArch64::X18},
167+
{"__llvm_slsblr_thunk_x19", AArch64::X19},
168+
{"__llvm_slsblr_thunk_x20", AArch64::X20},
169+
{"__llvm_slsblr_thunk_x21", AArch64::X21},
170+
{"__llvm_slsblr_thunk_x22", AArch64::X22},
171+
{"__llvm_slsblr_thunk_x23", AArch64::X23},
172+
{"__llvm_slsblr_thunk_x24", AArch64::X24},
173+
{"__llvm_slsblr_thunk_x25", AArch64::X25},
174+
{"__llvm_slsblr_thunk_x26", AArch64::X26},
175+
{"__llvm_slsblr_thunk_x27", AArch64::X27},
176+
{"__llvm_slsblr_thunk_x28", AArch64::X28},
177+
{"__llvm_slsblr_thunk_x29", AArch64::FP},
178+
// X30 is deliberately missing, for similar reasons as X16 and X17 are
179+
// missing.
180+
{"__llvm_slsblr_thunk_x31", AArch64::XZR},
181181
};
182182

183-
namespace {
184-
struct SLSBLRThunkInserter : ThunkInserter<SLSBLRThunkInserter> {
185-
const char *getThunkPrefix() { return SLSBLRNamePrefix; }
186-
bool mayUseThunk(const MachineFunction &MF) {
187-
ComdatThunks &= !MF.getSubtarget<AArch64Subtarget>().hardenSlsNoComdat();
188-
return MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr();
189-
}
190-
bool insertThunks(MachineModuleInfo &MMI, MachineFunction &MF,
191-
bool ExistingThunks);
192-
void populateThunk(MachineFunction &MF);
193-
194-
private:
195-
bool ComdatThunks = true;
196-
};
197-
} // namespace
198-
199-
bool SLSBLRThunkInserter::insertThunks(MachineModuleInfo &MMI,
200-
MachineFunction &MF,
201-
bool ExistingThunks) {
202-
if (ExistingThunks)
203-
return false;
204-
// FIXME: It probably would be possible to filter which thunks to produce
205-
// based on which registers are actually used in BLR instructions in this
206-
// function. But would that be a worthwhile optimization?
207-
for (auto T : SLSBLRThunks)
208-
createThunkFunction(MMI, T.Name, ComdatThunks);
209-
return true;
183+
unsigned getThunkIndex(Register Reg) {
184+
for (unsigned I = 0; I < NumPermittedRegs; ++I)
185+
if (SLSBLRThunks[I].Reg == Reg)
186+
return I;
187+
llvm_unreachable("Unexpected register");
210188
}
211189

212-
void SLSBLRThunkInserter::populateThunk(MachineFunction &MF) {
190+
void SLSHardeningInserter::populateThunk(MachineFunction &MF) {
213191
assert(MF.getFunction().hasComdat() == ComdatThunks &&
214192
"ComdatThunks value changed since MF creation");
215193
// FIXME: How to better communicate Register number, rather than through
@@ -258,8 +236,9 @@ void SLSBLRThunkInserter::populateThunk(MachineFunction &MF) {
258236
Entry->end(), DebugLoc(), true /*AlwaysUseISBDSB*/);
259237
}
260238

261-
MachineBasicBlock &AArch64SLSHardening::ConvertBLRToBL(
262-
MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator MBBI) const {
239+
void SLSHardeningInserter::convertBLRToBL(
240+
MachineModuleInfo &MMI, MachineBasicBlock &MBB,
241+
MachineBasicBlock::instr_iterator MBBI, ThunksSet &Thunks) {
263242
// Transform a BLR to a BL as follows:
264243
// Before:
265244
// |-----------------------------|
@@ -285,7 +264,6 @@ MachineBasicBlock &AArch64SLSHardening::ConvertBLRToBL(
285264
// | barrierInsts |
286265
// |-----------------------------|
287266
//
288-
// The __llvm_slsblr_thunk_xN thunks are created by the SLSBLRThunkInserter.
289267
// This function merely needs to transform BLR xN into BL
290268
// __llvm_slsblr_thunk_xN.
291269
//
@@ -318,37 +296,16 @@ MachineBasicBlock &AArch64SLSHardening::ConvertBLRToBL(
318296
}
319297
DebugLoc DL = BLR.getDebugLoc();
320298

321-
// If we'd like to support also BLRAA and BLRAB instructions, we'd need
322-
// a lot more different kind of thunks.
323-
// For example, a
324-
//
325-
// BLRAA xN, xM
326-
//
327-
// instruction probably would need to be transformed to something like:
328-
//
329-
// BL __llvm_slsblraa_thunk_x<N>_x<M>
330-
//
331-
// __llvm_slsblraa_thunk_x<N>_x<M>:
332-
// BRAA x<N>, x<M>
333-
// barrierInsts
334-
//
335-
// Given that about 30 different values of N are possible and about 30
336-
// different values of M are possible in the above, with the current way
337-
// of producing indirect thunks, we'd be producing about 30 times 30, i.e.
338-
// about 900 thunks (where most might not be actually called). This would
339-
// multiply further by two to support both BLRAA and BLRAB variants of those
340-
// instructions.
341-
// If we'd want to support this, we'd probably need to look into a different
342-
// way to produce thunk functions, based on which variants are actually
343-
// needed, rather than producing all possible variants.
344-
// So far, LLVM does never produce BLRA* instructions, so let's leave this
345-
// for the future when LLVM can start producing BLRA* instructions.
346299
MachineFunction &MF = *MBBI->getMF();
347300
MCContext &Context = MBB.getParent()->getContext();
348-
auto ThunkIt =
349-
llvm::find_if(SLSBLRThunks, [Reg](auto T) { return T.Reg == Reg; });
350-
assert (ThunkIt != std::end(SLSBLRThunks));
351-
MCSymbol *Sym = Context.getOrCreateSymbol(ThunkIt->Name);
301+
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
302+
unsigned ThunkIndex = getThunkIndex(Reg);
303+
StringRef ThunkName = SLSBLRThunks[ThunkIndex].Name;
304+
MCSymbol *Sym = Context.getOrCreateSymbol(ThunkName);
305+
if (!(Thunks & (1u << ThunkIndex))) {
306+
Thunks |= 1u << ThunkIndex;
307+
createThunkFunction(MMI, ThunkName, ComdatThunks);
308+
}
352309

353310
MachineInstr *BL = BuildMI(MBB, MBBI, DL, TII->get(BLOpcode)).addSym(Sym);
354311

@@ -386,13 +343,11 @@ MachineBasicBlock &AArch64SLSHardening::ConvertBLRToBL(
386343
RegIsKilled /*isKill*/));
387344
// Remove BLR instruction
388345
MBB.erase(MBBI);
389-
390-
return MBB;
391346
}
392347

393-
bool AArch64SLSHardening::hardenBLRs(MachineBasicBlock &MBB) const {
394-
if (!ST->hardenSlsBlr())
395-
return false;
348+
bool SLSHardeningInserter::hardenBLRs(MachineModuleInfo &MMI,
349+
MachineBasicBlock &MBB,
350+
ThunksSet &Thunks) {
396351
bool Modified = false;
397352
MachineBasicBlock::instr_iterator MBBI = MBB.instr_begin(),
398353
E = MBB.instr_end();
@@ -401,31 +356,30 @@ bool AArch64SLSHardening::hardenBLRs(MachineBasicBlock &MBB) const {
401356
MachineInstr &MI = *MBBI;
402357
NextMBBI = std::next(MBBI);
403358
if (isBLR(MI)) {
404-
ConvertBLRToBL(MBB, MBBI);
359+
convertBLRToBL(MMI, MBB, MBBI, Thunks);
405360
Modified = true;
406361
}
407362
}
408363
return Modified;
409364
}
410365

411-
FunctionPass *llvm::createAArch64SLSHardeningPass() {
412-
return new AArch64SLSHardening();
413-
}
414-
415366
namespace {
416-
class AArch64IndirectThunks : public ThunkInserterPass<SLSBLRThunkInserter> {
367+
class AArch64SLSHardening : public ThunkInserterPass<SLSHardeningInserter> {
417368
public:
418369
static char ID;
419370

420-
AArch64IndirectThunks() : ThunkInserterPass(ID) {}
371+
AArch64SLSHardening() : ThunkInserterPass(ID) {}
421372

422-
StringRef getPassName() const override { return "AArch64 Indirect Thunks"; }
373+
StringRef getPassName() const override { return AARCH64_SLS_HARDENING_NAME; }
423374
};
424375

425376
} // end anonymous namespace
426377

427-
char AArch64IndirectThunks::ID = 0;
378+
char AArch64SLSHardening::ID = 0;
379+
380+
INITIALIZE_PASS(AArch64SLSHardening, "aarch64-sls-hardening",
381+
AARCH64_SLS_HARDENING_NAME, false, false)
428382

429-
FunctionPass *llvm::createAArch64IndirectThunks() {
430-
return new AArch64IndirectThunks();
383+
FunctionPass *llvm::createAArch64SLSHardeningPass() {
384+
return new AArch64SLSHardening();
431385
}

llvm/lib/Target/AArch64/AArch64TargetMachine.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -861,7 +861,6 @@ void AArch64PassConfig::addPreEmitPass() {
861861
}
862862

863863
void AArch64PassConfig::addPostBBSections() {
864-
addPass(createAArch64IndirectThunks());
865864
addPass(createAArch64SLSHardeningPass());
866865
addPass(createAArch64PointerAuthPass());
867866
if (EnableBranchTargets)

llvm/test/CodeGen/AArch64/O0-pipeline.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,6 @@
7474
; CHECK-NEXT: StackMap Liveness Analysis
7575
; CHECK-NEXT: Live DEBUG_VALUE analysis
7676
; CHECK-NEXT: Machine Sanitizer Binary Metadata
77-
; CHECK-NEXT: AArch64 Indirect Thunks
7877
; CHECK-NEXT: AArch64 sls hardening pass
7978
; CHECK-NEXT: AArch64 Pointer Authentication
8079
; CHECK-NEXT: AArch64 Branch Targets

0 commit comments

Comments
 (0)