Skip to content

Commit 7a994d5

Browse files
authored
Merge pull request #2670 from TNorthover/swifttail-pacibsp
arm64e: authenticate LR against the correct SP when tail calling.
2 parents 6e7e13b + 1a3fec2 commit 7a994d5

File tree

3 files changed

+183
-51
lines changed

3 files changed

+183
-51
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 130 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1597,6 +1597,132 @@ static bool isFuncletReturnInstr(const MachineInstr &MI) {
15971597
}
15981598
}
15991599

1600+
// Check if *II is a register update that can be merged into STGloop that ends
1601+
// at (Reg + Size). RemainingOffset is the required adjustment to Reg after the
1602+
// end of the loop.
1603+
static bool canMergeRegUpdate(MachineBasicBlock::iterator II, unsigned Reg,
1604+
int64_t Size, int64_t *TotalOffset) {
1605+
MachineInstr &MI = *II;
1606+
if ((MI.getOpcode() == AArch64::ADDXri ||
1607+
MI.getOpcode() == AArch64::SUBXri) &&
1608+
MI.getOperand(0).getReg() == Reg && MI.getOperand(1).getReg() == Reg) {
1609+
unsigned Shift = AArch64_AM::getShiftValue(MI.getOperand(3).getImm());
1610+
int64_t Offset = MI.getOperand(2).getImm() << Shift;
1611+
if (MI.getOpcode() == AArch64::SUBXri)
1612+
Offset = -Offset;
1613+
int64_t AbsPostOffset = std::abs(Offset - Size);
1614+
const int64_t kMaxOffset =
1615+
0xFFF; // Max encoding for unshifted ADDXri / SUBXri
1616+
if (AbsPostOffset <= kMaxOffset && AbsPostOffset % 16 == 0) {
1617+
*TotalOffset = Offset;
1618+
return true;
1619+
}
1620+
}
1621+
return false;
1622+
}
1623+
1624+
// If we're restoring LR, authenticate it before returning.
1625+
void AArch64FrameLowering::insertAuthLR(MachineBasicBlock &MBB,
1626+
int64_t ArgumentStackToRestore,
1627+
DebugLoc DL) const {
1628+
MachineFunction &MF = *MBB.getParent();
1629+
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1630+
MachineFrameInfo &MFI = MF.getFrameInfo();
1631+
1632+
if (!shouldAuthenticateLR(MF))
1633+
return;
1634+
1635+
bool IsLRVulnerable = false;
1636+
for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
1637+
if (Info.getReg() != AArch64::LR)
1638+
continue;
1639+
IsLRVulnerable = true;
1640+
break;
1641+
}
1642+
1643+
if(!IsLRVulnerable)
1644+
return;
1645+
1646+
if (LLVM_UNLIKELY(!Subtarget.hasPA()))
1647+
report_fatal_error("arm64e LR authentication requires ptrauth");
1648+
1649+
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1650+
MachineBasicBlock::iterator TI = MBB.getFirstTerminator();
1651+
if (TI != MBB.end() && TI->getOpcode() == AArch64::RET_ReallyLR &&
1652+
ArgumentStackToRestore == 0) {
1653+
// If there is a terminator and it's a RET, we can fold AUTH into it.
1654+
// Be careful to keep the implicitly returned registers.
1655+
// By now, we don't need the ReallyLR pseudo, since it's only there
1656+
// to make it possible for LR to be used for non-RET purposes, and
1657+
// that happens in RA and PEI.
1658+
BuildMI(MBB, TI, DL, TII->get(AArch64::RETAB))
1659+
.copyImplicitOps(*TI)
1660+
.setMIFlag(MachineInstr::FrameDestroy);
1661+
MBB.erase(TI);
1662+
return;
1663+
}
1664+
1665+
// Otherwise, we could be in a shrink-wrapped or tail-calling block.
1666+
if (ArgumentStackToRestore >= 0) {
1667+
// We can safely move sp to where it was on entry, execute autibsp to
1668+
// authenticate LR, and deallocate the extra incoming argument stack.
1669+
int64_t Offset = 0;
1670+
MachineBasicBlock::iterator FrameI =
1671+
TI == MBB.end() ? MBB.end() : std::prev(TI);
1672+
if (FrameI != MBB.end() &&
1673+
canMergeRegUpdate(FrameI, AArch64::SP, ArgumentStackToRestore,
1674+
&Offset)) {
1675+
Offset -= ArgumentStackToRestore;
1676+
if (Offset == 0)
1677+
MBB.erase(FrameI);
1678+
else {
1679+
FrameI->setDesc(
1680+
TII->get(Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri));
1681+
FrameI->getOperand(2).setImm(std::abs(Offset));
1682+
FrameI->getOperand(3).setImm(0);
1683+
}
1684+
} else {
1685+
emitFrameOffset(MBB, TI, DL, AArch64::SP, AArch64::SP,
1686+
StackOffset::getFixed(-ArgumentStackToRestore), TII,
1687+
MachineInstr::FrameDestroy);
1688+
}
1689+
BuildMI(MBB, TI, DL, TII->get(AArch64::AUTIBSP))
1690+
.setMIFlag(MachineInstr::FrameDestroy);
1691+
emitFrameOffset(MBB, TI, DL, AArch64::SP, AArch64::SP,
1692+
StackOffset::getFixed(ArgumentStackToRestore), TII,
1693+
MachineInstr::FrameDestroy);
1694+
return;
1695+
}
1696+
1697+
// SP is going to be below where it was on entry, so trying to use AUTIBSP
1698+
// risks leaving live arguments outside the redzone. We need to find a spare
1699+
// register for the discriminator and execute an AUTIB instead.
1700+
RegScavenger RS;
1701+
RS.enterBasicBlockEnd(MBB);
1702+
RS.backward(TI);
1703+
1704+
// Prefer x16 or x17 since if we get interrupted they have better protection
1705+
// in the kernel.
1706+
Register TmpReg;
1707+
if (!RS.isRegUsed(AArch64::X16))
1708+
TmpReg = AArch64::X16;
1709+
else if (!RS.isRegUsed(AArch64::X17))
1710+
TmpReg = AArch64::X17;
1711+
else
1712+
TmpReg = RS.scavengeRegisterBackwards(AArch64::GPR64commonRegClass, TI,
1713+
false, 0, false);
1714+
if (TmpReg == AArch64::NoRegister)
1715+
report_fatal_error("unable to claim register to authenticate LR");
1716+
1717+
emitFrameOffset(MBB, TI, DL, TmpReg, AArch64::SP,
1718+
StackOffset::getFixed(-ArgumentStackToRestore), TII,
1719+
MachineInstr::FrameDestroy);
1720+
BuildMI(MBB, TI, DL, TII->get(AArch64::AUTIB), AArch64::LR)
1721+
.addUse(AArch64::LR)
1722+
.addUse(TmpReg)
1723+
.setMIFlag(MachineInstr::FrameDestroy);
1724+
}
1725+
16001726
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
16011727
MachineBasicBlock &MBB) const {
16021728
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
@@ -1623,35 +1749,12 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
16231749
if (MF.getFunction().getCallingConv() == CallingConv::GHC)
16241750
return;
16251751

1626-
// If we're restoring LR, authenticate it before returning.
1627-
// Use scope_exit to ensure we do that last on all return paths.
1628-
auto InsertAuthLROnExit = make_scope_exit([&]() {
1629-
if (shouldAuthenticateLR(MF)) {
1630-
if (LLVM_UNLIKELY(!Subtarget.hasPA()))
1631-
report_fatal_error("arm64e LR authentication requires ptrauth");
1632-
for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
1633-
if (Info.getReg() != AArch64::LR)
1634-
continue;
1635-
MachineBasicBlock::iterator TI = MBB.getFirstTerminator();
1636-
if (TI != MBB.end() && TI->getOpcode() == AArch64::RET_ReallyLR) {
1637-
// If there is a terminator and it's a RET, we can fold AUTH into it.
1638-
// Be careful to keep the implicitly returned registers.
1639-
// By now, we don't need the ReallyLR pseudo, since it's only there
1640-
// to make it possible for LR to be used for non-RET purposes, and
1641-
// that happens in RA and PEI.
1642-
BuildMI(MBB, TI, DL, TII->get(AArch64::RETAB)).copyImplicitOps(*TI);
1643-
MBB.erase(TI);
1644-
} else {
1645-
// Otherwise, we could be in a shrink-wrapped or tail-calling block.
1646-
BuildMI(MBB, TI, DL, TII->get(AArch64::AUTIBSP));
1647-
}
1648-
}
1649-
}
1650-
});
1651-
16521752
// How much of the stack used by incoming arguments this function is expected
16531753
// to restore in this particular epilogue.
1654-
int64_t ArgumentStackToRestore = getArgumentStackToRestore(MF, MBB);
1754+
const int64_t ArgumentStackToRestore = getArgumentStackToRestore(MF, MBB);
1755+
1756+
auto InsertAuthLROnExit =
1757+
make_scope_exit([&]() { insertAuthLR(MBB, ArgumentStackToRestore, DL); });
16551758

16561759
// The stack frame should be like below,
16571760
//
@@ -3184,30 +3287,6 @@ void TagStoreEdit::emitLoop(MachineBasicBlock::iterator InsertI) {
31843287
}
31853288
}
31863289

3187-
// Check if *II is a register update that can be merged into STGloop that ends
// at (Reg + Size). RemainingOffset is the required adjustment to Reg after the
// end of the loop.
bool canMergeRegUpdate(MachineBasicBlock::iterator II, unsigned Reg,
                       int64_t Size, int64_t *TotalOffset) {
  MachineInstr &MI = *II;
  // Only an immediate ADD/SUB that updates Reg in place is a candidate.
  if ((MI.getOpcode() == AArch64::ADDXri ||
       MI.getOpcode() == AArch64::SUBXri) &&
      MI.getOperand(0).getReg() == Reg && MI.getOperand(1).getReg() == Reg) {
    // Operand 3 carries the optional left shift of the 12-bit immediate.
    unsigned Shift = AArch64_AM::getShiftValue(MI.getOperand(3).getImm());
    int64_t Offset = MI.getOperand(2).getImm() << Shift;
    if (MI.getOpcode() == AArch64::SUBXri)
      Offset = -Offset;
    // The residual adjustment after the loop must itself be encodable and
    // 16-byte aligned.
    int64_t AbsPostOffset = std::abs(Offset - Size);
    const int64_t kMaxOffset =
        0xFFF; // Max encoding for unshifted ADDXri / SUBXri
    if (AbsPostOffset <= kMaxOffset && AbsPostOffset % 16 == 0) {
      *TotalOffset = Offset;
      return true;
    }
  }
  return false;
}
3210-
32113290
void mergeMemRefs(const SmallVectorImpl<TagStoreInstr> &TSE,
32123291
SmallVectorImpl<MachineMemOperand *> &MemRefs) {
32133292
MemRefs.clear();

llvm/lib/Target/AArch64/AArch64FrameLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,10 @@ class AArch64FrameLowering : public TargetFrameLowering {
6161
MutableArrayRef<CalleeSavedInfo> CSI,
6262
const TargetRegisterInfo *TRI) const override;
6363

64+
  /// Authenticate a reloaded LR in \p MBB before it is used to return or
  /// tail call, restoring \p ArgumentStackToRestore bytes of incoming
  /// argument stack around the authentication as needed.
  void insertAuthLR(MachineBasicBlock &MBB,
                    int64_t ArgumentStackToRestore,
                    DebugLoc DL) const;
67+
6468
/// Can this function use the red zone for local allocations.
6569
bool canUseRedZone(const MachineFunction &MF) const;
6670

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
; RUN: llc -verify-machineinstrs < %s -mtriple=arm64e-apple-macosx | FileCheck %s
2+
3+
declare swifttailcc void @callee_stack0()
4+
declare swifttailcc void @callee_stack8([8 x i64], i64)
5+
declare swifttailcc void @callee_stack16([8 x i64], i64, i64)
6+
declare extern_weak swifttailcc void @callee_weak()
7+
8+
; No incoming or outgoing stack arguments: LR authentication should be a bare
; autibsp right after the frame record reload, with no SP adjustment at all.
define swifttailcc void @caller_to0_from0() "ptrauth-returns" "frame-pointer"="all" nounwind {
; CHECK-LABEL: caller_to0_from0:
; CHECK: stp x29, x30, [sp, #-16]!
; [...]
; CHECK: ldp x29, x30, [sp], #16
; CHECK-NEXT: autibsp
; CHECK-NOT: add sp
; CHECK-NOT: sub sp
  musttail call swifttailcc void @callee_stack0()
  ret void

}
20+
21+
; 8 bytes of incoming stack arguments to deallocate: authenticate first (SP is
; at its entry value after the ldp), then pop the argument area.
define swifttailcc void @caller_to0_from8([8 x i64], i64) "ptrauth-returns" "frame-pointer"="all" {
; CHECK-LABEL: caller_to0_from8:
; CHECK: stp x29, x30, [sp, #-16]!
; [...]
; CHECK: ldp x29, x30, [sp], #16
; CHECK-NEXT: autibsp
; CHECK: add sp, sp, #16

  musttail call swifttailcc void @callee_stack0()
  ret void

}
33+
34+
; Outgoing stack arguments grow the stack below the entry SP, so the entry SP
; is recomputed into a scratch register (x16) and LR is authenticated with
; autib against it instead of autibsp.
define swifttailcc void @caller_to8_from0() "ptrauth-returns" "frame-pointer"="all" {
; CHECK-LABEL: caller_to8_from0:
; CHECK: stp x29, x30, [sp, #-32]!
; [...]
; CHECK: ldp x29, x30, [sp], #16
; CHECK-NEXT: add x16, sp, #16
; CHECK-NEXT: autib x30, x16
; CHECK-NOT: add sp
; CHECK-NOT: sub sp

; Key point is that we don't move sp then autibsp because that leaves live
; arguments below sp, potentially outside the redzone.
  musttail call swifttailcc void @callee_stack8([8 x i64] undef, i64 42)
  ret void

}

0 commit comments

Comments
 (0)