Skip to content

Commit 4f7188a

Browse files
committed
[AArch64] Add check that prologue insertion doesn't clobber live regs. (llvm#71826)
This patch extends AArch64FrameLowering::emitPrologue to check if the inserted prologue clobbers live registers. It updates `llvm/test/CodeGen/AArch64/framelayout-scavengingslot.mir` with an extra load to make x9 live before the store, preserving the original test. It uses the original `llvm/test/CodeGen/AArch64/framelayout-scavengingslot.mir` as `llvm/test/CodeGen/AArch64/emit-prologue-clobber-verification.mir`, because there x9 is marked as live on entry, but used as scratch reg as it is not callee saved. The new assertion catches a mis-compile in `store-swift-async-context-clobber-live-reg.ll` on https://github.com/apple/llvm-project/tree/next
1 parent 8db03ad commit 4f7188a

File tree

5 files changed

+164
-1
lines changed

5 files changed

+164
-1
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1395,6 +1395,18 @@ static void emitDefineCFAWithFP(MachineFunction &MF, MachineBasicBlock &MBB,
13951395
.setMIFlags(MachineInstr::FrameSetup);
13961396
}
13971397

1398+
/// Collect live registers from the end of \p MI's parent up to (including) \p
1399+
/// MI in \p LiveRegs.
1400+
static void getLivePhysRegsUpTo(MachineInstr &MI, const TargetRegisterInfo &TRI,
1401+
LivePhysRegs &LiveRegs) {
1402+
1403+
MachineBasicBlock &MBB = *MI.getParent();
1404+
LiveRegs.addLiveOuts(MBB);
1405+
for (const MachineInstr &MI :
1406+
reverse(make_range(MI.getIterator(), MBB.instr_end())))
1407+
LiveRegs.stepBackward(MI);
1408+
}
1409+
13981410
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
13991411
MachineBasicBlock &MBB) const {
14001412
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -1403,6 +1415,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
14031415
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
14041416
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
14051417
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1418+
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1419+
14061420
MachineModuleInfo &MMI = MF.getMMI();
14071421
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
14081422
bool EmitCFI = AFI->needsDwarfUnwindInfo(MF);
@@ -1412,6 +1426,39 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
14121426
bool HasWinCFI = false;
14131427
auto Cleanup = make_scope_exit([&]() { MF.setHasWinCFI(HasWinCFI); });
14141428

1429+
MachineBasicBlock::iterator End = MBB.end();
1430+
#ifndef NDEBUG
1431+
// Collect live registers from the end of MBB up to the start of the existing
1432+
// frame setup instructions.
1433+
MachineBasicBlock::iterator NonFrameStart = MBB.begin();
1434+
while (NonFrameStart != End &&
1435+
NonFrameStart->getFlag(MachineInstr::FrameSetup))
1436+
++NonFrameStart;
1437+
1438+
LivePhysRegs LiveRegs(*TRI);
1439+
if (NonFrameStart != MBB.end()) {
1440+
getLivePhysRegsUpTo(*NonFrameStart, *TRI, LiveRegs);
1441+
// Ignore registers used for stack management for now.
1442+
LiveRegs.removeReg(AArch64::SP);
1443+
LiveRegs.removeReg(AArch64::X19);
1444+
LiveRegs.removeReg(AArch64::FP);
1445+
LiveRegs.removeReg(AArch64::LR);
1446+
}
1447+
1448+
auto VerifyClobberOnExit = make_scope_exit([&]() {
1449+
if (NonFrameStart == MBB.end())
1450+
return;
1451+
// Check if any of the newly added instructions clobber any of the live registers.
1452+
for (MachineInstr &MI :
1453+
make_range(MBB.instr_begin(), NonFrameStart->getIterator())) {
1454+
for (auto &Op : MI.operands())
1455+
if (Op.isReg() && Op.isDef())
1456+
assert(!LiveRegs.contains(Op.getReg()) &&
1457+
"live register clobbered by inserted prologue instructions");
1458+
}
1459+
});
1460+
#endif
1461+
14151462
bool IsFunclet = MBB.isEHFuncletEntry();
14161463

14171464
// At this point, we're going to decide whether or not the function uses a
@@ -1610,7 +1657,6 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
16101657
// Move past the saves of the callee-saved registers, fixing up the offsets
16111658
// and pre-inc if we decided to combine the callee-save and local stack
16121659
// pointer bump above.
1613-
MachineBasicBlock::iterator End = MBB.end();
16141660
while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) &&
16151661
!IsSVECalleeSave(MBBI)) {
16161662
if (CombineSPBump)
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# RUN: not --crash llc -mtriple=aarch64-none-linux-gnu -run-pass=prologepilog %s -o -
2+
#
3+
# REQUIRES: asserts
4+
#
5+
---
6+
# x9 is marked as live on function entry, but it will be used as scratch
7+
# register for prologue computations at the beginning of the prologue.
8+
# Use this to check we catch that the prologue clobbers $x9.
9+
name: x9_clobbered_on_fn_entry
10+
tracksRegLiveness: true
11+
frameInfo:
12+
isFrameAddressTaken: true
13+
stack:
14+
- { id: 0, size: 16, alignment: 16 }
15+
- { id: 1, size: 32768, alignment: 32 }
16+
body: |
17+
bb.0:
18+
liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $lr
19+
STRXui $x0, %stack.0, 0
20+
B %bb.1
21+
bb.1:
22+
liveins: $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $lr
23+
RET_ReallyLR implicit $lr
24+
...

llvm/test/CodeGen/AArch64/framelayout-scavengingslot.mir

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ stack:
1919
body: |
2020
bb.0:
2121
liveins: $x0, $x8
22+
$x9 = LDRXui $x0, 0 :: (load (s64))
2223
STRXui $x0, %stack.0, 0
2324
B %bb.1
2425
bb.1:

llvm/test/CodeGen/AArch64/framelayout-sve-calleesaves-fix.mir

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
; CHECK-NEXT: ret
3131
...
3232
name: fix_restorepoint_p4
33+
tracksRegLiveness: true
3334
stack:
3435
- { id: 0, stack-id: scalable-vector, size: 16, alignment: 16 }
3536
body: |
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc -o - -mtriple=arm64e-apple-macosx %s | FileCheck %s
3+
4+
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
5+
6+
define swifttailcc void @test_async_with_jumptable(ptr %src, ptr swiftasync %as) #0 {
7+
; CHECK-LABEL: test_async_with_jumptable:
8+
; CHECK: ; %bb.0: ; %entry
9+
; CHECK-NEXT: orr x29, x29, #0x1000000000000000
10+
; CHECK-NEXT: str x19, [sp, #-32]! ; 8-byte Folded Spill
11+
; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
12+
; CHECK-NEXT: add x16, sp, #8
13+
; CHECK-NEXT: movk x16, #49946, lsl #48
14+
; CHECK-NEXT: mov x17, x22
15+
; CHECK-NEXT: pacdb x17, x16
16+
; CHECK-NEXT: str x17, [sp, #8]
17+
; CHECK-NEXT: add x29, sp, #16
18+
; CHECK-NEXT: .cfi_def_cfa w29, 16
19+
; CHECK-NEXT: .cfi_offset w30, -8
20+
; CHECK-NEXT: .cfi_offset w29, -16
21+
; CHECK-NEXT: .cfi_offset w19, -32
22+
; CHECK-NEXT: ldr x16, [x0]
23+
; CHECK-NEXT: mov x20, x22
24+
; CHECK-NEXT: mov x22, x0
25+
; CHECK-NEXT: mov x19, x20
26+
; CHECK-NEXT: cmp x16, #3
27+
; CHECK-NEXT: csel x16, x16, xzr, ls
28+
; CHECK-NEXT: Lloh0:
29+
; CHECK-NEXT: adrp x17, LJTI0_0@PAGE
30+
; CHECK-NEXT: Lloh1:
31+
; CHECK-NEXT: add x17, x17, LJTI0_0@PAGEOFF
32+
; CHECK-NEXT: ldrsw x16, [x17, x16, lsl #2]
33+
; CHECK-NEXT: Ltmp0:
34+
; CHECK-NEXT: adr x17, Ltmp0
35+
; CHECK-NEXT: add x16, x17, x16
36+
; CHECK-NEXT: br x16
37+
; CHECK-NEXT: LBB0_1: ; %then.2
38+
; CHECK-NEXT: mov x19, #0 ; =0x0
39+
; CHECK-NEXT: b LBB0_3
40+
; CHECK-NEXT: LBB0_2: ; %then.3
41+
; CHECK-NEXT: mov x19, x22
42+
; CHECK-NEXT: LBB0_3: ; %exit
43+
; CHECK-NEXT: bl _foo
44+
; CHECK-NEXT: mov x2, x0
45+
; CHECK-NEXT: mov x0, x19
46+
; CHECK-NEXT: mov x1, x20
47+
; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
48+
; CHECK-NEXT: ldr x19, [sp], #32 ; 8-byte Folded Reload
49+
; CHECK-NEXT: and x29, x29, #0xefffffffffffffff
50+
; CHECK-NEXT: br x2
51+
; CHECK-NEXT: .loh AdrpAdd Lloh0, Lloh1
52+
; CHECK-NEXT: .cfi_endproc
53+
; CHECK-NEXT: .section __TEXT,__const
54+
; CHECK-NEXT: .p2align 2, 0x0
55+
; CHECK-NEXT: LJTI0_0:
56+
; CHECK-NEXT: .long LBB0_3-Ltmp0
57+
; CHECK-NEXT: .long LBB0_1-Ltmp0
58+
; CHECK-NEXT: .long LBB0_1-Ltmp0
59+
; CHECK-NEXT: .long LBB0_2-Ltmp0
60+
entry:
61+
%l = load i64, ptr %src, align 8
62+
switch i64 %l, label %dead [
63+
i64 0, label %exit
64+
i64 1, label %then.1
65+
i64 2, label %then.2
66+
i64 3, label %then.3
67+
]
68+
69+
then.1:
70+
br label %exit
71+
72+
then.2:
73+
br label %exit
74+
75+
then.3:
76+
br label %exit
77+
78+
dead: ; preds = %entryresume.5
79+
unreachable
80+
81+
exit:
82+
%p = phi ptr [ %src, %then.3 ], [ null, %then.2 ], [ %as, %entry ], [ null, %then.1 ]
83+
%r = call i64 @foo()
84+
%fn = inttoptr i64 %r to ptr
85+
musttail call swifttailcc void %fn(ptr swiftasync %src, ptr %p, ptr %as)
86+
ret void
87+
}
88+
89+
declare i64 @foo()
90+
91+
attributes #0 = { "frame-pointer"="non-leaf" }

0 commit comments

Comments
 (0)