Skip to content

Commit dff114b

Browse files
authored
[ARM] Optimise non-ABI frame pointers (#110286)
With -fomit-frame-pointer, even if we set up a frame pointer for other reasons (e.g. variable-sized or over-aligned stack allocations), we don't need to create an ABI-compliant frame record. This means that we can save all of the general-purpose registers in a single push, instead of splitting the push to keep the frame pointer and link register adjacent on the stack, which saves two instructions per function.
1 parent f6b513a commit dff114b

14 files changed

+1089
-481
lines changed

llvm/lib/Target/ARM/ARMFrameLowering.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3003,6 +3003,17 @@ bool ARMFrameLowering::assignCalleeSavedSpillSlots(
30033003
// on the stack.
30043004
CSI.insert(CSI.begin(), CalleeSavedInfo(ARM::R12));
30053005
break;
3006+
case ARMSubtarget::NoSplit:
3007+
assert(!MF.getTarget().Options.DisableFramePointerElim(MF) &&
3008+
"ABI-required frame pointers need a CSR split when signing return "
3009+
"address.");
3010+
CSI.insert(find_if(CSI,
3011+
[=](const auto &CS) {
3012+
Register Reg = CS.getReg();
3013+
return Reg != ARM::LR;
3014+
}),
3015+
CalleeSavedInfo(ARM::R12));
3016+
break;
30063017
default:
30073018
llvm_unreachable("Unexpected CSR split with return address signing");
30083019
}

llvm/lib/Target/ARM/ARMSubtarget.cpp

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -492,17 +492,16 @@ ARMSubtarget::getPushPopSplitVariation(const MachineFunction &MF) const {
492492
const std::vector<CalleeSavedInfo> CSI =
493493
MF.getFrameInfo().getCalleeSavedInfo();
494494

495-
// Returns SplitR7 if the frame setup must be split into two separate pushes
496-
// of r0-r7,lr and another containing r8-r11 (+r12 if necessary). This is
497-
// always required on Thumb1-only targets, as the push and pop instructions
498-
// can't access the high registers. This is also required when R7 is the frame
499-
// pointer and frame pointer elimination is disabled, or branch signing is
500-
// enabled and AAPCS is disabled.
501-
if ((MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress() &&
502-
!createAAPCSFrameChain()) ||
503-
(getFramePointerReg() == ARM::R7 &&
504-
MF.getTarget().Options.DisableFramePointerElim(MF)) ||
505-
isThumb1Only())
495+
// Thumb1 always splits the pushes at R7, because the Thumb1 push instruction
496+
// cannot use high registers except for lr.
497+
if (isThumb1Only())
498+
return SplitR7;
499+
500+
// If R7 is the frame pointer, we must split at R7 to ensure that the
501+
// previous frame pointer (R7) and return address (LR) are adjacent on the
502+
// stack, to form a valid frame record.
503+
if (getFramePointerReg() == ARM::R7 &&
504+
MF.getTarget().Options.FramePointerIsReserved(MF))
506505
return SplitR7;
507506

508507
// Returns SplitR11WindowsSEH when the stack pointer needs to be
@@ -515,11 +514,12 @@ ARMSubtarget::getPushPopSplitVariation(const MachineFunction &MF) const {
515514
(MFI.hasVarSizedObjects() || getRegisterInfo()->hasStackRealignment(MF)))
516515
return SplitR11WindowsSEH;
517516

518-
// Returns R11SplitAAPCSBranchSigning if R11 and lr are not adjacent to each
519-
// other in the list of callee saved registers in a frame, and branch
520-
// signing is enabled.
517+
// Returns SplitR11AAPCSSignRA when the frame pointer is R11, requiring R11
518+
// and LR to be adjacent on the stack, and branch signing is enabled,
519+
// requiring R12 to be on the stack.
521520
if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress() &&
522-
getFramePointerReg() == ARM::R11)
521+
getFramePointerReg() == ARM::R11 &&
522+
MF.getTarget().Options.FramePointerIsReserved(MF))
523523
return SplitR11AAPCSSignRA;
524524
return NoSplit;
525525
}

llvm/lib/Target/ARM/ARMSubtarget.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,10 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
9595
/// push {r0-r7, lr}
9696
/// push {r8-r12}
9797
/// vpush {d8-d15}
98+
/// Note that Thumb1 changes this layout when the frame pointer is R11,
99+
/// using a longer sequence of instructions because R11 can't be used by a
100+
/// Thumb1 push instruction. This doesn't currently have a separate enum
101+
/// value, and is handled entirely within Thumb1FrameLowering::emitPrologue.
98102
SplitR7,
99103

100104
/// When the stack frame size is not known (because of variable-sized
Lines changed: 27 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,32 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
12
; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s
23
; RUN: llc --filetype=obj %s -o - | llvm-readelf -u - | FileCheck %s --check-prefix=UNWIND
34
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
45
target triple = "thumbv8.1m.main-arm-unknown-eabi"
56

7+
; Check the function starts with `pacbti` and correct unwind info is emitted
68
define hidden i32 @_Z1fi(i32 %x) "sign-return-address"="non-leaf" "sign-return-address-key"="a_key" "branch-target-enforcement" {
9+
; CHECK-LABEL: _Z1fi:
10+
; CHECK: .cfi_sections .debug_frame
11+
; CHECK-NEXT: .cfi_startproc
12+
; CHECK-NEXT: @ %bb.0: @ %entry
13+
; CHECK-NEXT: pacbti r12, lr, sp
14+
; CHECK-NEXT: .save {r7, ra_auth_code, lr}
15+
; CHECK-NEXT: push.w {r7, r12, lr}
16+
; CHECK-NEXT: .cfi_def_cfa_offset 12
17+
; CHECK-NEXT: .cfi_offset lr, -4
18+
; CHECK-NEXT: .cfi_offset ra_auth_code, -8
19+
; CHECK-NEXT: .cfi_offset r7, -12
20+
; CHECK-NEXT: .pad #4
21+
; CHECK-NEXT: sub sp, #4
22+
; CHECK-NEXT: .cfi_def_cfa_offset 16
23+
; CHECK-NEXT: adds r0, #1
24+
; CHECK-NEXT: bl _Z1gi
25+
; CHECK-NEXT: subs r0, #1
26+
; CHECK-NEXT: add sp, #4
27+
; CHECK-NEXT: pop.w {r7, r12, lr}
28+
; CHECK-NEXT: aut r12, lr, sp
29+
; CHECK-NEXT: bx lr
730
entry:
831
%add = add nsw i32 %x, 1
932
%call = tail call i32 @_Z1gi(i32 %add)
@@ -13,26 +36,10 @@ entry:
1336

1437
declare dso_local i32 @_Z1gi(i32)
1538

16-
; Check the function starts with `pacbti` and correct unwind info is emitted
17-
; CHECK-LABEL: _Z1fi:
18-
; ...
19-
; CHECK: pacbti r12, lr, sp
20-
; CHECK-NEXT: .save {r7, lr}
21-
; CHECK-NEXT: push {r7, lr}
22-
; CHECK-NEXT: .cfi_def_cfa_offset 8
23-
; CHECK-NEXT: .cfi_offset lr, -4
24-
; CHECK-NEXT: .cfi_offset r7, -8
25-
; CHECK-NEXT: .save {ra_auth_code}
26-
; CHECK-NEXT: str r12, [sp, #-4]!
27-
; CHECK-NEXT: .cfi_def_cfa_offset 12
28-
; CHECK-NEXT: .cfi_offset ra_auth_code, -12
29-
; CHECK-NEXT: .pad #4
30-
; CHECK-NEXT: sub sp, #4
31-
; CHECK-NEXT: .cfi_def_cfa_offset 16
32-
; ...
33-
3439
; UNWIND-LABEL: Opcodes [
3540
; UNWIND-NEXT: 0x00 ; vsp = vsp + 4
41+
; UNWIND-NEXT: 0x80 0x08 ; pop {r7}
3642
; UNWIND-NEXT: 0xB4 ; pop ra_auth_code
37-
; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr}
38-
; UNWIND-NEXT: 0xB0 ; finish
43+
; UNWIND-NEXT: 0x84 0x00 ; pop {lr}
44+
45+
Lines changed: 97 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
12
; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s
23
; RUN: llc --filetype=obj %s -o - | llvm-readelf -s --unwind - | FileCheck %s --check-prefix=UNWIND
34
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
@@ -16,79 +17,112 @@ target triple = "thumbv8m.main-none-none-eabi"
1617
; }
1718

1819
define hidden i32 @f0(i32 %x) local_unnamed_addr "sign-return-address"="non-leaf" {
20+
; CHECK-LABEL: f0:
21+
; CHECK: .cfi_sections .debug_frame
22+
; CHECK-NEXT: .cfi_startproc
23+
; CHECK-NEXT: @ %bb.0: @ %entry
24+
; CHECK-NEXT: pac r12, lr, sp
25+
; CHECK-NEXT: .save {r7, ra_auth_code, lr}
26+
; CHECK-NEXT: push.w {r7, r12, lr}
27+
; CHECK-NEXT: .cfi_def_cfa_offset 12
28+
; CHECK-NEXT: .cfi_offset lr, -4
29+
; CHECK-NEXT: .cfi_offset ra_auth_code, -8
30+
; CHECK-NEXT: .cfi_offset r7, -12
31+
; CHECK-NEXT: .pad #4
32+
; CHECK-NEXT: sub sp, #4
33+
; CHECK-NEXT: .cfi_def_cfa_offset 16
34+
; CHECK-NEXT: subs r0, #1
35+
; CHECK-NEXT: bl g
36+
; CHECK-NEXT: adds r0, #1
37+
; CHECK-NEXT: add sp, #4
38+
; CHECK-NEXT: pop.w {r7, r12, lr}
39+
; CHECK-NEXT: aut r12, lr, sp
40+
; CHECK-NEXT: bx lr
1941
entry:
2042
%sub = add nsw i32 %x, -1
2143
%call = tail call i32 @g(i32 %sub)
2244
%add = add nsw i32 %call, 1
2345
ret i32 %add
2446
}
2547

26-
; CHECK-LABEL: f0:
27-
; CHECK: pac r12, lr, sp
28-
; CHECK-NEXT: .save {r7, lr}
29-
; CHECK-NEXT: push {r7, lr}
30-
; CHECK-NEXT: .cfi_def_cfa_offset 8
31-
; CHECK-NEXT: .cfi_offset lr, -4
32-
; CHECK-NEXT: .cfi_offset r7, -8
33-
; CHECK-NEXT: .save {ra_auth_code}
34-
; CHECK-NEXT: str r12, [sp, #-4]!
35-
; CHECK-NEXT: .cfi_def_cfa_offset 12
36-
; CHECK-NEXT: .cfi_offset ra_auth_code, -12
37-
; CHECK-NEXT: .pad #4
38-
; CHECK-NEXT: sub sp, #4
39-
; ...
40-
; CHECK: add sp, #4
41-
; CHECK-NEXT: ldr r12, [sp], #4
42-
; CHECK-NEXT: pop.w {r7, lr}
43-
; CHECK-NEXT: aut r12, lr, sp
44-
; CHECK-NEXT: bx lr
45-
4648
define hidden i32 @f1(i32 %x) local_unnamed_addr #0 {
49+
; CHECK-LABEL: f1:
50+
; CHECK: .cfi_startproc
51+
; CHECK-NEXT: @ %bb.0: @ %entry
52+
; CHECK-NEXT: pac r12, lr, sp
53+
; CHECK-NEXT: vstr fpcxtns, [sp, #-4]!
54+
; CHECK-NEXT: .cfi_def_cfa_offset 4
55+
; CHECK-NEXT: .save {r7, ra_auth_code, lr}
56+
; CHECK-NEXT: push.w {r7, r12, lr}
57+
; CHECK-NEXT: .cfi_def_cfa_offset 16
58+
; CHECK-NEXT: .cfi_offset lr, -8
59+
; CHECK-NEXT: .cfi_offset ra_auth_code, -12
60+
; CHECK-NEXT: .cfi_offset r7, -16
61+
; CHECK-NEXT: subs r0, #1
62+
; CHECK-NEXT: bl g
63+
; CHECK-NEXT: adds r0, #1
64+
; CHECK-NEXT: pop.w {r7, r12, lr}
65+
; CHECK-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr}
66+
; CHECK-NEXT: vldr fpcxtns, [sp], #4
67+
; CHECK-NEXT: aut r12, lr, sp
68+
; CHECK-NEXT: clrm {r1, r2, r3, r12, apsr}
69+
; CHECK-NEXT: bxns lr
4770
entry:
4871
%sub = add nsw i32 %x, -1
4972
%call = tail call i32 @g(i32 %sub)
5073
%add = add nsw i32 %call, 1
5174
ret i32 %add
5275
}
5376

54-
; CHECK-LABEL: f1:
55-
; CHECK: pac r12, lr, sp
56-
; CHECK-NEXT: vstr fpcxtns, [sp, #-4]!
57-
; CHECK-NEXT: .cfi_def_cfa_offset 4
58-
; CHECK-NEXT: .save {r7, lr}
59-
; CHECK-NEXT: push {r7, lr}
60-
; CHECK: vldr fpcxtns, [sp], #4
61-
; CHECK: aut r12, lr, sp
62-
6377
define hidden i32 @f2(i32 %x) local_unnamed_addr #1 {
78+
; CHECK-LABEL: f2:
79+
; CHECK: .cfi_startproc
80+
; CHECK-NEXT: @ %bb.0: @ %entry
81+
; CHECK-NEXT: pac r12, lr, sp
82+
; CHECK-NEXT: .save {r7, ra_auth_code, lr}
83+
; CHECK-NEXT: push.w {r7, r12, lr}
84+
; CHECK-NEXT: .cfi_def_cfa_offset 12
85+
; CHECK-NEXT: .cfi_offset lr, -4
86+
; CHECK-NEXT: .cfi_offset ra_auth_code, -8
87+
; CHECK-NEXT: .cfi_offset r7, -12
88+
; CHECK-NEXT: .pad #4
89+
; CHECK-NEXT: sub sp, #4
90+
; CHECK-NEXT: .cfi_def_cfa_offset 16
91+
; CHECK-NEXT: subs r0, #1
92+
; CHECK-NEXT: bl g
93+
; CHECK-NEXT: adds r0, #1
94+
; CHECK-NEXT: add sp, #4
95+
; CHECK-NEXT: pop.w {r7, r12, lr}
96+
; CHECK-NEXT: aut r12, lr, sp
97+
; CHECK-NEXT: mrs r12, control
98+
; CHECK-NEXT: tst.w r12, #8
99+
; CHECK-NEXT: beq .LBB2_2
100+
; CHECK-NEXT: @ %bb.1: @ %entry
101+
; CHECK-NEXT: vmrs r12, fpscr
102+
; CHECK-NEXT: vmov d0, lr, lr
103+
; CHECK-NEXT: vmov d1, lr, lr
104+
; CHECK-NEXT: vmov d2, lr, lr
105+
; CHECK-NEXT: vmov d3, lr, lr
106+
; CHECK-NEXT: vmov d4, lr, lr
107+
; CHECK-NEXT: vmov d5, lr, lr
108+
; CHECK-NEXT: vmov d6, lr, lr
109+
; CHECK-NEXT: vmov d7, lr, lr
110+
; CHECK-NEXT: bic r12, r12, #159
111+
; CHECK-NEXT: bic r12, r12, #4026531840
112+
; CHECK-NEXT: vmsr fpscr, r12
113+
; CHECK-NEXT: .LBB2_2: @ %entry
114+
; CHECK-NEXT: mov r1, lr
115+
; CHECK-NEXT: mov r2, lr
116+
; CHECK-NEXT: mov r3, lr
117+
; CHECK-NEXT: mov r12, lr
118+
; CHECK-NEXT: msr apsr_nzcvq, lr
119+
; CHECK-NEXT: bxns lr
64120
entry:
65121
%sub = add nsw i32 %x, -1
66122
%call = tail call i32 @g(i32 %sub)
67123
%add = add nsw i32 %call, 1
68124
ret i32 %add
69125
}
70-
; CHECK-LABEL: f2:
71-
; CHECK: pac r12, lr, sp
72-
; CHECK-NEXT: .save {r7, lr}
73-
; CHECK-NEXT: push {r7, lr}
74-
; CHECK-NEXT: .cfi_def_cfa_offset 8
75-
; CHECK-NEXT: .cfi_offset lr, -4
76-
; CHECK-NEXT: .cfi_offset r7, -8
77-
; CHECK-NEXT: .save {ra_auth_code}
78-
; CHECK-NEXT: str r12, [sp, #-4]!
79-
; CHECK-NEXT: .cfi_def_cfa_offset 12
80-
; CHECK-NEXT: .cfi_offset ra_auth_code, -12
81-
; CHECK-NEXT: .pad #4
82-
; CHECK-NEXT: sub sp, #4
83-
; CHECK-NEXT: .cfi_def_cfa_offset 16
84-
; ...
85-
; CHECK: add sp, #4
86-
; CHECK-NEXT: ldr r12, [sp], #4
87-
; CHECK-NEXT: pop.w {r7, lr}
88-
; CHECK-NEXT: aut r12, lr, sp
89-
; CHECK-NEXT: mrs r12, control
90-
; ...
91-
; CHECK: bxns lr
92126

93127
declare dso_local i32 @g(i32) local_unnamed_addr
94128

@@ -103,22 +137,22 @@ attributes #1 = { "sign-return-address"="non-leaf" "cmse_nonsecure_entry" "targe
103137

104138
; UNWIND-LABEL: FunctionAddress: 0x0
105139
; UNWIND: 0x00 ; vsp = vsp + 4
140+
; UNWIND-NEXT: 0x80 0x08 ; pop {r7}
106141
; UNWIND-NEXT: 0xB4 ; pop ra_auth_code
107-
; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr}
108-
; UNWIND-NEXT: 0xB0 ; finish
109-
; UNWIND-NEXT: 0xB0 ; finish
142+
; UNWIND-NEXT: 0x84 0x00 ; pop {lr}
110143

111-
; UNWIND-LABEL: FunctionAddress: 0x24
112-
; UNWIND: 0xB4 ; pop ra_auth_code
113-
; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr}
114144

115-
; UNWIND-LABEL: FunctionAddress: 0x54
145+
; UNWIND-LABEL: FunctionAddress: 0x1E
146+
; UNWIND: 0x80 0x08 ; pop {r7}
147+
; UNWIND-NEXT: 0xB4 ; pop ra_auth_code
148+
; UNWIND-NEXT: 0x84 0x00 ; pop {lr}
149+
150+
; UNWIND-LABEL: FunctionAddress: 0x48
116151
; UNWIND: 0x00 ; vsp = vsp + 4
152+
; UNWIND-NEXT: 0x80 0x08 ; pop {r7}
117153
; UNWIND-NEXT: 0xB4 ; pop ra_auth_code
118-
; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr}
119-
; UNWIND-NEXT: 0xB0 ; finish
120-
; UNWIND-NEXT: 0xB0 ; finish
154+
; UNWIND-NEXT: 0x84 0x00 ; pop {lr}
121155

122156
; UNWIND-LABEL: 00000001 {{.*}} f0
123-
; UNWIND-LABEL: 00000025 {{.*}} f1
124-
; UNWIND-LABEL: 00000055 {{.*}} f2
157+
; UNWIND-LABEL: 0000001f {{.*}} f1
158+
; UNWIND-LABEL: 00000049 {{.*}} f2

0 commit comments

Comments
 (0)