Skip to content

Commit 86f60b7

Browse files
committed
ARM: use AAPCS-style prologues for embedded MachO.
Darwin prologues save their GPRs in two stages: a narrow push of r0-r7 & lr, followed by a wide push of the remaining registers if there are any. AAPCS uses a single push.w instruction. It turns out that, on average, enough registers get pushed that the single-push AAPCS prologue produces smaller code, which is a nice property for M-class programmers. They also have other options available for back-traces, so can hopefully deal with the fact that FP & LR aren't adjacent in memory.

rdar://problem/15909583

llvm-svn: 209895
1 parent eaef074 commit 86f60b7

File tree

5 files changed

+29
-28
lines changed

5 files changed

+29
-28
lines changed

llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,12 @@ using namespace llvm;
4545

4646
ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti)
4747
: ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), STI(sti), BasePtr(ARM::R6) {
48-
if (STI.isTargetMachO())
49-
FramePtr = ARM::R7;
50-
else if (STI.isTargetWindows())
48+
if (STI.isTargetMachO()) {
49+
if (STI.isTargetDarwin() || STI.isThumb1Only())
50+
FramePtr = ARM::R7;
51+
else
52+
FramePtr = ARM::R11;
53+
} else if (STI.isTargetWindows())
5154
FramePtr = ARM::R11;
5255
else // ARM EABI
5356
FramePtr = STI.isThumb() ? ARM::R7 : ARM::R11;

llvm/lib/Target/ARM/ARMFrameLowering.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
220220
case ARM::R10:
221221
case ARM::R11:
222222
case ARM::R12:
223-
if (STI.isTargetMachO()) {
223+
if (STI.isTargetDarwin()) {
224224
GPRCS2Size += 4;
225225
break;
226226
}
@@ -380,7 +380,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
380380
case ARM::R10:
381381
case ARM::R11:
382382
case ARM::R12:
383-
if (STI.isTargetMachO())
383+
if (STI.isTargetDarwin())
384384
break;
385385
// fallthrough
386386
case ARM::R0:
@@ -445,7 +445,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
445445
case ARM::R10:
446446
case ARM::R11:
447447
case ARM::R12:
448-
if (STI.isTargetMachO()) {
448+
if (STI.isTargetDarwin()) {
449449
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
450450
unsigned Offset = MFI->getObjectOffset(FI);
451451
unsigned CFIIndex = MMI.addFrameInst(
@@ -810,7 +810,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
810810
unsigned LastReg = 0;
811811
for (; i != 0; --i) {
812812
unsigned Reg = CSI[i-1].getReg();
813-
if (!(Func)(Reg, STI.isTargetMachO())) continue;
813+
if (!(Func)(Reg, STI.isTargetDarwin())) continue;
814814

815815
// D-registers in the aligned area DPRCS2 are NOT spilled here.
816816
if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
@@ -888,7 +888,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
888888
bool DeleteRet = false;
889889
for (; i != 0; --i) {
890890
unsigned Reg = CSI[i-1].getReg();
891-
if (!(Func)(Reg, STI.isTargetMachO())) continue;
891+
if (!(Func)(Reg, STI.isTargetDarwin())) continue;
892892

893893
// The aligned reloads from area DPRCS2 are not inserted here.
894894
if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
@@ -1438,7 +1438,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
14381438
if (Spilled) {
14391439
NumGPRSpills++;
14401440

1441-
if (!STI.isTargetMachO()) {
1441+
if (!STI.isTargetDarwin()) {
14421442
if (Reg == ARM::LR)
14431443
LRSpilled = true;
14441444
CS1Spilled = true;
@@ -1460,7 +1460,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
14601460
break;
14611461
}
14621462
} else {
1463-
if (!STI.isTargetMachO()) {
1463+
if (!STI.isTargetDarwin()) {
14641464
UnspilledCS1GPRs.push_back(Reg);
14651465
continue;
14661466
}

llvm/test/CodeGen/ARM/fold-stack-adjust.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,11 @@ declare void @bar(i8*)
1212

1313
define void @check_simple() minsize {
1414
; CHECK-LABEL: check_simple:
15-
; CHECK: push {r3, r4, r5, r6, r7, lr}
15+
; CHECK: push.w {r7, r8, r9, r10, r11, lr}
1616
; CHECK-NOT: sub sp, sp,
1717
; ...
1818
; CHECK-NOT: add sp, sp,
19-
; CHECK: pop {r0, r1, r2, r3, r7, pc}
19+
; CHECK: pop.w {r0, r1, r2, r3, r11, pc}
2020

2121
; CHECK-T1-LABEL: check_simple:
2222
; CHECK-T1: push {r3, r4, r5, r6, r7, lr}
@@ -44,11 +44,11 @@ define void @check_simple() minsize {
4444

4545
define void @check_simple_too_big() minsize {
4646
; CHECK-LABEL: check_simple_too_big:
47-
; CHECK: push {r7, lr}
47+
; CHECK: push.w {r11, lr}
4848
; CHECK: sub sp,
4949
; ...
5050
; CHECK: add sp,
51-
; CHECK: pop {r7, pc}
51+
; CHECK: pop.w {r11, pc}
5252
%var = alloca i8, i32 64
5353
call void @bar(i8* %var)
5454
ret void
@@ -93,11 +93,11 @@ define void @check_vfp_fold() minsize {
9393
; folded in except that doing so would clobber the value being returned.
9494
define i64 @check_no_return_clobber() minsize {
9595
; CHECK-LABEL: check_no_return_clobber:
96-
; CHECK: push {r1, r2, r3, r4, r5, r6, r7, lr}
96+
; CHECK: push.w {r5, r6, r7, r8, r9, r10, r11, lr}
9797
; CHECK-NOT: sub sp,
9898
; ...
9999
; CHECK: add sp, #24
100-
; CHECK: pop {r7, pc}
100+
; CHECK: pop.w {r11, pc}
101101

102102
; Just to keep iOS FileCheck within previous function:
103103
; CHECK-IOS-LABEL: check_no_return_clobber:
@@ -176,9 +176,9 @@ define void @test_varsize(...) minsize {
176176

177177
; CHECK-LABEL: test_varsize:
178178
; CHECK: sub sp, #16
179-
; CHECK: push {r5, r6, r7, lr}
179+
; CHECK: push.w {r9, r10, r11, lr}
180180
; ...
181-
; CHECK: pop.w {r2, r3, r7, lr}
181+
; CHECK: pop.w {r2, r3, r11, lr}
182182
; CHECK: add sp, #16
183183
; CHECK: bx lr
184184

llvm/test/CodeGen/ARM/interrupt-attr.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,15 +35,15 @@ define arm_aapcscc void @irq_fn() alignstack(8) "interrupt"="IRQ" {
3535
; Normal AAPCS function (r0-r3 pushed onto stack by hardware, lr set to
3636
; appropriate sentinel so no special return needed).
3737
; CHECK-M-LABEL: irq_fn:
38-
; CHECK-M: push {r4, r6, r7, lr}
39-
; CHECK-M: add r7, sp, #8
38+
; CHECK-M: push.w {r4, r10, r11, lr}
39+
; CHECK-M: add.w r11, sp, #8
4040
; CHECK-M: mov r4, sp
4141
; CHECK-M: bic r4, r4, #7
4242
; CHECK-M: mov sp, r4
4343
; CHECK-M: blx _bar
44-
; CHECK-M: sub.w r4, r7, #8
44+
; CHECK-M: sub.w r4, r11, #8
4545
; CHECK-M: mov sp, r4
46-
; CHECK-M: pop {r4, r6, r7, pc}
46+
; CHECK-M: pop.w {r4, r10, r11, pc}
4747

4848
call arm_aapcscc void @bar()
4949
ret void

llvm/test/CodeGen/ARM/none-macho.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,8 @@ define i32 @test_frame_ptr() {
4848
; CHECK-LABEL: test_frame_ptr:
4949
call void @test_trap()
5050

51-
; Frame pointer is r7 as for Darwin
52-
; CHECK: mov r7, sp
51+
; Frame pointer is r11.
52+
; CHECK: mov r11, sp
5353
ret i32 42
5454
}
5555

@@ -63,11 +63,9 @@ define void @test_two_areas(%big_arr* %addr) {
6363
; This goes with the choice of r7 as FP (largely). FP and LR have to be stored
6464
; consecutively on the stack for the frame record to be valid, which means we
6565
; need the 2 register-save areas employed by iOS.
66-
; CHECK-NON-FAST: push {r4, r5, r6, r7, lr}
67-
; CHECK-NON-FAST: push.w {r8, r9, r10, r11}
66+
; CHECK-NON-FAST: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
6867
; ...
69-
; CHECK-NON-FAST: pop.w {r8, r9, r10, r11}
70-
; CHECK-NON-FAST: pop {r4, r5, r6, r7, pc}
68+
; CHECK-NON-FAST: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
7169
ret void
7270
}
7371

0 commit comments

Comments
 (0)