Skip to content

Commit 2a146e8

Browse files
committed
[ARM] Fix llvm.returnaddress for Thumb1 with R11 frame-pointer
When the llvm.returnaddress intrinsic is used, LR is marked as live-in to the function, so its value must be preserved through the prologue. This is normally fine, but there is one case for Thumb1 where LR is used as a temporary in the prologue: setting up a frame chain with r11 as the frame pointer. No other register is guaranteed to be free for this purpose, so LR has to be reloaded from the stack after the callee-saved registers have been pushed.
1 parent 3dd3280 commit 2a146e8

File tree

3 files changed

+72
-6
lines changed

3 files changed

+72
-6
lines changed

llvm/lib/Target/ARM/ARMAsmPrinter.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1309,6 +1309,10 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
13091309
default:
13101310
MI->print(errs());
13111311
llvm_unreachable("Unsupported opcode for unwinding information");
1312+
case ARM::tLDRspi:
1313+
// Used to restore LR in a prologue which uses it as a temporary; it has
1314+
// no effect on unwind tables.
1315+
return;
13121316
case ARM::MOVr:
13131317
case ARM::tMOVr:
13141318
Offset = 0;

llvm/lib/Target/ARM/Thumb1FrameLowering.cpp

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@
4343
#include <iterator>
4444
#include <vector>
4545

46+
#define DEBUG_TYPE "arm-frame-lowering"
47+
4648
using namespace llvm;
4749

4850
Thumb1FrameLowering::Thumb1FrameLowering(const ARMSubtarget &sti)
@@ -277,6 +279,20 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
277279
}
278280
}
279281

282+
// Skip past this code sequence, which is emitted to restore the LR if it is
283+
// live-in and clobbered by the frame record setup code:
284+
// ldr rX, [sp, #Y]
285+
// mov lr, rX
286+
if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tLDRspi &&
287+
MBBI->getFlag(MachineInstr::FrameSetup)) {
288+
++MBBI;
289+
if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr &&
290+
MBBI->getOperand(0).getReg() == ARM::LR &&
291+
MBBI->getFlag(MachineInstr::FrameSetup)) {
292+
++MBBI;
293+
}
294+
}
295+
280296
// Determine starting offsets of spill areas.
281297
unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize -
282298
(FRSize + GPRCS1Size + GPRCS2Size + DPRCSSize);
@@ -857,7 +873,8 @@ static void pushRegsToStack(MachineBasicBlock &MBB,
857873
MachineBasicBlock::iterator MI,
858874
const TargetInstrInfo &TII,
859875
const std::set<Register> &RegsToSave,
860-
const std::set<Register> &CopyRegs) {
876+
const std::set<Register> &CopyRegs,
877+
bool &UsedLRAsTemp) {
861878
MachineFunction &MF = *MBB.getParent();
862879
const MachineRegisterInfo &MRI = MF.getRegInfo();
863880
DebugLoc DL;
@@ -914,6 +931,8 @@ static void pushRegsToStack(MachineBasicBlock &MBB,
914931
bool isKill = !MRI.isLiveIn(*HiRegToSave);
915932
if (isKill && !MRI.isReserved(*HiRegToSave))
916933
MBB.addLiveIn(*HiRegToSave);
934+
if (*CopyRegIt == ARM::LR)
935+
UsedLRAsTemp = true;
917936

918937
// Emit a MOV from the high reg to the low reg.
919938
BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr))
@@ -1093,6 +1112,8 @@ bool Thumb1FrameLowering::spillCalleeSavedRegisters(
10931112
// In case FP is a high reg, we need a separate push sequence to generate
10941113
// a correct Frame Record
10951114
bool NeedsFrameRecordPush = hasFP(MF) && ARM::hGPRRegClass.contains(FPReg);
1115+
bool LRLiveIn = MF.getRegInfo().isLiveIn(ARM::LR);
1116+
bool UsedLRAsTemp = false;
10961117

10971118
std::set<Register> FrameRecord;
10981119
std::set<Register> SpilledGPRs;
@@ -1104,7 +1125,7 @@ bool Thumb1FrameLowering::spillCalleeSavedRegisters(
11041125
SpilledGPRs.insert(Reg);
11051126
}
11061127

1107-
pushRegsToStack(MBB, MI, TII, FrameRecord, {ARM::LR});
1128+
pushRegsToStack(MBB, MI, TII, FrameRecord, {ARM::LR}, UsedLRAsTemp);
11081129

11091130
// Determine intermediate registers which can be used for pushing high regs:
11101131
// - Spilled low regs
@@ -1118,7 +1139,34 @@ bool Thumb1FrameLowering::spillCalleeSavedRegisters(
11181139
if (!MF.getRegInfo().isLiveIn(ArgReg))
11191140
CopyRegs.insert(ArgReg);
11201141

1121-
pushRegsToStack(MBB, MI, TII, SpilledGPRs, CopyRegs);
1142+
pushRegsToStack(MBB, MI, TII, SpilledGPRs, CopyRegs, UsedLRAsTemp);
1143+
1144+
// If the push sequence used LR as a temporary, and LR is live-in (for
1145+
// example because it is used by the llvm.returnaddress intrinsic), then we
1146+
// need to reload it from the stack. Thumb1 does not have a load instruction
1147+
// which can use LR, so we need to load into a temporary low register and
1148+
// copy to LR.
1149+
if (LRLiveIn && UsedLRAsTemp) {
1150+
auto CopyRegIt = getNextOrderedReg(OrderedCopyRegs.rbegin(),
1151+
OrderedCopyRegs.rend(),
1152+
CopyRegs);
1153+
assert(CopyRegIt != OrderedCopyRegs.rend());
1154+
unsigned NumRegsPushed = FrameRecord.size() + SpilledGPRs.size();
1155+
LLVM_DEBUG(
1156+
dbgs() << "LR is live-in but clobbered in prologue, restoring via "
1157+
<< RegInfo->getName(*CopyRegIt) << "\n");
1158+
1159+
BuildMI(MBB, MI, DebugLoc(), TII.get(ARM::tLDRspi), *CopyRegIt)
1160+
.addReg(ARM::SP)
1161+
.addImm(NumRegsPushed - 1)
1162+
.add(predOps(ARMCC::AL))
1163+
.setMIFlags(MachineInstr::FrameSetup);
1164+
1165+
BuildMI(MBB, MI, DebugLoc(), TII.get(ARM::tMOVr), ARM::LR)
1166+
.addReg(*CopyRegIt)
1167+
.add(predOps(ARMCC::AL))
1168+
.setMIFlags(MachineInstr::FrameSetup);
1169+
}
11221170

11231171
return true;
11241172
}

llvm/test/CodeGen/Thumb/returnaddress.ll

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2-
; RUN: llc < %s -mtriple=thumbv6m-none-eabi -frame-pointer=none | FileCheck %s --check-prefix=FP-NONE
3-
; RUN: llc < %s -mtriple=thumbv6m-none-eabi -frame-pointer=all | FileCheck %s --check-prefix=FP-ALL
4-
; RUN: llc < %s -mtriple=thumbv6m-none-eabi -frame-pointer=all -mattr=+aapcs-frame-chain | FileCheck %s --check-prefix=FP-AAPCS
2+
; RUN: llc < %s -mtriple=thumbv6m-none-eabi -frame-pointer=none -verify-machineinstrs | FileCheck %s --check-prefix=FP-NONE
3+
; RUN: llc < %s -mtriple=thumbv6m-none-eabi -frame-pointer=all -verify-machineinstrs | FileCheck %s --check-prefix=FP-ALL
4+
; RUN: llc < %s -mtriple=thumbv6m-none-eabi -frame-pointer=all -mattr=+aapcs-frame-chain -verify-machineinstrs | FileCheck %s --check-prefix=FP-AAPCS
55

66
define void @ra_call() {
77
; FP-NONE-LABEL: ra_call:
@@ -31,6 +31,8 @@ define void @ra_call() {
3131
; FP-AAPCS-NEXT: push {lr}
3232
; FP-AAPCS-NEXT: .setfp r11, sp
3333
; FP-AAPCS-NEXT: mov r11, sp
34+
; FP-AAPCS-NEXT: ldr r3, [sp, #4]
35+
; FP-AAPCS-NEXT: mov lr, r3
3436
; FP-AAPCS-NEXT: mov r0, lr
3537
; FP-AAPCS-NEXT: bl sink_ptr
3638
; FP-AAPCS-NEXT: pop {r0}
@@ -66,6 +68,8 @@ define ptr @ra_return() {
6668
; FP-AAPCS-NEXT: push {lr}
6769
; FP-AAPCS-NEXT: .setfp r11, sp
6870
; FP-AAPCS-NEXT: mov r11, sp
71+
; FP-AAPCS-NEXT: ldr r3, [sp, #4]
72+
; FP-AAPCS-NEXT: mov lr, r3
6973
; FP-AAPCS-NEXT: mov r0, lr
7074
; FP-AAPCS-NEXT: pop {r1}
7175
; FP-AAPCS-NEXT: mov r11, r1
@@ -107,6 +111,8 @@ define ptr @callee_saved_low() {
107111
; FP-AAPCS-NEXT: mov r11, sp
108112
; FP-AAPCS-NEXT: .save {r4, r5}
109113
; FP-AAPCS-NEXT: push {r4, r5}
114+
; FP-AAPCS-NEXT: ldr r5, [sp, #12]
115+
; FP-AAPCS-NEXT: mov lr, r5
110116
; FP-AAPCS-NEXT: mov r0, lr
111117
; FP-AAPCS-NEXT: @APP
112118
; FP-AAPCS-NEXT: @NO_APP
@@ -166,6 +172,8 @@ define ptr @callee_saved_high() {
166172
; FP-AAPCS-NEXT: mov r2, r8
167173
; FP-AAPCS-NEXT: .save {r8, r9}
168174
; FP-AAPCS-NEXT: push {r2, r3}
175+
; FP-AAPCS-NEXT: ldr r3, [sp, #12]
176+
; FP-AAPCS-NEXT: mov lr, r3
169177
; FP-AAPCS-NEXT: mov r0, lr
170178
; FP-AAPCS-NEXT: @APP
171179
; FP-AAPCS-NEXT: @NO_APP
@@ -236,6 +244,8 @@ define ptr @large_alloca() {
236244
; FP-AAPCS-NEXT: mov r11, sp
237245
; FP-AAPCS-NEXT: .save {r4, r7}
238246
; FP-AAPCS-NEXT: push {r4, r7}
247+
; FP-AAPCS-NEXT: ldr r7, [sp, #12]
248+
; FP-AAPCS-NEXT: mov lr, r7
239249
; FP-AAPCS-NEXT: ldr r7, .LCPI4_0
240250
; FP-AAPCS-NEXT: .pad #2000
241251
; FP-AAPCS-NEXT: add sp, r7
@@ -315,6 +325,8 @@ define ptr @var_alloca(i32 %size) {
315325
; FP-AAPCS-NEXT: mov r11, sp
316326
; FP-AAPCS-NEXT: .save {r4, r6}
317327
; FP-AAPCS-NEXT: push {r4, r6}
328+
; FP-AAPCS-NEXT: ldr r6, [sp, #12]
329+
; FP-AAPCS-NEXT: mov lr, r6
318330
; FP-AAPCS-NEXT: mov r6, sp
319331
; FP-AAPCS-NEXT: mov r4, lr
320332
; FP-AAPCS-NEXT: adds r0, r0, #7
@@ -375,6 +387,8 @@ define i32 @all_arg_regs(i32 %a, i32 %b, i32 %c, i32 %d) {
375387
; FP-AAPCS-NEXT: mov r11, sp
376388
; FP-AAPCS-NEXT: .save {r4, r7}
377389
; FP-AAPCS-NEXT: push {r4, r7}
390+
; FP-AAPCS-NEXT: ldr r7, [sp, #12]
391+
; FP-AAPCS-NEXT: mov lr, r7
378392
; FP-AAPCS-NEXT: mov r4, lr
379393
; FP-AAPCS-NEXT: adds r0, r0, r1
380394
; FP-AAPCS-NEXT: adds r0, r0, r2

0 commit comments

Comments
 (0)