Skip to content

Commit 0c0f765

Browse files
authored
[ARM] Fix llvm.returnaddress for Thumb1 with R11 frame-pointer (#117735)
When the llvm.returnaddress intrinsic is used, LR is marked as live-in to the function, so it must be preserved through the prologue. This is normally fine, but there is one case for Thumb1 where we use LR as a temporary in the prologue to set up a frame chain using r11 as the frame pointer. There are no other registers guaranteed to be free to do this, so we have to reload LR from the stack after pushing the callee-saved registers.
1 parent 3b8426d commit 0c0f765

File tree

4 files changed

+517
-11
lines changed

4 files changed

+517
-11
lines changed

llvm/lib/Target/ARM/ARMAsmPrinter.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1309,6 +1309,10 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
13091309
default:
13101310
MI->print(errs());
13111311
llvm_unreachable("Unsupported opcode for unwinding information");
1312+
case ARM::tLDRspi:
1313+
// Used to restore LR in a prologue which uses it as a temporary; has
1314+
// no effect on unwind tables.
1315+
return;
13121316
case ARM::MOVr:
13131317
case ARM::tMOVr:
13141318
Offset = 0;

llvm/lib/Target/ARM/Thumb1FrameLowering.cpp

Lines changed: 65 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@
4343
#include <iterator>
4444
#include <vector>
4545

46+
#define DEBUG_TYPE "arm-frame-lowering"
47+
4648
using namespace llvm;
4749

4850
Thumb1FrameLowering::Thumb1FrameLowering(const ARMSubtarget &sti)
@@ -277,6 +279,20 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
277279
}
278280
}
279281

282+
// Skip past this code sequence, which is emitted to restore the LR if it is
283+
// live-in and clobbered by the frame record setup code:
284+
// ldr rX, [sp, #Y]
285+
// mov lr, rX
286+
if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tLDRspi &&
287+
MBBI->getFlag(MachineInstr::FrameSetup)) {
288+
++MBBI;
289+
if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr &&
290+
MBBI->getOperand(0).getReg() == ARM::LR &&
291+
MBBI->getFlag(MachineInstr::FrameSetup)) {
292+
++MBBI;
293+
}
294+
}
295+
280296
// Determine starting offsets of spill areas.
281297
unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize -
282298
(FRSize + GPRCS1Size + GPRCS2Size + DPRCSSize);
@@ -857,7 +873,8 @@ static void pushRegsToStack(MachineBasicBlock &MBB,
857873
MachineBasicBlock::iterator MI,
858874
const TargetInstrInfo &TII,
859875
const std::set<Register> &RegsToSave,
860-
const std::set<Register> &CopyRegs) {
876+
const std::set<Register> &CopyRegs,
877+
bool &UsedLRAsTemp) {
861878
MachineFunction &MF = *MBB.getParent();
862879
const MachineRegisterInfo &MRI = MF.getRegInfo();
863880
DebugLoc DL;
@@ -914,6 +931,8 @@ static void pushRegsToStack(MachineBasicBlock &MBB,
914931
bool isKill = !MRI.isLiveIn(*HiRegToSave);
915932
if (isKill && !MRI.isReserved(*HiRegToSave))
916933
MBB.addLiveIn(*HiRegToSave);
934+
if (*CopyRegIt == ARM::LR)
935+
UsedLRAsTemp = true;
917936

918937
// Emit a MOV from the high reg to the low reg.
919938
BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr))
@@ -1093,6 +1112,8 @@ bool Thumb1FrameLowering::spillCalleeSavedRegisters(
10931112
// In case FP is a high reg, we need a separate push sequence to generate
10941113
// a correct Frame Record
10951114
bool NeedsFrameRecordPush = hasFP(MF) && ARM::hGPRRegClass.contains(FPReg);
1115+
bool LRLiveIn = MF.getRegInfo().isLiveIn(ARM::LR);
1116+
bool UsedLRAsTemp = false;
10961117

10971118
std::set<Register> FrameRecord;
10981119
std::set<Register> SpilledGPRs;
@@ -1104,7 +1125,22 @@ bool Thumb1FrameLowering::spillCalleeSavedRegisters(
11041125
SpilledGPRs.insert(Reg);
11051126
}
11061127

1107-
pushRegsToStack(MBB, MI, TII, FrameRecord, {ARM::LR});
1128+
// Determine intermediate registers which can be used for pushing the frame
1129+
// record:
1130+
// - Unused argument registers
1131+
// - LR: This is possible because the first PUSH will save it on the stack,
1132+
// so it is free to be used as a temporary for the second. However, it
1133+
// is possible for LR to be live-in to the function, in which case we
1134+
// will need to restore it later in the prologue, so we only use this
1135+
// if there are no free argument registers.
1136+
std::set<Register> FrameRecordCopyRegs;
1137+
for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3})
1138+
if (!MF.getRegInfo().isLiveIn(ArgReg))
1139+
FrameRecordCopyRegs.insert(ArgReg);
1140+
if (FrameRecordCopyRegs.empty())
1141+
FrameRecordCopyRegs.insert(ARM::LR);
1142+
1143+
pushRegsToStack(MBB, MI, TII, FrameRecord, FrameRecordCopyRegs, UsedLRAsTemp);
11081144

11091145
// Determine intermediate registers which can be used for pushing high regs:
11101146
// - Spilled low regs
@@ -1118,7 +1154,33 @@ bool Thumb1FrameLowering::spillCalleeSavedRegisters(
11181154
if (!MF.getRegInfo().isLiveIn(ArgReg))
11191155
CopyRegs.insert(ArgReg);
11201156

1121-
pushRegsToStack(MBB, MI, TII, SpilledGPRs, CopyRegs);
1157+
pushRegsToStack(MBB, MI, TII, SpilledGPRs, CopyRegs, UsedLRAsTemp);
1158+
1159+
// If the push sequence used LR as a temporary, and LR is live-in (for
1160+
// example because it is used by the llvm.returnaddress intrinsic), then we
1161+
// need to reload it from the stack. Thumb1 does not have a load instruction
1162+
// which can use LR, so we need to load into a temporary low register and
1163+
// copy to LR.
1164+
if (LRLiveIn && UsedLRAsTemp) {
1165+
auto CopyRegIt = getNextOrderedReg(OrderedCopyRegs.rbegin(),
1166+
OrderedCopyRegs.rend(), CopyRegs);
1167+
assert(CopyRegIt != OrderedCopyRegs.rend());
1168+
unsigned NumRegsPushed = FrameRecord.size() + SpilledGPRs.size();
1169+
LLVM_DEBUG(
1170+
dbgs() << "LR is live-in but clobbered in prologue, restoring via "
1171+
<< RegInfo->getName(*CopyRegIt) << "\n");
1172+
1173+
BuildMI(MBB, MI, DebugLoc(), TII.get(ARM::tLDRspi), *CopyRegIt)
1174+
.addReg(ARM::SP)
1175+
.addImm(NumRegsPushed - 1)
1176+
.add(predOps(ARMCC::AL))
1177+
.setMIFlags(MachineInstr::FrameSetup);
1178+
1179+
BuildMI(MBB, MI, DebugLoc(), TII.get(ARM::tMOVr), ARM::LR)
1180+
.addReg(*CopyRegIt)
1181+
.add(predOps(ARMCC::AL))
1182+
.setMIFlags(MachineInstr::FrameSetup);
1183+
}
11221184

11231185
return true;
11241186
}

llvm/test/CodeGen/Thumb/frame-chain.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,9 @@ define dso_local noundef i32 @leaf(i32 noundef %0) {
2323
; LEAF-FP-AAPCS: @ %bb.0:
2424
; LEAF-FP-AAPCS-NEXT: .save {lr}
2525
; LEAF-FP-AAPCS-NEXT: push {lr}
26-
; LEAF-FP-AAPCS-NEXT: mov lr, r11
26+
; LEAF-FP-AAPCS-NEXT: mov r3, r11
2727
; LEAF-FP-AAPCS-NEXT: .save {r11}
28-
; LEAF-FP-AAPCS-NEXT: push {lr}
28+
; LEAF-FP-AAPCS-NEXT: push {r3}
2929
; LEAF-FP-AAPCS-NEXT: .setfp r11, sp
3030
; LEAF-FP-AAPCS-NEXT: mov r11, sp
3131
; LEAF-FP-AAPCS-NEXT: .pad #4
@@ -80,9 +80,9 @@ define dso_local noundef i32 @non_leaf(i32 noundef %0) {
8080
; FP-AAPCS: @ %bb.0:
8181
; FP-AAPCS-NEXT: .save {lr}
8282
; FP-AAPCS-NEXT: push {lr}
83-
; FP-AAPCS-NEXT: mov lr, r11
83+
; FP-AAPCS-NEXT: mov r3, r11
8484
; FP-AAPCS-NEXT: .save {r11}
85-
; FP-AAPCS-NEXT: push {lr}
85+
; FP-AAPCS-NEXT: push {r3}
8686
; FP-AAPCS-NEXT: .setfp r11, sp
8787
; FP-AAPCS-NEXT: mov r11, sp
8888
; FP-AAPCS-NEXT: .pad #8
@@ -161,9 +161,9 @@ define dso_local void @required_fp(i32 %0, i32 %1) {
161161
; FP-AAPCS: @ %bb.0:
162162
; FP-AAPCS-NEXT: .save {lr}
163163
; FP-AAPCS-NEXT: push {lr}
164-
; FP-AAPCS-NEXT: mov lr, r11
164+
; FP-AAPCS-NEXT: mov r3, r11
165165
; FP-AAPCS-NEXT: .save {r11}
166-
; FP-AAPCS-NEXT: push {lr}
166+
; FP-AAPCS-NEXT: push {r3}
167167
; FP-AAPCS-NEXT: .setfp r11, sp
168168
; FP-AAPCS-NEXT: mov r11, sp
169169
; FP-AAPCS-NEXT: .save {r4, r6}
@@ -227,9 +227,9 @@ define dso_local void @required_fp(i32 %0, i32 %1) {
227227
; NOFP-AAPCS: @ %bb.0:
228228
; NOFP-AAPCS-NEXT: .save {lr}
229229
; NOFP-AAPCS-NEXT: push {lr}
230-
; NOFP-AAPCS-NEXT: mov lr, r11
230+
; NOFP-AAPCS-NEXT: mov r3, r11
231231
; NOFP-AAPCS-NEXT: .save {r11}
232-
; NOFP-AAPCS-NEXT: push {lr}
232+
; NOFP-AAPCS-NEXT: push {r3}
233233
; NOFP-AAPCS-NEXT: .setfp r11, sp
234234
; NOFP-AAPCS-NEXT: mov r11, sp
235235
; NOFP-AAPCS-NEXT: .save {r4, r6}

0 commit comments

Comments
 (0)