Skip to content

Commit 75a3296

Browse files
committed
[ARM] Optimise Thumb1 frame record creation to not clobber LR
When setting up frame records using r11 as the frame pointer for Thumb1, we currently use LR as a temporary. This forces us to reload LR from the stack later in the prologue whenever LR is live-in to the function, so it is better to use an argument register as the temporary if one is free.
1 parent 2a146e8 commit 75a3296

File tree

3 files changed

+38
-35
lines changed

3 files changed

+38
-35
lines changed

llvm/lib/Target/ARM/Thumb1FrameLowering.cpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1125,7 +1125,22 @@ bool Thumb1FrameLowering::spillCalleeSavedRegisters(
11251125
SpilledGPRs.insert(Reg);
11261126
}
11271127

1128-
pushRegsToStack(MBB, MI, TII, FrameRecord, {ARM::LR}, UsedLRAsTemp);
1128+
// Determine intermediate registers which can be used for pushing the frame
1129+
// record:
1130+
// - Unused argument registers
1131+
// - LR: This is possible because the first PUSH will save it on the stack,
1132+
// so it is free to be used as a temporary for the second. However, it
1133+
// is possible for LR to be live-in to the function, in which case we
1134+
// will need to restore it later in the prologue, so we only use this
1135+
// if there are no free argument registers.
1136+
std::set<Register> FrameRecordCopyRegs;
1137+
for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3})
1138+
if (!MF.getRegInfo().isLiveIn(ArgReg))
1139+
FrameRecordCopyRegs.insert(ArgReg);
1140+
if (FrameRecordCopyRegs.empty())
1141+
FrameRecordCopyRegs.insert(ARM::LR);
1142+
1143+
pushRegsToStack(MBB, MI, TII, FrameRecord, FrameRecordCopyRegs, UsedLRAsTemp);
11291144

11301145
// Determine intermediate registers which can be used for pushing high regs:
11311146
// - Spilled low regs

llvm/test/CodeGen/Thumb/frame-chain.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,9 @@ define dso_local noundef i32 @leaf(i32 noundef %0) {
2323
; LEAF-FP-AAPCS: @ %bb.0:
2424
; LEAF-FP-AAPCS-NEXT: .save {lr}
2525
; LEAF-FP-AAPCS-NEXT: push {lr}
26-
; LEAF-FP-AAPCS-NEXT: mov lr, r11
26+
; LEAF-FP-AAPCS-NEXT: mov r3, r11
2727
; LEAF-FP-AAPCS-NEXT: .save {r11}
28-
; LEAF-FP-AAPCS-NEXT: push {lr}
28+
; LEAF-FP-AAPCS-NEXT: push {r3}
2929
; LEAF-FP-AAPCS-NEXT: .setfp r11, sp
3030
; LEAF-FP-AAPCS-NEXT: mov r11, sp
3131
; LEAF-FP-AAPCS-NEXT: .pad #4
@@ -80,9 +80,9 @@ define dso_local noundef i32 @non_leaf(i32 noundef %0) {
8080
; FP-AAPCS: @ %bb.0:
8181
; FP-AAPCS-NEXT: .save {lr}
8282
; FP-AAPCS-NEXT: push {lr}
83-
; FP-AAPCS-NEXT: mov lr, r11
83+
; FP-AAPCS-NEXT: mov r3, r11
8484
; FP-AAPCS-NEXT: .save {r11}
85-
; FP-AAPCS-NEXT: push {lr}
85+
; FP-AAPCS-NEXT: push {r3}
8686
; FP-AAPCS-NEXT: .setfp r11, sp
8787
; FP-AAPCS-NEXT: mov r11, sp
8888
; FP-AAPCS-NEXT: .pad #8
@@ -161,9 +161,9 @@ define dso_local void @required_fp(i32 %0, i32 %1) {
161161
; FP-AAPCS: @ %bb.0:
162162
; FP-AAPCS-NEXT: .save {lr}
163163
; FP-AAPCS-NEXT: push {lr}
164-
; FP-AAPCS-NEXT: mov lr, r11
164+
; FP-AAPCS-NEXT: mov r3, r11
165165
; FP-AAPCS-NEXT: .save {r11}
166-
; FP-AAPCS-NEXT: push {lr}
166+
; FP-AAPCS-NEXT: push {r3}
167167
; FP-AAPCS-NEXT: .setfp r11, sp
168168
; FP-AAPCS-NEXT: mov r11, sp
169169
; FP-AAPCS-NEXT: .save {r4, r6}
@@ -227,9 +227,9 @@ define dso_local void @required_fp(i32 %0, i32 %1) {
227227
; NOFP-AAPCS: @ %bb.0:
228228
; NOFP-AAPCS-NEXT: .save {lr}
229229
; NOFP-AAPCS-NEXT: push {lr}
230-
; NOFP-AAPCS-NEXT: mov lr, r11
230+
; NOFP-AAPCS-NEXT: mov r3, r11
231231
; NOFP-AAPCS-NEXT: .save {r11}
232-
; NOFP-AAPCS-NEXT: push {lr}
232+
; NOFP-AAPCS-NEXT: push {r3}
233233
; NOFP-AAPCS-NEXT: .setfp r11, sp
234234
; NOFP-AAPCS-NEXT: mov r11, sp
235235
; NOFP-AAPCS-NEXT: .save {r4, r6}

llvm/test/CodeGen/Thumb/returnaddress.ll

Lines changed: 14 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,11 @@ define void @ra_call() {
2626
; FP-AAPCS: @ %bb.0: @ %entry
2727
; FP-AAPCS-NEXT: .save {lr}
2828
; FP-AAPCS-NEXT: push {lr}
29-
; FP-AAPCS-NEXT: mov lr, r11
29+
; FP-AAPCS-NEXT: mov r3, r11
3030
; FP-AAPCS-NEXT: .save {r11}
31-
; FP-AAPCS-NEXT: push {lr}
31+
; FP-AAPCS-NEXT: push {r3}
3232
; FP-AAPCS-NEXT: .setfp r11, sp
3333
; FP-AAPCS-NEXT: mov r11, sp
34-
; FP-AAPCS-NEXT: ldr r3, [sp, #4]
35-
; FP-AAPCS-NEXT: mov lr, r3
3634
; FP-AAPCS-NEXT: mov r0, lr
3735
; FP-AAPCS-NEXT: bl sink_ptr
3836
; FP-AAPCS-NEXT: pop {r0}
@@ -63,13 +61,11 @@ define ptr @ra_return() {
6361
; FP-AAPCS: @ %bb.0: @ %entry
6462
; FP-AAPCS-NEXT: .save {lr}
6563
; FP-AAPCS-NEXT: push {lr}
66-
; FP-AAPCS-NEXT: mov lr, r11
64+
; FP-AAPCS-NEXT: mov r3, r11
6765
; FP-AAPCS-NEXT: .save {r11}
68-
; FP-AAPCS-NEXT: push {lr}
66+
; FP-AAPCS-NEXT: push {r3}
6967
; FP-AAPCS-NEXT: .setfp r11, sp
7068
; FP-AAPCS-NEXT: mov r11, sp
71-
; FP-AAPCS-NEXT: ldr r3, [sp, #4]
72-
; FP-AAPCS-NEXT: mov lr, r3
7369
; FP-AAPCS-NEXT: mov r0, lr
7470
; FP-AAPCS-NEXT: pop {r1}
7571
; FP-AAPCS-NEXT: mov r11, r1
@@ -104,15 +100,13 @@ define ptr @callee_saved_low() {
104100
; FP-AAPCS: @ %bb.0: @ %entry
105101
; FP-AAPCS-NEXT: .save {lr}
106102
; FP-AAPCS-NEXT: push {lr}
107-
; FP-AAPCS-NEXT: mov lr, r11
103+
; FP-AAPCS-NEXT: mov r3, r11
108104
; FP-AAPCS-NEXT: .save {r11}
109-
; FP-AAPCS-NEXT: push {lr}
105+
; FP-AAPCS-NEXT: push {r3}
110106
; FP-AAPCS-NEXT: .setfp r11, sp
111107
; FP-AAPCS-NEXT: mov r11, sp
112108
; FP-AAPCS-NEXT: .save {r4, r5}
113109
; FP-AAPCS-NEXT: push {r4, r5}
114-
; FP-AAPCS-NEXT: ldr r5, [sp, #12]
115-
; FP-AAPCS-NEXT: mov lr, r5
116110
; FP-AAPCS-NEXT: mov r0, lr
117111
; FP-AAPCS-NEXT: @APP
118112
; FP-AAPCS-NEXT: @NO_APP
@@ -163,17 +157,15 @@ define ptr @callee_saved_high() {
163157
; FP-AAPCS: @ %bb.0: @ %entry
164158
; FP-AAPCS-NEXT: .save {lr}
165159
; FP-AAPCS-NEXT: push {lr}
166-
; FP-AAPCS-NEXT: mov lr, r11
160+
; FP-AAPCS-NEXT: mov r3, r11
167161
; FP-AAPCS-NEXT: .save {r11}
168-
; FP-AAPCS-NEXT: push {lr}
162+
; FP-AAPCS-NEXT: push {r3}
169163
; FP-AAPCS-NEXT: .setfp r11, sp
170164
; FP-AAPCS-NEXT: mov r11, sp
171165
; FP-AAPCS-NEXT: mov r3, r9
172166
; FP-AAPCS-NEXT: mov r2, r8
173167
; FP-AAPCS-NEXT: .save {r8, r9}
174168
; FP-AAPCS-NEXT: push {r2, r3}
175-
; FP-AAPCS-NEXT: ldr r3, [sp, #12]
176-
; FP-AAPCS-NEXT: mov lr, r3
177169
; FP-AAPCS-NEXT: mov r0, lr
178170
; FP-AAPCS-NEXT: @APP
179171
; FP-AAPCS-NEXT: @NO_APP
@@ -237,15 +229,13 @@ define ptr @large_alloca() {
237229
; FP-AAPCS: @ %bb.0: @ %entry
238230
; FP-AAPCS-NEXT: .save {lr}
239231
; FP-AAPCS-NEXT: push {lr}
240-
; FP-AAPCS-NEXT: mov lr, r11
232+
; FP-AAPCS-NEXT: mov r3, r11
241233
; FP-AAPCS-NEXT: .save {r11}
242-
; FP-AAPCS-NEXT: push {lr}
234+
; FP-AAPCS-NEXT: push {r3}
243235
; FP-AAPCS-NEXT: .setfp r11, sp
244236
; FP-AAPCS-NEXT: mov r11, sp
245237
; FP-AAPCS-NEXT: .save {r4, r7}
246238
; FP-AAPCS-NEXT: push {r4, r7}
247-
; FP-AAPCS-NEXT: ldr r7, [sp, #12]
248-
; FP-AAPCS-NEXT: mov lr, r7
249239
; FP-AAPCS-NEXT: ldr r7, .LCPI4_0
250240
; FP-AAPCS-NEXT: .pad #2000
251241
; FP-AAPCS-NEXT: add sp, r7
@@ -318,15 +308,13 @@ define ptr @var_alloca(i32 %size) {
318308
; FP-AAPCS: @ %bb.0: @ %entry
319309
; FP-AAPCS-NEXT: .save {lr}
320310
; FP-AAPCS-NEXT: push {lr}
321-
; FP-AAPCS-NEXT: mov lr, r11
311+
; FP-AAPCS-NEXT: mov r3, r11
322312
; FP-AAPCS-NEXT: .save {r11}
323-
; FP-AAPCS-NEXT: push {lr}
313+
; FP-AAPCS-NEXT: push {r3}
324314
; FP-AAPCS-NEXT: .setfp r11, sp
325315
; FP-AAPCS-NEXT: mov r11, sp
326316
; FP-AAPCS-NEXT: .save {r4, r6}
327317
; FP-AAPCS-NEXT: push {r4, r6}
328-
; FP-AAPCS-NEXT: ldr r6, [sp, #12]
329-
; FP-AAPCS-NEXT: mov lr, r6
330318
; FP-AAPCS-NEXT: mov r6, sp
331319
; FP-AAPCS-NEXT: mov r4, lr
332320
; FP-AAPCS-NEXT: adds r0, r0, #7
@@ -433,9 +421,9 @@ define ptr @ra_depth_1() {
433421
; FP-AAPCS: @ %bb.0: @ %entry
434422
; FP-AAPCS-NEXT: .save {lr}
435423
; FP-AAPCS-NEXT: push {lr}
436-
; FP-AAPCS-NEXT: mov lr, r11
424+
; FP-AAPCS-NEXT: mov r3, r11
437425
; FP-AAPCS-NEXT: .save {r11}
438-
; FP-AAPCS-NEXT: push {lr}
426+
; FP-AAPCS-NEXT: push {r3}
439427
; FP-AAPCS-NEXT: .setfp r11, sp
440428
; FP-AAPCS-NEXT: mov r11, sp
441429
; FP-AAPCS-NEXT: mov r0, r11

0 commit comments

Comments
 (0)