Skip to content

Commit 4e2364a

Browse files
wangleiat authored and SixWeining committed
[LoongArch] Add emergency spill slot for GPR for large frames
An emergency spill slot is created when the stack size cannot be represented by an 11-bit signed number. This patch also modifies how `sp` is adjusted in the prologue. `RegScavenger` will place the spill instruction before the prologue if a VReg is created in the prologue, which would pollute the caller's stack data. Therefore, until there is a better way, we just use the `addi.w/d` instruction for stack adjustment to ensure that no VReg is created. (RISCV has the same issue, #58286.) Due to the addition of the emergency spill slot, some test cases that rely on an exact stack size need to be updated. Differential Revision: https://reviews.llvm.org/D135757
1 parent 9bb1e21 commit 4e2364a

File tree

6 files changed

+271
-92
lines changed

6 files changed

+271
-92
lines changed

llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp

Lines changed: 62 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -118,13 +118,34 @@ void LoongArchFrameLowering::determineFrameLayout(MachineFunction &MF) const {
118118
MFI.setStackSize(FrameSize);
119119
}
120120

121+
void LoongArchFrameLowering::processFunctionBeforeFrameFinalized(
122+
MachineFunction &MF, RegScavenger *RS) const {
123+
const LoongArchRegisterInfo *RI = STI.getRegisterInfo();
124+
const TargetRegisterClass &RC = LoongArch::GPRRegClass;
125+
MachineFrameInfo &MFI = MF.getFrameInfo();
126+
127+
// estimateStackSize has been observed to under-estimate the final stack
128+
// size, so give ourselves wiggle-room by checking for stack size
129+
// representable in an 11-bit signed field rather than 12 bits.
130+
if (isInt<11>(MFI.estimateStackSize(MF)))
131+
return;
132+
133+
// Create an emergency spill slot.
134+
int FI =
135+
MFI.CreateStackObject(RI->getSpillSize(RC), RI->getSpillAlign(RC), false);
136+
RS->addScavengingFrameIndex(FI);
137+
LLVM_DEBUG(dbgs() << "Allocated FI(" << FI
138+
<< ") as the emergency spill slot.\n");
139+
}
140+
121141
void LoongArchFrameLowering::emitPrologue(MachineFunction &MF,
122142
MachineBasicBlock &MBB) const {
123143
MachineFrameInfo &MFI = MF.getFrameInfo();
124144
auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
125145
const LoongArchRegisterInfo *RI = STI.getRegisterInfo();
126146
const LoongArchInstrInfo *TII = STI.getInstrInfo();
127147
MachineBasicBlock::iterator MBBI = MBB.begin();
148+
bool IsLA64 = STI.is64Bit();
128149

129150
Register SPReg = LoongArch::R3;
130151
Register FPReg = LoongArch::R22;
@@ -144,19 +165,22 @@ void LoongArchFrameLowering::emitPrologue(MachineFunction &MF,
144165
if (StackSize == 0 && !MFI.adjustsStack())
145166
return;
146167

147-
uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
168+
uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF, true);
169+
uint64_t SecondSPAdjustAmount = RealStackSize - FirstSPAdjustAmount;
148170
// Split the SP adjustment to reduce the offsets of callee saved spill.
149171
if (FirstSPAdjustAmount)
150172
StackSize = FirstSPAdjustAmount;
151173

152174
// Adjust stack.
153175
adjustReg(MBB, MBBI, DL, SPReg, SPReg, -StackSize, MachineInstr::FrameSetup);
154-
// Emit ".cfi_def_cfa_offset StackSize".
155-
unsigned CFIIndex =
156-
MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize));
157-
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
158-
.addCFIIndex(CFIIndex)
159-
.setMIFlag(MachineInstr::FrameSetup);
176+
if (FirstSPAdjustAmount != 2048 || SecondSPAdjustAmount == 0) {
177+
// Emit ".cfi_def_cfa_offset StackSize".
178+
unsigned CFIIndex =
179+
MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize));
180+
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
181+
.addCFIIndex(CFIIndex)
182+
.setMIFlag(MachineInstr::FrameSetup);
183+
}
160184

161185
const auto &CSI = MFI.getCalleeSavedInfo();
162186

@@ -193,14 +217,25 @@ void LoongArchFrameLowering::emitPrologue(MachineFunction &MF,
193217
}
194218

195219
// Emit the second SP adjustment after saving callee saved registers.
196-
if (FirstSPAdjustAmount) {
197-
uint64_t SecondSPAdjustAmount = RealStackSize - FirstSPAdjustAmount;
198-
assert(SecondSPAdjustAmount > 0 &&
199-
"SecondSPAdjustAmount should be greater than zero");
200-
adjustReg(MBB, MBBI, DL, SPReg, SPReg, -SecondSPAdjustAmount,
201-
MachineInstr::FrameSetup);
220+
if (FirstSPAdjustAmount && SecondSPAdjustAmount) {
221+
if (hasFP(MF)) {
222+
assert(SecondSPAdjustAmount > 0 &&
223+
"SecondSPAdjustAmount should be greater than zero");
224+
adjustReg(MBB, MBBI, DL, SPReg, SPReg, -SecondSPAdjustAmount,
225+
MachineInstr::FrameSetup);
226+
} else {
227+
// FIXME: RegScavenger will place the spill instruction before the
228+
// prologue if a VReg is created in the prologue. This will pollute the
229+
// caller's stack data. Therefore, until there is a better way, we just use
230+
// the `addi.w/d` instruction for stack adjustment to ensure that VReg
231+
// will not be created.
232+
for (int Val = SecondSPAdjustAmount; Val > 0; Val -= 2048)
233+
BuildMI(MBB, MBBI, DL,
234+
TII->get(IsLA64 ? LoongArch::ADDI_D : LoongArch::ADDI_W), SPReg)
235+
.addReg(SPReg)
236+
.addImm(Val < 2048 ? -Val : -2048)
237+
.setMIFlag(MachineInstr::FrameSetup);
202238

203-
if (!hasFP(MF)) {
204239
// If we are using a frame-pointer, and thus emitted ".cfi_def_cfa fp, 0",
205240
// don't emit an sp-based .cfi_def_cfa_offset
206241
// Emit ".cfi_def_cfa_offset RealStackSize"
@@ -219,14 +254,12 @@ void LoongArchFrameLowering::emitPrologue(MachineFunction &MF,
219254
Register VR =
220255
MF.getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass);
221256
BuildMI(MBB, MBBI, DL,
222-
TII->get(STI.is64Bit() ? LoongArch::SRLI_D : LoongArch::SRLI_W),
223-
VR)
257+
TII->get(IsLA64 ? LoongArch::SRLI_D : LoongArch::SRLI_W), VR)
224258
.addReg(SPReg)
225259
.addImm(ShiftAmount)
226260
.setMIFlag(MachineInstr::FrameSetup);
227261
BuildMI(MBB, MBBI, DL,
228-
TII->get(STI.is64Bit() ? LoongArch::SLLI_D : LoongArch::SLLI_W),
229-
SPReg)
262+
TII->get(IsLA64 ? LoongArch::SLLI_D : LoongArch::SLLI_W), SPReg)
230263
.addReg(VR)
231264
.addImm(ShiftAmount)
232265
.setMIFlag(MachineInstr::FrameSetup);
@@ -295,20 +328,27 @@ void LoongArchFrameLowering::emitEpilogue(MachineFunction &MF,
295328
// st.d $ra, $sp, 2024
296329
// st.d $fp, $sp, 2016
297330
// addi.d $sp, $sp, -16
298-
uint64_t LoongArchFrameLowering::getFirstSPAdjustAmount(
299-
const MachineFunction &MF) const {
331+
uint64_t
332+
LoongArchFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF,
333+
bool IsPrologue) const {
300334
const MachineFrameInfo &MFI = MF.getFrameInfo();
301335
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
302336

303337
// Return the FirstSPAdjustAmount if the StackSize can not fit in a signed
304338
// 12-bit and there exists a callee-saved register needing to be pushed.
305-
if (!isInt<12>(MFI.getStackSize()) && (CSI.size() > 0)) {
339+
if (!isInt<12>(MFI.getStackSize())) {
306340
// FirstSPAdjustAmount is chosen as (2048 - StackAlign) because 2048 will
307341
// cause sp = sp + 2048 in the epilogue to be split into multiple
308342
// instructions. Offsets smaller than 2048 can fit in a single load/store
309343
// instruction, and we have to stick with the stack alignment.
310344
// So (2048 - StackAlign) will satisfy the stack alignment.
311-
return 2048 - getStackAlign().value();
345+
//
346+
// FIXME: This place may seem odd. When using multiple ADDI instructions to
347+
// adjust the stack in Prologue, and there are no callee-saved registers, we
348+
// can take advantage of the logic of split sp adjustment to reduce code
349+
// changes.
350+
return CSI.size() > 0 ? 2048 - getStackAlign().value()
351+
: (IsPrologue ? 2048 : 0);
312352
}
313353
return 0;
314354
}

llvm/lib/Target/LoongArch/LoongArchFrameLowering.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ class LoongArchFrameLowering : public TargetFrameLowering {
3434
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
3535
RegScavenger *RS) const override;
3636

37+
void processFunctionBeforeFrameFinalized(MachineFunction &MF,
38+
RegScavenger *RS) const override;
39+
3740
bool hasReservedCallFrame(const MachineFunction &MF) const override;
3841
MachineBasicBlock::iterator
3942
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
@@ -45,7 +48,8 @@ class LoongArchFrameLowering : public TargetFrameLowering {
4548
bool hasFP(const MachineFunction &MF) const override;
4649
bool hasBP(const MachineFunction &MF) const;
4750

48-
uint64_t getFirstSPAdjustAmount(const MachineFunction &MF) const;
51+
uint64_t getFirstSPAdjustAmount(const MachineFunction &MF,
52+
bool IsPrologue = false) const;
4953

5054
private:
5155
void determineFrameLayout(MachineFunction &MF) const;
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc --mtriple=loongarch64 -O0 < %s | FileCheck %s
3+
4+
@var = external global i32
5+
6+
define void @func() {
7+
; CHECK-LABEL: func:
8+
; CHECK: # %bb.0:
9+
; CHECK-NEXT: addi.d $sp, $sp, -2048
10+
; CHECK-NEXT: addi.d $sp, $sp, -2048
11+
; CHECK-NEXT: addi.d $sp, $sp, -16
12+
; CHECK-NEXT: .cfi_def_cfa_offset 4112
13+
; CHECK-NEXT: pcalau12i $a0, %got_pc_hi20(var)
14+
; CHECK-NEXT: ld.d $a1, $a0, %got_pc_lo12(var)
15+
; CHECK-NEXT: ld.w $t8, $a1, 0
16+
; CHECK-NEXT: ld.w $t7, $a1, 0
17+
; CHECK-NEXT: ld.w $t6, $a1, 0
18+
; CHECK-NEXT: ld.w $t5, $a1, 0
19+
; CHECK-NEXT: ld.w $t4, $a1, 0
20+
; CHECK-NEXT: ld.w $t3, $a1, 0
21+
; CHECK-NEXT: ld.w $t2, $a1, 0
22+
; CHECK-NEXT: ld.w $t1, $a1, 0
23+
; CHECK-NEXT: ld.w $t0, $a1, 0
24+
; CHECK-NEXT: ld.w $a7, $a1, 0
25+
; CHECK-NEXT: ld.w $a6, $a1, 0
26+
; CHECK-NEXT: ld.w $a5, $a1, 0
27+
; CHECK-NEXT: ld.w $a4, $a1, 0
28+
; CHECK-NEXT: ld.w $a3, $a1, 0
29+
; CHECK-NEXT: ld.w $a2, $a1, 0
30+
; CHECK-NEXT: ld.w $a0, $a1, 0
31+
; CHECK-NEXT: st.d $fp, $sp, 0
32+
; CHECK-NEXT: lu12i.w $fp, 1
33+
; CHECK-NEXT: ori $fp, $fp, 12
34+
; CHECK-NEXT: add.d $fp, $sp, $fp
35+
; CHECK-NEXT: st.w $t8, $fp, 0
36+
; CHECK-NEXT: ld.d $fp, $sp, 0
37+
; CHECK-NEXT: st.w $t8, $a1, 0
38+
; CHECK-NEXT: st.w $t7, $a1, 0
39+
; CHECK-NEXT: st.w $t6, $a1, 0
40+
; CHECK-NEXT: st.w $t5, $a1, 0
41+
; CHECK-NEXT: st.w $t4, $a1, 0
42+
; CHECK-NEXT: st.w $t3, $a1, 0
43+
; CHECK-NEXT: st.w $t2, $a1, 0
44+
; CHECK-NEXT: st.w $t1, $a1, 0
45+
; CHECK-NEXT: st.w $t0, $a1, 0
46+
; CHECK-NEXT: st.w $a7, $a1, 0
47+
; CHECK-NEXT: st.w $a6, $a1, 0
48+
; CHECK-NEXT: st.w $a5, $a1, 0
49+
; CHECK-NEXT: st.w $a4, $a1, 0
50+
; CHECK-NEXT: st.w $a3, $a1, 0
51+
; CHECK-NEXT: st.w $a2, $a1, 0
52+
; CHECK-NEXT: st.w $a0, $a1, 0
53+
; CHECK-NEXT: lu12i.w $a0, 1
54+
; CHECK-NEXT: ori $a0, $a0, 16
55+
; CHECK-NEXT: add.d $sp, $sp, $a0
56+
; CHECK-NEXT: ret
57+
%space = alloca i32, align 4
58+
%stackspace = alloca[1024 x i32], align 4
59+
60+
;; Load values to increase register pressure.
61+
%v0 = load volatile i32, ptr @var
62+
%v1 = load volatile i32, ptr @var
63+
%v2 = load volatile i32, ptr @var
64+
%v3 = load volatile i32, ptr @var
65+
%v4 = load volatile i32, ptr @var
66+
%v5 = load volatile i32, ptr @var
67+
%v6 = load volatile i32, ptr @var
68+
%v7 = load volatile i32, ptr @var
69+
%v8 = load volatile i32, ptr @var
70+
%v9 = load volatile i32, ptr @var
71+
%v10 = load volatile i32, ptr @var
72+
%v11 = load volatile i32, ptr @var
73+
%v12 = load volatile i32, ptr @var
74+
%v13 = load volatile i32, ptr @var
75+
%v14 = load volatile i32, ptr @var
76+
%v15 = load volatile i32, ptr @var
77+
78+
;; Computing a stack-relative value needs an additional register.
79+
;; We should get an emergency spill/reload for this.
80+
store volatile i32 %v0, ptr %space
81+
82+
;; store values so they are used.
83+
store volatile i32 %v0, ptr @var
84+
store volatile i32 %v1, ptr @var
85+
store volatile i32 %v2, ptr @var
86+
store volatile i32 %v3, ptr @var
87+
store volatile i32 %v4, ptr @var
88+
store volatile i32 %v5, ptr @var
89+
store volatile i32 %v6, ptr @var
90+
store volatile i32 %v7, ptr @var
91+
store volatile i32 %v8, ptr @var
92+
store volatile i32 %v9, ptr @var
93+
store volatile i32 %v10, ptr @var
94+
store volatile i32 %v11, ptr @var
95+
store volatile i32 %v12, ptr @var
96+
store volatile i32 %v13, ptr @var
97+
store volatile i32 %v14, ptr @var
98+
store volatile i32 %v15, ptr @var
99+
100+
ret void
101+
}

llvm/test/CodeGen/LoongArch/frame.ll

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ define i32 @test() nounwind {
2727
ret i32 0
2828
}
2929

30+
;; Note: will create an emergency spill slot, if (!isInt<11>(StackSize)).
3031
;; Should involve only one SP-adjusting addi per adjustment.
3132
define void @test_large_frame_size_2032() {
3233
; CHECK-LABEL: test_large_frame_size_2032:
@@ -35,7 +36,7 @@ define void @test_large_frame_size_2032() {
3536
; CHECK-NEXT: .cfi_def_cfa_offset 2032
3637
; CHECK-NEXT: addi.d $sp, $sp, 2032
3738
; CHECK-NEXT: ret
38-
%1 = alloca i8, i32 2032
39+
%1 = alloca i8, i32 2016 ; + 16(emergency slot) = 2032
3940
ret void
4041
}
4142

@@ -49,7 +50,7 @@ define void @test_large_frame_size_2048() {
4950
; CHECK-NEXT: addi.d $sp, $sp, 2032
5051
; CHECK-NEXT: addi.d $sp, $sp, 16
5152
; CHECK-NEXT: ret
52-
%1 = alloca i8, i32 2048
53+
%1 = alloca i8, i32 2032 ; + 16(emergency slot) = 2048
5354
ret void
5455
}
5556

@@ -63,21 +64,35 @@ define void @test_large_frame_size_2064() {
6364
; CHECK-NEXT: addi.d $sp, $sp, 2032
6465
; CHECK-NEXT: addi.d $sp, $sp, 32
6566
; CHECK-NEXT: ret
66-
%1 = alloca i8, i32 2064
67+
%1 = alloca i8, i32 2048 ; + 16(emergency slot) = 2064
6768
ret void
6869
}
6970

71+
;; NOTE: Due to the problem with the emergency spill slot, the scratch register
72+
;; will not be used when the fp is eliminated. To make this test valid, add the
73+
;; attribute "frame-pointer=all".
74+
7075
;; SP should be adjusted with help of a scratch register.
71-
define void @test_large_frame_size_1234576() {
76+
define void @test_large_frame_size_1234576() "frame-pointer"="all" {
7277
; CHECK-LABEL: test_large_frame_size_1234576:
7378
; CHECK: # %bb.0:
74-
; CHECK-NEXT: lu12i.w $a0, 301
75-
; CHECK-NEXT: ori $a0, $a0, 1680
79+
; CHECK-NEXT: addi.d $sp, $sp, -2032
80+
; CHECK-NEXT: .cfi_def_cfa_offset 2032
81+
; CHECK-NEXT: st.d $ra, $sp, 2024 # 8-byte Folded Spill
82+
; CHECK-NEXT: st.d $fp, $sp, 2016 # 8-byte Folded Spill
83+
; CHECK-NEXT: .cfi_offset 1, -8
84+
; CHECK-NEXT: .cfi_offset 22, -16
85+
; CHECK-NEXT: addi.d $fp, $sp, 2032
86+
; CHECK-NEXT: .cfi_def_cfa 22, 0
87+
; CHECK-NEXT: lu12i.w $a0, 300
88+
; CHECK-NEXT: ori $a0, $a0, 3760
7689
; CHECK-NEXT: sub.d $sp, $sp, $a0
77-
; CHECK-NEXT: .cfi_def_cfa_offset 1234576
78-
; CHECK-NEXT: lu12i.w $a0, 301
79-
; CHECK-NEXT: ori $a0, $a0, 1680
90+
; CHECK-NEXT: lu12i.w $a0, 300
91+
; CHECK-NEXT: ori $a0, $a0, 3760
8092
; CHECK-NEXT: add.d $sp, $sp, $a0
93+
; CHECK-NEXT: ld.d $fp, $sp, 2016 # 8-byte Folded Reload
94+
; CHECK-NEXT: ld.d $ra, $sp, 2024 # 8-byte Folded Reload
95+
; CHECK-NEXT: addi.d $sp, $sp, 2032
8196
; CHECK-NEXT: ret
8297
%1 = alloca i8, i32 1234567
8398
ret void

llvm/test/CodeGen/LoongArch/split-sp-adjust.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,19 +24,20 @@ entry:
2424
}
2525

2626
;; The stack size is 2032 and the SP adjustment will not be split.
27+
;; 2016 + 8(RA) + 8(emergency spill slot) = 2032
2728
define i32 @NoSplitSP() nounwind {
2829
; CHECK-LABEL: NoSplitSP:
2930
; CHECK: # %bb.0: # %entry
3031
; CHECK-NEXT: addi.d $sp, $sp, -2032
3132
; CHECK-NEXT: st.d $ra, $sp, 2024 # 8-byte Folded Spill
32-
; CHECK-NEXT: addi.d $a0, $sp, 0
33+
; CHECK-NEXT: addi.d $a0, $sp, 8
3334
; CHECK-NEXT: bl %plt(foo)
3435
; CHECK-NEXT: move $a0, $zero
3536
; CHECK-NEXT: ld.d $ra, $sp, 2024 # 8-byte Folded Reload
3637
; CHECK-NEXT: addi.d $sp, $sp, 2032
3738
; CHECK-NEXT: ret
3839
entry:
39-
%xx = alloca [2024 x i8], align 1
40+
%xx = alloca [2016 x i8], align 1
4041
%0 = getelementptr inbounds [2024 x i8], ptr %xx, i32 0, i32 0
4142
%call = call i32 @foo(ptr nonnull %0)
4243
ret i32 0

0 commit comments

Comments
 (0)