Skip to content

Commit 0e4378c

Browse files
committed
[LoongArch] Add emergency spill slot for CFR spill/reload
When all registers have been allocated and a CFR needs to be saved on the stack, an emergency spill slot is required, because spilling and reloading a CFR requires a general-purpose register to transfer the value through. The attached test case was bugpoint-reduced from `MultiSource/Benchmarks/mafft/Lalignmm.c` in the test-suite. Without this patch, llc crashes and reports the following error: ``` LLVM ERROR: Error while trying to spill R4 from class GPR: Cannot scavenge register without an emergency spill slot! ``` Reviewed By: SixWeining Differential Revision: https://reviews.llvm.org/D138007
1 parent 1dd5ca2 commit 0e4378c

File tree

2 files changed

+159
-14
lines changed

2 files changed

+159
-14
lines changed

llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp

Lines changed: 32 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "LoongArchMachineFunctionInfo.h"
1515
#include "LoongArchSubtarget.h"
1616
#include "MCTargetDesc/LoongArchBaseInfo.h"
17+
#include "MCTargetDesc/LoongArchMCTargetDesc.h"
1718
#include "llvm/CodeGen/MachineFrameInfo.h"
1819
#include "llvm/CodeGen/MachineFunction.h"
1920
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -127,6 +128,16 @@ static uint64_t estimateFunctionSizeInBytes(const LoongArchInstrInfo *TII,
127128
return FuncSize;
128129
}
129130

131+
/// Decide whether \p MF needs an additional emergency spill slot on account
/// of CFR spills.
///
/// \returns true only if the subtarget reports hasBasicF() and at least one
/// instruction in \p MF has the opcode LoongArch::PseudoST_CFR; false
/// otherwise.
static bool needScavSlotForCFR(MachineFunction &MF) {
  const bool HasBasicF = MF.getSubtarget<LoongArchSubtarget>().hasBasicF();
  if (HasBasicF) {
    // Walk every instruction of every basic block and stop at the first
    // PseudoST_CFR that is found.
    for (auto &Block : MF) {
      for (auto &Inst : Block) {
        if (Inst.getOpcode() == LoongArch::PseudoST_CFR)
          return true;
      }
    }
  }
  // Either the basic F feature is absent (the function is not scanned at
  // all in that case) or no PseudoST_CFR instruction exists.
  return false;
}
140+
130141
void LoongArchFrameLowering::processFunctionBeforeFrameFinalized(
131142
MachineFunction &MF, RegScavenger *RS) const {
132143
const LoongArchRegisterInfo *RI = STI.getRegisterInfo();
@@ -136,26 +147,33 @@ void LoongArchFrameLowering::processFunctionBeforeFrameFinalized(
136147
MF.getInfo<LoongArchMachineFunctionInfo>();
137148
MachineFrameInfo &MFI = MF.getFrameInfo();
138149

150+
unsigned ScavSlotsNum = 0;
151+
139152
// Far branches beyond 27-bit offset require a spill slot for scratch register.
140-
if (!isInt<27>(estimateFunctionSizeInBytes(TII, MF))) {
153+
bool IsLargeFunction = !isInt<27>(estimateFunctionSizeInBytes(TII, MF));
154+
if (IsLargeFunction)
155+
ScavSlotsNum = 1;
156+
157+
// estimateStackSize has been observed to under-estimate the final stack
158+
// size, so give ourselves wiggle-room by checking for stack size
159+
// representable in an 11-bit signed field rather than 12 bits.
160+
if (!isInt<11>(MFI.estimateStackSize(MF)))
161+
ScavSlotsNum = std::max(ScavSlotsNum, 1u);
162+
163+
// For CFR spill.
164+
if (needScavSlotForCFR(MF))
165+
++ScavSlotsNum;
166+
167+
// Create emergency spill slots.
168+
for (unsigned i = 0; i < ScavSlotsNum; ++i) {
141169
int FI = MFI.CreateStackObject(RI->getSpillSize(RC), RI->getSpillAlign(RC),
142170
false);
143171
RS->addScavengingFrameIndex(FI);
144-
if (LAFI->getBranchRelaxationSpillFrameIndex() == -1)
172+
if (IsLargeFunction && LAFI->getBranchRelaxationSpillFrameIndex() == -1)
145173
LAFI->setBranchRelaxationSpillFrameIndex(FI);
174+
LLVM_DEBUG(dbgs() << "Allocated FI(" << FI
175+
<< ") as the emergency spill slot.\n");
146176
}
147-
// estimateStackSize has been observed to under-estimate the final stack
148-
// size, so give ourselves wiggle-room by checking for stack size
149-
// representable an 11-bit signed field rather than 12-bits.
150-
if (isInt<11>(MFI.estimateStackSize(MF)))
151-
return;
152-
153-
// Create an emergency spill slot.
154-
int FI =
155-
MFI.CreateStackObject(RI->getSpillSize(RC), RI->getSpillAlign(RC), false);
156-
RS->addScavengingFrameIndex(FI);
157-
LLVM_DEBUG(dbgs() << "Allocated FI(" << FI
158-
<< ") as the emergency spill slot.\n");
159177
}
160178

161179
void LoongArchFrameLowering::emitPrologue(MachineFunction &MF,
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
; RUN: llc --mtriple=loongarch64 -mattr=+d %s -o /dev/null
2+
3+
; ModuleID = 'bugpoint-reduced-simplifycfg.bc'
4+
source_filename = "test-suite-src/MultiSource/Benchmarks/mafft/Lalignmm.c"
5+
6+
; Regression reproducer for the CFR emergency-spill-slot fix. The body was
; bugpoint-reduced from MultiSource/Benchmarks/mafft/Lalignmm.c, which is why
; many operands are `poison` and the loops at %for.body.i and %for.body429.i
; never exit. Before the fix, llc aborted on this function with
; "Cannot scavenge register without an emergency spill slot!".
; The loop at %for.body260.us.i keeps many pointer values live across
; iterations while performing several float fcmp/select sequences.
; NOTE(review): presumably those fcmp results are the CFR values that end up
; being spilled under GPR pressure -- confirm against the generated MIR.
define float @Lalignmm_hmout(ptr %seq1, ptr %eff1, i32 %icyc) {
7+
entry:
8+
%call4 = tail call i64 @strlen(ptr dereferenceable(1) poison)
9+
%conv5 = trunc i64 %call4 to i32
10+
%call7 = tail call i64 @strlen(ptr dereferenceable(1) poison)
11+
%call20 = tail call ptr @AllocateFloatVec(i32 signext poison)
12+
%call22 = tail call ptr @AllocateFloatVec(i32 signext poison)
13+
tail call void @st_OpeningGapCount(ptr poison, i32 signext %icyc, ptr %seq1, ptr %eff1, i32 signext %conv5)
14+
%sub110 = add nsw i32 %conv5, -1
15+
%sub111 = add nsw i32 0, -1
16+
br i1 poison, label %for.cond.preheader.i, label %if.end.i
17+
18+
for.cond.preheader.i: ; preds = %entry
19+
%sext294 = shl i64 %call4, 32
20+
%conv23.i = ashr exact i64 %sext294, 32
21+
br label %for.body.i
22+
23+
for.body.i: ; preds = %for.body.i, %for.cond.preheader.i
24+
%call.i = tail call ptr @strncpy(ptr poison, ptr poison, i64 %conv23.i)
25+
br label %for.body.i
26+
27+
if.end.i: ; preds = %entry
28+
%call82.i = tail call ptr @AllocateFloatVec(i32 signext poison)
29+
%call84.i = tail call ptr @AllocateFloatVec(i32 signext poison)
30+
%call86.i = tail call ptr @AllocateFloatVec(i32 signext poison)
31+
%call88.i = tail call ptr @AllocateFloatVec(i32 signext poison)
32+
%call90.i = tail call ptr @AllocateFloatVec(i32 signext poison)
33+
%call92.i = tail call ptr @AllocateIntVec(i32 signext poison)
34+
%call94.i = tail call ptr @AllocateIntVec(i32 signext poison)
35+
%call104.i = tail call ptr @AllocateFloatVec(i32 signext poison)
36+
%call108.i = tail call ptr @AllocateFloatVec(i32 signext poison)
37+
%call110.i = tail call ptr @AllocateIntVec(i32 signext poison)
38+
%idxprom220.i = sext i32 %sub111 to i64
39+
%mpjpt.018.i = getelementptr inbounds i32, ptr %call110.i, i64 1
40+
%arrayidx329.i = getelementptr inbounds float, ptr %call108.i, i64 %idxprom220.i
41+
%idxprom332.i = and i64 %call7, 4294967295
42+
%wide.trip.count130.i = zext i32 poison to i64
43+
%0 = add nsw i64 1, -1
44+
%arrayidx239.i = getelementptr inbounds float, ptr %call104.i, i64 1
45+
%1 = load float, ptr %arrayidx239.i, align 4
46+
store float %1, ptr %call84.i, align 4
47+
%curpt.017.i = getelementptr inbounds float, ptr %call84.i, i64 1
48+
%arrayidx279.i = getelementptr inbounds float, ptr %call20, i64 %0
49+
%2 = load ptr, ptr poison, align 8
50+
%3 = load ptr, ptr null, align 8
51+
%4 = trunc i64 %0 to i32
52+
br label %for.body260.us.i
53+
54+
for.body260.us.i: ; preds = %if.end292.us.i, %if.end.i
55+
%indvars.iv132.i = phi i64 [ %indvars.iv.next133.i, %if.end292.us.i ], [ 1, %if.end.i ]
56+
%mpjpt.026.us.i = phi ptr [ poison, %if.end292.us.i ], [ %mpjpt.018.i, %if.end.i ]
57+
%curpt.025.us.i = phi ptr [ %curpt.0.us.i, %if.end292.us.i ], [ %curpt.017.i, %if.end.i ]
58+
%prept.022.us.i = phi ptr [ %incdec.ptr316.us.i, %if.end292.us.i ], [ %call82.i, %if.end.i ]
59+
%mi.021.us.i = phi float [ %mi.1.us.i, %if.end292.us.i ], [ poison, %if.end.i ]
60+
%5 = load float, ptr %prept.022.us.i, align 4
61+
%6 = add nsw i64 %indvars.iv132.i, -1
62+
%arrayidx263.us.i = getelementptr inbounds float, ptr %call22, i64 %6
63+
%7 = load float, ptr %arrayidx263.us.i, align 4
64+
%add264.us.i = fadd float %mi.021.us.i, %7
65+
%cmp265.us.i = fcmp ogt float %add264.us.i, %5
66+
%wm.0.us.i = select i1 %cmp265.us.i, float %add264.us.i, float %5
67+
%arrayidx270.us.i = getelementptr inbounds float, ptr poison, i64 %indvars.iv132.i
68+
%cmp272.us.i = fcmp ult float 0.000000e+00, %mi.021.us.i
69+
%mi.1.us.i = select i1 %cmp272.us.i, float %mi.021.us.i, float 0.000000e+00
70+
%8 = trunc i64 %6 to i32
71+
%mpi.1.us.i = select i1 %cmp272.us.i, i32 0, i32 %8
72+
%9 = load float, ptr %arrayidx279.i, align 4
73+
%add280.us.i = fadd float 0.000000e+00, %9
74+
%cmp281.us.i = fcmp ogt float %add280.us.i, %wm.0.us.i
75+
%wm.1.us.i = select i1 %cmp281.us.i, float %add280.us.i, float %wm.0.us.i
76+
%cmp288.us.i = fcmp ult float poison, 0.000000e+00
77+
br i1 %cmp288.us.i, label %if.end292.us.i, label %if.then290.us.i
78+
79+
if.then290.us.i: ; preds = %for.body260.us.i
80+
store i32 %4, ptr %mpjpt.026.us.i, align 4
81+
br label %if.end292.us.i
82+
83+
if.end292.us.i: ; preds = %if.then290.us.i, %for.body260.us.i
84+
%10 = phi i32 [ %4, %if.then290.us.i ], [ poison, %for.body260.us.i ]
85+
%add293.us.i = fadd float %wm.1.us.i, 0.000000e+00
86+
%arrayidx297.us.i = getelementptr inbounds float, ptr %2, i64 %indvars.iv132.i
87+
store float %add293.us.i, ptr %arrayidx297.us.i, align 4
88+
%arrayidx306.us.i = getelementptr inbounds i32, ptr %call94.i, i64 %indvars.iv132.i
89+
store i32 %10, ptr %arrayidx306.us.i, align 4
90+
%arrayidx308.us.i = getelementptr inbounds i32, ptr %call92.i, i64 %indvars.iv132.i
91+
store i32 %mpi.1.us.i, ptr %arrayidx308.us.i, align 4
92+
%11 = load float, ptr %curpt.025.us.i, align 4
93+
%arrayidx310.us.i = getelementptr inbounds float, ptr %call86.i, i64 %indvars.iv132.i
94+
store float %11, ptr %arrayidx310.us.i, align 4
95+
%arrayidx312.us.i = getelementptr inbounds float, ptr %call90.i, i64 %indvars.iv132.i
96+
store float 0.000000e+00, ptr %arrayidx312.us.i, align 4
97+
%arrayidx314.us.i = getelementptr inbounds float, ptr %call88.i, i64 %indvars.iv132.i
98+
store float %mi.1.us.i, ptr %arrayidx314.us.i, align 4
99+
%incdec.ptr316.us.i = getelementptr inbounds float, ptr %prept.022.us.i, i64 1
100+
%indvars.iv.next133.i = add nuw nsw i64 %indvars.iv132.i, 1
101+
%curpt.0.us.i = getelementptr inbounds float, ptr %curpt.025.us.i, i64 1
102+
%exitcond137.not.i = icmp eq i64 %indvars.iv.next133.i, %wide.trip.count130.i
103+
br i1 %exitcond137.not.i, label %for.end321.i, label %for.body260.us.i
104+
105+
for.end321.i: ; preds = %if.end292.us.i
106+
%12 = load float, ptr %arrayidx329.i, align 4
107+
%arrayidx333.i = getelementptr inbounds float, ptr %3, i64 %idxprom332.i
108+
store float %12, ptr %arrayidx333.i, align 4
109+
tail call fastcc void @match_calc(ptr %call104.i, ptr poison, ptr poison, i32 signext %sub111, i32 signext %conv5, ptr poison, ptr poison, i32 signext 1)
110+
br label %for.body429.i
111+
112+
for.body429.i: ; preds = %for.body429.i, %for.end321.i
113+
%j.743.i = phi i32 [ %sub111, %for.end321.i ], [ %sub436.i, %for.body429.i ]
114+
%sub436.i = add nsw i32 %j.743.i, -1
115+
%idxprom437.i = zext i32 %sub436.i to i64
116+
%arrayidx438.i = getelementptr inbounds float, ptr %call108.i, i64 %idxprom437.i
117+
store float 0.000000e+00, ptr %arrayidx438.i, align 4
118+
store i32 %sub110, ptr poison, align 4
119+
br label %for.body429.i
120+
}
121+
122+
declare i64 @strlen(ptr)
123+
declare ptr @AllocateFloatVec(i32)
124+
declare void @st_OpeningGapCount(ptr, i32, ptr, ptr, i32)
125+
declare ptr @strncpy(ptr, ptr, i64)
126+
declare ptr @AllocateIntVec(i32)
127+
declare void @match_calc(ptr, ptr, ptr, i32, i32, ptr, ptr, i32)

0 commit comments

Comments
 (0)