Skip to content

Commit c2a44b5

Browse files
xen0n authored and SixWeining committed
[LoongArch] Support lowering frames larger than 2048 bytes
Differential Revision: https://reviews.llvm.org/D134582
1 parent f8ad6ea commit c2a44b5

File tree

6 files changed

+180
-10
lines changed

6 files changed

+180
-10
lines changed

llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp

Lines changed: 37 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -53,21 +53,55 @@ void LoongArchFrameLowering::adjustReg(MachineBasicBlock &MBB,
5353
MachineInstr::MIFlag Flag) const {
5454
const LoongArchInstrInfo *TII = STI.getInstrInfo();
5555
bool IsLA64 = STI.is64Bit();
56+
unsigned Addi = IsLA64 ? LoongArch::ADDI_D : LoongArch::ADDI_W;
5657

5758
if (DestReg == SrcReg && Val == 0)
5859
return;
5960

6061
if (isInt<12>(Val)) {
6162
// addi.w/d $DstReg, $SrcReg, Val
62-
BuildMI(MBB, MBBI, DL,
63-
TII->get(IsLA64 ? LoongArch::ADDI_D : LoongArch::ADDI_W), DestReg)
63+
BuildMI(MBB, MBBI, DL, TII->get(Addi), DestReg)
6464
.addReg(SrcReg)
6565
.addImm(Val)
6666
.setMIFlag(Flag);
6767
return;
6868
}
6969

70-
report_fatal_error("adjustReg cannot yet handle adjustments >12 bits");
70+
// Try to split the offset across two ADDIs. We need to keep the stack pointer
71+
// aligned after each ADDI. We need to determine the maximum value we can put
72+
// in each ADDI. In the negative direction, we can use -2048 which is always
73+
// sufficiently aligned. In the positive direction, we need to find the
74+
// largest 12-bit immediate that is aligned. Exclude -4096 since it can be
75+
// created with LU12I.W.
76+
assert(getStackAlign().value() < 2048 && "Stack alignment too large");
77+
int64_t MaxPosAdjStep = 2048 - getStackAlign().value();
78+
if (Val > -4096 && Val <= (2 * MaxPosAdjStep)) {
79+
int64_t FirstAdj = Val < 0 ? -2048 : MaxPosAdjStep;
80+
Val -= FirstAdj;
81+
BuildMI(MBB, MBBI, DL, TII->get(Addi), DestReg)
82+
.addReg(SrcReg)
83+
.addImm(FirstAdj)
84+
.setMIFlag(Flag);
85+
BuildMI(MBB, MBBI, DL, TII->get(Addi), DestReg)
86+
.addReg(DestReg, RegState::Kill)
87+
.addImm(Val)
88+
.setMIFlag(Flag);
89+
return;
90+
}
91+
92+
unsigned Opc = IsLA64 ? LoongArch::ADD_D : LoongArch::ADD_W;
93+
if (Val < 0) {
94+
Val = -Val;
95+
Opc = IsLA64 ? LoongArch::SUB_D : LoongArch::SUB_W;
96+
}
97+
98+
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
99+
Register ScratchReg = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
100+
TII->movImm(MBB, MBBI, DL, ScratchReg, Val, Flag);
101+
BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
102+
.addReg(SrcReg)
103+
.addReg(ScratchReg, RegState::Kill)
104+
.setMIFlag(Flag);
71105
}
72106

73107
// Determine the size of the frame and maximum call frame size.

llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp

Lines changed: 40 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
#include "LoongArchInstrInfo.h"
1414
#include "LoongArch.h"
1515
#include "LoongArchMachineFunctionInfo.h"
16+
#include "MCTargetDesc/LoongArchMatInt.h"
1617

1718
using namespace llvm;
1819

@@ -21,7 +22,8 @@ using namespace llvm;
2122

2223
LoongArchInstrInfo::LoongArchInstrInfo(LoongArchSubtarget &STI)
2324
: LoongArchGenInstrInfo(LoongArch::ADJCALLSTACKDOWN,
24-
LoongArch::ADJCALLSTACKUP) {}
25+
LoongArch::ADJCALLSTACKUP),
26+
STI(STI) {}
2527

2628
void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
2729
MachineBasicBlock::iterator MBBI,
@@ -114,6 +116,43 @@ void LoongArchInstrInfo::loadRegFromStackSlot(
114116
.addMemOperand(MMO);
115117
}
116118

119+
void LoongArchInstrInfo::movImm(MachineBasicBlock &MBB,
120+
MachineBasicBlock::iterator MBBI,
121+
const DebugLoc &DL, Register DstReg,
122+
uint64_t Val, MachineInstr::MIFlag Flag) const {
123+
Register SrcReg = LoongArch::R0;
124+
125+
if (!STI.is64Bit() && !isInt<32>(Val))
126+
report_fatal_error("Should only materialize 32-bit constants for LA32");
127+
128+
auto Seq = LoongArchMatInt::generateInstSeq(Val);
129+
assert(!Seq.empty());
130+
131+
for (auto &Inst : Seq) {
132+
switch (Inst.Opc) {
133+
case LoongArch::LU12I_W:
134+
BuildMI(MBB, MBBI, DL, get(Inst.Opc), DstReg)
135+
.addImm(Inst.Imm)
136+
.setMIFlag(Flag);
137+
break;
138+
case LoongArch::ADDI_W:
139+
case LoongArch::ORI:
140+
case LoongArch::LU32I_D: // "rj" is needed due to InstrInfo pattern
141+
case LoongArch::LU52I_D:
142+
BuildMI(MBB, MBBI, DL, get(Inst.Opc), DstReg)
143+
.addReg(SrcReg, RegState::Kill)
144+
.addImm(Inst.Imm)
145+
.setMIFlag(Flag);
146+
break;
147+
default:
148+
assert(false && "Unknown insn emitted by LoongArchMatInt");
149+
}
150+
151+
// Only the first instruction has $zero as its source.
152+
SrcReg = DstReg;
153+
}
154+
}
155+
117156
unsigned LoongArchInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
118157
return MI.getDesc().getSize();
119158
}

llvm/lib/Target/LoongArch/LoongArchInstrInfo.h

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,11 @@ class LoongArchInstrInfo : public LoongArchGenInstrInfo {
4141
int FrameIndex, const TargetRegisterClass *RC,
4242
const TargetRegisterInfo *TRI) const override;
4343

44+
// Materializes the given integer Val into DstReg.
45+
void movImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
46+
const DebugLoc &DL, Register DstReg, uint64_t Val,
47+
MachineInstr::MIFlag Flag = MachineInstr::NoFlags) const;
48+
4449
unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
4550

4651
MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
@@ -60,6 +65,9 @@ class LoongArchInstrInfo : public LoongArchGenInstrInfo {
6065

6166
bool
6267
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
68+
69+
protected:
70+
const LoongArchSubtarget &STI;
6371
};
6472

6573
} // end namespace llvm

llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp

Lines changed: 30 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,9 @@
1313

1414
#include "LoongArchRegisterInfo.h"
1515
#include "LoongArch.h"
16+
#include "LoongArchInstrInfo.h"
1617
#include "LoongArchSubtarget.h"
18+
#include "MCTargetDesc/LoongArchMCTargetDesc.h"
1719
#include "llvm/CodeGen/MachineFrameInfo.h"
1820
#include "llvm/CodeGen/MachineFunction.h"
1921
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -112,7 +114,11 @@ void LoongArchRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
112114
assert(SPAdj == 0 && "Unexpected non-zero SPAdj value");
113115

114116
MachineInstr &MI = *II;
117+
MachineBasicBlock &MBB = *MI.getParent();
115118
MachineFunction &MF = *MI.getParent()->getParent();
119+
MachineRegisterInfo &MRI = MF.getRegInfo();
120+
const LoongArchSubtarget &STI = MF.getSubtarget<LoongArchSubtarget>();
121+
const LoongArchInstrInfo *TII = STI.getInstrInfo();
116122
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
117123
DebugLoc DL = MI.getDebugLoc();
118124

@@ -122,12 +128,32 @@ void LoongArchRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
122128
TFI->getFrameIndexReference(MF, FrameIndex, FrameReg) +
123129
StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm());
124130

125-
// Offsets must be encodable with a 12-bit immediate field.
131+
bool FrameRegIsKill = false;
132+
126133
if (!isInt<12>(Offset.getFixed())) {
127-
report_fatal_error("Frame offsets outside of the signed 12-bit range is "
128-
"not supported currently");
134+
unsigned Addi = STI.is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W;
135+
unsigned Add = STI.is64Bit() ? LoongArch::ADD_D : LoongArch::ADD_W;
136+
137+
// The offset won't fit in an immediate, so use a scratch register instead.
138+
// Modify Offset and FrameReg appropriately.
139+
Register ScratchReg = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
140+
TII->movImm(MBB, II, DL, ScratchReg, Offset.getFixed());
141+
if (MI.getOpcode() == Addi) {
142+
BuildMI(MBB, II, DL, TII->get(Add), MI.getOperand(0).getReg())
143+
.addReg(FrameReg)
144+
.addReg(ScratchReg, RegState::Kill);
145+
MI.eraseFromParent();
146+
return;
147+
}
148+
BuildMI(MBB, II, DL, TII->get(Add), ScratchReg)
149+
.addReg(FrameReg)
150+
.addReg(ScratchReg, RegState::Kill);
151+
Offset = StackOffset::getFixed(0);
152+
FrameReg = ScratchReg;
153+
FrameRegIsKill = true;
129154
}
130155

131-
MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false);
156+
MI.getOperand(FIOperandNum)
157+
.ChangeToRegister(FrameReg, false, false, FrameRegIsKill);
132158
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed());
133159
}

llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,14 @@ struct LoongArchRegisterInfo : public LoongArchGenRegisterInfo {
4343
RegScavenger *RS = nullptr) const override;
4444

4545
Register getFrameRegister(const MachineFunction &MF) const override;
46+
47+
bool requiresRegisterScavenging(const MachineFunction &MF) const override {
48+
return true;
49+
}
50+
51+
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override {
52+
return true;
53+
}
4654
};
4755
} // end namespace llvm
4856

llvm/test/CodeGen/LoongArch/frame.ll

Lines changed: 57 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,9 @@
33

44
%struct.key_t = type { i32, [16 x i8] }
55

6+
declare void @llvm.memset.p0i8.i64(ptr, i8, i64, i1)
7+
declare void @test1(ptr)
8+
69
define i32 @test() nounwind {
710
; CHECK-LABEL: test:
811
; CHECK: # %bb.0:
@@ -24,6 +27,58 @@ define i32 @test() nounwind {
2427
ret i32 0
2528
}
2629

27-
declare void @llvm.memset.p0i8.i64(ptr, i8, i64, i1)
30+
;; Should involve only one SP-adjusting addi per adjustment.
31+
define void @test_large_frame_size_2032() {
32+
; CHECK-LABEL: test_large_frame_size_2032:
33+
; CHECK: # %bb.0:
34+
; CHECK-NEXT: addi.d $sp, $sp, -2032
35+
; CHECK-NEXT: .cfi_def_cfa_offset 2032
36+
; CHECK-NEXT: addi.d $sp, $sp, 2032
37+
; CHECK-NEXT: ret
38+
%1 = alloca i8, i32 2032
39+
ret void
40+
}
2841

29-
declare void @test1(ptr)
42+
;; Should involve two SP-adjusting addi's when adjusting SP up, but only one
43+
;; when adjusting down.
44+
define void @test_large_frame_size_2048() {
45+
; CHECK-LABEL: test_large_frame_size_2048:
46+
; CHECK: # %bb.0:
47+
; CHECK-NEXT: addi.d $sp, $sp, -2048
48+
; CHECK-NEXT: .cfi_def_cfa_offset 2048
49+
; CHECK-NEXT: addi.d $sp, $sp, 2032
50+
; CHECK-NEXT: addi.d $sp, $sp, 16
51+
; CHECK-NEXT: ret
52+
%1 = alloca i8, i32 2048
53+
ret void
54+
}
55+
56+
;; Should involve two SP-adjusting addi's per adjustment.
57+
define void @test_large_frame_size_2064() {
58+
; CHECK-LABEL: test_large_frame_size_2064:
59+
; CHECK: # %bb.0:
60+
; CHECK-NEXT: addi.d $sp, $sp, -2048
61+
; CHECK-NEXT: addi.d $sp, $sp, -16
62+
; CHECK-NEXT: .cfi_def_cfa_offset 2064
63+
; CHECK-NEXT: addi.d $sp, $sp, 2032
64+
; CHECK-NEXT: addi.d $sp, $sp, 32
65+
; CHECK-NEXT: ret
66+
%1 = alloca i8, i32 2064
67+
ret void
68+
}
69+
70+
;; SP should be adjusted with help of a scratch register.
71+
define void @test_large_frame_size_1234576() {
72+
; CHECK-LABEL: test_large_frame_size_1234576:
73+
; CHECK: # %bb.0:
74+
; CHECK-NEXT: lu12i.w $a0, 301
75+
; CHECK-NEXT: ori $a0, $a0, 1680
76+
; CHECK-NEXT: sub.d $sp, $sp, $a0
77+
; CHECK-NEXT: .cfi_def_cfa_offset 1234576
78+
; CHECK-NEXT: lu12i.w $a0, 301
79+
; CHECK-NEXT: ori $a0, $a0, 1680
80+
; CHECK-NEXT: add.d $sp, $sp, $a0
81+
; CHECK-NEXT: ret
82+
%1 = alloca i8, i32 1234567
83+
ret void
84+
}

0 commit comments

Comments
 (0)