Skip to content

Commit c0221e0

Browse files
committed
[RISCV] Add a pass to combine cm.pop and ret insts
`RISCVPushPopOptimizer.cpp` combines `cm.pop` and `ret` to generate `cm.popretz` or `cm.popret`. Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D150416
1 parent a10dccf commit c0221e0

File tree

8 files changed

+226
-53
lines changed

8 files changed

+226
-53
lines changed

llvm/lib/Target/RISCV/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ add_llvm_target(RISCVCodeGen
3737
RISCVOptWInstrs.cpp
3838
RISCVRedundantCopyElimination.cpp
3939
RISCVMoveMerger.cpp
40+
RISCVPushPopOptimizer.cpp
4041
RISCVRegisterInfo.cpp
4142
RISCVRVVInitUndef.cpp
4243
RISCVSubtarget.cpp

llvm/lib/Target/RISCV/RISCV.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,9 @@ extern char &RISCVInitUndefID;
7373
FunctionPass *createRISCVMoveMergePass();
7474
void initializeRISCVMoveMergePass(PassRegistry &);
7575

76+
FunctionPass *createRISCVPushPopOptimizationPass();
77+
void initializeRISCVPushPopOptPass(PassRegistry &);
78+
7679
InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &,
7780
RISCVSubtarget &,
7881
RISCVRegisterBankInfo &);
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
//===------- RISCVPushPopOptimizer.cpp - RISCV Push/Pop opt. pass ---------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file contains a pass that modifies PUSH/POP instructions from Zcmp
10+
// standard to use their non prolog/epilog related functionalities
11+
// and generates POPRET instruction.
12+
//
13+
//===----------------------------------------------------------------------===//
14+
15+
#include "RISCVInstrInfo.h"
16+
#include "RISCVMachineFunctionInfo.h"
17+
18+
using namespace llvm;
19+
20+
#define RISCV_PUSH_POP_OPT_NAME "RISC-V Zcmp Push/Pop optimization pass"
21+
22+
namespace {
23+
struct RISCVPushPopOpt : public MachineFunctionPass {
24+
static char ID;
25+
26+
RISCVPushPopOpt() : MachineFunctionPass(ID) {
27+
initializeRISCVPushPopOptPass(*PassRegistry::getPassRegistry());
28+
}
29+
30+
const RISCVInstrInfo *TII;
31+
const TargetRegisterInfo *TRI;
32+
33+
// Track which register units have been modified and used.
34+
LiveRegUnits ModifiedRegUnits, UsedRegUnits;
35+
36+
bool usePopRet(MachineBasicBlock::iterator &MBBI,
37+
MachineBasicBlock::iterator &NextI, bool IsReturnZero);
38+
bool adjustRetVal(MachineBasicBlock::iterator &MBBI);
39+
bool runOnMachineFunction(MachineFunction &Fn) override;
40+
41+
StringRef getPassName() const override { return RISCV_PUSH_POP_OPT_NAME; }
42+
};
43+
44+
char RISCVPushPopOpt::ID = 0;
45+
46+
} // end of anonymous namespace
47+
48+
INITIALIZE_PASS(RISCVPushPopOpt, "riscv-push-pop-opt", RISCV_PUSH_POP_OPT_NAME,
49+
false, false)
50+
51+
// Check if POP instruction was inserted into the MBB and return iterator to it.
52+
static MachineBasicBlock::iterator containsPop(MachineBasicBlock &MBB) {
53+
for (MachineBasicBlock::iterator MBBI = MBB.begin(); MBBI != MBB.end();
54+
MBBI = next_nodbg(MBBI, MBB.end()))
55+
if (MBBI->getOpcode() == RISCV::CM_POP)
56+
return MBBI;
57+
58+
return MBB.end();
59+
}
60+
61+
bool RISCVPushPopOpt::usePopRet(MachineBasicBlock::iterator &MBBI,
                                MachineBasicBlock::iterator &NextI,
                                bool IsReturnZero) {
  // This pass runs before pseudo-instruction lowering, so the return at NextI
  // is still the pseudo return that the caller matched.
  MachineBasicBlock &ParentMBB = *NextI->getParent();
  DebugLoc RetDL = NextI->getDebugLoc();

  unsigned PopRetOpc = RISCV::CM_POPRET;
  if (IsReturnZero)
    PopRetOpc = RISCV::CM_POPRETZ;

  // Emit the fused instruction in front of the return, reusing the first two
  // operands of the CM_POP. This must happen before the CM_POP is erased.
  BuildMI(ParentMBB, NextI, RetDL, TII->get(PopRetOpc))
      .add(MBBI->getOperand(0))
      .add(MBBI->getOperand(1));

  // The original pop and return are now redundant.
  MBBI->eraseFromParent();
  NextI->eraseFromParent();
  return true;
}
76+
77+
// Search for last assignment to a0 and if possible use ret_val slot of POP to
78+
// store return value.
79+
bool RISCVPushPopOpt::adjustRetVal(MachineBasicBlock::iterator &MBBI) {
80+
MachineBasicBlock::reverse_iterator RE = MBBI->getParent()->rend();
81+
// Track which register units have been modified and used between the POP
82+
// insn and the last assignment to register a0.
83+
ModifiedRegUnits.clear();
84+
UsedRegUnits.clear();
85+
// Since POP instruction is in Epilogue no normal instructions will follow
86+
// after it. Therefore search only previous ones to find the return value.
87+
for (MachineBasicBlock::reverse_iterator I =
88+
next_nodbg(MBBI.getReverse(), RE);
89+
I != RE; I = next_nodbg(I, RE)) {
90+
MachineInstr &MI = *I;
91+
if (auto OperandPair = TII->isCopyInstrImpl(MI)) {
92+
Register DestReg = OperandPair->Destination->getReg();
93+
Register Source = OperandPair->Source->getReg();
94+
if (DestReg == RISCV::X10 && Source == RISCV::X0) {
95+
MI.removeFromParent();
96+
return true;
97+
}
98+
}
99+
// Update modified / used register units.
100+
LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
101+
// If a0 was modified or used, there is no possibility
102+
// of using ret_val slot of POP instruction.
103+
if (!ModifiedRegUnits.available(RISCV::X10) ||
104+
!UsedRegUnits.available(RISCV::X10))
105+
return false;
106+
}
107+
return false;
108+
}
109+
110+
// Pass entry point: for every basic block that contains a CM_POP directly
// followed (ignoring debug instructions) by PseudoRET, replace the pair with a
// single CM_POPRET / CM_POPRETZ. Returns true if any block was changed.
bool RISCVPushPopOpt::runOnMachineFunction(MachineFunction &Fn) {
  if (skipFunction(Fn.getFunction()))
    return false;

  // If Zcmp extension is not supported, abort.
  const RISCVSubtarget *Subtarget = &Fn.getSubtarget<RISCVSubtarget>();
  if (!Subtarget->hasStdExtZcmp())
    return false;

  // If frame pointer elimination has been disabled, abort to avoid breaking the
  // ABI.
  if (Fn.getTarget().Options.DisableFramePointerElim(Fn))
    return false;

  TII = static_cast<const RISCVInstrInfo *>(Subtarget->getInstrInfo());
  TRI = Subtarget->getRegisterInfo();
  // Resize the modified and used register unit trackers. We do this once
  // per function and then clear the register units each time we determine
  // correct return value for the POP.
  ModifiedRegUnits.init(*TRI);
  UsedRegUnits.init(*TRI);
  bool Modified = false;
  for (auto &MBB : Fn) {
    MachineBasicBlock::iterator MBBI = containsPop(MBB);
    // No POP in this block: do not advance past end().
    if (MBBI == MBB.end())
      continue;

    // The POP may be the last non-debug instruction of the block, in which
    // case NextI is end() and must not be dereferenced.
    MachineBasicBlock::iterator NextI = next_nodbg(MBBI, MBB.end());
    if (NextI == MBB.end() || NextI->getOpcode() != RISCV::PseudoRET)
      continue;

    Modified |= usePopRet(MBBI, NextI, adjustRetVal(MBBI));
  }
  return Modified;
}
140+
141+
/// createRISCVPushPopOptimizationPass - returns an instance of the
142+
/// Push/Pop optimization pass.
143+
FunctionPass *llvm::createRISCVPushPopOptimizationPass() {
144+
return new RISCVPushPopOpt();
145+
}

llvm/lib/Target/RISCV/RISCVTargetMachine.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
8989
initializeRISCVDAGToDAGISelPass(*PR);
9090
initializeRISCVInitUndefPass(*PR);
9191
initializeRISCVMoveMergePass(*PR);
92+
initializeRISCVPushPopOptPass(*PR);
9293
}
9394

9495
static StringRef computeDataLayout(const Triple &TT) {
@@ -353,8 +354,12 @@ void RISCVPassConfig::addPreEmitPass() {
353354
}
354355

355356
void RISCVPassConfig::addPreEmitPass2() {
356-
if (TM->getOptLevel() != CodeGenOpt::None)
357+
if (TM->getOptLevel() != CodeGenOpt::None) {
357358
addPass(createRISCVMoveMergePass());
359+
// Schedule PushPop Optimization before expansion of Pseudo instruction,
360+
// ensuring return instruction is detected correctly.
361+
addPass(createRISCVPushPopOptimizationPass());
362+
}
358363
addPass(createRISCVExpandPseudoPass());
359364

360365
// Schedule the expansion of AMOs at the last possible moment, avoiding the

llvm/test/CodeGen/RISCV/O3-pipeline.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@
179179
; CHECK-NEXT: Machine Optimization Remark Emitter
180180
; CHECK-NEXT: Stack Frame Layout Analysis
181181
; CHECK-NEXT: RISC-V Zcmp move merging pass
182+
; CHECK-NEXT: RISC-V Zcmp Push/Pop optimization pass
182183
; CHECK-NEXT: RISC-V pseudo instruction expansion pass
183184
; CHECK-NEXT: RISC-V atomic pseudo instruction expansion pass
184185
; CHECK-NEXT: Unpack machine instruction bundles

llvm/test/CodeGen/RISCV/callee-saved-gprs.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -338,8 +338,7 @@ define void @callee() nounwind {
338338
; RV32IZCMP-NEXT: sw a0, %lo(var+4)(a7)
339339
; RV32IZCMP-NEXT: lw a0, 28(sp) # 4-byte Folded Reload
340340
; RV32IZCMP-NEXT: sw a0, %lo(var)(a7)
341-
; RV32IZCMP-NEXT: cm.pop {ra, s0-s11}, 96
342-
; RV32IZCMP-NEXT: ret
341+
; RV32IZCMP-NEXT: cm.popret {ra, s0-s11}, 96
343342
;
344343
; RV32IZCMP-WITH-FP-LABEL: callee:
345344
; RV32IZCMP-WITH-FP: # %bb.0:
@@ -758,8 +757,7 @@ define void @callee() nounwind {
758757
; RV64IZCMP-NEXT: sw a0, %lo(var+4)(a7)
759758
; RV64IZCMP-NEXT: ld a0, 40(sp) # 8-byte Folded Reload
760759
; RV64IZCMP-NEXT: sw a0, %lo(var)(a7)
761-
; RV64IZCMP-NEXT: cm.pop {ra, s0-s11}, 160
762-
; RV64IZCMP-NEXT: ret
760+
; RV64IZCMP-NEXT: cm.popret {ra, s0-s11}, 160
763761
;
764762
; RV64IZCMP-WITH-FP-LABEL: callee:
765763
; RV64IZCMP-WITH-FP: # %bb.0:
@@ -1287,8 +1285,7 @@ define void @caller() nounwind {
12871285
; RV32IZCMP-NEXT: lw a0, 92(sp) # 4-byte Folded Reload
12881286
; RV32IZCMP-NEXT: sw a0, %lo(var)(s0)
12891287
; RV32IZCMP-NEXT: addi sp, sp, 48
1290-
; RV32IZCMP-NEXT: cm.pop {ra, s0-s11}, 112
1291-
; RV32IZCMP-NEXT: ret
1288+
; RV32IZCMP-NEXT: cm.popret {ra, s0-s11}, 112
12921289
;
12931290
; RV32IZCMP-WITH-FP-LABEL: caller:
12941291
; RV32IZCMP-WITH-FP: # %bb.0:
@@ -1841,8 +1838,7 @@ define void @caller() nounwind {
18411838
; RV64IZCMP-NEXT: ld a0, 168(sp) # 8-byte Folded Reload
18421839
; RV64IZCMP-NEXT: sw a0, %lo(var)(s0)
18431840
; RV64IZCMP-NEXT: addi sp, sp, 128
1844-
; RV64IZCMP-NEXT: cm.pop {ra, s0-s11}, 160
1845-
; RV64IZCMP-NEXT: ret
1841+
; RV64IZCMP-NEXT: cm.popret {ra, s0-s11}, 160
18461842
;
18471843
; RV64IZCMP-WITH-FP-LABEL: caller:
18481844
; RV64IZCMP-WITH-FP: # %bb.0:

llvm/test/CodeGen/RISCV/cm_mvas_mvsa.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,7 @@ define i32 @zcmp_mv(i32 %num, i32 %f) nounwind {
4343
; CHECK32ZCMP-NEXT: cm.mva01s s1, s0
4444
; CHECK32ZCMP-NEXT: call func@plt
4545
; CHECK32ZCMP-NEXT: add a0, s2, s0
46-
; CHECK32ZCMP-NEXT: cm.pop {ra, s0-s2}, 16
47-
; CHECK32ZCMP-NEXT: ret
46+
; CHECK32ZCMP-NEXT: cm.popret {ra, s0-s2}, 16
4847
;
4948
; CHECK64I-LABEL: zcmp_mv:
5049
; CHECK64I: # %bb.0:
@@ -77,8 +76,7 @@ define i32 @zcmp_mv(i32 %num, i32 %f) nounwind {
7776
; CHECK64ZCMP-NEXT: cm.mva01s s1, s0
7877
; CHECK64ZCMP-NEXT: call func@plt
7978
; CHECK64ZCMP-NEXT: addw a0, s2, s0
80-
; CHECK64ZCMP-NEXT: cm.pop {ra, s0-s2}, 32
81-
; CHECK64ZCMP-NEXT: ret
79+
; CHECK64ZCMP-NEXT: cm.popret {ra, s0-s2}, 32
8280
%call = call i32 @func(i32 %num, i32 %f)
8381
%call1 = call i32 @func(i32 %num, i32 %f)
8482
%res = add i32 %call, %f
@@ -121,8 +119,7 @@ define i32 @not_zcmp_mv(i32 %num, i32 %f) nounwind {
121119
; CHECK32ZCMP-NEXT: li a0, 1
122120
; CHECK32ZCMP-NEXT: mv a1, s0
123121
; CHECK32ZCMP-NEXT: call func@plt
124-
; CHECK32ZCMP-NEXT: cm.pop {ra, s0-s1}, 16
125-
; CHECK32ZCMP-NEXT: ret
122+
; CHECK32ZCMP-NEXT: cm.popret {ra, s0-s1}, 16
126123
;
127124
; CHECK64I-LABEL: not_zcmp_mv:
128125
; CHECK64I: # %bb.0:
@@ -159,8 +156,7 @@ define i32 @not_zcmp_mv(i32 %num, i32 %f) nounwind {
159156
; CHECK64ZCMP-NEXT: li a0, 1
160157
; CHECK64ZCMP-NEXT: mv a1, s0
161158
; CHECK64ZCMP-NEXT: call func@plt
162-
; CHECK64ZCMP-NEXT: cm.pop {ra, s0-s1}, 32
163-
; CHECK64ZCMP-NEXT: ret
159+
; CHECK64ZCMP-NEXT: cm.popret {ra, s0-s1}, 32
164160
%call = call i32 @foo(i32 %num)
165161
%call1 = call i32 @foo(i32 %f)
166162
%tmp = call i32 @foo(i32 %call)

0 commit comments

Comments
 (0)