Skip to content

Commit dd8a3d3

Browse files
committed
[RISCV] Implement tail call optimization in machine outliner
Following up on issue #89822, this patch adds the opportunity to use tail calls in the machine outliner pass. It also enables outlining of patterns that use the X5 (T0) register.
1 parent 343a810 commit dd8a3d3

8 files changed

+242
-56
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Lines changed: 57 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2929,6 +2929,7 @@ bool RISCVInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
29292929

29302930
// Enum values indicating how an outlined call should be constructed.
29312931
enum MachineOutlinerConstructionID {
2932+
MachineOutlinerTailCall,
29322933
MachineOutlinerDefault
29332934
};
29342935

@@ -2937,19 +2938,47 @@ bool RISCVInstrInfo::shouldOutlineFromFunctionByDefault(
29372938
return MF.getFunction().hasMinSize();
29382939
}
29392940

2941+
static bool IsCandidatePatchable(const MachineInstr &MI) {
2942+
const MachineBasicBlock *MBB = MI.getParent();
2943+
const MachineFunction *MF = MBB->getParent();
2944+
const Function &F = MF->getFunction();
2945+
return F.getFnAttribute("fentry-call").getValueAsBool() ||
2946+
F.hasFnAttribute("patchable-function-entry");
2947+
}
2948+
2949+
static bool CannotInsertTailCall(const MachineInstr &MI) {
2950+
if (MI.isTerminator())
2951+
return IsCandidatePatchable(MI);
2952+
return true;
2953+
}
2954+
2955+
static bool MIUseX5(const MachineInstr &MI, const TargetRegisterInfo *TRI) {
2956+
return MI.modifiesRegister(RISCV::X5, TRI) ||
2957+
MI.getDesc().hasImplicitDefOfPhysReg(RISCV::X5);
2958+
}
2959+
29402960
std::optional<std::unique_ptr<outliner::OutlinedFunction>>
29412961
RISCVInstrInfo::getOutliningCandidateInfo(
29422962
const MachineModuleInfo &MMI,
29432963
std::vector<outliner::Candidate> &RepeatedSequenceLocs,
29442964
unsigned MinRepeats) const {
29452965

2946-
// First we need to filter out candidates where the X5 register (IE t0) can't
2947-
// be used to setup the function call.
2948-
auto CannotInsertCall = [](outliner::Candidate &C) {
2966+
auto CandidateUseX5 = [](outliner::Candidate &C) {
29492967
const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo();
2968+
for (const MachineInstr &MI : C)
2969+
if (MIUseX5(MI, TRI))
2970+
return true;
29502971
return !C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI);
29512972
};
29522973

2974+
auto CannotInsertCall = [CandidateUseX5](outliner::Candidate &C) {
2975+
if (!CandidateUseX5(C))
2976+
return false;
2977+
if (!CannotInsertTailCall(C.back()))
2978+
return false;
2979+
return true;
2980+
};
2981+
29532982
llvm::erase_if(RepeatedSequenceLocs, CannotInsertCall);
29542983

29552984
// If the sequence doesn't have enough candidates left, then we're done.
@@ -2961,6 +2990,17 @@ RISCVInstrInfo::getOutliningCandidateInfo(
29612990
for (auto &MI : RepeatedSequenceLocs[0])
29622991
SequenceSize += getInstSizeInBytes(MI);
29632992

2993+
if (!CannotInsertTailCall(RepeatedSequenceLocs[0].back())) {
2994+
// tail function = 8 bytes. Can't be compressed
2995+
for (auto &C : RepeatedSequenceLocs)
2996+
C.setCallInfo(MachineOutlinerTailCall, 8);
2997+
2998+
// Using a tail call we move the ret instruction from the caller to the callee.
2999+
// So, FrameOverhead for this is 0
3000+
return std::make_unique<outliner::OutlinedFunction>(
3001+
RepeatedSequenceLocs, SequenceSize, 0, MachineOutlinerTailCall);
3002+
}
3003+
29643004
// call t0, function = 8 bytes.
29653005
unsigned CallOverhead = 8;
29663006
for (auto &C : RepeatedSequenceLocs)
@@ -2997,15 +3037,7 @@ RISCVInstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
29973037
return F.needsUnwindTableEntry() ? outliner::InstrType::Illegal
29983038
: outliner::InstrType::Invisible;
29993039

3000-
// We need support for tail calls to outlined functions before return
3001-
// statements can be allowed.
3002-
if (MI.isReturn())
3003-
return outliner::InstrType::Illegal;
3004-
3005-
// Don't allow modifying the X5 register which we use for return addresses for
3006-
// these outlined functions.
3007-
if (MI.modifiesRegister(RISCV::X5, TRI) ||
3008-
MI.getDesc().hasImplicitDefOfPhysReg(RISCV::X5))
3040+
if (CannotInsertTailCall(MBB->back()) && MIUseX5(MI, TRI))
30093041
return outliner::InstrType::Illegal;
30103042

30113043
// Make sure the operands don't reference something unsafe.
@@ -3041,19 +3073,29 @@ void RISCVInstrInfo::buildOutlinedFrame(
30413073
}
30423074
}
30433075

3076+
if (OF.FrameConstructionID == MachineOutlinerTailCall)
3077+
return;
3078+
30443079
MBB.addLiveIn(RISCV::X5);
30453080

30463081
// Add in a return instruction to the end of the outlined frame.
30473082
MBB.insert(MBB.end(), BuildMI(MF, DebugLoc(), get(RISCV::JALR))
3048-
.addReg(RISCV::X0, RegState::Define)
3049-
.addReg(RISCV::X5)
3050-
.addImm(0));
3083+
.addReg(RISCV::X0, RegState::Define)
3084+
.addReg(RISCV::X5)
3085+
.addImm(0));
30513086
}
30523087

30533088
MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall(
30543089
Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
30553090
MachineFunction &MF, outliner::Candidate &C) const {
30563091

3092+
if (C.CallConstructionID == MachineOutlinerTailCall) {
3093+
It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(RISCV::PseudoTAIL))
3094+
.addGlobalAddress(M.getNamedValue(MF.getName()),
3095+
/*Offset=*/0, RISCVII::MO_CALL));
3096+
return It;
3097+
}
3098+
30573099
// Add in a call instruction to the outlined function at the given location.
30583100
It = MBB.insert(It,
30593101
BuildMI(MF, DebugLoc(), get(RISCV::PseudoCALLReg), RISCV::X5)
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
; RUN: llc < %s -verify-machineinstrs -enable-machine-outliner | FileCheck %s
2+
3+
target triple = "riscv64-unknown-linux-gnu"
4+
5+
declare void @foo(i32, i32, i32, i32) minsize
6+
7+
define void @fentry0(i1 %a) nounwind {
8+
; CHECK-LABEL: fentry0:
9+
; CHECK: # %bb.1:
10+
; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB1:[0-9]+]]
11+
; CHECK-NEXT: call foo
12+
; CHECK-LABEL: .LBB0_2:
13+
; CHECK-NEXT: tail OUTLINED_FUNCTION_[[BB2:[0-9]+]]
14+
entry:
15+
br i1 %a, label %if.then, label %if.end
16+
if.then:
17+
call void @foo(i32 1, i32 2, i32 3, i32 4)
18+
br label %if.end
19+
if.end:
20+
call void @foo(i32 5, i32 6, i32 7, i32 8)
21+
ret void
22+
}
23+
24+
define void @fentry1(i1 %a) nounwind {
25+
; CHECK-LABEL: fentry1:
26+
; CHECK: # %bb.1:
27+
; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB1:[0-9]+]]
28+
; CHECK-NEXT: call foo
29+
; CHECK-LABEL: .LBB1_2:
30+
; CHECK-NEXT: tail OUTLINED_FUNCTION_[[BB2:[0-9]+]]
31+
entry:
32+
br i1 %a, label %if.then, label %if.end
33+
if.then:
34+
call void @foo(i32 1, i32 2, i32 3, i32 4)
35+
br label %if.end
36+
if.end:
37+
call void @foo(i32 5, i32 6, i32 7, i32 8)
38+
ret void
39+
}
40+
41+
define void @fentry2(i1 %a) nounwind {
42+
; CHECK-LABEL: fentry2:
43+
; CHECK: # %bb.1:
44+
; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB1:[0-9]+]]
45+
; CHECK-NEXT: call foo
46+
; CHECK-LABEL: .LBB2_2:
47+
; CHECK-NEXT: tail OUTLINED_FUNCTION_[[BB2:[0-9]+]]
48+
entry:
49+
br i1 %a, label %if.then, label %if.end
50+
if.then:
51+
call void @foo(i32 1, i32 2, i32 3, i32 4)
52+
br label %if.end
53+
if.end:
54+
call void @foo(i32 5, i32 6, i32 7, i32 8)
55+
ret void
56+
}
57+
58+
; CHECK: OUTLINED_FUNCTION_[[BB2]]:
59+
; CHECK: li a0, 5
60+
; CHECK-NEXT: li a1, 6
61+
; CHECK-NEXT: li a2, 7
62+
; CHECK-NEXT: li a3, 8
63+
; CHECK-NEXT: call foo
64+
65+
; CHECK: OUTLINED_FUNCTION_[[BB1]]:
66+
; CHECK: li a0, 1
67+
; CHECK-NEXT: li a1, 2
68+
; CHECK-NEXT: li a2, 3
69+
; CHECK-NEXT: li a3, 4
70+
; CHECK-NEXT: jr t0

llvm/test/CodeGen/RISCV/machine-outliner-cfi.mir

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,11 @@ body: |
2222
; RV32I-MO-LABEL: name: func1
2323
; RV32I-MO: liveins: $x10, $x11
2424
; RV32I-MO-NEXT: {{ $}}
25-
; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
26-
; RV32I-MO-NEXT: PseudoRET
25+
; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
2726
; RV64I-MO-LABEL: name: func1
2827
; RV64I-MO: liveins: $x10, $x11
2928
; RV64I-MO-NEXT: {{ $}}
30-
; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
31-
; RV64I-MO-NEXT: PseudoRET
29+
; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
3230
$x10 = ORI $x10, 1023
3331
CFI_INSTRUCTION offset $x1, 0
3432
$x11 = ORI $x11, 1023
@@ -49,13 +47,11 @@ body: |
4947
; RV32I-MO-LABEL: name: func2
5048
; RV32I-MO: liveins: $x10, $x11
5149
; RV32I-MO-NEXT: {{ $}}
52-
; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
53-
; RV32I-MO-NEXT: PseudoRET
50+
; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
5451
; RV64I-MO-LABEL: name: func2
5552
; RV64I-MO: liveins: $x10, $x11
5653
; RV64I-MO-NEXT: {{ $}}
57-
; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
58-
; RV64I-MO-NEXT: PseudoRET
54+
; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
5955
$x10 = ORI $x10, 1023
6056
CFI_INSTRUCTION offset $x1, 0
6157
$x11 = ORI $x11, 1023
@@ -76,13 +72,11 @@ body: |
7672
; RV32I-MO-LABEL: name: func3
7773
; RV32I-MO: liveins: $x10, $x11
7874
; RV32I-MO-NEXT: {{ $}}
79-
; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
80-
; RV32I-MO-NEXT: PseudoRET
75+
; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
8176
; RV64I-MO-LABEL: name: func3
8277
; RV64I-MO: liveins: $x10, $x11
8378
; RV64I-MO-NEXT: {{ $}}
84-
; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
85-
; RV64I-MO-NEXT: PseudoRET
79+
; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
8680
$x10 = ORI $x10, 1023
8781
CFI_INSTRUCTION offset $x1, -12
8882
$x11 = ORI $x11, 1023
@@ -96,11 +90,11 @@ body: |
9690
9791
9892
# OUTLINED-LABEL: name: OUTLINED_FUNCTION_0
99-
# OUTLINED: liveins: $x11, $x10, $x5
93+
# OUTLINED: liveins: $x11, $x10
10094
# OUTLINED-NEXT: {{ $}}
10195
# OUTLINED-NEXT: $x10 = ORI $x10, 1023
10296
# OUTLINED-NEXT: $x11 = ORI $x11, 1023
10397
# OUTLINED-NEXT: $x12 = ADDI $x10, 17
10498
# OUTLINED-NEXT: $x11 = AND $x12, $x11
10599
# OUTLINED-NEXT: $x10 = SUB $x10, $x11
106-
# OUTLINED-NEXT: $x0 = JALR $x5, 0
100+
# OUTLINED-NEXT: PseudoRET

llvm/test/CodeGen/RISCV/machine-outliner-leaf-descendants.ll

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -94,25 +94,28 @@ define i32 @_Z2f6v() minsize {
9494
; CHECK-BASELINE-NEXT: li a3, 0x4
9595
; CHECK-BASELINE-NEXT: li a4, 0x5
9696
; CHECK-BASELINE-NEXT: li a5, 0x6
97-
; CHECK-BASELINE-NEXT: jr t0
97+
; CHECK-BASELINE-NEXT: auipc t1, 0x0
98+
; CHECK-BASELINE-NEXT: jr t1
9899

99100
; CHECK-BASELINE: <OUTLINED_FUNCTION_1>:
100101
; CHECK-BASELINE-NEXT: li a0, 0x1
101102
; CHECK-BASELINE-NEXT: li a1, 0x2
102103
; CHECK-BASELINE-NEXT: li a2, 0x3
103104
; CHECK-BASELINE-NEXT: li a3, 0x4
104105
; CHECK-BASELINE-NEXT: li a4, 0x5
105-
; CHECK-BASELINE-NEXT: li a5, 0x7
106-
; CHECK-BASELINE-NEXT: jr t0
106+
; CHECK-BASELINE-NEXT: li a5, 0x8
107+
; CHECK-BASELINE-NEXT: auipc t1, 0x0
108+
; CHECK-BASELINE-NEXT: jr t1
107109

108110
; CHECK-BASELINE: <OUTLINED_FUNCTION_2>:
109111
; CHECK-BASELINE-NEXT: li a0, 0x1
110112
; CHECK-BASELINE-NEXT: li a1, 0x2
111113
; CHECK-BASELINE-NEXT: li a2, 0x3
112114
; CHECK-BASELINE-NEXT: li a3, 0x4
113115
; CHECK-BASELINE-NEXT: li a4, 0x5
114-
; CHECK-BASELINE-NEXT: li a5, 0x8
115-
; CHECK-BASELINE-NEXT: jr t0
116+
; CHECK-BASELINE-NEXT: li a5, 0x7
117+
; CHECK-BASELINE-NEXT: auipc t1, 0x0
118+
; CHECK-BASELINE-NEXT: jr t1
116119

117120
; CHECK-LEAF-DESCENDANTS: <OUTLINED_FUNCTION_0>:
118121
; CHECK-LEAF-DESCENDANTS-NEXT: li a0, 0x1

llvm/test/CodeGen/RISCV/machine-outliner-patchable.ll

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,11 @@ define void @fentry0(i1 %a) nounwind "fentry-call"="true" {
1111
; CHECK-NEXT: # %bb.0:
1212
; CHECK-NEXT: # FEntry call
1313
; CHECK: # %bb.1:
14-
; CHECK-NEXT: call t0, OUTLINED_FUNCTION_1
14+
; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB1:[0-9]+]]
15+
; CHECK-NEXT: call foo
16+
; CHECK-LABEL: .LBB0_2:
17+
; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB2:[0-9]+]]
18+
; CHECK-NEXT: call foo
1519
entry:
1620
br i1 %a, label %if.then, label %if.end
1721
if.then:
@@ -27,7 +31,11 @@ define void @fentry1(i1 %a) nounwind "fentry-call"="true" {
2731
; CHECK-NEXT: # %bb.0:
2832
; CHECK-NEXT: # FEntry call
2933
; CHECK: # %bb.1:
30-
; CHECK-NEXT: call t0, OUTLINED_FUNCTION_1
34+
; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB1:[0-9]+]]
35+
; CHECK-NEXT: call foo
36+
; CHECK-LABEL: .LBB1_2:
37+
; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB2:[0-9]+]]
38+
; CHECK-NEXT: call foo
3139
entry:
3240
br i1 %a, label %if.then, label %if.end
3341
if.then:
@@ -47,7 +55,11 @@ define void @patchable0(i1 %a) nounwind "patchable-function-entry"="2" {
4755
; CHECK-NEXT: nop
4856
; CHECK-NEXT: nop
4957
; CHECK: # %bb.1:
50-
; CHECK-NEXT: call t0, OUTLINED_FUNCTION_1
58+
; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB1:[0-9]+]]
59+
; CHECK-NEXT: call foo
60+
; CHECK-LABEL: .LBB2_2:
61+
; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB2:[0-9]+]]
62+
; CHECK-NEXT: call foo
5163
entry:
5264
br i1 %a, label %if.then, label %if.end
5365
if.then:
@@ -65,7 +77,11 @@ define void @patchable1(i1 %a) nounwind "patchable-function-entry"="2" {
6577
; CHECK-NEXT: nop
6678
; CHECK-NEXT: nop
6779
; CHECK: # %bb.1:
68-
; CHECK-NEXT: call t0, OUTLINED_FUNCTION_1
80+
; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB1:[0-9]+]]
81+
; CHECK-NEXT: call foo
82+
; CHECK-LABEL: .LBB3_2:
83+
; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB2:[0-9]+]]
84+
; CHECK-NEXT: call foo
6985
entry:
7086
br i1 %a, label %if.then, label %if.end
7187
if.then:

0 commit comments

Comments
 (0)