Skip to content

Commit 6757cf4

Browse files
authored
[RISCV] [MachineOutliner] Analyze all candidates (llvm#127659)
llvm#117700 made a change from analyzing all the candidates to analyzing just the first candidate before deciding to either delete or keep all of them. Even though the candidates all have the same instructions, the basic blocks in which they are present are different and we will need to check each of them before deciding whether to keep or erase them. Particularly, `isAvailableAcrossAndOutOfSeq` checks to see if the register (x5 in this case) is available from the end of the MBB to the beginning of the candidate and not checking this for each candidate led to incorrect candidates being outlined resulting in correctness issues in a few downstream benchmarks. Similarly, deleting all the candidates if the first one is not viable will result in missed outlining opportunities.
1 parent 3302bef commit 6757cf4

File tree

2 files changed

+158
-30
lines changed

2 files changed

+158
-30
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Lines changed: 22 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -3016,30 +3016,25 @@ static bool cannotInsertTailCall(const MachineBasicBlock &MBB) {
30163016
return false;
30173017
}
30183018

3019-
static std::optional<MachineOutlinerConstructionID>
3020-
analyzeCandidate(outliner::Candidate &C) {
3019+
static bool analyzeCandidate(outliner::Candidate &C) {
30213020
// If last instruction is return then we can rely on
30223021
// the verification already performed in the getOutliningTypeImpl.
30233022
if (C.back().isReturn()) {
30243023
assert(!cannotInsertTailCall(*C.getMBB()) &&
30253024
"The candidate who uses return instruction must be outlined "
30263025
"using tail call");
3027-
return MachineOutlinerTailCall;
3026+
return false;
30283027
}
30293028

3030-
auto CandidateUsesX5 = [](outliner::Candidate &C) {
3031-
const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo();
3032-
if (std::any_of(C.begin(), C.end(), [TRI](const MachineInstr &MI) {
3033-
return isMIModifiesReg(MI, TRI, RISCV::X5);
3034-
}))
3035-
return true;
3036-
return !C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI);
3037-
};
3038-
3039-
if (!CandidateUsesX5(C))
3040-
return MachineOutlinerDefault;
3029+
// Filter out candidates where the X5 register (t0) can't be used to setup
3030+
// the function call.
3031+
const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo();
3032+
if (std::any_of(C.begin(), C.end(), [TRI](const MachineInstr &MI) {
3033+
return isMIModifiesReg(MI, TRI, RISCV::X5);
3034+
}))
3035+
return true;
30413036

3042-
return std::nullopt;
3037+
return !C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI);
30433038
}
30443039

30453040
std::optional<std::unique_ptr<outliner::OutlinedFunction>>
@@ -3048,35 +3043,32 @@ RISCVInstrInfo::getOutliningCandidateInfo(
30483043
std::vector<outliner::Candidate> &RepeatedSequenceLocs,
30493044
unsigned MinRepeats) const {
30503045

3051-
// Each RepeatedSequenceLoc is identical.
3052-
outliner::Candidate &Candidate = RepeatedSequenceLocs[0];
3053-
auto CandidateInfo = analyzeCandidate(Candidate);
3054-
if (!CandidateInfo)
3055-
RepeatedSequenceLocs.clear();
3046+
// Analyze each candidate and erase the ones that are not viable.
3047+
llvm::erase_if(RepeatedSequenceLocs, analyzeCandidate);
30563048

30573049
// If the sequence doesn't have enough candidates left, then we're done.
30583050
if (RepeatedSequenceLocs.size() < MinRepeats)
30593051
return std::nullopt;
30603052

3053+
// Each RepeatedSequenceLoc is identical.
3054+
outliner::Candidate &Candidate = RepeatedSequenceLocs[0];
30613055
unsigned InstrSizeCExt =
30623056
Candidate.getMF()->getSubtarget<RISCVSubtarget>().hasStdExtCOrZca() ? 2
30633057
: 4;
30643058
unsigned CallOverhead = 0, FrameOverhead = 0;
30653059

3066-
MachineOutlinerConstructionID MOCI = CandidateInfo.value();
3067-
switch (MOCI) {
3068-
case MachineOutlinerDefault:
3069-
// call t0, function = 8 bytes.
3070-
CallOverhead = 8;
3071-
// jr t0 = 4 bytes, 2 bytes if compressed instructions are enabled.
3072-
FrameOverhead = InstrSizeCExt;
3073-
break;
3074-
case MachineOutlinerTailCall:
3060+
MachineOutlinerConstructionID MOCI = MachineOutlinerDefault;
3061+
if (Candidate.back().isReturn()) {
3062+
MOCI = MachineOutlinerTailCall;
30753063
// tail call = auipc + jalr in the worst case without linker relaxation.
30763064
CallOverhead = 4 + InstrSizeCExt;
30773065
// Using tail call we move ret instruction from caller to callee.
30783066
FrameOverhead = 0;
3079-
break;
3067+
} else {
3068+
// call t0, function = 8 bytes.
3069+
CallOverhead = 8;
3070+
// jr t0 = 4 bytes, 2 bytes if compressed instructions are enabled.
3071+
FrameOverhead = InstrSizeCExt;
30803072
}
30813073

30823074
for (auto &C : RepeatedSequenceLocs)
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=riscv32 -x mir -run-pass=machine-outliner -simplify-mir -verify-machineinstrs < %s \
3+
# RUN: | FileCheck -check-prefixes=RV32I-MO %s
4+
# RUN: llc -mtriple=riscv64 -x mir -run-pass=machine-outliner -simplify-mir -verify-machineinstrs < %s \
5+
# RUN: | FileCheck -check-prefixes=RV64I-MO %s
6+
7+
# MIR has been edited by hand to have x5 as live out in @dont_outline
8+
9+
---
10+
11+
name: outline_0
12+
tracksRegLiveness: true
13+
isOutlined: false
14+
body: |
15+
bb.0:
16+
liveins: $x10, $x11
17+
18+
; RV32I-MO-LABEL: name: outline_0
19+
; RV32I-MO: liveins: $x10, $x11
20+
; RV32I-MO-NEXT: {{ $}}
21+
; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
22+
; RV32I-MO-NEXT: PseudoRET implicit $x11
23+
;
24+
; RV64I-MO-LABEL: name: outline_0
25+
; RV64I-MO: liveins: $x10, $x11
26+
; RV64I-MO-NEXT: {{ $}}
27+
; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
28+
; RV64I-MO-NEXT: PseudoRET implicit $x11
29+
$x11 = ORI $x11, 1023
30+
$x12 = ADDI $x10, 17
31+
$x11 = AND $x12, $x11
32+
$x10 = SUB $x10, $x11
33+
PseudoRET implicit $x11
34+
35+
...
36+
---
37+
38+
name: dont_outline
39+
tracksRegLiveness: true
40+
isOutlined: false
41+
body: |
42+
; RV32I-MO-LABEL: name: dont_outline
43+
; RV32I-MO: bb.0:
44+
; RV32I-MO-NEXT: liveins: $x10, $x11
45+
; RV32I-MO-NEXT: {{ $}}
46+
; RV32I-MO-NEXT: $x11 = ORI $x11, 1023
47+
; RV32I-MO-NEXT: $x12 = ADDI $x10, 17
48+
; RV32I-MO-NEXT: $x11 = AND $x12, $x11
49+
; RV32I-MO-NEXT: $x10 = SUB $x10, $x11
50+
; RV32I-MO-NEXT: BEQ $x10, $x11, %bb.1
51+
; RV32I-MO-NEXT: {{ $}}
52+
; RV32I-MO-NEXT: bb.1:
53+
; RV32I-MO-NEXT: liveins: $x10, $x5
54+
; RV32I-MO-NEXT: {{ $}}
55+
; RV32I-MO-NEXT: PseudoRET implicit $x10, implicit $x5
56+
;
57+
; RV64I-MO-LABEL: name: dont_outline
58+
; RV64I-MO: bb.0:
59+
; RV64I-MO-NEXT: liveins: $x10, $x11
60+
; RV64I-MO-NEXT: {{ $}}
61+
; RV64I-MO-NEXT: $x11 = ORI $x11, 1023
62+
; RV64I-MO-NEXT: $x12 = ADDI $x10, 17
63+
; RV64I-MO-NEXT: $x11 = AND $x12, $x11
64+
; RV64I-MO-NEXT: $x10 = SUB $x10, $x11
65+
; RV64I-MO-NEXT: BEQ $x10, $x11, %bb.1
66+
; RV64I-MO-NEXT: {{ $}}
67+
; RV64I-MO-NEXT: bb.1:
68+
; RV64I-MO-NEXT: liveins: $x10, $x5
69+
; RV64I-MO-NEXT: {{ $}}
70+
; RV64I-MO-NEXT: PseudoRET implicit $x10, implicit $x5
71+
bb.0:
72+
liveins: $x10, $x11
73+
74+
$x11 = ORI $x11, 1023
75+
$x12 = ADDI $x10, 17
76+
$x11 = AND $x12, $x11
77+
$x10 = SUB $x10, $x11
78+
BEQ $x10, $x11, %bb.1
79+
80+
bb.1:
81+
liveins: $x10, $x5
82+
PseudoRET implicit $x10, implicit $x5
83+
84+
...
85+
---
86+
87+
name: outline_1
88+
tracksRegLiveness: true
89+
isOutlined: false
90+
body: |
91+
bb.0:
92+
liveins: $x10, $x11
93+
94+
; RV32I-MO-LABEL: name: outline_1
95+
; RV32I-MO: liveins: $x10, $x11
96+
; RV32I-MO-NEXT: {{ $}}
97+
; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
98+
; RV32I-MO-NEXT: PseudoRET implicit $x10
99+
;
100+
; RV64I-MO-LABEL: name: outline_1
101+
; RV64I-MO: liveins: $x10, $x11
102+
; RV64I-MO-NEXT: {{ $}}
103+
; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
104+
; RV64I-MO-NEXT: PseudoRET implicit $x10
105+
$x11 = ORI $x11, 1023
106+
$x12 = ADDI $x10, 17
107+
$x11 = AND $x12, $x11
108+
$x10 = SUB $x10, $x11
109+
PseudoRET implicit $x10
110+
111+
...
112+
---
113+
name: outline_2
114+
tracksRegLiveness: true
115+
isOutlined: false
116+
body: |
117+
bb.0:
118+
liveins: $x10, $x11
119+
120+
; RV32I-MO-LABEL: name: outline_2
121+
; RV32I-MO: liveins: $x10, $x11
122+
; RV32I-MO-NEXT: {{ $}}
123+
; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
124+
; RV32I-MO-NEXT: PseudoRET implicit $x12
125+
;
126+
; RV64I-MO-LABEL: name: outline_2
127+
; RV64I-MO: liveins: $x10, $x11
128+
; RV64I-MO-NEXT: {{ $}}
129+
; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
130+
; RV64I-MO-NEXT: PseudoRET implicit $x12
131+
$x11 = ORI $x11, 1023
132+
$x12 = ADDI $x10, 17
133+
$x11 = AND $x12, $x11
134+
$x10 = SUB $x10, $x11
135+
PseudoRET implicit $x12
136+
...

0 commit comments

Comments
 (0)