Skip to content

Commit 3076a68

Browse files
svs-quictstellar
authored andcommitted
[RISCV] [MachineOutliner] Analyze all candidates (llvm#127659)
llvm#117700 made a change from analyzing all the candidates to analyzing just the first candidate before deciding to either delete or keep all of them. Even though the candidates all have the same instructions, the basic blocks in which they are present are different and we will need to check each of them before deciding whether to keep or erase them. Particularly, `isAvailableAcrossAndOutOfSeq` checks to see if the register (x5 in this case) is available from the end of the MBB to the beginning of the candidate and not checking this for each candidate led to incorrect candidates being outlined resulting in correctness issues in a few downstream benchmarks. Similarly, deleting all the candidates if the first one is not viable will result in missed outlining opportunities. (cherry picked from commit 6757cf4)
1 parent deb63e7 commit 3076a68

File tree

2 files changed

+158
-30
lines changed

2 files changed

+158
-30
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Lines changed: 22 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -3010,30 +3010,25 @@ static bool cannotInsertTailCall(const MachineBasicBlock &MBB) {
30103010
return false;
30113011
}
30123012

3013-
static std::optional<MachineOutlinerConstructionID>
3014-
analyzeCandidate(outliner::Candidate &C) {
3013+
static bool analyzeCandidate(outliner::Candidate &C) {
30153014
// If last instruction is return then we can rely on
30163015
// the verification already performed in the getOutliningTypeImpl.
30173016
if (C.back().isReturn()) {
30183017
assert(!cannotInsertTailCall(*C.getMBB()) &&
30193018
"The candidate who uses return instruction must be outlined "
30203019
"using tail call");
3021-
return MachineOutlinerTailCall;
3020+
return false;
30223021
}
30233022

3024-
auto CandidateUsesX5 = [](outliner::Candidate &C) {
3025-
const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo();
3026-
if (std::any_of(C.begin(), C.end(), [TRI](const MachineInstr &MI) {
3027-
return isMIModifiesReg(MI, TRI, RISCV::X5);
3028-
}))
3029-
return true;
3030-
return !C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI);
3031-
};
3032-
3033-
if (!CandidateUsesX5(C))
3034-
return MachineOutlinerDefault;
3023+
// Filter out candidates where the X5 register (t0) can't be used to setup
3024+
// the function call.
3025+
const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo();
3026+
if (std::any_of(C.begin(), C.end(), [TRI](const MachineInstr &MI) {
3027+
return isMIModifiesReg(MI, TRI, RISCV::X5);
3028+
}))
3029+
return true;
30353030

3036-
return std::nullopt;
3031+
return !C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI);
30373032
}
30383033

30393034
std::optional<std::unique_ptr<outliner::OutlinedFunction>>
@@ -3042,35 +3037,32 @@ RISCVInstrInfo::getOutliningCandidateInfo(
30423037
std::vector<outliner::Candidate> &RepeatedSequenceLocs,
30433038
unsigned MinRepeats) const {
30443039

3045-
// Each RepeatedSequenceLoc is identical.
3046-
outliner::Candidate &Candidate = RepeatedSequenceLocs[0];
3047-
auto CandidateInfo = analyzeCandidate(Candidate);
3048-
if (!CandidateInfo)
3049-
RepeatedSequenceLocs.clear();
3040+
// Analyze each candidate and erase the ones that are not viable.
3041+
llvm::erase_if(RepeatedSequenceLocs, analyzeCandidate);
30503042

30513043
// If the sequence doesn't have enough candidates left, then we're done.
30523044
if (RepeatedSequenceLocs.size() < MinRepeats)
30533045
return std::nullopt;
30543046

3047+
// Each RepeatedSequenceLoc is identical.
3048+
outliner::Candidate &Candidate = RepeatedSequenceLocs[0];
30553049
unsigned InstrSizeCExt =
30563050
Candidate.getMF()->getSubtarget<RISCVSubtarget>().hasStdExtCOrZca() ? 2
30573051
: 4;
30583052
unsigned CallOverhead = 0, FrameOverhead = 0;
30593053

3060-
MachineOutlinerConstructionID MOCI = CandidateInfo.value();
3061-
switch (MOCI) {
3062-
case MachineOutlinerDefault:
3063-
// call t0, function = 8 bytes.
3064-
CallOverhead = 8;
3065-
// jr t0 = 4 bytes, 2 bytes if compressed instructions are enabled.
3066-
FrameOverhead = InstrSizeCExt;
3067-
break;
3068-
case MachineOutlinerTailCall:
3054+
MachineOutlinerConstructionID MOCI = MachineOutlinerDefault;
3055+
if (Candidate.back().isReturn()) {
3056+
MOCI = MachineOutlinerTailCall;
30693057
// tail call = auipc + jalr in the worst case without linker relaxation.
30703058
CallOverhead = 4 + InstrSizeCExt;
30713059
// Using tail call we move ret instruction from caller to callee.
30723060
FrameOverhead = 0;
3073-
break;
3061+
} else {
3062+
// call t0, function = 8 bytes.
3063+
CallOverhead = 8;
3064+
// jr t0 = 4 bytes, 2 bytes if compressed instructions are enabled.
3065+
FrameOverhead = InstrSizeCExt;
30743066
}
30753067

30763068
for (auto &C : RepeatedSequenceLocs)
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=riscv32 -x mir -run-pass=machine-outliner -simplify-mir -verify-machineinstrs < %s \
3+
# RUN: | FileCheck -check-prefixes=RV32I-MO %s
4+
# RUN: llc -mtriple=riscv64 -x mir -run-pass=machine-outliner -simplify-mir -verify-machineinstrs < %s \
5+
# RUN: | FileCheck -check-prefixes=RV64I-MO %s
6+
7+
# MIR has been edited by hand to have x5 as live out in @dont_outline
8+
9+
---
10+
11+
name: outline_0
12+
tracksRegLiveness: true
13+
isOutlined: false
14+
body: |
15+
bb.0:
16+
liveins: $x10, $x11
17+
18+
; RV32I-MO-LABEL: name: outline_0
19+
; RV32I-MO: liveins: $x10, $x11
20+
; RV32I-MO-NEXT: {{ $}}
21+
; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
22+
; RV32I-MO-NEXT: PseudoRET implicit $x11
23+
;
24+
; RV64I-MO-LABEL: name: outline_0
25+
; RV64I-MO: liveins: $x10, $x11
26+
; RV64I-MO-NEXT: {{ $}}
27+
; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
28+
; RV64I-MO-NEXT: PseudoRET implicit $x11
29+
$x11 = ORI $x11, 1023
30+
$x12 = ADDI $x10, 17
31+
$x11 = AND $x12, $x11
32+
$x10 = SUB $x10, $x11
33+
PseudoRET implicit $x11
34+
35+
...
36+
---
37+
38+
name: dont_outline
39+
tracksRegLiveness: true
40+
isOutlined: false
41+
body: |
42+
; RV32I-MO-LABEL: name: dont_outline
43+
; RV32I-MO: bb.0:
44+
; RV32I-MO-NEXT: liveins: $x10, $x11
45+
; RV32I-MO-NEXT: {{ $}}
46+
; RV32I-MO-NEXT: $x11 = ORI $x11, 1023
47+
; RV32I-MO-NEXT: $x12 = ADDI $x10, 17
48+
; RV32I-MO-NEXT: $x11 = AND $x12, $x11
49+
; RV32I-MO-NEXT: $x10 = SUB $x10, $x11
50+
; RV32I-MO-NEXT: BEQ $x10, $x11, %bb.1
51+
; RV32I-MO-NEXT: {{ $}}
52+
; RV32I-MO-NEXT: bb.1:
53+
; RV32I-MO-NEXT: liveins: $x10, $x5
54+
; RV32I-MO-NEXT: {{ $}}
55+
; RV32I-MO-NEXT: PseudoRET implicit $x10, implicit $x5
56+
;
57+
; RV64I-MO-LABEL: name: dont_outline
58+
; RV64I-MO: bb.0:
59+
; RV64I-MO-NEXT: liveins: $x10, $x11
60+
; RV64I-MO-NEXT: {{ $}}
61+
; RV64I-MO-NEXT: $x11 = ORI $x11, 1023
62+
; RV64I-MO-NEXT: $x12 = ADDI $x10, 17
63+
; RV64I-MO-NEXT: $x11 = AND $x12, $x11
64+
; RV64I-MO-NEXT: $x10 = SUB $x10, $x11
65+
; RV64I-MO-NEXT: BEQ $x10, $x11, %bb.1
66+
; RV64I-MO-NEXT: {{ $}}
67+
; RV64I-MO-NEXT: bb.1:
68+
; RV64I-MO-NEXT: liveins: $x10, $x5
69+
; RV64I-MO-NEXT: {{ $}}
70+
; RV64I-MO-NEXT: PseudoRET implicit $x10, implicit $x5
71+
bb.0:
72+
liveins: $x10, $x11
73+
74+
$x11 = ORI $x11, 1023
75+
$x12 = ADDI $x10, 17
76+
$x11 = AND $x12, $x11
77+
$x10 = SUB $x10, $x11
78+
BEQ $x10, $x11, %bb.1
79+
80+
bb.1:
81+
liveins: $x10, $x5
82+
PseudoRET implicit $x10, implicit $x5
83+
84+
...
85+
---
86+
87+
name: outline_1
88+
tracksRegLiveness: true
89+
isOutlined: false
90+
body: |
91+
bb.0:
92+
liveins: $x10, $x11
93+
94+
; RV32I-MO-LABEL: name: outline_1
95+
; RV32I-MO: liveins: $x10, $x11
96+
; RV32I-MO-NEXT: {{ $}}
97+
; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
98+
; RV32I-MO-NEXT: PseudoRET implicit $x10
99+
;
100+
; RV64I-MO-LABEL: name: outline_1
101+
; RV64I-MO: liveins: $x10, $x11
102+
; RV64I-MO-NEXT: {{ $}}
103+
; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
104+
; RV64I-MO-NEXT: PseudoRET implicit $x10
105+
$x11 = ORI $x11, 1023
106+
$x12 = ADDI $x10, 17
107+
$x11 = AND $x12, $x11
108+
$x10 = SUB $x10, $x11
109+
PseudoRET implicit $x10
110+
111+
...
112+
---
113+
name: outline_2
114+
tracksRegLiveness: true
115+
isOutlined: false
116+
body: |
117+
bb.0:
118+
liveins: $x10, $x11
119+
120+
; RV32I-MO-LABEL: name: outline_2
121+
; RV32I-MO: liveins: $x10, $x11
122+
; RV32I-MO-NEXT: {{ $}}
123+
; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
124+
; RV32I-MO-NEXT: PseudoRET implicit $x12
125+
;
126+
; RV64I-MO-LABEL: name: outline_2
127+
; RV64I-MO: liveins: $x10, $x11
128+
; RV64I-MO-NEXT: {{ $}}
129+
; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
130+
; RV64I-MO-NEXT: PseudoRET implicit $x12
131+
$x11 = ORI $x11, 1023
132+
$x12 = ADDI $x10, 17
133+
$x11 = AND $x12, $x11
134+
$x10 = SUB $x10, $x11
135+
PseudoRET implicit $x12
136+
...

0 commit comments

Comments
 (0)