Skip to content

Commit 5599c01

Browse files
committed
[BOLT] Fixes for new profile
Summary: Do a better job of recording fall-through branches in new profile mode (-prof-compat-mode=0). For this we need to record offsets for all instructions that are last in the containing basic block. Change the way we convert conditional tail calls. Now we never reverse the condition. This is required for better profile matching. The original approach of preserving the direction was controversial to start with. Add "-infer-fall-throughs" option (on by default) to allow disabling inference of fall-through edge counts. (cherry picked from FBD6994293)
1 parent a24c554 commit 5599c01

File tree

5 files changed

+96
-92
lines changed

5 files changed

+96
-92
lines changed

bolt/BinaryBasicBlock.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -411,18 +411,18 @@ class BinaryBasicBlock {
411411

412412
/// Add instruction at the end of this basic block.
413413
/// Returns the index of the instruction in the Instructions vector of the BB.
414-
uint32_t addInstruction(MCInst &&Inst) {
414+
iterator addInstruction(MCInst &&Inst) {
415415
adjustNumPseudos(Inst, 1);
416416
Instructions.emplace_back(Inst);
417-
return Instructions.size() - 1;
417+
return std::prev(Instructions.end());
418418
}
419419

420420
/// Add instruction at the end of this basic block.
421421
/// Returns the index of the instruction in the Instructions vector of the BB.
422-
uint32_t addInstruction(const MCInst &Inst) {
422+
iterator addInstruction(const MCInst &Inst) {
423423
adjustNumPseudos(Inst, 1);
424424
Instructions.push_back(Inst);
425-
return Instructions.size() - 1;
425+
return std::prev(Instructions.end());
426426
}
427427

428428
/// Add a range of instructions to the end of this basic block.

bolt/BinaryFunction.cpp

Lines changed: 34 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -470,7 +470,11 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
470470

471471
uint64_t BBExecCount = BB->getExecutionCount();
472472
if (hasValidProfile()) {
473-
OS << " Exec Count : " << BBExecCount << '\n';
473+
OS << " Exec Count : ";
474+
if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE)
475+
OS << BBExecCount << '\n';
476+
else
477+
OS << "<unknown>\n";
474478
}
475479
if (BB->getCFIState() >= 0) {
476480
OS << " CFI State : " << BB->getCFIState() << '\n';
@@ -1492,7 +1496,7 @@ bool BinaryFunction::buildCFG() {
14921496
BinaryBasicBlock *InsertBB{nullptr};
14931497
BinaryBasicBlock *PrevBB{nullptr};
14941498
bool IsLastInstrNop{false};
1495-
const MCInst *PrevInstr{nullptr};
1499+
uint64_t LastInstrOffset{0};
14961500

14971501
auto addCFIPlaceholders =
14981502
[this](uint64_t CFIOffset, BinaryBasicBlock *InsertBB) {
@@ -1503,6 +1507,16 @@ bool BinaryFunction::buildCFG() {
15031507
}
15041508
};
15051509

1510+
// For profiling purposes we need to save the offset of the last instruction
1511+
// in the basic block. But in certain cases we don't if the instruction was
1512+
// the last one, and we have to go back and update its offset.
1513+
auto updateOffset = [&](uint64_t Offset) {
1514+
assert(PrevBB && PrevBB != InsertBB && "invalid previous block");
1515+
auto *PrevInstr = PrevBB->getLastNonPseudoInstr();
1516+
if (PrevInstr && !MIA->hasAnnotation(*PrevInstr, "Offset"))
1517+
MIA->addAnnotation(BC.Ctx.get(), *PrevInstr, "Offset", Offset);
1518+
};
1519+
15061520
for (auto I = Instructions.begin(), E = Instructions.end(); I != E; ++I) {
15071521
const auto Offset = I->first;
15081522
auto &Instr = I->second;
@@ -1515,6 +1529,8 @@ bool BinaryFunction::buildCFG() {
15151529
/* DeriveAlignment = */ IsLastInstrNop);
15161530
if (hasEntryPointAtOffset(Offset))
15171531
InsertBB->setEntryPoint();
1532+
if (PrevBB)
1533+
updateOffset(LastInstrOffset);
15181534
}
15191535
// Ignore nops. We use nops to derive alignment of the next basic block.
15201536
// It will not always work, as some blocks are naturally aligned, but
@@ -1528,6 +1544,7 @@ bool BinaryFunction::buildCFG() {
15281544
// we see an unconditional branch following a conditional one. The latter
15291545
// should not be a conditional tail call.
15301546
assert(PrevBB && "no previous basic block for a fall through");
1547+
auto *PrevInstr = PrevBB->getLastNonPseudoInstr();
15311548
assert(PrevInstr && "no previous instruction for a fall through");
15321549
if (MIA->isUnconditionalBranch(Instr) &&
15331550
!MIA->isUnconditionalBranch(*PrevInstr) &&
@@ -1538,16 +1555,18 @@ bool BinaryFunction::buildCFG() {
15381555
InsertBB = addBasicBlock(Offset,
15391556
BC.Ctx->createTempSymbol("FT", true),
15401557
/* DeriveAlignment = */ IsLastInstrNop);
1558+
updateOffset(LastInstrOffset);
15411559
}
15421560
}
15431561
if (Offset == 0) {
15441562
// Add associated CFI pseudos in the first offset (0)
15451563
addCFIPlaceholders(0, InsertBB);
15461564
}
15471565

1548-
IsLastInstrNop = false;
1549-
InsertBB->addInstruction(Instr);
1550-
PrevInstr = &Instr;
1566+
const auto IsBlockEnd = MIA->isTerminator(Instr);
1567+
IsLastInstrNop = MIA->isNoop(Instr);
1568+
LastInstrOffset = Offset;
1569+
InsertBB->addInstruction(std::move(Instr));
15511570

15521571
// Add associated CFI instrs. We always add the CFI instruction that is
15531572
// located immediately after this instruction, since the next CFI
@@ -1558,9 +1577,11 @@ bool BinaryFunction::buildCFG() {
15581577
CFIOffset = NextInstr->first;
15591578
else
15601579
CFIOffset = getSize();
1580+
1581+
// Note: this potentially invalidates instruction pointers/iterators.
15611582
addCFIPlaceholders(CFIOffset, InsertBB);
15621583

1563-
if (MIA->isTerminator(Instr)) {
1584+
if (IsBlockEnd) {
15641585
PrevBB = InsertBB;
15651586
InsertBB = nullptr;
15661587
}
@@ -1769,10 +1790,6 @@ void BinaryFunction::addEntryPoint(uint64_t Address) {
17691790
}
17701791

17711792
void BinaryFunction::removeConditionalTailCalls() {
1772-
// Don't touch code if non-simple ARM
1773-
if (BC.TheTriple->getArch() == llvm::Triple::aarch64 && !isSimple())
1774-
return;
1775-
17761793
// Blocks to be appended at the end.
17771794
std::vector<std::unique_ptr<BinaryBasicBlock>> NewBlocks;
17781795

@@ -1824,29 +1841,14 @@ void BinaryFunction::removeConditionalTailCalls() {
18241841

18251842
BC.MIA->convertTailCallToJmp(*CTCInstr);
18261843

1827-
// In attempt to preserve the direction of the original conditional jump,
1828-
// we will either create an unconditional jump in a separate basic block
1829-
// at the end of the function, or reverse a condition of the jump
1830-
// and create a fall-through block right after the original tail call.
1831-
if (getAddress() >= *TargetAddressOrNone) {
1832-
// Insert the basic block right after the current one.
1833-
std::vector<std::unique_ptr<BinaryBasicBlock>> TCBB;
1834-
TCBB.emplace_back(std::move(TailCallBB));
1835-
BBI = insertBasicBlocks(BBI,
1836-
std::move(TCBB),
1837-
/* UpdateLayout */ true,
1838-
/* UpdateCFIState */ false);
1839-
BC.MIA->reverseBranchCondition(
1840-
*CTCInstr, (*std::next(BBI)).getLabel(), BC.Ctx.get());
1844+
BC.MIA->replaceBranchTarget(*CTCInstr, TailCallBB->getLabel(),
1845+
BC.Ctx.get());
18411846

1842-
} else {
1843-
BC.MIA->replaceBranchTarget(*CTCInstr, TailCallBB->getLabel(),
1844-
BC.Ctx.get());
1845-
// Add basic block to the list that will be added to the end.
1846-
NewBlocks.emplace_back(std::move(TailCallBB));
1847-
// Swap edges as the TailCallBB corresponds to the taken branch.
1848-
BB.swapConditionalSuccessors();
1849-
}
1847+
// Add basic block to the list that will be added to the end.
1848+
NewBlocks.emplace_back(std::move(TailCallBB));
1849+
1850+
// Swap edges as the TailCallBB corresponds to the taken branch.
1851+
BB.swapConditionalSuccessors();
18501852

18511853
// This branch is no longer a conditional tail call.
18521854
BC.MIA->unsetConditionalTailCall(*CTCInstr);

bolt/BinaryFunction.h

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -189,12 +189,22 @@ using IndirectCallSiteProfile = SmallVector<IndirectCallProfile, 4>;
189189

190190
inline raw_ostream &operator<<(raw_ostream &OS,
191191
const bolt::IndirectCallSiteProfile &ICSP) {
192-
const char *Sep = "";
192+
std::string TempString;
193+
raw_string_ostream SS(TempString);
194+
195+
const char *Sep = "\n ";
196+
uint64_t TotalCount = 0;
197+
uint64_t TotalMispreds = 0;
193198
for (auto &CSP : ICSP) {
194-
OS << Sep << "{ " << (CSP.IsFunction ? CSP.Name : "<unknown>") << ": "
199+
SS << Sep << "{ " << (CSP.IsFunction ? CSP.Name : "<unknown>") << ": "
195200
<< CSP.Count << " (" << CSP.Mispreds << " misses) }";
196-
Sep = ", ";
201+
Sep = ",\n ";
202+
TotalCount += CSP.Count;
203+
TotalMispreds += CSP.Mispreds;
197204
}
205+
SS.flush();
206+
207+
OS << TotalCount << " (" << TotalMispreds << " misses) :" << TempString;
198208
return OS;
199209
}
200210

bolt/BinaryFunctionProfile.cpp

Lines changed: 42 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -69,14 +69,22 @@ FixFuncCounts("fix-func-counts",
6969
cl::Hidden,
7070
cl::cat(BoltOptCategory));
7171

72+
static cl::opt<bool>
73+
InferFallThroughs("infer-fall-throughs",
74+
cl::desc("infer execution count for fall-through blocks"),
75+
cl::init(true),
76+
cl::ZeroOrMore,
77+
cl::Hidden,
78+
cl::cat(BoltOptCategory));
79+
7280
} // namespace opts
7381

7482
namespace llvm {
7583
namespace bolt {
7684

7785
bool BinaryFunction::recordTrace(
78-
const LBREntry &First,
79-
const LBREntry &Second,
86+
const LBREntry &FirstLBR,
87+
const LBREntry &SecondLBR,
8088
uint64_t Count,
8189
SmallVector<std::pair<uint64_t, uint64_t>, 16> *Branches) {
8290
if (!isSimple())
@@ -85,8 +93,8 @@ bool BinaryFunction::recordTrace(
8593
assert(CurrentState == State::CFG && "can only record traces in CFG state");
8694

8795
// Offsets of the trace within this function.
88-
const auto From = First.To - getAddress();
89-
const auto To = Second.From - getAddress();
96+
const auto From = FirstLBR.To - getAddress();
97+
const auto To = SecondLBR.From - getAddress();
9098

9199
if (From > To)
92100
return false;
@@ -97,47 +105,27 @@ bool BinaryFunction::recordTrace(
97105
if (!FromBB || !ToBB)
98106
return false;
99107

108+
// Adjust FromBB if the first LBR is a return from the last instruction in
109+
// the previous block (that instruction should be a call).
110+
if (From == FromBB->getOffset() && !containsAddress(FirstLBR.From) &&
111+
!FromBB->isEntryPoint() && !FromBB->isLandingPad()) {
112+
auto *PrevBB = BasicBlocksLayout[FromBB->getIndex() - 1];
113+
if (PrevBB->getSuccessor(FromBB->getLabel())) {
114+
const auto *Instr = PrevBB->getLastNonPseudoInstr();
115+
if (Instr && BC.MIA->isCall(*Instr)) {
116+
FromBB = PrevBB;
117+
} else {
118+
DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR << '\n');
119+
}
120+
} else {
121+
DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n');
122+
}
123+
}
124+
100125
// Fill out information for fall-through edges. The From and To could be
101126
// within the same basic block, e.g. when two call instructions are in the
102127
// same block. In this case we skip the processing.
103128
if (FromBB == ToBB) {
104-
if (opts::CompatMode)
105-
return true;
106-
107-
// If the previous block ended with a call, the destination of a return
108-
// would be in ToBB basic block. And if the ToBB starts with a control
109-
// transfer instruction, we will have a 0-length trace that we have to
110-
// account for as a fall-through edge.
111-
if (To == ToBB->getOffset()) {
112-
// External entry point.
113-
if (ToBB->isEntryPoint() || ToBB->isLandingPad())
114-
return true;
115-
116-
// Check that the origin LBR of a trace starts in another function.
117-
// Otherwise it's an internal branch that was accounted for.
118-
if (containsAddress(First.From))
119-
return true;
120-
121-
auto *PrevBB = BasicBlocksLayout[ToBB->getIndex() - 1];
122-
123-
// This could be a bad trace.
124-
if (!PrevBB->getSuccessor(ToBB->getLabel())) {
125-
DEBUG(dbgs() << "invalid LBR sequence:\n"
126-
<< " " << First << '\n'
127-
<< " " << Second << '\n');
128-
return false;
129-
}
130-
131-
auto &BI = PrevBB->getBranchInfo(*ToBB);
132-
BI.Count += Count;
133-
if (Branches) {
134-
const auto *Instr = PrevBB->getLastNonPseudoInstr();
135-
const auto Offset =
136-
BC.MIA->getAnnotationWithDefault<uint64_t>(*Instr, "Offset");
137-
Branches->push_back(std::make_pair(Offset, ToBB->getOffset()));
138-
}
139-
}
140-
141129
return true;
142130
}
143131

@@ -151,8 +139,8 @@ bool BinaryFunction::recordTrace(
151139
// Check for bad LBRs.
152140
if (!BB->getSuccessor(NextBB->getLabel())) {
153141
DEBUG(dbgs() << "no fall-through for the trace:\n"
154-
<< " " << First << '\n'
155-
<< " " << Second << '\n');
142+
<< " " << FirstLBR << '\n'
143+
<< " " << SecondLBR << '\n');
156144
return false;
157145
}
158146

@@ -166,12 +154,13 @@ bool BinaryFunction::recordTrace(
166154

167155
if (Branches) {
168156
const auto *Instr = BB->getLastNonPseudoInstr();
169-
// Note: real offset for conditional jump instruction shouldn't be 0.
170-
const auto Offset =
171-
BC.MIA->getAnnotationWithDefault<uint64_t>(*Instr, "Offset");
172-
if (Offset) {
173-
Branches->push_back(std::make_pair(Offset, NextBB->getOffset()));
157+
uint64_t Offset{0};
158+
if (Instr) {
159+
Offset = BC.MIA->getAnnotationWithDefault<uint64_t>(*Instr, "Offset");
160+
} else {
161+
Offset = BB->getOffset();
174162
}
163+
Branches->emplace_back(std::make_pair(Offset, NextBB->getOffset()));
175164
}
176165
}
177166

@@ -374,7 +363,8 @@ void BinaryFunction::postProcessProfile() {
374363
}
375364
}
376365

377-
inferFallThroughCounts();
366+
if (opts::InferFallThroughs)
367+
inferFallThroughCounts();
378368

379369
// Update profile information for jump tables based on CFG branch data.
380370
for (auto *BB : BasicBlocks) {
@@ -421,11 +411,11 @@ void BinaryFunction::postProcessProfile() {
421411
}
422412

423413
Optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>>
424-
BinaryFunction::getFallthroughsInTrace(const LBREntry &First,
425-
const LBREntry &Second) {
414+
BinaryFunction::getFallthroughsInTrace(const LBREntry &FirstLBR,
415+
const LBREntry &SecondLBR) {
426416
SmallVector<std::pair<uint64_t, uint64_t>, 16> Res;
427417

428-
if (!recordTrace(First, Second, 1, &Res))
418+
if (!recordTrace(FirstLBR, SecondLBR, 1, &Res))
429419
return NoneType();
430420

431421
return Res;

bolt/ProfileReader.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,9 @@ ProfileReader::parseFunctionProfile(BinaryFunction &BF,
161161
continue;
162162
}
163163

164-
BB.setSuccessorBranchInfo(SuccessorBB, YamlSI.Count, YamlSI.Mispreds);
164+
auto &BI = BB.getBranchInfo(SuccessorBB);
165+
BI.Count += YamlSI.Count;
166+
BI.MispredictedCount += YamlSI.Mispreds;
165167
}
166168
}
167169

0 commit comments

Comments
 (0)