Skip to content

Commit e3560e2

Browse files
committed
Extend to ignore new split blocks and unreachable blocks
1 parent 95e2c87 commit e3560e2

File tree

2 files changed

+83
-37
lines changed

2 files changed

+83
-37
lines changed

llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,15 @@ class SampleProfileProber {
8181
uint64_t getFunctionHash() const { return FunctionHash; }
8282
uint32_t getBlockId(const BasicBlock *BB) const;
8383
uint32_t getCallsiteId(const Instruction *Call) const;
84-
void findInvokeNormalDests(DenseSet<BasicBlock *> &InvokeNormalDests);
84+
void findNewSplitBlocks(DenseSet<BasicBlock *> &NewSplitBlocks);
85+
void findUnreachableBlocks(DenseSet<BasicBlock *> &BlocksToIgnore);
86+
void computeBlocksToIgnore(DenseSet<BasicBlock *> &BlocksToIgnoreProbe,
87+
DenseSet<BasicBlock *> &BlocksToIgnoreCall);
88+
void
89+
computeProbeIdForCallsites(const DenseSet<BasicBlock *> &BlocksToIgnoreCall);
90+
const Instruction *
91+
getOriginalTerminator(const BasicBlock *BB,
92+
const DenseSet<BasicBlock *> &BlocksToIgnore);
8593
void computeCFGHash(const DenseSet<BasicBlock *> &BlocksToIgnore);
8694
void computeProbeIdForBlocks(const DenseSet<BasicBlock *> &BlocksToIgnore);
8795
void computeProbeIdForCallsites();

llvm/lib/Transforms/IPO/SampleProfileProbe.cpp

Lines changed: 74 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -174,32 +174,76 @@ SampleProfileProber::SampleProfileProber(Function &Func,
174174
CallProbeIds.clear();
175175
LastProbeId = (uint32_t)PseudoProbeReservedId::Last;
176176

177-
DenseSet<BasicBlock *> BlocksToIgnore;
178-
// Ignore the cold EH blocks. This will reduce IR size as
179-
// well as the binary size while retaining the profile quality.
180-
computeEHOnlyBlocks(*F, BlocksToIgnore);
181-
// While optimizing nounwind attribute, the frondend may generate unstable IR,
182-
// e.g. some versions are optimized with the call-to-invoke conversion, while
183-
// other versions do not. This discrepancy in probe ID could cause profile
184-
// mismatching issues. To make the probe ID consistent, we can ignore all the
185-
// EH flows. Specifically, we can ignore the normal dest block which
186-
// originating from the same block as the call/invoke block and the unwind
187-
// dest block(computed in computeEHOnlyBlocks), which is a cold block. It
188-
// doesn't affect the profile quality.
189-
findInvokeNormalDests(BlocksToIgnore);
190-
191-
computeProbeIdForBlocks(BlocksToIgnore);
192-
computeProbeIdForCallsites();
193-
computeCFGHash(BlocksToIgnore);
177+
DenseSet<BasicBlock *> BlocksToIgnoreProbe;
178+
DenseSet<BasicBlock *> BlocksToIgnoreCall;
179+
computeBlocksToIgnore(BlocksToIgnoreProbe, BlocksToIgnoreCall);
180+
181+
computeProbeIdForBlocks(BlocksToIgnoreProbe);
182+
computeProbeIdForCallsites(BlocksToIgnoreCall);
183+
computeCFGHash(BlocksToIgnoreProbe);
184+
}
185+
186+
// Two purposes to compute the blocks to ignore:
187+
// 1. Reduce the IR size.
188+
// 2. Make the instrumentation(checksum mismatch) stable. e.g. the frontend may
189+
// generate unstable IR while optimizing nounwind attribute, some versions are
190+
// optimized with the call-to-invoke conversion, while other versions do not.
191+
// This discrepancy in probe ID could cause profile mismatching issues.
192+
// Note that those ignored blocks are either cold blocks or new split blocks
193+
// whose original blocks are instrumented, so it shouldn't degrade the profile
194+
// quality.
195+
void SampleProfileProber::computeBlocksToIgnore(
196+
DenseSet<BasicBlock *> &BlocksToIgnoreProbe,
197+
DenseSet<BasicBlock *> &BlocksToIgnoreCall) {
198+
// Ignore the cold EH blocks.
199+
computeEHOnlyBlocks(*F, BlocksToIgnoreCall);
200+
findUnreachableBlocks(BlocksToIgnoreCall);
201+
202+
BlocksToIgnoreProbe.insert(BlocksToIgnoreCall.begin(),
203+
BlocksToIgnoreCall.end());
204+
findNewSplitBlocks(BlocksToIgnoreProbe);
205+
}
206+
207+
void SampleProfileProber::findUnreachableBlocks(
208+
DenseSet<BasicBlock *> &BlocksToIgnore) {
209+
for (auto &BB : *F) {
210+
if (&BB != &F->getEntryBlock() && pred_size(&BB) == 0)
211+
BlocksToIgnore.insert(&BB);
212+
}
194213
}
195214

196-
void SampleProfileProber::findInvokeNormalDests(
197-
DenseSet<BasicBlock *> &InvokeNormalDests) {
215+
// Basic block can be split into multiple blocks, e.g. due to the
216+
// call-to-invoke. If they are hotness-wise equal, we can optimize to only
217+
// instrument the leading block, ignore the other new split blocks.
218+
void SampleProfileProber::findNewSplitBlocks(
219+
DenseSet<BasicBlock *> &NewSplitBlocks) {
198220
for (auto &BB : *F) {
221+
// Blocks connected by unconditional branch are hotness-wise equal, ignore
222+
// the second block.
223+
if (pred_size(&BB) == 1 && succ_size(*pred_begin(&BB)) == 1)
224+
NewSplitBlocks.insert(&BB);
225+
226+
// For call-to-invoke conversion, the unwind dest is usually cold, so ignore
227+
// the normal dest of invoke as the new split BBs.
199228
auto *TI = BB.getTerminator();
200229
if (auto *II = dyn_cast<InvokeInst>(TI))
201-
InvokeNormalDests.insert(II->getNormalDest());
230+
NewSplitBlocks.insert(II->getNormalDest());
231+
}
232+
}
233+
234+
// To keep the CFG Hash consistent before and after the block split opt(such as
235+
// call-to-invoke conversion), we need to compute the hash using the original
236+
// BB's successors for the new split BB. It keeps searching to find the leaf
237+
// new-split BB, the leaf's successors are the original BB's successors.
238+
const Instruction *SampleProfileProber::getOriginalTerminator(
239+
const BasicBlock *BB, const DenseSet<BasicBlock *> &BlocksToIgnore) {
240+
auto *TI = BB->getTerminator();
241+
if (auto *II = dyn_cast<InvokeInst>(TI)) {
242+
return getOriginalTerminator(II->getNormalDest(), BlocksToIgnore);
243+
} else if (succ_size(BB) == 1 && BlocksToIgnore.contains(*succ_begin(BB))) {
244+
return getOriginalTerminator(*succ_begin(BB), BlocksToIgnore);
202245
}
246+
return TI;
203247
}
204248

205249
// Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
@@ -213,23 +257,14 @@ void SampleProfileProber::computeCFGHash(
213257
for (auto &BB : *F) {
214258
if (BlocksToIgnore.contains(&BB))
215259
continue;
216-
// To keep the CFG Hash consistent before and after the call-to-invoke
217-
// conversion, we need to compute the hash using the original call BB's
218-
// successors for the invoke BB. As the current invoke BB's
219-
// successors(normal dest and unwind dest) are ignored, we keep searching to
220-
// find the leaf normal dest, the leaf's successors are the original call's
221-
// successors.
222-
auto *BBPtr = &BB;
223-
auto *TI = BBPtr->getTerminator();
224-
while (auto *II = dyn_cast<InvokeInst>(TI)) {
225-
BBPtr = II->getNormalDest();
226-
TI = BBPtr->getTerminator();
227-
}
228260

229-
for (BasicBlock *Succ : successors(BBPtr)) {
261+
auto *TI = getOriginalTerminator(&BB, BlocksToIgnore);
262+
for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
263+
auto *Succ = TI->getSuccessor(I);
230264
auto Index = getBlockId(Succ);
231-
assert(Index && "Ignored block(zero ID) is used for hash computation, it "
232-
"could cause profile checksum mismatch");
265+
// Ignore ignored blocks (zero ID) to avoid an unstable checksum.
266+
if (Index == 0)
267+
continue;
233268
for (int J = 0; J < 4; J++)
234269
Indexes.push_back((uint8_t)(Index >> (J * 8)));
235270
}
@@ -258,11 +293,14 @@ void SampleProfileProber::computeProbeIdForBlocks(
258293
}
259294
}
260295

261-
void SampleProfileProber::computeProbeIdForCallsites() {
296+
void SampleProfileProber::computeProbeIdForCallsites(
297+
const DenseSet<BasicBlock *> &BlocksToIgnoreCall) {
262298
LLVMContext &Ctx = F->getContext();
263299
Module *M = F->getParent();
264300

265301
for (auto &BB : *F) {
302+
if (BlocksToIgnoreCall.contains(&BB))
303+
continue;
266304
for (auto &I : BB) {
267305
if (!isa<CallBase>(I))
268306
continue;

0 commit comments

Comments
 (0)