@@ -174,32 +174,76 @@ SampleProfileProber::SampleProfileProber(Function &Func,
174
174
CallProbeIds.clear ();
175
175
LastProbeId = (uint32_t )PseudoProbeReservedId::Last;
176
176
177
- DenseSet<BasicBlock *> BlocksToIgnore;
178
- // Ignore the cold EH blocks. This will reduce IR size as
179
- // well as the binary size while retaining the profile quality.
180
- computeEHOnlyBlocks (*F, BlocksToIgnore);
181
- // While optimizing nounwind attribute, the frondend may generate unstable IR,
182
- // e.g. some versions are optimized with the call-to-invoke conversion, while
183
- // other versions do not. This discrepancy in probe ID could cause profile
184
- // mismatching issues. To make the probe ID consistent, we can ignore all the
185
- // EH flows. Specifically, we can ignore the normal dest block which
186
- // originating from the same block as the call/invoke block and the unwind
187
- // dest block(computed in computeEHOnlyBlocks), which is a cold block. It
188
- // doesn't affect the profile quality.
189
- findInvokeNormalDests (BlocksToIgnore);
190
-
191
- computeProbeIdForBlocks (BlocksToIgnore);
192
- computeProbeIdForCallsites ();
193
- computeCFGHash (BlocksToIgnore);
177
+ DenseSet<BasicBlock *> BlocksToIgnoreProbe;
178
+ DenseSet<BasicBlock *> BlocksToIgnoreCall;
179
+ computeBlocksToIgnore (BlocksToIgnoreProbe, BlocksToIgnoreCall);
180
+
181
+ computeProbeIdForBlocks (BlocksToIgnoreProbe);
182
+ computeProbeIdForCallsites (BlocksToIgnoreCall);
183
+ computeCFGHash (BlocksToIgnoreProbe);
184
+ }
185
+
186
+ // Two purposes to compute the blocks to ignore:
187
+ // 1. Reduce the IR size.
188
+ // 2. Make the instrumentation(checksum mismatch) stable. e.g. the frontend may
189
+ // generate unstable IR while optimizing nounwind attribute, some versions are
190
+ // optimized with the call-to-invoke conversion, while other versions do not.
191
+ // This discrepancy in probe ID could cause profile mismatching issues.
192
+ // Note that those ignored blocks are either cold blocks or new split blocks
193
+ // whose original blocks are instrumented, so it shouldn't degrade the profile
194
+ // quality.
195
+ void SampleProfileProber::computeBlocksToIgnore (
196
+ DenseSet<BasicBlock *> &BlocksToIgnoreProbe,
197
+ DenseSet<BasicBlock *> &BlocksToIgnoreCall) {
198
+ // Ignore the cold EH blocks.
199
+ computeEHOnlyBlocks (*F, BlocksToIgnoreCall);
200
+ findUnreachableBlocks (BlocksToIgnoreCall);
201
+
202
+ BlocksToIgnoreProbe.insert (BlocksToIgnoreCall.begin (),
203
+ BlocksToIgnoreCall.end ());
204
+ findNewSplitBlocks (BlocksToIgnoreProbe);
205
+ }
206
+
207
+ void SampleProfileProber::findUnreachableBlocks (
208
+ DenseSet<BasicBlock *> &BlocksToIgnore) {
209
+ for (auto &BB : *F) {
210
+ if (&BB != &F->getEntryBlock () && pred_size (&BB) == 0 )
211
+ BlocksToIgnore.insert (&BB);
212
+ }
194
213
}
195
214
196
- void SampleProfileProber::findInvokeNormalDests (
197
- DenseSet<BasicBlock *> &InvokeNormalDests) {
215
+ // Basic block can be split into multiple blocks, e.g. due to the
216
+ // call-to-invoke. If they are hotness-wise equal, we can optimize to only
217
+ // instrument the leading block, ignore the other new split blocks.
218
+ void SampleProfileProber::findNewSplitBlocks (
219
+ DenseSet<BasicBlock *> &NewSplitBlocks) {
198
220
for (auto &BB : *F) {
221
+ // Blocks connected by unconditional branch are hotness-wise equal, ignore
222
+ // the second block.
223
+ if (pred_size (&BB) == 1 && succ_size (*pred_begin (&BB)) == 1 )
224
+ NewSplitBlocks.insert (&BB);
225
+
226
+ // For call-to-invoke conversion, the unwind dest is usually cold, so ignore
227
+ // the normal dest of invoke as the new split BBs.
199
228
auto *TI = BB.getTerminator ();
200
229
if (auto *II = dyn_cast<InvokeInst>(TI))
201
- InvokeNormalDests.insert (II->getNormalDest ());
230
+ NewSplitBlocks.insert (II->getNormalDest ());
231
+ }
232
+ }
233
+
234
+ // To keep the CFG Hash consistent before and after the block split opt(such as
235
+ // call-to-invoke conversion), we need to compute the hash using the original
236
+ // BB's successors for the new split BB. It keeps searching to find the leaf
237
+ // new-split BB, the leaf's successors are the original BB's successors.
238
+ const Instruction *SampleProfileProber::getOriginalTerminator (
239
+ const BasicBlock *BB, const DenseSet<BasicBlock *> &BlocksToIgnore) {
240
+ auto *TI = BB->getTerminator ();
241
+ if (auto *II = dyn_cast<InvokeInst>(TI)) {
242
+ return getOriginalTerminator (II->getNormalDest (), BlocksToIgnore);
243
+ } else if (succ_size (BB) == 1 && BlocksToIgnore.contains (*succ_begin (BB))) {
244
+ return getOriginalTerminator (*succ_begin (BB), BlocksToIgnore);
202
245
}
246
+ return TI;
203
247
}
204
248
205
249
// Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
@@ -213,23 +257,14 @@ void SampleProfileProber::computeCFGHash(
213
257
for (auto &BB : *F) {
214
258
if (BlocksToIgnore.contains (&BB))
215
259
continue ;
216
- // To keep the CFG Hash consistent before and after the call-to-invoke
217
- // conversion, we need to compute the hash using the original call BB's
218
- // successors for the invoke BB. As the current invoke BB's
219
- // successors(normal dest and unwind dest) are ignored, we keep searching to
220
- // find the leaf normal dest, the leaf's successors are the original call's
221
- // successors.
222
- auto *BBPtr = &BB;
223
- auto *TI = BBPtr->getTerminator ();
224
- while (auto *II = dyn_cast<InvokeInst>(TI)) {
225
- BBPtr = II->getNormalDest ();
226
- TI = BBPtr->getTerminator ();
227
- }
228
260
229
- for (BasicBlock *Succ : successors (BBPtr)) {
261
+ auto *TI = getOriginalTerminator (&BB, BlocksToIgnore);
262
+ for (unsigned I = 0 , E = TI->getNumSuccessors (); I != E; ++I) {
263
+ auto *Succ = TI->getSuccessor (I);
230
264
auto Index = getBlockId (Succ);
231
- assert (Index && " Ignored block(zero ID) is used for hash computation, it "
232
- " could cause profile checksum mismatch" );
265
+ // Ignore ignored-block(zero ID) to avoid unstable checksum.
266
+ if (Index == 0 )
267
+ continue ;
233
268
for (int J = 0 ; J < 4 ; J++)
234
269
Indexes.push_back ((uint8_t )(Index >> (J * 8 )));
235
270
}
@@ -258,11 +293,14 @@ void SampleProfileProber::computeProbeIdForBlocks(
258
293
}
259
294
}
260
295
261
- void SampleProfileProber::computeProbeIdForCallsites () {
296
+ void SampleProfileProber::computeProbeIdForCallsites (
297
+ const DenseSet<BasicBlock *> &BlocksToIgnoreCall) {
262
298
LLVMContext &Ctx = F->getContext ();
263
299
Module *M = F->getParent ();
264
300
265
301
for (auto &BB : *F) {
302
+ if (BlocksToIgnoreCall.contains (&BB))
303
+ continue ;
266
304
for (auto &I : BB) {
267
305
if (!isa<CallBase>(I))
268
306
continue ;
0 commit comments