@@ -174,14 +174,23 @@ SampleProfileProber::SampleProfileProber(Function &Func,
174
174
CallProbeIds.clear ();
175
175
LastProbeId = (uint32_t )PseudoProbeReservedId::Last;
176
176
177
- DenseSet<BasicBlock *> InvokeNormalDests;
178
- findInvokeNormalDests (InvokeNormalDests);
179
- DenseSet<BasicBlock *> KnownColdBlocks;
180
- computeEHOnlyBlocks (*F, KnownColdBlocks);
181
-
182
- computeProbeIdForBlocks (InvokeNormalDests, KnownColdBlocks);
177
+ DenseSet<BasicBlock *> BlocksToIgnore;
178
+ // Ignore the cold EH blocks. This will reduce IR size as
179
+ // well as the binary size while retaining the profile quality.
180
+ computeEHOnlyBlocks (*F, BlocksToIgnore);
181
+ // While optimizing nounwind attribute, the frontend may generate unstable IR,
182
+ // e.g. some versions are optimized with the call-to-invoke conversion, while
183
+ // other versions do not. This discrepancy in probe ID could cause profile
184
+ // mismatching issues. To make the probe ID consistent, we can ignore all the
185
+ // EH flows. Specifically, we can ignore the normal dest block which
186
+ // originates from the same block as the call/invoke block and the unwind
187
+ // dest block (computed in computeEHOnlyBlocks), which is a cold block. It
188
+ // doesn't affect the profile quality.
189
+ findInvokeNormalDests (BlocksToIgnore);
190
+
191
+ computeProbeIdForBlocks (BlocksToIgnore);
183
192
computeProbeIdForCallsites ();
184
- computeCFGHash (InvokeNormalDests, KnownColdBlocks );
193
+ computeCFGHash (BlocksToIgnore );
185
194
}
186
195
187
196
void SampleProfileProber::findInvokeNormalDests (
@@ -198,16 +207,18 @@ void SampleProfileProber::findInvokeNormalDests(
198
207
// preceded by the number of indirect calls.
199
208
// This is derived from FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash().
200
209
void SampleProfileProber::computeCFGHash (
201
- const DenseSet<BasicBlock *> &InvokeNormalDests,
202
- const DenseSet<BasicBlock *> &KnownColdBlocks) {
210
+ const DenseSet<BasicBlock *> &BlocksToIgnore) {
203
211
std::vector<uint8_t > Indexes;
204
212
JamCRC JC;
205
213
for (auto &BB : *F) {
206
- // Skip the EH flow blocks.
207
- if (InvokeNormalDests.contains (&BB) || KnownColdBlocks.contains (&BB))
214
+ if (BlocksToIgnore.contains (&BB))
208
215
continue ;
209
-
210
- // Find the original successors by skipping the EH flow succs.
216
+ // To keep the CFG Hash consistent before and after the call-to-invoke
217
+ // conversion, we need to compute the hash using the original call BB's
218
+ // successors for the invoke BB. As the current invoke BB's
219
+ // successors(normal dest and unwind dest) are ignored, we keep searching to
220
+ // find the leaf normal dest, the leaf's successors are the original call's
221
+ // successors.
211
222
auto *BBPtr = &BB;
212
223
auto *TI = BBPtr->getTerminator ();
213
224
while (auto *II = dyn_cast<InvokeInst>(TI)) {
@@ -217,6 +228,8 @@ void SampleProfileProber::computeCFGHash(
217
228
218
229
for (BasicBlock *Succ : successors (BBPtr)) {
219
230
auto Index = getBlockId (Succ);
231
+ assert (Index &&
232
+ " Ignored block(zero ID) should not be used for hash computation" );
220
233
for (int J = 0 ; J < 4 ; J++)
221
234
Indexes.push_back ((uint8_t )(Index >> (J * 8 )));
222
235
}
@@ -237,12 +250,9 @@ void SampleProfileProber::computeCFGHash(
237
250
}
238
251
239
252
void SampleProfileProber::computeProbeIdForBlocks (
240
- const DenseSet<BasicBlock *> &InvokeNormalDests,
241
- const DenseSet<BasicBlock *> &KnownColdBlocks) {
242
- // Insert pseudo probe to non-cold blocks only. This will reduce IR size as
243
- // well as the binary size while retaining the profile quality.
253
+ const DenseSet<BasicBlock *> &BlocksToIgnore) {
244
254
for (auto &BB : *F) {
245
- if (InvokeNormalDests. contains (&BB) || KnownColdBlocks .contains (&BB))
255
+ if (BlocksToIgnore .contains (&BB))
246
256
continue ;
247
257
BlockProbeIds[&BB] = ++LastProbeId;
248
258
}
0 commit comments