@@ -124,7 +124,7 @@ static cl::opt<unsigned>
 
 // Optionally enable cloning of callsites involved with recursive cycles
 static cl::opt<bool> AllowRecursiveCallsites(
-    "memprof-allow-recursive-callsites", cl::init(false), cl::Hidden,
+    "memprof-allow-recursive-callsites", cl::init(true), cl::Hidden,
     cl::desc("Allow cloning of callsites involved in recursive cycles"));
 
 // When disabled, try to detect and prevent cloning of recursive contexts.
@@ -301,12 +301,14 @@ class CallsiteContextGraph {
       // callers (i.e. if this is the leaf allocation node).
       if (!CalleeEdges.empty())
         return &CalleeEdges;
-      if (!CallerEdges.empty()) {
-        // A node with caller edges but no callee edges must be the allocation
-        // node.
-        assert(IsAllocation);
+      // Typically if the callee edges are empty either the caller edges are
+      // also empty, or this is an allocation (leaf node). However, if we are
+      // allowing recursive callsites and contexts this will be violated for
+      // incompletely cloned recursive cycles.
+      assert(CallerEdges.empty() || IsAllocation ||
+             (AllowRecursiveCallsites && AllowRecursiveContexts));
+      if (!CallerEdges.empty() && IsAllocation)
         return &CallerEdges;
-      }
       return nullptr;
     }
 
@@ -403,8 +405,13 @@ class CallsiteContextGraph {
     // True if this node was effectively removed from the graph, in which case
     // it should have an allocation type of None and empty context ids.
     bool isRemoved() const {
-      assert((AllocTypes == (uint8_t)AllocationType::None) ==
-             emptyContextIds());
+      // Typically if the callee edges are empty either the caller edges are
+      // also empty, or this is an allocation (leaf node). However, if we are
+      // allowing recursive callsites and contexts this will be violated for
+      // incompletely cloned recursive cycles.
+      assert((AllowRecursiveCallsites && AllowRecursiveContexts) ||
+             (AllocTypes == (uint8_t)AllocationType::None) ==
+                 emptyContextIds());
       return AllocTypes == (uint8_t)AllocationType::None;
     }
 
@@ -1344,16 +1351,48 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
     DenseSet<uint32_t> RemainingContextIds) {
   auto &OrigEdges =
       TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
+  DenseSet<uint32_t> RecursiveContextIds;
+  DenseSet<uint32_t> AllCallerContextIds;
+  if (AllowRecursiveCallsites) {
+    // Identify which context ids are recursive which is needed to properly
+    // update the RemainingContextIds set. The relevant recursive context ids
+    // are those that are in multiple edges.
+    for (auto &CE : OrigEdges) {
+      AllCallerContextIds.reserve(CE->getContextIds().size());
+      for (auto Id : CE->getContextIds())
+        if (!AllCallerContextIds.insert(Id).second)
+          RecursiveContextIds.insert(Id);
+    }
+  }
   // Increment iterator in loop so that we can remove edges as needed.
   for (auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
     auto Edge = *EI;
+    DenseSet<uint32_t> NewEdgeContextIds;
+    DenseSet<uint32_t> NotFoundContextIds;
     // Remove any matching context ids from Edge, return set that were found and
     // removed, these are the new edge's context ids. Also update the remaining
     // (not found ids).
-    DenseSet<uint32_t> NewEdgeContextIds, NotFoundContextIds;
     set_subtract(Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
                  NotFoundContextIds);
-    RemainingContextIds.swap(NotFoundContextIds);
+    // Update the remaining context ids set for the later edges. This is a
+    // compile time optimization.
+    if (RecursiveContextIds.empty()) {
+      // No recursive ids, so all of the previously remaining context ids that
+      // were not seen on this edge are the new remaining set.
+      RemainingContextIds.swap(NotFoundContextIds);
+    } else {
+      // Keep the recursive ids in the remaining set as we expect to see those
+      // on another edge. We can remove the non-recursive remaining ids that
+      // were seen on this edge, however. We already have the set of remaining
+      // ids that were on this edge (in NewEdgeContextIds). Figure out which are
+      // non-recursive and only remove those. Note that despite the higher
+      // overhead of updating the remaining context ids set when recursion
+      // handling is enabled, it was found to be at worst performance neutral
+      // and in one case a clear win.
+      DenseSet<uint32_t> NonRecursiveRemainingCurEdgeIds =
+          set_difference(NewEdgeContextIds, RecursiveContextIds);
+      set_subtract(RemainingContextIds, NonRecursiveRemainingCurEdgeIds);
+    }
     // If no matching context ids for this edge, skip it.
     if (NewEdgeContextIds.empty()) {
       ++EI;
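Note on the hunk above: the new RecursiveContextIds computation classifies a context id as recursive at this node when it shows up on more than one of the node's original edges, i.e. the same context enters or leaves the node twice around a cycle. A minimal standalone sketch of just that classification step, using simplified stand-in types (std::unordered_set and a bare edge struct rather than the real ContextEdge and llvm::DenseSet), might look like:

#include <cstdint>
#include <unordered_set>
#include <vector>

// Hypothetical, simplified stand-ins for the graph's edge list and the
// per-edge context-id sets used in the real pass.
using ContextIdSet = std::unordered_set<uint32_t>;
struct SimpleEdge {
  ContextIdSet ContextIds;
};

// A context id is "recursive" at this node if it appears on more than one of
// the node's edges (the same context passes through the node multiple times).
ContextIdSet findRecursiveContextIds(const std::vector<SimpleEdge> &Edges) {
  ContextIdSet Seen, Recursive;
  for (const auto &E : Edges)
    for (uint32_t Id : E.ContextIds)
      if (!Seen.insert(Id).second) // already seen on an earlier edge
        Recursive.insert(Id);
  return Recursive;
}

Those recursive ids are then deliberately left in RemainingContextIds so that later edges around the cycle can still claim them; only the non-recursive ids already matched on the current edge are dropped, which is the compile-time optimization the added comment describes.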
@@ -1410,9 +1449,9 @@ static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
     set_union(CallerEdgeContextIds, Edge->ContextIds);
   }
   // Node can have more context ids than callers if some contexts terminate at
-  // node and some are longer. If we are allowing recursive callsites but
-  // haven't disabled recursive contexts, this will be violated for
-  // incompletely cloned recursive cycles, so skip the checking in that case.
+  // node and some are longer. If we are allowing recursive callsites and
+  // contexts this will be violated for incompletely cloned recursive cycles,
+  // so skip the checking in that case.
   assert((AllowRecursiveCallsites && AllowRecursiveContexts) ||
          NodeContextIds == CallerEdgeContextIds ||
          set_is_subset(CallerEdgeContextIds, NodeContextIds));
@@ -1425,7 +1464,11 @@ static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
       checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
       set_union(CalleeEdgeContextIds, Edge->getContextIds());
     }
-    assert(NodeContextIds == CalleeEdgeContextIds);
+    // If we are allowing recursive callsites and contexts this will be violated
+    // for incompletely cloned recursive cycles, so skip the checking in that
+    // case.
+    assert((AllowRecursiveCallsites && AllowRecursiveContexts) ||
+           NodeContextIds == CalleeEdgeContextIds);
   }
   // FIXME: Since this checking is only invoked under an option, we should
   // change the error checking from using assert to something that will trigger
@@ -3134,6 +3177,12 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
   // over to the corresponding edge into the clone (which is created here if
   // this is a newly created clone).
   for (auto &OldCalleeEdge : OldCallee->CalleeEdges) {
+    ContextNode *CalleeToUse = OldCalleeEdge->Callee;
+    // If this is a direct recursion edge, use NewCallee (the clone) as the
+    // callee as well, so that any edge updated/created here is also direct
+    // recursive.
+    if (CalleeToUse == OldCallee)
+      CalleeToUse = NewCallee;
    // The context ids moving to the new callee are the subset of this edge's
    // context ids and the context ids on the caller edge being moved.
    DenseSet<uint32_t> EdgeContextIdsToMove =
@@ -3147,17 +3196,16 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
      // clone, specifically during function assignment, where we would have
      // removed none type edges after creating the clone. If we can't find
      // a corresponding edge there, fall through to the cloning below.
-      if (auto *NewCalleeEdge =
-              NewCallee->findEdgeFromCallee(OldCalleeEdge->Callee)) {
+      if (auto *NewCalleeEdge = NewCallee->findEdgeFromCallee(CalleeToUse)) {
        NewCalleeEdge->getContextIds().insert(EdgeContextIdsToMove.begin(),
                                              EdgeContextIdsToMove.end());
        NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
        continue;
      }
    }
    auto NewEdge = std::make_shared<ContextEdge>(
-        OldCalleeEdge->Callee, NewCallee,
-        computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove);
+        CalleeToUse, NewCallee, computeAllocType(EdgeContextIdsToMove),
+        EdgeContextIdsToMove);
    NewCallee->CalleeEdges.push_back(NewEdge);
    NewEdge->Callee->CallerEdges.push_back(NewEdge);
  }
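The CalleeToUse remapping above keeps direct recursion direct on the clone: when OldCallee has a callee edge pointing back at itself, the copied or updated edge on NewCallee is made to point at NewCallee rather than back into the original node. A rough sketch of that decision, with hypothetical minimal Node/Edge types (context ids and alloc types omitted, so this is not the real ContextNode/ContextEdge logic):

#include <memory>
#include <vector>

struct Node;
struct Edge {
  Node *Callee;
  Node *Caller;
};
struct Node {
  std::vector<std::shared_ptr<Edge>> CalleeEdges;
  std::vector<std::shared_ptr<Edge>> CallerEdges;
};

// When populating a clone with OldCallee's callee edges, a self edge
// (OldCallee -> OldCallee) is remapped so the clone also points at itself
// (NewCallee -> NewCallee) instead of back into the original node.
void copyCalleeEdgesToClone(Node *OldCallee, Node *NewCallee) {
  for (auto &OldEdge : OldCallee->CalleeEdges) {
    Node *CalleeToUse = OldEdge->Callee;
    if (CalleeToUse == OldCallee) // direct recursion: keep it direct on clone
      CalleeToUse = NewCallee;
    auto NewEdge = std::make_shared<Edge>(Edge{CalleeToUse, NewCallee});
    NewCallee->CalleeEdges.push_back(NewEdge);
    NewEdge->Callee->CallerEdges.push_back(NewEdge);
  }
}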
@@ -3183,13 +3231,20 @@ template <typename DerivedCCG, typename FuncTy, typename CallTy>
 void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
     moveCalleeEdgeToNewCaller(const std::shared_ptr<ContextEdge> &Edge,
                               ContextNode *NewCaller) {
+  auto *OldCallee = Edge->Callee;
+  auto *NewCallee = OldCallee;
+  // If this edge was direct recursive, make any new/updated edge also direct
+  // recursive to NewCaller.
+  bool Recursive = Edge->Caller == Edge->Callee;
+  if (Recursive)
+    NewCallee = NewCaller;
 
   ContextNode *OldCaller = Edge->Caller;
   OldCaller->eraseCalleeEdge(Edge.get());
 
   // We might already have an edge to the new caller. If one exists we will
   // reuse it.
-  auto ExistingEdgeToNewCaller = NewCaller->findEdgeFromCallee(Edge->Callee);
+  auto ExistingEdgeToNewCaller = NewCaller->findEdgeFromCallee(NewCallee);
 
   if (ExistingEdgeToNewCaller) {
     // Since we already have an edge to NewCaller, simply move the ids
@@ -3199,11 +3254,19 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
     ExistingEdgeToNewCaller->AllocTypes |= Edge->AllocTypes;
     Edge->ContextIds.clear();
     Edge->AllocTypes = (uint8_t)AllocationType::None;
-    Edge->Callee->eraseCallerEdge(Edge.get());
+    OldCallee->eraseCallerEdge(Edge.get());
   } else {
     // Otherwise just reconnect Edge to NewCaller.
     Edge->Caller = NewCaller;
     NewCaller->CalleeEdges.push_back(Edge);
+    if (Recursive) {
+      assert(NewCallee == NewCaller);
+      // In the case of (direct) recursive edges, we update the callee as well
+      // so that it becomes recursive on the new caller.
+      Edge->Callee = NewCallee;
+      NewCallee->CallerEdges.push_back(Edge);
+      OldCallee->eraseCallerEdge(Edge.get());
+    }
     // Don't need to update Edge's context ids since we are simply
     // reconnecting it.
   }
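Taken together with the Recursive handling added at the top of moveCalleeEdgeToNewCaller, the net effect for a direct recursive edge (Edge->Caller == Edge->Callee) in the reconnect path is that both endpoints move to NewCaller, so the edge stays a self loop, and the old node drops it from its edge lists. A simplified sketch of that reconnection, again with hypothetical minimal types and ignoring context ids and alloc types:

#include <algorithm>
#include <memory>
#include <vector>

struct Node;
struct Edge {
  Node *Callee;
  Node *Caller;
};
struct Node {
  std::vector<std::shared_ptr<Edge>> CalleeEdges;
  std::vector<std::shared_ptr<Edge>> CallerEdges;
};

// Remove a specific edge from one of a node's edge lists.
static void eraseEdge(std::vector<std::shared_ptr<Edge>> &List, Edge *E) {
  List.erase(std::remove_if(List.begin(), List.end(),
                            [E](const std::shared_ptr<Edge> &P) {
                              return P.get() == E;
                            }),
             List.end());
}

// Moving a direct recursive edge to NewCaller: both endpoints are updated so
// the edge remains a self loop, and the old node drops it from both lists
// (the real code detaches the callee side earlier in the function and keeps
// the edge's context ids and alloc type intact).
void moveRecursiveSelfEdge(const std::shared_ptr<Edge> &E, Node *NewCaller) {
  Node *OldNode = E->Caller; // == E->Callee for a direct recursive edge
  eraseEdge(OldNode->CalleeEdges, E.get());
  eraseEdge(OldNode->CallerEdges, E.get());
  E->Caller = NewCaller;
  E->Callee = NewCaller;
  NewCaller->CalleeEdges.push_back(E);
  NewCaller->CallerEdges.push_back(E);
}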
@@ -3217,32 +3280,50 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
 #ifndef NDEBUG
   bool IsNewNode = NewCaller->CallerEdges.empty();
 #endif
-  for (auto &OldCallerEdge : OldCaller->CallerEdges) {
-    // The context ids moving to the new caller are the subset of this edge's
-    // context ids and the context ids on the callee edge being moved.
-    DenseSet<uint32_t> EdgeContextIdsToMove =
-        set_intersection(OldCallerEdge->getContextIds(), Edge->getContextIds());
-    set_subtract(OldCallerEdge->getContextIds(), EdgeContextIdsToMove);
-    OldCallerEdge->AllocTypes =
-        computeAllocType(OldCallerEdge->getContextIds());
-    // In this function we expect that any pre-existing node already has edges
-    // from the same callers as the old node. That should be true in the current
-    // use case, where we will remove None-type edges after copying over all
-    // caller edges from the callee.
-    auto *ExistingCallerEdge =
-        NewCaller->findEdgeFromCaller(OldCallerEdge->Caller);
-    assert(IsNewNode || ExistingCallerEdge);
-    if (ExistingCallerEdge) {
-      ExistingCallerEdge->getContextIds().insert(EdgeContextIdsToMove.begin(),
-                                                 EdgeContextIdsToMove.end());
-      ExistingCallerEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
-      continue;
+  // If we just moved a direct recursive edge, presumably its context ids should
+  // also flow out of OldCaller via some other non-recursive callee edge. We
+  // don't want to remove the recursive context ids from other caller edges yet,
+  // otherwise the context ids get into an inconsistent state on OldCaller.
+  // We will update these context ids on the non-recursive caller edge when and
+  // if they are updated on the non-recursive callee.
+  if (!Recursive) {
+    for (auto &OldCallerEdge : OldCaller->CallerEdges) {
+      auto OldCallerCaller = OldCallerEdge->Caller;
+      // The context ids moving to the new caller are the subset of this edge's
+      // context ids and the context ids on the callee edge being moved.
+      DenseSet<uint32_t> EdgeContextIdsToMove = set_intersection(
+          OldCallerEdge->getContextIds(), Edge->getContextIds());
+      if (OldCaller == OldCallerCaller) {
+        OldCallerCaller = NewCaller;
+        // Don't actually move this one. The caller will move it directly via a
+        // call to this function with this as the Edge if it is appropriate to
+        // move to a diff node that has a matching callee (itself).
+        continue;
+      }
+      set_subtract(OldCallerEdge->getContextIds(), EdgeContextIdsToMove);
+      OldCallerEdge->AllocTypes =
+          computeAllocType(OldCallerEdge->getContextIds());
+      // In this function we expect that any pre-existing node already has edges
+      // from the same callers as the old node. That should be true in the
+      // current use case, where we will remove None-type edges after copying
+      // over all caller edges from the callee.
+      auto *ExistingCallerEdge = NewCaller->findEdgeFromCaller(OldCallerCaller);
+      // Since we would have skipped caller edges when moving a direct recursive
+      // edge, this may not hold true when recursive handling enabled.
+      assert(IsNewNode || ExistingCallerEdge || AllowRecursiveCallsites);
+      if (ExistingCallerEdge) {
+        ExistingCallerEdge->getContextIds().insert(EdgeContextIdsToMove.begin(),
                                                    EdgeContextIdsToMove.end());
+        ExistingCallerEdge->AllocTypes |=
+            computeAllocType(EdgeContextIdsToMove);
+        continue;
+      }
+      auto NewEdge = std::make_shared<ContextEdge>(
+          NewCaller, OldCallerCaller, computeAllocType(EdgeContextIdsToMove),
+          EdgeContextIdsToMove);
+      NewCaller->CallerEdges.push_back(NewEdge);
+      NewEdge->Caller->CalleeEdges.push_back(NewEdge);
    }
-    auto NewEdge = std::make_shared<ContextEdge>(
-        NewCaller, OldCallerEdge->Caller,
-        computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove);
-    NewCaller->CallerEdges.push_back(NewEdge);
-    NewEdge->Caller->CalleeEdges.push_back(NewEdge);
  }
  // Recompute the node alloc type now that its caller edges have been
  // updated (since we will compute from those edges).
@@ -3946,6 +4027,11 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
         // handling and makes it less error-prone.
         auto CloneCallerEdges = Clone->CallerEdges;
         for (auto &Edge : CloneCallerEdges) {
+          // Skip removed edges (due to direct recursive edges updated when
+          // updating callee edges when moving an edge and subsequently
+          // removed by call to removeNoneTypeCalleeEdges on the Clone).
+          if (Edge->isRemoved())
+            continue;
           // Ignore any caller that does not have a recorded callsite Call.
           if (!Edge->Caller->hasCall())
             continue;