Skip to content

Commit b9e4bde

Browse files
[MemProf] Re-enable cloning of callsites in recursive cycles with fixes (#125947)
This change addresses a number of issues with the support added by PR121985 which were exposed through more exhaustive testing, specifically places that needed updates to perform correct graph updates in the presence of cycles. A new test case is added that reproduces these issues, and the default is flipped back to enabling this handling.
1 parent c268a3f commit b9e4bde

File tree

4 files changed

+396
-53
lines changed

4 files changed

+396
-53
lines changed

llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp

Lines changed: 131 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ static cl::opt<unsigned>
124124

125125
// Optionally enable cloning of callsites involved with recursive cycles
126126
static cl::opt<bool> AllowRecursiveCallsites(
127-
"memprof-allow-recursive-callsites", cl::init(false), cl::Hidden,
127+
"memprof-allow-recursive-callsites", cl::init(true), cl::Hidden,
128128
cl::desc("Allow cloning of callsites involved in recursive cycles"));
129129

130130
// When disabled, try to detect and prevent cloning of recursive contexts.
@@ -301,12 +301,14 @@ class CallsiteContextGraph {
301301
// callers (i.e. if this is the leaf allocation node).
302302
if (!CalleeEdges.empty())
303303
return &CalleeEdges;
304-
if (!CallerEdges.empty()) {
305-
// A node with caller edges but no callee edges must be the allocation
306-
// node.
307-
assert(IsAllocation);
304+
// Typically if the callee edges are empty either the caller edges are
305+
// also empty, or this is an allocation (leaf node). However, if we are
306+
// allowing recursive callsites and contexts this will be violated for
307+
// incompletely cloned recursive cycles.
308+
assert(CallerEdges.empty() || IsAllocation ||
309+
(AllowRecursiveCallsites && AllowRecursiveContexts));
310+
if (!CallerEdges.empty() && IsAllocation)
308311
return &CallerEdges;
309-
}
310312
return nullptr;
311313
}
312314

@@ -403,8 +405,13 @@ class CallsiteContextGraph {
403405
// True if this node was effectively removed from the graph, in which case
404406
// it should have an allocation type of None and empty context ids.
405407
bool isRemoved() const {
406-
assert((AllocTypes == (uint8_t)AllocationType::None) ==
407-
emptyContextIds());
408+
// Typically if the callee edges are empty either the caller edges are
409+
// also empty, or this is an allocation (leaf node). However, if we are
410+
// allowing recursive callsites and contexts this will be violated for
411+
// incompletely cloned recursive cycles.
412+
assert((AllowRecursiveCallsites && AllowRecursiveContexts) ||
413+
(AllocTypes == (uint8_t)AllocationType::None) ==
414+
emptyContextIds());
408415
return AllocTypes == (uint8_t)AllocationType::None;
409416
}
410417

@@ -1344,16 +1351,48 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
13441351
DenseSet<uint32_t> RemainingContextIds) {
13451352
auto &OrigEdges =
13461353
TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
1354+
DenseSet<uint32_t> RecursiveContextIds;
1355+
DenseSet<uint32_t> AllCallerContextIds;
1356+
if (AllowRecursiveCallsites) {
1357+
// Identify which context ids are recursive which is needed to properly
1358+
// update the RemainingContextIds set. The relevant recursive context ids
1359+
// are those that are in multiple edges.
1360+
for (auto &CE : OrigEdges) {
1361+
AllCallerContextIds.reserve(CE->getContextIds().size());
1362+
for (auto Id : CE->getContextIds())
1363+
if (!AllCallerContextIds.insert(Id).second)
1364+
RecursiveContextIds.insert(Id);
1365+
}
1366+
}
13471367
// Increment iterator in loop so that we can remove edges as needed.
13481368
for (auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
13491369
auto Edge = *EI;
1370+
DenseSet<uint32_t> NewEdgeContextIds;
1371+
DenseSet<uint32_t> NotFoundContextIds;
13501372
// Remove any matching context ids from Edge, return set that were found and
13511373
// removed, these are the new edge's context ids. Also update the remaining
13521374
// (not found ids).
1353-
DenseSet<uint32_t> NewEdgeContextIds, NotFoundContextIds;
13541375
set_subtract(Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
13551376
NotFoundContextIds);
1356-
RemainingContextIds.swap(NotFoundContextIds);
1377+
// Update the remaining context ids set for the later edges. This is a
1378+
// compile time optimization.
1379+
if (RecursiveContextIds.empty()) {
1380+
// No recursive ids, so all of the previously remaining context ids that
1381+
// were not seen on this edge are the new remaining set.
1382+
RemainingContextIds.swap(NotFoundContextIds);
1383+
} else {
1384+
// Keep the recursive ids in the remaining set as we expect to see those
1385+
// on another edge. We can remove the non-recursive remaining ids that
1386+
// were seen on this edge, however. We already have the set of remaining
1387+
// ids that were on this edge (in NewEdgeContextIds). Figure out which are
1388+
// non-recursive and only remove those. Note that despite the higher
1389+
// overhead of updating the remaining context ids set when recursion
1390+
// handling is enabled, it was found to be at worst performance neutral
1391+
// and in one case a clear win.
1392+
DenseSet<uint32_t> NonRecursiveRemainingCurEdgeIds =
1393+
set_difference(NewEdgeContextIds, RecursiveContextIds);
1394+
set_subtract(RemainingContextIds, NonRecursiveRemainingCurEdgeIds);
1395+
}
13571396
// If no matching context ids for this edge, skip it.
13581397
if (NewEdgeContextIds.empty()) {
13591398
++EI;
@@ -1410,9 +1449,9 @@ static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
14101449
set_union(CallerEdgeContextIds, Edge->ContextIds);
14111450
}
14121451
// Node can have more context ids than callers if some contexts terminate at
1413-
// node and some are longer. If we are allowing recursive callsites but
1414-
// haven't disabled recursive contexts, this will be violated for
1415-
// incompletely cloned recursive cycles, so skip the checking in that case.
1452+
// node and some are longer. If we are allowing recursive callsites and
1453+
// contexts this will be violated for incompletely cloned recursive cycles,
1454+
// so skip the checking in that case.
14161455
assert((AllowRecursiveCallsites && AllowRecursiveContexts) ||
14171456
NodeContextIds == CallerEdgeContextIds ||
14181457
set_is_subset(CallerEdgeContextIds, NodeContextIds));
@@ -1425,7 +1464,11 @@ static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
14251464
checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
14261465
set_union(CalleeEdgeContextIds, Edge->getContextIds());
14271466
}
1428-
assert(NodeContextIds == CalleeEdgeContextIds);
1467+
// If we are allowing recursive callsites and contexts this will be violated
1468+
// for incompletely cloned recursive cycles, so skip the checking in that
1469+
// case.
1470+
assert((AllowRecursiveCallsites && AllowRecursiveContexts) ||
1471+
NodeContextIds == CalleeEdgeContextIds);
14291472
}
14301473
// FIXME: Since this checking is only invoked under an option, we should
14311474
// change the error checking from using assert to something that will trigger
@@ -3134,6 +3177,12 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
31343177
// over to the corresponding edge into the clone (which is created here if
31353178
// this is a newly created clone).
31363179
for (auto &OldCalleeEdge : OldCallee->CalleeEdges) {
3180+
ContextNode *CalleeToUse = OldCalleeEdge->Callee;
3181+
// If this is a direct recursion edge, use NewCallee (the clone) as the
3182+
// callee as well, so that any edge updated/created here is also direct
3183+
// recursive.
3184+
if (CalleeToUse == OldCallee)
3185+
CalleeToUse = NewCallee;
31373186
// The context ids moving to the new callee are the subset of this edge's
31383187
// context ids and the context ids on the caller edge being moved.
31393188
DenseSet<uint32_t> EdgeContextIdsToMove =
@@ -3147,17 +3196,16 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
31473196
// clone, specifically during function assignment, where we would have
31483197
// removed none type edges after creating the clone. If we can't find
31493198
// a corresponding edge there, fall through to the cloning below.
3150-
if (auto *NewCalleeEdge =
3151-
NewCallee->findEdgeFromCallee(OldCalleeEdge->Callee)) {
3199+
if (auto *NewCalleeEdge = NewCallee->findEdgeFromCallee(CalleeToUse)) {
31523200
NewCalleeEdge->getContextIds().insert(EdgeContextIdsToMove.begin(),
31533201
EdgeContextIdsToMove.end());
31543202
NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
31553203
continue;
31563204
}
31573205
}
31583206
auto NewEdge = std::make_shared<ContextEdge>(
3159-
OldCalleeEdge->Callee, NewCallee,
3160-
computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove);
3207+
CalleeToUse, NewCallee, computeAllocType(EdgeContextIdsToMove),
3208+
EdgeContextIdsToMove);
31613209
NewCallee->CalleeEdges.push_back(NewEdge);
31623210
NewEdge->Callee->CallerEdges.push_back(NewEdge);
31633211
}
@@ -3183,13 +3231,20 @@ template <typename DerivedCCG, typename FuncTy, typename CallTy>
31833231
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
31843232
moveCalleeEdgeToNewCaller(const std::shared_ptr<ContextEdge> &Edge,
31853233
ContextNode *NewCaller) {
3234+
auto *OldCallee = Edge->Callee;
3235+
auto *NewCallee = OldCallee;
3236+
// If this edge was direct recursive, make any new/updated edge also direct
3237+
// recursive to NewCaller.
3238+
bool Recursive = Edge->Caller == Edge->Callee;
3239+
if (Recursive)
3240+
NewCallee = NewCaller;
31863241

31873242
ContextNode *OldCaller = Edge->Caller;
31883243
OldCaller->eraseCalleeEdge(Edge.get());
31893244

31903245
// We might already have an edge to the new caller. If one exists we will
31913246
// reuse it.
3192-
auto ExistingEdgeToNewCaller = NewCaller->findEdgeFromCallee(Edge->Callee);
3247+
auto ExistingEdgeToNewCaller = NewCaller->findEdgeFromCallee(NewCallee);
31933248

31943249
if (ExistingEdgeToNewCaller) {
31953250
// Since we already have an edge to NewCaller, simply move the ids
@@ -3199,11 +3254,19 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
31993254
ExistingEdgeToNewCaller->AllocTypes |= Edge->AllocTypes;
32003255
Edge->ContextIds.clear();
32013256
Edge->AllocTypes = (uint8_t)AllocationType::None;
3202-
Edge->Callee->eraseCallerEdge(Edge.get());
3257+
OldCallee->eraseCallerEdge(Edge.get());
32033258
} else {
32043259
// Otherwise just reconnect Edge to NewCaller.
32053260
Edge->Caller = NewCaller;
32063261
NewCaller->CalleeEdges.push_back(Edge);
3262+
if (Recursive) {
3263+
assert(NewCallee == NewCaller);
3264+
// In the case of (direct) recursive edges, we update the callee as well
3265+
// so that it becomes recursive on the new caller.
3266+
Edge->Callee = NewCallee;
3267+
NewCallee->CallerEdges.push_back(Edge);
3268+
OldCallee->eraseCallerEdge(Edge.get());
3269+
}
32073270
// Don't need to update Edge's context ids since we are simply
32083271
// reconnecting it.
32093272
}
@@ -3217,32 +3280,50 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
32173280
#ifndef NDEBUG
32183281
bool IsNewNode = NewCaller->CallerEdges.empty();
32193282
#endif
3220-
for (auto &OldCallerEdge : OldCaller->CallerEdges) {
3221-
// The context ids moving to the new caller are the subset of this edge's
3222-
// context ids and the context ids on the callee edge being moved.
3223-
DenseSet<uint32_t> EdgeContextIdsToMove =
3224-
set_intersection(OldCallerEdge->getContextIds(), Edge->getContextIds());
3225-
set_subtract(OldCallerEdge->getContextIds(), EdgeContextIdsToMove);
3226-
OldCallerEdge->AllocTypes =
3227-
computeAllocType(OldCallerEdge->getContextIds());
3228-
// In this function we expect that any pre-existing node already has edges
3229-
// from the same callers as the old node. That should be true in the current
3230-
// use case, where we will remove None-type edges after copying over all
3231-
// caller edges from the callee.
3232-
auto *ExistingCallerEdge =
3233-
NewCaller->findEdgeFromCaller(OldCallerEdge->Caller);
3234-
assert(IsNewNode || ExistingCallerEdge);
3235-
if (ExistingCallerEdge) {
3236-
ExistingCallerEdge->getContextIds().insert(EdgeContextIdsToMove.begin(),
3237-
EdgeContextIdsToMove.end());
3238-
ExistingCallerEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
3239-
continue;
3283+
// If we just moved a direct recursive edge, presumably its context ids should
3284+
// also flow out of OldCaller via some other non-recursive callee edge. We
3285+
// don't want to remove the recursive context ids from other caller edges yet,
3286+
// otherwise the context ids get into an inconsistent state on OldCaller.
3287+
// We will update these context ids on the non-recursive caller edge when and
3288+
// if they are updated on the non-recursive callee.
3289+
if (!Recursive) {
3290+
for (auto &OldCallerEdge : OldCaller->CallerEdges) {
3291+
auto OldCallerCaller = OldCallerEdge->Caller;
3292+
// The context ids moving to the new caller are the subset of this edge's
3293+
// context ids and the context ids on the callee edge being moved.
3294+
DenseSet<uint32_t> EdgeContextIdsToMove = set_intersection(
3295+
OldCallerEdge->getContextIds(), Edge->getContextIds());
3296+
if (OldCaller == OldCallerCaller) {
3297+
OldCallerCaller = NewCaller;
3298+
// Don't actually move this one. The caller will move it directly via a
3299+
// call to this function with this as the Edge if it is appropriate to
3300+
// move to a diff node that has a matching callee (itself).
3301+
continue;
3302+
}
3303+
set_subtract(OldCallerEdge->getContextIds(), EdgeContextIdsToMove);
3304+
OldCallerEdge->AllocTypes =
3305+
computeAllocType(OldCallerEdge->getContextIds());
3306+
// In this function we expect that any pre-existing node already has edges
3307+
// from the same callers as the old node. That should be true in the
3308+
// current use case, where we will remove None-type edges after copying
3309+
// over all caller edges from the callee.
3310+
auto *ExistingCallerEdge = NewCaller->findEdgeFromCaller(OldCallerCaller);
3311+
// Since we would have skipped caller edges when moving a direct recursive
3312+
// edge, this may not hold true when recursive handling enabled.
3313+
assert(IsNewNode || ExistingCallerEdge || AllowRecursiveCallsites);
3314+
if (ExistingCallerEdge) {
3315+
ExistingCallerEdge->getContextIds().insert(EdgeContextIdsToMove.begin(),
3316+
EdgeContextIdsToMove.end());
3317+
ExistingCallerEdge->AllocTypes |=
3318+
computeAllocType(EdgeContextIdsToMove);
3319+
continue;
3320+
}
3321+
auto NewEdge = std::make_shared<ContextEdge>(
3322+
NewCaller, OldCallerCaller, computeAllocType(EdgeContextIdsToMove),
3323+
EdgeContextIdsToMove);
3324+
NewCaller->CallerEdges.push_back(NewEdge);
3325+
NewEdge->Caller->CalleeEdges.push_back(NewEdge);
32403326
}
3241-
auto NewEdge = std::make_shared<ContextEdge>(
3242-
NewCaller, OldCallerEdge->Caller,
3243-
computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove);
3244-
NewCaller->CallerEdges.push_back(NewEdge);
3245-
NewEdge->Caller->CalleeEdges.push_back(NewEdge);
32463327
}
32473328
// Recompute the node alloc type now that its caller edges have been
32483329
// updated (since we will compute from those edges).
@@ -3946,6 +4027,11 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
39464027
// handling and makes it less error-prone.
39474028
auto CloneCallerEdges = Clone->CallerEdges;
39484029
for (auto &Edge : CloneCallerEdges) {
4030+
// Skip removed edges (due to direct recursive edges updated when
4031+
// updating callee edges when moving an edge and subsequently
4032+
// removed by call to removeNoneTypeCalleeEdges on the Clone).
4033+
if (Edge->isRemoved())
4034+
continue;
39494035
// Ignore any caller that does not have a recorded callsite Call.
39504036
if (!Edge->Caller->hasCall())
39514037
continue;

0 commit comments

Comments
 (0)