Commit 524a028

[MemProf] Streamline and avoid unnecessary context id duplication (#107918)
Sort the list of calls such that those with the same stack ids are also sorted by function. This allows processing of all matching calls (those that can share a context node) in bulk, as they are all adjacent. This has two benefits:

1. It reduces unnecessary work, specifically the handling that intersects the context ids with those along the graph edges for the stack ids, for calls that we know can share a node.

2. It simplifies detecting when we have matching stack ids but don't need to duplicate context ids. Previously we still duplicated context ids whenever we saw another call with the same stack ids, but that isn't necessary if they will share a context node. With this change we only duplicate context ids when we see calls that not only have the same ids but are also in different functions.

This change reduced the amount of context id duplication and yielded reductions in both peak memory (~8%) and time (~5%) for a large target.
Parent: ce9f987
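To illustrate the sorting described in the commit message, here is a minimal, self-contained C++ sketch, not the actual LLVM code: Func, CallContextInfo, the field names, and the helper sortCalls are simplified stand-ins introduced only for illustration. The key idea is that each function pointer is mapped to its first index in the Calls vector, so the comparator produces a deterministic order instead of one that depends on raw pointer values.

#include <algorithm>
#include <cstdint>
#include <map>
#include <vector>

// Simplified stand-ins (assumptions for illustration; not the LLVM types).
struct Func {};
struct CallContextInfo {
  int Call;                        // identifies the call (simplified)
  std::vector<uint64_t> StackIds;  // the call's (possibly pruned) stack ids
  const Func *F;                   // function containing the call
};

// Sort so that longer stack id sequences come first, ties are broken
// lexicographically by the ids, and calls with identical ids end up
// adjacent and grouped by function.
void sortCalls(std::vector<CallContextInfo> &Calls) {
  // Map each function pointer to its first index in Calls; comparing these
  // indices instead of the raw pointers keeps the order deterministic.
  std::map<const Func *, size_t> FuncToIndex;
  for (size_t Idx = 0; Idx < Calls.size(); ++Idx)
    FuncToIndex.emplace(Calls[Idx].F, Idx); // emplace keeps the first index
  std::stable_sort(Calls.begin(), Calls.end(),
                   [&](const CallContextInfo &A, const CallContextInfo &B) {
                     return A.StackIds.size() > B.StackIds.size() ||
                            (A.StackIds.size() == B.StackIds.size() &&
                             (A.StackIds < B.StackIds ||
                              (A.StackIds == B.StackIds &&
                               FuncToIndex[A.F] < FuncToIndex[B.F])));
                   });
}

With this ordering, all calls that can share a context node (same stack ids, same function) form a contiguous run, which is what enables the bulk handling in the diff below.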

1 file changed: 61 additions, 35 deletions


llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp

@@ -1467,14 +1467,26 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
     // of length, and within each length, lexicographically by stack id. The
     // latter is so that we can specially handle calls that have identical stack
     // id sequences (either due to cloning or artificially because of the MIB
-    // context pruning).
-    std::stable_sort(Calls.begin(), Calls.end(),
-                     [](const CallContextInfo &A, const CallContextInfo &B) {
-                       auto &IdsA = std::get<1>(A);
-                       auto &IdsB = std::get<1>(B);
-                       return IdsA.size() > IdsB.size() ||
-                              (IdsA.size() == IdsB.size() && IdsA < IdsB);
-                     });
+    // context pruning). Those with the same Ids are then sorted by function to
+    // facilitate efficiently mapping them to the same context node.
+    // Because the functions are pointers, to ensure a stable sort first assign
+    // each function pointer to its first index in the Calls array, and then use
+    // that to sort by.
+    DenseMap<const FuncTy *, unsigned> FuncToIndex;
+    for (const auto &[Idx, CallCtxInfo] : enumerate(Calls))
+      FuncToIndex.insert({std::get<2>(CallCtxInfo), Idx});
+    std::stable_sort(
+        Calls.begin(), Calls.end(),
+        [&FuncToIndex](const CallContextInfo &A, const CallContextInfo &B) {
+          auto &IdsA = std::get<1>(A);
+          auto &IdsB = std::get<1>(B);
+          auto *FuncA = std::get<2>(A);
+          auto *FuncB = std::get<2>(B);
+          return IdsA.size() > IdsB.size() ||
+                 (IdsA.size() == IdsB.size() &&
+                  (IdsA < IdsB ||
+                   (IdsA == IdsB && FuncToIndex[FuncA] < FuncToIndex[FuncB])));
+        });
 
     // Find the node for the last stack id, which should be the same
     // across all calls recorded for this id, and is the id for this
@@ -1492,18 +1504,35 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
     DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
     assert(!LastNodeContextIds.empty());
 
-    // Map from function to the first call from the below list (with matching
-    // stack ids) found in that function. Note that calls from different
-    // functions can have the same stack ids because this is the list of stack
-    // ids that had (possibly pruned) nodes after building the graph from the
-    // allocation MIBs.
-    DenseMap<const FuncTy *, CallInfo> FuncToCallMap;
+#ifndef NDEBUG
+    // Save the set of functions seen for a particular set of the same stack
+    // ids. This is used to ensure that they have been correctly sorted to be
+    // adjacent in the Calls list, since we rely on that to efficiently place
+    // all such matching calls onto the same context node.
+    DenseSet<const FuncTy *> MatchingIdsFuncSet;
+#endif
 
     for (unsigned I = 0; I < Calls.size(); I++) {
       auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
       assert(SavedContextIds.empty());
       assert(LastId == Ids.back());
 
+#ifndef NDEBUG
+      // If this call has a different set of ids than the last one, clear the
+      // set used to ensure they are sorted properly.
+      if (I > 0 && Ids != std::get<1>(Calls[I - 1]))
+        MatchingIdsFuncSet.clear();
+      else
+        // If the prior call had the same stack ids this set would not be empty.
+        // Check if we already have a call that "matches" because it is located
+        // in the same function. If the Calls list was sorted properly we should
+        // not encounter this situation as all such entries should be adjacent
+        // and processed in bulk further below.
+        assert(!MatchingIdsFuncSet.contains(Func));
+
+      MatchingIdsFuncSet.insert(Func);
+#endif
+
       // First compute the context ids for this stack id sequence (the
       // intersection of the context ids of the corresponding nodes).
       // Start with the remaining saved ids for the last node.
@@ -1572,22 +1601,26 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
         continue;
       }
 
-      // If the prior call had the same stack ids this map would not be empty.
-      // Check if we already have a call that "matches" because it is located
-      // in the same function.
-      if (FuncToCallMap.contains(Func)) {
-        // Record the matching call found for this call, and skip it. We
-        // will subsequently combine it into the same node.
-        CallToMatchingCall[Call] = FuncToCallMap[Func];
-        continue;
-      }
-
       // Check if the next set of stack ids is the same (since the Calls vector
       // of tuples is sorted by the stack ids we can just look at the next one).
+      // If so, save them in the CallToMatchingCall map so that they get
+      // assigned to the same context node, and skip them.
       bool DuplicateContextIds = false;
-      if (I + 1 < Calls.size()) {
-        auto NextIds = std::get<1>(Calls[I + 1]);
-        DuplicateContextIds = Ids == NextIds;
+      for (unsigned J = I + 1; J < Calls.size(); J++) {
+        auto &NextIds = std::get<1>(Calls[J]);
+        if (NextIds != Ids)
+          break;
+        auto *NextFunc = std::get<2>(Calls[J]);
+        if (NextFunc != Func) {
+          // We have another Call with the same ids but that cannot share this
+          // node, must duplicate ids for it.
+          DuplicateContextIds = true;
+          break;
+        }
+        auto &NextCall = std::get<0>(Calls[J]);
+        CallToMatchingCall[NextCall] = Call;
+        // Update I so that it gets incremented correctly to skip this call.
+        I = J;
       }
 
       // If we don't have duplicate context ids, then we can assign all the
@@ -1611,14 +1644,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
         set_subtract(LastNodeContextIds, StackSequenceContextIds);
         if (LastNodeContextIds.empty())
           break;
-        // No longer possibly in a sequence of calls with duplicate stack ids,
-        // clear the map.
-        FuncToCallMap.clear();
-      } else
-        // Record the call with its function, so we can locate it the next time
-        // we find a call from this function when processing the calls with the
-        // same stack ids.
-        FuncToCallMap[Func] = Call;
+      }
     }
   }
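
As a rough companion to the diff above, the following is a hypothetical, simplified sketch (not the real implementation) of the bulk processing the new inner loop performs once the vector is sorted as described: adjacent calls with the same stack ids and the same function are mapped to the first such call, and context id duplication is only requested when a same-ids call from a different function is encountered. The helper processSorted and the stand-in types below are assumptions for illustration only.

#include <cstdint>
#include <map>
#include <vector>

// Simplified stand-ins, repeated so this sketch compiles on its own
// (assumptions for illustration; not the LLVM types).
struct Func {};
struct CallContextInfo {
  int Call;                        // identifies the call (simplified)
  std::vector<uint64_t> StackIds;  // the call's (possibly pruned) stack ids
  const Func *F;                   // function containing the call
};

// Calls is assumed sorted so that entries with identical StackIds are
// adjacent and grouped by function.
void processSorted(const std::vector<CallContextInfo> &Calls,
                   std::map<int, int> &CallToMatchingCall) {
  for (size_t I = 0; I < Calls.size(); ++I) {
    const CallContextInfo &Cur = Calls[I];
    bool DuplicateContextIds = false;
    for (size_t J = I + 1; J < Calls.size(); ++J) {
      if (Calls[J].StackIds != Cur.StackIds)
        break;                      // the run of matching ids has ended
      if (Calls[J].F != Cur.F) {
        DuplicateContextIds = true; // same ids but a different function
        break;
      }
      // Same ids and same function: record the match and skip the call.
      CallToMatchingCall[Calls[J].Call] = Cur.Call;
      I = J;
    }
    // ... the real pass then assigns context ids for Cur, duplicating them
    // only when DuplicateContextIds is true.
    (void)DuplicateContextIds;
  }
}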
