Skip to content

Commit 776e456

Browse files
committed
[llvm-cov gcov] Make recursive propagateCounts iterative. NFC
propagateCounts computes unmeasured arc counts (see commit b9d0866). In an x86-64 build using -O3 -fno-omit-frame-pointer, propagateCounts uses 80 bytes per stack frame. If a function contains 1e5 basic blocks on a tree path (Kirchhoff's circuit law optimization), the used stack space will be 8MB (default ulimit -s in many configurations). (In a -O0 build, a stack frame costs 224 bytes.) 1e5 is ample for most configurations. However, for library users using threads (e.g. in RPC handlers), a remaining thread stack of 64KiB allows just 819 stack frames, which is too limited. Switch to an iterative form to avoid stack overflow issues. Iterative forms match other iterative form functions in this file (https://reviews.llvm.org/D93073). Alternative to #68455
1 parent 8d52097 commit 776e456

File tree

2 files changed

+55
-20
lines changed

2 files changed

+55
-20
lines changed

llvm/include/llvm/ProfileData/GCOV.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@ class GCOVFunction {
249249
return make_range(blocks.begin(), blocks.end());
250250
}
251251

252-
uint64_t propagateCounts(const GCOVBlock &v, GCOVArc *pred);
252+
void propagateCounts(const GCOVBlock &v, GCOVArc *pred);
253253
void print(raw_ostream &OS) const;
254254
void dump() const;
255255

llvm/lib/ProfileData/GCOV.cpp

Lines changed: 54 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -365,25 +365,60 @@ GCOVBlock &GCOVFunction::getExitBlock() const {
365365
// For each basic block, the sum of incoming edge counts equals the sum of
366366
// outgoing edge counts by Kirchoff's circuit law. If the unmeasured arcs form a
367367
// spanning tree, the count for each unmeasured arc (GCOV_ARC_ON_TREE) can be
368-
// uniquely identified.
369-
uint64_t GCOVFunction::propagateCounts(const GCOVBlock &v, GCOVArc *pred) {
370-
// If GCOV_ARC_ON_TREE edges do form a tree, visited is not needed; otherwise
371-
// this prevents infinite recursion.
372-
if (!visited.insert(&v).second)
373-
return 0;
374-
375-
uint64_t excess = 0;
376-
for (GCOVArc *e : v.srcs())
377-
if (e != pred)
378-
excess += e->onTree() ? propagateCounts(e->src, e) : e->count;
379-
for (GCOVArc *e : v.dsts())
380-
if (e != pred)
381-
excess -= e->onTree() ? propagateCounts(e->dst, e) : e->count;
382-
if (int64_t(excess) < 0)
383-
excess = -excess;
384-
if (pred)
385-
pred->count = excess;
386-
return excess;
368+
// uniquely identified. Use an iterative algorithm to decrease stack usage for
369+
// library users in threads. See the edge propagation algorithm in Optimally
370+
// Profiling and Tracing Programs, ACM Transactions on Programming Languages and
371+
// Systems, 1994.
372+
// Parameters:
//   v    - block at which the walk starts.
//   pred - arc that is skipped while walking v's arcs and that receives v's
//          computed count; it is null-checked before the store.
// Effect: writes the `count` field of every arc that was followed because
// onTree() returned true. Relies on the `visited` set declared outside this
// function to ignore a block that is reached a second time.
void GCOVFunction::propagateCounts(const GCOVBlock &v, GCOVArc *pred) {
373+
// One entry of the explicit work stack: the per-block state kept while that
// block's arcs are being walked.
struct Elem {
374+
// Block whose incoming and outgoing arcs this entry walks.
const GCOVBlock &v;
375+
// Arc through which this block was reached; skipped during the walk and
// assigned the final count when the entry is finished.
GCOVArc *pred;
376+
// True when this entry was pushed for the destination of one of the parent's
// outgoing arcs; its result is then subtracted from the parent's excess
// instead of added.
bool inDst;
377+
// Index of the next arc to handle: values below v.pred.size() index v.pred,
// the remaining values index v.succ (offset by v.pred.size()).
size_t i = 0;
378+
// Running balance: counts of incoming arcs added, counts of outgoing arcs
// subtracted (unsigned arithmetic, so it may wrap).
uint64_t excess = 0;
379+
};
380+
381+
SmallVector<Elem, 0> stack;
382+
// Root entry; its inDst value is never read because no parent remains when it
// is popped.
stack.push_back({v, pred, false});
383+
for (;;) {
384+
// u is not used again after any push_back/pop_back in this iteration.
Elem &u = stack.back();
385+
// If GCOV_ARC_ON_TREE edges do form a tree, visited is not needed;
386+
// otherwise, this prevents infinite recursion for bad input.
387+
// The check only runs while no arc of u has been handled yet (u.i == 0). An
// already-visited block is dropped without touching pred->count or the
// parent's excess.
if (u.i == 0 && !visited.insert(&u.v).second) {
388+
stack.pop_back();
389+
if (stack.empty())
390+
break;
391+
continue;
392+
}
393+
// Phase 1: incoming arcs.
if (u.i < u.v.pred.size()) {
394+
GCOVArc *e = u.v.pred[u.i++];
395+
// Do not walk back along the arc we arrived through.
if (e != u.pred) {
396+
// An on-tree arc gets its own entry for the source block; that entry adds
// its result to this one when it finishes. Any other arc contributes its
// existing count directly.
if (e->onTree())
397+
stack.push_back({e->src, e, /*inDst=*/false});
398+
else
399+
u.excess += e->count;
400+
}
401+
// Phase 2: outgoing arcs, handled like phase 1 but with the opposite sign.
} else if (u.i < u.v.pred.size() + u.v.succ.size()) {
402+
GCOVArc *e = u.v.succ[u.i++ - u.v.pred.size()];
403+
if (e != u.pred) {
404+
if (e->onTree())
405+
stack.push_back({e->dst, e, /*inDst=*/true});
406+
else
407+
u.excess -= e->count;
408+
}
409+
// Phase 3: every arc of u has been handled; finish this entry.
} else {
410+
uint64_t excess = u.excess;
411+
// Keep the magnitude: if the balance is negative when read as signed,
// negate it.
if (static_cast<int64_t>(excess) < 0)
412+
excess = -excess;
413+
if (u.pred)
414+
u.pred->count = excess;
415+
// Copy inDst out before the entry is popped.
bool inDst = u.inDst;
416+
stack.pop_back();
417+
if (stack.empty())
418+
break;
419+
// Fold the result into the parent: subtracted for an outgoing arc of the
// parent, added for an incoming one.
stack.back().excess += inDst ? -excess : excess;
420+
}
421+
}
387422
}
388423

389424
void GCOVFunction::print(raw_ostream &OS) const {

0 commit comments

Comments
 (0)