Skip to content

Commit 3f164d5

Browse files
committed
RuntimeCallsiteTrie
1 parent 4485e25 commit 3f164d5

File tree

5 files changed

+293
-1
lines changed

5 files changed

+293
-1
lines changed

compiler-rt/lib/ctx_profile/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,13 @@ add_compiler_rt_component(ctx_profile)
22

33
set(CTX_PROFILE_SOURCES
44
CtxInstrProfiling.cpp
5+
RootAutoDetector.cpp
56
)
67

78
set(CTX_PROFILE_HEADERS
89
CtxInstrContextNode.h
910
CtxInstrProfiling.h
11+
RootAutoDetector.h
1012
)
1113

1214
include_directories(..)
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
//===- RootAutoDetector.cpp - detect contextual profiling roots -----------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "RootAutoDetector.h"
10+
11+
#include "sanitizer_common/sanitizer_common.h"
12+
#include <assert.h>
13+
#include <dlfcn.h>
14+
#include <pthread.h>
15+
16+
using namespace __ctx_profile;
17+
// A set emulated with a DenseMap: membership is carried by the key; the mapped
// bool is ignored by the code in this file.
template <typename T> using Set = DenseMap<T, bool>;
18+
19+
/// Resolve the runtime address of a callsite to the runtime start address of
/// the symbol dladdr associates with it. Returns 0 when dladdr fails.
uptr PerThreadCallsiteTrie::getFctStartAddr(uptr CallsiteAddress) const {
  // this requires --linkopt=-Wl,--export-dynamic
  Dl_info SymInfo;
  const bool Resolved =
      dladdr(reinterpret_cast<const void *>(CallsiteAddress), &SymInfo) != 0;
  if (!Resolved)
    return 0;
  return reinterpret_cast<uptr>(SymInfo.dli_saddr);
}
26+
27+
void PerThreadCallsiteTrie::insertStack(const StackTrace &ST) {
28+
auto *Current = &TheTrie;
29+
// the stack is backwards - the first callsite is at the top.
30+
for (int I = ST.size - 1; I >= 0; --I) {
31+
uptr ChildAddr = ST.trace[I];
32+
auto [Iter, _] = Current->Children.insert({ChildAddr, Trie(ChildAddr)});
33+
++Current->Count;
34+
Current = &Iter->second;
35+
}
36+
}
37+
38+
/// Walk the callsite trie level by level and return, for the first level that
/// spans more than one distinct function, a map from each function's start
/// address to the summed counts of that level's callsites belonging to it.
/// Returns an empty map if no level spans more than one function.
DenseMap<uptr, uint64_t> PerThreadCallsiteTrie::determineRoots() const {
  // The underlying assumption is a message pump: a (practically) infinite loop
  // that pulls work from a queue and calls handler functions, which must
  // return for the pump to keep going. Those handlers are the roots. They show
  // up as the first depth, counted from the start of the trie, at which more
  // than one function is called.
  //
  // The trie is keyed by callsite, not by function, so sibling nodes may belong
  // to the same function. For example, using function(callsite):
  //   f1(csf1_1) -> f2(csf2_1) -> f3
  //                 -> f2(csf2_2) -> f4
  //
  // is stored as:
  //   csf1_1 -> csf2_1 -> f3
  //           -> csf2_2 -> f4
  //
  // Control flow is known to come back to f2, but not necessarily to f1 - f2
  // may well be the message pump.
  //
  // Rather than building a full function tree out of the callsite trie, it is
  // enough to count the distinct functions present at each depth. The first
  // depth with more than one function holds the potential roots; everything
  // above it is treated as a non-root.
  DenseMap<uptr, uint64_t> Roots;
  Set<const Trie *> Level;
  Level.insert({&start(), {}});

  while (!Level.empty()) {
    Set<const Trie *> Deeper;
    DenseMap<uptr, uint64_t> PerFunction;
    Level.forEach([&](auto &Entry) {
      const Trie *Callsite = Entry.first;
      // Fold this callsite into the function that contains it.
      const auto FctStart = getFctStartAddr(Callsite->address());
      PerFunction[FctStart] += Callsite->count();
      // Queue the callees for the next depth.
      Callsite->children().forEach([&](auto &ChildEntry) {
        Deeper.insert({&ChildEntry.second, true});
        return true;
      });
      return true;
    });
    if (PerFunction.size() <= 1) {
      // Still a single chain of functions: go one level down.
      Level.swap(Deeper);
      continue;
    }
    Roots.swap(PerFunction);
    break;
  }
  return Roots;
}
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
/*===- RootAutoDetector.h - auto-detect roots for ctxprof ----------------===*\
2+
|*
3+
|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
|* See https://llvm.org/LICENSE.txt for license information.
5+
|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
|*
7+
\*===----------------------------------------------------------------------===*/
8+
9+
#ifndef CTX_PROFILE_ROOTAUTODETECTOR_H_
10+
#define CTX_PROFILE_ROOTAUTODETECTOR_H_
11+
12+
#include "sanitizer_common/sanitizer_dense_map.h"
13+
#include "sanitizer_common/sanitizer_internal_defs.h"
14+
#include "sanitizer_common/sanitizer_stacktrace.h"
15+
#include <pthread.h>
16+
#include <sanitizer/common_interface_defs.h>
17+
18+
using namespace __asan;
19+
using namespace __sanitizer;
20+
21+
namespace __ctx_profile {
22+
23+
/// A trie. A node is the address of a callsite in a function activation. A
24+
/// child is a callsite in the activation made from the callsite corresponding
25+
/// to the parent.
26+
class Trie final {
27+
friend class PerThreadCallsiteTrie;
28+
const uptr CallsiteAddress;
29+
uint64_t Count = 0;
30+
DenseMap<uptr, Trie> Children;
31+
32+
public:
33+
uptr address() const { return CallsiteAddress; }
34+
uint64_t count() const { return Count; }
35+
const DenseMap<uptr, Trie> &children() const { return Children; }
36+
37+
Trie(uptr CallsiteAddress = 0) : CallsiteAddress(CallsiteAddress) {}
38+
};
39+
40+
/// Capture all the stack traces observed for a specific thread. The "for a
41+
/// specific thread" part is not enforced, but assumed in determineRoots.
42+
class PerThreadCallsiteTrie {
43+
Trie TheTrie;
44+
45+
protected:
46+
/// Return the runtime start address of the function that contains the call at
47+
/// the runtime address CallsiteAddress. May be overriden for easy testing.
48+
virtual uptr getFctStartAddr(uptr CallsiteAddress) const;
49+
50+
public:
51+
PerThreadCallsiteTrie(const PerThreadCallsiteTrie &) = delete;
52+
PerThreadCallsiteTrie(PerThreadCallsiteTrie &&) = default;
53+
PerThreadCallsiteTrie() = default;
54+
55+
virtual ~PerThreadCallsiteTrie() = default;
56+
57+
void insertStack(const StackTrace &ST);
58+
59+
/// Return the runtime address of root functions, as determined for this
60+
/// thread, together with the number of samples that included them.
61+
DenseMap<uptr, uint64_t> determineRoots() const;
62+
63+
const Trie &start() const { return TheTrie; }
64+
};
65+
} // namespace __ctx_profile
66+
#endif

compiler-rt/lib/ctx_profile/tests/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,12 @@ append_list_if(COMPILER_RT_HAS_WVARIADIC_MACROS_FLAG -Wno-variadic-macros CTX_PR
2222
file(GLOB CTX_PROFILE_HEADERS ../*.h)
2323

2424
set(CTX_PROFILE_SOURCES
25-
../CtxInstrProfiling.cpp)
25+
../CtxInstrProfiling.cpp
26+
../RootAutoDetector.cpp)
2627

2728
set(CTX_PROFILE_UNITTESTS
2829
CtxInstrProfilingTest.cpp
30+
RootAutoDetectorTest.cpp
2931
driver.cpp)
3032

3133
include_directories(../../../include)
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
#include "../RootAutoDetector.h"
2+
#include "sanitizer_common/sanitizer_array_ref.h"
3+
#include "gmock/gmock.h"
4+
#include "gtest/gtest.h"
5+
6+
using namespace __ctx_profile;
7+
using ::testing::IsEmpty;
8+
using ::testing::SizeIs;
9+
class MockCallsiteTree final : public PerThreadCallsiteTrie {
10+
// Return the first multiple of 100.
11+
uptr getFctStartAddr(uptr CallsiteAddress) const override {
12+
return (CallsiteAddress / 100) * 100;
13+
}
14+
};
15+
16+
// Utility for describing a preorder traversal. By default it captures a value -
17+
// the value of a node. Implicitly nodes are expected to have 1 child. If they
18+
// have none, we place a Marker::term and if they have more than one, we place a
19+
// Marker::split(nr_of_children)
20+
// For example, using lists: (1 (2 3) (4 (5 6)))
21+
// is a list of markers:
22+
// 1, split(2), 2, term, 3, term, 4, split(2), 5, term, 6, term
23+
class Marker {
24+
enum class Kind { End, Value, Split };
25+
const uptr Value;
26+
const Kind K;
27+
Marker(uptr V, Kind S) : Value(V), K(S) {}
28+
29+
public:
30+
Marker(uptr V) : Marker(V, Kind::Value) {}
31+
32+
static Marker split(uptr V) { return Marker(V, Kind::Split); }
33+
static Marker term() { return Marker(0, Kind::End); }
34+
35+
bool isSplit() const { return K == Kind::Split; }
36+
bool isTerm() const { return K == Kind::End; }
37+
bool isVal() const { return K == Kind::Value; }
38+
39+
bool operator==(const Marker &M) const {
40+
return Value == M.Value && K == M.K;
41+
}
42+
};
43+
44+
// Assert that the next expected marker in Preorder is M, then consume it by
// dropping the front element. Preorder is taken by reference so that the caller
// sees the shortened sequence.
// Note: a failing gtest ASSERT_* returns from this helper only; the caller
// keeps running, with the failure recorded on the current test.
void popAndCheck(ArrayRef<Marker> &Preorder, Marker M) {
45+
ASSERT_FALSE(Preorder.empty());
46+
ASSERT_EQ(Preorder[0], M);
47+
Preorder = Preorder.drop_front();
48+
}
49+
50+
void checkSameImpl(const Trie &T, ArrayRef<Marker> &Preorder) {
51+
popAndCheck(Preorder, T.address());
52+
53+
if (T.children().size() == 0) {
54+
popAndCheck(Preorder, Marker::term());
55+
return;
56+
}
57+
58+
if (T.children().size() > 1)
59+
popAndCheck(Preorder, Marker::split(T.children().size()));
60+
61+
T.children().forEach([&](const auto &KVP) {
62+
checkSameImpl(KVP.second, Preorder);
63+
return true;
64+
});
65+
}
66+
67+
// Check that the whole trie held by RCT, traversed from its start node, matches
// the expected marker sequence Preorder exactly: every node must match and no
// expected marker may be left over afterwards.
void checkSame(const PerThreadCallsiteTrie &RCT, ArrayRef<Marker> Preorder) {
68+
checkSameImpl(RCT.start(), Preorder);
69+
ASSERT_TRUE(Preorder.empty());
70+
}
71+
72+
// Insert stacks that first extend an existing path and then branch off it, and
// check the trie shape after each insertion. Stacks list the innermost frame
// first, so {4, 3, 2, 1} produces the path 0 -> 1 -> 2 -> 3 -> 4 below the
// start node (whose address is 0).
TEST(PerThreadCallsiteTrieTest, Insert) {
  PerThreadCallsiteTrie R;
  uptr Stack1[]{4, 3, 2, 1};
  R.insertStack(StackTrace(Stack1, 4));
  checkSame(R, ArrayRef<Marker>({0, 1, 2, 3, 4, Marker::term()}));
  // Use gtest assertions rather than the sanitizer CHECK_EQ, so a mismatch is
  // reported as a test failure instead of aborting the whole test binary.
  EXPECT_EQ(R.start().count(), 1U);

  // Same path, one frame deeper: the existing nodes are reused.
  uptr Stack2[]{5, 4, 3, 2, 1};
  R.insertStack(StackTrace(Stack2, 5));
  checkSame(R, ArrayRef<Marker>({0, 1, 2, 3, 4, 5, Marker::term()}));
  EXPECT_EQ(R.start().count(), 2U);

  // Diverges below node 3, which now has two children.
  uptr Stack3[]{6, 3, 2, 1};
  R.insertStack(StackTrace(Stack3, 4));
  checkSame(R, ArrayRef<Marker>({0, 1, 2, 3, Marker::split(2), 4, 5,
                                 Marker::term(), 6, Marker::term()}));

  // Diverges below node 2 as well.
  uptr Stack4[]{7, 2, 1};
  R.insertStack(StackTrace(Stack4, 3));
  checkSame(R, ArrayRef<Marker>({0, 1, 2, Marker::split(2), 7, Marker::term(),
                                 3, Marker::split(2), 4, 5, Marker::term(), 6,
                                 Marker::term()}));
}
95+
96+
// Two stacks that share their outer frames and diverge into different
// functions. With the mock resolver (address rounded down to a multiple of
// 100), the outermost callsite 102 maps to function 100, callsites 202 and 203
// both map to function 200, and 302 / 402 map to functions 300 and 400 - the
// first depth with more than one function, hence the expected roots.
TEST(PerThreadCallsiteTrieTest, DetectRoots) {
97+
MockCallsiteTree T;
98+
99+
uptr Stack1[]{501, 302, 202, 102};
100+
uptr Stack2[]{601, 402, 203, 102};
101+
T.insertStack({Stack1, 4});
102+
T.insertStack({Stack2, 4});
103+
104+
auto R = T.determineRoots();
105+
EXPECT_EQ(R.size(), 2U);
106+
EXPECT_TRUE(R.contains(300));
107+
EXPECT_TRUE(R.contains(400));
108+
}
109+
110+
// A single stack yields a trie that is one straight path: every depth contains
// exactly one function, so no depth qualifies and no roots are reported.
TEST(PerThreadCallsiteTrieTest, DetectRootsNoBranches) {
111+
MockCallsiteTree T;
112+
113+
uptr Stack1[]{501, 302, 202, 102};
114+
T.insertStack({Stack1, 4});
115+
116+
auto R = T.determineRoots();
117+
EXPECT_THAT(R, IsEmpty());
118+
}
119+
120+
// Like DetectRoots, but one of the diverging callsites resolves to function
// address 0. The result is expected to still contain two entries, one of them
// keyed by 0.
TEST(PerThreadCallsiteTrieTest, DetectRootsUnknownFct) {
121+
MockCallsiteTree T;
122+
123+
uptr Stack1[]{501, 302, 202, 102};
124+
// The MockCallsiteTree address resolver resolves addresses over 100, so 40
125+
// will be mapped to 0.
126+
uptr Stack2[]{601, 40, 203, 102};
127+
T.insertStack({Stack1, 4});
128+
T.insertStack({Stack2, 4});
129+
130+
auto R = T.determineRoots();
131+
// ASSERT (not EXPECT): stop here if the size is wrong.
ASSERT_THAT(R, SizeIs(2U));
132+
EXPECT_TRUE(R.contains(300));
133+
EXPECT_TRUE(R.contains(0));
134+
}

0 commit comments

Comments
 (0)