-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[ctxprof] Auto root detection: trie for stack samples #133106
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
//===- RootAutoDetector.cpp - detect contextual profiling roots -----------===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "RootAutoDetector.h" | ||
|
||
#include "sanitizer_common/sanitizer_common.h" | ||
#include "sanitizer_common/sanitizer_placement_new.h" // IWYU pragma: keep (DenseMap) | ||
#include <assert.h> | ||
#include <dlfcn.h> | ||
#include <pthread.h> | ||
|
||
using namespace __ctx_profile; | ||
template <typename T> using Set = DenseMap<T, bool>; | ||
|
||
/// Look up, through dladdr, the start address of the function that contains
/// \p CallsiteAddress. Yields 0 when dladdr reports failure.
uptr PerThreadCallsiteTrie::getFctStartAddr(uptr CallsiteAddress) const {
  // The dladdr lookup requires --linkopt=-Wl,--export-dynamic.
  Dl_info SymInfo;
  const void *Addr = reinterpret_cast<const void *>(CallsiteAddress);
  if (dladdr(Addr, &SymInfo) == 0)
    return 0;
  return reinterpret_cast<uptr>(SymInfo.dli_saddr);
}
|
||
void PerThreadCallsiteTrie::insertStack(const StackTrace &ST) { | ||
++TheTrie.Count; | ||
auto *Current = &TheTrie; | ||
// the stack is backwards - the first callsite is at the top. | ||
for (int I = ST.size - 1; I >= 0; --I) { | ||
uptr ChildAddr = ST.trace[I]; | ||
auto [Iter, _] = Current->Children.insert({ChildAddr, Trie(ChildAddr)}); | ||
++Iter->second.Count; | ||
Current = &Iter->second; | ||
} | ||
} | ||
|
||
/// Walk the trie one depth at a time and return the functions found at the
/// first depth that holds more than one distinct function. The result maps a
/// function start address (as given by getFctStartAddr) to the summed Count of
/// the callsite nodes at that depth that resolve to it. The result is empty
/// when no depth has more than one function.
DenseMap<uptr, uint64_t> PerThreadCallsiteTrie::determineRoots() const {
  // The model is a message pump: an (effectively) endless loop that pulls work
  // off a queue and calls a function for each item. Those functions must
  // return, or the pump would stall, and they are the roots we are after. We
  // find them as the first depth of the trie, counting from its root, at which
  // a function calls 2 or more functions.
  //
  // The trie's nodes are callsites, and several child nodes can belong to one
  // and the same function. For example, writing function(callsite):
  //   f1(csf1_1) -> f2(csf2_1) -> f3
  //              -> f2(csf2_2) -> f4
  //
  // is stored in the trie as:
  //   csf1_1 -> csf2_1 -> f3
  //          -> csf2_2 -> f4
  //
  // Control flow demonstrably returns to f2, but nothing tells us whether it
  // ever returns to f1 - f2 could be the message pump.
  //
  // Instead of converting the callsite tree into a function tree, it is
  // cheaper to count the distinct functions present at each depth. The first
  // depth where that count exceeds 1 holds the potential roots, and everything
  // above it is considered a non-root.
  DenseMap<uptr, uint64_t> Roots;
  Set<const Trie *> Level;
  Level.insert({&TheTrie, {}});

  for (; !Level.empty();) {
    Set<const Trie *> NextLevel;
    DenseMap<uptr, uint64_t> PerFunction;
    Level.forEach([&](const auto &Entry) {
      const Trie *Node = Entry.first;
      // Fold callsites of the same function into one candidate.
      PerFunction[getFctStartAddr(Node->CallsiteAddress)] += Node->Count;
      // Queue this node's children for the next depth.
      Node->Children.forEach([&](auto &Child) {
        NextLevel.insert({&Child.second, true});
        return true;
      });
      return true;
    });
    if (PerFunction.size() > 1) {
      // First depth with several functions: these are the roots.
      Roots.swap(PerFunction);
      break;
    }
    Level.swap(NextLevel);
  }
  return Roots;
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
/*===- RootAutoDetector.h - auto-detect roots for ctxprof ----------------===*\ | ||
|* | ||
|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
|* See https://llvm.org/LICENSE.txt for license information. | ||
|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
|* | ||
\*===----------------------------------------------------------------------===*/ | ||
|
||
#ifndef CTX_PROFILE_ROOTAUTODETECTOR_H_ | ||
#define CTX_PROFILE_ROOTAUTODETECTOR_H_ | ||
|
||
#include "sanitizer_common/sanitizer_dense_map.h" | ||
#include "sanitizer_common/sanitizer_internal_defs.h" | ||
#include "sanitizer_common/sanitizer_stacktrace.h" | ||
#include <pthread.h> | ||
#include <sanitizer/common_interface_defs.h> | ||
|
||
using namespace __asan; | ||
using namespace __sanitizer; | ||
|
||
namespace __ctx_profile { | ||
|
||
/// Capture all the stack traces observed for a specific thread. The "for a | ||
/// specific thread" part is not enforced, but assumed in determineRoots. | ||
class PerThreadCallsiteTrie { | ||
protected: | ||
/// A trie. A node is the address of a callsite in a function activation. A | ||
/// child is a callsite in the activation made from the callsite | ||
/// corresponding to the parent. | ||
struct Trie final { | ||
/// Address of the callsite this node stands for. Defaults to 0, which is | ||
/// what the default-constructed root of the trie carries. | ||
const uptr CallsiteAddress; | ||
/// Number of inserted stack samples whose path goes through this node. | ||
uint64_t Count = 0; | ||
/// Child nodes, keyed by their callsite address. | ||
DenseMap<uptr, Trie> Children; | ||
|
||
Trie(uptr CallsiteAddress = 0) : CallsiteAddress(CallsiteAddress) {} | ||
}; | ||
/// Root of the trie. Its Count is incremented once per inserted stack. | ||
Trie TheTrie; | ||
|
||
/// Return the runtime start address of the function that contains the call at | ||
/// the runtime address CallsiteAddress. The default implementation returns 0 | ||
/// when the address cannot be resolved. May be overridden for easy testing. | ||
virtual uptr getFctStartAddr(uptr CallsiteAddress) const; | ||
|
||
public: | ||
PerThreadCallsiteTrie(const PerThreadCallsiteTrie &) = delete; | ||
PerThreadCallsiteTrie(PerThreadCallsiteTrie &&) = default; | ||
PerThreadCallsiteTrie() = default; | ||
|
||
virtual ~PerThreadCallsiteTrie() = default; | ||
|
||
/// Add the stack sample ST to the trie, incrementing the count of every node | ||
/// on its path. The last entry of ST.trace becomes the node closest to the | ||
/// root of the trie. | ||
void insertStack(const StackTrace &ST); | ||
|
||
/// Return the runtime address of root functions, as determined for this | ||
/// thread, together with the number of samples that included them. | ||
DenseMap<uptr, uint64_t> determineRoots() const; | ||
}; | ||
} // namespace __ctx_profile | ||
#endif |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
155 changes: 155 additions & 0 deletions
155
compiler-rt/lib/ctx_profile/tests/RootAutoDetectorTest.cpp
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,155 @@ | ||
#include "../RootAutoDetector.h" | ||
#include "sanitizer_common/sanitizer_array_ref.h" | ||
#include "gmock/gmock.h" | ||
#include "gtest/gtest.h" | ||
|
||
using namespace __ctx_profile; | ||
using ::testing::IsEmpty; | ||
using ::testing::Not; | ||
using ::testing::SizeIs; | ||
|
||
// Utility for describing a preorder traversal. By default it captures the | ||
// address and count at a callsite node. Implicitly nodes are expected to have 1 | ||
// child. If they have none, we place a Marker::term and if they have more than | ||
// one, we place a Marker::split(nr_of_children) For example, using a list | ||
// notation, and letters to denote a pair of address and count: | ||
// (A (B C) (D (E F))) is a list of markers: A, split(2), B, term, C, | ||
// term, D, split(2), E, term, F, term | ||
class Marker { | ||
mtrofin marked this conversation as resolved.
Show resolved
Hide resolved
|
||
enum class Kind { End, Value, Split }; | ||
const uptr Value; | ||
const uptr Count; | ||
const Kind K; | ||
Marker(uptr V, uptr C, Kind S) : Value(V), Count(C), K(S) {} | ||
|
||
public: | ||
Marker(uptr V, uptr C) : Marker(V, C, Kind::Value) {} | ||
|
||
static Marker split(uptr V) { return Marker(V, 0, Kind::Split); } | ||
static Marker term() { return Marker(0, 0, Kind::End); } | ||
|
||
bool isSplit() const { return K == Kind::Split; } | ||
bool isTerm() const { return K == Kind::End; } | ||
bool isVal() const { return K == Kind::Value; } | ||
|
||
bool operator==(const Marker &M) const { | ||
return Value == M.Value && Count == M.Count && K == M.K; | ||
} | ||
}; | ||
|
||
class MockCallsiteTrie final : public PerThreadCallsiteTrie { | ||
// Return the first multiple of 100. | ||
uptr getFctStartAddr(uptr CallsiteAddress) const override { | ||
return (CallsiteAddress / 100) * 100; | ||
} | ||
|
||
static void popAndCheck(ArrayRef<Marker> &Preorder, Marker M) { | ||
ASSERT_THAT(Preorder, Not(IsEmpty())); | ||
ASSERT_EQ(Preorder[0], M); | ||
Preorder = Preorder.drop_front(); | ||
} | ||
|
||
static void checkSameImpl(const Trie &T, ArrayRef<Marker> &Preorder) { | ||
popAndCheck(Preorder, {T.CallsiteAddress, T.Count}); | ||
|
||
if (T.Children.empty()) { | ||
popAndCheck(Preorder, Marker::term()); | ||
return; | ||
} | ||
|
||
if (T.Children.size() > 1) | ||
popAndCheck(Preorder, Marker::split(T.Children.size())); | ||
|
||
T.Children.forEach([&](const auto &KVP) { | ||
checkSameImpl(KVP.second, Preorder); | ||
return true; | ||
}); | ||
} | ||
|
||
public: | ||
void checkSame(ArrayRef<Marker> Preorder) const { | ||
checkSameImpl(TheTrie, Preorder); | ||
ASSERT_THAT(Preorder, IsEmpty()); | ||
} | ||
}; | ||
|
||
// Insert stacks one at a time and, after each, compare the trie against its | ||
// expected preorder description (address and count pairs, plus split and | ||
// term markers). | ||
TEST(PerThreadCallsiteTrieTest, Insert) { | ||
MockCallsiteTrie R; | ||
// insertStack consumes a stack from its last element, so this sample | ||
// produces the chain 0 (the trie root) -> 1 -> 2 -> 3 -> 4. | ||
uptr Stack1[]{4, 3, 2, 1}; | ||
R.insertStack(StackTrace(Stack1, 4)); | ||
R.checkSame(ArrayRef<Marker>( | ||
{{0, 1}, {1, 1}, {2, 1}, {3, 1}, {4, 1}, Marker::term()})); | ||
|
||
// Same path, one frame longer: existing counts go up to 2, node 5 is new. | ||
uptr Stack2[]{5, 4, 3, 2, 1}; | ||
R.insertStack(StackTrace(Stack2, 5)); | ||
R.checkSame(ArrayRef<Marker>( | ||
{{0, 2}, {1, 2}, {2, 2}, {3, 2}, {4, 2}, {5, 1}, Marker::term()})); | ||
|
||
// Shares the path down to node 3, which now has two children (4 and 6). | ||
uptr Stack3[]{6, 3, 2, 1}; | ||
R.insertStack(StackTrace(Stack3, 4)); | ||
R.checkSame(ArrayRef<Marker>({{0, 3}, | ||
{1, 3}, | ||
{2, 3}, | ||
{3, 3}, | ||
Marker::split(2), | ||
{4, 2}, | ||
{5, 1}, | ||
Marker::term(), | ||
{6, 1}, | ||
Marker::term()})); | ||
// Shares the path down to node 2, which now has two children (7 and 3). | ||
uptr Stack4[]{7, 2, 1}; | ||
R.insertStack(StackTrace(Stack4, 3)); | ||
R.checkSame(ArrayRef<Marker>({{0, 4}, | ||
{1, 4}, | ||
{2, 4}, | ||
Marker::split(2), | ||
{7, 1}, | ||
Marker::term(), | ||
{3, 3}, | ||
Marker::split(2), | ||
{4, 2}, | ||
{5, 1}, | ||
Marker::term(), | ||
{6, 1}, | ||
Marker::term()})); | ||
} | ||
|
||
// With the mock resolver (address rounded down to a multiple of 100), both | ||
// stacks go through functions 100 and 200 and then diverge into functions 300 | ||
// and 400, which must be the reported roots. | ||
TEST(PerThreadCallsiteTrieTest, DetectRoots) { | ||
MockCallsiteTrie T; | ||
|
||
// Callsites 202 and 203 are distinct trie nodes in the same function (200). | ||
uptr Stack1[]{501, 302, 202, 102}; | ||
uptr Stack2[]{601, 402, 203, 102}; | ||
T.insertStack({Stack1, 4}); | ||
T.insertStack({Stack2, 4}); | ||
|
||
auto R = T.determineRoots(); | ||
EXPECT_THAT(R, SizeIs(2U)); | ||
EXPECT_TRUE(R.contains(300)); | ||
EXPECT_TRUE(R.contains(400)); | ||
} | ||
|
||
// A single stack gives one function per depth, so no depth ever holds more | ||
// than one function and no roots are reported. | ||
TEST(PerThreadCallsiteTrieTest, DetectRootsNoBranches) { | ||
MockCallsiteTrie T; | ||
|
||
uptr Stack1[]{501, 302, 202, 102}; | ||
T.insertStack({Stack1, 4}); | ||
|
||
auto R = T.determineRoots(); | ||
EXPECT_THAT(R, IsEmpty()); | ||
} | ||
|
||
// A callsite that resolves to function address 0 still counts as a distinct | ||
// function when looking for the first depth with more than one function. | ||
TEST(PerThreadCallsiteTrieTest, DetectRootsUnknownFct) { | ||
MockCallsiteTrie T; | ||
|
||
uptr Stack1[]{501, 302, 202, 102}; | ||
// The MockCallsiteTrie address resolver rounds an address down to a multiple | ||
// of 100, so 40 will be mapped to 0. | ||
uptr Stack2[]{601, 40, 203, 102}; | ||
T.insertStack({Stack1, 4}); | ||
T.insertStack({Stack2, 4}); | ||
|
||
auto R = T.determineRoots(); | ||
ASSERT_THAT(R, SizeIs(2U)); | ||
EXPECT_TRUE(R.contains(300)); | ||
EXPECT_TRUE(R.contains(0)); | ||
} |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.