[ctxprof] Flat profile collection #130655
Changes from all commits, compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp:
@@ -8,6 +8,8 @@

#include "CtxInstrProfiling.h"
#include "sanitizer_common/sanitizer_allocator_internal.h"
#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_atomic_clang.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_dense_map.h"
#include "sanitizer_common/sanitizer_libc.h"

@@ -27,6 +29,20 @@ __sanitizer::SpinMutex AllContextsMutex; | |
SANITIZER_GUARDED_BY(AllContextsMutex) | ||
__sanitizer::Vector<ContextRoot *> AllContextRoots; | ||
|
||
__sanitizer::atomic_uintptr_t AllFunctionsData = {}; | ||

Review thread on this line:
- nit: Prefer AllFunctionsData{0}. I think semantically these mean the same thing, just a matter of preference.
- I did
- Not sure I understand what you mean?
- I use the …
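A quick aside on the nit above: as far as I can tell, __sanitizer::atomic_uintptr_t is a plain aggregate wrapping the value, so `= {}` and `{0}` zero-initialize it either way. A self-contained sketch with a simplified stand-in type (not the real sanitizer header):

```cpp
#include <cassert>
#include <cstdint>

// Simplified stand-in for __sanitizer::atomic_uintptr_t (a plain aggregate
// holding the value); the real definition lives in sanitizer_atomic.h.
struct AtomicUintptrLike {
  volatile uintptr_t ValDontUse;
};

int main() {
  AtomicUintptrLike A = {}; // value-initialization: member is zeroed
  AtomicUintptrLike B{0};   // aggregate-initialization with an explicit 0
  assert(A.ValDontUse == 0 && B.ValDontUse == 0);
  return 0;
}
```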
// Keep all the functions for which we collect a flat profile in a linked list.
__sanitizer::SpinMutex FlatCtxArenaMutex;
SANITIZER_GUARDED_BY(FlatCtxArenaMutex)
Arena *FlatCtxArenaHead = nullptr;
SANITIZER_GUARDED_BY(FlatCtxArenaMutex)
Arena *FlatCtxArena = nullptr;

// Set to true when we enter a root, and false when we exit - regardless if this
// thread collects a contextual profile for that root.
__thread bool IsUnderContext = false;
__sanitizer::atomic_uint8_t ProfilingStarted = {};

// utility to taint a pointer by setting the LSB. There is an assumption
// throughout that the addresses of contexts are even (really, they should be
// align(8), but "even"-ness is the minimum assumption)
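The comment above refers to helpers used later in this diff (markAsScratch, isScratch). A rough, self-contained sketch of the LSB-tainting idea; the bodies below are illustrative, not copied from the file:

```cpp
#include <cassert>
#include <cstdint>

struct ContextNode; // opaque here; the real type is declared in CtxInstrProfiling.h

// Illustrative helpers: contexts are assumed to live at even addresses, so the
// low bit is free to mark a pointer as "scratch" without losing the address.
inline ContextNode *markAsScratch(const ContextNode *Ctx) {
  return reinterpret_cast<ContextNode *>(reinterpret_cast<uintptr_t>(Ctx) | 1);
}
inline bool isScratch(const void *Ctx) {
  return (reinterpret_cast<uintptr_t>(Ctx) & 1) != 0;
}

int main() {
  alignas(8) static char Buffer[8];
  auto *Node = reinterpret_cast<ContextNode *>(Buffer); // even address by construction
  assert(!isScratch(Node));
  assert(isScratch(markAsScratch(Node)));
  return 0;
}
```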
@@ -109,7 +125,10 @@ void resetContextNode(ContextNode &Node) {
      resetContextNode(*Next);
}

void onContextEnter(ContextNode &Node) { ++Node.counters()[0]; }
ContextNode *onContextEnter(ContextNode &Node) {
  ++Node.counters()[0];
  return &Node;
}

} // namespace

@@ -182,12 +201,75 @@ ContextNode *getCallsiteSlow(GUID Guid, ContextNode **InsertionPoint,
  return Ret;
}

ContextNode *__llvm_ctx_profile_get_context(void *Callee, GUID Guid,
                                            uint32_t NumCounters,
ContextNode *getFlatProfile(FunctionData &Data, GUID Guid,
                            uint32_t NumCounters) {
  if (ContextNode *Existing = Data.FlatCtx)
    return Existing;
  {

Review thread on this block:
- Do you need the additional scope (and indentation) if the entire function needs to be under the lock?
- wdym. there's a fast bail-out if FlatCtx was set.
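For context on the exchange above: the unlocked check of Data.FlatCtx at the top of getFlatProfile is the fast bail-out, and the extra scope keeps Data.Mutex held only for the slow path, so the final return happens after the lock is released. A minimal sketch of that shape, using std::mutex instead of the sanitizer spin mutex and mirroring the original's unsynchronized fast check:

```cpp
#include <mutex>

// Minimal sketch of the locking structure discussed above (illustrative names,
// std::mutex in place of __sanitizer::StaticSpinMutex).
struct Slot {
  int *Ptr = nullptr;
  std::mutex M;
};

int *getOrCreate(Slot &S) {
  if (int *Existing = S.Ptr) // fast bail-out, no lock taken
    return Existing;
  {
    std::lock_guard<std::mutex> L(S.M); // slow path only runs under the lock
    if (int *Existing = S.Ptr)          // re-check: another thread may have won
      return Existing;
    S.Ptr = new int(0);
  }
  return S.Ptr; // the scope above has already released the lock here
}
```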
    // We could instead try to take the lock and, if that fails, return
    // TheScratchContext. But that could leave message pump loops more sparsely
    // profiled than everything else. Maybe that doesn't matter, and we can
    // optimize this later.
    __sanitizer::GenericScopedLock<__sanitizer::StaticSpinMutex> L(&Data.Mutex);
    if (ContextNode *Existing = Data.FlatCtx)
      return Existing;
    auto NeededSize = ContextNode::getAllocSize(NumCounters, 0);
    char *AllocBuff = nullptr;
    {
      __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> FL(
          &FlatCtxArenaMutex);
      if (FlatCtxArena)
        AllocBuff = FlatCtxArena->tryBumpAllocate(NeededSize);
      if (!AllocBuff) {
        FlatCtxArena = Arena::allocateNewArena(getArenaAllocSize(NeededSize),
                                               FlatCtxArena);
        AllocBuff = FlatCtxArena->tryBumpAllocate(NeededSize);
      }
      if (!FlatCtxArenaHead)
        FlatCtxArenaHead = FlatCtxArena;
    }
    auto *Ret = allocContextNode(AllocBuff, Guid, NumCounters, 0);
    Data.FlatCtx = Ret;

    Data.Next = reinterpret_cast<FunctionData *>(
        __sanitizer::atomic_load_relaxed(&AllFunctionsData));
    while (!__sanitizer::atomic_compare_exchange_strong(
        &AllFunctionsData, reinterpret_cast<uintptr_t *>(&Data.Next),
        reinterpret_cast<uintptr_t>(&Data),
        __sanitizer::memory_order_release)) {
    }
  }

  return Data.FlatCtx;
}
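Once the flat context is allocated, the FunctionData record is published by prepending it to a global singly-linked list headed by AllFunctionsData; the compare-exchange loop retries until the prepend succeeds, and the writer later snapshots the head once and follows Next. A stand-alone sketch of that pattern with std::atomic (illustrative names, not the sanitizer atomics API):

```cpp
#include <atomic>
#include <cstdio>

// Sketch of the publish-by-prepend pattern used above.
struct FuncData {
  FuncData *Next = nullptr;
  unsigned long long Guid = 0;
};

std::atomic<FuncData *> Head{nullptr};

void publish(FuncData &D) {
  // Point D.Next at the current head, then try to swing the head to &D.
  // On CAS failure, D.Next is refreshed with the new head and we retry.
  D.Next = Head.load(std::memory_order_relaxed);
  while (!Head.compare_exchange_weak(D.Next, &D, std::memory_order_release,
                                     std::memory_order_relaxed)) {
  }
}

void dump() {
  // Take one snapshot of the head; concurrent prepends only grow the list
  // ahead of the snapshot, so this traversal never races with them.
  for (FuncData *P = Head.load(std::memory_order_acquire); P; P = P->Next)
    std::printf("guid %llu\n", P->Guid);
}

int main() {
  static FuncData A, B;
  A.Guid = 1;
  B.Guid = 2;
  publish(A);
  publish(B);
  dump(); // prints guid 2, then guid 1
  return 0;
}
```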
ContextNode *getUnhandledContext(FunctionData &Data, GUID Guid,
                                 uint32_t NumCounters) {
  // 1) if we are under a root (regardless if this thread is collecting or not a
  // contextual profile for that root), do not collect a flat profile. We want
  // to keep flat profiles only for activations that can't happen under a root,
  // to avoid confusing profiles. We can, for example, combine flattened and
  // flat profiles meaningfully, as we wouldn't double-count anything.
  //
  // 2) to avoid lengthy startup, don't bother with flat profiles until the
  // profiling started. We would reset them anyway when profiling starts.
  // HOWEVER. This does lose profiling for message pumps: those functions are
  // entered once and never exit. They should be assumed to be entered before
  // profiling starts - because profiling should start after the server is up
  // and running (which is equivalent to "message pumps are set up").
  if (IsUnderContext || !__sanitizer::atomic_load_relaxed(&ProfilingStarted))
    return TheScratchContext;
  return markAsScratch(
      onContextEnter(*getFlatProfile(Data, Guid, NumCounters)));
}

ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
                                            GUID Guid, uint32_t NumCounters,
                                            uint32_t NumCallsites) {
  // fast "out" if we're not even doing contextual collection.
  if (!__llvm_ctx_profile_current_context_root)
    return TheScratchContext;
    return getUnhandledContext(*Data, Guid, NumCounters);

  // also fast "out" if the caller is scratch. We can see if it's scratch by
  // looking at the interior pointer into the subcontexts vector that the caller

@@ -196,7 +278,7 @@ ContextNode *__llvm_ctx_profile_get_context(void *Callee, GUID Guid,
  // precisely, aligned - 8 values)
  auto **CallsiteContext = consume(__llvm_ctx_profile_callsite[0]);
  if (!CallsiteContext || isScratch(CallsiteContext))
    return TheScratchContext;
    return getUnhandledContext(*Data, Guid, NumCounters);

  // if the callee isn't the expected one, return scratch.
  // Signal handler(s) could have been invoked at any point in the execution.

@@ -214,7 +296,7 @@ ContextNode *__llvm_ctx_profile_get_context(void *Callee, GUID Guid,
  // for that case.
  auto *ExpectedCallee = consume(__llvm_ctx_profile_expected_callee[0]);
  if (ExpectedCallee != Callee)
    return TheScratchContext;
    return getUnhandledContext(*Data, Guid, NumCounters);

  auto *Callsite = *CallsiteContext;
  // in the case of indirect calls, we will have all seen targets forming a

@@ -257,6 +339,7 @@ void setupContext(ContextRoot *Root, GUID Guid, uint32_t NumCounters,
ContextNode *__llvm_ctx_profile_start_context(
    ContextRoot *Root, GUID Guid, uint32_t Counters,
    uint32_t Callsites) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
  IsUnderContext = true;
  if (!Root->FirstMemBlock) {
    setupContext(Root, Guid, Counters, Callsites);
  }

@@ -272,6 +355,7 @@ ContextNode *__llvm_ctx_profile_start_context(

void __llvm_ctx_profile_release_context(ContextRoot *Root)
    SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
  IsUnderContext = false;
  if (__llvm_ctx_profile_current_context_root) {
    __llvm_ctx_profile_current_context_root = nullptr;
    Root->Taken.Unlock();

@@ -291,10 +375,12 @@ void __llvm_ctx_profile_start_collection() {

    resetContextNode(*Root->FirstNode);
  }
  __sanitizer::atomic_store_relaxed(&ProfilingStarted, true);
  __sanitizer::Printf("[ctxprof] Initial NumMemUnits: %zu \n", NumMemUnits);
}

bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) {
  __sanitizer::atomic_store_relaxed(&ProfilingStarted, false);
  __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
      &AllContextsMutex);

@@ -310,17 +396,43 @@ bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) {
    Writer.writeContextual(*Root->FirstNode);
  }
  Writer.endContextSection();
  Writer.startFlatSection();
  // The list progresses behind the head, so taking this snapshot allows the
  // list to grow concurrently without causing a race condition with our
  // traversing it.
  const auto *Pos = reinterpret_cast<const FunctionData *>(
      __sanitizer::atomic_load_relaxed(&AllFunctionsData));
  for (; Pos; Pos = Pos->Next)
    Writer.writeFlat(Pos->FlatCtx->guid(), Pos->FlatCtx->counters(),
                     Pos->FlatCtx->counters_size());
  Writer.endFlatSection();
  return true;
}
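__llvm_ctx_profile_fetch now emits a flat section after the contextual one. Assuming ProfileWriter's new hooks match the calls above (startFlatSection, writeFlat, endFlatSection; the authoritative declarations are in CtxInstrProfiling.h), a toy sink that just prints what it receives gives a feel for the expected call order; it is not the real interface:

```cpp
#include <cstdint>
#include <cstdio>

// Toy sink mirroring the call order driven by __llvm_ctx_profile_fetch above:
// context section first, then one writeFlat per registered function.
// This is an illustration only, not LLVM's ProfileWriter interface.
struct FlatSectionSink {
  void startFlatSection() { std::puts("[flat section begin]"); }
  void writeFlat(uint64_t Guid, const uint64_t *Counters, uint32_t NumCounters) {
    std::printf("guid=%llu counters=%u entrycount=%llu\n",
                static_cast<unsigned long long>(Guid), NumCounters,
                static_cast<unsigned long long>(NumCounters ? Counters[0] : 0));
  }
  void endFlatSection() { std::puts("[flat section end]"); }
};

int main() {
  FlatSectionSink Sink;
  const uint64_t Counters[] = {42, 7}; // counter 0 is the entry count
  Sink.startFlatSection();
  Sink.writeFlat(0xBEEF, Counters, 2);
  Sink.endFlatSection();
  return 0;
}
```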
void __llvm_ctx_profile_free() {
  __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
      &AllContextsMutex);
  for (int I = 0, E = AllContextRoots.Size(); I < E; ++I)
    for (auto *A = AllContextRoots[I]->FirstMemBlock; A;) {
  __sanitizer::atomic_store_relaxed(&ProfilingStarted, false);
  {
    __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
        &AllContextsMutex);
    for (int I = 0, E = AllContextRoots.Size(); I < E; ++I)
      for (auto *A = AllContextRoots[I]->FirstMemBlock; A;) {
        auto *C = A;
        A = A->next();
        __sanitizer::InternalFree(C);
      }
    AllContextRoots.Reset();
  }
  __sanitizer::atomic_store_relaxed(&AllFunctionsData, 0U);
  {
    __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
        &FlatCtxArenaMutex);
    FlatCtxArena = nullptr;
    for (auto *A = FlatCtxArenaHead; A;) {
      auto *C = A;
      A = A->next();
      A = C->next();
      __sanitizer::InternalFree(C);
    }
    AllContextRoots.Reset();

    FlatCtxArenaHead = nullptr;
  }
}