Skip to content

Commit 0080eef

Browse files
committed
RootAutodetect
1 parent 9065433 commit 0080eef

File tree

11 files changed

+416
-62
lines changed

11 files changed

+416
-62
lines changed

compiler-rt/lib/ctx_profile/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ endif()
2727
add_compiler_rt_runtime(clang_rt.ctx_profile
2828
STATIC
2929
ARCHS ${CTX_PROFILE_SUPPORTED_ARCH}
30-
OBJECT_LIBS RTSanitizerCommon RTSanitizerCommonLibc
30+
OBJECT_LIBS RTSanitizerCommon RTSanitizerCommonLibc RTSanitizerCommonSymbolizer
3131
CFLAGS ${EXTRA_FLAGS}
3232
SOURCES ${CTX_PROFILE_SOURCES}
3333
ADDITIONAL_HEADERS ${CTX_PROFILE_HEADERS}

compiler-rt/lib/ctx_profile/CtxInstrContextNode.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ class ContextNode final {
127127
/// MUTEXDECL takes one parameter, the name of a field that is a mutex.
128128
#define CTXPROF_FUNCTION_DATA(PTRDECL, VOLATILE_PTRDECL, MUTEXDECL) \
129129
PTRDECL(FunctionData, Next) \
130+
PTRDECL(void, EntryAddress) \
130131
VOLATILE_PTRDECL(ContextRoot, CtxRoot) \
131132
VOLATILE_PTRDECL(ContextNode, FlatCtx) \
132133
MUTEXDECL(Mutex)

compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp

Lines changed: 77 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
//===----------------------------------------------------------------------===//
88

99
#include "CtxInstrProfiling.h"
10+
#include "RootAutoDetector.h"
1011
#include "sanitizer_common/sanitizer_allocator_internal.h"
1112
#include "sanitizer_common/sanitizer_atomic.h"
1213
#include "sanitizer_common/sanitizer_atomic_clang.h"
@@ -43,6 +44,12 @@ Arena *FlatCtxArena = nullptr;
4344
__thread bool IsUnderContext = false;
4445
__sanitizer::atomic_uint8_t ProfilingStarted = {};
4546

47+
__sanitizer::atomic_uintptr_t RootDetector = {};
48+
/// Returns the root autodetector currently published in `RootDetector`, or
/// nullptr when the slot holds 0. The slot is read with a relaxed atomic load.
RootAutoDetector *getRootDetector() {
  const auto Published = __sanitizer::atomic_load_relaxed(&RootDetector);
  return reinterpret_cast<RootAutoDetector *>(Published);
}
52+
4653
// utility to taint a pointer by setting the LSB. There is an assumption
4754
// throughout that the addresses of contexts are even (really, they should be
4855
// align(8), but "even"-ness is the minimum assumption)
@@ -201,7 +208,7 @@ ContextNode *getCallsiteSlow(GUID Guid, ContextNode **InsertionPoint,
201208
return Ret;
202209
}
203210

204-
ContextNode *getFlatProfile(FunctionData &Data, GUID Guid,
211+
ContextNode *getFlatProfile(FunctionData &Data, void *Callee, GUID Guid,
205212
uint32_t NumCounters) {
206213
if (ContextNode *Existing = Data.FlatCtx)
207214
return Existing;
@@ -232,6 +239,7 @@ ContextNode *getFlatProfile(FunctionData &Data, GUID Guid,
232239
auto *Ret = allocContextNode(AllocBuff, Guid, NumCounters, 0);
233240
Data.FlatCtx = Ret;
234241

242+
Data.EntryAddress = Callee;
235243
Data.Next = reinterpret_cast<FunctionData *>(
236244
__sanitizer::atomic_load_relaxed(&AllFunctionsData));
237245
while (!__sanitizer::atomic_compare_exchange_strong(
@@ -277,8 +285,29 @@ ContextRoot *FunctionData::getOrAllocateContextRoot() {
277285
return Root;
278286
}
279287

280-
ContextNode *getUnhandledContext(FunctionData &Data, GUID Guid,
281-
uint32_t NumCounters) {
288+
/// Attempts to make `Root` the current context root of the calling thread.
///
/// Always marks the thread as being under a context and bumps
/// Root->TotalEntries. If the root has no memory block yet, it is set up first
/// using `Guid`, `Counters` and `Callsites`.
///
/// \returns Root->FirstNode if this thread managed to take Root->Taken;
/// otherwise TheScratchContext, with the current context root cleared.
ContextNode *tryStartContextGivenRoot(ContextRoot *Root, GUID Guid,
                                      uint32_t Counters, uint32_t Callsites)
    SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
  IsUnderContext = true;
  __sanitizer::atomic_fetch_add(&Root->TotalEntries, 1,
                                __sanitizer::memory_order_relaxed);

  // Lazily create the root's backing storage on first entry.
  if (!Root->FirstMemBlock)
    setupContext(Root, Guid, Counters, Callsites);

  // Another thread owns this root right now: collect into the scratch context.
  if (!Root->Taken.TryLock()) {
    __llvm_ctx_profile_current_context_root = nullptr;
    return TheScratchContext;
  }

  // We own the root: publish it for this thread and enter its first node.
  __llvm_ctx_profile_current_context_root = Root;
  onContextEnter(*Root->FirstNode);
  return Root->FirstNode;
}
307+
308+
ContextNode *getUnhandledContext(FunctionData &Data, void *Callee, GUID Guid,
309+
uint32_t NumCounters, uint32_t NumCallsites,
310+
ContextRoot *CtxRoot) {
282311

283312
// 1) if we are currently collecting a contextual profile, fetch a ContextNode
284313
// in the `Unhandled` set. We want to do this regardless of `ProfilingStarted`
@@ -297,27 +326,32 @@ ContextNode *getUnhandledContext(FunctionData &Data, GUID Guid,
297326
// entered once and never exit. They should be assumed to be entered before
298327
// profiling starts - because profiling should start after the server is up
299328
// and running (which is equivalent to "message pumps are set up").
300-
ContextRoot *R = __llvm_ctx_profile_current_context_root;
301-
if (!R) {
329+
if (!CtxRoot) {
330+
if (auto *RAD = getRootDetector())
331+
RAD->sample();
332+
else if (auto *CR = Data.CtxRoot)
333+
return tryStartContextGivenRoot(CR, Guid, NumCounters, NumCallsites);
302334
if (IsUnderContext || !__sanitizer::atomic_load_relaxed(&ProfilingStarted))
303335
return TheScratchContext;
304336
else
305337
return markAsScratch(
306-
onContextEnter(*getFlatProfile(Data, Guid, NumCounters)));
338+
onContextEnter(*getFlatProfile(Data, Callee, Guid, NumCounters)));
307339
}
308-
auto [Iter, Ins] = R->Unhandled.insert({Guid, nullptr});
340+
auto [Iter, Ins] = CtxRoot->Unhandled.insert({Guid, nullptr});
309341
if (Ins)
310-
Iter->second =
311-
getCallsiteSlow(Guid, &R->FirstUnhandledCalleeNode, NumCounters, 0);
342+
Iter->second = getCallsiteSlow(Guid, &CtxRoot->FirstUnhandledCalleeNode,
343+
NumCounters, 0);
312344
return markAsScratch(onContextEnter(*Iter->second));
313345
}
314346

315347
ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
316348
GUID Guid, uint32_t NumCounters,
317349
uint32_t NumCallsites) {
350+
auto *CtxRoot = __llvm_ctx_profile_current_context_root;
318351
// fast "out" if we're not even doing contextual collection.
319-
if (!__llvm_ctx_profile_current_context_root)
320-
return getUnhandledContext(*Data, Guid, NumCounters);
352+
if (!CtxRoot)
353+
return getUnhandledContext(*Data, Callee, Guid, NumCounters, NumCallsites,
354+
nullptr);
321355

322356
// also fast "out" if the caller is scratch. We can see if it's scratch by
323357
// looking at the interior pointer into the subcontexts vector that the caller
@@ -326,7 +360,8 @@ ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
326360
// precisely, aligned - 8 values)
327361
auto **CallsiteContext = consume(__llvm_ctx_profile_callsite[0]);
328362
if (!CallsiteContext || isScratch(CallsiteContext))
329-
return getUnhandledContext(*Data, Guid, NumCounters);
363+
return getUnhandledContext(*Data, Callee, Guid, NumCounters, NumCallsites,
364+
CtxRoot);
330365

331366
// if the callee isn't the expected one, return scratch.
332367
// Signal handler(s) could have been invoked at any point in the execution.
@@ -344,7 +379,8 @@ ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
344379
// for that case.
345380
auto *ExpectedCallee = consume(__llvm_ctx_profile_expected_callee[0]);
346381
if (ExpectedCallee != Callee)
347-
return getUnhandledContext(*Data, Guid, NumCounters);
382+
return getUnhandledContext(*Data, Callee, Guid, NumCounters, NumCallsites,
383+
CtxRoot);
348384

349385
auto *Callsite = *CallsiteContext;
350386
// in the case of indirect calls, we will have all seen targets forming a
@@ -366,40 +402,26 @@ ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
366402
return Ret;
367403
}
368404

369-
ContextNode *__llvm_ctx_profile_start_context(
370-
FunctionData *FData, GUID Guid, uint32_t Counters,
371-
uint32_t Callsites) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
372-
IsUnderContext = true;
373-
374-
auto *Root = FData->getOrAllocateContextRoot();
375-
376-
__sanitizer::atomic_fetch_add(&Root->TotalEntries, 1,
377-
__sanitizer::memory_order_relaxed);
405+
ContextNode *__llvm_ctx_profile_start_context(FunctionData *FData, GUID Guid,
406+
uint32_t Counters,
407+
uint32_t Callsites) {
378408

379-
if (!Root->FirstMemBlock) {
380-
setupContext(Root, Guid, Counters, Callsites);
381-
}
382-
if (Root->Taken.TryLock()) {
383-
__llvm_ctx_profile_current_context_root = Root;
384-
onContextEnter(*Root->FirstNode);
385-
return Root->FirstNode;
386-
}
387-
// If this thread couldn't take the lock, return scratch context.
388-
__llvm_ctx_profile_current_context_root = nullptr;
389-
return TheScratchContext;
409+
return tryStartContextGivenRoot(FData->getOrAllocateContextRoot(), Guid,
410+
Counters, Callsites);
390411
}
391412

392413
void __llvm_ctx_profile_release_context(FunctionData *FData)
393414
SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
415+
const auto *CurrentRoot = __llvm_ctx_profile_current_context_root;
416+
if (!CurrentRoot || FData->CtxRoot != CurrentRoot)
417+
return;
394418
IsUnderContext = false;
395-
if (__llvm_ctx_profile_current_context_root) {
396-
__llvm_ctx_profile_current_context_root = nullptr;
397-
assert(FData->CtxRoot);
398-
FData->CtxRoot->Taken.Unlock();
399-
}
419+
assert(FData->CtxRoot);
420+
__llvm_ctx_profile_current_context_root = nullptr;
421+
FData->CtxRoot->Taken.Unlock();
400422
}
401423

402-
void __llvm_ctx_profile_start_collection() {
424+
void __llvm_ctx_profile_start_collection(unsigned AutodetectDuration) {
403425
size_t NumMemUnits = 0;
404426
__sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
405427
&AllContextsMutex);
@@ -415,12 +437,24 @@ void __llvm_ctx_profile_start_collection() {
415437
resetContextNode(*Root->FirstUnhandledCalleeNode);
416438
__sanitizer::atomic_store_relaxed(&Root->TotalEntries, 0);
417439
}
440+
if (AutodetectDuration) {
441+
auto *RD = new (__sanitizer::InternalAlloc(sizeof(RootAutoDetector)))
442+
RootAutoDetector(AllFunctionsData, RootDetector, AutodetectDuration);
443+
RD->start();
444+
} else {
445+
__sanitizer::Printf("[ctxprof] Initial NumMemUnits: %zu \n", NumMemUnits);
446+
}
418447
__sanitizer::atomic_store_relaxed(&ProfilingStarted, true);
419-
__sanitizer::Printf("[ctxprof] Initial NumMemUnits: %zu \n", NumMemUnits);
420448
}
421449

422450
bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) {
423451
__sanitizer::atomic_store_relaxed(&ProfilingStarted, false);
452+
if (auto *RD = getRootDetector()) {
453+
__sanitizer::Printf("[ctxprof] Expected the root autodetector to have "
454+
"finished well before attempting to fetch a context");
455+
RD->join();
456+
}
457+
424458
__sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
425459
&AllContextsMutex);
426460

@@ -445,8 +479,9 @@ bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) {
445479
const auto *Pos = reinterpret_cast<const FunctionData *>(
446480
__sanitizer::atomic_load_relaxed(&AllFunctionsData));
447481
for (; Pos; Pos = Pos->Next)
448-
Writer.writeFlat(Pos->FlatCtx->guid(), Pos->FlatCtx->counters(),
449-
Pos->FlatCtx->counters_size());
482+
if (!Pos->CtxRoot)
483+
Writer.writeFlat(Pos->FlatCtx->guid(), Pos->FlatCtx->counters(),
484+
Pos->FlatCtx->counters_size());
450485
Writer.endFlatSection();
451486
return true;
452487
}

compiler-rt/lib/ctx_profile/CtxInstrProfiling.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ ContextNode *__llvm_ctx_profile_get_context(__ctx_profile::FunctionData *FData,
207207

208208
/// Prepares for collection. Currently this resets counter values but preserves
209209
/// internal context tree structure.
210-
void __llvm_ctx_profile_start_collection();
210+
void __llvm_ctx_profile_start_collection(unsigned AutodetectDuration = 0);
211211

212212
/// Completely free allocated memory.
213213
void __llvm_ctx_profile_free();

compiler-rt/lib/ctx_profile/RootAutoDetector.cpp

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,89 @@
1717
using namespace __ctx_profile;
1818
template <typename T> using Set = DenseMap<T, bool>;
1919

20+
namespace __sanitizer {
21+
void BufferedStackTrace::UnwindImpl(uptr pc, uptr bp, void *context,
22+
bool request_fast, u32 max_depth) {
23+
// We can't implement the fast variant. The fast variant ends up invoking an
24+
// external allocator, because of pthread_attr_getstack. If this happens
25+
// during an allocation of the program being instrumented, a non-reentrant
26+
// lock may be taken (this was observed). The allocator called by
27+
// pthread_attr_getstack will also try to take that lock.
28+
UnwindSlow(pc, max_depth);
29+
}
30+
} // namespace __sanitizer
31+
32+
/// Registers this per-thread sample store with `Parent` by appending it to
/// Parent.AllSamples while holding Parent.AllSamplesMutex.
RootAutoDetector::PerThreadSamples::PerThreadSamples(RootAutoDetector &Parent) {
  GenericScopedLock<SpinMutex> RegistrationLock(&Parent.AllSamplesMutex);
  Parent.AllSamples.PushBack(this);
}
36+
37+
void RootAutoDetector::start() {
38+
atomic_store_relaxed(&Self, reinterpret_cast<uintptr_t>(this));
39+
pthread_create(
40+
&WorkerThread, nullptr,
41+
+[](void *Ctx) -> void * {
42+
RootAutoDetector *RAD = reinterpret_cast<RootAutoDetector *>(Ctx);
43+
SleepForSeconds(RAD->WaitSeconds);
44+
Vector<PerThreadSamples*> Copy;
45+
{
46+
GenericScopedLock<SpinMutex> M(&RAD->AllSamplesMutex);
47+
Copy.Resize(RAD->AllSamples.Size());
48+
for (uptr I = 0; I < RAD->AllSamples.Size(); ++I)
49+
Copy[I] = RAD->AllSamples[I];
50+
}
51+
DenseMap<uptr, uint64_t> AllRoots;
52+
for (uptr I = 0; I < Copy.Size(); ++I) {
53+
GenericScopedLock<SpinMutex>(&Copy[I]->M);
54+
Copy[I]->TrieRoot.determineRoots().forEach([&](auto &KVP) {
55+
auto [FAddr, Count] = KVP;
56+
AllRoots[FAddr] += Count;
57+
return true;
58+
});
59+
}
60+
for (auto *FD = reinterpret_cast<FunctionData *>(
61+
atomic_load_relaxed(&RAD->FunctionDataListHead));
62+
FD; FD = FD->Next) {
63+
if (AllRoots.contains(reinterpret_cast<uptr>(FD->EntryAddress))) {
64+
FD->getOrAllocateContextRoot();
65+
}
66+
}
67+
atomic_store_relaxed(&RAD->Self, 0);
68+
return nullptr;
69+
},
70+
this);
71+
}
72+
73+
/// Blocks until the worker thread stored in `WorkerThread` has terminated.
/// The thread's return value is discarded.
void RootAutoDetector::join() { pthread_join(WorkerThread, nullptr); }
76+
77+
void RootAutoDetector::sample() {
78+
static thread_local bool Entered = false;
79+
static thread_local uint64_t Entries = 0;
80+
if (Entered || (++Entries % SampleRate))
81+
return;
82+
Entered = true;
83+
collectStack();
84+
Entered = false;
85+
}
86+
87+
void RootAutoDetector::collectStack() {
88+
GET_CALLER_PC_BP;
89+
BufferedStackTrace CurrentStack;
90+
CurrentStack.Unwind(pc, bp, nullptr, false);
91+
if (CurrentStack.size <= 2) return;
92+
static thread_local PerThreadSamples *ThisThreadSamples =
93+
new (__sanitizer::InternalAlloc(sizeof(PerThreadSamples)))
94+
PerThreadSamples(*this);
95+
96+
if (!ThisThreadSamples->M.TryLock())
97+
return;
98+
99+
ThisThreadSamples->TrieRoot.insertStack(CurrentStack);
100+
ThisThreadSamples->M.Unlock();
101+
}
102+
20103
uptr PerThreadCallsiteTrie::getFctStartAddr(uptr CallsiteAddress) const {
21104
// this requires --linkopt=-Wl,--export-dynamic
22105
Dl_info Info;

compiler-rt/lib/ctx_profile/RootAutoDetector.h

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "sanitizer_common/sanitizer_dense_map.h"
1313
#include "sanitizer_common/sanitizer_internal_defs.h"
1414
#include "sanitizer_common/sanitizer_stacktrace.h"
15+
#include "sanitizer_common/sanitizer_vector.h"
1516
#include <pthread.h>
1617
#include <sanitizer/common_interface_defs.h>
1718

@@ -62,5 +63,35 @@ class PerThreadCallsiteTrie {
6263

6364
const Trie &start() const { return TheTrie; }
6465
};
66+
67+
class RootAutoDetector final {
68+
static const uint64_t SampleRate = 6113;
69+
const unsigned WaitSeconds;
70+
pthread_t WorkerThread;
71+
72+
struct PerThreadSamples {
73+
PerThreadSamples(RootAutoDetector &Parent);
74+
75+
PerThreadCallsiteTrie TrieRoot;
76+
SpinMutex M;
77+
};
78+
SpinMutex AllSamplesMutex;
79+
SANITIZER_GUARDED_BY(AllSamplesMutex)
80+
Vector<PerThreadSamples*> AllSamples;
81+
atomic_uintptr_t &FunctionDataListHead;
82+
atomic_uintptr_t &Self;
83+
void collectStack();
84+
85+
public:
86+
RootAutoDetector(atomic_uintptr_t &FunctionDataListHead,
87+
atomic_uintptr_t &Self, unsigned WaitSeconds)
88+
: WaitSeconds(WaitSeconds), FunctionDataListHead(FunctionDataListHead),
89+
Self(Self) {}
90+
91+
void sample();
92+
void start();
93+
void join();
94+
};
95+
6596
} // namespace __ctx_profile
6697
#endif

0 commit comments

Comments
 (0)