 //===----------------------------------------------------------------------===//
 
 #include "CtxInstrProfiling.h"
+#include "RootAutoDetector.h"
 #include "sanitizer_common/sanitizer_allocator_internal.h"
 #include "sanitizer_common/sanitizer_atomic.h"
 #include "sanitizer_common/sanitizer_atomic_clang.h"
@@ -43,6 +44,12 @@ Arena *FlatCtxArena = nullptr;
 __thread bool IsUnderContext = false;
 __sanitizer::atomic_uint8_t ProfilingStarted = {};
 
+__sanitizer::atomic_uintptr_t RootDetector = {};
+RootAutoDetector *getRootDetector() {
+  return reinterpret_cast<RootAutoDetector *>(
+      __sanitizer::atomic_load_relaxed(&RootDetector));
+}
+
 // utility to taint a pointer by setting the LSB. There is an assumption
 // throughout that the addresses of contexts are even (really, they should be
 // align(8), but "even"-ness is the minimum assumption)
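The taint/untaint helpers that the comment above refers to sit outside the hunks shown in this diff; a minimal sketch of the idea, assuming ContextNode addresses are at least even, would look like this (illustrative only, not part of this change):

// Sketch only: "scratch" pointers are tagged by setting the low bit, which is
// free because context node addresses are assumed to be even.
ContextNode *markAsScratch(const ContextNode *Ctx) {
  return reinterpret_cast<ContextNode *>(reinterpret_cast<uint64_t>(Ctx) | 1);
}
bool isScratch(const void *Ctx) {
  return (reinterpret_cast<uint64_t>(Ctx) & 1);
}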
@@ -201,7 +208,7 @@ ContextNode *getCallsiteSlow(GUID Guid, ContextNode **InsertionPoint,
   return Ret;
 }
 
-ContextNode *getFlatProfile(FunctionData &Data, GUID Guid,
+ContextNode *getFlatProfile(FunctionData &Data, void *Callee, GUID Guid,
                             uint32_t NumCounters) {
   if (ContextNode *Existing = Data.FlatCtx)
     return Existing;
@@ -232,6 +239,7 @@ ContextNode *getFlatProfile(FunctionData &Data, GUID Guid,
   auto *Ret = allocContextNode(AllocBuff, Guid, NumCounters, 0);
   Data.FlatCtx = Ret;
 
+  Data.EntryAddress = Callee;
   Data.Next = reinterpret_cast<FunctionData *>(
       __sanitizer::atomic_load_relaxed(&AllFunctionsData));
   while (!__sanitizer::atomic_compare_exchange_strong(
@@ -277,8 +285,29 @@ ContextRoot *FunctionData::getOrAllocateContextRoot() {
   return Root;
 }
 
-ContextNode *getUnhandledContext(FunctionData &Data, GUID Guid,
-                                 uint32_t NumCounters) {
+ContextNode *tryStartContextGivenRoot(ContextRoot *Root, GUID Guid,
+                                      uint32_t Counters, uint32_t Callsites)
+    SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+  IsUnderContext = true;
+  __sanitizer::atomic_fetch_add(&Root->TotalEntries, 1,
+                                __sanitizer::memory_order_relaxed);
+
+  if (!Root->FirstMemBlock) {
+    setupContext(Root, Guid, Counters, Callsites);
+  }
+  if (Root->Taken.TryLock()) {
+    __llvm_ctx_profile_current_context_root = Root;
+    onContextEnter(*Root->FirstNode);
+    return Root->FirstNode;
+  }
+  // If this thread couldn't take the lock, return scratch context.
+  __llvm_ctx_profile_current_context_root = nullptr;
+  return TheScratchContext;
+}
+
+ContextNode *getUnhandledContext(FunctionData &Data, void *Callee, GUID Guid,
+                                 uint32_t NumCounters, uint32_t NumCallsites,
+                                 ContextRoot *CtxRoot) {
 
   // 1) if we are currently collecting a contextual profile, fetch a ContextNode
   // in the `Unhandled` set. We want to do this regardless of `ProfilingStarted`
@@ -297,27 +326,32 @@ ContextNode *getUnhandledContext(FunctionData &Data, GUID Guid,
   // entered once and never exit. They should be assumed to be entered before
   // profiling starts - because profiling should start after the server is up
   // and running (which is equivalent to "message pumps are set up").
-  ContextRoot *R = __llvm_ctx_profile_current_context_root;
-  if (!R) {
+  if (!CtxRoot) {
+    if (auto *RAD = getRootDetector())
+      RAD->sample();
+    else if (auto *CR = Data.CtxRoot)
+      return tryStartContextGivenRoot(CR, Guid, NumCounters, NumCallsites);
     if (IsUnderContext || !__sanitizer::atomic_load_relaxed(&ProfilingStarted))
       return TheScratchContext;
     else
       return markAsScratch(
-          onContextEnter(*getFlatProfile(Data, Guid, NumCounters)));
+          onContextEnter(*getFlatProfile(Data, Callee, Guid, NumCounters)));
   }
-  auto [Iter, Ins] = R->Unhandled.insert({Guid, nullptr});
+  auto [Iter, Ins] = CtxRoot->Unhandled.insert({Guid, nullptr});
   if (Ins)
-    Iter->second =
-        getCallsiteSlow(Guid, &R->FirstUnhandledCalleeNode, NumCounters, 0);
+    Iter->second = getCallsiteSlow(Guid, &CtxRoot->FirstUnhandledCalleeNode,
+                                   NumCounters, 0);
   return markAsScratch(onContextEnter(*Iter->second));
 }
 
 ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
                                             GUID Guid, uint32_t NumCounters,
                                             uint32_t NumCallsites) {
+  auto *CtxRoot = __llvm_ctx_profile_current_context_root;
   // fast "out" if we're not even doing contextual collection.
-  if (!__llvm_ctx_profile_current_context_root)
-    return getUnhandledContext(*Data, Guid, NumCounters);
+  if (!CtxRoot)
+    return getUnhandledContext(*Data, Callee, Guid, NumCounters, NumCallsites,
+                               nullptr);
 
   // also fast "out" if the caller is scratch. We can see if it's scratch by
   // looking at the interior pointer into the subcontexts vector that the caller
@@ -326,7 +360,8 @@ ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
   // precisely, aligned - 8 values)
   auto **CallsiteContext = consume(__llvm_ctx_profile_callsite[0]);
   if (!CallsiteContext || isScratch(CallsiteContext))
-    return getUnhandledContext(*Data, Guid, NumCounters);
+    return getUnhandledContext(*Data, Callee, Guid, NumCounters, NumCallsites,
+                               CtxRoot);
 
   // if the callee isn't the expected one, return scratch.
   // Signal handler(s) could have been invoked at any point in the execution.
@@ -344,7 +379,8 @@ ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
   // for that case.
   auto *ExpectedCallee = consume(__llvm_ctx_profile_expected_callee[0]);
   if (ExpectedCallee != Callee)
-    return getUnhandledContext(*Data, Guid, NumCounters);
+    return getUnhandledContext(*Data, Callee, Guid, NumCounters, NumCallsites,
+                               CtxRoot);
 
   auto *Callsite = *CallsiteContext;
   // in the case of indirect calls, we will have all seen targets forming a
@@ -366,40 +402,26 @@ ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
   return Ret;
 }
 
-ContextNode *__llvm_ctx_profile_start_context(
-    FunctionData *FData, GUID Guid, uint32_t Counters,
-    uint32_t Callsites) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
-  IsUnderContext = true;
-
-  auto *Root = FData->getOrAllocateContextRoot();
-
-  __sanitizer::atomic_fetch_add(&Root->TotalEntries, 1,
-                                __sanitizer::memory_order_relaxed);
+ContextNode *__llvm_ctx_profile_start_context(FunctionData *FData, GUID Guid,
+                                              uint32_t Counters,
+                                              uint32_t Callsites) {
 
-  if (!Root->FirstMemBlock) {
-    setupContext(Root, Guid, Counters, Callsites);
-  }
-  if (Root->Taken.TryLock()) {
-    __llvm_ctx_profile_current_context_root = Root;
-    onContextEnter(*Root->FirstNode);
-    return Root->FirstNode;
-  }
-  // If this thread couldn't take the lock, return scratch context.
-  __llvm_ctx_profile_current_context_root = nullptr;
-  return TheScratchContext;
+  return tryStartContextGivenRoot(FData->getOrAllocateContextRoot(), Guid,
+                                  Counters, Callsites);
 }
 
 void __llvm_ctx_profile_release_context(FunctionData *FData)
     SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+  const auto *CurrentRoot = __llvm_ctx_profile_current_context_root;
+  if (!CurrentRoot || FData->CtxRoot != CurrentRoot)
+    return;
   IsUnderContext = false;
-  if (__llvm_ctx_profile_current_context_root) {
-    __llvm_ctx_profile_current_context_root = nullptr;
-    assert(FData->CtxRoot);
-    FData->CtxRoot->Taken.Unlock();
-  }
+  assert(FData->CtxRoot);
+  __llvm_ctx_profile_current_context_root = nullptr;
+  FData->CtxRoot->Taken.Unlock();
 }
 
-void __llvm_ctx_profile_start_collection() {
+void __llvm_ctx_profile_start_collection(unsigned AutodetectDuration) {
   size_t NumMemUnits = 0;
   __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
       &AllContextsMutex);
@@ -415,12 +437,24 @@ void __llvm_ctx_profile_start_collection() {
       resetContextNode(*Root->FirstUnhandledCalleeNode);
     __sanitizer::atomic_store_relaxed(&Root->TotalEntries, 0);
   }
+  if (AutodetectDuration) {
+    auto *RD = new (__sanitizer::InternalAlloc(sizeof(RootAutoDetector)))
+        RootAutoDetector(AllFunctionsData, RootDetector, AutodetectDuration);
+    RD->start();
+  } else {
+    __sanitizer::Printf("[ctxprof] Initial NumMemUnits: %zu\n", NumMemUnits);
+  }
   __sanitizer::atomic_store_relaxed(&ProfilingStarted, true);
-  __sanitizer::Printf("[ctxprof] Initial NumMemUnits: %zu\n", NumMemUnits);
 }
 
 bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) {
   __sanitizer::atomic_store_relaxed(&ProfilingStarted, false);
+  if (auto *RD = getRootDetector()) {
+    __sanitizer::Printf("[ctxprof] Expected the root autodetector to have "
+                        "finished well before attempting to fetch a context");
+    RD->join();
+  }
+
   __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
       &AllContextsMutex);
 
@@ -445,8 +479,9 @@ bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) {
   const auto *Pos = reinterpret_cast<const FunctionData *>(
       __sanitizer::atomic_load_relaxed(&AllFunctionsData));
   for (; Pos; Pos = Pos->Next)
-    Writer.writeFlat(Pos->FlatCtx->guid(), Pos->FlatCtx->counters(),
-                     Pos->FlatCtx->counters_size());
+    if (!Pos->CtxRoot)
+      Writer.writeFlat(Pos->FlatCtx->guid(), Pos->FlatCtx->counters(),
+                       Pos->FlatCtx->counters_size());
   Writer.endFlatSection();
   return true;
 }
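Since this change adds a parameter to the public entry point, a minimal usage sketch follows; the extern "C" declaration mirrors the signature in this diff, while main(), the sleep, the 30-second duration, and the assumption that the duration is in seconds are illustrative only and not part of this change.

// Sketch of how an instrumented server binary might drive collection with
// the new root auto-detection (illustrative only).
#include <unistd.h>

extern "C" void __llvm_ctx_profile_start_collection(unsigned AutodetectDuration);

int main() {
  // A non-zero duration arms the RootAutoDetector: getUnhandledContext() then
  // calls getRootDetector()->sample() until the detector finishes and hot
  // entry points can be picked up as context roots via Data.CtxRoot.
  __llvm_ctx_profile_start_collection(/*AutodetectDuration=*/30);

  // ... run the workload / message pumps here ...
  sleep(60);

  // Later, a ProfileWriter implementation would be passed to
  // __llvm_ctx_profile_fetch(Writer) to serialize the collected profile.
  return 0;
}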