@@ -41,7 +41,43 @@ Arena *FlatCtxArena = nullptr;
 
-// Set to true when we enter a root, and false when we exit - regardless if this
-// thread collects a contextual profile for that root.
-__thread bool IsUnderContext = false;
+// Incremented when we enter a root and decremented when we exit - regardless
+// if this thread collects a contextual profile for that root.
+__thread int UnderContextRefCount = 0;
+__thread void *volatile EnteredContextAddress = 0;
+
+void onFunctionEntered(void *Address) {
+  UnderContextRefCount += (Address == EnteredContextAddress);
+  assert(UnderContextRefCount > 0);
+}
+
+void onFunctionExited(void *Address) {
+  UnderContextRefCount -= (Address == EnteredContextAddress);
+  assert(UnderContextRefCount >= 0);
+}
+
+// Returns true if the root was entered for the first time (i.e. this is not a
+// recursive re-entry).
+bool rootEnterIsFirst(void *Address) {
+  bool Ret = false;
+  if (!EnteredContextAddress) {
+    EnteredContextAddress = Address;
+    assert(UnderContextRefCount == 0);
+    Ret = true;
+  }
+  onFunctionEntered(Address);
+  return Ret;
+}
+
+// Returns true if this also exits the root.
+bool exitsRoot(void *Address) {
+  onFunctionExited(Address);
+  if (UnderContextRefCount == 0) {
+    EnteredContextAddress = nullptr;
+    return true;
+  }
+  return false;
+}
+
+bool hasEnteredARoot() { return UnderContextRefCount > 0; }
+
 __sanitizer::atomic_uint8_t ProfilingStarted = {};
 
 __sanitizer::atomic_uintptr_t RootDetector = {};
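
The hunk above replaces the boolean IsUnderContext with a per-thread refcount: only entries and exits matching the root registered in EnteredContextAddress move the count, so recursive activations of that root balance out while unrelated calls leave it untouched. Below is a minimal standalone model of the protocol; the sketch namespace and the main driver are illustrative, not part of the runtime, and the thread-locals are assumed to be accessed from a single thread.

#include <cassert>

namespace sketch {
thread_local int UnderContextRefCount = 0;
thread_local void *EnteredContextAddress = nullptr;

bool rootEnterIsFirst(void *Address) {
  bool First = false;
  if (!EnteredContextAddress) { // the first root entered on this thread wins
    EnteredContextAddress = Address;
    First = true;
  }
  // Only entries of the registered root move the count.
  UnderContextRefCount += (Address == EnteredContextAddress);
  return First;
}

bool exitsRoot(void *Address) {
  UnderContextRefCount -= (Address == EnteredContextAddress);
  if (UnderContextRefCount == 0) { // outermost exit: we leave the root
    EnteredContextAddress = nullptr;
    return true;
  }
  return false;
}
} // namespace sketch

int main() {
  int Root; // stand-in for a root function's entry address
  assert(sketch::rootEnterIsFirst(&Root));  // outermost activation
  assert(!sketch::rootEnterIsFirst(&Root)); // recursive re-entry
  assert(!sketch::exitsRoot(&Root));        // inner exit: still under the root
  assert(sketch::exitsRoot(&Root));         // outermost exit leaves the root
}
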
@@ -287,62 +324,65 @@ ContextRoot *FunctionData::getOrAllocateContextRoot() {
   return Root;
 }
 
-ContextNode *tryStartContextGivenRoot(ContextRoot *Root, GUID Guid,
-                                      uint32_t Counters, uint32_t Callsites)
-    SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
-  IsUnderContext = true;
-  __sanitizer::atomic_fetch_add(&Root->TotalEntries, 1,
-                                __sanitizer::memory_order_relaxed);
+ContextNode *tryStartContextGivenRoot(
+    ContextRoot *Root, void *EntryAddress, GUID Guid, uint32_t Counters,
+    uint32_t Callsites) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+
+  if (rootEnterIsFirst(EntryAddress))
+    __sanitizer::atomic_fetch_add(&Root->TotalEntries, 1,
+                                  __sanitizer::memory_order_relaxed);
   if (!Root->FirstMemBlock) {
     setupContext(Root, Guid, Counters, Callsites);
   }
   if (Root->Taken.TryLock()) {
+    assert(__llvm_ctx_profile_current_context_root == nullptr);
     __llvm_ctx_profile_current_context_root = Root;
     onContextEnter(*Root->FirstNode);
     return Root->FirstNode;
   }
   // If this thread couldn't take the lock, return scratch context.
-  __llvm_ctx_profile_current_context_root = nullptr;
   return TheScratchContext;
 }
 
+ContextNode *getOrStartContextOutsideCollection(FunctionData &Data,
+                                                ContextRoot *OwnCtxRoot,
+                                                void *Callee, GUID Guid,
+                                                uint32_t NumCounters,
+                                                uint32_t NumCallsites) {
+  // This must only be called when __llvm_ctx_profile_current_context_root is
+  // null.
+  assert(__llvm_ctx_profile_current_context_root == nullptr);
+  // OwnCtxRoot is Data.CtxRoot, pre-loaded by the caller: the field is
+  // volatile, and the caller also needs its value.
+  assert(Data.CtxRoot == OwnCtxRoot);
+  // If we have a root detector, try sampling.
+  // Otherwise - regardless if we started profiling or not - if Data.CtxRoot is
+  // allocated, try starting a context tree; basically, as if
+  // __llvm_ctx_profile_start_context were called.
+  if (auto *RAD = getRootDetector())
+    RAD->sample();
+  else if (reinterpret_cast<uintptr_t>(OwnCtxRoot) > 1)
+    return tryStartContextGivenRoot(OwnCtxRoot, Data.EntryAddress, Guid,
+                                    NumCounters, NumCallsites);
+
+  // If profiling hasn't started, or if we are under a context - just not
+  // collecting one - return the scratch buffer.
+  if (hasEnteredARoot() ||
+      !__sanitizer::atomic_load_relaxed(&ProfilingStarted))
+    return TheScratchContext;
+  return markAsScratch(
+      onContextEnter(*getFlatProfile(Data, Callee, Guid, NumCounters)));
+}
+
 
 ContextNode *getUnhandledContext(FunctionData &Data, void *Callee, GUID Guid,
                                  uint32_t NumCounters, uint32_t NumCallsites,
-                                 ContextRoot *CtxRoot) {
-
-  // 1) if we are currently collecting a contextual profile, fetch a ContextNode
-  // in the `Unhandled` set. We want to do this regardless of `ProfilingStarted`
-  // to (hopefully) offset the penalty of creating these contexts to before
-  // profiling.
-  //
-  // 2) if we are under a root (regardless if this thread is collecting or not a
-  // contextual profile for that root), do not collect a flat profile. We want
-  // to keep flat profiles only for activations that can't happen under a root,
-  // to avoid confusing profiles. We can, for example, combine flattened and
-  // flat profiles meaningfully, as we wouldn't double-count anything.
-  //
-  // 3) to avoid lengthy startup, don't bother with flat profiles until the
-  // profiling has started. We would reset them anyway when profiling starts.
-  // HOWEVER. This does lose profiling for message pumps: those functions are
-  // entered once and never exit. They should be assumed to be entered before
-  // profiling starts - because profiling should start after the server is up
-  // and running (which is equivalent to "message pumps are set up").
-  if (!CtxRoot) {
-    if (auto *RAD = getRootDetector())
-      RAD->sample();
-    else if (auto *CR = Data.CtxRoot) {
-      if (canBeRoot(CR))
-        return tryStartContextGivenRoot(CR, Guid, NumCounters, NumCallsites);
-    }
-    if (IsUnderContext || !__sanitizer::atomic_load_relaxed(&ProfilingStarted))
-      return TheScratchContext;
-    else
-      return markAsScratch(
-          onContextEnter(*getFlatProfile(Data, Callee, Guid, NumCounters)));
-  }
-  auto [Iter, Ins] = CtxRoot->Unhandled.insert({Guid, nullptr});
+                                 ContextRoot &CtxRoot) {
+  // This must only be called when
+  // __llvm_ctx_profile_current_context_root is not null.
+  assert(__llvm_ctx_profile_current_context_root != nullptr);
+  auto [Iter, Ins] = CtxRoot.Unhandled.insert({Guid, nullptr});
   if (Ins)
-    Iter->second = getCallsiteSlow(Guid, &CtxRoot->FirstUnhandledCalleeNode,
+    Iter->second = getCallsiteSlow(Guid, &CtxRoot.FirstUnhandledCalleeNode,
                                    NumCounters, 0);
   return markAsScratch(onContextEnter(*Iter->second));
 }
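
getOrStartContextOutsideCollection above encodes a priority order for a call made while no contextual collection is underway: an installed root detector gets sampled (and suppresses starting a known root), an allocated CtxRoot starts a context tree, and everything else lands in scratch or in a flat profile. A compile-time sketch of that ladder follows; the boolean parameters are hypothetical stand-ins for the runtime state actually consulted (getRootDetector(), the OwnCtxRoot > 1 check, hasEnteredARoot(), and ProfilingStarted).

enum class Ctx { Tree, Scratch, Flat };

// Mirrors the checks above, in order. Sampling the detector is a side effect
// in the real code; here we only model which context is handed back.
constexpr Ctx classify(bool HaveRootDetector, bool HasAllocatedRoot,
                       bool HasEnteredARoot, bool ProfilingStarted) {
  if (!HaveRootDetector && HasAllocatedRoot)
    return Ctx::Tree; // as if __llvm_ctx_profile_start_context were called
  if (HasEnteredARoot || !ProfilingStarted)
    return Ctx::Scratch; // don't mix flat profiles with activity under roots
  return Ctx::Flat;
}

static_assert(classify(false, true, false, false) == Ctx::Tree);
static_assert(classify(true, true, false, true) == Ctx::Flat);
static_assert(classify(false, false, true, true) == Ctx::Scratch);
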
@@ -351,10 +391,13 @@ ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
                                             GUID Guid, uint32_t NumCounters,
                                             uint32_t NumCallsites) {
   auto *CtxRoot = __llvm_ctx_profile_current_context_root;
-  // fast "out" if we're not even doing contextual collection.
+  auto *OwnCtxRoot = Data->CtxRoot;
   if (!CtxRoot)
-    return getUnhandledContext(*Data, Callee, Guid, NumCounters, NumCallsites,
-                               nullptr);
+    return getOrStartContextOutsideCollection(*Data, OwnCtxRoot, Callee, Guid,
+                                              NumCounters, NumCallsites);
+  onFunctionEntered(Callee);
+  assert(canBeRoot(CtxRoot));
+  // Re-entering the root being collected is handled like any other callsite.
 
   // also fast "out" if the caller is scratch. We can see if it's scratch by
   // looking at the interior pointer into the subcontexts vector that the caller
@@ -364,7 +407,7 @@ ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
   auto **CallsiteContext = consume(__llvm_ctx_profile_callsite[0]);
   if (!CallsiteContext || isScratch(CallsiteContext))
     return getUnhandledContext(*Data, Callee, Guid, NumCounters, NumCallsites,
-                               CtxRoot);
+                               *CtxRoot);
 
   // if the callee isn't the expected one, return scratch.
   // Signal handler(s) could have been invoked at any point in the execution.
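
The isScratch check above works on a tagged pointer: markAsScratch appears to set the low bit of an (aligned) ContextNode pointer, letting callers recognize scratch contexts with no extra state. A minimal model of that convention, assuming low-bit tagging; the helper names are illustrative:

#include <cassert>
#include <cstdint>

// ContextNode allocations are at least 2-byte aligned, so the low bit is
// free to carry the "scratch" flag.
template <typename T> T *markScratchPtr(T *P) {
  return reinterpret_cast<T *>(reinterpret_cast<uintptr_t>(P) | 1);
}
template <typename T> bool isScratchPtr(const T *P) {
  return reinterpret_cast<uintptr_t>(P) & 1;
}

int main() {
  alignas(2) int Node = 0; // stand-in for a ContextNode
  int *Tagged = markScratchPtr(&Node);
  assert(isScratchPtr(Tagged) && !isScratchPtr(&Node)); // tag is never deref'd
}
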
@@ -383,7 +426,7 @@ ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
   auto *ExpectedCallee = consume(__llvm_ctx_profile_expected_callee[0]);
   if (ExpectedCallee != Callee)
     return getUnhandledContext(*Data, Callee, Guid, NumCounters, NumCallsites,
-                               CtxRoot);
+                               *CtxRoot);
 
   auto *Callsite = *CallsiteContext;
   // in the case of indirect calls, we will have all seen targets forming a
@@ -410,16 +453,20 @@ ContextNode *__llvm_ctx_profile_start_context(FunctionData *FData, GUID Guid,
                                               uint32_t Callsites) {
   auto *Root = FData->getOrAllocateContextRoot();
   assert(canBeRoot(Root));
-  return tryStartContextGivenRoot(Root, Guid, Counters, Callsites);
+  auto *EntryAddress = FData->EntryAddress;
+  return tryStartContextGivenRoot(Root, EntryAddress, Guid, Counters,
+                                  Callsites);
 }
 
 void __llvm_ctx_profile_release_context(FunctionData *FData)
     SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+  if (!exitsRoot(FData->EntryAddress))
+    return;
   const auto *CurrentRoot = __llvm_ctx_profile_current_context_root;
   auto *CR = FData->CtxRoot;
   if (!CurrentRoot || CR != CurrentRoot)
     return;
-  IsUnderContext = false;
+
   assert(CR && canBeRoot(CR));
   __llvm_ctx_profile_current_context_root = nullptr;
   CR->Taken.Unlock();
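
With exitsRoot gating the release, every activation of a root, recursive ones included, still calls __llvm_ctx_profile_release_context; only the outermost exit releases the root's lock and clears __llvm_ctx_profile_current_context_root. A sketch of the pairing the instrumentation is expected to maintain; the declarations are illustrative stand-ins for the runtime's entry points, not the real headers.

#include <cstdint>

struct FunctionData; // opaque here; defined by the ctx_profile runtime
struct ContextNode;
extern "C" ContextNode *__llvm_ctx_profile_start_context(FunctionData *,
                                                         uint64_t GUID,
                                                         uint32_t Counters,
                                                         uint32_t Callsites);
extern "C" void __llvm_ctx_profile_release_context(FunctionData *);

// RAII pairing: each start is matched by exactly one release. On a recursive
// activation, exitsRoot() returns false inside release_context, making the
// inner release a no-op.
class RootActivation {
  FunctionData *FData;

public:
  ContextNode *Ctx;
  RootActivation(FunctionData *FD, uint64_t Guid, uint32_t Counters,
                 uint32_t Callsites)
      : FData(FD),
        Ctx(__llvm_ctx_profile_start_context(FD, Guid, Counters, Callsites)) {}
  ~RootActivation() { __llvm_ctx_profile_release_context(FData); }
};
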
@@ -500,6 +547,9 @@ bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) {
 void __llvm_ctx_profile_free() {
   __sanitizer::atomic_store_relaxed(&ProfilingStarted, false);
   {
+    if (auto *RD = getRootDetector()) {
+      RD->join();
+    }
     __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
         &AllContextsMutex);
     for (int I = 0, E = AllContextRoots.Size(); I < E; ++I)
@@ -522,5 +573,7 @@ void __llvm_ctx_profile_free() {
   }
 
   FlatCtxArenaHead = nullptr;
+  UnderContextRefCount = 0;
+  EnteredContextAddress = nullptr;
 }
 }