@@ -8,6 +8,8 @@
 
 #include "CtxInstrProfiling.h"
 #include "sanitizer_common/sanitizer_allocator_internal.h"
+#include "sanitizer_common/sanitizer_atomic.h"
+#include "sanitizer_common/sanitizer_atomic_clang.h"
 #include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_dense_map.h"
 #include "sanitizer_common/sanitizer_libc.h"
@@ -27,6 +29,20 @@ __sanitizer::SpinMutex AllContextsMutex;
 SANITIZER_GUARDED_BY(AllContextsMutex)
 __sanitizer::Vector<ContextRoot *> AllContextRoots;
 
+__sanitizer::atomic_uintptr_t AllFunctionsData = {};
+
+// Keep all the functions for which we collect a flat profile in a linked list.
+__sanitizer::SpinMutex FlatCtxArenaMutex;
+SANITIZER_GUARDED_BY(FlatCtxArenaMutex)
+Arena *FlatCtxArenaHead = nullptr;
+SANITIZER_GUARDED_BY(FlatCtxArenaMutex)
+Arena *FlatCtxArena = nullptr;
+
+// Set to true when we enter a root, and false when we exit - regardless of
+// whether this thread collects a contextual profile for that root.
+__thread bool IsUnderContext = false;
+__sanitizer::atomic_uint8_t ProfilingStarted = {};
+
 // utility to taint a pointer by setting the LSB. There is an assumption
 // throughout that the addresses of contexts are even (really, they should be
 // align(8), but "even"-ness is the minimum assumption)
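
The FunctionData type used by the additions below is declared in CtxInstrProfiling.h, which is not part of this diff. Judging only from how the new code uses it, a rough sketch of the fields it must carry would be the following (field names are real, layout and initializers are assumed):

// Sketch only - the real definition lives in CtxInstrProfiling.h.
struct FunctionData {
  // Serializes the one-time allocation of FlatCtx in getFlatProfile().
  __sanitizer::StaticSpinMutex Mutex;
  // Intrusive link for the global list rooted at AllFunctionsData.
  FunctionData *Next = nullptr;
  // Lazily allocated flat (context-less) counter node for this function.
  ContextNode *FlatCtx = nullptr;
};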
@@ -109,7 +125,10 @@ void resetContextNode(ContextNode &Node) {
     resetContextNode(*Next);
 }
 
-void onContextEnter(ContextNode &Node) { ++Node.counters()[0]; }
+ContextNode *onContextEnter(ContextNode &Node) {
+  ++Node.counters()[0];
+  return &Node;
+}
 
 } // namespace
 
@@ -182,12 +201,74 @@ ContextNode *getCallsiteSlow(GUID Guid, ContextNode **InsertionPoint,
   return Ret;
 }
 
-ContextNode *__llvm_ctx_profile_get_context(void *Callee, GUID Guid,
-                                            uint32_t NumCounters,
+ContextNode *getFlatProfile(FunctionData &Data, GUID Guid,
+                            uint32_t NumCounters) {
+  if (ContextNode *Existing = Data.FlatCtx)
+    return Existing;
+  {
+    // We could instead try to take the lock and, if that fails, return
+    // TheScratchContext. But that could leave message pump loops more sparsely
+    // profiled than everything else. Maybe that doesn't matter, and we can
+    // optimize this later.
+    __sanitizer::GenericScopedLock<__sanitizer::StaticSpinMutex> L(&Data.Mutex);
+    if (ContextNode *Existing = Data.FlatCtx)
+      return Existing;
+
+    auto NeededSize = ContextNode::getAllocSize(NumCounters, 0);
+    char *AllocBuff = nullptr;
+    {
+      __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> FL(
+          &FlatCtxArenaMutex);
+      if (FlatCtxArena)
+        AllocBuff = FlatCtxArena->tryBumpAllocate(NeededSize);
+      if (!AllocBuff) {
+        FlatCtxArena = Arena::allocateNewArena(getArenaAllocSize(NeededSize),
+                                               FlatCtxArena);
+        AllocBuff = FlatCtxArena->tryBumpAllocate(NeededSize);
+      }
+      if (!FlatCtxArenaHead)
+        FlatCtxArenaHead = FlatCtxArena;
+    }
+    auto *Ret = allocContextNode(AllocBuff, Guid, NumCounters, 0);
+    Data.FlatCtx = Ret;
+
+    Data.Next = reinterpret_cast<FunctionData *>(
+        __sanitizer::atomic_load_relaxed(&AllFunctionsData));
+    while (!__sanitizer::atomic_compare_exchange_strong(
+        &AllFunctionsData, reinterpret_cast<uintptr_t *>(&Data.Next),
+        reinterpret_cast<uintptr_t>(&Data),
+        __sanitizer::memory_order_release)) {
+    }
+  }
+
+  return Data.FlatCtx;
+}
+
+ContextNode *getUnhandledContext(FunctionData &Data, GUID Guid,
+                                 uint32_t NumCounters) {
+  // 1) if we are under a root (regardless of whether this thread is collecting
+  // a contextual profile for that root), do not collect a flat profile. We
+  // want to keep flat profiles only for activations that can't happen under a
+  // root, to avoid confusing profiles. We can, for example, combine flattened
+  // and flat profiles meaningfully, as we wouldn't double-count anything.
+  //
+  // 2) to avoid lengthy startup, don't bother with flat profiles until
+  // profiling has started. We would reset them anyway when profiling starts.
+  // HOWEVER. This does lose profiling for message pumps: those functions are
+  // entered once and never exit. They should be assumed to be entered before
+  // profiling starts - because profiling should start after the server is up
+  // and running (which is equivalent to "message pumps are set up").
+  if (IsUnderContext || !__sanitizer::atomic_load_relaxed(&ProfilingStarted))
+    return TheScratchContext;
+  return markAsScratch(
+      onContextEnter(*getFlatProfile(Data, Guid, NumCounters)));
+}
+
+ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
+                                            GUID Guid, uint32_t NumCounters,
                                             uint32_t NumCallsites) {
   // fast "out" if we're not even doing contextual collection.
   if (!__llvm_ctx_profile_current_context_root)
-    return TheScratchContext;
+    return getUnhandledContext(*Data, Guid, NumCounters);
 
   // also fast "out" if the caller is scratch. We can see if it's scratch by
   // looking at the interior pointer into the subcontexts vector that the caller
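
The registration of a FunctionData into AllFunctionsData above is the standard lock-free push onto an intrusive singly linked list: seed Next with the currently observed head, then compare-and-swap the head from that value to the new node, retrying until it sticks (a failed compare_exchange refreshes Data.Next with the head it actually saw, so the loop body can stay empty). A minimal standalone sketch of the same idiom using std::atomic, with hypothetical names, purely for illustration:

#include <atomic>

struct Node {
  Node *Next = nullptr;
};

std::atomic<Node *> ListHead{nullptr}; // plays the role of AllFunctionsData

void push(Node &N) {
  // Publish N as the new head. On CAS failure, compare_exchange_weak reloads
  // the freshly observed head into N.Next, so we simply retry.
  N.Next = ListHead.load(std::memory_order_relaxed);
  while (!ListHead.compare_exchange_weak(N.Next, &N, std::memory_order_release,
                                         std::memory_order_relaxed)) {
  }
}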
@@ -196,7 +277,7 @@ ContextNode *__llvm_ctx_profile_get_context(void *Callee, GUID Guid,
   // precisely, aligned - 8 values)
   auto **CallsiteContext = consume(__llvm_ctx_profile_callsite[0]);
   if (!CallsiteContext || isScratch(CallsiteContext))
-    return TheScratchContext;
+    return getUnhandledContext(*Data, Guid, NumCounters);
 
   // if the callee isn't the expected one, return scratch.
   // Signal handler(s) could have been invoked at any point in the execution.
@@ -214,7 +295,7 @@ ContextNode *__llvm_ctx_profile_get_context(void *Callee, GUID Guid,
   // for that case.
   auto *ExpectedCallee = consume(__llvm_ctx_profile_expected_callee[0]);
   if (ExpectedCallee != Callee)
-    return TheScratchContext;
+    return getUnhandledContext(*Data, Guid, NumCounters);
 
   auto *Callsite = *CallsiteContext;
   // in the case of indirect calls, we will have all seen targets forming a
@@ -257,6 +338,7 @@ void setupContext(ContextRoot *Root, GUID Guid, uint32_t NumCounters,
 ContextNode *__llvm_ctx_profile_start_context(
     ContextRoot *Root, GUID Guid, uint32_t Counters,
     uint32_t Callsites) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+  IsUnderContext = true;
   if (!Root->FirstMemBlock) {
     setupContext(Root, Guid, Counters, Callsites);
   }
@@ -272,6 +354,7 @@ ContextNode *__llvm_ctx_profile_start_context(
 
 void __llvm_ctx_profile_release_context(ContextRoot *Root)
     SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+  IsUnderContext = false;
   if (__llvm_ctx_profile_current_context_root) {
     __llvm_ctx_profile_current_context_root = nullptr;
     Root->Taken.Unlock();
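
IsUnderContext is flipped in the two entry points above, which the compiler-inserted instrumentation is expected to call in a bracketed fashion around each root invocation. A purely illustrative shape of such an instrumented root follows; the root descriptor, GUID and counter counts are placeholders, and in reality these calls are emitted by the instrumentation pass rather than written by hand:

extern ContextRoot MyRootDescriptor; // placeholder; normally compiler-emitted

void instrumentedRootFunction() {
  // Enters the root: sets IsUnderContext and hands back the root's ContextNode
  // (or a scratch node when the root can't be taken by this thread).
  ContextNode *Ctx = __llvm_ctx_profile_start_context(
      &MyRootDescriptor, /*Guid=*/0x1234, /*Counters=*/4, /*Callsites=*/2);
  (void)Ctx; // the real instrumentation bumps Ctx's counters in the body
  // ... function body ...
  __llvm_ctx_profile_release_context(&MyRootDescriptor); // clears IsUnderContext
}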
@@ -291,10 +374,12 @@ void __llvm_ctx_profile_start_collection() {
 
     resetContextNode(*Root->FirstNode);
   }
+  __sanitizer::atomic_store_relaxed(&ProfilingStarted, true);
   __sanitizer::Printf("[ctxprof] Initial NumMemUnits: %zu \n", NumMemUnits);
 }
 
 bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) {
+  __sanitizer::atomic_store_relaxed(&ProfilingStarted, false);
   __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
       &AllContextsMutex);
 
@@ -310,17 +395,42 @@ bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) {
     Writer.writeContextual(*Root->FirstNode);
   }
   Writer.endContextSection();
+  Writer.startFlatSection();
+  // The list only grows at the head, so everything behind the snapshot we take
+  // here is immutable; the list can keep growing concurrently without racing
+  // with our traversal.
+  const auto *Pos = reinterpret_cast<const FunctionData *>(
+      __sanitizer::atomic_load_relaxed(&AllFunctionsData));
+  for (; Pos; Pos = Pos->Next)
+    Writer.writeFlat(Pos->FlatCtx->guid(), Pos->FlatCtx->counters(),
+                     Pos->FlatCtx->counters_size());
+  Writer.endFlatSection();
   return true;
 }
 
 void __llvm_ctx_profile_free() {
-  __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
-      &AllContextsMutex);
-  for (int I = 0, E = AllContextRoots.Size(); I < E; ++I)
-    for (auto *A = AllContextRoots[I]->FirstMemBlock; A;) {
+  {
+    __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
+        &AllContextsMutex);
+    for (int I = 0, E = AllContextRoots.Size(); I < E; ++I)
+      for (auto *A = AllContextRoots[I]->FirstMemBlock; A;) {
+        auto *C = A;
+        A = A->next();
+        __sanitizer::InternalFree(C);
+      }
+    AllContextRoots.Reset();
+  }
+  __sanitizer::atomic_store_relaxed(&AllFunctionsData, 0U);
+  {
+    __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
+        &FlatCtxArenaMutex);
+    FlatCtxArena = nullptr;
+    for (auto *A = FlatCtxArenaHead; A;) {
       auto *C = A;
-      A = A->next();
+      A = C->next();
       __sanitizer::InternalFree(C);
     }
-  AllContextRoots.Reset();
+
+    FlatCtxArenaHead = nullptr;
+  }
 }
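
For the new flat section emitted by __llvm_ctx_profile_fetch, the writer receives one writeFlat call per function on the AllFunctionsData list. The sketch below is a hypothetical standalone consumer mirroring that call sequence; it is not the real ProfileWriter interface from CtxInstrProfiling.h, just an illustration of the data handed over, with counter 0 being the entry count bumped by onContextEnter:

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Hypothetical stand-in mirroring the flat-section calls made above.
struct FlatSectionPrinter {
  void startFlatSection() { std::puts("flat section begin"); }
  // One call per FunctionData on the list: the function's GUID plus a view
  // over its NumCounters counters.
  void writeFlat(uint64_t Guid, const uint64_t *Counters, size_t NumCounters) {
    std::printf("  guid=%llu entries=%llu (%zu counters)\n",
                static_cast<unsigned long long>(Guid),
                static_cast<unsigned long long>(Counters[0]), NumCounters);
  }
  void endFlatSection() { std::puts("flat section end"); }
};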