#include "CtxInstrProfiling.h"
#include "sanitizer_common/sanitizer_allocator_internal.h"
+ #include "sanitizer_common/sanitizer_atomic.h"
+ #include "sanitizer_common/sanitizer_atomic_clang.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_dense_map.h"
#include "sanitizer_common/sanitizer_libc.h"
@@ -27,6 +29,20 @@ __sanitizer::SpinMutex AllContextsMutex;
SANITIZER_GUARDED_BY(AllContextsMutex)
__sanitizer::Vector<ContextRoot *> AllContextRoots;

+ __sanitizer::atomic_uintptr_t AllFunctionsData = {};
+
+ // Keep all the functions for which we collect a flat profile in a linked list.
+ __sanitizer::SpinMutex FlatCtxArenaMutex;
+ SANITIZER_GUARDED_BY(FlatCtxArenaMutex)
+ Arena *FlatCtxArenaHead = nullptr;
+ SANITIZER_GUARDED_BY(FlatCtxArenaMutex)
+ Arena *FlatCtxArena = nullptr;
+
+ // Set to true when we enter a root, and false when we exit - regardless of
+ // whether this thread collects a contextual profile for that root.
+ __thread bool IsUnderContext = false;
+ __sanitizer::atomic_uint8_t ProfilingStarted = {};
+
// utility to taint a pointer by setting the LSB. There is an assumption
// throughout that the addresses of contexts are even (really, they should be
// align(8), but "even"-ness is the minimum assumption)
@@ -109,7 +125,10 @@ void resetContextNode(ContextNode &Node) {
      resetContextNode(*Next);
}

- void onContextEnter(ContextNode &Node) { ++Node.counters()[0]; }
+ ContextNode *onContextEnter(ContextNode &Node) {
+   ++Node.counters()[0];
+   return &Node;
+ }

} // namespace
@@ -182,12 +201,75 @@ ContextNode *getCallsiteSlow(GUID Guid, ContextNode **InsertionPoint,
  return Ret;
}

- ContextNode *__llvm_ctx_profile_get_context(void *Callee, GUID Guid,
-                                             uint32_t NumCounters,
+ ContextNode *getFlatProfile(FunctionData &Data, GUID Guid,
+                             uint32_t NumCounters) {
+   if (ContextNode *Existing = Data.FlatCtx)
+     return Existing;
+   {
+     // We could instead try to take the lock and, if that fails, return
+     // TheScratchContext. But that could leave message pump loops more sparsely
+     // profiled than everything else. Maybe that doesn't matter, and we can
+     // optimize this later.
+     __sanitizer::GenericScopedLock<__sanitizer::StaticSpinMutex> L(&Data.Mutex);
+     if (ContextNode *Existing = Data.FlatCtx)
+       return Existing;
+
+     auto NeededSize = ContextNode::getAllocSize(NumCounters, 0);
+     char *AllocBuff = nullptr;
+     {
+       __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> FL(
+           &FlatCtxArenaMutex);
+       if (FlatCtxArena)
+         AllocBuff = FlatCtxArena->tryBumpAllocate(NeededSize);
+       if (!AllocBuff) {
+         FlatCtxArena = Arena::allocateNewArena(getArenaAllocSize(NeededSize),
+                                                FlatCtxArena);
+         AllocBuff = FlatCtxArena->tryBumpAllocate(NeededSize);
+       }
+       if (!FlatCtxArenaHead)
+         FlatCtxArenaHead = FlatCtxArena;
+     }
+     auto *Ret = allocContextNode(AllocBuff, Guid, NumCounters, 0);
+     Data.FlatCtx = Ret;
+
+     Data.Next = reinterpret_cast<FunctionData *>(
+         __sanitizer::atomic_load_relaxed(&AllFunctionsData));
+     while (!__sanitizer::atomic_compare_exchange_strong(
+         &AllFunctionsData, reinterpret_cast<uintptr_t *>(&Data.Next),
+         reinterpret_cast<uintptr_t>(&Data),
+         __sanitizer::memory_order_release)) {
+     }
+   }
+
+   return Data.FlatCtx;
+ }
+
+ ContextNode *getUnhandledContext(FunctionData &Data, GUID Guid,
+                                  uint32_t NumCounters) {
+   // 1) if we are under a root (regardless of whether this thread is collecting
+   // a contextual profile for that root), do not collect a flat profile. We want
+   // to keep flat profiles only for activations that can't happen under a root,
+   // to avoid confusing profiles. We can, for example, combine flattened and
+   // flat profiles meaningfully, as we wouldn't double-count anything.
+   //
+   // 2) to avoid lengthy startup, don't bother with flat profiles until
+   // profiling has started. We would reset them anyway when profiling starts.
+   // HOWEVER. This does lose profiling for message pumps: those functions are
+   // entered once and never exit. They should be assumed to be entered before
+   // profiling starts - because profiling should start after the server is up
+   // and running (which is equivalent to "message pumps are set up").
+   if (IsUnderContext || !__sanitizer::atomic_load_relaxed(&ProfilingStarted))
+     return TheScratchContext;
+   return markAsScratch(
+       onContextEnter(*getFlatProfile(Data, Guid, NumCounters)));
+ }
+
+ ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
+                                             GUID Guid, uint32_t NumCounters,
                                              uint32_t NumCallsites) {
  // fast "out" if we're not even doing contextual collection.
  if (!__llvm_ctx_profile_current_context_root)
-   return TheScratchContext;
+   return getUnhandledContext(*Data, Guid, NumCounters);

  // also fast "out" if the caller is scratch. We can see if it's scratch by
  // looking at the interior pointer into the subcontexts vector that the caller
@@ -196,7 +278,7 @@ ContextNode *__llvm_ctx_profile_get_context(void *Callee, GUID Guid,
  // precisely, aligned - 8 values)
  auto **CallsiteContext = consume(__llvm_ctx_profile_callsite[0]);
  if (!CallsiteContext || isScratch(CallsiteContext))
-   return TheScratchContext;
+   return getUnhandledContext(*Data, Guid, NumCounters);

  // if the callee isn't the expected one, return scratch.
  // Signal handler(s) could have been invoked at any point in the execution.
@@ -214,7 +296,7 @@ ContextNode *__llvm_ctx_profile_get_context(void *Callee, GUID Guid,
  // for that case.
  auto *ExpectedCallee = consume(__llvm_ctx_profile_expected_callee[0]);
  if (ExpectedCallee != Callee)
-   return TheScratchContext;
+   return getUnhandledContext(*Data, Guid, NumCounters);

  auto *Callsite = *CallsiteContext;
  // in the case of indirect calls, we will have all seen targets forming a
@@ -257,6 +339,7 @@ void setupContext(ContextRoot *Root, GUID Guid, uint32_t NumCounters,
ContextNode *__llvm_ctx_profile_start_context(
    ContextRoot *Root, GUID Guid, uint32_t Counters,
    uint32_t Callsites) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+   IsUnderContext = true;
  if (!Root->FirstMemBlock) {
    setupContext(Root, Guid, Counters, Callsites);
  }
@@ -272,6 +355,7 @@ ContextNode *__llvm_ctx_profile_start_context(

void __llvm_ctx_profile_release_context(ContextRoot *Root)
    SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+   IsUnderContext = false;
  if (__llvm_ctx_profile_current_context_root) {
    __llvm_ctx_profile_current_context_root = nullptr;
    Root->Taken.Unlock();
@@ -291,10 +375,12 @@ void __llvm_ctx_profile_start_collection() {

    resetContextNode(*Root->FirstNode);
  }
+   __sanitizer::atomic_store_relaxed(&ProfilingStarted, true);
  __sanitizer::Printf("[ctxprof] Initial NumMemUnits: %zu \n", NumMemUnits);
}

bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) {
+   __sanitizer::atomic_store_relaxed(&ProfilingStarted, false);
  __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
      &AllContextsMutex);
@@ -310,17 +396,43 @@ bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) {
    Writer.writeContextual(*Root->FirstNode);
  }
  Writer.endContextSection();
+   Writer.startFlatSection();
+   // The list progresses behind the head, so taking this snapshot allows the
+   // list to grow concurrently without causing a race condition with our
+   // traversing it.
+   const auto *Pos = reinterpret_cast<const FunctionData *>(
+       __sanitizer::atomic_load_relaxed(&AllFunctionsData));
+   for (; Pos; Pos = Pos->Next)
+     Writer.writeFlat(Pos->FlatCtx->guid(), Pos->FlatCtx->counters(),
+                      Pos->FlatCtx->counters_size());
+   Writer.endFlatSection();
  return true;
}

void __llvm_ctx_profile_free() {
-   __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
-       &AllContextsMutex);
-   for (int I = 0, E = AllContextRoots.Size(); I < E; ++I)
-     for (auto *A = AllContextRoots[I]->FirstMemBlock; A;) {
+   __sanitizer::atomic_store_relaxed(&ProfilingStarted, false);
+   {
+     __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
+         &AllContextsMutex);
+     for (int I = 0, E = AllContextRoots.Size(); I < E; ++I)
+       for (auto *A = AllContextRoots[I]->FirstMemBlock; A;) {
+         auto *C = A;
+         A = A->next();
+         __sanitizer::InternalFree(C);
+       }
+     AllContextRoots.Reset();
+   }
+   __sanitizer::atomic_store_relaxed(&AllFunctionsData, 0U);
+   {
+     __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
+         &FlatCtxArenaMutex);
+     FlatCtxArena = nullptr;
+     for (auto *A = FlatCtxArenaHead; A;) {
      auto *C = A;
-     A = A->next();
+       A = C->next();
      __sanitizer::InternalFree(C);
    }
-   AllContextRoots.Reset();
+
+     FlatCtxArenaHead = nullptr;
+   }
}
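
For reference, the AllFunctionsData update in getFlatProfile together with the snapshot walk in __llvm_ctx_profile_fetch is the usual lock-free singly-linked-list prepend plus read-only traversal. Below is a minimal standalone sketch of that pattern (not part of the patch), using std::atomic in place of the __sanitizer atomics and a hypothetical Node type standing in for FunctionData:

#include <atomic>

// Hypothetical stand-in for FunctionData: only the intrusive Next pointer and
// a bit of payload matter for the pattern.
struct Node {
  Node *Next = nullptr;
  long Counter = 0;
};

std::atomic<Node *> Head{nullptr}; // plays the role of AllFunctionsData

void prepend(Node *N) {
  // Seed N->Next with the current head; on CAS failure, compare_exchange_weak
  // rewrites N->Next with the head it observed, so the loop body stays empty.
  N->Next = Head.load(std::memory_order_relaxed);
  while (!Head.compare_exchange_weak(N->Next, N, std::memory_order_release,
                                     std::memory_order_relaxed)) {
  }
}

// Readers snapshot the head once and follow Next; concurrent prepends only
// grow the list in front of the snapshot, mirroring the flat-profile
// traversal in __llvm_ctx_profile_fetch.
long sumCounters() {
  long Total = 0;
  for (const Node *Pos = Head.load(std::memory_order_acquire); Pos;
       Pos = Pos->Next)
    Total += Pos->Counter;
  return Total;
}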