Skip to content

Commit d2502c8

Browse files
committed
Reentry
1 parent 928963c commit d2502c8

File tree

5 files changed

+269
-89
lines changed

5 files changed

+269
-89
lines changed

compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp

Lines changed: 102 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,44 @@ Arena *FlatCtxArena = nullptr;
4141

4242
// Per-thread tracking of (re)entries into profiling roots. A root may be
// entered recursively (directly or mutually); only the outermost activation
// should be treated as "the" entry. UnderContextRefCount counts how many live
// activations of the entered root this thread has, and EnteredContextAddress
// is the entry address of the root currently entered (nullptr when none).
// `volatile` matches the original declaration; these are thread-local, so no
// cross-thread synchronization is implied.
__thread int UnderContextRefCount = 0;
__thread void *volatile EnteredContextAddress = 0;

// Called on function entry while under a root: bumps the refcount only when
// the entered function is the root itself (i.e. a recursive re-entry).
void onFunctionEntered(void *Address) {
  UnderContextRefCount += (Address == EnteredContextAddress);
  assert(UnderContextRefCount > 0);
}

// Mirror of onFunctionEntered: drops the refcount when the exiting function
// is the currently-entered root.
void onFunctionExited(void *Address) {
  UnderContextRefCount -= (Address == EnteredContextAddress);
  assert(UnderContextRefCount >= 0);
}

// Returns true iff this is the outermost entry of the root at `Address` on
// this thread. BUGFIX: the original initialized Ret to true (and only ever
// assigned true), so recursive re-entries also reported "first", which
// double-counts ContextRoot::TotalEntries — the RecursiveRoot* tests in this
// same commit expect TotalEntries == 1 after a re-entry.
bool rootEnterIsFirst(void *Address) {
  bool Ret = false;
  if (!EnteredContextAddress) {
    EnteredContextAddress = Address;
    assert(UnderContextRefCount == 0);
    Ret = true;
  }
  onFunctionEntered(Address);
  return Ret;
}

// Returns true if this exit balances the outermost root entry, i.e. the
// thread is no longer under any root afterwards.
bool exitsRoot(void *Address) {
  onFunctionExited(Address);
  if (UnderContextRefCount == 0) {
    EnteredContextAddress = nullptr;
    return true;
  }
  return false;
}

// True while this thread is (transitively) inside some root's activation.
bool hasEnteredARoot() { return UnderContextRefCount > 0; }
81+
4582
__sanitizer::atomic_uint8_t ProfilingStarted = {};
4683

4784
__sanitizer::atomic_uintptr_t RootDetector = {};
@@ -287,62 +324,65 @@ ContextRoot *FunctionData::getOrAllocateContextRoot() {
287324
return Root;
288325
}
289326

290-
// Try to start collection for `Root` on this thread. Counts the activation
// (outermost entries only), lazily sets up the root's storage, and takes the
// root's lock if available. Returns the root's first ContextNode on success,
// or the scratch context if another activation already holds the lock.
ContextNode *tryStartContextGivenRoot(
    ContextRoot *Root, void *EntryAddress, GUID Guid, uint32_t Counters,
    uint32_t Callsites) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
  // Count only the outermost entry of this root, so recursive activations
  // don't inflate TotalEntries.
  if (rootEnterIsFirst(EntryAddress))
    __sanitizer::atomic_fetch_add(&Root->TotalEntries, 1,
                                  __sanitizer::memory_order_relaxed);
  // Lazily set up the root's arena and first node on first use.
  if (!Root->FirstMemBlock) {
    setupContext(Root, Guid, Counters, Callsites);
  }
  if (Root->Taken.TryLock()) {
    assert(__llvm_ctx_profile_current_context_root == nullptr);
    __llvm_ctx_profile_current_context_root = Root;
    onContextEnter(*Root->FirstNode);
    return Root->FirstNode;
  }
  // If this thread couldn't take the lock, return scratch context.
  return TheScratchContext;
}
308346

347+
// Handle a function entry while no contextual collection is active on this
// thread: either auto-start a context tree if this function is a known root,
// sample for root auto-detection, or fall back to scratch / flat profiling.
ContextNode *getOrStartContextOutsideCollection(FunctionData &Data,
                                                ContextRoot *OwnCtxRoot,
                                                void *Callee, GUID Guid,
                                                uint32_t NumCounters,
                                                uint32_t NumCallsites) {
  // This must only be called when __llvm_ctx_profile_current_context_root is
  // null.
  assert(__llvm_ctx_profile_current_context_root == nullptr);
  // OwnCtxRoot is Data.CtxRoot. Since it's volatile, and is used by the caller,
  // pre-load it.
  assert(Data.CtxRoot == OwnCtxRoot);
  // If we have a root detector, try sampling.
  // Otherwise - regardless if we started profiling or not, if Data.CtxRoot is
  // allocated, try starting a context tree - basically, as-if
  // __llvm_ctx_profile_start_context were called.
  if (auto *RAD = getRootDetector())
    RAD->sample();
  else if (reinterpret_cast<uintptr_t>(OwnCtxRoot) > 1)
    return tryStartContextGivenRoot(OwnCtxRoot, Data.EntryAddress, Guid,
                                    NumCounters, NumCallsites);

  // If we didn't start profiling, or if we are under a context, just not
  // collecting, return the scratch buffer.
  if (hasEnteredARoot() ||
      !__sanitizer::atomic_load_relaxed(&ProfilingStarted))
    return TheScratchContext;
  return markAsScratch(
      onContextEnter(*getFlatProfile(Data, Callee, Guid, NumCounters)));
}
376+
309377
// Fetch (or lazily create) the ContextNode for `Guid` in `CtxRoot`'s
// "Unhandled" set — callees reached while collecting under a root but not via
// an instrumented callsite. The returned node is marked scratch so no
// subcontext chaining happens under it.
// NOTE(review): Data/Callee/NumCallsites are presently unused here; presumably
// kept so all get-context helpers share a uniform call shape — confirm.
ContextNode *getUnhandledContext(FunctionData &Data, void *Callee, GUID Guid,
                                 uint32_t NumCounters, uint32_t NumCallsites,
                                 ContextRoot &CtxRoot) {
  // This must only be called when
  // __llvm_ctx_profile_current_context_root is not null
  assert(__llvm_ctx_profile_current_context_root != nullptr);
  auto [Iter, Ins] = CtxRoot.Unhandled.insert({Guid, nullptr});
  if (Ins)
    Iter->second = getCallsiteSlow(Guid, &CtxRoot.FirstUnhandledCalleeNode,
                                   NumCounters, 0);
  return markAsScratch(onContextEnter(*Iter->second));
}
@@ -351,10 +391,13 @@ ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
351391
GUID Guid, uint32_t NumCounters,
352392
uint32_t NumCallsites) {
353393
auto *CtxRoot = __llvm_ctx_profile_current_context_root;
354-
// fast "out" if we're not even doing contextual collection.
394+
auto *OwnCtxRoot = Data->CtxRoot;
355395
if (!CtxRoot)
356-
return getUnhandledContext(*Data, Callee, Guid, NumCounters, NumCallsites,
357-
nullptr);
396+
return getOrStartContextOutsideCollection(*Data, OwnCtxRoot, Callee, Guid,
397+
NumCounters, NumCallsites);
398+
onFunctionEntered(Callee);
399+
assert(canBeRoot(CtxRoot));
400+
// should we re-enter the root we're currently collecting,
358401

359402
// also fast "out" if the caller is scratch. We can see if it's scratch by
360403
// looking at the interior pointer into the subcontexts vector that the caller
@@ -364,7 +407,7 @@ ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
364407
auto **CallsiteContext = consume(__llvm_ctx_profile_callsite[0]);
365408
if (!CallsiteContext || isScratch(CallsiteContext))
366409
return getUnhandledContext(*Data, Callee, Guid, NumCounters, NumCallsites,
367-
CtxRoot);
410+
*CtxRoot);
368411

369412
// if the callee isn't the expected one, return scratch.
370413
// Signal handler(s) could have been invoked at any point in the execution.
@@ -383,7 +426,7 @@ ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
383426
auto *ExpectedCallee = consume(__llvm_ctx_profile_expected_callee[0]);
384427
if (ExpectedCallee != Callee)
385428
return getUnhandledContext(*Data, Callee, Guid, NumCounters, NumCallsites,
386-
CtxRoot);
429+
*CtxRoot);
387430

388431
auto *Callsite = *CallsiteContext;
389432
// in the case of indirect calls, we will have all seen targets forming a
@@ -410,16 +453,20 @@ ContextNode *__llvm_ctx_profile_start_context(FunctionData *FData, GUID Guid,
410453
uint32_t Callsites) {
411454
auto *Root = FData->getOrAllocateContextRoot();
412455
assert(canBeRoot(Root));
413-
return tryStartContextGivenRoot(Root, Guid, Counters, Callsites);
456+
auto *EntryAddress = FData->EntryAddress;
457+
return tryStartContextGivenRoot(Root, EntryAddress, Guid, Counters,
458+
Callsites);
414459
}
415460

416461
// Balance a root entry. Only the exit that leaves the outermost activation of
// the entered root releases the root's lock and clears the current context;
// inner (recursive) exits just drop the per-thread refcount.
// (Closing brace reconstructed — the diff hunk ends at Unlock().)
void __llvm_ctx_profile_release_context(FunctionData *FData)
    SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
  if (!exitsRoot(FData->EntryAddress))
    return;
  const auto *CurrentRoot = __llvm_ctx_profile_current_context_root;
  auto *CR = FData->CtxRoot;
  // Nothing to release if this thread wasn't collecting for this root (e.g.
  // it received the scratch context because the lock was already taken).
  if (!CurrentRoot || CR != CurrentRoot)
    return;
  assert(CR && canBeRoot(CR));
  __llvm_ctx_profile_current_context_root = nullptr;
  CR->Taken.Unlock();
}
@@ -500,6 +547,10 @@ bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) {
500547
void __llvm_ctx_profile_free() {
501548
__sanitizer::atomic_store_relaxed(&ProfilingStarted, false);
502549
{
550+
__sanitizer::atomic_store_relaxed(&ProfilingStarted, false);
551+
if (auto *RD = getRootDetector()) {
552+
RD->join();
553+
}
503554
__sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
504555
&AllContextsMutex);
505556
for (int I = 0, E = AllContextRoots.Size(); I < E; ++I)
@@ -522,5 +573,7 @@ void __llvm_ctx_profile_free() {
522573
}
523574

524575
FlatCtxArenaHead = nullptr;
576+
UnderContextRefCount = 0;
577+
EnteredContextAddress = nullptr;
525578
}
526579
}

compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp

Lines changed: 114 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,27 @@
33
#include <thread>
44

55
using namespace __ctx_profile;
6+
using namespace __asan;
67

78
class ContextTest : public ::testing::Test {
8-
void SetUp() override { Root.getOrAllocateContextRoot(); }
9+
int SomethingWithAddress = 0;
10+
void SetUp() override {
11+
Root.EntryAddress = &SomethingWithAddress;
12+
Root.getOrAllocateContextRoot();
13+
}
914
void TearDown() override { __llvm_ctx_profile_free(); }
1015

1116
public:
1217
FunctionData Root;
18+
void initializeFData(std::vector<FunctionData> &FData,
19+
const std::vector<int> &FuncAddresses, bool AsRoots) {
20+
ASSERT_EQ(FData.size(), FuncAddresses.size());
21+
for (size_t I = 0, E = FData.size(); I < E; ++I) {
22+
FData[I].EntryAddress = &FuncAddresses[I];
23+
if (AsRoots)
24+
FData[I].getOrAllocateContextRoot();
25+
}
26+
}
1327
};
1428

1529
TEST(ArenaTest, ZeroInit) {
@@ -85,7 +99,11 @@ TEST_F(ContextTest, Callsite) {
8599

86100
EXPECT_EQ(Subctx->size(), sizeof(ContextNode) + 3 * sizeof(uint64_t) +
87101
1 * sizeof(ContextNode *));
102+
EXPECT_EQ(__llvm_ctx_profile_current_context_root, Root.CtxRoot);
103+
__llvm_ctx_profile_release_context(&FData);
104+
EXPECT_EQ(__llvm_ctx_profile_current_context_root, Root.CtxRoot);
88105
__llvm_ctx_profile_release_context(&Root);
106+
EXPECT_EQ(__llvm_ctx_profile_current_context_root, nullptr);
89107
}
90108

91109
TEST_F(ContextTest, ScratchNoCollectionProfilingNotStarted) {
@@ -122,11 +140,41 @@ TEST_F(ContextTest, ScratchNoCollectionProfilingStarted) {
122140
EXPECT_NE(FData.FlatCtx, nullptr);
123141
EXPECT_EQ(reinterpret_cast<uintptr_t>(FData.FlatCtx) + 1,
124142
reinterpret_cast<uintptr_t>(Ctx));
143+
EXPECT_EQ(__llvm_ctx_profile_current_context_root, nullptr);
144+
__llvm_ctx_profile_release_context(&FData);
145+
EXPECT_EQ(__llvm_ctx_profile_current_context_root, nullptr);
146+
}
147+
148+
// Calling into another registered root while already collecting must not
// switch the current context root, and releases must unwind symmetrically.
TEST_F(ContextTest, RootCallingRootDoesNotChangeCurrentContext) {
  ASSERT_EQ(__llvm_ctx_profile_current_context_root, nullptr);
  int FakeCalleeAddress[2]{0, 0};
  FunctionData FData[2];
  FData[0].EntryAddress = &FakeCalleeAddress[0];
  FData[1].EntryAddress = &FakeCalleeAddress[1];
  FData[0].getOrAllocateContextRoot();
  FData[1].getOrAllocateContextRoot();
  __llvm_ctx_profile_start_collection();
  auto *Ctx1 = __llvm_ctx_profile_get_context(&FData[0], &FakeCalleeAddress[0],
                                              1234U, 1U, 1U);
  EXPECT_EQ(Ctx1, FData[0].CtxRoot->FirstNode);
  EXPECT_EQ(__llvm_ctx_profile_current_context_root, FData[0].CtxRoot);

  __llvm_ctx_profile_expected_callee[0] = &FakeCalleeAddress[0];
  __llvm_ctx_profile_callsite[0] = &Ctx1->subContexts()[0];
  auto *Ctx2 =
      __llvm_ctx_profile_get_context(&FData[1], &FakeCalleeAddress[1], 2, 1, 0);
  (void)Ctx2; // the invariant under test is the current root, not Ctx2's value
  EXPECT_EQ(__llvm_ctx_profile_current_context_root, FData[0].CtxRoot);
  __llvm_ctx_profile_release_context(&FData[1]);
  EXPECT_EQ(__llvm_ctx_profile_current_context_root, FData[0].CtxRoot);
  __llvm_ctx_profile_release_context(&FData[0]);
  EXPECT_EQ(__llvm_ctx_profile_current_context_root, nullptr);
}
126172

127173
TEST_F(ContextTest, ScratchDuringCollection) {
128174
__llvm_ctx_profile_start_collection();
129175
auto *Ctx = __llvm_ctx_profile_start_context(&Root, 1, 10, 4);
176+
EXPECT_EQ(__llvm_ctx_profile_current_context_root, Root.CtxRoot);
177+
130178
int FakeCalleeAddress = 0;
131179
int OtherFakeCalleeAddress = 0;
132180
__llvm_ctx_profile_expected_callee[0] = &FakeCalleeAddress;
@@ -164,7 +212,71 @@ TEST_F(ContextTest, ScratchDuringCollection) {
164212
EXPECT_TRUE(isScratch(Subctx3));
165213
EXPECT_EQ(FData[2].FlatCtx, nullptr);
166214

215+
EXPECT_EQ(__llvm_ctx_profile_current_context_root, Root.CtxRoot);
216+
__llvm_ctx_profile_release_context(&FData[2]);
217+
EXPECT_EQ(__llvm_ctx_profile_current_context_root, Root.CtxRoot);
218+
__llvm_ctx_profile_release_context(&FData[1]);
219+
EXPECT_EQ(__llvm_ctx_profile_current_context_root, Root.CtxRoot);
220+
__llvm_ctx_profile_release_context(&FData[0]);
221+
EXPECT_EQ(__llvm_ctx_profile_current_context_root, Root.CtxRoot);
222+
__llvm_ctx_profile_release_context(&Root);
223+
EXPECT_EQ(__llvm_ctx_profile_current_context_root, nullptr);
224+
}
225+
226+
// Re-entering a root we're already collecting must yield a scratch context
// and must not double-count the root's TotalEntries.
TEST_F(ContextTest, RecursiveRootExplicitlyRegistered) {
  __llvm_ctx_profile_start_collection();
  auto *Ctx = __llvm_ctx_profile_start_context(&Root, 1, 10, 4);
  EXPECT_FALSE(isScratch(Ctx));
  EXPECT_EQ(__llvm_ctx_profile_current_context_root, Root.CtxRoot);

  auto *Subctx = __llvm_ctx_profile_start_context(&Root, 1, 10, 4);
  EXPECT_TRUE(isScratch(Subctx));

  // Only the outermost activation counts as an entry.
  EXPECT_EQ(__sanitizer::atomic_load_relaxed(&Root.CtxRoot->TotalEntries), 1U);

  // Releases are balanced: the inner release keeps the context current.
  EXPECT_EQ(__llvm_ctx_profile_current_context_root, Root.CtxRoot);
  __llvm_ctx_profile_release_context(&Root);
  EXPECT_EQ(__llvm_ctx_profile_current_context_root, Root.CtxRoot);
  __llvm_ctx_profile_release_context(&Root);
  EXPECT_EQ(__llvm_ctx_profile_current_context_root, nullptr);
}
242+
243+
// Same as RecursiveRootExplicitlyRegistered, but the root is entered via the
// generic get-context path (auto-discovery) rather than start_context.
TEST_F(ContextTest, RecursiveRootAutoDiscovered) {
  __llvm_ctx_profile_start_collection();
  auto *Ctx =
      __llvm_ctx_profile_get_context(&Root, Root.EntryAddress, 1, 10, 4);
  EXPECT_FALSE(isScratch(Ctx));
  EXPECT_EQ(__llvm_ctx_profile_current_context_root, Root.CtxRoot);

  auto *Subctx =
      __llvm_ctx_profile_get_context(&Root, Root.EntryAddress, 1, 10, 4);
  EXPECT_TRUE(isScratch(Subctx));

  // Only the outermost activation counts as an entry.
  EXPECT_EQ(__sanitizer::atomic_load_relaxed(&Root.CtxRoot->TotalEntries), 1U);

  // Releases are balanced: the inner release keeps the context current.
  EXPECT_EQ(__llvm_ctx_profile_current_context_root, Root.CtxRoot);
  __llvm_ctx_profile_release_context(&Root);
  EXPECT_EQ(__llvm_ctx_profile_current_context_root, Root.CtxRoot);
  __llvm_ctx_profile_release_context(&Root);
  EXPECT_EQ(__llvm_ctx_profile_current_context_root, nullptr);
}
261+
262+
// A root entering a different root. BUGFIX: the original declared
// `FData Roots[2]` (no such type — should be FunctionData) and then asserted
// against the fixture's `Root` and released `&Root` twice instead of using
// Roots[0]/Roots[1].
// NOTE(review): reconstructed from a garbled diff; the nested-distinct-root
// semantics (in particular the nullptr assert in tryStartContextGivenRoot's
// TryLock branch, and which root remains current) should be verified against
// the runtime before trusting these expectations.
TEST_F(ContextTest, RootEntersOtherRoot) {
  __llvm_ctx_profile_start_collection();
  std::vector<FunctionData> Roots(2);
  std::vector<int> Addresses(2);
  initializeFData(Roots, Addresses, true);
  auto *Ctx = __llvm_ctx_profile_start_context(&Roots[0], 1, 10, 4);
  EXPECT_FALSE(isScratch(Ctx));
  EXPECT_EQ(__llvm_ctx_profile_current_context_root, Roots[0].CtxRoot);

  auto *Subctx = __llvm_ctx_profile_start_context(&Roots[1], 1, 10, 4);
  EXPECT_FALSE(isScratch(Subctx));

  // Only the outermost activation of the first root was counted.
  EXPECT_EQ(__sanitizer::atomic_load_relaxed(&Roots[0].CtxRoot->TotalEntries),
            1U);

  __llvm_ctx_profile_release_context(&Roots[1]);
  __llvm_ctx_profile_release_context(&Roots[0]);
  EXPECT_EQ(__llvm_ctx_profile_current_context_root, nullptr);
}
169281

170282
TEST_F(ContextTest, NeedMoreMemory) {
@@ -185,6 +297,7 @@ TEST_F(ContextTest, NeedMoreMemory) {
185297
EXPECT_EQ(Ctx->subContexts()[2], Subctx);
186298
EXPECT_NE(CurrentMem, CtxRoot.CurrentMem);
187299
EXPECT_NE(CtxRoot.CurrentMem, nullptr);
300+
__llvm_ctx_profile_release_context(&Root);
188301
}
189302

190303
TEST_F(ContextTest, ConcurrentRootCollection) {

0 commit comments

Comments
 (0)