Skip to content

Commit 3949d7e

Browse files
committed
[ctxprof] Flat profile collection
1 parent a316516 commit 3949d7e

File tree

8 files changed

+253
-34
lines changed

8 files changed

+253
-34
lines changed

compiler-rt/lib/ctx_profile/CtxInstrContextNode.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,12 @@ class ProfileWriter {
122122
virtual void startContextSection() = 0;
123123
virtual void writeContextual(const ctx_profile::ContextNode &RootNode) = 0;
124124
virtual void endContextSection() = 0;
125+
126+
virtual void startFlatSection() = 0;
127+
virtual void writeFlat(ctx_profile::GUID Guid, const uint64_t *Buffer,
128+
size_t BufferSize) = 0;
129+
virtual void endFlatSection() = 0;
130+
125131
virtual ~ProfileWriter() = default;
126132
};
127133
} // namespace ctx_profile

compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp

Lines changed: 105 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88

99
#include "CtxInstrProfiling.h"
1010
#include "sanitizer_common/sanitizer_allocator_internal.h"
11+
#include "sanitizer_common/sanitizer_atomic.h"
12+
#include "sanitizer_common/sanitizer_atomic_clang.h"
1113
#include "sanitizer_common/sanitizer_common.h"
1214
#include "sanitizer_common/sanitizer_dense_map.h"
1315
#include "sanitizer_common/sanitizer_libc.h"
@@ -27,6 +29,17 @@ __sanitizer::SpinMutex AllContextsMutex;
2729
SANITIZER_GUARDED_BY(AllContextsMutex)
2830
__sanitizer::Vector<ContextRoot *> AllContextRoots;
2931

32+
__sanitizer::atomic_uintptr_t AllFunctionsData = {};
33+
34+
__sanitizer::SpinMutex FlatCtxArenaMutex;
35+
SANITIZER_GUARDED_BY(FlatCtxArenaMutex)
36+
Arena* FlatCtxArenaHead = nullptr;
37+
SANITIZER_GUARDED_BY(FlatCtxArenaMutex)
38+
Arena* FlatCtxArena = nullptr;
39+
40+
__thread bool IsUnderContext = false;
41+
__sanitizer::atomic_uint8_t ProfilingStarted = {};
42+
3043
// utility to taint a pointer by setting the LSB. There is an assumption
3144
// throughout that the addresses of contexts are even (really, they should be
3245
// align(8), but "even"-ness is the minimum assumption)
@@ -109,7 +122,10 @@ void resetContextNode(ContextNode &Node) {
109122
resetContextNode(*Next);
110123
}
111124

112-
void onContextEnter(ContextNode &Node) { ++Node.counters()[0]; }
125+
ContextNode *onContextEnter(ContextNode &Node) {
126+
++Node.counters()[0];
127+
return &Node;
128+
}
113129

114130
} // namespace
115131

@@ -182,12 +198,62 @@ ContextNode *getCallsiteSlow(GUID Guid, ContextNode **InsertionPoint,
182198
return Ret;
183199
}
184200

185-
ContextNode *__llvm_ctx_profile_get_context(void *Callee, GUID Guid,
186-
uint32_t NumCounters,
201+
ContextNode *getFlatProfile(FunctionData &Data, GUID Guid, uint32_t NumCounters) {
202+
if (ContextNode *Existing = Data.FlatCtx)
203+
return Existing;
204+
{
205+
// We could instead try to take the lock and, if that fails, return
206+
// TheScratchContext. But that could leave message pump loops more sparsely
207+
// profiled than everything else. Maybe that doesn't matter, and we can
208+
// optimize this later.
209+
__sanitizer::GenericScopedLock<__sanitizer::StaticSpinMutex> L(&Data.Mutex);
210+
if (ContextNode *Existing = Data.FlatCtx)
211+
return Existing;
212+
213+
auto NeededSize = ContextNode::getAllocSize(NumCounters, 0);
214+
char *AllocBuff = nullptr;
215+
{
216+
__sanitizer::GenericScopedLock<__sanitizer::SpinMutex> FL(
217+
&FlatCtxArenaMutex);
218+
if (FlatCtxArena)
219+
AllocBuff = FlatCtxArena->tryBumpAllocate(NeededSize);
220+
if (!AllocBuff) {
221+
FlatCtxArena = Arena::allocateNewArena(getArenaAllocSize(NeededSize),
222+
FlatCtxArena);
223+
AllocBuff = FlatCtxArena->tryBumpAllocate(NeededSize);
224+
}
225+
if (!FlatCtxArenaHead)
226+
FlatCtxArenaHead = FlatCtxArena;
227+
}
228+
auto *Ret = allocContextNode(AllocBuff, Guid, NumCounters, 0);
229+
Data.FlatCtx = Ret;
230+
231+
Data.Next = reinterpret_cast<FunctionData *>(
232+
__sanitizer::atomic_load_relaxed(&AllFunctionsData));
233+
while (!__sanitizer::atomic_compare_exchange_strong(
234+
&AllFunctionsData, reinterpret_cast<uintptr_t *>(&Data.Next),
235+
reinterpret_cast<uintptr_t>(&Data),
236+
__sanitizer::memory_order_release)) {
237+
}
238+
}
239+
240+
return Data.FlatCtx;
241+
}
242+
243+
ContextNode *getUnhandledContext(FunctionData &Data, GUID Guid,
244+
uint32_t NumCounters) {
245+
if (IsUnderContext || !__sanitizer::atomic_load_relaxed(&ProfilingStarted))
246+
return TheScratchContext;
247+
return markAsScratch(
248+
onContextEnter(*getFlatProfile(Data, Guid, NumCounters)));
249+
}
250+
251+
ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
252+
GUID Guid, uint32_t NumCounters,
187253
uint32_t NumCallsites) {
188254
// fast "out" if we're not even doing contextual collection.
189255
if (!__llvm_ctx_profile_current_context_root)
190-
return TheScratchContext;
256+
return getUnhandledContext(*Data, Guid, NumCounters);
191257

192258
// also fast "out" if the caller is scratch. We can see if it's scratch by
193259
// looking at the interior pointer into the subcontexts vector that the caller
@@ -257,6 +323,7 @@ void setupContext(ContextRoot *Root, GUID Guid, uint32_t NumCounters,
257323
ContextNode *__llvm_ctx_profile_start_context(
258324
ContextRoot *Root, GUID Guid, uint32_t Counters,
259325
uint32_t Callsites) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
326+
IsUnderContext = true;
260327
if (!Root->FirstMemBlock) {
261328
setupContext(Root, Guid, Counters, Callsites);
262329
}
@@ -272,6 +339,7 @@ ContextNode *__llvm_ctx_profile_start_context(
272339

273340
void __llvm_ctx_profile_release_context(ContextRoot *Root)
274341
SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
342+
IsUnderContext = false;
275343
if (__llvm_ctx_profile_current_context_root) {
276344
__llvm_ctx_profile_current_context_root = nullptr;
277345
Root->Taken.Unlock();
@@ -291,10 +359,12 @@ void __llvm_ctx_profile_start_collection() {
291359

292360
resetContextNode(*Root->FirstNode);
293361
}
362+
__sanitizer::atomic_store_relaxed(&ProfilingStarted, true);
294363
__sanitizer::Printf("[ctxprof] Initial NumMemUnits: %zu \n", NumMemUnits);
295364
}
296365

297366
bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) {
367+
__sanitizer::atomic_store_relaxed(&ProfilingStarted, false);
298368
__sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
299369
&AllContextsMutex);
300370

@@ -310,17 +380,42 @@ bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) {
310380
Writer.writeContextual(*Root->FirstNode);
311381
}
312382
Writer.endContextSection();
383+
Writer.startFlatSection();
384+
// The list progresses behind the head, so taking this snapshot allows the
385+
// list to grow concurrently without causing a race condition with our
386+
// traversing it.
387+
const auto *Pos = reinterpret_cast<const FunctionData *>(
388+
__sanitizer::atomic_load_relaxed(&AllFunctionsData));
389+
for (; Pos; Pos = Pos->Next)
390+
Writer.writeFlat(Pos->FlatCtx->guid(), Pos->FlatCtx->counters(),
391+
Pos->FlatCtx->counters_size());
392+
Writer.endFlatSection();
313393
return true;
314394
}
315395

316396
void __llvm_ctx_profile_free() {
317-
__sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
318-
&AllContextsMutex);
319-
for (int I = 0, E = AllContextRoots.Size(); I < E; ++I)
320-
for (auto *A = AllContextRoots[I]->FirstMemBlock; A;) {
397+
{
398+
__sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
399+
&AllContextsMutex);
400+
for (int I = 0, E = AllContextRoots.Size(); I < E; ++I)
401+
for (auto *A = AllContextRoots[I]->FirstMemBlock; A;) {
402+
auto *C = A;
403+
A = A->next();
404+
__sanitizer::InternalFree(C);
405+
}
406+
AllContextRoots.Reset();
407+
}
408+
__sanitizer::atomic_store_relaxed(&AllFunctionsData, 0U);
409+
{
410+
__sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
411+
&FlatCtxArenaMutex);
412+
FlatCtxArena = nullptr;
413+
for (auto *A = FlatCtxArenaHead; A;) {
321414
auto *C = A;
322-
A = A->next();
415+
A = C->next();
323416
__sanitizer::InternalFree(C);
324417
}
325-
AllContextRoots.Reset();
418+
419+
FlatCtxArenaHead = nullptr;
420+
}
326421
}

compiler-rt/lib/ctx_profile/CtxInstrProfiling.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,12 @@ struct ContextRoot {
113113
static_assert(sizeof(Taken) == 1);
114114
};
115115

116+
struct FunctionData {
117+
FunctionData *Next = nullptr;
118+
ContextNode *volatile FlatCtx = nullptr;
119+
::__sanitizer::StaticSpinMutex Mutex;
120+
};
121+
116122
/// This API is exposed for testing. See the APIs below about the contract with
117123
/// LLVM.
118124
inline bool isScratch(const void *Ctx) {
@@ -152,7 +158,8 @@ void __llvm_ctx_profile_release_context(__ctx_profile::ContextRoot *Root);
152158

153159
/// called for any other function than entry points, in the entry BB of such
154160
/// function. Same consideration about LSB of returned value as .._start_context
155-
ContextNode *__llvm_ctx_profile_get_context(void *Callee, GUID Guid,
161+
ContextNode *__llvm_ctx_profile_get_context(__ctx_profile::FunctionData *Data,
162+
void *Callee, GUID Guid,
156163
uint32_t NumCounters,
157164
uint32_t NumCallsites);
158165

compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp

Lines changed: 69 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,11 @@ TEST_F(ContextTest, Callsite) {
6767
__llvm_ctx_profile_expected_callee[0] = &FakeCalleeAddress;
6868
__llvm_ctx_profile_callsite[0] = &Ctx->subContexts()[2];
6969
// This is what the callee does
70-
auto *Subctx = __llvm_ctx_profile_get_context(&FakeCalleeAddress, 2, 3, 1);
70+
FunctionData FData = {0};
71+
auto *Subctx =
72+
__llvm_ctx_profile_get_context(&FData, &FakeCalleeAddress, 2, 3, 1);
73+
// This should not have required creating a flat context.
74+
EXPECT_EQ(FData.FlatCtx, nullptr);
7175
// We expect the subcontext to be appropriately placed and dimensioned
7276
EXPECT_EQ(Ctx->subContexts()[2], Subctx);
7377
EXPECT_EQ(Subctx->counters_size(), 3U);
@@ -81,15 +85,38 @@ TEST_F(ContextTest, Callsite) {
8185
__llvm_ctx_profile_release_context(&Root);
8286
}
8387

84-
TEST_F(ContextTest, ScratchNoCollection) {
88+
TEST_F(ContextTest, ScratchNoCollectionProfilingNotStarted) {
8589
EXPECT_EQ(__llvm_ctx_profile_current_context_root, nullptr);
8690
int FakeCalleeAddress = 0;
8791
// this would be the very first function executing this. the TLS is empty,
8892
// too.
89-
auto *Ctx = __llvm_ctx_profile_get_context(&FakeCalleeAddress, 2, 3, 1);
93+
FunctionData FData = {0};
94+
auto *Ctx =
95+
__llvm_ctx_profile_get_context(&FData, &FakeCalleeAddress, 2, 3, 1);
9096
// We never entered a context (_start_context was never called) - so the
91-
// returned context must be scratch.
97+
// returned context must be a tagged pointer.
9298
EXPECT_TRUE(isScratch(Ctx));
99+
// Collection was never started, so no flat context should have been allocated.
100+
EXPECT_EQ(FData.FlatCtx, nullptr);
101+
}
102+
103+
TEST_F(ContextTest, ScratchNoCollectionProfilingStarted) {
104+
EXPECT_EQ(__llvm_ctx_profile_current_context_root, nullptr);
105+
int FakeCalleeAddress = 0;
106+
// Start collection, so the function gets a flat profile instead of scratch.
107+
__llvm_ctx_profile_start_collection();
108+
// this would be the very first function executing this. the TLS is empty,
109+
// too.
110+
FunctionData FData = {0};
111+
auto *Ctx =
112+
__llvm_ctx_profile_get_context(&FData, &FakeCalleeAddress, 2, 3, 1);
113+
// We never entered a context (_start_context was never called) - so the
114+
// returned context must be a tagged pointer.
115+
EXPECT_TRUE(isScratch(Ctx));
116+
// Because we never entered a context, we should have allocated a flat context
117+
EXPECT_NE(FData.FlatCtx, nullptr);
118+
EXPECT_EQ(reinterpret_cast<uintptr_t>(FData.FlatCtx) + 1,
119+
reinterpret_cast<uintptr_t>(Ctx));
93120
}
94121

95122
TEST_F(ContextTest, ScratchDuringCollection) {
@@ -98,32 +125,38 @@ TEST_F(ContextTest, ScratchDuringCollection) {
98125
int OtherFakeCalleeAddress = 0;
99126
__llvm_ctx_profile_expected_callee[0] = &FakeCalleeAddress;
100127
__llvm_ctx_profile_callsite[0] = &Ctx->subContexts()[2];
101-
auto *Subctx =
102-
__llvm_ctx_profile_get_context(&OtherFakeCalleeAddress, 2, 3, 1);
128+
FunctionData FData[3] = {0};
129+
auto *Subctx = __llvm_ctx_profile_get_context(
130+
&FData[0], &OtherFakeCalleeAddress, 2, 3, 1);
103131
// We expected a different callee - so return scratch. It mimics what happens
104132
// in the case of a signal handler - in this case, OtherFakeCalleeAddress is
105133
// the signal handler.
106134
EXPECT_TRUE(isScratch(Subctx));
135+
// We shouldn't have tried to return a flat context because we're under a
136+
// root.
137+
EXPECT_EQ(FData[0].FlatCtx, nullptr);
107138
EXPECT_EQ(__llvm_ctx_profile_expected_callee[0], nullptr);
108139
EXPECT_EQ(__llvm_ctx_profile_callsite[0], nullptr);
109140

110141
int ThirdFakeCalleeAddress = 0;
111142
__llvm_ctx_profile_expected_callee[1] = &ThirdFakeCalleeAddress;
112143
__llvm_ctx_profile_callsite[1] = &Subctx->subContexts()[0];
113144

114-
auto *Subctx2 =
115-
__llvm_ctx_profile_get_context(&ThirdFakeCalleeAddress, 3, 0, 0);
145+
auto *Subctx2 = __llvm_ctx_profile_get_context(
146+
&FData[1], &ThirdFakeCalleeAddress, 3, 0, 0);
116147
// We again expect scratch because the '0' position is where the runtime
117148
// looks, so it doesn't matter that the '1' position is populated correctly.
118149
EXPECT_TRUE(isScratch(Subctx2));
150+
EXPECT_EQ(FData[1].FlatCtx, nullptr);
119151

120152
__llvm_ctx_profile_expected_callee[0] = &ThirdFakeCalleeAddress;
121153
__llvm_ctx_profile_callsite[0] = &Subctx->subContexts()[0];
122-
auto *Subctx3 =
123-
__llvm_ctx_profile_get_context(&ThirdFakeCalleeAddress, 3, 0, 0);
154+
auto *Subctx3 = __llvm_ctx_profile_get_context(
155+
&FData[2], &ThirdFakeCalleeAddress, 3, 0, 0);
124156
// We expect scratch here, too, because the value placed in
125157
// __llvm_ctx_profile_callsite is scratch
126158
EXPECT_TRUE(isScratch(Subctx3));
159+
EXPECT_EQ(FData[2].FlatCtx, nullptr);
127160

128161
__llvm_ctx_profile_release_context(&Root);
129162
}
@@ -136,9 +169,11 @@ TEST_F(ContextTest, NeedMoreMemory) {
136169
const auto *CurrentMem = Root.CurrentMem;
137170
__llvm_ctx_profile_expected_callee[0] = &FakeCalleeAddress;
138171
__llvm_ctx_profile_callsite[0] = &Ctx->subContexts()[2];
172+
FunctionData FData = {0};
139173
// Allocate a massive subcontext to force new arena allocation
140174
auto *Subctx =
141-
__llvm_ctx_profile_get_context(&FakeCalleeAddress, 3, 1 << 20, 1);
175+
__llvm_ctx_profile_get_context(&FData, &FakeCalleeAddress, 3, 1 << 20, 1);
176+
EXPECT_EQ(FData.FlatCtx, nullptr);
142177
EXPECT_EQ(Ctx->subContexts()[2], Subctx);
143178
EXPECT_NE(CurrentMem, Root.CurrentMem);
144179
EXPECT_NE(Root.CurrentMem, nullptr);
@@ -175,7 +210,9 @@ TEST_F(ContextTest, Dump) {
175210
int FakeCalleeAddress = 0;
176211
__llvm_ctx_profile_expected_callee[0] = &FakeCalleeAddress;
177212
__llvm_ctx_profile_callsite[0] = &Ctx->subContexts()[2];
178-
auto *Subctx = __llvm_ctx_profile_get_context(&FakeCalleeAddress, 2, 3, 1);
213+
FunctionData FData = {0};
214+
auto *Subctx =
215+
__llvm_ctx_profile_get_context(&FData, &FakeCalleeAddress, 2, 3, 1);
179216
(void)Subctx;
180217
__llvm_ctx_profile_release_context(&Root);
181218

@@ -186,6 +223,9 @@ TEST_F(ContextTest, Dump) {
186223

187224
int EnteredSectionCount = 0;
188225
int ExitedSectionCount = 0;
226+
int EnteredFlatCount = 0;
227+
int ExitedFlatCount = 0;
228+
int FlatsWritten = 0;
189229

190230
bool State = false;
191231

@@ -217,6 +257,16 @@ TEST_F(ContextTest, Dump) {
217257
EXPECT_EQ(EnteredSectionCount, 1);
218258
++ExitedSectionCount;
219259
}
260+
void startFlatSection() override { ++EnteredFlatCount; }
261+
void writeFlat(GUID Guid, const uint64_t *Buffer,
262+
size_t BufferSize) override {
263+
++FlatsWritten;
264+
EXPECT_EQ(BufferSize, 3);
265+
EXPECT_EQ(Buffer[0], 15U);
266+
EXPECT_EQ(Buffer[1], 0U);
267+
EXPECT_EQ(Buffer[2], 0U);
268+
}
269+
void endFlatSection() override { ++ExitedFlatCount; }
220270
};
221271

222272
TestProfileWriter W(&Root, 1);
@@ -226,10 +276,17 @@ TEST_F(ContextTest, Dump) {
226276

227277
// this resets all counters but not the internal structure.
228278
__llvm_ctx_profile_start_collection();
279+
auto *Flat =
280+
__llvm_ctx_profile_get_context(&FData, &FakeCalleeAddress, 2, 3, 1);
281+
EXPECT_NE(FData.FlatCtx, nullptr);
282+
FData.FlatCtx->counters()[0] = 15U;
229283
TestProfileWriter W2(&Root, 0);
230284
EXPECT_FALSE(W2.State);
231285
__llvm_ctx_profile_fetch(W2);
232286
EXPECT_TRUE(W2.State);
233287
EXPECT_EQ(W2.EnteredSectionCount, 1);
234288
EXPECT_EQ(W2.ExitedSectionCount, 1);
289+
EXPECT_EQ(W2.EnteredFlatCount, 1);
290+
EXPECT_EQ(W2.FlatsWritten, 1);
291+
EXPECT_EQ(W2.ExitedFlatCount, 1);
235292
}

0 commit comments

Comments
 (0)