Skip to content

Commit 94a272a

Browse files
committed
[ctxprof] Make ContextRoot an implementation detail
1 parent 215c47e commit 94a272a

File tree

5 files changed

+88
-77
lines changed

5 files changed

+88
-77
lines changed

compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -336,10 +336,28 @@ void setupContext(ContextRoot *Root, GUID Guid, uint32_t NumCounters,
336336
AllContextRoots.PushBack(Root);
337337
}
338338

339+
ContextRoot *FunctionData::getOrAllocateContextRoot() {
340+
auto *Root = CtxRoot;
341+
if (!Root) {
342+
__sanitizer::GenericScopedLock<__sanitizer::StaticSpinMutex> L(&Mutex);
343+
Root = CtxRoot;
344+
if (!Root) {
345+
Root =
346+
new (__sanitizer::InternalAlloc(sizeof(ContextRoot))) ContextRoot();
347+
CtxRoot = Root;
348+
}
349+
}
350+
assert(Root);
351+
return Root;
352+
}
353+
339354
ContextNode *__llvm_ctx_profile_start_context(
340-
ContextRoot *Root, GUID Guid, uint32_t Counters,
355+
FunctionData *FData, GUID Guid, uint32_t Counters,
341356
uint32_t Callsites) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
342357
IsUnderContext = true;
358+
359+
auto *Root = FData->getOrAllocateContextRoot();
360+
343361
__sanitizer::atomic_fetch_add(&Root->TotalEntries, 1,
344362
__sanitizer::memory_order_relaxed);
345363

@@ -356,12 +374,13 @@ ContextNode *__llvm_ctx_profile_start_context(
356374
return TheScratchContext;
357375
}
358376

359-
void __llvm_ctx_profile_release_context(ContextRoot *Root)
377+
void __llvm_ctx_profile_release_context(FunctionData *FData)
360378
SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
361379
IsUnderContext = false;
362380
if (__llvm_ctx_profile_current_context_root) {
363381
__llvm_ctx_profile_current_context_root = nullptr;
364-
Root->Taken.Unlock();
382+
assert(FData->CtxRoot);
383+
FData->CtxRoot->Taken.Unlock();
365384
}
366385
}
367386

compiler-rt/lib/ctx_profile/CtxInstrProfiling.h

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,6 @@ struct ContextRoot {
8484
// Count the number of entries - regardless if we could take the `Taken` mutex
8585
::__sanitizer::atomic_uint64_t TotalEntries = {};
8686

87-
// This is init-ed by the static zero initializer in LLVM.
8887
// Taken is used to ensure only one thread traverses the contextual graph -
8988
// either to read it or to write it. On server side, the same entrypoint will
9089
// be entered by numerous threads, but over time, the profile aggregated by
@@ -109,12 +108,7 @@ struct ContextRoot {
109108
// or with more concurrent collections (==more memory) and less collection
110109
// time. Note that concurrent collection does happen for different
111110
// entrypoints, regardless.
112-
::__sanitizer::StaticSpinMutex Taken;
113-
114-
// If (unlikely) StaticSpinMutex internals change, we need to modify the LLVM
115-
// instrumentation lowering side because it is responsible for allocating and
116-
// zero-initializing ContextRoots.
117-
static_assert(sizeof(Taken) == 1);
111+
::__sanitizer::SpinMutex Taken;
118112
};
119113

120114
// This is allocated and zero-initialized by the compiler, the in-place
@@ -139,8 +133,16 @@ struct FunctionData {
139133
FunctionData() { Mutex.Init(); }
140134

141135
FunctionData *Next = nullptr;
136+
ContextRoot *volatile CtxRoot = nullptr;
142137
ContextNode *volatile FlatCtx = nullptr;
138+
139+
ContextRoot *getOrAllocateContextRoot();
140+
143141
::__sanitizer::StaticSpinMutex Mutex;
142+
// If (unlikely) StaticSpinMutex internals change, we need to modify the LLVM
143+
// instrumentation lowering side because it is responsible for allocating and
144+
// zero-initializing ContextRoots.
145+
static_assert(sizeof(Mutex) == 1);
144146
};
145147

146148
/// This API is exposed for testing. See the APIs below about the contract with
@@ -172,17 +174,17 @@ extern __thread __ctx_profile::ContextRoot
172174

173175
/// called by LLVM in the entry BB of a "entry point" function. The returned
174176
/// pointer may be "tainted" - its LSB set to 1 - to indicate it's scratch.
175-
ContextNode *__llvm_ctx_profile_start_context(__ctx_profile::ContextRoot *Root,
176-
GUID Guid, uint32_t Counters,
177-
uint32_t Callsites);
177+
ContextNode *
178+
__llvm_ctx_profile_start_context(__ctx_profile::FunctionData *FData, GUID Guid,
179+
uint32_t Counters, uint32_t Callsites);
178180

179181
/// paired with __llvm_ctx_profile_start_context, and called at the exit of the
180182
/// entry point function.
181-
void __llvm_ctx_profile_release_context(__ctx_profile::ContextRoot *Root);
183+
void __llvm_ctx_profile_release_context(__ctx_profile::FunctionData *FData);
182184

183185
/// called for any other function than entry points, in the entry BB of such
184186
/// function. Same consideration about LSB of returned value as .._start_context
185-
ContextNode *__llvm_ctx_profile_get_context(__ctx_profile::FunctionData *Data,
187+
ContextNode *__llvm_ctx_profile_get_context(__ctx_profile::FunctionData *FData,
186188
void *Callee, GUID Guid,
187189
uint32_t NumCounters,
188190
uint32_t NumCallsites);

compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@
55
using namespace __ctx_profile;
66

77
class ContextTest : public ::testing::Test {
8-
void SetUp() override { memset(&Root, 0, sizeof(ContextRoot)); }
8+
void SetUp() override { Root.getOrAllocateContextRoot(); }
99
void TearDown() override { __llvm_ctx_profile_free(); }
1010

1111
public:
12-
ContextRoot Root;
12+
FunctionData Root;
1313
};
1414

1515
TEST(ArenaTest, ZeroInit) {
@@ -43,19 +43,20 @@ TEST_F(ContextTest, Basic) {
4343
__llvm_ctx_profile_start_collection();
4444
auto *Ctx = __llvm_ctx_profile_start_context(&Root, 1, 10, 4);
4545
ASSERT_NE(Ctx, nullptr);
46-
EXPECT_NE(Root.CurrentMem, nullptr);
47-
EXPECT_EQ(Root.FirstMemBlock, Root.CurrentMem);
46+
auto &CtxRoot = *Root.CtxRoot;
47+
EXPECT_NE(CtxRoot.CurrentMem, nullptr);
48+
EXPECT_EQ(CtxRoot.FirstMemBlock, CtxRoot.CurrentMem);
4849
EXPECT_EQ(Ctx->size(), sizeof(ContextNode) + 10 * sizeof(uint64_t) +
4950
4 * sizeof(ContextNode *));
5051
EXPECT_EQ(Ctx->counters_size(), 10U);
5152
EXPECT_EQ(Ctx->callsites_size(), 4U);
52-
EXPECT_EQ(__llvm_ctx_profile_current_context_root, &Root);
53-
Root.Taken.CheckLocked();
54-
EXPECT_FALSE(Root.Taken.TryLock());
53+
EXPECT_EQ(__llvm_ctx_profile_current_context_root, &CtxRoot);
54+
CtxRoot.Taken.CheckLocked();
55+
EXPECT_FALSE(CtxRoot.Taken.TryLock());
5556
__llvm_ctx_profile_release_context(&Root);
5657
EXPECT_EQ(__llvm_ctx_profile_current_context_root, nullptr);
57-
EXPECT_TRUE(Root.Taken.TryLock());
58-
Root.Taken.Unlock();
58+
EXPECT_TRUE(CtxRoot.Taken.TryLock());
59+
CtxRoot.Taken.Unlock();
5960
}
6061

6162
TEST_F(ContextTest, Callsite) {
@@ -172,7 +173,8 @@ TEST_F(ContextTest, NeedMoreMemory) {
172173
int FakeCalleeAddress = 0;
173174
const bool IsScratch = isScratch(Ctx);
174175
EXPECT_FALSE(IsScratch);
175-
const auto *CurrentMem = Root.CurrentMem;
176+
auto &CtxRoot = *Root.CtxRoot;
177+
const auto *CurrentMem = CtxRoot.CurrentMem;
176178
__llvm_ctx_profile_expected_callee[0] = &FakeCalleeAddress;
177179
__llvm_ctx_profile_callsite[0] = &Ctx->subContexts()[2];
178180
FunctionData FData;
@@ -181,8 +183,8 @@ TEST_F(ContextTest, NeedMoreMemory) {
181183
__llvm_ctx_profile_get_context(&FData, &FakeCalleeAddress, 3, 1 << 20, 1);
182184
EXPECT_EQ(FData.FlatCtx, nullptr);
183185
EXPECT_EQ(Ctx->subContexts()[2], Subctx);
184-
EXPECT_NE(CurrentMem, Root.CurrentMem);
185-
EXPECT_NE(Root.CurrentMem, nullptr);
186+
EXPECT_NE(CurrentMem, CtxRoot.CurrentMem);
187+
EXPECT_NE(CtxRoot.CurrentMem, nullptr);
186188
}
187189

188190
TEST_F(ContextTest, ConcurrentRootCollection) {
@@ -277,7 +279,7 @@ TEST_F(ContextTest, Dump) {
277279
void endFlatSection() override { ++ExitedFlatCount; }
278280
};
279281

280-
TestProfileWriter W(&Root, 1);
282+
TestProfileWriter W(Root.CtxRoot, 1);
281283
EXPECT_FALSE(W.State);
282284
__llvm_ctx_profile_fetch(W);
283285
EXPECT_TRUE(W.State);
@@ -289,7 +291,7 @@ TEST_F(ContextTest, Dump) {
289291
(void)Flat;
290292
EXPECT_NE(FData.FlatCtx, nullptr);
291293
FData.FlatCtx->counters()[0] = 15U;
292-
TestProfileWriter W2(&Root, 0);
294+
TestProfileWriter W2(Root.CtxRoot, 0);
293295
EXPECT_FALSE(W2.State);
294296
__llvm_ctx_profile_fetch(W2);
295297
EXPECT_TRUE(W2.State);

llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp

Lines changed: 19 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -53,10 +53,9 @@ class CtxInstrumentationLowerer final {
5353
Module &M;
5454
ModuleAnalysisManager &MAM;
5555
Type *ContextNodeTy = nullptr;
56-
Type *ContextRootTy = nullptr;
5756
Type *FunctionDataTy = nullptr;
5857

59-
DenseMap<const Function *, Constant *> ContextRootMap;
58+
DenseSet<const Function *> ContextRootSet;
6059
Function *StartCtx = nullptr;
6160
Function *GetCtx = nullptr;
6261
Function *ReleaseCtx = nullptr;
@@ -114,18 +113,10 @@ CtxInstrumentationLowerer::CtxInstrumentationLowerer(Module &M,
114113
auto *I32Ty = Type::getInt32Ty(M.getContext());
115114
auto *I64Ty = Type::getInt64Ty(M.getContext());
116115

117-
// The ContextRoot type
118-
ContextRootTy =
119-
StructType::get(M.getContext(), {
120-
PointerTy, /*FirstNode*/
121-
PointerTy, /*FirstMemBlock*/
122-
PointerTy, /*CurrentMem*/
123-
I64Ty, /*TotalEntries*/
124-
SanitizerMutexType, /*Taken*/
125-
});
126116
FunctionDataTy =
127117
StructType::get(M.getContext(), {
128118
PointerTy, /*Next*/
119+
PointerTy, /*CtxRoot*/
129120
PointerTy, /*FlatCtx*/
130121
SanitizerMutexType, /*Mutex*/
131122
});
@@ -144,10 +135,7 @@ CtxInstrumentationLowerer::CtxInstrumentationLowerer(Module &M,
144135
if (const auto *F = M.getFunction(Fname)) {
145136
if (F->isDeclaration())
146137
continue;
147-
auto *G = M.getOrInsertGlobal(Fname + "_ctx_root", ContextRootTy);
148-
cast<GlobalVariable>(G)->setInitializer(
149-
Constant::getNullValue(ContextRootTy));
150-
ContextRootMap.insert(std::make_pair(F, G));
138+
ContextRootSet.insert(F);
151139
for (const auto &BB : *F)
152140
for (const auto &I : BB)
153141
if (const auto *CB = dyn_cast<CallBase>(&I))
@@ -165,7 +153,7 @@ CtxInstrumentationLowerer::CtxInstrumentationLowerer(Module &M,
165153
M.getOrInsertFunction(
166154
CompilerRtAPINames::StartCtx,
167155
FunctionType::get(PointerTy,
168-
{PointerTy, /*ContextRoot*/
156+
{PointerTy, /*FunctionData*/
169157
I64Ty, /*Guid*/ I32Ty,
170158
/*NumCounters*/ I32Ty /*NumCallsites*/},
171159
false))
@@ -184,7 +172,7 @@ CtxInstrumentationLowerer::CtxInstrumentationLowerer(Module &M,
184172
M.getOrInsertFunction(CompilerRtAPINames::ReleaseCtx,
185173
FunctionType::get(Type::getVoidTy(M.getContext()),
186174
{
187-
PointerTy, /*ContextRoot*/
175+
PointerTy, /*FunctionData*/
188176
},
189177
false))
190178
.getCallee());
@@ -224,7 +212,7 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
224212
Value *RealContext = nullptr;
225213

226214
StructType *ThisContextType = nullptr;
227-
Value *TheRootContext = nullptr;
215+
Value *TheRootFuctionData = nullptr;
228216
Value *ExpectedCalleeTLSAddr = nullptr;
229217
Value *CallsiteInfoTLSAddr = nullptr;
230218

@@ -246,23 +234,23 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
246234
ArrayType::get(Builder.getPtrTy(), NumCallsites)});
247235
// Figure out which way we obtain the context object for this function -
248236
// if it's an entrypoint, then we call StartCtx, otherwise GetCtx. In the
249-
// former case, we also set TheRootContext since we need to release it
237+
// former case, we also set TheRootFuctionData since we need to release it
250238
// at the end (plus it can be used to know if we have an entrypoint or a
251239
// regular function)
252-
auto Iter = ContextRootMap.find(&F);
253-
if (Iter != ContextRootMap.end()) {
254-
TheRootContext = Iter->second;
240+
// Make up a compact name, these names end up taking up a lot of space
241+
// in the binary.
242+
auto *FData = new GlobalVariable(M, FunctionDataTy, false,
243+
GlobalVariable::InternalLinkage,
244+
Constant::getNullValue(FunctionDataTy));
245+
246+
if (ContextRootSet.contains(&F)) {
255247
Context = Builder.CreateCall(
256-
StartCtx, {TheRootContext, Guid, Builder.getInt32(NumCounters),
248+
StartCtx, {FData, Guid, Builder.getInt32(NumCounters),
257249
Builder.getInt32(NumCallsites)});
250+
TheRootFuctionData = FData;
258251
ORE.emit(
259252
[&] { return OptimizationRemark(DEBUG_TYPE, "Entrypoint", &F); });
260253
} else {
261-
// Make up a compact name, these names end up taking up a lot of space
262-
// in the binary.
263-
auto *FData = new GlobalVariable(
264-
M, FunctionDataTy, false, GlobalVariable::InternalLinkage,
265-
Constant::getNullValue(FunctionDataTy));
266254
Context = Builder.CreateCall(GetCtx, {FData, &F, Guid,
267255
Builder.getInt32(NumCounters),
268256
Builder.getInt32(NumCallsites)});
@@ -347,10 +335,10 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
347335
break;
348336
}
349337
I.eraseFromParent();
350-
} else if (TheRootContext && isa<ReturnInst>(I)) {
338+
} else if (TheRootFuctionData && isa<ReturnInst>(I)) {
351339
// Remember to release the context if we are an entrypoint.
352340
IRBuilder<> Builder(&I);
353-
Builder.CreateCall(ReleaseCtx, {TheRootContext});
341+
Builder.CreateCall(ReleaseCtx, {TheRootFuctionData});
354342
ContextWasReleased = true;
355343
}
356344
}
@@ -359,7 +347,7 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
359347
// to disallow this, (so this then stays as an error), another is to detect
360348
// that and then do a wrapper or disallow the tail call. This only affects
361349
// instrumentation, when we want to detect the call graph.
362-
if (TheRootContext && !ContextWasReleased)
350+
if (TheRootFuctionData && !ContextWasReleased)
363351
F.getContext().emitError(
364352
"[ctx_prof] An entrypoint was instrumented but it has no `ret` "
365353
"instructions above which to release the context: " +

0 commit comments

Comments
 (0)