-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[ctxprof] Make ContextRoot an implementation detail #131416
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ctxprof] Make ContextRoot an implementation detail #131416
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
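@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-pgo Author: Mircea Trofin (mtrofin) Changes: `ContextRoot` and `FunctionData` are currently known by the llvm side, which has to instantiate and zero-initialize them. This patch makes `FunctionData` the only global value that needs to be known and instantiated by the compiler. On the compiler-rt side, `ContextRoot`s are hung off `FunctionData`, when applicable. This is for two reasons. First, it is a step towards root autodetection - in a separate patch. An autodetection mechanism would instantiate the `ContextRoot` for the detected roots, and then `__llvm_ctx_profile_get_context` would detect that and route to `__llvm_ctx_profile_start_context`. The second reason is that we will hang off `ContextRoot` more complex datatypes (next patch), and we want to avoid too deep of a coupling between llvm and compiler-rt. Acting as a place to hang related data, `FunctionData` can stay simple - pointers and an (atomic) int (the mutex). Patch is 21.17 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/131416.diff 5 Files Affected: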
diff --git a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp
index 1c2cad1ca506e..6ef7076d93e31 100644
--- a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp
+++ b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp
@@ -336,10 +336,28 @@ void setupContext(ContextRoot *Root, GUID Guid, uint32_t NumCounters,
AllContextRoots.PushBack(Root);
}
+ContextRoot *FunctionData::getOrAllocateContextRoot() {
+ auto *Root = CtxRoot;
+ if (!Root) {
+ __sanitizer::GenericScopedLock<__sanitizer::StaticSpinMutex> L(&Mutex);
+ Root = CtxRoot;
+ if (!Root) {
+ Root =
+ new (__sanitizer::InternalAlloc(sizeof(ContextRoot))) ContextRoot();
+ CtxRoot = Root;
+ }
+ }
+ assert(Root);
+ return Root;
+}
+
ContextNode *__llvm_ctx_profile_start_context(
- ContextRoot *Root, GUID Guid, uint32_t Counters,
+ FunctionData *FData, GUID Guid, uint32_t Counters,
uint32_t Callsites) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
IsUnderContext = true;
+
+ auto *Root = FData->getOrAllocateContextRoot();
+
__sanitizer::atomic_fetch_add(&Root->TotalEntries, 1,
__sanitizer::memory_order_relaxed);
@@ -356,12 +374,13 @@ ContextNode *__llvm_ctx_profile_start_context(
return TheScratchContext;
}
-void __llvm_ctx_profile_release_context(ContextRoot *Root)
+void __llvm_ctx_profile_release_context(FunctionData *FData)
SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
IsUnderContext = false;
if (__llvm_ctx_profile_current_context_root) {
__llvm_ctx_profile_current_context_root = nullptr;
- Root->Taken.Unlock();
+ assert(FData->CtxRoot);
+ FData->CtxRoot->Taken.Unlock();
}
}
diff --git a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.h b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.h
index 72cc60bf523e1..6bb954da950c4 100644
--- a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.h
+++ b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.h
@@ -84,7 +84,6 @@ struct ContextRoot {
// Count the number of entries - regardless if we could take the `Taken` mutex
::__sanitizer::atomic_uint64_t TotalEntries = {};
- // This is init-ed by the static zero initializer in LLVM.
// Taken is used to ensure only one thread traverses the contextual graph -
// either to read it or to write it. On server side, the same entrypoint will
// be entered by numerous threads, but over time, the profile aggregated by
@@ -109,12 +108,7 @@ struct ContextRoot {
// or with more concurrent collections (==more memory) and less collection
// time. Note that concurrent collection does happen for different
// entrypoints, regardless.
- ::__sanitizer::StaticSpinMutex Taken;
-
- // If (unlikely) StaticSpinMutex internals change, we need to modify the LLVM
- // instrumentation lowering side because it is responsible for allocating and
- // zero-initializing ContextRoots.
- static_assert(sizeof(Taken) == 1);
+ ::__sanitizer::SpinMutex Taken;
};
// This is allocated and zero-initialized by the compiler, the in-place
@@ -139,8 +133,16 @@ struct FunctionData {
FunctionData() { Mutex.Init(); }
FunctionData *Next = nullptr;
+ ContextRoot *volatile CtxRoot = nullptr;
ContextNode *volatile FlatCtx = nullptr;
+
+ ContextRoot *getOrAllocateContextRoot();
+
::__sanitizer::StaticSpinMutex Mutex;
+ // If (unlikely) StaticSpinMutex internals change, we need to modify the LLVM
+ // instrumentation lowering side because it is responsible for allocating and
+ // zero-initializing ContextRoots.
+ static_assert(sizeof(Mutex) == 1);
};
/// This API is exposed for testing. See the APIs below about the contract with
@@ -172,17 +174,17 @@ extern __thread __ctx_profile::ContextRoot
/// called by LLVM in the entry BB of a "entry point" function. The returned
/// pointer may be "tainted" - its LSB set to 1 - to indicate it's scratch.
-ContextNode *__llvm_ctx_profile_start_context(__ctx_profile::ContextRoot *Root,
- GUID Guid, uint32_t Counters,
- uint32_t Callsites);
+ContextNode *
+__llvm_ctx_profile_start_context(__ctx_profile::FunctionData *FData, GUID Guid,
+ uint32_t Counters, uint32_t Callsites);
/// paired with __llvm_ctx_profile_start_context, and called at the exit of the
/// entry point function.
-void __llvm_ctx_profile_release_context(__ctx_profile::ContextRoot *Root);
+void __llvm_ctx_profile_release_context(__ctx_profile::FunctionData *FData);
/// called for any other function than entry points, in the entry BB of such
/// function. Same consideration about LSB of returned value as .._start_context
-ContextNode *__llvm_ctx_profile_get_context(__ctx_profile::FunctionData *Data,
+ContextNode *__llvm_ctx_profile_get_context(__ctx_profile::FunctionData *FData,
void *Callee, GUID Guid,
uint32_t NumCounters,
uint32_t NumCallsites);
diff --git a/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp b/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp
index 62c7f53acec5f..ccb8f0e87fcdd 100644
--- a/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp
+++ b/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp
@@ -5,11 +5,11 @@
using namespace __ctx_profile;
class ContextTest : public ::testing::Test {
- void SetUp() override { memset(&Root, 0, sizeof(ContextRoot)); }
+ void SetUp() override { Root.getOrAllocateContextRoot(); }
void TearDown() override { __llvm_ctx_profile_free(); }
public:
- ContextRoot Root;
+ FunctionData Root;
};
TEST(ArenaTest, ZeroInit) {
@@ -43,19 +43,20 @@ TEST_F(ContextTest, Basic) {
__llvm_ctx_profile_start_collection();
auto *Ctx = __llvm_ctx_profile_start_context(&Root, 1, 10, 4);
ASSERT_NE(Ctx, nullptr);
- EXPECT_NE(Root.CurrentMem, nullptr);
- EXPECT_EQ(Root.FirstMemBlock, Root.CurrentMem);
+ auto &CtxRoot = *Root.CtxRoot;
+ EXPECT_NE(CtxRoot.CurrentMem, nullptr);
+ EXPECT_EQ(CtxRoot.FirstMemBlock, CtxRoot.CurrentMem);
EXPECT_EQ(Ctx->size(), sizeof(ContextNode) + 10 * sizeof(uint64_t) +
4 * sizeof(ContextNode *));
EXPECT_EQ(Ctx->counters_size(), 10U);
EXPECT_EQ(Ctx->callsites_size(), 4U);
- EXPECT_EQ(__llvm_ctx_profile_current_context_root, &Root);
- Root.Taken.CheckLocked();
- EXPECT_FALSE(Root.Taken.TryLock());
+ EXPECT_EQ(__llvm_ctx_profile_current_context_root, &CtxRoot);
+ CtxRoot.Taken.CheckLocked();
+ EXPECT_FALSE(CtxRoot.Taken.TryLock());
__llvm_ctx_profile_release_context(&Root);
EXPECT_EQ(__llvm_ctx_profile_current_context_root, nullptr);
- EXPECT_TRUE(Root.Taken.TryLock());
- Root.Taken.Unlock();
+ EXPECT_TRUE(CtxRoot.Taken.TryLock());
+ CtxRoot.Taken.Unlock();
}
TEST_F(ContextTest, Callsite) {
@@ -172,7 +173,8 @@ TEST_F(ContextTest, NeedMoreMemory) {
int FakeCalleeAddress = 0;
const bool IsScratch = isScratch(Ctx);
EXPECT_FALSE(IsScratch);
- const auto *CurrentMem = Root.CurrentMem;
+ auto &CtxRoot = *Root.CtxRoot;
+ const auto *CurrentMem = CtxRoot.CurrentMem;
__llvm_ctx_profile_expected_callee[0] = &FakeCalleeAddress;
__llvm_ctx_profile_callsite[0] = &Ctx->subContexts()[2];
FunctionData FData;
@@ -181,8 +183,8 @@ TEST_F(ContextTest, NeedMoreMemory) {
__llvm_ctx_profile_get_context(&FData, &FakeCalleeAddress, 3, 1 << 20, 1);
EXPECT_EQ(FData.FlatCtx, nullptr);
EXPECT_EQ(Ctx->subContexts()[2], Subctx);
- EXPECT_NE(CurrentMem, Root.CurrentMem);
- EXPECT_NE(Root.CurrentMem, nullptr);
+ EXPECT_NE(CurrentMem, CtxRoot.CurrentMem);
+ EXPECT_NE(CtxRoot.CurrentMem, nullptr);
}
TEST_F(ContextTest, ConcurrentRootCollection) {
@@ -277,7 +279,7 @@ TEST_F(ContextTest, Dump) {
void endFlatSection() override { ++ExitedFlatCount; }
};
- TestProfileWriter W(&Root, 1);
+ TestProfileWriter W(Root.CtxRoot, 1);
EXPECT_FALSE(W.State);
__llvm_ctx_profile_fetch(W);
EXPECT_TRUE(W.State);
@@ -289,7 +291,7 @@ TEST_F(ContextTest, Dump) {
(void)Flat;
EXPECT_NE(FData.FlatCtx, nullptr);
FData.FlatCtx->counters()[0] = 15U;
- TestProfileWriter W2(&Root, 0);
+ TestProfileWriter W2(Root.CtxRoot, 0);
EXPECT_FALSE(W2.State);
__llvm_ctx_profile_fetch(W2);
EXPECT_TRUE(W2.State);
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
index 9f2b2d6212a8e..cecce31fc5f42 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
@@ -53,10 +53,9 @@ class CtxInstrumentationLowerer final {
Module &M;
ModuleAnalysisManager &MAM;
Type *ContextNodeTy = nullptr;
- Type *ContextRootTy = nullptr;
Type *FunctionDataTy = nullptr;
- DenseMap<const Function *, Constant *> ContextRootMap;
+ DenseSet<const Function *> ContextRootSet;
Function *StartCtx = nullptr;
Function *GetCtx = nullptr;
Function *ReleaseCtx = nullptr;
@@ -114,14 +113,6 @@ CtxInstrumentationLowerer::CtxInstrumentationLowerer(Module &M,
auto *I32Ty = Type::getInt32Ty(M.getContext());
auto *I64Ty = Type::getInt64Ty(M.getContext());
- // The ContextRoot type
- ContextRootTy =
- StructType::get(M.getContext(), {
- PointerTy, /*FirstNode*/
- PointerTy, /*FirstMemBlock*/
- PointerTy, /*CurrentMem*/
- SanitizerMutexType, /*Taken*/
- });
FunctionDataTy =
StructType::get(M.getContext(), {
PointerTy, /*Next*/
@@ -143,10 +134,7 @@ CtxInstrumentationLowerer::CtxInstrumentationLowerer(Module &M,
if (const auto *F = M.getFunction(Fname)) {
if (F->isDeclaration())
continue;
- auto *G = M.getOrInsertGlobal(Fname + "_ctx_root", ContextRootTy);
- cast<GlobalVariable>(G)->setInitializer(
- Constant::getNullValue(ContextRootTy));
- ContextRootMap.insert(std::make_pair(F, G));
+ ContextRootSet.insert(F);
for (const auto &BB : *F)
for (const auto &I : BB)
if (const auto *CB = dyn_cast<CallBase>(&I))
@@ -164,7 +152,7 @@ CtxInstrumentationLowerer::CtxInstrumentationLowerer(Module &M,
M.getOrInsertFunction(
CompilerRtAPINames::StartCtx,
FunctionType::get(PointerTy,
- {PointerTy, /*ContextRoot*/
+ {PointerTy, /*FunctionData*/
I64Ty, /*Guid*/ I32Ty,
/*NumCounters*/ I32Ty /*NumCallsites*/},
false))
@@ -183,7 +171,7 @@ CtxInstrumentationLowerer::CtxInstrumentationLowerer(Module &M,
M.getOrInsertFunction(CompilerRtAPINames::ReleaseCtx,
FunctionType::get(Type::getVoidTy(M.getContext()),
{
- PointerTy, /*ContextRoot*/
+ PointerTy, /*FunctionData*/
},
false))
.getCallee());
@@ -223,7 +211,7 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
Value *RealContext = nullptr;
StructType *ThisContextType = nullptr;
- Value *TheRootContext = nullptr;
+ Value *TheRootFuctionData = nullptr;
Value *ExpectedCalleeTLSAddr = nullptr;
Value *CallsiteInfoTLSAddr = nullptr;
@@ -245,23 +233,23 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
ArrayType::get(Builder.getPtrTy(), NumCallsites)});
// Figure out which way we obtain the context object for this function -
// if it's an entrypoint, then we call StartCtx, otherwise GetCtx. In the
- // former case, we also set TheRootContext since we need to release it
+ // former case, we also set TheRootFuctionData since we need to release it
// at the end (plus it can be used to know if we have an entrypoint or a
// regular function)
- auto Iter = ContextRootMap.find(&F);
- if (Iter != ContextRootMap.end()) {
- TheRootContext = Iter->second;
+ // Make up a compact name, these names end up taking up a lot of space
+ // in the binary.
+ auto *FData = new GlobalVariable(M, FunctionDataTy, false,
+ GlobalVariable::InternalLinkage,
+ Constant::getNullValue(FunctionDataTy));
+
+ if (ContextRootSet.contains(&F)) {
Context = Builder.CreateCall(
- StartCtx, {TheRootContext, Guid, Builder.getInt32(NumCounters),
+ StartCtx, {FData, Guid, Builder.getInt32(NumCounters),
Builder.getInt32(NumCallsites)});
+ TheRootFuctionData = FData;
ORE.emit(
[&] { return OptimizationRemark(DEBUG_TYPE, "Entrypoint", &F); });
} else {
- // Make up a compact name, these names end up taking up a lot of space
- // in the binary.
- auto *FData = new GlobalVariable(
- M, FunctionDataTy, false, GlobalVariable::InternalLinkage,
- Constant::getNullValue(FunctionDataTy));
Context = Builder.CreateCall(GetCtx, {FData, &F, Guid,
Builder.getInt32(NumCounters),
Builder.getInt32(NumCallsites)});
@@ -346,10 +334,10 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
break;
}
I.eraseFromParent();
- } else if (TheRootContext && isa<ReturnInst>(I)) {
+ } else if (TheRootFuctionData && isa<ReturnInst>(I)) {
// Remember to release the context if we are an entrypoint.
IRBuilder<> Builder(&I);
- Builder.CreateCall(ReleaseCtx, {TheRootContext});
+ Builder.CreateCall(ReleaseCtx, {TheRootFuctionData});
ContextWasReleased = true;
}
}
@@ -358,7 +346,7 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
// to disallow this, (so this then stays as an error), another is to detect
// that and then do a wrapper or disallow the tail call. This only affects
// instrumentation, when we want to detect the call graph.
- if (TheRootContext && !ContextWasReleased)
+ if (TheRootFuctionData && !ContextWasReleased)
F.getContext().emitError(
"[ctx_prof] An entrypoint was instrumented but it has no `ret` "
"instructions above which to release the context: " +
diff --git a/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll b/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll
index d7ce45d44b9d1..07fcbbd62e213 100644
--- a/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll
+++ b/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll
@@ -9,8 +9,6 @@
declare void @bar()
;.
-; LOWERING: @an_entrypoint_ctx_root = global { ptr, ptr, ptr, i8 } zeroinitializer
-; LOWERING: @another_entrypoint_no_callees_ctx_root = global { ptr, ptr, ptr, i8 } zeroinitializer
; LOWERING: @__llvm_ctx_profile_callsite = external hidden thread_local global ptr
; LOWERING: @__llvm_ctx_profile_expected_callee = external hidden thread_local global ptr
; LOWERING: @[[GLOB0:[0-9]+]] = internal global { ptr, ptr, i8 } zeroinitializer
@@ -18,6 +16,8 @@ declare void @bar()
; LOWERING: @[[GLOB2:[0-9]+]] = internal global { ptr, ptr, i8 } zeroinitializer
; LOWERING: @[[GLOB3:[0-9]+]] = internal global { ptr, ptr, i8 } zeroinitializer
; LOWERING: @[[GLOB4:[0-9]+]] = internal global { ptr, ptr, i8 } zeroinitializer
+; LOWERING: @[[GLOB5:[0-9]+]] = internal global { ptr, ptr, i8 } zeroinitializer
+; LOWERING: @[[GLOB6:[0-9]+]] = internal global { ptr, ptr, i8 } zeroinitializer
;.
define void @foo(i32 %a, ptr %fct) {
; INSTRUMENT-LABEL: define void @foo(
@@ -97,7 +97,7 @@ define void @an_entrypoint(i32 %a) {
;
; LOWERING-LABEL: define void @an_entrypoint(
; LOWERING-SAME: i32 [[A:%.*]]) !guid [[META1:![0-9]+]] {
-; LOWERING-NEXT: [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_start_context(ptr @an_entrypoint_ctx_root, i64 4909520559318251808, i32 2, i32 1)
+; LOWERING-NEXT: [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_start_context(ptr @[[GLOB1]], i64 4909520559318251808, i32 2, i32 1)
; LOWERING-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
; LOWERING-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 1
; LOWERING-NEXT: [[TMP4:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @__llvm_ctx_profile_expected_callee)
@@ -117,10 +117,10 @@ define void @an_entrypoint(i32 %a) {
; LOWERING-NEXT: [[TMP13:%.*]] = getelementptr { { i64, ptr, i32, i32 }, [2 x i64], [1 x ptr] }, ptr [[TMP1]], i32 0, i32 2, i32 0
; LOWERING-NEXT: store volatile ptr [[TMP13]], ptr [[TMP7]], align 8
; LOWERING-NEXT: call void @foo(i32 1, ptr null)
-; LOWERING-NEXT: call void @__llvm_ctx_profile_release_context(ptr @an_entrypoint_ctx_root)
+; LOWERING-NEXT: call void @__llvm_ctx_profile_release_context(ptr @[[GLOB1]])
; LOWERING-NEXT: ret void
; LOWERING: no:
-; LOWERING-NEXT: call void @__llvm_ctx_profile_release_context(ptr @an_entrypoint_ctx_root)
+; LOWERING-NEXT: call void @__llvm_ctx_profile_release_context(ptr @[[GLOB1]])
; LOWERING-NEXT: ret void
;
%t = icmp eq i32 %a, 0
@@ -147,7 +147,7 @@ define void @another_entrypoint_no_callees(i32 %a) {
;
; LOWERING-LABEL: define void @another_entrypoint_no_callees(
; LOWERING-SAME: i32 [[A:%.*]]) !guid [[META2:![0-9]+]] {
-; LOWERING-NEXT: [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_start_context(ptr @another_entrypoint_no_callees_ctx_root, i64 -6371873725078000974, i32 2, i32 0)
+; LOWERING-NEXT: [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_start_context(ptr @[[GLOB2]], i64 -6371873725078000974, i32 2, i32 0)
; LOWERING-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
; LOWERING-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], -2
; LOWERING-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
@@ -158,10 +158,10 @@ define void @another_entrypoint_no_callees(i32 %a) {
; LOWERING-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 4
; LOWERING-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 1
; LOWERING-NEXT: store i64 [[TMP7]], ptr [[TMP5]], align 4
-; LOWERING-NEXT: call void @__llvm_ctx_profile_release_context(ptr @another_entrypoint_no_callees_ctx_root)
+; LOWERING-NEXT: call void @__llvm_ctx_profile_release_context(ptr @[[GLOB2]])
; LOWERING-NEXT: ret void
; LOWERING: no:
-; LOWERING-NEXT: call void @__llvm_ctx_profile_release_context(ptr @another_entrypoint_no_callees_ctx_root)
+; LOWERING-NEXT: call void @__llvm_ctx_profile_release_context(ptr @[[GLOB2]])
; LOWERING-NEXT: ret void
;
%t = icmp eq i32 %a, 0
@@ -181,7 +181,7 @@ define void @simple(i32 %a) {
;
; LOWERING-LABEL: define void @simple(
; LOWERING-SAME: i32 [[A:%.*]]) !guid [[META3:![0-9]+]] {
-; LOWERING-NEXT: [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_get_context(ptr @[[GLOB1]], ptr @simple, i64 -3006003237940970099, i32 1, i32 0)
+; LOWERING-NEXT: [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_get_context(ptr @[[GLOB3]], ptr @simple, i64 -3006003237940970099, i32 1, i32 0)
; LOWERING-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
; LOWERING-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], -2
; LOWERING-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
@@ -205,7 +205,7 @@ define i32 @no_callsites(i32 %a) {
;
; LOWERING-LABEL: define i32 @no_callsites(
; LOWERING-SAME: i32 [[A:%.*]]) !guid [[META4:![0-9]+]] {
-; LOWERING-NEXT: [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_get_context(ptr @[[GLOB2]], ptr @no_callsites, i64 5679753335911435902, i32 2, i32 0)
+; LOWERING-NEXT: [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_get_context(ptr @[[GLOB4]], ptr @no_callsites, i64 5679753335911435902, i32 2, i32 0)
; LOWERING-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
; LOWERING-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], -2
; LOWERING-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
@@ -237,7 +237,7 @@ define void @no_counters() {
;
; LOWERING-LABEL: define void @no_counters(
; L...
[truncated]
|
760bc2b
to
c4b136b
Compare
e3478bd
to
f671b9b
Compare
c4b136b
to
891e3b9
Compare
f671b9b
to
e6d651d
Compare
551cb1a
to
94a272a
Compare
94a272a
to
35e3208
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm with a couple of nits.
35e3208
to
64ae963
Compare
`ContextRoot` and `FunctionData` are currently known by the llvm side, which has to instantiate and zero-initialize them.

This patch makes `FunctionData` the only global value that needs to be known and instantiated by the compiler. On the compiler-rt side, `ContextRoot`s are hung off `FunctionData`, when applicable.

This is for two reasons. First, it is a step towards root autodetection (in a subsequent patch). An autodetection mechanism would instantiate the `ContextRoot` for the detected roots, and then `__llvm_ctx_profile_get_context` would detect that and route to `__llvm_ctx_profile_start_context`.

The second reason is that we will hang off `ContextRoot` more complex datatypes (next patch), and we want to avoid too deep of a coupling between llvm and compiler-rt. Acting as a place to hang related data, `FunctionData` can stay simple - pointers and an (atomic) int (the mutex).