[ctxprof] Flat profile collection #130655
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking.
✅ With the latest revision this PR passed the C/C++ code formatter.
uint32_t NumCounters) {
  if (ContextNode *Existing = Data.FlatCtx)
    return Existing;
  {
Do you need the additional scope (and indentation) if the entire function needs to be under the lock?
wdym. there's a fast bail-out if FlatCtx was set.
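A minimal sketch of the pattern being discussed, using standard-library primitives and hypothetical names instead of the sanitizer spin mutex and the patch's `FlatCtx` field: a lock-free fast path returns the pointer when it is already published, and the lock is only taken (with the check repeated) for the one-time initialization.

```cpp
#include <atomic>
#include <mutex>

struct Node {};

struct Data {
  std::atomic<Node *> Flat{nullptr}; // published pointer, readable without the lock
  std::mutex M;                      // guards the one-time initialization
};

// Hypothetical stand-in for getFlatProfile: create Data::Flat exactly once.
Node *getOrCreate(Data &D) {
  if (Node *Existing = D.Flat.load(std::memory_order_acquire))
    return Existing; // fast bail-out: already initialized, no lock taken
  std::lock_guard<std::mutex> L(D.M);
  if (Node *Existing = D.Flat.load(std::memory_order_relaxed))
    return Existing; // another thread initialized it while we waited for the lock
  Node *Fresh = new Node();
  D.Flat.store(Fresh, std::memory_order_release);
  return Fresh;
}
```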
@@ -27,6 +29,20 @@ __sanitizer::SpinMutex AllContextsMutex;
SANITIZER_GUARDED_BY(AllContextsMutex)
__sanitizer::Vector<ContextRoot *> AllContextRoots;

__sanitizer::atomic_uintptr_t AllFunctionsData = {};
nit: Prefer AllFunctionsData{0}
I think semantically these mean the same thing, just a matter of preference.
I did `= something` elsewhere in this file.
Not sure I understand what you mean?
I use the `= {}` notation elsewhere in this file and the .h.
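For reference, a small self-contained sketch (using a simplified stand-in struct, not the real `__sanitizer::atomic_uintptr_t` from sanitizer_atomic.h; the field name is illustrative) showing that both spellings produce the same zero-initialized state for an aggregate like this:

```cpp
#include <cassert>
#include <cstdint>

// Simplified stand-in for the sanitizer atomic wrapper: a plain aggregate
// holding the value.
struct AtomicUintptr {
  volatile uintptr_t Val;
};

AtomicUintptr A = {}; // value-initialization: Val becomes 0
AtomicUintptr B{0};   // explicit zero: same resulting state

int main() {
  assert(A.Val == 0);
  assert(B.Val == 0);
  return 0;
}
```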
@llvm/pr-subscribers-pgo @llvm/pr-subscribers-llvm-transforms

Author: Mircea Trofin (mtrofin)

Changes

Collect flat profiles. We only do this for function activations that aren't otherwise collectible under a context root. This allows us to reason about the full profile without concerning ourselves with whether we are double-counting. For example, we can combine (during profile use) flattened contextual profiles with flat profiles.

Patch is 32.08 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/130655.diff

9 Files Affected:
diff --git a/compiler-rt/lib/ctx_profile/CtxInstrContextNode.h b/compiler-rt/lib/ctx_profile/CtxInstrContextNode.h
index fe8ddcdf79129..0fc4883305145 100644
--- a/compiler-rt/lib/ctx_profile/CtxInstrContextNode.h
+++ b/compiler-rt/lib/ctx_profile/CtxInstrContextNode.h
@@ -122,6 +122,12 @@ class ProfileWriter {
virtual void startContextSection() = 0;
virtual void writeContextual(const ctx_profile::ContextNode &RootNode) = 0;
virtual void endContextSection() = 0;
+
+ virtual void startFlatSection() = 0;
+ virtual void writeFlat(ctx_profile::GUID Guid, const uint64_t *Buffer,
+ size_t BufferSize) = 0;
+ virtual void endFlatSection() = 0;
+
virtual ~ProfileWriter() = default;
};
} // namespace ctx_profile
diff --git a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp
index 992aa94a6631d..d7ec8fde4ec7d 100644
--- a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp
+++ b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp
@@ -8,6 +8,8 @@
#include "CtxInstrProfiling.h"
#include "sanitizer_common/sanitizer_allocator_internal.h"
+#include "sanitizer_common/sanitizer_atomic.h"
+#include "sanitizer_common/sanitizer_atomic_clang.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_dense_map.h"
#include "sanitizer_common/sanitizer_libc.h"
@@ -27,6 +29,20 @@ __sanitizer::SpinMutex AllContextsMutex;
SANITIZER_GUARDED_BY(AllContextsMutex)
__sanitizer::Vector<ContextRoot *> AllContextRoots;
+__sanitizer::atomic_uintptr_t AllFunctionsData = {};
+
+// Keep all the functions for which we collect a flat profile in a linked list.
+__sanitizer::SpinMutex FlatCtxArenaMutex;
+SANITIZER_GUARDED_BY(FlatCtxArenaMutex)
+Arena *FlatCtxArenaHead = nullptr;
+SANITIZER_GUARDED_BY(FlatCtxArenaMutex)
+Arena *FlatCtxArena = nullptr;
+
+// Set to true when we enter a root, and false when we exit - regardless if this
+// thread collects a contextual profile for that root.
+__thread bool IsUnderContext = false;
+__sanitizer::atomic_uint8_t ProfilingStarted = {};
+
// utility to taint a pointer by setting the LSB. There is an assumption
// throughout that the addresses of contexts are even (really, they should be
// align(8), but "even"-ness is the minimum assumption)
@@ -109,7 +125,10 @@ void resetContextNode(ContextNode &Node) {
resetContextNode(*Next);
}
-void onContextEnter(ContextNode &Node) { ++Node.counters()[0]; }
+ContextNode *onContextEnter(ContextNode &Node) {
+ ++Node.counters()[0];
+ return &Node;
+}
} // namespace
@@ -182,12 +201,75 @@ ContextNode *getCallsiteSlow(GUID Guid, ContextNode **InsertionPoint,
return Ret;
}
-ContextNode *__llvm_ctx_profile_get_context(void *Callee, GUID Guid,
- uint32_t NumCounters,
+ContextNode *getFlatProfile(FunctionData &Data, GUID Guid,
+ uint32_t NumCounters) {
+ if (ContextNode *Existing = Data.FlatCtx)
+ return Existing;
+ {
+ // We could instead try to take the lock and, if that fails, return
+ // TheScratchContext. But that could leave message pump loops more sparsely
+ // profiled than everything else. Maybe that doesn't matter, and we can
+ // optimize this later.
+ __sanitizer::GenericScopedLock<__sanitizer::StaticSpinMutex> L(&Data.Mutex);
+ if (ContextNode *Existing = Data.FlatCtx)
+ return Existing;
+
+ auto NeededSize = ContextNode::getAllocSize(NumCounters, 0);
+ char *AllocBuff = nullptr;
+ {
+ __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> FL(
+ &FlatCtxArenaMutex);
+ if (FlatCtxArena)
+ AllocBuff = FlatCtxArena->tryBumpAllocate(NeededSize);
+ if (!AllocBuff) {
+ FlatCtxArena = Arena::allocateNewArena(getArenaAllocSize(NeededSize),
+ FlatCtxArena);
+ AllocBuff = FlatCtxArena->tryBumpAllocate(NeededSize);
+ }
+ if (!FlatCtxArenaHead)
+ FlatCtxArenaHead = FlatCtxArena;
+ }
+ auto *Ret = allocContextNode(AllocBuff, Guid, NumCounters, 0);
+ Data.FlatCtx = Ret;
+
+ Data.Next = reinterpret_cast<FunctionData *>(
+ __sanitizer::atomic_load_relaxed(&AllFunctionsData));
+ while (!__sanitizer::atomic_compare_exchange_strong(
+ &AllFunctionsData, reinterpret_cast<uintptr_t *>(&Data.Next),
+ reinterpret_cast<uintptr_t>(&Data),
+ __sanitizer::memory_order_release)) {
+ }
+ }
+
+ return Data.FlatCtx;
+}
+
+ContextNode *getUnhandledContext(FunctionData &Data, GUID Guid,
+ uint32_t NumCounters) {
+ // 1) if we are under a root (regardless if this thread is collecting or not a
+ // contextual profile for that root), do not collect a flat profile. We want
+ // to keep flat profiles only for activations that can't happen under a root,
+ // to avoid confusing profiles. We can, for example, combine flattened and
+ // flat profiles meaningfully, as we wouldn't double-count anything.
+ //
+ // 2) to avoid lengthy startup, don't bother with flat profiles until the
+ // profiling started. We would reset them anyway when profiling starts.
+ // HOWEVER. This does lose profiling for message pumps: those functions are
+ // entered once and never exit. They should be assumed to be entered before
+ // profiling starts - because profiling should start after the server is up
+ // and running (which is equivalent to "message pumps are set up").
+ if (IsUnderContext || !__sanitizer::atomic_load_relaxed(&ProfilingStarted))
+ return TheScratchContext;
+ return markAsScratch(
+ onContextEnter(*getFlatProfile(Data, Guid, NumCounters)));
+}
+
+ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
+ GUID Guid, uint32_t NumCounters,
uint32_t NumCallsites) {
// fast "out" if we're not even doing contextual collection.
if (!__llvm_ctx_profile_current_context_root)
- return TheScratchContext;
+ return getUnhandledContext(*Data, Guid, NumCounters);
// also fast "out" if the caller is scratch. We can see if it's scratch by
// looking at the interior pointer into the subcontexts vector that the caller
@@ -196,7 +278,7 @@ ContextNode *__llvm_ctx_profile_get_context(void *Callee, GUID Guid,
// precisely, aligned - 8 values)
auto **CallsiteContext = consume(__llvm_ctx_profile_callsite[0]);
if (!CallsiteContext || isScratch(CallsiteContext))
- return TheScratchContext;
+ return getUnhandledContext(*Data, Guid, NumCounters);
// if the callee isn't the expected one, return scratch.
// Signal handler(s) could have been invoked at any point in the execution.
@@ -214,7 +296,7 @@ ContextNode *__llvm_ctx_profile_get_context(void *Callee, GUID Guid,
// for that case.
auto *ExpectedCallee = consume(__llvm_ctx_profile_expected_callee[0]);
if (ExpectedCallee != Callee)
- return TheScratchContext;
+ return getUnhandledContext(*Data, Guid, NumCounters);
auto *Callsite = *CallsiteContext;
// in the case of indirect calls, we will have all seen targets forming a
@@ -257,6 +339,7 @@ void setupContext(ContextRoot *Root, GUID Guid, uint32_t NumCounters,
ContextNode *__llvm_ctx_profile_start_context(
ContextRoot *Root, GUID Guid, uint32_t Counters,
uint32_t Callsites) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+ IsUnderContext = true;
if (!Root->FirstMemBlock) {
setupContext(Root, Guid, Counters, Callsites);
}
@@ -272,6 +355,7 @@ ContextNode *__llvm_ctx_profile_start_context(
void __llvm_ctx_profile_release_context(ContextRoot *Root)
SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+ IsUnderContext = false;
if (__llvm_ctx_profile_current_context_root) {
__llvm_ctx_profile_current_context_root = nullptr;
Root->Taken.Unlock();
@@ -291,10 +375,12 @@ void __llvm_ctx_profile_start_collection() {
resetContextNode(*Root->FirstNode);
}
+ __sanitizer::atomic_store_relaxed(&ProfilingStarted, true);
__sanitizer::Printf("[ctxprof] Initial NumMemUnits: %zu \n", NumMemUnits);
}
bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) {
+ __sanitizer::atomic_store_relaxed(&ProfilingStarted, false);
__sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
&AllContextsMutex);
@@ -310,17 +396,43 @@ bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) {
Writer.writeContextual(*Root->FirstNode);
}
Writer.endContextSection();
+ Writer.startFlatSection();
+ // The list progresses behind the head, so taking this snapshot allows the
+ // list to grow concurrently without causing a race condition with our
+ // traversing it.
+ const auto *Pos = reinterpret_cast<const FunctionData *>(
+ __sanitizer::atomic_load_relaxed(&AllFunctionsData));
+ for (; Pos; Pos = Pos->Next)
+ Writer.writeFlat(Pos->FlatCtx->guid(), Pos->FlatCtx->counters(),
+ Pos->FlatCtx->counters_size());
+ Writer.endFlatSection();
return true;
}
void __llvm_ctx_profile_free() {
- __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
- &AllContextsMutex);
- for (int I = 0, E = AllContextRoots.Size(); I < E; ++I)
- for (auto *A = AllContextRoots[I]->FirstMemBlock; A;) {
+ __sanitizer::atomic_store_relaxed(&ProfilingStarted, false);
+ {
+ __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
+ &AllContextsMutex);
+ for (int I = 0, E = AllContextRoots.Size(); I < E; ++I)
+ for (auto *A = AllContextRoots[I]->FirstMemBlock; A;) {
+ auto *C = A;
+ A = A->next();
+ __sanitizer::InternalFree(C);
+ }
+ AllContextRoots.Reset();
+ }
+ __sanitizer::atomic_store_relaxed(&AllFunctionsData, 0U);
+ {
+ __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
+ &FlatCtxArenaMutex);
+ FlatCtxArena = nullptr;
+ for (auto *A = FlatCtxArenaHead; A;) {
auto *C = A;
- A = A->next();
+ A = C->next();
__sanitizer::InternalFree(C);
}
- AllContextRoots.Reset();
+
+ FlatCtxArenaHead = nullptr;
+ }
}
diff --git a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.h b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.h
index 8a6949d4ec288..c41a77457178c 100644
--- a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.h
+++ b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.h
@@ -113,6 +113,28 @@ struct ContextRoot {
static_assert(sizeof(Taken) == 1);
};
+// This is allocated and zero-initialized by the compiler, the in-place
+// initialization serves mostly as self-documentation and for testing.
+// The design is influenced by the observation that typically (at least for
+// datacenter binaries, which is the motivating target of this profiler) less
+// than 10% of functions in a binary even appear in a profile (of any kind).
+//
+// 1) We could pre-allocate the flat profile storage in the compiler, just like
+// the flat instrumented profiling does. But that penalizes the static size of
+// the binary for little reason
+//
+// 2) We could do the above but zero-initialize the buffers (which should place
+// them in .bss), and dynamically populate them. This, though, would page-in
+// more memory upfront for the binary's runtime
+//
+// The current design trades off a bit of overhead at the first time a function
+// is encountered *for flat profiling* for avoiding size penalties.
+struct FunctionData {
+ FunctionData *Next = nullptr;
+ ContextNode *volatile FlatCtx = nullptr;
+ ::__sanitizer::StaticSpinMutex Mutex;
+};
+
/// This API is exposed for testing. See the APIs below about the contract with
/// LLVM.
inline bool isScratch(const void *Ctx) {
@@ -152,7 +174,8 @@ void __llvm_ctx_profile_release_context(__ctx_profile::ContextRoot *Root);
/// called for any other function than entry points, in the entry BB of such
/// function. Same consideration about LSB of returned value as .._start_context
-ContextNode *__llvm_ctx_profile_get_context(void *Callee, GUID Guid,
+ContextNode *__llvm_ctx_profile_get_context(__ctx_profile::FunctionData *Data,
+ void *Callee, GUID Guid,
uint32_t NumCounters,
uint32_t NumCallsites);
diff --git a/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp b/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp
index 97292f9f1abff..01a8274774ecb 100644
--- a/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp
+++ b/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp
@@ -40,6 +40,7 @@ TEST(ArenaTest, Basic) {
}
TEST_F(ContextTest, Basic) {
+ __llvm_ctx_profile_start_collection();
auto *Ctx = __llvm_ctx_profile_start_context(&Root, 1, 10, 4);
ASSERT_NE(Ctx, nullptr);
EXPECT_NE(Root.CurrentMem, nullptr);
@@ -58,6 +59,7 @@ TEST_F(ContextTest, Basic) {
}
TEST_F(ContextTest, Callsite) {
+ __llvm_ctx_profile_start_collection();
auto *Ctx = __llvm_ctx_profile_start_context(&Root, 1, 10, 4);
int FakeCalleeAddress = 0;
const bool IsScratch = isScratch(Ctx);
@@ -67,7 +69,11 @@ TEST_F(ContextTest, Callsite) {
__llvm_ctx_profile_expected_callee[0] = &FakeCalleeAddress;
__llvm_ctx_profile_callsite[0] = &Ctx->subContexts()[2];
// This is what the callee does
- auto *Subctx = __llvm_ctx_profile_get_context(&FakeCalleeAddress, 2, 3, 1);
+ FunctionData FData = {0};
+ auto *Subctx =
+ __llvm_ctx_profile_get_context(&FData, &FakeCalleeAddress, 2, 3, 1);
+ // This should not have required creating a flat context.
+ EXPECT_EQ(FData.FlatCtx, nullptr);
// We expect the subcontext to be appropriately placed and dimensioned
EXPECT_EQ(Ctx->subContexts()[2], Subctx);
EXPECT_EQ(Subctx->counters_size(), 3U);
@@ -81,29 +87,59 @@ TEST_F(ContextTest, Callsite) {
__llvm_ctx_profile_release_context(&Root);
}
-TEST_F(ContextTest, ScratchNoCollection) {
+TEST_F(ContextTest, ScratchNoCollectionProfilingNotStarted) {
+ // This test intentionally does not call __llvm_ctx_profile_start_collection.
EXPECT_EQ(__llvm_ctx_profile_current_context_root, nullptr);
int FakeCalleeAddress = 0;
// this would be the very first function executing this. the TLS is empty,
// too.
- auto *Ctx = __llvm_ctx_profile_get_context(&FakeCalleeAddress, 2, 3, 1);
+ FunctionData FData = {0};
+ auto *Ctx =
+ __llvm_ctx_profile_get_context(&FData, &FakeCalleeAddress, 2, 3, 1);
+ // We never entered a context (_start_context was never called) - so the
+ // returned context must be a tagged pointer.
+ EXPECT_TRUE(isScratch(Ctx));
+ // Because we didn't start collection, no flat profile should have been
+ // allocated.
+ EXPECT_EQ(FData.FlatCtx, nullptr);
+}
+
+TEST_F(ContextTest, ScratchNoCollectionProfilingStarted) {
+ ASSERT_EQ(__llvm_ctx_profile_current_context_root, nullptr);
+ int FakeCalleeAddress = 0;
+ // Start collection, so the function gets a flat profile instead of scratch.
+ __llvm_ctx_profile_start_collection();
+ // this would be the very first function executing this. the TLS is empty,
+ // too.
+ FunctionData FData = {0};
+ auto *Ctx =
+ __llvm_ctx_profile_get_context(&FData, &FakeCalleeAddress, 2, 3, 1);
// We never entered a context (_start_context was never called) - so the
- // returned context must be scratch.
+ // returned context must be a tagged pointer.
EXPECT_TRUE(isScratch(Ctx));
+ // Because we never entered a context, we should have allocated a flat context
+ EXPECT_NE(FData.FlatCtx, nullptr);
+ EXPECT_EQ(reinterpret_cast<uintptr_t>(FData.FlatCtx) + 1,
+ reinterpret_cast<uintptr_t>(Ctx));
}
TEST_F(ContextTest, ScratchDuringCollection) {
+ __llvm_ctx_profile_start_collection();
auto *Ctx = __llvm_ctx_profile_start_context(&Root, 1, 10, 4);
int FakeCalleeAddress = 0;
int OtherFakeCalleeAddress = 0;
__llvm_ctx_profile_expected_callee[0] = &FakeCalleeAddress;
__llvm_ctx_profile_callsite[0] = &Ctx->subContexts()[2];
- auto *Subctx =
- __llvm_ctx_profile_get_context(&OtherFakeCalleeAddress, 2, 3, 1);
+ FunctionData FData[3] = {0};
+ auto *Subctx = __llvm_ctx_profile_get_context(
+ &FData[0], &OtherFakeCalleeAddress, 2, 3, 1);
// We expected a different callee - so return scratch. It mimics what happens
// in the case of a signal handler - in this case, OtherFakeCalleeAddress is
// the signal handler.
EXPECT_TRUE(isScratch(Subctx));
+ // We shouldn't have tried to return a flat context because we're under a
+ // root.
+ EXPECT_EQ(FData[0].FlatCtx, nullptr);
EXPECT_EQ(__llvm_ctx_profile_expected_callee[0], nullptr);
EXPECT_EQ(__llvm_ctx_profile_callsite[0], nullptr);
@@ -111,24 +147,27 @@ TEST_F(ContextTest, ScratchDuringCollection) {
__llvm_ctx_profile_expected_callee[1] = &ThirdFakeCalleeAddress;
__llvm_ctx_profile_callsite[1] = &Subctx->subContexts()[0];
- auto *Subctx2 =
- __llvm_ctx_profile_get_context(&ThirdFakeCalleeAddress, 3, 0, 0);
+ auto *Subctx2 = __llvm_ctx_profile_get_context(
+ &FData[1], &ThirdFakeCalleeAddress, 3, 0, 0);
// We again expect scratch because the '0' position is where the runtime
// looks, so it doesn't matter the '1' position is populated correctly.
EXPECT_TRUE(isScratch(Subctx2));
+ EXPECT_EQ(FData[1].FlatCtx, nullptr);
__llvm_ctx_profile_expected_callee[0] = &ThirdFakeCalleeAddress;
__llvm_ctx_profile_callsite[0] = &Subctx->subContexts()[0];
- auto *Subctx3 =
- __llvm_ctx_profile_get_context(&ThirdFakeCalleeAddress, 3, 0, 0);
+ auto *Subctx3 = __llvm_ctx_profile_get_context(
+ &FData[2], &ThirdFakeCalleeAddress, 3, 0, 0);
// We expect scratch here, too, because the value placed in
// __llvm_ctx_profile_callsite is scratch
EXPECT_TRUE(isScratch(Subctx3));
+ EXPECT_EQ(FData[2].FlatCtx, nullptr);
__llvm_ctx_profile_release_context(&Root);
}
TEST_F(ContextTest, NeedMoreMemory) {
+ __llvm_ctx_profile_start_collection();
auto *Ctx = __llvm_ctx_profile_start_context(&Root, 1, 10, 4);
int FakeCalleeAddress = 0;
const bool IsScratch = isScratch(Ctx);
@@ -136,9 +175,11 @@ TEST_F(ContextTest, NeedMoreMemory) {
const auto *CurrentMem = Root.CurrentMem;
__llvm_ctx_profile_expected_callee[0] = &FakeCalleeAddress;
__llvm_ctx_profile_callsite[0] = &Ctx->subContexts()[2];
+ FunctionData FData = {0};
// Allocate a massive subcontext to force new arena allocation
auto *Subctx =
- __llvm_ctx_profile_get_context(&FakeCalleeAddress, 3, 1 << 20, 1);
+ __llvm_ctx_profile_get_context(&FData, &FakeCalleeAddress, 3, 1 << 20, 1);
+ EXPECT_EQ(FData.FlatCtx, nullptr);
EXPECT_EQ(Ctx->subContexts()[2], Subctx);
EXPECT_NE(CurrentMem, Root.CurrentMem);
EXPECT_NE(Root.CurrentMem, nullptr);
@@ -175,7 +216,9 @@ TEST_F(ContextTest, Dump) {
int FakeCalleeAddress = 0;
__llvm_ctx_profile_expected_callee[0] = &FakeCalleeAddress;
__llvm_ctx_profile_callsite[0] = &Ctx->subContexts()[2];
- auto *Subctx = __llvm_ctx_profile_get_context(&FakeCalleeAddress, 2, 3, 1);
+ FunctionData FData = {0};
+ auto *Subctx =
+ __llvm_ctx_profile_get_context(&FData, &FakeCalleeAddress, 2, 3, 1);
(void)Subctx;
__llvm_ctx_profile_release_context(&Root);
@@ -186,6 +229,9 @@ TEST_F(ContextTest, Dump) {
int EnteredSectionCount = 0;
int ExitedSectionCount = 0;
+ int EnteredFlatCount = 0;
+ int ExitedFlatCount = 0;
+ int FlatsWritten = 0;
bool State = false;
@@ -217,6 +263,16 @@ TEST_F(ContextTest, Dump) {
EXPECT_EQ(EnteredSectionCount, 1);
++ExitedSectionCount;
}
+ void startFlatSection() override { ++EnteredFlatCount; }
+ void writeFlat(GUID Guid, const uint64_t *Buffer,
+ size_t BufferSize) override {
+ ++FlatsWritten;
+ EXPECT_EQ(BufferSize, 3);
+ EXPECT_EQ(Buffer[0], 15U);
+ EXPECT_EQ(Buffer[1], 0U);
+ EXPECT_EQ(Buffer[2], 0U);
+ }
+ void endFlatSection() override { ++ExitedFlatCount; }
};
TestProfileWriter W(&Root, 1);
@@ -226,10 +282,17 @@ TEST_F(ContextTest, Dump) {
// this resets all counters but not the internal structure.
__llvm_ctx_profile_start_collection();
+ auto *Flat =
+ __llvm_ctx_profile_get_context(&FData, &FakeCalleeAddress, 2, 3, 1);
+ EXPECT_NE(FData.FlatCtx, nullptr);
+ FData.FlatCtx->counters()[0] = 15U;
TestProfileWriter W2(&Root, 0);
EXPECT_FALSE(W2.State);
__l...
[truncated]
LLVM Buildbot has detected a new failure on builder
Full details are available at: https://lab.llvm.org/buildbot/#/builders/66/builds/11078

Here is the relevant piece of the build log for the reference
It needs to match the structure of `FunctionData` in compiler-rt, but missed a field in PR #130655.
Collect flat profiles. We only do this for function activations that aren't otherwise collectible under a context root. This allows us to reason about the full profile without concerning ourselves with whether we are double-counting. For example, we can combine (during profile use) flattened contextual profiles with flat profiles.
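To illustrate the no-double-counting point, here is a hypothetical sketch of what combining the two kinds of profiles at profile-use time could look like (not the actual LLVM profile-use code): each function activation lands either in some root's contextual tree or in its flat profile, so the per-GUID counters from the flattened contextual profiles and from the flat profiles can simply be added.

```cpp
#include <cstddef>
#include <cstdint>
#include <map>
#include <vector>

using GUID = uint64_t;
using Counters = std::vector<uint64_t>;

// Hypothetical merge step: add flat counters on top of counters obtained by
// flattening the contextual trees. Because an activation is counted in exactly
// one of the two profiles, the per-slot sums are total execution counts.
std::map<GUID, Counters> mergeProfiles(const std::map<GUID, Counters> &Flattened,
                                       const std::map<GUID, Counters> &Flat) {
  std::map<GUID, Counters> Total = Flattened;
  for (const auto &[Guid, FlatCounters] : Flat) {
    Counters &Dest = Total[Guid];
    if (Dest.size() < FlatCounters.size())
      Dest.resize(FlatCounters.size(), 0);
    for (std::size_t I = 0; I < FlatCounters.size(); ++I)
      Dest[I] += FlatCounters[I];
  }
  return Total;
}
```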