Skip to content

[ctxprof] Capture sampling info for context roots #131201

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion compiler-rt/lib/ctx_profile/CtxInstrContextNode.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,8 @@ class ContextNode final {
class ProfileWriter {
public:
virtual void startContextSection() = 0;
virtual void writeContextual(const ctx_profile::ContextNode &RootNode) = 0;
virtual void writeContextual(const ctx_profile::ContextNode &RootNode,
uint64_t TotalRootEntryCount) = 0;
virtual void endContextSection() = 0;

virtual void startFlatSection() = 0;
Expand Down
7 changes: 6 additions & 1 deletion compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,9 @@ ContextNode *__llvm_ctx_profile_start_context(
ContextRoot *Root, GUID Guid, uint32_t Counters,
uint32_t Callsites) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
IsUnderContext = true;
__sanitizer::atomic_fetch_add(&Root->TotalEntries, 1,
__sanitizer::memory_order_relaxed);

if (!Root->FirstMemBlock) {
setupContext(Root, Guid, Counters, Callsites);
}
Expand Down Expand Up @@ -374,6 +377,7 @@ void __llvm_ctx_profile_start_collection() {
++NumMemUnits;

resetContextNode(*Root->FirstNode);
__sanitizer::atomic_store_relaxed(&Root->TotalEntries, 0);
}
__sanitizer::atomic_store_relaxed(&ProfilingStarted, true);
__sanitizer::Printf("[ctxprof] Initial NumMemUnits: %zu \n", NumMemUnits);
Expand All @@ -393,7 +397,8 @@ bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) {
__sanitizer::Printf("[ctxprof] Contextual Profile is %s\n", "invalid");
return false;
}
Writer.writeContextual(*Root->FirstNode);
Writer.writeContextual(*Root->FirstNode, __sanitizer::atomic_load_relaxed(
&Root->TotalEntries));
}
Writer.endContextSection();
Writer.startFlatSection();
Expand Down
4 changes: 4 additions & 0 deletions compiler-rt/lib/ctx_profile/CtxInstrProfiling.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ struct ContextRoot {
ContextNode *FirstNode = nullptr;
Arena *FirstMemBlock = nullptr;
Arena *CurrentMem = nullptr;

// Count the number of entries, regardless of whether we could take the
// `Taken` mutex.
::__sanitizer::atomic_uint64_t TotalEntries = {};

// This is init-ed by the static zero initializer in LLVM.
// Taken is used to ensure only one thread traverses the contextual graph -
// either to read it or to write it. On server side, the same entrypoint will
Expand Down
4 changes: 3 additions & 1 deletion compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,9 @@ TEST_F(ContextTest, Dump) {
TestProfileWriter(ContextRoot *Root, size_t Entries)
: Root(Root), Entries(Entries) {}

void writeContextual(const ContextNode &Node) override {
void writeContextual(const ContextNode &Node,
uint64_t TotalRootEntryCount) override {
EXPECT_EQ(TotalRootEntryCount, Entries);
EXPECT_EQ(EnteredSectionCount, 1);
EXPECT_EQ(ExitedSectionCount, 0);
EXPECT_FALSE(Root->Taken.TryLock());
Expand Down
6 changes: 5 additions & 1 deletion compiler-rt/test/ctx_profile/TestCases/generate-context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,10 @@ class TestProfileWriter : public ProfileWriter {
std::cout << "Exited Context Section" << std::endl;
}

void writeContextual(const ContextNode &RootNode) override {
// Prints a one-line header naming the root (by GUID) and its total entry
// count, then dumps the profile rooted at RootNode via printProfile.
void writeContextual(const ContextNode &RootNode,
                     uint64_t EntryCount) override {
  const auto RootGuid = RootNode.guid();
  std::cout << "Entering Root " << RootGuid;
  std::cout << " with total entry count " << EntryCount << std::endl;
  printProfile(RootNode, "", "");
}

Expand Down Expand Up @@ -115,6 +118,7 @@ class TestProfileWriter : public ProfileWriter {
// The second context is in the loop. We expect 2 entries and each of the
// branches would be taken once, so the second counter is 1.
// CHECK-NEXT: Entered Context Section
// CHECK-NEXT: Entering Root 8657661246551306189 with total entry count 1
// CHECK-NEXT: Guid: 8657661246551306189
// CHECK-NEXT: Entries: 1
// CHECK-NEXT: 2 counters and 3 callsites
Expand Down
3 changes: 2 additions & 1 deletion llvm/include/llvm/ProfileData/CtxInstrContextNode.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,8 @@ class ContextNode final {
class ProfileWriter {
public:
virtual void startContextSection() = 0;
virtual void writeContextual(const ctx_profile::ContextNode &RootNode) = 0;
virtual void writeContextual(const ctx_profile::ContextNode &RootNode,
uint64_t TotalRootEntryCount) = 0;
virtual void endContextSection() = 0;

virtual void startFlatSection() = 0;
Expand Down
10 changes: 8 additions & 2 deletions llvm/include/llvm/ProfileData/PGOCtxProfReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,13 @@ class PGOCtxProfContext final : public internal::IndexNode {

GlobalValue::GUID GUID = 0;
SmallVector<uint64_t, 16> Counters;
const std::optional<uint64_t> RootEntryCount;
CallsiteMapTy Callsites;

PGOCtxProfContext(GlobalValue::GUID G, SmallVectorImpl<uint64_t> &&Counters)
: GUID(G), Counters(std::move(Counters)) {}
// Constructs a context for the function identified by G, moving Counters into
// the new object (the parameter shadows the member of the same name).
// RootEntryCount defaults to std::nullopt; isRoot() reports true only when a
// value was supplied here.
PGOCtxProfContext(GlobalValue::GUID G, SmallVectorImpl<uint64_t> &&Counters,
std::optional<uint64_t> RootEntryCount = std::nullopt)
: GUID(G), Counters(std::move(Counters)), RootEntryCount(RootEntryCount) {
}

Expected<PGOCtxProfContext &>
getOrEmplace(uint32_t Index, GlobalValue::GUID G,
Expand All @@ -115,6 +118,9 @@ class PGOCtxProfContext final : public internal::IndexNode {
const SmallVectorImpl<uint64_t> &counters() const { return Counters; }
SmallVectorImpl<uint64_t> &counters() { return Counters; }

// A context is a root exactly when a total root entry count was recorded.
bool isRoot() const { return static_cast<bool>(RootEntryCount); }
// Returns the total entry count stored for this root. Callers should check
// isRoot() first: std::optional::value() does not return normally when the
// optional is empty.
uint64_t getTotalRootEntryCount() const { return RootEntryCount.value(); }

uint64_t getEntrycount() const {
assert(!Counters.empty() &&
"Functions are expected to have at their entry BB instrumented, so "
Expand Down
20 changes: 15 additions & 5 deletions llvm/include/llvm/ProfileData/PGOCtxProfWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,14 @@
#include "llvm/ProfileData/CtxInstrContextNode.h"

namespace llvm {
enum PGOCtxProfileRecords { Invalid = 0, Version, Guid, CalleeIndex, Counters };
// Record codes for the contextual profile. The writer passes these to
// EmitRecord and the reader switches on them, so each enumerator's numeric
// value is spelled out explicitly to keep the numbering stable if entries are
// added or reordered later.
enum PGOCtxProfileRecords {
  Invalid = 0,
  Version = 1,
  Guid = 2,
  CallsiteIndex = 3,
  Counters = 4,
  TotalRootEntryCount = 5
};

enum PGOCtxProfileBlockIDs {
FIRST_VALID = bitc::FIRST_APPLICATION_BLOCKID,
Expand Down Expand Up @@ -73,9 +80,11 @@ class PGOCtxProfileWriter final : public ctx_profile::ProfileWriter {
const bool IncludeEmpty;

void writeGuid(ctx_profile::GUID Guid);
void writeCallsiteIndex(uint32_t Index);
void writeRootEntryCount(uint64_t EntryCount);
void writeCounters(ArrayRef<uint64_t> Counters);
void writeImpl(std::optional<uint32_t> CallerIndex,
const ctx_profile::ContextNode &Node);
void writeNode(uint32_t CallerIndex, const ctx_profile::ContextNode &Node);
void writeSubcontexts(const ctx_profile::ContextNode &Node);

public:
PGOCtxProfileWriter(raw_ostream &Out,
Expand All @@ -84,7 +93,8 @@ class PGOCtxProfileWriter final : public ctx_profile::ProfileWriter {
~PGOCtxProfileWriter() { Writer.ExitBlock(); }

void startContextSection() override;
void writeContextual(const ctx_profile::ContextNode &RootNode) override;
void writeContextual(const ctx_profile::ContextNode &RootNode,
uint64_t TotalRootEntryCount) override;
void endContextSection() override;

void startFlatSection() override;
Expand All @@ -94,7 +104,7 @@ class PGOCtxProfileWriter final : public ctx_profile::ProfileWriter {

// constants used in writing which a reader may find useful.
static constexpr unsigned CodeLen = 2;
static constexpr uint32_t CurrentVersion = 2;
static constexpr uint32_t CurrentVersion = 3;
static constexpr unsigned VBREncodingBits = 6;
static constexpr StringRef ContainerMagic = "CTXP";
};
Expand Down
32 changes: 27 additions & 5 deletions llvm/lib/ProfileData/PGOCtxProfReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,16 +96,19 @@ PGOCtxProfileReader::readProfile(PGOCtxProfileBlockIDs Kind) {
std::optional<ctx_profile::GUID> Guid;
std::optional<SmallVector<uint64_t, 16>> Counters;
std::optional<uint32_t> CallsiteIndex;
std::optional<uint64_t> TotalEntryCount;

SmallVector<uint64_t, 1> RecordValues;

const bool ExpectIndex = Kind == PGOCtxProfileBlockIDs::ContextNodeBlockID;
const bool IsRoot = Kind == PGOCtxProfileBlockIDs::ContextRootBlockID;
// We don't prescribe the order in which the records come in, and we are ok
// if other unsupported records appear. We seek in the current subblock until
// we get all we know.
auto GotAllWeNeed = [&]() {
return Guid.has_value() && Counters.has_value() &&
(!ExpectIndex || CallsiteIndex.has_value());
(!ExpectIndex || CallsiteIndex.has_value()) &&
(!IsRoot || TotalEntryCount.has_value());
};
while (!GotAllWeNeed()) {
RecordValues.clear();
Expand All @@ -127,21 +130,29 @@ PGOCtxProfileReader::readProfile(PGOCtxProfileBlockIDs Kind) {
return wrongValue("Empty counters. At least the entry counter (one "
"value) was expected");
break;
case PGOCtxProfileRecords::CalleeIndex:
case PGOCtxProfileRecords::CallsiteIndex:
if (!ExpectIndex)
return wrongValue("The root context should not have a callee index");
if (RecordValues.size() != 1)
return wrongValue("The callee index should have exactly one value");
CallsiteIndex = RecordValues[0];
break;
case PGOCtxProfileRecords::TotalRootEntryCount:
if (!IsRoot)
return wrongValue("Non-root has a total entry count record");
if (RecordValues.size() != 1)
return wrongValue(
"The root total entry count record should have exactly one value");
TotalEntryCount = RecordValues[0];
break;
default:
// OK if we see records we do not understand, like records (profile
// components) introduced later.
break;
}
}

PGOCtxProfContext Ret(*Guid, std::move(*Counters));
PGOCtxProfContext Ret(*Guid, std::move(*Counters), TotalEntryCount);

while (canEnterBlockWithID(PGOCtxProfileBlockIDs::ContextNodeBlockID)) {
EXPECT_OR_RET(SC, readProfile(PGOCtxProfileBlockIDs::ContextNodeBlockID));
Expand Down Expand Up @@ -278,7 +289,8 @@ void toYaml(yaml::Output &Out,

void toYaml(yaml::Output &Out, GlobalValue::GUID Guid,
const SmallVectorImpl<uint64_t> &Counters,
const PGOCtxProfContext::CallsiteMapTy &Callsites) {
const PGOCtxProfContext::CallsiteMapTy &Callsites,
std::optional<uint64_t> TotalRootEntryCount = std::nullopt) {
yaml::EmptyContext Empty;
Out.beginMapping();
void *SaveInfo = nullptr;
Expand All @@ -289,6 +301,11 @@ void toYaml(yaml::Output &Out, GlobalValue::GUID Guid,
yaml::yamlize(Out, Guid, true, Empty);
Out.postflightKey(nullptr);
}
if (TotalRootEntryCount) {
Out.preflightKey("TotalRootEntryCount", true, false, UseDefault, SaveInfo);
yaml::yamlize(Out, *TotalRootEntryCount, true, Empty);
Out.postflightKey(nullptr);
}
{
Out.preflightKey("Counters", true, false, UseDefault, SaveInfo);
Out.beginFlowSequence();
Expand All @@ -308,8 +325,13 @@ void toYaml(yaml::Output &Out, GlobalValue::GUID Guid,
}
Out.endMapping();
}

void toYaml(yaml::Output &Out, const PGOCtxProfContext &Ctx) {
toYaml(Out, Ctx.guid(), Ctx.counters(), Ctx.callsites());
if (Ctx.isRoot())
toYaml(Out, Ctx.guid(), Ctx.counters(), Ctx.callsites(),
Ctx.getTotalRootEntryCount());
else
toYaml(Out, Ctx.guid(), Ctx.counters(), Ctx.callsites());
}

} // namespace
Expand Down
61 changes: 47 additions & 14 deletions llvm/lib/ProfileData/PGOCtxProfWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,12 @@ PGOCtxProfileWriter::PGOCtxProfileWriter(
DescribeBlock(PGOCtxProfileBlockIDs::ContextsSectionBlockID, "Contexts");
DescribeBlock(PGOCtxProfileBlockIDs::ContextRootBlockID, "Root");
DescribeRecord(PGOCtxProfileRecords::Guid, "GUID");
DescribeRecord(PGOCtxProfileRecords::TotalRootEntryCount,
"TotalRootEntryCount");
DescribeRecord(PGOCtxProfileRecords::Counters, "Counters");
DescribeBlock(PGOCtxProfileBlockIDs::ContextNodeBlockID, "Context");
DescribeRecord(PGOCtxProfileRecords::Guid, "GUID");
DescribeRecord(PGOCtxProfileRecords::CalleeIndex, "CalleeIndex");
DescribeRecord(PGOCtxProfileRecords::CallsiteIndex, "CalleeIndex");
DescribeRecord(PGOCtxProfileRecords::Counters, "Counters");
DescribeBlock(PGOCtxProfileBlockIDs::FlatProfilesSectionBlockID,
"FlatProfiles");
Expand All @@ -85,29 +87,39 @@ void PGOCtxProfileWriter::writeGuid(ctx_profile::GUID Guid) {
Writer.EmitRecord(PGOCtxProfileRecords::Guid, SmallVector<uint64_t, 1>{Guid});
}

void PGOCtxProfileWriter::writeCallsiteIndex(uint32_t CallsiteIndex) {
Writer.EmitRecord(PGOCtxProfileRecords::CallsiteIndex,
SmallVector<uint64_t, 1>{CallsiteIndex});
}

void PGOCtxProfileWriter::writeRootEntryCount(uint64_t TotalRootEntryCount) {
Writer.EmitRecord(PGOCtxProfileRecords::TotalRootEntryCount,
SmallVector<uint64_t, 1>{TotalRootEntryCount});
}

// Recursively write all the subcontexts. We need to traverse depth-first to
// model the context->subcontext relationship implicitly. Since this captures
// call stacks, we don't need to worry about stack overflow, so we can keep the
// implementation simple.
void PGOCtxProfileWriter::writeImpl(std::optional<uint32_t> CallerIndex,
void PGOCtxProfileWriter::writeNode(uint32_t CallsiteIndex,
const ContextNode &Node) {
// A node with no counters is an error. We don't expect this to happen from
// the runtime, rather, this is interesting for testing the reader.
if (!IncludeEmpty && (Node.counters_size() > 0 && Node.entrycount() == 0))
return;
Writer.EnterSubblock(CallerIndex ? PGOCtxProfileBlockIDs::ContextNodeBlockID
: PGOCtxProfileBlockIDs::ContextRootBlockID,
CodeLen);
Writer.EnterSubblock(PGOCtxProfileBlockIDs::ContextNodeBlockID, CodeLen);
writeGuid(Node.guid());
if (CallerIndex)
Writer.EmitRecord(PGOCtxProfileRecords::CalleeIndex,
SmallVector<uint64_t, 1>{*CallerIndex});
writeCallsiteIndex(CallsiteIndex);
writeCounters({Node.counters(), Node.counters_size()});
writeSubcontexts(Node);
Writer.ExitBlock();
}

void PGOCtxProfileWriter::writeSubcontexts(const ContextNode &Node) {
for (uint32_t I = 0U; I < Node.callsites_size(); ++I)
for (const auto *Subcontext = Node.subContexts()[I]; Subcontext;
Subcontext = Subcontext->next())
writeImpl(I, *Subcontext);
Writer.ExitBlock();
writeNode(I, *Subcontext);
}

void PGOCtxProfileWriter::startContextSection() {
Expand All @@ -122,8 +134,17 @@ void PGOCtxProfileWriter::startFlatSection() {
void PGOCtxProfileWriter::endContextSection() { Writer.ExitBlock(); }
void PGOCtxProfileWriter::endFlatSection() { Writer.ExitBlock(); }

void PGOCtxProfileWriter::writeContextual(const ContextNode &RootNode) {
writeImpl(std::nullopt, RootNode);
// Writes one root context as a ContextRootBlockID sub-block containing, in
// order: its GUID, its total root entry count, its counters, and then all of
// its subcontexts.
//
// Unless IncludeEmpty is set, nothing is written for a root whose total entry
// count is zero, or which has counters but a zero entry counter.
void PGOCtxProfileWriter::writeContextual(const ContextNode &RootNode,
                                          uint64_t TotalRootEntryCount) {
  if (!IncludeEmpty) {
    if (TotalRootEntryCount == 0)
      return;
    // Only consult entrycount() once we know there is at least one counter.
    const bool HasCounters = RootNode.counters_size() > 0;
    if (HasCounters && RootNode.entrycount() == 0)
      return;
  }

  Writer.EnterSubblock(PGOCtxProfileBlockIDs::ContextRootBlockID, CodeLen);
  writeGuid(RootNode.guid());
  writeRootEntryCount(TotalRootEntryCount);
  writeCounters({RootNode.counters(), RootNode.counters_size()});
  writeSubcontexts(RootNode);
  Writer.ExitBlock();
}

void PGOCtxProfileWriter::writeFlat(ctx_profile::GUID Guid,
Expand All @@ -144,11 +165,15 @@ struct SerializableCtxRepresentation {
std::vector<std::vector<SerializableCtxRepresentation>> Callsites;
};

// Serializable form of a root context: everything a
// SerializableCtxRepresentation carries, plus the root's total entry count
// (zero unless set).
struct SerializableRootRepresentation : SerializableCtxRepresentation {
  uint64_t TotalRootEntryCount{0};
};

using SerializableFlatProfileRepresentation =
std::pair<ctx_profile::GUID, std::vector<uint64_t>>;

struct SerializableProfileRepresentation {
std::vector<SerializableCtxRepresentation> Contexts;
std::vector<SerializableRootRepresentation> Contexts;
std::vector<SerializableFlatProfileRepresentation> FlatProfiles;
};

Expand Down Expand Up @@ -189,6 +214,7 @@ createNode(std::vector<std::unique_ptr<char[]>> &Nodes,

LLVM_YAML_IS_SEQUENCE_VECTOR(SerializableCtxRepresentation)
LLVM_YAML_IS_SEQUENCE_VECTOR(std::vector<SerializableCtxRepresentation>)
LLVM_YAML_IS_SEQUENCE_VECTOR(SerializableRootRepresentation)
LLVM_YAML_IS_SEQUENCE_VECTOR(SerializableFlatProfileRepresentation)
template <> struct yaml::MappingTraits<SerializableCtxRepresentation> {
static void mapping(yaml::IO &IO, SerializableCtxRepresentation &SCR) {
Expand All @@ -198,6 +224,13 @@ template <> struct yaml::MappingTraits<SerializableCtxRepresentation> {
}
};

// YAML mapping for a root context. It first applies the mapping of the base
// SerializableCtxRepresentation, then maps the required "TotalRootEntryCount"
// key.
template <> struct yaml::MappingTraits<SerializableRootRepresentation> {
  static void mapping(yaml::IO &IO, SerializableRootRepresentation &R) {
    using BaseTraits = yaml::MappingTraits<SerializableCtxRepresentation>;
    BaseTraits::mapping(IO, R);
    IO.mapRequired("TotalRootEntryCount", R.TotalRootEntryCount);
  }
};

template <> struct yaml::MappingTraits<SerializableProfileRepresentation> {
static void mapping(yaml::IO &IO, SerializableProfileRepresentation &SPR) {
IO.mapOptional("Contexts", SPR.Contexts);
Expand Down Expand Up @@ -232,7 +265,7 @@ Error llvm::createCtxProfFromYAML(StringRef Profile, raw_ostream &Out) {
if (!TopList)
return createStringError(
"Unexpected error converting internal structure to ctx profile");
Writer.writeContextual(*TopList);
Writer.writeContextual(*TopList, DC.TotalRootEntryCount);
}
Writer.endContextSection();
}
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ CtxInstrumentationLowerer::CtxInstrumentationLowerer(Module &M,
PointerTy, /*FirstNode*/
PointerTy, /*FirstMemBlock*/
PointerTy, /*CurrentMem*/
I64Ty, /*TotalEntries*/
SanitizerMutexType, /*Taken*/
});
FunctionDataTy =
Expand Down
Loading
Loading