Skip to content

Commit b034905

Browse files
authored
[ctxprof] Capture sampling info for context roots (#131201)
When we collect a contextual profile, we sample the threads entering its root and only collect on one at a time (see `ContextRoot::Taken`). If we want to compare contextual profiles with one another, and/or with flat profiles, we have a problem: we don't know how to compare the counter values relative to each other. To that end, we add `ContextRoot::TotalEntries`, which is incremented every time a root is entered and serves as a multiplier for the counter values collected under that root. We expose this in the profile and leave the normalization to the user of the profile, for a few reasons:

* it's only needed when reasoning about all profiles in aggregate;
* the goal, in compiler_rt, is to flush out the profile as quickly as possible, and performing multiplications adds an overhead that may not even be necessary if the consumer of the profile doesn't care about combining profiles;
* the information itself may be interesting as an indication of the relative sampling of various contexts.
1 parent a3ce1cc commit b034905

30 files changed

+172
-47
lines changed

compiler-rt/lib/ctx_profile/CtxInstrContextNode.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,8 @@ class ContextNode final {
120120
class ProfileWriter {
121121
public:
122122
virtual void startContextSection() = 0;
123-
virtual void writeContextual(const ctx_profile::ContextNode &RootNode) = 0;
123+
virtual void writeContextual(const ctx_profile::ContextNode &RootNode,
124+
uint64_t TotalRootEntryCount) = 0;
124125
virtual void endContextSection() = 0;
125126

126127
virtual void startFlatSection() = 0;

compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,9 @@ ContextNode *__llvm_ctx_profile_start_context(
340340
ContextRoot *Root, GUID Guid, uint32_t Counters,
341341
uint32_t Callsites) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
342342
IsUnderContext = true;
343+
__sanitizer::atomic_fetch_add(&Root->TotalEntries, 1,
344+
__sanitizer::memory_order_relaxed);
345+
343346
if (!Root->FirstMemBlock) {
344347
setupContext(Root, Guid, Counters, Callsites);
345348
}
@@ -374,6 +377,7 @@ void __llvm_ctx_profile_start_collection() {
374377
++NumMemUnits;
375378

376379
resetContextNode(*Root->FirstNode);
380+
__sanitizer::atomic_store_relaxed(&Root->TotalEntries, 0);
377381
}
378382
__sanitizer::atomic_store_relaxed(&ProfilingStarted, true);
379383
__sanitizer::Printf("[ctxprof] Initial NumMemUnits: %zu \n", NumMemUnits);
@@ -393,7 +397,8 @@ bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) {
393397
__sanitizer::Printf("[ctxprof] Contextual Profile is %s\n", "invalid");
394398
return false;
395399
}
396-
Writer.writeContextual(*Root->FirstNode);
400+
Writer.writeContextual(*Root->FirstNode, __sanitizer::atomic_load_relaxed(
401+
&Root->TotalEntries));
397402
}
398403
Writer.endContextSection();
399404
Writer.startFlatSection();

compiler-rt/lib/ctx_profile/CtxInstrProfiling.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,10 @@ struct ContextRoot {
8080
ContextNode *FirstNode = nullptr;
8181
Arena *FirstMemBlock = nullptr;
8282
Arena *CurrentMem = nullptr;
83+
84+
// Count the number of entries, whether or not we could take the `Taken` mutex
85+
::__sanitizer::atomic_uint64_t TotalEntries = {};
86+
8387
// This is init-ed by the static zero initializer in LLVM.
8488
// Taken is used to ensure only one thread traverses the contextual graph -
8589
// either to read it or to write it. On server side, the same entrypoint will

compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,9 @@ TEST_F(ContextTest, Dump) {
238238
TestProfileWriter(ContextRoot *Root, size_t Entries)
239239
: Root(Root), Entries(Entries) {}
240240

241-
void writeContextual(const ContextNode &Node) override {
241+
void writeContextual(const ContextNode &Node,
242+
uint64_t TotalRootEntryCount) override {
243+
EXPECT_EQ(TotalRootEntryCount, Entries);
242244
EXPECT_EQ(EnteredSectionCount, 1);
243245
EXPECT_EQ(ExitedSectionCount, 0);
244246
EXPECT_FALSE(Root->Taken.TryLock());

compiler-rt/test/ctx_profile/TestCases/generate-context.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,10 @@ class TestProfileWriter : public ProfileWriter {
8484
std::cout << "Exited Context Section" << std::endl;
8585
}
8686

87-
void writeContextual(const ContextNode &RootNode) override {
87+
void writeContextual(const ContextNode &RootNode,
88+
uint64_t EntryCount) override {
89+
std::cout << "Entering Root " << RootNode.guid()
90+
<< " with total entry count " << EntryCount << std::endl;
8891
printProfile(RootNode, "", "");
8992
}
9093

@@ -115,6 +118,7 @@ class TestProfileWriter : public ProfileWriter {
115118
// The second context is in the loop. We expect 2 entries and each of the
116119
// branches would be taken once, so the second counter is 1.
117120
// CHECK-NEXT: Entered Context Section
121+
// CHECK-NEXT: Entering Root 8657661246551306189 with total entry count 1
118122
// CHECK-NEXT: Guid: 8657661246551306189
119123
// CHECK-NEXT: Entries: 1
120124
// CHECK-NEXT: 2 counters and 3 callsites

llvm/include/llvm/ProfileData/CtxInstrContextNode.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,8 @@ class ContextNode final {
120120
class ProfileWriter {
121121
public:
122122
virtual void startContextSection() = 0;
123-
virtual void writeContextual(const ctx_profile::ContextNode &RootNode) = 0;
123+
virtual void writeContextual(const ctx_profile::ContextNode &RootNode,
124+
uint64_t TotalRootEntryCount) = 0;
124125
virtual void endContextSection() = 0;
125126

126127
virtual void startFlatSection() = 0;

llvm/include/llvm/ProfileData/PGOCtxProfReader.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,10 +92,13 @@ class PGOCtxProfContext final : public internal::IndexNode {
9292

9393
GlobalValue::GUID GUID = 0;
9494
SmallVector<uint64_t, 16> Counters;
95+
const std::optional<uint64_t> RootEntryCount;
9596
CallsiteMapTy Callsites;
9697

97-
PGOCtxProfContext(GlobalValue::GUID G, SmallVectorImpl<uint64_t> &&Counters)
98-
: GUID(G), Counters(std::move(Counters)) {}
98+
PGOCtxProfContext(GlobalValue::GUID G, SmallVectorImpl<uint64_t> &&Counters,
99+
std::optional<uint64_t> RootEntryCount = std::nullopt)
100+
: GUID(G), Counters(std::move(Counters)), RootEntryCount(RootEntryCount) {
101+
}
99102

100103
Expected<PGOCtxProfContext &>
101104
getOrEmplace(uint32_t Index, GlobalValue::GUID G,
@@ -115,6 +118,9 @@ class PGOCtxProfContext final : public internal::IndexNode {
115118
const SmallVectorImpl<uint64_t> &counters() const { return Counters; }
116119
SmallVectorImpl<uint64_t> &counters() { return Counters; }
117120

121+
bool isRoot() const { return RootEntryCount.has_value(); }
122+
uint64_t getTotalRootEntryCount() const { return RootEntryCount.value(); }
123+
118124
uint64_t getEntrycount() const {
119125
assert(!Counters.empty() &&
120126
"Functions are expected to have at their entry BB instrumented, so "

llvm/include/llvm/ProfileData/PGOCtxProfWriter.h

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,14 @@
1919
#include "llvm/ProfileData/CtxInstrContextNode.h"
2020

2121
namespace llvm {
22-
enum PGOCtxProfileRecords { Invalid = 0, Version, Guid, CalleeIndex, Counters };
22+
enum PGOCtxProfileRecords {
23+
Invalid = 0,
24+
Version,
25+
Guid,
26+
CallsiteIndex,
27+
Counters,
28+
TotalRootEntryCount
29+
};
2330

2431
enum PGOCtxProfileBlockIDs {
2532
FIRST_VALID = bitc::FIRST_APPLICATION_BLOCKID,
@@ -73,9 +80,11 @@ class PGOCtxProfileWriter final : public ctx_profile::ProfileWriter {
7380
const bool IncludeEmpty;
7481

7582
void writeGuid(ctx_profile::GUID Guid);
83+
void writeCallsiteIndex(uint32_t Index);
84+
void writeRootEntryCount(uint64_t EntryCount);
7685
void writeCounters(ArrayRef<uint64_t> Counters);
77-
void writeImpl(std::optional<uint32_t> CallerIndex,
78-
const ctx_profile::ContextNode &Node);
86+
void writeNode(uint32_t CallerIndex, const ctx_profile::ContextNode &Node);
87+
void writeSubcontexts(const ctx_profile::ContextNode &Node);
7988

8089
public:
8190
PGOCtxProfileWriter(raw_ostream &Out,
@@ -84,7 +93,8 @@ class PGOCtxProfileWriter final : public ctx_profile::ProfileWriter {
8493
~PGOCtxProfileWriter() { Writer.ExitBlock(); }
8594

8695
void startContextSection() override;
87-
void writeContextual(const ctx_profile::ContextNode &RootNode) override;
96+
void writeContextual(const ctx_profile::ContextNode &RootNode,
97+
uint64_t TotalRootEntryCount) override;
8898
void endContextSection() override;
8999

90100
void startFlatSection() override;
@@ -94,7 +104,7 @@ class PGOCtxProfileWriter final : public ctx_profile::ProfileWriter {
94104

95105
// constants used in writing which a reader may find useful.
96106
static constexpr unsigned CodeLen = 2;
97-
static constexpr uint32_t CurrentVersion = 2;
107+
static constexpr uint32_t CurrentVersion = 3;
98108
static constexpr unsigned VBREncodingBits = 6;
99109
static constexpr StringRef ContainerMagic = "CTXP";
100110
};

llvm/lib/ProfileData/PGOCtxProfReader.cpp

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -96,16 +96,19 @@ PGOCtxProfileReader::readProfile(PGOCtxProfileBlockIDs Kind) {
9696
std::optional<ctx_profile::GUID> Guid;
9797
std::optional<SmallVector<uint64_t, 16>> Counters;
9898
std::optional<uint32_t> CallsiteIndex;
99+
std::optional<uint64_t> TotalEntryCount;
99100

100101
SmallVector<uint64_t, 1> RecordValues;
101102

102103
const bool ExpectIndex = Kind == PGOCtxProfileBlockIDs::ContextNodeBlockID;
104+
const bool IsRoot = Kind == PGOCtxProfileBlockIDs::ContextRootBlockID;
103105
// We don't prescribe the order in which the records come in, and we are ok
104106
// if other unsupported records appear. We seek in the current subblock until
105107
// we get all we know.
106108
auto GotAllWeNeed = [&]() {
107109
return Guid.has_value() && Counters.has_value() &&
108-
(!ExpectIndex || CallsiteIndex.has_value());
110+
(!ExpectIndex || CallsiteIndex.has_value()) &&
111+
(!IsRoot || TotalEntryCount.has_value());
109112
};
110113
while (!GotAllWeNeed()) {
111114
RecordValues.clear();
@@ -127,21 +130,29 @@ PGOCtxProfileReader::readProfile(PGOCtxProfileBlockIDs Kind) {
127130
return wrongValue("Empty counters. At least the entry counter (one "
128131
"value) was expected");
129132
break;
130-
case PGOCtxProfileRecords::CalleeIndex:
133+
case PGOCtxProfileRecords::CallsiteIndex:
131134
if (!ExpectIndex)
132135
return wrongValue("The root context should not have a callee index");
133136
if (RecordValues.size() != 1)
134137
return wrongValue("The callee index should have exactly one value");
135138
CallsiteIndex = RecordValues[0];
136139
break;
140+
case PGOCtxProfileRecords::TotalRootEntryCount:
141+
if (!IsRoot)
142+
return wrongValue("Non-root has a total entry count record");
143+
if (RecordValues.size() != 1)
144+
return wrongValue(
145+
"The root total entry count record should have exactly one value");
146+
TotalEntryCount = RecordValues[0];
147+
break;
137148
default:
138149
// OK if we see records we do not understand, like records (profile
139150
// components) introduced later.
140151
break;
141152
}
142153
}
143154

144-
PGOCtxProfContext Ret(*Guid, std::move(*Counters));
155+
PGOCtxProfContext Ret(*Guid, std::move(*Counters), TotalEntryCount);
145156

146157
while (canEnterBlockWithID(PGOCtxProfileBlockIDs::ContextNodeBlockID)) {
147158
EXPECT_OR_RET(SC, readProfile(PGOCtxProfileBlockIDs::ContextNodeBlockID));
@@ -278,7 +289,8 @@ void toYaml(yaml::Output &Out,
278289

279290
void toYaml(yaml::Output &Out, GlobalValue::GUID Guid,
280291
const SmallVectorImpl<uint64_t> &Counters,
281-
const PGOCtxProfContext::CallsiteMapTy &Callsites) {
292+
const PGOCtxProfContext::CallsiteMapTy &Callsites,
293+
std::optional<uint64_t> TotalRootEntryCount = std::nullopt) {
282294
yaml::EmptyContext Empty;
283295
Out.beginMapping();
284296
void *SaveInfo = nullptr;
@@ -289,6 +301,11 @@ void toYaml(yaml::Output &Out, GlobalValue::GUID Guid,
289301
yaml::yamlize(Out, Guid, true, Empty);
290302
Out.postflightKey(nullptr);
291303
}
304+
if (TotalRootEntryCount) {
305+
Out.preflightKey("TotalRootEntryCount", true, false, UseDefault, SaveInfo);
306+
yaml::yamlize(Out, *TotalRootEntryCount, true, Empty);
307+
Out.postflightKey(nullptr);
308+
}
292309
{
293310
Out.preflightKey("Counters", true, false, UseDefault, SaveInfo);
294311
Out.beginFlowSequence();
@@ -308,8 +325,13 @@ void toYaml(yaml::Output &Out, GlobalValue::GUID Guid,
308325
}
309326
Out.endMapping();
310327
}
328+
311329
void toYaml(yaml::Output &Out, const PGOCtxProfContext &Ctx) {
312-
toYaml(Out, Ctx.guid(), Ctx.counters(), Ctx.callsites());
330+
if (Ctx.isRoot())
331+
toYaml(Out, Ctx.guid(), Ctx.counters(), Ctx.callsites(),
332+
Ctx.getTotalRootEntryCount());
333+
else
334+
toYaml(Out, Ctx.guid(), Ctx.counters(), Ctx.callsites());
313335
}
314336

315337
} // namespace

llvm/lib/ProfileData/PGOCtxProfWriter.cpp

Lines changed: 47 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,12 @@ PGOCtxProfileWriter::PGOCtxProfileWriter(
5555
DescribeBlock(PGOCtxProfileBlockIDs::ContextsSectionBlockID, "Contexts");
5656
DescribeBlock(PGOCtxProfileBlockIDs::ContextRootBlockID, "Root");
5757
DescribeRecord(PGOCtxProfileRecords::Guid, "GUID");
58+
DescribeRecord(PGOCtxProfileRecords::TotalRootEntryCount,
59+
"TotalRootEntryCount");
5860
DescribeRecord(PGOCtxProfileRecords::Counters, "Counters");
5961
DescribeBlock(PGOCtxProfileBlockIDs::ContextNodeBlockID, "Context");
6062
DescribeRecord(PGOCtxProfileRecords::Guid, "GUID");
61-
DescribeRecord(PGOCtxProfileRecords::CalleeIndex, "CalleeIndex");
63+
DescribeRecord(PGOCtxProfileRecords::CallsiteIndex, "CalleeIndex");
6264
DescribeRecord(PGOCtxProfileRecords::Counters, "Counters");
6365
DescribeBlock(PGOCtxProfileBlockIDs::FlatProfilesSectionBlockID,
6466
"FlatProfiles");
@@ -85,29 +87,39 @@ void PGOCtxProfileWriter::writeGuid(ctx_profile::GUID Guid) {
8587
Writer.EmitRecord(PGOCtxProfileRecords::Guid, SmallVector<uint64_t, 1>{Guid});
8688
}
8789

90+
void PGOCtxProfileWriter::writeCallsiteIndex(uint32_t CallsiteIndex) {
91+
Writer.EmitRecord(PGOCtxProfileRecords::CallsiteIndex,
92+
SmallVector<uint64_t, 1>{CallsiteIndex});
93+
}
94+
95+
void PGOCtxProfileWriter::writeRootEntryCount(uint64_t TotalRootEntryCount) {
96+
Writer.EmitRecord(PGOCtxProfileRecords::TotalRootEntryCount,
97+
SmallVector<uint64_t, 1>{TotalRootEntryCount});
98+
}
99+
88100
// recursively write all the subcontexts. We do need to traverse depth first to
89101
// model the context->subcontext implicitly, and since this captures call
90102
// stacks, we don't really need to be worried about stack overflow and we can
91103
// keep the implementation simple.
92-
void PGOCtxProfileWriter::writeImpl(std::optional<uint32_t> CallerIndex,
104+
void PGOCtxProfileWriter::writeNode(uint32_t CallsiteIndex,
93105
const ContextNode &Node) {
94106
// A node with no counters is an error. We don't expect this to happen from
95107
// the runtime, rather, this is interesting for testing the reader.
96108
if (!IncludeEmpty && (Node.counters_size() > 0 && Node.entrycount() == 0))
97109
return;
98-
Writer.EnterSubblock(CallerIndex ? PGOCtxProfileBlockIDs::ContextNodeBlockID
99-
: PGOCtxProfileBlockIDs::ContextRootBlockID,
100-
CodeLen);
110+
Writer.EnterSubblock(PGOCtxProfileBlockIDs::ContextNodeBlockID, CodeLen);
101111
writeGuid(Node.guid());
102-
if (CallerIndex)
103-
Writer.EmitRecord(PGOCtxProfileRecords::CalleeIndex,
104-
SmallVector<uint64_t, 1>{*CallerIndex});
112+
writeCallsiteIndex(CallsiteIndex);
105113
writeCounters({Node.counters(), Node.counters_size()});
114+
writeSubcontexts(Node);
115+
Writer.ExitBlock();
116+
}
117+
118+
void PGOCtxProfileWriter::writeSubcontexts(const ContextNode &Node) {
106119
for (uint32_t I = 0U; I < Node.callsites_size(); ++I)
107120
for (const auto *Subcontext = Node.subContexts()[I]; Subcontext;
108121
Subcontext = Subcontext->next())
109-
writeImpl(I, *Subcontext);
110-
Writer.ExitBlock();
122+
writeNode(I, *Subcontext);
111123
}
112124

113125
void PGOCtxProfileWriter::startContextSection() {
@@ -122,8 +134,17 @@ void PGOCtxProfileWriter::startFlatSection() {
122134
void PGOCtxProfileWriter::endContextSection() { Writer.ExitBlock(); }
123135
void PGOCtxProfileWriter::endFlatSection() { Writer.ExitBlock(); }
124136

125-
void PGOCtxProfileWriter::writeContextual(const ContextNode &RootNode) {
126-
writeImpl(std::nullopt, RootNode);
137+
void PGOCtxProfileWriter::writeContextual(const ContextNode &RootNode,
138+
uint64_t TotalRootEntryCount) {
139+
if (!IncludeEmpty && (!TotalRootEntryCount || (RootNode.counters_size() > 0 &&
140+
RootNode.entrycount() == 0)))
141+
return;
142+
Writer.EnterSubblock(PGOCtxProfileBlockIDs::ContextRootBlockID, CodeLen);
143+
writeGuid(RootNode.guid());
144+
writeRootEntryCount(TotalRootEntryCount);
145+
writeCounters({RootNode.counters(), RootNode.counters_size()});
146+
writeSubcontexts(RootNode);
147+
Writer.ExitBlock();
127148
}
128149

129150
void PGOCtxProfileWriter::writeFlat(ctx_profile::GUID Guid,
@@ -144,11 +165,15 @@ struct SerializableCtxRepresentation {
144165
std::vector<std::vector<SerializableCtxRepresentation>> Callsites;
145166
};
146167

168+
struct SerializableRootRepresentation : public SerializableCtxRepresentation {
169+
uint64_t TotalRootEntryCount = 0;
170+
};
171+
147172
using SerializableFlatProfileRepresentation =
148173
std::pair<ctx_profile::GUID, std::vector<uint64_t>>;
149174

150175
struct SerializableProfileRepresentation {
151-
std::vector<SerializableCtxRepresentation> Contexts;
176+
std::vector<SerializableRootRepresentation> Contexts;
152177
std::vector<SerializableFlatProfileRepresentation> FlatProfiles;
153178
};
154179

@@ -189,6 +214,7 @@ createNode(std::vector<std::unique_ptr<char[]>> &Nodes,
189214

190215
LLVM_YAML_IS_SEQUENCE_VECTOR(SerializableCtxRepresentation)
191216
LLVM_YAML_IS_SEQUENCE_VECTOR(std::vector<SerializableCtxRepresentation>)
217+
LLVM_YAML_IS_SEQUENCE_VECTOR(SerializableRootRepresentation)
192218
LLVM_YAML_IS_SEQUENCE_VECTOR(SerializableFlatProfileRepresentation)
193219
template <> struct yaml::MappingTraits<SerializableCtxRepresentation> {
194220
static void mapping(yaml::IO &IO, SerializableCtxRepresentation &SCR) {
@@ -198,6 +224,13 @@ template <> struct yaml::MappingTraits<SerializableCtxRepresentation> {
198224
}
199225
};
200226

227+
template <> struct yaml::MappingTraits<SerializableRootRepresentation> {
228+
static void mapping(yaml::IO &IO, SerializableRootRepresentation &R) {
229+
yaml::MappingTraits<SerializableCtxRepresentation>::mapping(IO, R);
230+
IO.mapRequired("TotalRootEntryCount", R.TotalRootEntryCount);
231+
}
232+
};
233+
201234
template <> struct yaml::MappingTraits<SerializableProfileRepresentation> {
202235
static void mapping(yaml::IO &IO, SerializableProfileRepresentation &SPR) {
203236
IO.mapOptional("Contexts", SPR.Contexts);
@@ -232,7 +265,7 @@ Error llvm::createCtxProfFromYAML(StringRef Profile, raw_ostream &Out) {
232265
if (!TopList)
233266
return createStringError(
234267
"Unexpected error converting internal structure to ctx profile");
235-
Writer.writeContextual(*TopList);
268+
Writer.writeContextual(*TopList, DC.TotalRootEntryCount);
236269
}
237270
Writer.endContextSection();
238271
}

llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ CtxInstrumentationLowerer::CtxInstrumentationLowerer(Module &M,
120120
PointerTy, /*FirstNode*/
121121
PointerTy, /*FirstMemBlock*/
122122
PointerTy, /*CurrentMem*/
123+
I64Ty, /*TotalEntries*/
123124
SanitizerMutexType, /*Taken*/
124125
});
125126
FunctionDataTy =

0 commit comments

Comments
 (0)