Skip to content

Commit c4b136b

Browse files
committed
[ctxprof] Capture sampling info for context roots
1 parent bb69499 commit c4b136b

29 files changed

+171
-47
lines changed

compiler-rt/lib/ctx_profile/CtxInstrContextNode.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,8 @@ class ContextNode final {
120120
class ProfileWriter {
121121
public:
122122
virtual void startContextSection() = 0;
123-
virtual void writeContextual(const ctx_profile::ContextNode &RootNode) = 0;
123+
virtual void writeContextual(const ctx_profile::ContextNode &RootNode,
124+
uint64_t TotalRootEntryCount) = 0;
124125
virtual void endContextSection() = 0;
125126

126127
virtual void startFlatSection() = 0;

compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,9 @@ ContextNode *__llvm_ctx_profile_start_context(
340340
ContextRoot *Root, GUID Guid, uint32_t Counters,
341341
uint32_t Callsites) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
342342
IsUnderContext = true;
343+
__sanitizer::atomic_fetch_add(&Root->TotalEntries, 1,
344+
__sanitizer::memory_order_relaxed);
345+
343346
if (!Root->FirstMemBlock) {
344347
setupContext(Root, Guid, Counters, Callsites);
345348
}
@@ -374,6 +377,7 @@ void __llvm_ctx_profile_start_collection() {
374377
++NumMemUnits;
375378

376379
resetContextNode(*Root->FirstNode);
380+
__sanitizer::atomic_store_relaxed(&Root->TotalEntries, 0);
377381
}
378382
__sanitizer::atomic_store_relaxed(&ProfilingStarted, true);
379383
__sanitizer::Printf("[ctxprof] Initial NumMemUnits: %zu \n", NumMemUnits);
@@ -393,7 +397,8 @@ bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) {
393397
__sanitizer::Printf("[ctxprof] Contextual Profile is %s\n", "invalid");
394398
return false;
395399
}
396-
Writer.writeContextual(*Root->FirstNode);
400+
Writer.writeContextual(*Root->FirstNode, __sanitizer::atomic_load_relaxed(
401+
&Root->TotalEntries));
397402
}
398403
Writer.endContextSection();
399404
Writer.startFlatSection();

compiler-rt/lib/ctx_profile/CtxInstrProfiling.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,10 @@ struct ContextRoot {
8080
ContextNode *FirstNode = nullptr;
8181
Arena *FirstMemBlock = nullptr;
8282
Arena *CurrentMem = nullptr;
83+
84+
// Count the number of entries, regardless of whether we could take the `Taken` mutex.
85+
::__sanitizer::atomic_uint64_t TotalEntries = {};
86+
8387
// This is init-ed by the static zero initializer in LLVM.
8488
// Taken is used to ensure only one thread traverses the contextual graph -
8589
// either to read it or to write it. On server side, the same entrypoint will

compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,9 @@ TEST_F(ContextTest, Dump) {
238238
TestProfileWriter(ContextRoot *Root, size_t Entries)
239239
: Root(Root), Entries(Entries) {}
240240

241-
void writeContextual(const ContextNode &Node) override {
241+
void writeContextual(const ContextNode &Node,
242+
uint64_t TotalRootEntryCount) override {
243+
EXPECT_EQ(TotalRootEntryCount, Entries);
242244
EXPECT_EQ(EnteredSectionCount, 1);
243245
EXPECT_EQ(ExitedSectionCount, 0);
244246
EXPECT_FALSE(Root->Taken.TryLock());

compiler-rt/test/ctx_profile/TestCases/generate-context.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,10 @@ class TestProfileWriter : public ProfileWriter {
8484
std::cout << "Exited Context Section" << std::endl;
8585
}
8686

87-
void writeContextual(const ContextNode &RootNode) override {
87+
void writeContextual(const ContextNode &RootNode,
88+
uint64_t EntryCount) override {
89+
std::cout << "Entering Root " << RootNode.guid()
90+
<< " with total entry count " << EntryCount << std::endl;
8891
printProfile(RootNode, "", "");
8992
}
9093

@@ -115,6 +118,7 @@ class TestProfileWriter : public ProfileWriter {
115118
// The second context is in the loop. We expect 2 entries and each of the
116119
// branches would be taken once, so the second counter is 1.
117120
// CHECK-NEXT: Entered Context Section
121+
// CHECK-NEXT: Entering Root 8657661246551306189 with total entry count 1
118122
// CHECK-NEXT: Guid: 8657661246551306189
119123
// CHECK-NEXT: Entries: 1
120124
// CHECK-NEXT: 2 counters and 3 callsites

llvm/include/llvm/ProfileData/CtxInstrContextNode.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,8 @@ class ContextNode final {
120120
class ProfileWriter {
121121
public:
122122
virtual void startContextSection() = 0;
123-
virtual void writeContextual(const ctx_profile::ContextNode &RootNode) = 0;
123+
virtual void writeContextual(const ctx_profile::ContextNode &RootNode,
124+
uint64_t TotalRootEntryCount) = 0;
124125
virtual void endContextSection() = 0;
125126

126127
virtual void startFlatSection() = 0;

llvm/include/llvm/ProfileData/PGOCtxProfReader.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,10 +92,13 @@ class PGOCtxProfContext final : public internal::IndexNode {
9292

9393
GlobalValue::GUID GUID = 0;
9494
SmallVector<uint64_t, 16> Counters;
95+
const std::optional<uint64_t> RootEntryCount;
9596
CallsiteMapTy Callsites;
9697

97-
PGOCtxProfContext(GlobalValue::GUID G, SmallVectorImpl<uint64_t> &&Counters)
98-
: GUID(G), Counters(std::move(Counters)) {}
98+
PGOCtxProfContext(GlobalValue::GUID G, SmallVectorImpl<uint64_t> &&Counters,
99+
std::optional<uint64_t> RootEntryCount = std::nullopt)
100+
: GUID(G), Counters(std::move(Counters)), RootEntryCount(RootEntryCount) {
101+
}
99102

100103
Expected<PGOCtxProfContext &>
101104
getOrEmplace(uint32_t Index, GlobalValue::GUID G,
@@ -115,6 +118,9 @@ class PGOCtxProfContext final : public internal::IndexNode {
115118
const SmallVectorImpl<uint64_t> &counters() const { return Counters; }
116119
SmallVectorImpl<uint64_t> &counters() { return Counters; }
117120

121+
bool isRoot() const { return RootEntryCount.has_value(); }
122+
uint64_t getTotalRootEntryCount() const { return RootEntryCount.value(); }
123+
118124
uint64_t getEntrycount() const {
119125
assert(!Counters.empty() &&
120126
"Functions are expected to have at their entry BB instrumented, so "

llvm/include/llvm/ProfileData/PGOCtxProfWriter.h

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,14 @@
1919
#include "llvm/ProfileData/CtxInstrContextNode.h"
2020

2121
namespace llvm {
22-
enum PGOCtxProfileRecords { Invalid = 0, Version, Guid, CalleeIndex, Counters };
22+
enum PGOCtxProfileRecords {
23+
Invalid = 0,
24+
Version,
25+
Guid,
26+
CallsiteIndex,
27+
Counters,
28+
TotalRootEntryCount
29+
};
2330

2431
enum PGOCtxProfileBlockIDs {
2532
FIRST_VALID = bitc::FIRST_APPLICATION_BLOCKID,
@@ -73,9 +80,11 @@ class PGOCtxProfileWriter final : public ctx_profile::ProfileWriter {
7380
const bool IncludeEmpty;
7481

7582
void writeGuid(ctx_profile::GUID Guid);
83+
void writeCallsiteIndex(uint32_t Index);
84+
void writeRootEntryCount(uint64_t EntryCount);
7685
void writeCounters(ArrayRef<uint64_t> Counters);
77-
void writeImpl(std::optional<uint32_t> CallerIndex,
78-
const ctx_profile::ContextNode &Node);
86+
void writeNode(uint32_t CallerIndex, const ctx_profile::ContextNode &Node);
87+
void writeSubcontexts(const ctx_profile::ContextNode &Node);
7988

8089
public:
8190
PGOCtxProfileWriter(raw_ostream &Out,
@@ -84,7 +93,8 @@ class PGOCtxProfileWriter final : public ctx_profile::ProfileWriter {
8493
~PGOCtxProfileWriter() { Writer.ExitBlock(); }
8594

8695
void startContextSection() override;
87-
void writeContextual(const ctx_profile::ContextNode &RootNode) override;
96+
void writeContextual(const ctx_profile::ContextNode &RootNode,
97+
uint64_t TotalRootEntryCount) override;
8898
void endContextSection() override;
8999

90100
void startFlatSection() override;
@@ -94,7 +104,7 @@ class PGOCtxProfileWriter final : public ctx_profile::ProfileWriter {
94104

95105
// Constants used in writing, which a reader may find useful.
96106
static constexpr unsigned CodeLen = 2;
97-
static constexpr uint32_t CurrentVersion = 2;
107+
static constexpr uint32_t CurrentVersion = 3;
98108
static constexpr unsigned VBREncodingBits = 6;
99109
static constexpr StringRef ContainerMagic = "CTXP";
100110
};

llvm/lib/ProfileData/PGOCtxProfReader.cpp

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -96,16 +96,19 @@ PGOCtxProfileReader::readProfile(PGOCtxProfileBlockIDs Kind) {
9696
std::optional<ctx_profile::GUID> Guid;
9797
std::optional<SmallVector<uint64_t, 16>> Counters;
9898
std::optional<uint32_t> CallsiteIndex;
99+
std::optional<uint64_t> TotalEntryCount;
99100

100101
SmallVector<uint64_t, 1> RecordValues;
101102

102103
const bool ExpectIndex = Kind == PGOCtxProfileBlockIDs::ContextNodeBlockID;
104+
const bool IsRoot = Kind == PGOCtxProfileBlockIDs::ContextRootBlockID;
103105
// We don't prescribe the order in which the records come in, and we are ok
104106
// if other unsupported records appear. We seek in the current subblock until
105107
// we get all we know.
106108
auto GotAllWeNeed = [&]() {
107109
return Guid.has_value() && Counters.has_value() &&
108-
(!ExpectIndex || CallsiteIndex.has_value());
110+
(!ExpectIndex || CallsiteIndex.has_value()) &&
111+
(!IsRoot || TotalEntryCount.has_value());
109112
};
110113
while (!GotAllWeNeed()) {
111114
RecordValues.clear();
@@ -127,21 +130,29 @@ PGOCtxProfileReader::readProfile(PGOCtxProfileBlockIDs Kind) {
127130
return wrongValue("Empty counters. At least the entry counter (one "
128131
"value) was expected");
129132
break;
130-
case PGOCtxProfileRecords::CalleeIndex:
133+
case PGOCtxProfileRecords::CallsiteIndex:
131134
if (!ExpectIndex)
132135
return wrongValue("The root context should not have a callee index");
133136
if (RecordValues.size() != 1)
134137
return wrongValue("The callee index should have exactly one value");
135138
CallsiteIndex = RecordValues[0];
136139
break;
140+
case PGOCtxProfileRecords::TotalRootEntryCount:
141+
if (!IsRoot)
142+
return wrongValue("Non-root has a total entry count record");
143+
if (RecordValues.size() != 1)
144+
return wrongValue(
145+
"The root total entry count record should have exactly one value");
146+
TotalEntryCount = RecordValues[0];
147+
break;
137148
default:
138149
// OK if we see records we do not understand, like records (profile
139150
// components) introduced later.
140151
break;
141152
}
142153
}
143154

144-
PGOCtxProfContext Ret(*Guid, std::move(*Counters));
155+
PGOCtxProfContext Ret(*Guid, std::move(*Counters), TotalEntryCount);
145156

146157
while (canEnterBlockWithID(PGOCtxProfileBlockIDs::ContextNodeBlockID)) {
147158
EXPECT_OR_RET(SC, readProfile(PGOCtxProfileBlockIDs::ContextNodeBlockID));
@@ -278,7 +289,8 @@ void toYaml(yaml::Output &Out,
278289

279290
void toYaml(yaml::Output &Out, GlobalValue::GUID Guid,
280291
const SmallVectorImpl<uint64_t> &Counters,
281-
const PGOCtxProfContext::CallsiteMapTy &Callsites) {
292+
const PGOCtxProfContext::CallsiteMapTy &Callsites,
293+
std::optional<uint64_t> TotalRootEntryCount = std::nullopt) {
282294
yaml::EmptyContext Empty;
283295
Out.beginMapping();
284296
void *SaveInfo = nullptr;
@@ -289,6 +301,11 @@ void toYaml(yaml::Output &Out, GlobalValue::GUID Guid,
289301
yaml::yamlize(Out, Guid, true, Empty);
290302
Out.postflightKey(nullptr);
291303
}
304+
if (TotalRootEntryCount) {
305+
Out.preflightKey("TotalRootEntryCount", true, false, UseDefault, SaveInfo);
306+
yaml::yamlize(Out, *TotalRootEntryCount, true, Empty);
307+
Out.postflightKey(nullptr);
308+
}
292309
{
293310
Out.preflightKey("Counters", true, false, UseDefault, SaveInfo);
294311
Out.beginFlowSequence();
@@ -308,8 +325,13 @@ void toYaml(yaml::Output &Out, GlobalValue::GUID Guid,
308325
}
309326
Out.endMapping();
310327
}
328+
311329
void toYaml(yaml::Output &Out, const PGOCtxProfContext &Ctx) {
312-
toYaml(Out, Ctx.guid(), Ctx.counters(), Ctx.callsites());
330+
if (Ctx.isRoot())
331+
toYaml(Out, Ctx.guid(), Ctx.counters(), Ctx.callsites(),
332+
Ctx.getTotalRootEntryCount());
333+
else
334+
toYaml(Out, Ctx.guid(), Ctx.counters(), Ctx.callsites());
313335
}
314336

315337
} // namespace

llvm/lib/ProfileData/PGOCtxProfWriter.cpp

Lines changed: 47 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,12 @@ PGOCtxProfileWriter::PGOCtxProfileWriter(
5555
DescribeBlock(PGOCtxProfileBlockIDs::ContextsSectionBlockID, "Contexts");
5656
DescribeBlock(PGOCtxProfileBlockIDs::ContextRootBlockID, "Root");
5757
DescribeRecord(PGOCtxProfileRecords::Guid, "GUID");
58+
DescribeRecord(PGOCtxProfileRecords::TotalRootEntryCount,
59+
"TotalRootEntryCount");
5860
DescribeRecord(PGOCtxProfileRecords::Counters, "Counters");
5961
DescribeBlock(PGOCtxProfileBlockIDs::ContextNodeBlockID, "Context");
6062
DescribeRecord(PGOCtxProfileRecords::Guid, "GUID");
61-
DescribeRecord(PGOCtxProfileRecords::CalleeIndex, "CalleeIndex");
63+
DescribeRecord(PGOCtxProfileRecords::CallsiteIndex, "CalleeIndex");
6264
DescribeRecord(PGOCtxProfileRecords::Counters, "Counters");
6365
DescribeBlock(PGOCtxProfileBlockIDs::FlatProfilesSectionBlockID,
6466
"FlatProfiles");
@@ -85,29 +87,39 @@ void PGOCtxProfileWriter::writeGuid(ctx_profile::GUID Guid) {
8587
Writer.EmitRecord(PGOCtxProfileRecords::Guid, SmallVector<uint64_t, 1>{Guid});
8688
}
8789

90+
void PGOCtxProfileWriter::writeCallsiteIndex(uint32_t CallsiteIndex) {
91+
Writer.EmitRecord(PGOCtxProfileRecords::CallsiteIndex,
92+
SmallVector<uint64_t, 1>{CallsiteIndex});
93+
}
94+
95+
void PGOCtxProfileWriter::writeRootEntryCount(uint64_t TotalRootEntryCount) {
96+
Writer.EmitRecord(PGOCtxProfileRecords::TotalRootEntryCount,
97+
SmallVector<uint64_t, 1>{TotalRootEntryCount});
98+
}
99+
88100
// Recursively write all the subcontexts. We do need to traverse depth first to
89101
// model the context->subcontext implicitly, and since this captures call
90102
// stacks, we don't really need to be worried about stack overflow and we can
91103
// keep the implementation simple.
92-
void PGOCtxProfileWriter::writeImpl(std::optional<uint32_t> CallerIndex,
104+
void PGOCtxProfileWriter::writeNode(uint32_t CallsiteIndex,
93105
const ContextNode &Node) {
94106
// A node with no counters is an error. We don't expect this to happen from
95107
// the runtime; rather, this is interesting for testing the reader.
96108
if (!IncludeEmpty && (Node.counters_size() > 0 && Node.entrycount() == 0))
97109
return;
98-
Writer.EnterSubblock(CallerIndex ? PGOCtxProfileBlockIDs::ContextNodeBlockID
99-
: PGOCtxProfileBlockIDs::ContextRootBlockID,
100-
CodeLen);
110+
Writer.EnterSubblock(PGOCtxProfileBlockIDs::ContextNodeBlockID, CodeLen);
101111
writeGuid(Node.guid());
102-
if (CallerIndex)
103-
Writer.EmitRecord(PGOCtxProfileRecords::CalleeIndex,
104-
SmallVector<uint64_t, 1>{*CallerIndex});
112+
writeCallsiteIndex(CallsiteIndex);
105113
writeCounters({Node.counters(), Node.counters_size()});
114+
writeSubcontexts(Node);
115+
Writer.ExitBlock();
116+
}
117+
118+
void PGOCtxProfileWriter::writeSubcontexts(const ContextNode &Node) {
106119
for (uint32_t I = 0U; I < Node.callsites_size(); ++I)
107120
for (const auto *Subcontext = Node.subContexts()[I]; Subcontext;
108121
Subcontext = Subcontext->next())
109-
writeImpl(I, *Subcontext);
110-
Writer.ExitBlock();
122+
writeNode(I, *Subcontext);
111123
}
112124

113125
void PGOCtxProfileWriter::startContextSection() {
@@ -122,8 +134,17 @@ void PGOCtxProfileWriter::startFlatSection() {
122134
void PGOCtxProfileWriter::endContextSection() { Writer.ExitBlock(); }
123135
void PGOCtxProfileWriter::endFlatSection() { Writer.ExitBlock(); }
124136

125-
void PGOCtxProfileWriter::writeContextual(const ContextNode &RootNode) {
126-
writeImpl(std::nullopt, RootNode);
137+
void PGOCtxProfileWriter::writeContextual(const ContextNode &RootNode,
138+
uint64_t TotalRootEntryCount) {
139+
if (!IncludeEmpty && (!TotalRootEntryCount || (RootNode.counters_size() > 0 &&
140+
RootNode.entrycount() == 0)))
141+
return;
142+
Writer.EnterSubblock(PGOCtxProfileBlockIDs::ContextRootBlockID, CodeLen);
143+
writeGuid(RootNode.guid());
144+
writeRootEntryCount(TotalRootEntryCount);
145+
writeCounters({RootNode.counters(), RootNode.counters_size()});
146+
writeSubcontexts(RootNode);
147+
Writer.ExitBlock();
127148
}
128149

129150
void PGOCtxProfileWriter::writeFlat(ctx_profile::GUID Guid,
@@ -144,11 +165,15 @@ struct SerializableCtxRepresentation {
144165
std::vector<std::vector<SerializableCtxRepresentation>> Callsites;
145166
};
146167

168+
struct SerializableRootRepresentation : public SerializableCtxRepresentation {
169+
uint64_t TotalRootEntryCount = 0;
170+
};
171+
147172
using SerializableFlatProfileRepresentation =
148173
std::pair<ctx_profile::GUID, std::vector<uint64_t>>;
149174

150175
struct SerializableProfileRepresentation {
151-
std::vector<SerializableCtxRepresentation> Contexts;
176+
std::vector<SerializableRootRepresentation> Contexts;
152177
std::vector<SerializableFlatProfileRepresentation> FlatProfiles;
153178
};
154179

@@ -189,6 +214,7 @@ createNode(std::vector<std::unique_ptr<char[]>> &Nodes,
189214

190215
LLVM_YAML_IS_SEQUENCE_VECTOR(SerializableCtxRepresentation)
191216
LLVM_YAML_IS_SEQUENCE_VECTOR(std::vector<SerializableCtxRepresentation>)
217+
LLVM_YAML_IS_SEQUENCE_VECTOR(SerializableRootRepresentation)
192218
LLVM_YAML_IS_SEQUENCE_VECTOR(SerializableFlatProfileRepresentation)
193219
template <> struct yaml::MappingTraits<SerializableCtxRepresentation> {
194220
static void mapping(yaml::IO &IO, SerializableCtxRepresentation &SCR) {
@@ -198,6 +224,13 @@ template <> struct yaml::MappingTraits<SerializableCtxRepresentation> {
198224
}
199225
};
200226

227+
template <> struct yaml::MappingTraits<SerializableRootRepresentation> {
228+
static void mapping(yaml::IO &IO, SerializableRootRepresentation &R) {
229+
yaml::MappingTraits<SerializableCtxRepresentation>::mapping(IO, R);
230+
IO.mapRequired("TotalRootEntryCount", R.TotalRootEntryCount);
231+
}
232+
};
233+
201234
template <> struct yaml::MappingTraits<SerializableProfileRepresentation> {
202235
static void mapping(yaml::IO &IO, SerializableProfileRepresentation &SPR) {
203236
IO.mapOptional("Contexts", SPR.Contexts);
@@ -232,7 +265,7 @@ Error llvm::createCtxProfFromYAML(StringRef Profile, raw_ostream &Out) {
232265
if (!TopList)
233266
return createStringError(
234267
"Unexpected error converting internal structure to ctx profile");
235-
Writer.writeContextual(*TopList);
268+
Writer.writeContextual(*TopList, DC.TotalRootEntryCount);
236269
}
237270
Writer.endContextSection();
238271
}

llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
;--- profile.yaml
6262
Contexts:
6363
- Guid: 4909520559318251808
64+
TotalRootEntryCount: 100
6465
Counters: [100, 40]
6566
Callsites: -
6667
- Guid: 11872291593386833696

0 commit comments

Comments
 (0)