Skip to content

Commit a3d027f

Browse files
[MemProf] Disable alloc context in combined summary for ndebug builds (#139161)
Since we currently only use the context information in the alloc info summary in the LTO backend for assertion checking, there is no need to write this into the combined summary index for distributed ThinLTO for NDEBUG builds. Put this under a new -combined-index-memprof-context option which is off by default for NDEBUG. The advantage is that we save time (not having to sort in preparation for building the radix trees), and space in the generated bitcode files. We could also do so for the callsite info records, but those are smaller and less expensive to prepare.
1 parent 34ecc4b commit a3d027f

File tree

7 files changed

+116
-63
lines changed

7 files changed

+116
-63
lines changed

llvm/include/llvm/Bitcode/LLVMBitCodes.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,11 @@ enum GlobalValueSummarySymtabCodes {
335335
// CallStackRadixTreeBuilder class in ProfileData/MemProf.h for format.
336336
// [n x entry]
337337
FS_CONTEXT_RADIX_TREE_ARRAY = 32,
338+
// Summary of combined index allocation memprof metadata, without context.
339+
// [nummib, numver,
340+
// nummib x alloc type,
341+
// numver x version]
342+
FS_COMBINED_ALLOC_INFO_NO_CONTEXT = 33,
338343
};
339344

340345
enum MetadataCodes {

llvm/lib/AsmParser/LLParser.cpp

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10781,12 +10781,15 @@ bool LLParser::parseMemProfs(std::vector<MIBInfo> &MIBs) {
1078110781
return true;
1078210782

1078310783
SmallVector<unsigned> StackIdIndices;
10784-
do {
10785-
uint64_t StackId = 0;
10786-
if (parseUInt64(StackId))
10787-
return true;
10788-
StackIdIndices.push_back(Index->addOrGetStackIdIndex(StackId));
10789-
} while (EatIfPresent(lltok::comma));
10784+
// Combined index alloc records may not have a stack id list.
10785+
if (Lex.getKind() != lltok::rparen) {
10786+
do {
10787+
uint64_t StackId = 0;
10788+
if (parseUInt64(StackId))
10789+
return true;
10790+
StackIdIndices.push_back(Index->addOrGetStackIdIndex(StackId));
10791+
} while (EatIfPresent(lltok::comma));
10792+
}
1079010793

1079110794
if (parseToken(lltok::rparen, "expected ')' in stackIds"))
1079210795
return true;

llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,7 @@ GetCodeName(unsigned CodeID, unsigned BlockID,
330330
STRINGIFY_CODE(FS, STACK_IDS)
331331
STRINGIFY_CODE(FS, ALLOC_CONTEXT_IDS)
332332
STRINGIFY_CODE(FS, CONTEXT_RADIX_TREE_ARRAY)
333+
STRINGIFY_CODE(FS, COMBINED_ALLOC_INFO_NO_CONTEXT)
333334
}
334335
case bitc::METADATA_ATTACHMENT_ID:
335336
switch (CodeID) {

llvm/lib/Bitcode/Reader/BitcodeReader.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8178,7 +8178,8 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
81788178
break;
81798179
}
81808180

8181-
case bitc::FS_COMBINED_ALLOC_INFO: {
8181+
case bitc::FS_COMBINED_ALLOC_INFO:
8182+
case bitc::FS_COMBINED_ALLOC_INFO_NO_CONTEXT: {
81828183
unsigned I = 0;
81838184
std::vector<MIBInfo> MIBs;
81848185
unsigned NumMIBs = Record[I++];
@@ -8187,7 +8188,9 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
81878188
while (MIBsRead++ < NumMIBs) {
81888189
assert(Record.size() - I >= 2);
81898190
AllocationType AllocType = (AllocationType)Record[I++];
8190-
auto StackIdList = parseAllocInfoContext(Record, I);
8191+
SmallVector<unsigned> StackIdList;
8192+
if (BitCode == bitc::FS_COMBINED_ALLOC_INFO)
8193+
StackIdList = parseAllocInfoContext(Record, I);
81918194
MIBs.push_back(MIBInfo(AllocType, std::move(StackIdList)));
81928195
}
81938196
assert(Record.size() - I >= NumVersions);

llvm/lib/Bitcode/Writer/BitcodeWriter.cpp

Lines changed: 83 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,22 @@ static cl::opt<bool> WriteRelBFToSummary(
9999
"write-relbf-to-summary", cl::Hidden, cl::init(false),
100100
cl::desc("Write relative block frequency to function summary "));
101101

102+
// Since we only use the context information in the memprof summary records in
103+
// the LTO backends to do assertion checking, save time and space by only
104+
// serializing the context for non-NDEBUG builds.
105+
// TODO: Currently this controls writing context of the allocation info records,
106+
// which are larger and more expensive, but we should do this for the callsite
107+
// records as well.
108+
// FIXME: Convert to a const once this has undergone more sigificant testing.
109+
static cl::opt<bool>
110+
CombinedIndexMemProfContext("combined-index-memprof-context", cl::Hidden,
111+
#ifdef NDEBUG
112+
cl::init(false),
113+
#else
114+
cl::init(true),
115+
#endif
116+
cl::desc(""));
117+
102118
namespace llvm {
103119
extern FunctionSummary::ForceSummaryHotnessType ForceSummaryEdgesCold;
104120
}
@@ -528,10 +544,12 @@ class IndexBitcodeWriter : public BitcodeWriterBase {
528544
for (auto Idx : CI.StackIdIndices)
529545
RecordStackIdReference(Idx);
530546
}
531-
for (auto &AI : FS->allocs())
532-
for (auto &MIB : AI.MIBs)
533-
for (auto Idx : MIB.StackIdIndices)
534-
RecordStackIdReference(Idx);
547+
if (CombinedIndexMemProfContext) {
548+
for (auto &AI : FS->allocs())
549+
for (auto &MIB : AI.MIBs)
550+
for (auto Idx : MIB.StackIdIndices)
551+
RecordStackIdReference(Idx);
552+
}
535553
});
536554
}
537555

@@ -4349,9 +4367,14 @@ static void writeFunctionHeapProfileRecords(
43494367
Record.push_back(AI.Versions.size());
43504368
for (auto &MIB : AI.MIBs) {
43514369
Record.push_back((uint8_t)MIB.AllocType);
4352-
// Record the index into the radix tree array for this context.
4353-
assert(CallStackCount <= CallStackPos.size());
4354-
Record.push_back(CallStackPos[CallStackCount++]);
4370+
// The per-module summary always needs to include the alloc context, as we
4371+
// use it during the thin link. For the combined index it is optional (see
4372+
// comments where CombinedIndexMemProfContext is defined).
4373+
if (PerModule || CombinedIndexMemProfContext) {
4374+
// Record the index into the radix tree array for this context.
4375+
assert(CallStackCount <= CallStackPos.size());
4376+
Record.push_back(CallStackPos[CallStackCount++]);
4377+
}
43554378
}
43564379
if (!PerModule)
43574380
llvm::append_range(Record, AI.Versions);
@@ -4384,8 +4407,11 @@ static void writeFunctionHeapProfileRecords(
43844407
Stream.EmitRecord(bitc::FS_ALLOC_CONTEXT_IDS, ContextIds,
43854408
ContextIdAbbvId);
43864409
}
4387-
Stream.EmitRecord(PerModule ? bitc::FS_PERMODULE_ALLOC_INFO
4388-
: bitc::FS_COMBINED_ALLOC_INFO,
4410+
Stream.EmitRecord(PerModule
4411+
? bitc::FS_PERMODULE_ALLOC_INFO
4412+
: (CombinedIndexMemProfContext
4413+
? bitc::FS_COMBINED_ALLOC_INFO
4414+
: bitc::FS_COMBINED_ALLOC_INFO_NO_CONTEXT),
43894415
Record, AllocAbbrev);
43904416
}
43914417
}
@@ -4847,7 +4873,9 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
48474873
unsigned CallsiteAbbrev = Stream.EmitAbbrev(std::move(Abbv));
48484874

48494875
Abbv = std::make_shared<BitCodeAbbrev>();
4850-
Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_ALLOC_INFO));
4876+
Abbv->Add(BitCodeAbbrevOp(CombinedIndexMemProfContext
4877+
? bitc::FS_COMBINED_ALLOC_INFO
4878+
: bitc::FS_COMBINED_ALLOC_INFO_NO_CONTEXT));
48514879
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // nummib
48524880
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numver
48534881
// nummib x (alloc type, context radix tree index),
@@ -4857,13 +4885,6 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
48574885
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
48584886
unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv));
48594887

4860-
Abbv = std::make_shared<BitCodeAbbrev>();
4861-
Abbv->Add(BitCodeAbbrevOp(bitc::FS_CONTEXT_RADIX_TREE_ARRAY));
4862-
// n x entry
4863-
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
4864-
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
4865-
unsigned RadixAbbrev = Stream.EmitAbbrev(std::move(Abbv));
4866-
48674888
auto shouldImportValueAsDecl = [&](GlobalValueSummary *GVS) -> bool {
48684889
if (DecSummaries == nullptr)
48694890
return false;
@@ -4900,44 +4921,54 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
49004921
NameVals.clear();
49014922
};
49024923

4903-
// First walk through all the functions and collect the allocation contexts in
4904-
// their associated summaries, for use in constructing a radix tree of
4905-
// contexts. Note that we need to do this in the same order as the functions
4906-
// are processed further below since the call stack positions in the resulting
4907-
// radix tree array are identified based on this order.
4908-
MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> CallStacks;
4909-
forEachSummary([&](GVInfo I, bool IsAliasee) {
4910-
// Don't collect this when invoked for an aliasee, as it is not needed for
4911-
// the alias summary. If the aliasee is to be imported, we will invoke this
4912-
// separately with IsAliasee=false.
4913-
if (IsAliasee)
4914-
return;
4915-
GlobalValueSummary *S = I.second;
4916-
assert(S);
4917-
auto *FS = dyn_cast<FunctionSummary>(S);
4918-
if (!FS)
4919-
return;
4920-
collectMemProfCallStacks(
4921-
FS,
4922-
/*GetStackIndex*/
4923-
[&](unsigned I) {
4924-
// Get the corresponding index into the list of StackIds actually
4925-
// being written for this combined index (which may be a subset in
4926-
// the case of distributed indexes).
4927-
assert(StackIdIndicesToIndex.contains(I));
4928-
return StackIdIndicesToIndex[I];
4929-
},
4930-
CallStacks);
4931-
});
4932-
// Finalize the radix tree, write it out, and get the map of positions in the
4933-
// linearized tree array.
49344924
DenseMap<CallStackId, LinearCallStackId> CallStackPos;
4935-
if (!CallStacks.empty()) {
4936-
CallStackPos =
4937-
writeMemoryProfileRadixTree(std::move(CallStacks), Stream, RadixAbbrev);
4925+
if (CombinedIndexMemProfContext) {
4926+
Abbv = std::make_shared<BitCodeAbbrev>();
4927+
Abbv->Add(BitCodeAbbrevOp(bitc::FS_CONTEXT_RADIX_TREE_ARRAY));
4928+
// n x entry
4929+
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
4930+
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
4931+
unsigned RadixAbbrev = Stream.EmitAbbrev(std::move(Abbv));
4932+
4933+
// First walk through all the functions and collect the allocation contexts
4934+
// in their associated summaries, for use in constructing a radix tree of
4935+
// contexts. Note that we need to do this in the same order as the functions
4936+
// are processed further below since the call stack positions in the
4937+
// resulting radix tree array are identified based on this order.
4938+
MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> CallStacks;
4939+
forEachSummary([&](GVInfo I, bool IsAliasee) {
4940+
// Don't collect this when invoked for an aliasee, as it is not needed for
4941+
// the alias summary. If the aliasee is to be imported, we will invoke
4942+
// this separately with IsAliasee=false.
4943+
if (IsAliasee)
4944+
return;
4945+
GlobalValueSummary *S = I.second;
4946+
assert(S);
4947+
auto *FS = dyn_cast<FunctionSummary>(S);
4948+
if (!FS)
4949+
return;
4950+
collectMemProfCallStacks(
4951+
FS,
4952+
/*GetStackIndex*/
4953+
[&](unsigned I) {
4954+
// Get the corresponding index into the list of StackIds actually
4955+
// being written for this combined index (which may be a subset in
4956+
// the case of distributed indexes).
4957+
assert(StackIdIndicesToIndex.contains(I));
4958+
return StackIdIndicesToIndex[I];
4959+
},
4960+
CallStacks);
4961+
});
4962+
// Finalize the radix tree, write it out, and get the map of positions in
4963+
// the linearized tree array.
4964+
if (!CallStacks.empty()) {
4965+
CallStackPos = writeMemoryProfileRadixTree(std::move(CallStacks), Stream,
4966+
RadixAbbrev);
4967+
}
49384968
}
49394969

4940-
// Keep track of the current index into the CallStackPos map.
4970+
// Keep track of the current index into the CallStackPos map. Not used if
4971+
// CombinedIndexMemProfContext is false.
49414972
CallStackId CallStackCount = 0;
49424973

49434974
DenseSet<GlobalValue::GUID> DefOrUseGUIDs;

llvm/test/Assembler/thinlto-memprof-summary.ll

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
;; Test memprof summary parsing (tests all types/fields in various combinations).
2-
; RUN: llvm-as %s -o - | llvm-dis -o - | FileCheck %s
2+
3+
;; Force enable -combined-index-memprof-context to get the allocation context
4+
;; stack ids even in release builds.
5+
; RUN: llvm-as %s -o - -combined-index-memprof-context | llvm-dis -o - | FileCheck %s --check-prefixes=CHECK,CONTEXT
6+
7+
;; Force disable -combined-index-memprof-context to block the allocation context
8+
;; stack ids even in non-release builds.
9+
; RUN: llvm-as %s -o - -combined-index-memprof-context=false | llvm-dis -o - | FileCheck %s --check-prefixes=CHECK,NOCONTEXT
310

411
; ModuleID = 'thinlto-memprof-summary.thinlto.bc'
512

@@ -17,8 +24,10 @@
1724

1825
; Make sure we get back from llvm-dis what we put in via llvm-as.
1926
; CHECK: ^0 = module: (path: "thinlto-memprof-summary.o", hash: (1369602428, 2747878711, 259090915, 2507395659, 1141468049))
20-
; CHECK: ^1 = gv: (guid: 23, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (8632435727821051414)), (type: cold, stackIds: (15025054523792398438, 12345678)), (type: hot, stackIds: (987654321))))))))
27+
; CONTEXT: ^1 = gv: (guid: 23, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (8632435727821051414)), (type: cold, stackIds: (15025054523792398438, 12345678)), (type: hot, stackIds: (987654321))))))))
28+
; NOCONTEXT: ^1 = gv: (guid: 23, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (none), memProf: ((type: notcold, stackIds: ()), (type: cold, stackIds: ()), (type: hot, stackIds: ())))))))
2129
; CHECK: ^2 = gv: (guid: 25, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^1)), callsites: ((callee: ^1, clones: (0), stackIds: (8632435727821051414)), (callee: ^1, clones: (0), stackIds: (15025054523792398438, 12345678)), (callee: ^1, clones: (0), stackIds: (23456789))))))
22-
; CHECK: ^3 = gv: (guid: 26, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (cold, notcold), memProf: ((type: notcold, stackIds: (3456789)), (type: cold, stackIds: (456789)))), (versions: (notcold, cold), memProf: ((type: cold, stackIds: (3456789)), (type: notcold, stackIds: (456789))))))))
30+
; CONTEXT: ^3 = gv: (guid: 26, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (cold, notcold), memProf: ((type: notcold, stackIds: (3456789)), (type: cold, stackIds: (456789)))), (versions: (notcold, cold), memProf: ((type: cold, stackIds: (3456789)), (type: notcold, stackIds: (456789))))))))
31+
; NOCONTEXT: ^3 = gv: (guid: 26, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (cold, notcold), memProf: ((type: notcold, stackIds: ()), (type: cold, stackIds: ()))), (versions: (notcold, cold), memProf: ((type: cold, stackIds: ()), (type: notcold, stackIds: ())))))))
2332
; CHECK: ^4 = gv: (guid: 27, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^3)), callsites: ((callee: ^3, clones: (0, 1), stackIds: (3456789)), (callee: ^3, clones: (1, 1), stackIds: (456789))))))
2433
; CHECK: ^5 = gv: (guid: 28, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), callsites: ((callee: null, clones: (0), stackIds: (8632435727821051414))))))

llvm/test/ThinLTO/X86/memprof_direct_recursion.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
; RUN: llvm-lto2 run %t/b.o %t/a.o -enable-memprof-context-disambiguation \
3434
; RUN: -supports-hot-cold-new \
3535
; RUN: -thinlto-distributed-indexes \
36+
; RUN: -combined-index-memprof-context \
3637
; RUN: -r=%t/b.o,_Z3fooi,plx \
3738
; RUN: -r=%t/b.o,aliasee,plx \
3839
; RUN: -r=%t/b.o,a \

0 commit comments

Comments
 (0)