@@ -99,6 +99,22 @@ static cl::opt<bool> WriteRelBFToSummary(
"write-relbf-to-summary", cl::Hidden, cl::init(false),
cl::desc("Write relative block frequency to function summary"));

+ // Since we only use the context information in the memprof summary records in
+ // the LTO backends to do assertion checking, save time and space by only
+ // serializing the context for non-NDEBUG builds.
+ // TODO: Currently this controls writing context of the allocation info records,
+ // which are larger and more expensive, but we should do this for the callsite
+ // records as well.
+ // FIXME: Convert to a const once this has undergone more significant testing.
+ static cl::opt<bool>
+ CombinedIndexMemProfContext("combined-index-memprof-context", cl::Hidden,
+ #ifdef NDEBUG
+ cl::init(false),
+ #else
+ cl::init(true),
+ #endif
+ cl::desc(""));
+
namespace llvm {
extern FunctionSummary::ForceSummaryHotnessType ForceSummaryEdgesCold;
}
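As an aside, the option above relies on an NDEBUG-dependent default, so assertion-enabled builds serialize the alloc context by default while release builds skip it. A minimal, standalone sketch of the same cl::opt/cl::init/#ifdef idiom follows; the option name and program are illustrative only and not part of this patch.

// Illustrative only: a cl::opt whose default flips with the build mode,
// mirroring the CombinedIndexMemProfContext declaration in the hunk above.
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static cl::opt<bool> ExampleFlag("example-ndebug-flag", cl::Hidden,
#ifdef NDEBUG
                                 cl::init(false), // release builds: off by default
#else
                                 cl::init(true),  // assertion builds: on by default
#endif
                                 cl::desc("Illustrative NDEBUG-dependent default"));

int main(int argc, char **argv) {
  cl::ParseCommandLineOptions(argc, argv);
  outs() << "example-ndebug-flag = " << (ExampleFlag ? "true" : "false") << "\n";
  return 0;
}

Like any other internal cl::opt, the real option can still be set explicitly regardless of build mode, typically through the compiler or linker's -mllvm plumbing during a ThinLTO link; the exact driver spelling depends on the toolchain.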
@@ -528,10 +544,12 @@ class IndexBitcodeWriter : public BitcodeWriterBase {
for (auto Idx : CI.StackIdIndices)
RecordStackIdReference(Idx);
}
- for (auto &AI : FS->allocs())
- for (auto &MIB : AI.MIBs)
- for (auto Idx : MIB.StackIdIndices)
- RecordStackIdReference(Idx);
+ if (CombinedIndexMemProfContext) {
+ for (auto &AI : FS->allocs())
+ for (auto &MIB : AI.MIBs)
+ for (auto Idx : MIB.StackIdIndices)
+ RecordStackIdReference(Idx);
+ }
});
}

@@ -4349,9 +4367,14 @@ static void writeFunctionHeapProfileRecords(
Record.push_back(AI.Versions.size());
for (auto &MIB : AI.MIBs) {
Record.push_back((uint8_t)MIB.AllocType);
- // Record the index into the radix tree array for this context.
- assert(CallStackCount <= CallStackPos.size());
- Record.push_back(CallStackPos[CallStackCount++]);
+ // The per-module summary always needs to include the alloc context, as we
+ // use it during the thin link. For the combined index it is optional (see
+ // comments where CombinedIndexMemProfContext is defined).
+ if (PerModule || CombinedIndexMemProfContext) {
+ // Record the index into the radix tree array for this context.
+ assert(CallStackCount <= CallStackPos.size());
+ Record.push_back(CallStackPos[CallStackCount++]);
+ }
}
if (!PerModule)
llvm::append_range(Record, AI.Versions);
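To make the record-shape change concrete, here is a minimal sketch of the per-MIB portion of the payload built above. The types and helper are simplified stand-ins (only the guard mirrors the patch), not the actual BitcodeWriter code.

#include <cstdint>
#include <vector>

// Simplified stand-in for one MIB (memory info block) entry.
struct MIBEntry {
  uint8_t AllocType;     // allocation type for this context
  uint64_t RadixTreePos; // position of the context in the linearized radix tree
};

// Append the per-MIB fields: the allocation type is always written, while the
// context index is written per-module, or for the combined index only when
// context serialization is enabled.
void appendMIBs(std::vector<uint64_t> &Record,
                const std::vector<MIBEntry> &MIBs, bool PerModule,
                bool CombinedIndexMemProfContext) {
  for (const MIBEntry &MIB : MIBs) {
    Record.push_back(MIB.AllocType);
    if (PerModule || CombinedIndexMemProfContext)
      Record.push_back(MIB.RadixTreePos);
  }
}

With the context omitted, the combined record is emitted under the new FS_COMBINED_ALLOC_INFO_NO_CONTEXT code (see the next hunk), so readers can tell the two layouts apart.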
@@ -4384,8 +4407,11 @@ static void writeFunctionHeapProfileRecords(
Stream.EmitRecord(bitc::FS_ALLOC_CONTEXT_IDS, ContextIds,
ContextIdAbbvId);
}
- Stream.EmitRecord(PerModule ? bitc::FS_PERMODULE_ALLOC_INFO
- : bitc::FS_COMBINED_ALLOC_INFO,
+ Stream.EmitRecord(PerModule
+ ? bitc::FS_PERMODULE_ALLOC_INFO
+ : (CombinedIndexMemProfContext
+ ? bitc::FS_COMBINED_ALLOC_INFO
+ : bitc::FS_COMBINED_ALLOC_INFO_NO_CONTEXT),
Record, AllocAbbrev);
}
}
@@ -4847,7 +4873,9 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
unsigned CallsiteAbbrev = Stream.EmitAbbrev(std::move(Abbv));

Abbv = std::make_shared<BitCodeAbbrev>();
- Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_ALLOC_INFO));
+ Abbv->Add(BitCodeAbbrevOp(CombinedIndexMemProfContext
+ ? bitc::FS_COMBINED_ALLOC_INFO
+ : bitc::FS_COMBINED_ALLOC_INFO_NO_CONTEXT));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // nummib
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numver
// nummib x (alloc type, context radix tree index),
@@ -4857,13 +4885,6 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv));

- Abbv = std::make_shared<BitCodeAbbrev>();
- Abbv->Add(BitCodeAbbrevOp(bitc::FS_CONTEXT_RADIX_TREE_ARRAY));
- // n x entry
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
- unsigned RadixAbbrev = Stream.EmitAbbrev(std::move(Abbv));
-
auto shouldImportValueAsDecl = [&](GlobalValueSummary *GVS) -> bool {
if (DecSummaries == nullptr)
return false;

(The FS_CONTEXT_RADIX_TREE_ARRAY abbreviation is not dropped here; the next hunk re-creates it inside the new CombinedIndexMemProfContext guard, the only place it is now needed.)
@@ -4900,44 +4921,54 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
NameVals.clear();
};

- // First walk through all the functions and collect the allocation contexts in
- // their associated summaries, for use in constructing a radix tree of
- // contexts. Note that we need to do this in the same order as the functions
- // are processed further below since the call stack positions in the resulting
- // radix tree array are identified based on this order.
- MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> CallStacks;
- forEachSummary([&](GVInfo I, bool IsAliasee) {
- // Don't collect this when invoked for an aliasee, as it is not needed for
- // the alias summary. If the aliasee is to be imported, we will invoke this
- // separately with IsAliasee=false.
- if (IsAliasee)
- return;
- GlobalValueSummary *S = I.second;
- assert(S);
- auto *FS = dyn_cast<FunctionSummary>(S);
- if (!FS)
- return;
- collectMemProfCallStacks(
- FS,
- /*GetStackIndex*/
- [&](unsigned I) {
- // Get the corresponding index into the list of StackIds actually
- // being written for this combined index (which may be a subset in
- // the case of distributed indexes).
- assert(StackIdIndicesToIndex.contains(I));
- return StackIdIndicesToIndex[I];
- },
- CallStacks);
- });
- // Finalize the radix tree, write it out, and get the map of positions in the
- // linearized tree array.
DenseMap<CallStackId, LinearCallStackId> CallStackPos;
- if (!CallStacks.empty()) {
- CallStackPos =
- writeMemoryProfileRadixTree(std::move(CallStacks), Stream, RadixAbbrev);
+ if (CombinedIndexMemProfContext) {
+ Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FS_CONTEXT_RADIX_TREE_ARRAY));
+ // n x entry
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ unsigned RadixAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
+ // First walk through all the functions and collect the allocation contexts
+ // in their associated summaries, for use in constructing a radix tree of
+ // contexts. Note that we need to do this in the same order as the functions
+ // are processed further below since the call stack positions in the
+ // resulting radix tree array are identified based on this order.
+ MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> CallStacks;
+ forEachSummary([&](GVInfo I, bool IsAliasee) {
+ // Don't collect this when invoked for an aliasee, as it is not needed for
+ // the alias summary. If the aliasee is to be imported, we will invoke
+ // this separately with IsAliasee=false.
+ if (IsAliasee)
+ return;
+ GlobalValueSummary *S = I.second;
+ assert(S);
+ auto *FS = dyn_cast<FunctionSummary>(S);
+ if (!FS)
+ return;
+ collectMemProfCallStacks(
+ FS,
+ /*GetStackIndex*/
+ [&](unsigned I) {
+ // Get the corresponding index into the list of StackIds actually
+ // being written for this combined index (which may be a subset in
+ // the case of distributed indexes).
+ assert(StackIdIndicesToIndex.contains(I));
+ return StackIdIndicesToIndex[I];
+ },
+ CallStacks);
+ });
+ // Finalize the radix tree, write it out, and get the map of positions in
+ // the linearized tree array.
+ if (!CallStacks.empty()) {
+ CallStackPos = writeMemoryProfileRadixTree(std::move(CallStacks), Stream,
+ RadixAbbrev);
+ }
}

- // Keep track of the current index into the CallStackPos map.
+ // Keep track of the current index into the CallStackPos map. Not used if
+ // CombinedIndexMemProfContext is false.
CallStackId CallStackCount = 0;

DenseSet<GlobalValue::GUID> DefOrUseGUIDs;