Skip to content

Commit 4745945

Browse files
committed
[MemProf] ThinLTO summary support
Implements the ThinLTO summary support for memprof-related metadata. This includes support for the assembly format, and for building the summary from IR during ModuleSummaryAnalysis. To reduce space in both the bitcode format and the in-memory index, we do 2 things: 1. We keep a single vector of all unique stack id hashes, and record the index into this vector in the callsite and allocation memprof summaries. 2. When building the combined index during the LTO link, the callsite and allocation memprof summaries are only kept on the FunctionSummary of the prevailing copy. Differential Revision: https://reviews.llvm.org/D135714
1 parent a8673b7 commit 4745945

21 files changed

+1296
-48
lines changed

llvm/include/llvm/Analysis/MemoryProfileInfo.h

Lines changed: 57 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,18 +17,12 @@
1717
#include "llvm/IR/InstrTypes.h"
1818
#include "llvm/IR/Metadata.h"
1919
#include "llvm/IR/Module.h"
20+
#include "llvm/IR/ModuleSummaryIndex.h"
2021
#include <map>
2122

2223
namespace llvm {
2324
namespace memprof {
2425

25-
// Allocation type assigned to an allocation reached by a given context.
26-
// More can be added but initially this is just noncold and cold.
27-
// Values should be powers of two so that they can be ORed, in particular to
28-
// track allocations that have different behavior with different calling
29-
// contexts.
30-
enum class AllocationType : uint8_t { None = 0, NotCold = 1, Cold = 2 };
31-
3226
/// Return the allocation type for a given set of memory profile values.
3327
AllocationType getAllocType(uint64_t MaxAccessCount, uint64_t MinSize,
3428
uint64_t MinLifetime);
@@ -106,6 +100,62 @@ class CallStackTrie {
106100
bool buildAndAttachMIBMetadata(CallBase *CI);
107101
};
108102

103+
/// Helper class to iterate through stack ids in both metadata (memprof MIB and
/// callsite) and the corresponding ThinLTO summary data structures
/// (CallsiteInfo and MIBInfo). This simplifies implementation of client code
/// which doesn't need to worry about whether we are operating with IR (Regular
/// LTO), or summary (ThinLTO).
///
/// \tparam NodeT     Type of the node holding the stack ids. The default
///                   (unspecialized) iterator expects it to expose a
///                   `StackIdIndices` container.
/// \tparam IteratorT Iterator type used to walk the stack ids of a NodeT.
template <class NodeT, class IteratorT> class CallStack {
public:
  /// Wraps \p N without taking ownership. A null \p N yields an empty stack.
  CallStack(const NodeT *N = nullptr) : N(N) {}

  // Implement minimum required methods for range-based for loop.
  // The default implementation assumes we are operating on ThinLTO data
  // structures, which have a vector of StackIdIndices. There are specialized
  // versions provided to iterate through metadata.
  struct CallStackIterator {
    const NodeT *N = nullptr;
    // Value-initialized so that the begin/end iterators of an empty CallStack
    // (null N) hold a well-defined value and compare equal, rather than
    // carrying an indeterminate iterator.
    IteratorT Iter{};

    /// Positions the iterator at the start (or, if \p End, one past the last
    /// entry) of the stack ids of \p N. Leaves Iter value-initialized when
    /// \p N is null.
    CallStackIterator(const NodeT *N, bool End) : N(N) {
      if (!N)
        return;
      Iter = End ? N->StackIdIndices.end() : N->StackIdIndices.begin();
    }

    /// Returns the entry the iterator currently refers to. Must not be called
    /// on an end iterator or on an iterator of an empty CallStack.
    uint64_t operator*() {
      assert(N && Iter != N->StackIdIndices.end());
      return *Iter;
    }

    // Comparisons do not modify the iterator, so they are const. This also
    // keeps them usable on const iterators and avoids ambiguity with the
    // reversed comparison candidates synthesized by C++20.
    bool operator==(const CallStackIterator &rhs) const {
      return Iter == rhs.Iter;
    }
    bool operator!=(const CallStackIterator &rhs) const {
      return !(*this == rhs);
    }
    void operator++() { ++Iter; }
  };

  /// True when no node is attached to this call stack.
  bool empty() const { return N == nullptr; }

  CallStackIterator begin() const {
    return CallStackIterator(N, /*End*/ false);
  }
  CallStackIterator end() const { return CallStackIterator(N, /*End*/ true); }

  /// Walks this stack and \p Other in lockstep until either one is exhausted,
  /// asserting that corresponding entries match, and returns the iterator into
  /// this stack at the position where the walk stopped.
  CallStackIterator beginAfterSharedPrefix(const CallStack &Other) const {
    CallStackIterator Cur = begin();
    for (CallStackIterator OtherCur = Other.begin();
         Cur != end() && OtherCur != Other.end(); ++Cur, ++OtherCur)
      assert(*Cur == *OtherCur);
    return Cur;
  }

private:
  const NodeT *N = nullptr;
};
151+
152+
/// Specializations for iterating through IR metadata stack contexts.
153+
template <>
154+
CallStack<MDNode, MDNode::op_iterator>::CallStackIterator::CallStackIterator(
155+
const MDNode *N, bool End);
156+
template <>
157+
uint64_t CallStack<MDNode, MDNode::op_iterator>::CallStackIterator::operator*();
158+
109159
} // end namespace memprof
110160
} // end namespace llvm
111161

llvm/include/llvm/AsmParser/LLParser.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,10 @@ namespace llvm {
406406
void addGlobalValueToIndex(std::string Name, GlobalValue::GUID,
407407
GlobalValue::LinkageTypes Linkage, unsigned ID,
408408
std::unique_ptr<GlobalValueSummary> Summary);
409+
bool parseOptionalAllocs(std::vector<AllocInfo> &Allocs);
410+
bool parseMemProfs(std::vector<MIBInfo> &MIBs);
411+
bool parseAllocType(uint8_t &AllocType);
412+
bool parseOptionalCallsites(std::vector<CallsiteInfo> &Callsites);
409413

410414
// Type Parsing.
411415
bool parseType(Type *&Result, const Twine &Msg, bool AllowVoid = false);

llvm/include/llvm/AsmParser/LLToken.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,15 @@ enum Kind {
406406
kw_byte,
407407
kw_bit,
408408
kw_varFlags,
409+
// The following are used by MemProf summary info.
410+
kw_callsites,
411+
kw_clones,
412+
kw_stackIds,
413+
kw_allocs,
414+
kw_versions,
415+
kw_memProf,
416+
kw_notcold,
417+
kw_notcoldandcold,
409418

410419
// GV's with __attribute__((no_sanitize("address"))), or things in
411420
// -fsanitize-ignorelist when built with ASan.

llvm/include/llvm/AsmParser/Parser.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,17 @@ ParsedModuleAndIndex parseAssemblyFileWithIndexNoUpgradeDebugInfo(
105105
std::unique_ptr<ModuleSummaryIndex>
106106
parseSummaryIndexAssemblyFile(StringRef Filename, SMDiagnostic &Err);
107107

108+
/// The function is a secondary interface to the LLVM Assembly Parser. It parses
109+
/// an ASCII string that (presumably) contains LLVM Assembly code for a module
110+
/// summary. It returns a ModuleSummaryIndex with the corresponding features.
111+
/// Note that this does not verify that the generated Index is valid, so you
112+
/// should run the verifier after parsing the file to check that it is okay.
113+
/// Parse LLVM Assembly from a string
114+
/// \param AsmString The string containing assembly
115+
/// \param Err Error result info.
116+
std::unique_ptr<ModuleSummaryIndex>
117+
parseSummaryIndexAssemblyString(StringRef AsmString, SMDiagnostic &Err);
118+
108119
/// parseAssemblyFile and parseAssemblyString are wrappers around this function.
109120
/// Parse LLVM Assembly from a MemoryBuffer.
110121
/// \param F The MemoryBuffer containing assembly

llvm/include/llvm/Bitcode/BitcodeReader.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "llvm/ADT/ArrayRef.h"
1717
#include "llvm/ADT/StringRef.h"
1818
#include "llvm/Bitstream/BitCodeEnums.h"
19+
#include "llvm/IR/GlobalValue.h"
1920
#include "llvm/Support/Endian.h"
2021
#include "llvm/Support/Error.h"
2122
#include "llvm/Support/ErrorOr.h"
@@ -117,8 +118,10 @@ typedef llvm::function_ref<Optional<std::string>(StringRef)>
117118

118119
/// Parse the specified bitcode buffer and merge its module summary index
119120
/// into CombinedIndex.
120-
Error readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath,
121-
uint64_t ModuleId);
121+
Error
122+
readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath,
123+
uint64_t ModuleId,
124+
std::function<bool(GlobalValue::GUID)> IsPrevailing = nullptr);
122125
};
123126

124127
struct BitcodeFileContents {

llvm/include/llvm/Bitcode/LLVMBitCodes.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,22 @@ enum GlobalValueSummarySymtabCodes {
301301
// Range information for accessed offsets for every argument.
302302
// [n x (paramno, range, numcalls, numcalls x (callee_guid, paramno, range))]
303303
FS_PARAM_ACCESS = 25,
304+
// Summary of per-module memprof callsite metadata.
305+
// [valueid, n x stackidindex]
306+
FS_PERMODULE_CALLSITE_INFO = 26,
307+
// Summary of per-module allocation memprof metadata.
308+
// [n x (alloc type, nummib, nummib x stackidindex)]
309+
FS_PERMODULE_ALLOC_INFO = 27,
310+
// Summary of combined index memprof callsite metadata.
311+
// [valueid, numstackindices, numver,
312+
// numstackindices x stackidindex, numver x version]
313+
FS_COMBINED_CALLSITE_INFO = 28,
314+
// Summary of combined index allocation memprof metadata.
315+
// [nummib, numver,
316+
// nummib x (alloc type, numstackids, numstackids x stackidindex),
317+
// numver x version]
318+
FS_COMBINED_ALLOC_INFO = 29,
319+
FS_STACK_IDS = 30,
304320
};
305321

306322
enum MetadataCodes {

llvm/include/llvm/IR/ModuleSummaryIndex.h

Lines changed: 148 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "llvm/ADT/DenseMap.h"
2020
#include "llvm/ADT/STLExtras.h"
2121
#include "llvm/ADT/SmallString.h"
22+
#include "llvm/ADT/SmallVector.h"
2223
#include "llvm/ADT/StringExtras.h"
2324
#include "llvm/ADT/StringMap.h"
2425
#include "llvm/ADT/StringRef.h"
@@ -284,6 +285,79 @@ template <> struct DenseMapInfo<ValueInfo> {
284285
static unsigned getHashValue(ValueInfo I) { return (uintptr_t)I.getRef(); }
285286
};
286287

288+
/// Summary of memprof callsite metadata.
289+
struct CallsiteInfo {
290+
// Actual callee function.
291+
ValueInfo Callee;
292+
293+
// Used to record whole program analysis cloning decisions.
294+
// The ThinLTO backend will need to create as many clones as there are entries
295+
// in the vector (it is expected and should be confirmed that all such
296+
// summaries in the same FunctionSummary have the same number of entries).
297+
// Each index records version info for the corresponding clone of this
298+
// function. The value is the callee clone it calls (becomes the appended
299+
// suffix id). Index 0 is the original version, and a value of 0 calls the
300+
// original callee.
301+
SmallVector<unsigned> Clones{0};
302+
303+
// Represents stack ids in this context, recorded as indices into the
304+
// StackIds vector in the summary index, which in turn holds the full 64-bit
305+
// stack ids. This reduces memory as there are in practice far fewer unique
306+
// stack ids than stack id references.
307+
SmallVector<unsigned> StackIdIndices;
308+
309+
CallsiteInfo(ValueInfo Callee, SmallVector<unsigned> StackIdIndices)
310+
: Callee(Callee), StackIdIndices(std::move(StackIdIndices)) {}
311+
CallsiteInfo(ValueInfo Callee, SmallVector<unsigned> Clones,
312+
SmallVector<unsigned> StackIdIndices)
313+
: Callee(Callee), Clones(std::move(Clones)),
314+
StackIdIndices(std::move(StackIdIndices)) {}
315+
};
316+
317+
// Allocation type assigned to an allocation reached by a given context.
// Only not-cold and cold are distinguished for now; more can be added later.
// Each value should be a distinct power of two so that values can be ORed
// together, in particular to track allocations that behave differently under
// different calling contexts.
enum class AllocationType : uint8_t {
  None = 0,
  NotCold = 1,
  Cold = 2,
};
323+
324+
/// Summary of a single MIB in a memprof metadata on allocations.
325+
struct MIBInfo {
326+
// The allocation type for this profiled context.
327+
AllocationType AllocType;
328+
329+
// Represents stack ids in this context, recorded as indices into the
330+
// StackIds vector in the summary index, which in turn holds the full 64-bit
331+
// stack ids. This reduces memory as there are in practice far fewer unique
332+
// stack ids than stack id references.
333+
SmallVector<unsigned> StackIdIndices;
334+
335+
MIBInfo(AllocationType AllocType, SmallVector<unsigned> StackIdIndices)
336+
: AllocType(AllocType), StackIdIndices(std::move(StackIdIndices)) {}
337+
};
338+
339+
/// Summary of memprof metadata on allocations.
340+
struct AllocInfo {
341+
// Used to record whole program analysis cloning decisions.
342+
// The ThinLTO backend will need to create as many clones as there are entries
343+
// in the vector (it is expected and should be confirmed that all such
344+
// summaries in the same FunctionSummary have the same number of entries).
345+
// Each index records version info for the corresponding clone of this
346+
// function. The value is the allocation type of the corresponding allocation.
347+
// Index 0 is the original version. Before cloning, index 0 may have more than
348+
// one allocation type.
349+
SmallVector<uint8_t> Versions;
350+
351+
// Vector of MIBs in this memprof metadata.
352+
std::vector<MIBInfo> MIBs;
353+
354+
AllocInfo(std::vector<MIBInfo> MIBs) : MIBs(std::move(MIBs)) {
355+
Versions.push_back(0);
356+
}
357+
AllocInfo(SmallVector<uint8_t> Versions, std::vector<MIBInfo> MIBs)
358+
: Versions(std::move(Versions)), MIBs(std::move(MIBs)) {}
359+
};
360+
287361
/// Function and variable summary information to aid decisions and
288362
/// implementation of importing.
289363
class GlobalValueSummary {
@@ -678,7 +752,8 @@ class FunctionSummary : public GlobalValueSummary {
678752
std::vector<FunctionSummary::VFuncId>(),
679753
std::vector<FunctionSummary::ConstVCall>(),
680754
std::vector<FunctionSummary::ConstVCall>(),
681-
std::vector<FunctionSummary::ParamAccess>());
755+
std::vector<FunctionSummary::ParamAccess>(),
756+
std::vector<CallsiteInfo>(), std::vector<AllocInfo>());
682757
}
683758

684759
/// A dummy node to reference external functions that aren't in the index
@@ -706,6 +781,25 @@ class FunctionSummary : public GlobalValueSummary {
706781
using ParamAccessesTy = std::vector<ParamAccess>;
707782
std::unique_ptr<ParamAccessesTy> ParamAccesses;
708783

784+
/// Optional list of memprof callsite metadata summaries. The correspondence
785+
/// between the callsite summary and the callsites in the function is implied
786+
/// by the order in the vector (and can be validated by comparing the stack
787+
/// ids in the CallsiteInfo to those in the instruction callsite metadata).
788+
/// As a memory savings optimization, we only create these for the prevailing
789+
/// copy of a symbol when creating the combined index during LTO.
790+
using CallsitesTy = std::vector<CallsiteInfo>;
791+
std::unique_ptr<CallsitesTy> Callsites;
792+
793+
/// Optional list of allocation memprof metadata summaries. The correspondence
794+
/// between the alloc memprof summary and the allocation callsites in the
795+
/// function is implied by the order in the vector (and can be validated by
796+
/// comparing the stack ids in the AllocInfo to those in the instruction
797+
/// memprof metadata).
798+
/// As a memory savings optimization, we only create these for the prevailing
799+
/// copy of a symbol when creating the combined index during LTO.
800+
using AllocsTy = std::vector<AllocInfo>;
801+
std::unique_ptr<AllocsTy> Allocs;
802+
709803
public:
710804
FunctionSummary(GVFlags Flags, unsigned NumInsts, FFlags FunFlags,
711805
uint64_t EntryCount, std::vector<ValueInfo> Refs,
@@ -715,7 +809,8 @@ class FunctionSummary : public GlobalValueSummary {
715809
std::vector<VFuncId> TypeCheckedLoadVCalls,
716810
std::vector<ConstVCall> TypeTestAssumeConstVCalls,
717811
std::vector<ConstVCall> TypeCheckedLoadConstVCalls,
718-
std::vector<ParamAccess> Params)
812+
std::vector<ParamAccess> Params, CallsitesTy CallsiteList,
813+
AllocsTy AllocList)
719814
: GlobalValueSummary(FunctionKind, Flags, std::move(Refs)),
720815
InstCount(NumInsts), FunFlags(FunFlags), EntryCount(EntryCount),
721816
CallGraphEdgeList(std::move(CGEdges)) {
@@ -729,6 +824,10 @@ class FunctionSummary : public GlobalValueSummary {
729824
std::move(TypeCheckedLoadConstVCalls)});
730825
if (!Params.empty())
731826
ParamAccesses = std::make_unique<ParamAccessesTy>(std::move(Params));
827+
if (!CallsiteList.empty())
828+
Callsites = std::make_unique<CallsitesTy>(std::move(CallsiteList));
829+
if (!AllocList.empty())
830+
Allocs = std::make_unique<AllocsTy>(std::move(AllocList));
732831
}
733832
// Gets the number of readonly and writeonly refs in RefEdgeList
734833
std::pair<unsigned, unsigned> specialRefCounts() const;
@@ -832,6 +931,18 @@ class FunctionSummary : public GlobalValueSummary {
832931

833932
const TypeIdInfo *getTypeIdInfo() const { return TIdInfo.get(); };
834933

934+
/// Returns the memprof callsite summaries recorded on this function, or an
/// empty list when none were created.
ArrayRef<CallsiteInfo> callsites() const {
  if (!Callsites)
    return {};
  return *Callsites;
}
939+
940+
/// Returns the memprof allocation summaries recorded on this function, or an
/// empty list when none were created.
ArrayRef<AllocInfo> allocs() const {
  if (!Allocs)
    return {};
  return *Allocs;
}
945+
835946
friend struct GraphTraits<ValueInfo>;
836947
};
837948

@@ -1163,6 +1274,16 @@ class ModuleSummaryIndex {
11631274
// the total number of basic blocks in the LTO unit in the combined index.
11641275
uint64_t BlockCount;
11651276

1277+
// List of unique stack ids (hashes). We use a 4B index of the id in the
1278+
// stack id lists on the alloc and callsite summaries for memory savings,
1279+
// since the number of unique ids is in practice much smaller than the
1280+
// number of stack id references in the summaries.
1281+
std::vector<uint64_t> StackIds;
1282+
1283+
// Temporary map while building StackIds list. Clear when index is completely
1284+
// built via releaseTemporaryMemory.
1285+
std::map<uint64_t, unsigned> StackIdToIndex;
1286+
11661287
// YAML I/O support.
11671288
friend yaml::MappingTraits<ModuleSummaryIndex>;
11681289

@@ -1205,6 +1326,31 @@ class ModuleSummaryIndex {
12051326
const_gvsummary_iterator end() const { return GlobalValueMap.end(); }
12061327
size_t size() const { return GlobalValueMap.size(); }
12071328

1329+
/// Read-only access to the StackIds vector; entries are addressed by the
/// indices handed out by addOrGetStackIdIndex.
const std::vector<uint64_t> &stackIds() const { return StackIds; }
1330+
1331+
unsigned addOrGetStackIdIndex(uint64_t StackId) {
1332+
auto Inserted = StackIdToIndex.insert({StackId, StackIds.size()});
1333+
if (Inserted.second)
1334+
StackIds.push_back(StackId);
1335+
return Inserted.first->second;
1336+
}
1337+
1338+
/// Returns the full stack id stored at position \p Index of the StackIds
/// vector. \p Index must be in range.
uint64_t getStackIdAtIndex(unsigned Index) const {
  assert(Index < StackIds.size());
  return StackIds[Index];
}
1342+
1343+
// Facility to release memory from data structures only needed during index
1344+
// construction (including while building combined index). Currently this only
1345+
// releases the temporary map used while constructing a correspondence between
1346+
// stack ids and their index in the StackIds vector. Mostly impactful when
1347+
// building a large combined index.
1348+
void releaseTemporaryMemory() {
1349+
assert(StackIdToIndex.size() == StackIds.size());
1350+
StackIdToIndex.clear();
1351+
StackIds.shrink_to_fit();
1352+
}
1353+
12081354
/// Convenience function for doing a DFS on a ValueInfo. Marks the function in
12091355
/// the FunctionHasParent map.
12101356
static void discoverNodes(ValueInfo V,

llvm/include/llvm/IR/ModuleSummaryIndexYAML.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,8 @@ template <> struct CustomMappingTraits<GlobalValueSummaryMapTy> {
234234
std::move(FSum.TypeCheckedLoadVCalls),
235235
std::move(FSum.TypeTestAssumeConstVCalls),
236236
std::move(FSum.TypeCheckedLoadConstVCalls),
237-
ArrayRef<FunctionSummary::ParamAccess>{}));
237+
ArrayRef<FunctionSummary::ParamAccess>{}, ArrayRef<CallsiteInfo>{},
238+
ArrayRef<AllocInfo>{}));
238239
}
239240
}
240241
static void output(IO &io, GlobalValueSummaryMapTy &V) {

0 commit comments

Comments
 (0)