Skip to content

Commit 98ed423

Browse files
committed
Restore "[MemProf] ThinLTO summary support" with fixes
This restores 4745945, which was reverted in commit 452a14e, along with fixes for a couple of bot failures.
1 parent 6747298 commit 98ed423

21 files changed

+1304
-48
lines changed

llvm/include/llvm/Analysis/MemoryProfileInfo.h

Lines changed: 57 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,18 +17,12 @@
1717
#include "llvm/IR/InstrTypes.h"
1818
#include "llvm/IR/Metadata.h"
1919
#include "llvm/IR/Module.h"
20+
#include "llvm/IR/ModuleSummaryIndex.h"
2021
#include <map>
2122

2223
namespace llvm {
2324
namespace memprof {
2425

25-
// Allocation type assigned to an allocation reached by a given context.
26-
// More can be added but initially this is just noncold and cold.
27-
// Values should be powers of two so that they can be ORed, in particular to
28-
// track allocations that have different behavior with different calling
29-
// contexts.
30-
enum class AllocationType : uint8_t { None = 0, NotCold = 1, Cold = 2 };
31-
3226
/// Return the allocation type for a given set of memory profile values.
3327
AllocationType getAllocType(uint64_t MaxAccessCount, uint64_t MinSize,
3428
uint64_t MinLifetime);
@@ -106,6 +100,62 @@ class CallStackTrie {
106100
bool buildAndAttachMIBMetadata(CallBase *CI);
107101
};
108102

103+
/// Helper class to iterate through stack ids in both metadata (memprof MIB and
104+
/// callsite) and the corresponding ThinLTO summary data structures
105+
/// (CallsiteInfo and MIBInfo). This simplifies implementation of client code
106+
/// which doesn't need to worry about whether we are operating with IR (Regular
107+
/// LTO), or summary (ThinLTO).
108+
template <class NodeT, class IteratorT> class CallStack {
public:
  /// Wrap node \p N, whose stack ids will be iterated. A null node produces
  /// an empty call stack.
  CallStack(const NodeT *N = nullptr) : N(N) {}

  // Implement minimum required methods for range-based for loop.
  // The default implementation assumes we are operating on ThinLTO data
  // structures, which have a vector of StackIdIndices. There are specialized
  // versions provided to iterate through metadata.
  struct CallStackIterator {
    const NodeT *N = nullptr;
    // Value-initialized so that the begin and end iterators of an empty
    // (null node) call stack hold a well-defined value and compare equal,
    // rather than comparing indeterminate iterators.
    IteratorT Iter{};

    /// Position the iterator at the end (\p End is true) or the beginning of
    /// the stack ids of \p N. For a null \p N the iterator stays
    /// value-initialized.
    CallStackIterator(const NodeT *N, bool End) : N(N) {
      if (!N)
        return;
      Iter = End ? N->StackIdIndices.end() : N->StackIdIndices.begin();
    }

    /// Return the value at the current position. Must not be called on an
    /// end iterator (checked via assert).
    uint64_t operator*() {
      assert(Iter != N->StackIdIndices.end());
      return *Iter;
    }

    // Comparisons only look at the underlying iterator position. They are
    // const so that const iterators can be compared as well.
    bool operator==(const CallStackIterator &rhs) const {
      return Iter == rhs.Iter;
    }
    bool operator!=(const CallStackIterator &rhs) const {
      return !(*this == rhs);
    }

    void operator++() { ++Iter; }
  };

  /// True when this call stack wraps no node.
  bool empty() const { return N == nullptr; }

  CallStackIterator begin() const {
    return CallStackIterator(N, /*End*/ false);
  }
  CallStackIterator end() const { return CallStackIterator(N, /*End*/ true); }

  /// Advance through this stack and \p Other in lockstep and return the
  /// iterator into this stack at the point where either stack is exhausted.
  /// The two stacks are expected to agree over that common range (checked via
  /// assert). Neither stack is modified.
  CallStackIterator beginAfterSharedPrefix(const CallStack &Other) const {
    CallStackIterator Cur = begin();
    for (CallStackIterator OtherCur = Other.begin();
         Cur != end() && OtherCur != Other.end(); ++Cur, ++OtherCur)
      assert(*Cur == *OtherCur);
    return Cur;
  }

private:
  const NodeT *N = nullptr;
};
151+
152+
/// Specializations for iterating through IR metadata stack contexts.
153+
template <>
154+
CallStack<MDNode, MDNode::op_iterator>::CallStackIterator::CallStackIterator(
155+
const MDNode *N, bool End);
156+
template <>
157+
uint64_t CallStack<MDNode, MDNode::op_iterator>::CallStackIterator::operator*();
158+
109159
} // end namespace memprof
110160
} // end namespace llvm
111161

llvm/include/llvm/AsmParser/LLParser.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,10 @@ namespace llvm {
406406
void addGlobalValueToIndex(std::string Name, GlobalValue::GUID,
407407
GlobalValue::LinkageTypes Linkage, unsigned ID,
408408
std::unique_ptr<GlobalValueSummary> Summary);
409+
bool parseOptionalAllocs(std::vector<AllocInfo> &Allocs);
410+
bool parseMemProfs(std::vector<MIBInfo> &MIBs);
411+
bool parseAllocType(uint8_t &AllocType);
412+
bool parseOptionalCallsites(std::vector<CallsiteInfo> &Callsites);
409413

410414
// Type Parsing.
411415
bool parseType(Type *&Result, const Twine &Msg, bool AllowVoid = false);

llvm/include/llvm/AsmParser/LLToken.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,15 @@ enum Kind {
406406
kw_byte,
407407
kw_bit,
408408
kw_varFlags,
409+
// The following are used by MemProf summary info.
410+
kw_callsites,
411+
kw_clones,
412+
kw_stackIds,
413+
kw_allocs,
414+
kw_versions,
415+
kw_memProf,
416+
kw_notcold,
417+
kw_notcoldandcold,
409418

410419
// GV's with __attribute__((no_sanitize("address"))), or things in
411420
// -fsanitize-ignorelist when built with ASan.

llvm/include/llvm/AsmParser/Parser.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,17 @@ ParsedModuleAndIndex parseAssemblyFileWithIndexNoUpgradeDebugInfo(
105105
std::unique_ptr<ModuleSummaryIndex>
106106
parseSummaryIndexAssemblyFile(StringRef Filename, SMDiagnostic &Err);
107107

108+
/// The function is a secondary interface to the LLVM Assembly Parser. It parses
109+
/// an ASCII string that (presumably) contains LLVM Assembly code for a module
110+
/// summary. It returns a ModuleSummaryIndex with the corresponding features.
111+
/// Note that this does not verify that the generated Index is valid, so you
112+
/// should run the verifier after parsing the file to check that it is okay.
113+
/// Parse LLVM Assembly from a string
114+
/// \param AsmString The string containing assembly
115+
/// \param Err Error result info.
116+
std::unique_ptr<ModuleSummaryIndex>
117+
parseSummaryIndexAssemblyString(StringRef AsmString, SMDiagnostic &Err);
118+
108119
/// parseAssemblyFile and parseAssemblyString are wrappers around this function.
109120
/// Parse LLVM Assembly from a MemoryBuffer.
110121
/// \param F The MemoryBuffer containing assembly

llvm/include/llvm/Bitcode/BitcodeReader.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "llvm/ADT/ArrayRef.h"
1717
#include "llvm/ADT/StringRef.h"
1818
#include "llvm/Bitstream/BitCodeEnums.h"
19+
#include "llvm/IR/GlobalValue.h"
1920
#include "llvm/Support/Endian.h"
2021
#include "llvm/Support/Error.h"
2122
#include "llvm/Support/ErrorOr.h"
@@ -117,8 +118,10 @@ typedef llvm::function_ref<Optional<std::string>(StringRef)>
117118

118119
/// Parse the specified bitcode buffer and merge its module summary index
119120
/// into CombinedIndex.
120-
Error readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath,
121-
uint64_t ModuleId);
121+
Error
122+
readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath,
123+
uint64_t ModuleId,
124+
std::function<bool(GlobalValue::GUID)> IsPrevailing = nullptr);
122125
};
123126

124127
struct BitcodeFileContents {

llvm/include/llvm/Bitcode/LLVMBitCodes.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,22 @@ enum GlobalValueSummarySymtabCodes {
301301
// Range information for accessed offsets for every argument.
302302
// [n x (paramno, range, numcalls, numcalls x (callee_guid, paramno, range))]
303303
FS_PARAM_ACCESS = 25,
304+
// Summary of per-module memprof callsite metadata.
305+
// [valueid, n x stackidindex]
306+
FS_PERMODULE_CALLSITE_INFO = 26,
307+
// Summary of per-module allocation memprof metadata.
308+
// [n x (alloc type, nummib, nummib x stackidindex)]
309+
FS_PERMODULE_ALLOC_INFO = 27,
310+
// Summary of combined index memprof callsite metadata.
311+
// [valueid, numstackindices, numver,
312+
// numstackindices x stackidindex, numver x version]
313+
FS_COMBINED_CALLSITE_INFO = 28,
314+
// Summary of combined index allocation memprof metadata.
315+
// [nummib, numver,
316+
// nummib x (alloc type, numstackids, numstackids x stackidindex),
317+
// numver x version]
318+
FS_COMBINED_ALLOC_INFO = 29,
319+
FS_STACK_IDS = 30,
304320
};
305321

306322
enum MetadataCodes {

llvm/include/llvm/IR/ModuleSummaryIndex.h

Lines changed: 148 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "llvm/ADT/DenseMap.h"
2020
#include "llvm/ADT/STLExtras.h"
2121
#include "llvm/ADT/SmallString.h"
22+
#include "llvm/ADT/SmallVector.h"
2223
#include "llvm/ADT/StringExtras.h"
2324
#include "llvm/ADT/StringMap.h"
2425
#include "llvm/ADT/StringRef.h"
@@ -284,6 +285,79 @@ template <> struct DenseMapInfo<ValueInfo> {
284285
static unsigned getHashValue(ValueInfo I) { return (uintptr_t)I.getRef(); }
285286
};
286287

288+
/// Summary of memprof callsite metadata.
289+
struct CallsiteInfo {
290+
// Actual callee function.
291+
ValueInfo Callee;
292+
293+
// Used to record whole program analysis cloning decisions.
294+
// The ThinLTO backend will need to create as many clones as there are entries
295+
// in the vector (it is expected and should be confirmed that all such
296+
// summaries in the same FunctionSummary have the same number of entries).
297+
// Each index records version info for the corresponding clone of this
298+
// function. The value is the callee clone it calls (becomes the appended
299+
// suffix id). Index 0 is the original version, and a value of 0 calls the
300+
// original callee.
301+
SmallVector<unsigned> Clones{0};
302+
303+
// Represents stack ids in this context, recorded as indices into the
304+
// StackIds vector in the summary index, which in turn holds the full 64-bit
305+
// stack ids. This reduces memory as there are in practice far fewer unique
306+
// stack ids than stack id references.
307+
SmallVector<unsigned> StackIdIndices;
308+
309+
CallsiteInfo(ValueInfo Callee, SmallVector<unsigned> StackIdIndices)
310+
: Callee(Callee), StackIdIndices(std::move(StackIdIndices)) {}
311+
CallsiteInfo(ValueInfo Callee, SmallVector<unsigned> Clones,
312+
SmallVector<unsigned> StackIdIndices)
313+
: Callee(Callee), Clones(std::move(Clones)),
314+
StackIdIndices(std::move(StackIdIndices)) {}
315+
};
316+
317+
// Allocation type assigned to an allocation reached by a given context.
318+
// More can be added but initially this is just noncold and cold.
319+
// Values should be powers of two so that they can be ORed, in particular to
320+
// track allocations that have different behavior with different calling
321+
// contexts.
322+
enum class AllocationType : uint8_t { None = 0, NotCold = 1, Cold = 2 };
323+
324+
/// Summary of a single MIB in a memprof metadata on allocations.
325+
struct MIBInfo {
326+
// The allocation type for this profiled context.
327+
AllocationType AllocType;
328+
329+
// Represents stack ids in this context, recorded as indices into the
330+
// StackIds vector in the summary index, which in turn holds the full 64-bit
331+
// stack ids. This reduces memory as there are in practice far fewer unique
332+
// stack ids than stack id references.
333+
SmallVector<unsigned> StackIdIndices;
334+
335+
MIBInfo(AllocationType AllocType, SmallVector<unsigned> StackIdIndices)
336+
: AllocType(AllocType), StackIdIndices(std::move(StackIdIndices)) {}
337+
};
338+
339+
/// Summary of memprof metadata on allocations.
340+
struct AllocInfo {
341+
// Used to record whole program analysis cloning decisions.
342+
// The ThinLTO backend will need to create as many clones as there are entries
343+
// in the vector (it is expected and should be confirmed that all such
344+
// summaries in the same FunctionSummary have the same number of entries).
345+
// Each index records version info for the corresponding clone of this
346+
// function. The value is the allocation type of the corresponding allocation.
347+
// Index 0 is the original version. Before cloning, index 0 may have more than
348+
// one allocation type.
349+
SmallVector<uint8_t> Versions;
350+
351+
// Vector of MIBs in this memprof metadata.
352+
std::vector<MIBInfo> MIBs;
353+
354+
AllocInfo(std::vector<MIBInfo> MIBs) : MIBs(std::move(MIBs)) {
355+
Versions.push_back(0);
356+
}
357+
AllocInfo(SmallVector<uint8_t> Versions, std::vector<MIBInfo> MIBs)
358+
: Versions(std::move(Versions)), MIBs(std::move(MIBs)) {}
359+
};
360+
287361
/// Function and variable summary information to aid decisions and
288362
/// implementation of importing.
289363
class GlobalValueSummary {
@@ -678,7 +752,8 @@ class FunctionSummary : public GlobalValueSummary {
678752
std::vector<FunctionSummary::VFuncId>(),
679753
std::vector<FunctionSummary::ConstVCall>(),
680754
std::vector<FunctionSummary::ConstVCall>(),
681-
std::vector<FunctionSummary::ParamAccess>());
755+
std::vector<FunctionSummary::ParamAccess>(),
756+
std::vector<CallsiteInfo>(), std::vector<AllocInfo>());
682757
}
683758

684759
/// A dummy node to reference external functions that aren't in the index
@@ -706,6 +781,25 @@ class FunctionSummary : public GlobalValueSummary {
706781
using ParamAccessesTy = std::vector<ParamAccess>;
707782
std::unique_ptr<ParamAccessesTy> ParamAccesses;
708783

784+
/// Optional list of memprof callsite metadata summaries. The correspondence
785+
/// between the callsite summary and the callsites in the function is implied
786+
/// by the order in the vector (and can be validated by comparing the stack
787+
/// ids in the CallsiteInfo to those in the instruction callsite metadata).
788+
/// As a memory savings optimization, we only create these for the prevailing
789+
/// copy of a symbol when creating the combined index during LTO.
790+
using CallsitesTy = std::vector<CallsiteInfo>;
791+
std::unique_ptr<CallsitesTy> Callsites;
792+
793+
/// Optional list of allocation memprof metadata summaries. The correspondence
794+
/// between the alloc memprof summary and the allocation callsites in the
795+
/// function is implied by the order in the vector (and can be validated by
796+
/// comparing the stack ids in the AllocInfo to those in the instruction
797+
/// memprof metadata).
798+
/// As a memory savings optimization, we only create these for the prevailing
799+
/// copy of a symbol when creating the combined index during LTO.
800+
using AllocsTy = std::vector<AllocInfo>;
801+
std::unique_ptr<AllocsTy> Allocs;
802+
709803
public:
710804
FunctionSummary(GVFlags Flags, unsigned NumInsts, FFlags FunFlags,
711805
uint64_t EntryCount, std::vector<ValueInfo> Refs,
@@ -715,7 +809,8 @@ class FunctionSummary : public GlobalValueSummary {
715809
std::vector<VFuncId> TypeCheckedLoadVCalls,
716810
std::vector<ConstVCall> TypeTestAssumeConstVCalls,
717811
std::vector<ConstVCall> TypeCheckedLoadConstVCalls,
718-
std::vector<ParamAccess> Params)
812+
std::vector<ParamAccess> Params, CallsitesTy CallsiteList,
813+
AllocsTy AllocList)
719814
: GlobalValueSummary(FunctionKind, Flags, std::move(Refs)),
720815
InstCount(NumInsts), FunFlags(FunFlags), EntryCount(EntryCount),
721816
CallGraphEdgeList(std::move(CGEdges)) {
@@ -729,6 +824,10 @@ class FunctionSummary : public GlobalValueSummary {
729824
std::move(TypeCheckedLoadConstVCalls)});
730825
if (!Params.empty())
731826
ParamAccesses = std::make_unique<ParamAccessesTy>(std::move(Params));
827+
if (!CallsiteList.empty())
828+
Callsites = std::make_unique<CallsitesTy>(std::move(CallsiteList));
829+
if (!AllocList.empty())
830+
Allocs = std::make_unique<AllocsTy>(std::move(AllocList));
732831
}
733832
// Gets the number of readonly and writeonly refs in RefEdgeList
734833
std::pair<unsigned, unsigned> specialRefCounts() const;
@@ -832,6 +931,18 @@ class FunctionSummary : public GlobalValueSummary {
832931

833932
const TypeIdInfo *getTypeIdInfo() const { return TIdInfo.get(); };
834933

934+
/// Returns the memprof callsite summaries held by this function summary, or
/// an empty ArrayRef when no callsite list was created.
ArrayRef<CallsiteInfo> callsites() const {
  return Callsites ? ArrayRef<CallsiteInfo>(*Callsites)
                   : ArrayRef<CallsiteInfo>();
}
939+
940+
/// Returns the allocation memprof summaries held by this function summary, or
/// an empty ArrayRef when no allocation list was created.
ArrayRef<AllocInfo> allocs() const {
  return Allocs ? ArrayRef<AllocInfo>(*Allocs) : ArrayRef<AllocInfo>();
}
945+
835946
friend struct GraphTraits<ValueInfo>;
836947
};
837948

@@ -1163,6 +1274,16 @@ class ModuleSummaryIndex {
11631274
// the total number of basic blocks in the LTO unit in the combined index.
11641275
uint64_t BlockCount;
11651276

1277+
// List of unique stack ids (hashes). We use a 4B index of the id in the
1278+
// stack id lists on the alloc and callsite summaries for memory savings,
1279+
// since the number of unique ids is in practice much smaller than the
1280+
// number of stack id references in the summaries.
1281+
std::vector<uint64_t> StackIds;
1282+
1283+
// Temporary map while building StackIds list. Clear when index is completely
1284+
// built via releaseTemporaryMemory.
1285+
std::map<uint64_t, unsigned> StackIdToIndex;
1286+
11661287
// YAML I/O support.
11671288
friend yaml::MappingTraits<ModuleSummaryIndex>;
11681289

@@ -1205,6 +1326,31 @@ class ModuleSummaryIndex {
12051326
const_gvsummary_iterator end() const { return GlobalValueMap.end(); }
12061327
size_t size() const { return GlobalValueMap.size(); }
12071328

1329+
// Read-only access to the StackIds vector of this index.
const std::vector<uint64_t> &stackIds() const { return StackIds; }
1330+
1331+
unsigned addOrGetStackIdIndex(uint64_t StackId) {
1332+
auto Inserted = StackIdToIndex.insert({StackId, StackIds.size()});
1333+
if (Inserted.second)
1334+
StackIds.push_back(StackId);
1335+
return Inserted.first->second;
1336+
}
1337+
1338+
// Returns the full stack id stored at position Index of the StackIds vector.
// Index must be in range (checked via assert).
uint64_t getStackIdAtIndex(unsigned Index) const {
  assert(Index < StackIds.size());
  return StackIds[Index];
}
1342+
1343+
// Facility to release memory from data structures only needed during index
1344+
// construction (including while building combined index). Currently this only
1345+
// releases the temporary map used while constructing a correspondence between
1346+
// stack ids and their index in the StackIds vector. Mostly impactful when
1347+
// building a large combined index.
1348+
// Drops the stack id to index map and trims any excess capacity of the
// StackIds vector. The map and the vector are expected to hold the same
// number of entries at this point (checked via assert).
void releaseTemporaryMemory() {
  assert(StackIdToIndex.size() == StackIds.size());
  StackIds.shrink_to_fit();
  StackIdToIndex.clear();
}
1353+
12081354
/// Convenience function for doing a DFS on a ValueInfo. Marks the function in
12091355
/// the FunctionHasParent map.
12101356
static void discoverNodes(ValueInfo V,

llvm/include/llvm/IR/ModuleSummaryIndexYAML.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,8 @@ template <> struct CustomMappingTraits<GlobalValueSummaryMapTy> {
234234
std::move(FSum.TypeCheckedLoadVCalls),
235235
std::move(FSum.TypeTestAssumeConstVCalls),
236236
std::move(FSum.TypeCheckedLoadConstVCalls),
237-
ArrayRef<FunctionSummary::ParamAccess>{}));
237+
ArrayRef<FunctionSummary::ParamAccess>{}, ArrayRef<CallsiteInfo>{},
238+
ArrayRef<AllocInfo>{}));
238239
}
239240
}
240241
static void output(IO &io, GlobalValueSummaryMapTy &V) {

0 commit comments

Comments
 (0)