19
19
#include " llvm/ADT/DenseMap.h"
20
20
#include " llvm/ADT/STLExtras.h"
21
21
#include " llvm/ADT/SmallString.h"
22
+ #include " llvm/ADT/SmallVector.h"
22
23
#include " llvm/ADT/StringExtras.h"
23
24
#include " llvm/ADT/StringMap.h"
24
25
#include " llvm/ADT/StringRef.h"
@@ -284,6 +285,79 @@ template <> struct DenseMapInfo<ValueInfo> {
284
285
// Hash for a ValueInfo key: the result of I.getRef() cast to uintptr_t and
// then converted to the unsigned return type.
static unsigned getHashValue (ValueInfo I) { return (uintptr_t )I.getRef (); }
285
286
};
286
287
288
+ // / Summary of memprof callsite metadata.
289
+ struct CallsiteInfo {
290
+ // Actual callee function.
291
+ ValueInfo Callee;
292
+
293
+ // Used to record whole program analysis cloning decisions.
294
+ // The ThinLTO backend will need to create as many clones as there are entries
295
+ // in the vector (it is expected and should be confirmed that all such
296
+ // summaries in the same FunctionSummary have the same number of entries).
297
+ // Each index records version info for the corresponding clone of this
298
+ // function. The value is the callee clone it calls (becomes the appended
299
+ // suffix id). Index 0 is the original version, and a value of 0 calls the
300
+ // original callee.
301
+ SmallVector<unsigned > Clones{0 };
302
+
303
+ // Represents stack ids in this context, recorded as indices into the
304
+ // StackIds vector in the summary index, which in turn holds the full 64-bit
305
+ // stack ids. This reduces memory as there are in practice far fewer unique
306
+ // stack ids than stack id references.
307
+ SmallVector<unsigned > StackIdIndices;
308
+
309
+ CallsiteInfo (ValueInfo Callee, SmallVector<unsigned > StackIdIndices)
310
+ : Callee(Callee), StackIdIndices(std::move(StackIdIndices)) {}
311
+ CallsiteInfo (ValueInfo Callee, SmallVector<unsigned > Clones,
312
+ SmallVector<unsigned > StackIdIndices)
313
+ : Callee(Callee), Clones(std::move(Clones)),
314
+ StackIdIndices (std::move(StackIdIndices)) {}
315
+ };
316
+
317
// Allocation type assigned to an allocation reached by a given context.
// Only noncold and cold exist initially; more can be added later. Each value
// should be a power of two so that values can be ORed together, in particular
// to track allocations whose behavior differs across calling contexts.
enum class AllocationType : uint8_t {
  None = 0,    // No allocation type bit set.
  NotCold = 1, // Bit 0.
  Cold = 2     // Bit 1.
};
323
+
324
+ // / Summary of a single MIB in a memprof metadata on allocations.
325
+ struct MIBInfo {
326
+ // The allocation type for this profiled context.
327
+ AllocationType AllocType;
328
+
329
+ // Represents stack ids in this context, recorded as indices into the
330
+ // StackIds vector in the summary index, which in turn holds the full 64-bit
331
+ // stack ids. This reduces memory as there are in practice far fewer unique
332
+ // stack ids than stack id references.
333
+ SmallVector<unsigned > StackIdIndices;
334
+
335
+ MIBInfo (AllocationType AllocType, SmallVector<unsigned > StackIdIndices)
336
+ : AllocType(AllocType), StackIdIndices(std::move(StackIdIndices)) {}
337
+ };
338
+
339
+ // / Summary of memprof metadata on allocations.
340
+ struct AllocInfo {
341
+ // Used to record whole program analysis cloning decisions.
342
+ // The ThinLTO backend will need to create as many clones as there are entries
343
+ // in the vector (it is expected and should be confirmed that all such
344
+ // summaries in the same FunctionSummary have the same number of entries).
345
+ // Each index records version info for the corresponding clone of this
346
+ // function. The value is the allocation type of the corresponding allocation.
347
+ // Index 0 is the original version. Before cloning, index 0 may have more than
348
+ // one allocation type.
349
+ SmallVector<uint8_t > Versions;
350
+
351
+ // Vector of MIBs in this memprof metadata.
352
+ std::vector<MIBInfo> MIBs;
353
+
354
+ AllocInfo (std::vector<MIBInfo> MIBs) : MIBs(std::move(MIBs)) {
355
+ Versions.push_back (0 );
356
+ }
357
+ AllocInfo (SmallVector<uint8_t > Versions, std::vector<MIBInfo> MIBs)
358
+ : Versions(std::move(Versions)), MIBs(std::move(MIBs)) {}
359
+ };
360
+
287
361
/// Function and variable summary information to aid decisions and
288
362
/// implementation of importing.
289
363
class GlobalValueSummary {
@@ -678,7 +752,8 @@ class FunctionSummary : public GlobalValueSummary {
678
752
std::vector<FunctionSummary::VFuncId>(),
679
753
std::vector<FunctionSummary::ConstVCall>(),
680
754
std::vector<FunctionSummary::ConstVCall>(),
681
- std::vector<FunctionSummary::ParamAccess>());
755
+ std::vector<FunctionSummary::ParamAccess>(),
756
+ std::vector<CallsiteInfo>(), std::vector<AllocInfo>());
682
757
}
683
758
684
759
/// A dummy node to reference external functions that aren't in the index
@@ -706,6 +781,25 @@ class FunctionSummary : public GlobalValueSummary {
706
781
using ParamAccessesTy = std::vector<ParamAccess>;
707
782
std::unique_ptr<ParamAccessesTy> ParamAccesses;
708
783
784
+ // / Optional list of memprof callsite metadata summaries. The correspondence
785
+ // / between the callsite summary and the callsites in the function is implied
786
+ // / by the order in the vector (and can be validated by comparing the stack
787
+ // / ids in the CallsiteInfo to those in the instruction callsite metadata).
788
+ // / As a memory savings optimization, we only create these for the prevailing
789
+ // / copy of a symbol when creating the combined index during LTO.
790
+ using CallsitesTy = std::vector<CallsiteInfo>;
791
+ std::unique_ptr<CallsitesTy> Callsites;
792
+
793
+ // / Optional list of allocation memprof metadata summaries. The correspondence
794
+ // / between the alloc memprof summary and the allocation callsites in the
795
+ // / function is implied by the order in the vector (and can be validated by
796
+ // / comparing the stack ids in the AllocInfo to those in the instruction
797
+ // / memprof metadata).
798
+ // / As a memory savings optimization, we only create these for the prevailing
799
+ // / copy of a symbol when creating the combined index during LTO.
800
+ using AllocsTy = std::vector<AllocInfo>;
801
+ std::unique_ptr<AllocsTy> Allocs;
802
+
709
803
public:
710
804
FunctionSummary (GVFlags Flags, unsigned NumInsts, FFlags FunFlags,
711
805
uint64_t EntryCount, std::vector<ValueInfo> Refs,
@@ -715,7 +809,8 @@ class FunctionSummary : public GlobalValueSummary {
715
809
std::vector<VFuncId> TypeCheckedLoadVCalls,
716
810
std::vector<ConstVCall> TypeTestAssumeConstVCalls,
717
811
std::vector<ConstVCall> TypeCheckedLoadConstVCalls,
718
- std::vector<ParamAccess> Params)
812
+ std::vector<ParamAccess> Params, CallsitesTy CallsiteList,
813
+ AllocsTy AllocList)
719
814
: GlobalValueSummary(FunctionKind, Flags, std::move(Refs)),
720
815
InstCount (NumInsts), FunFlags(FunFlags), EntryCount(EntryCount),
721
816
CallGraphEdgeList(std::move(CGEdges)) {
@@ -729,6 +824,10 @@ class FunctionSummary : public GlobalValueSummary {
729
824
std::move (TypeCheckedLoadConstVCalls)});
730
825
if (!Params.empty ())
731
826
ParamAccesses = std::make_unique<ParamAccessesTy>(std::move (Params));
827
+ if (!CallsiteList.empty ())
828
+ Callsites = std::make_unique<CallsitesTy>(std::move (CallsiteList));
829
+ if (!AllocList.empty ())
830
+ Allocs = std::make_unique<AllocsTy>(std::move (AllocList));
732
831
}
733
832
// Gets the number of readonly and writeonly refs in RefEdgeList
734
833
std::pair<unsigned , unsigned > specialRefCounts () const ;
@@ -832,6 +931,18 @@ class FunctionSummary : public GlobalValueSummary {
832
931
833
932
// Returns the pointer obtained from TIdInfo.get(); the pointee is const.
// NOTE(review): presumably null when no type id info was recorded -- confirm
// against how TIdInfo is populated.
const TypeIdInfo *getTypeIdInfo () const { return TIdInfo.get (); };
834
933
934
+ ArrayRef<CallsiteInfo> callsites () const {
935
+ if (Callsites)
936
+ return *Callsites;
937
+ return {};
938
+ }
939
+
940
+ ArrayRef<AllocInfo> allocs () const {
941
+ if (Allocs)
942
+ return *Allocs;
943
+ return {};
944
+ }
945
+
835
946
friend struct GraphTraits <ValueInfo>;
836
947
};
837
948
@@ -1163,6 +1274,16 @@ class ModuleSummaryIndex {
1163
1274
// the total number of basic blocks in the LTO unit in the combined index.
1164
1275
uint64_t BlockCount;
1165
1276
1277
// Unique stack ids (hashes). The alloc and callsite summaries store a 4-byte
// index into this list in their stack id lists instead of the 64-bit id
// itself, which saves memory because in practice the number of unique ids is
// much smaller than the number of stack id references in the summaries.
std::vector<uint64_t> StackIds;
1282
+
1283
// Maps a stack id to its position in the StackIds list. Only needed while
// that list is being built; it is cleared via releaseTemporaryMemory once the
// index is completely built.
std::map<uint64_t, unsigned> StackIdToIndex;
1286
+
1166
1287
// YAML I/O support.
1167
1288
friend yaml::MappingTraits<ModuleSummaryIndex>;
1168
1289
@@ -1205,6 +1326,31 @@ class ModuleSummaryIndex {
1205
1326
// Const past-the-end iterator over GlobalValueMap.
const_gvsummary_iterator end () const { return GlobalValueMap.end (); }
1206
1327
// Number of entries currently held in GlobalValueMap.
size_t size () const { return GlobalValueMap.size (); }
1207
1328
1329
+ const std::vector<uint64_t > &stackIds () const { return StackIds; }
1330
+
1331
+ unsigned addOrGetStackIdIndex (uint64_t StackId) {
1332
+ auto Inserted = StackIdToIndex.insert ({StackId, StackIds.size ()});
1333
+ if (Inserted.second )
1334
+ StackIds.push_back (StackId);
1335
+ return Inserted.first ->second ;
1336
+ }
1337
+
1338
+ uint64_t getStackIdAtIndex (unsigned Index) const {
1339
+ assert (StackIds.size () > Index);
1340
+ return StackIds[Index];
1341
+ }
1342
+
1343
+ // Facility to release memory from data structures only needed during index
1344
+ // construction (including while building combined index). Currently this only
1345
+ // releases the temporary map used while constructing a correspondence between
1346
+ // stack ids and their index in the StackIds vector. Mostly impactful when
1347
+ // building a large combined index.
1348
+ void releaseTemporaryMemory () {
1349
+ assert (StackIdToIndex.size () == StackIds.size ());
1350
+ StackIdToIndex.clear ();
1351
+ StackIds.shrink_to_fit ();
1352
+ }
1353
+
1208
1354
/// Convenience function for doing a DFS on a ValueInfo. Marks the function in
1209
1355
/// the FunctionHasParent map.
1210
1356
static void discoverNodes (ValueInfo V,
0 commit comments