Skip to content

Commit ba1bcba

Browse files
[Memprof] Adds the option to collect AccessCountHistograms for memprof.
Adds the compile-time flag -mllvm -memprof-histogram and the runtime flag histogram=true|false to turn histogram collection on and off. The -memprof-histogram flag requires -memprof-use-callbacks=true to work. In histogram mode, the shadow mapping changes from one 8-byte counter per 64 bytes to one 1-byte counter per 8 bytes, capped at 255; this is the only granularity supported for now. Updates the RawMemprofReader and the serialization of MemoryInfoBlocks to the binary format, including bumping the raw binary format from version 3 to version 4. Updates the creation of MemoryInfoBlocks so that they can be built with or without histograms. When two MemoryInfoBlocks are merged, their AccessCounts are summed, the values of the shorter histogram are added into the longer one, and the shorter histogram is removed. Adds a memprof_histogram test case. This is the initial commit for AccessCountHistograms in memprof, covering support up to the RawProfile.
1 parent 692ae54 commit ba1bcba

30 files changed

+1086
-239
lines changed

compiler-rt/include/profile/MIBEntryDef.inc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,3 +51,5 @@ MIBEntryDef(MaxAccessDensity = 22, MaxAccessDensity, uint32_t)
5151
MIBEntryDef(TotalLifetimeAccessDensity = 23, TotalLifetimeAccessDensity, uint64_t)
5252
MIBEntryDef(MinLifetimeAccessDensity = 24, MinLifetimeAccessDensity, uint32_t)
5353
MIBEntryDef(MaxLifetimeAccessDensity = 25, MaxLifetimeAccessDensity, uint32_t)
54+
MIBEntryDef(AccessHistogramSize = 26, AccessHistogramSize, uint32_t)
55+
MIBEntryDef(AccessHistogram = 27, AccessHistogram, uintptr_t)

compiler-rt/include/profile/MemProfData.inc

Lines changed: 137 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -22,24 +22,27 @@
2222
#include <string.h>
2323

2424
#ifdef _MSC_VER
25-
#define PACKED(...) __pragma(pack(push,1)) __VA_ARGS__ __pragma(pack(pop))
25+
#define PACKED(...) __pragma(pack(push, 1)) __VA_ARGS__ __pragma(pack(pop))
2626
#else
2727
#define PACKED(...) __VA_ARGS__ __attribute__((__packed__))
2828
#endif
2929

30-
// A 64-bit magic number to uniquely identify the raw binary memprof profile file.
31-
#define MEMPROF_RAW_MAGIC_64 \
32-
((uint64_t)255 << 56 | (uint64_t)'m' << 48 | (uint64_t)'p' << 40 | (uint64_t)'r' << 32 | \
33-
(uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129)
30+
// A 64-bit magic number to uniquely identify the raw binary memprof profile
31+
// file.
32+
#define MEMPROF_RAW_MAGIC_64 \
33+
((uint64_t)255 << 56 | (uint64_t)'m' << 48 | (uint64_t)'p' << 40 | \
34+
(uint64_t)'r' << 32 | (uint64_t)'o' << 24 | (uint64_t)'f' << 16 | \
35+
(uint64_t)'r' << 8 | (uint64_t)129)
3436

3537
// The version number of the raw binary format.
36-
#define MEMPROF_RAW_VERSION 3ULL
38+
#define MEMPROF_RAW_VERSION 4ULL
3739

3840
#define MEMPROF_BUILDID_MAX_SIZE 32ULL
3941

4042
namespace llvm {
4143
namespace memprof {
42-
// A struct describing the header used for the raw binary memprof profile format.
44+
// A struct describing the header used for the raw binary memprof profile
45+
// format.
4346
PACKED(struct Header {
4447
uint64_t Magic;
4548
uint64_t Version;
@@ -62,15 +65,15 @@ PACKED(struct SegmentEntry {
6265
SegmentEntry(uint64_t S, uint64_t E, uint64_t O)
6366
: Start(S), End(E), Offset(O), BuildIdSize(0) {}
6467

65-
SegmentEntry(const SegmentEntry& S) {
68+
SegmentEntry(const SegmentEntry &S) {
6669
Start = S.Start;
6770
End = S.End;
6871
Offset = S.Offset;
6972
BuildIdSize = S.BuildIdSize;
7073
memcpy(BuildId, S.BuildId, S.BuildIdSize);
7174
}
7275

73-
SegmentEntry& operator=(const SegmentEntry& S) {
76+
SegmentEntry &operator=(const SegmentEntry &S) {
7477
Start = S.Start;
7578
End = S.End;
7679
Offset = S.Offset;
@@ -79,7 +82,7 @@ PACKED(struct SegmentEntry {
7982
return *this;
8083
}
8184

82-
bool operator==(const SegmentEntry& S) const {
85+
bool operator==(const SegmentEntry &S) const {
8386
return Start == S.Start && End == S.End && Offset == S.Offset &&
8487
BuildIdSize == S.BuildIdSize &&
8588
memcmp(BuildId, S.BuildId, S.BuildIdSize) == 0;
@@ -90,111 +93,143 @@ PACKED(struct SegmentEntry {
9093
// MemProfData.inc since it would mean we are embedding a directive (the
9194
// #include for MIBEntryDef) into the macros which is undefined behaviour.
9295
#ifdef _MSC_VER
93-
__pragma(pack(push,1))
96+
__pragma(pack(push, 1))
9497
#endif
9598

96-
// A struct representing the heap allocation characteristics of a particular
97-
// runtime context. This struct is shared between the compiler-rt runtime and
98-
// the raw profile reader. The indexed format uses a separate, self-describing
99-
// backwards compatible format.
100-
struct MemInfoBlock{
99+
// A struct representing the heap allocation characteristics of a particular
100+
// runtime context. This struct is shared between the compiler-rt runtime
101+
// and the raw profile reader. The indexed format uses a separate,
102+
// self-describing backwards compatible format.
103+
struct MemInfoBlock {
101104

102105
#define MIBEntryDef(NameTag, Name, Type) Type Name;
103106
#include "MIBEntryDef.inc"
104107
#undef MIBEntryDef
105108

106-
bool operator==(const MemInfoBlock& Other) const {
107-
bool IsEqual = true;
108-
#define MIBEntryDef(NameTag, Name, Type) \
109+
bool operator==(const MemInfoBlock &Other) const {
110+
bool IsEqual = true;
111+
#define MIBEntryDef(NameTag, Name, Type) \
109112
IsEqual = (IsEqual && Name == Other.Name);
110113
#include "MIBEntryDef.inc"
111114
#undef MIBEntryDef
112-
return IsEqual;
113-
}
115+
return IsEqual;
116+
}
114117

115-
MemInfoBlock() {
118+
MemInfoBlock() {
116119
#define MIBEntryDef(NameTag, Name, Type) Name = Type();
117120
#include "MIBEntryDef.inc"
118121
#undef MIBEntryDef
119-
}
120-
121-
MemInfoBlock(uint32_t Size, uint64_t AccessCount, uint32_t AllocTs,
122-
uint32_t DeallocTs, uint32_t AllocCpu, uint32_t DeallocCpu)
123-
: MemInfoBlock() {
124-
AllocCount = 1U;
125-
TotalAccessCount = AccessCount;
126-
MinAccessCount = AccessCount;
127-
MaxAccessCount = AccessCount;
128-
TotalSize = Size;
129-
MinSize = Size;
130-
MaxSize = Size;
131-
AllocTimestamp = AllocTs;
132-
DeallocTimestamp = DeallocTs;
133-
TotalLifetime = DeallocTimestamp - AllocTimestamp;
134-
MinLifetime = TotalLifetime;
135-
MaxLifetime = TotalLifetime;
136-
// Access density is accesses per byte. Multiply by 100 to include the
137-
// fractional part.
138-
TotalAccessDensity = AccessCount * 100 / Size;
139-
MinAccessDensity = TotalAccessDensity;
140-
MaxAccessDensity = TotalAccessDensity;
141-
// Lifetime access density is the access density per second of lifetime.
142-
// Multiply by 1000 to convert denominator lifetime to seconds (using a
143-
// minimum lifetime of 1ms to avoid divide by 0. Do the multiplication first
144-
// to reduce truncations to 0.
145-
TotalLifetimeAccessDensity =
146-
TotalAccessDensity * 1000 / (TotalLifetime ? TotalLifetime : 1);
147-
MinLifetimeAccessDensity = TotalLifetimeAccessDensity;
148-
MaxLifetimeAccessDensity = TotalLifetimeAccessDensity;
149-
AllocCpuId = AllocCpu;
150-
DeallocCpuId = DeallocCpu;
151-
NumMigratedCpu = AllocCpuId != DeallocCpuId;
152-
}
153-
154-
void Merge(const MemInfoBlock &newMIB) {
155-
AllocCount += newMIB.AllocCount;
156-
157-
TotalAccessCount += newMIB.TotalAccessCount;
158-
MinAccessCount = newMIB.MinAccessCount < MinAccessCount ? newMIB.MinAccessCount : MinAccessCount;
159-
MaxAccessCount = newMIB.MaxAccessCount > MaxAccessCount ? newMIB.MaxAccessCount : MaxAccessCount;
160-
161-
TotalSize += newMIB.TotalSize;
162-
MinSize = newMIB.MinSize < MinSize ? newMIB.MinSize : MinSize;
163-
MaxSize = newMIB.MaxSize > MaxSize ? newMIB.MaxSize : MaxSize;
164-
165-
TotalLifetime += newMIB.TotalLifetime;
166-
MinLifetime = newMIB.MinLifetime < MinLifetime ? newMIB.MinLifetime : MinLifetime;
167-
MaxLifetime = newMIB.MaxLifetime > MaxLifetime ? newMIB.MaxLifetime : MaxLifetime;
168-
169-
TotalAccessDensity += newMIB.TotalAccessDensity;
170-
MinAccessDensity = newMIB.MinAccessDensity < MinAccessDensity
171-
? newMIB.MinAccessDensity
172-
: MinAccessDensity;
173-
MaxAccessDensity = newMIB.MaxAccessDensity > MaxAccessDensity
174-
? newMIB.MaxAccessDensity
175-
: MaxAccessDensity;
176-
177-
TotalLifetimeAccessDensity += newMIB.TotalLifetimeAccessDensity;
178-
MinLifetimeAccessDensity =
179-
newMIB.MinLifetimeAccessDensity < MinLifetimeAccessDensity
180-
? newMIB.MinLifetimeAccessDensity
181-
: MinLifetimeAccessDensity;
182-
MaxLifetimeAccessDensity =
183-
newMIB.MaxLifetimeAccessDensity > MaxLifetimeAccessDensity
184-
? newMIB.MaxLifetimeAccessDensity
185-
: MaxLifetimeAccessDensity;
186-
187-
// We know newMIB was deallocated later, so just need to check if it was
188-
// allocated before last one deallocated.
189-
NumLifetimeOverlaps += newMIB.AllocTimestamp < DeallocTimestamp;
190-
AllocTimestamp = newMIB.AllocTimestamp;
191-
DeallocTimestamp = newMIB.DeallocTimestamp;
192-
193-
NumSameAllocCpu += AllocCpuId == newMIB.AllocCpuId;
194-
NumSameDeallocCpu += DeallocCpuId == newMIB.DeallocCpuId;
195-
AllocCpuId = newMIB.AllocCpuId;
196-
DeallocCpuId = newMIB.DeallocCpuId;
197-
}
122+
}
123+
124+
MemInfoBlock(uint32_t Size, uint64_t AccessCount, uint32_t AllocTs,
125+
uint32_t DeallocTs, uint32_t AllocCpu, uint32_t DeallocCpu,
126+
uintptr_t Histogram, uint32_t HistogramSize)
127+
: MemInfoBlock() {
128+
AllocCount = 1U;
129+
TotalAccessCount = AccessCount;
130+
MinAccessCount = AccessCount;
131+
MaxAccessCount = AccessCount;
132+
TotalSize = Size;
133+
MinSize = Size;
134+
MaxSize = Size;
135+
AllocTimestamp = AllocTs;
136+
DeallocTimestamp = DeallocTs;
137+
TotalLifetime = DeallocTimestamp - AllocTimestamp;
138+
MinLifetime = TotalLifetime;
139+
MaxLifetime = TotalLifetime;
140+
// Access density is accesses per byte. Multiply by 100 to include the
141+
// fractional part.
142+
TotalAccessDensity = AccessCount * 100 / Size;
143+
MinAccessDensity = TotalAccessDensity;
144+
MaxAccessDensity = TotalAccessDensity;
145+
// Lifetime access density is the access density per second of lifetime.
146+
// Multiply by 1000 to convert denominator lifetime to seconds (using a
147+
// minimum lifetime of 1ms to avoid divide by 0). Do the multiplication first
148+
// to reduce truncations to 0.
149+
TotalLifetimeAccessDensity =
150+
TotalAccessDensity * 1000 / (TotalLifetime ? TotalLifetime : 1);
151+
MinLifetimeAccessDensity = TotalLifetimeAccessDensity;
152+
MaxLifetimeAccessDensity = TotalLifetimeAccessDensity;
153+
AllocCpuId = AllocCpu;
154+
DeallocCpuId = DeallocCpu;
155+
NumMigratedCpu = AllocCpuId != DeallocCpuId;
156+
// For now we assume HistogramSize is the same as user requested size
157+
AccessHistogramSize = HistogramSize;
158+
AccessHistogram = Histogram;
159+
}
160+
161+
// Merge never frees a histogram buffer itself: the right deallocator
162+
// (InternalFree or free) depends on where the buffer was allocated (runtime
163+
// or profdata tool). The caller is expected to free the buffer of the
164+
// shorter histogram (newMIB's when both have the same size) after merging.
165+
void Merge(const MemInfoBlock &newMIB) {
166+
AllocCount += newMIB.AllocCount;
167+
168+
TotalAccessCount += newMIB.TotalAccessCount;
169+
MinAccessCount = newMIB.MinAccessCount < MinAccessCount
170+
? newMIB.MinAccessCount
171+
: MinAccessCount;
172+
MaxAccessCount = newMIB.MaxAccessCount > MaxAccessCount
173+
? newMIB.MaxAccessCount
174+
: MaxAccessCount;
175+
176+
TotalSize += newMIB.TotalSize;
177+
MinSize = newMIB.MinSize < MinSize ? newMIB.MinSize : MinSize;
178+
MaxSize = newMIB.MaxSize > MaxSize ? newMIB.MaxSize : MaxSize;
179+
180+
TotalLifetime += newMIB.TotalLifetime;
181+
MinLifetime =
182+
newMIB.MinLifetime < MinLifetime ? newMIB.MinLifetime : MinLifetime;
183+
MaxLifetime =
184+
newMIB.MaxLifetime > MaxLifetime ? newMIB.MaxLifetime : MaxLifetime;
185+
186+
TotalAccessDensity += newMIB.TotalAccessDensity;
187+
MinAccessDensity = newMIB.MinAccessDensity < MinAccessDensity
188+
? newMIB.MinAccessDensity
189+
: MinAccessDensity;
190+
MaxAccessDensity = newMIB.MaxAccessDensity > MaxAccessDensity
191+
? newMIB.MaxAccessDensity
192+
: MaxAccessDensity;
193+
194+
TotalLifetimeAccessDensity += newMIB.TotalLifetimeAccessDensity;
195+
MinLifetimeAccessDensity =
196+
newMIB.MinLifetimeAccessDensity < MinLifetimeAccessDensity
197+
? newMIB.MinLifetimeAccessDensity
198+
: MinLifetimeAccessDensity;
199+
MaxLifetimeAccessDensity =
200+
newMIB.MaxLifetimeAccessDensity > MaxLifetimeAccessDensity
201+
? newMIB.MaxLifetimeAccessDensity
202+
: MaxLifetimeAccessDensity;
203+
204+
// We know newMIB was deallocated later, so just need to check if it was
205+
// allocated before last one deallocated.
206+
NumLifetimeOverlaps += newMIB.AllocTimestamp < DeallocTimestamp;
207+
AllocTimestamp = newMIB.AllocTimestamp;
208+
DeallocTimestamp = newMIB.DeallocTimestamp;
209+
210+
NumSameAllocCpu += AllocCpuId == newMIB.AllocCpuId;
211+
NumSameDeallocCpu += DeallocCpuId == newMIB.DeallocCpuId;
212+
AllocCpuId = newMIB.AllocCpuId;
213+
DeallocCpuId = newMIB.DeallocCpuId;
214+
215+
// When merging histograms, always keep the longer histogram and add the
216+
// values of the shorter histogram into it.
217+
uintptr_t ShorterHistogram;
218+
uint32_t ShorterHistogramSize;
219+
if (newMIB.AccessHistogramSize > AccessHistogramSize) {
220+
ShorterHistogram = AccessHistogram;
221+
ShorterHistogramSize = AccessHistogramSize;
222+
// Swap histogram of current to larger histogram
223+
AccessHistogram = newMIB.AccessHistogram;
224+
AccessHistogramSize = newMIB.AccessHistogramSize;
225+
} else {
226+
ShorterHistogram = newMIB.AccessHistogram;
227+
ShorterHistogramSize = newMIB.AccessHistogramSize;
228+
}
229+
for (size_t i = 0; i < ShorterHistogramSize; ++i) {
230+
((uint64_t *)AccessHistogram)[i] += ((uint64_t *)ShorterHistogram)[i];
231+
}
232+
}
198233

199234
#ifdef _MSC_VER
200235
} __pragma(pack(pop));
@@ -205,4 +240,4 @@ void Merge(const MemInfoBlock &newMIB) {
205240
} // namespace memprof
206241
} // namespace llvm
207242

208-
#endif
243+
#endif

0 commit comments

Comments
 (0)