Skip to content

Commit 11811a4

Browse files
[Memprof] Adds the option to collect AccessCountHistograms for memprof.
Adds the compile-time flag -mllvm -memprof-histogram to turn histogram collection on and off. The -memprof-histogram flag relies on -memprof-use-callbacks=true to work. The flag also sets a flag in the IR that is used by the runtime to check if histogram mode is enabled. When collecting histograms, the shadow mapping logic changes from one 8-byte counter per 64 bytes to one 1-byte counter per 8 bytes, capped at 255. Only this granularity is supported as of now. Updates the RawMemprofReader and the serialization of MemoryInfoBlocks to binary format, including moving the raw binary format from version 3 to version 4. The current MemprofReader is compatible and can still read memprofraw version 3, but version 3 can no longer be produced. Adds a test case memprofV3 to make sure RawMemprofReader remains backward compatible. Updates the creation of MemoryInfoBlocks with and without histograms. When two MemoryInfoBlocks are merged, AccessCounts are summed up and the shorter histogram is removed. Adds a memprof_histogram test case. Initial commit for adding AccessCountHistograms up until RawProfile for memprof.
1 parent 0c97ac0 commit 11811a4

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

51 files changed

+1097
-75
lines changed

compiler-rt/include/profile/MIBEntryDef.inc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,3 +51,5 @@ MIBEntryDef(MaxAccessDensity = 22, MaxAccessDensity, uint32_t)
5151
MIBEntryDef(TotalLifetimeAccessDensity = 23, TotalLifetimeAccessDensity, uint64_t)
5252
MIBEntryDef(MinLifetimeAccessDensity = 24, MinLifetimeAccessDensity, uint32_t)
5353
MIBEntryDef(MaxLifetimeAccessDensity = 25, MaxLifetimeAccessDensity, uint32_t)
54+
MIBEntryDef(AccessHistogramSize = 26, AccessHistogramSize, uint32_t)
55+
MIBEntryDef(AccessHistogram = 27, AccessHistogram, uintptr_t)

compiler-rt/include/profile/MemProfData.inc

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,11 @@
3333
(uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129)
3434

3535
// The version number of the raw binary format.
36-
#define MEMPROF_RAW_VERSION 3ULL
36+
#define MEMPROF_RAW_VERSION 4ULL
37+
38+
// Currently supported versions.
39+
#define MEMPROF_RAW_SUPPORTED_VERSIONS \
40+
{ 3ULL, 4ULL }
3741

3842
#define MEMPROF_BUILDID_MAX_SIZE 32ULL
3943

@@ -119,7 +123,8 @@ MemInfoBlock() {
119123
}
120124

121125
MemInfoBlock(uint32_t Size, uint64_t AccessCount, uint32_t AllocTs,
122-
uint32_t DeallocTs, uint32_t AllocCpu, uint32_t DeallocCpu)
126+
uint32_t DeallocTs, uint32_t AllocCpu, uint32_t DeallocCpu,
127+
uintptr_t Histogram, uint32_t HistogramSize)
123128
: MemInfoBlock() {
124129
AllocCount = 1U;
125130
TotalAccessCount = AccessCount;
@@ -149,6 +154,8 @@ MemInfoBlock(uint32_t Size, uint64_t AccessCount, uint32_t AllocTs,
149154
AllocCpuId = AllocCpu;
150155
DeallocCpuId = DeallocCpu;
151156
NumMigratedCpu = AllocCpuId != DeallocCpuId;
157+
AccessHistogramSize = HistogramSize;
158+
AccessHistogram = Histogram;
152159
}
153160

154161
void Merge(const MemInfoBlock &newMIB) {
@@ -194,6 +201,24 @@ void Merge(const MemInfoBlock &newMIB) {
194201
NumSameDeallocCpu += DeallocCpuId == newMIB.DeallocCpuId;
195202
AllocCpuId = newMIB.AllocCpuId;
196203
DeallocCpuId = newMIB.DeallocCpuId;
204+
205+
// For merging histograms, we always keep the longer histogram, and add
206+
// the values of the shorter histogram to the longer one.
207+
uintptr_t ShorterHistogram;
208+
uint32_t ShorterHistogramSize;
209+
if (newMIB.AccessHistogramSize > AccessHistogramSize) {
210+
ShorterHistogram = AccessHistogram;
211+
ShorterHistogramSize = AccessHistogramSize;
212+
// Take over the larger histogram; the previous one becomes the shorter one.
213+
AccessHistogram = newMIB.AccessHistogram;
214+
AccessHistogramSize = newMIB.AccessHistogramSize;
215+
} else {
216+
ShorterHistogram = newMIB.AccessHistogram;
217+
ShorterHistogramSize = newMIB.AccessHistogramSize;
218+
}
219+
for (size_t i = 0; i < ShorterHistogramSize; ++i) {
220+
((uint64_t *)AccessHistogram)[i] += ((uint64_t *)ShorterHistogram)[i];
221+
}
197222
}
198223

199224
#ifdef _MSC_VER

compiler-rt/lib/memprof/memprof_allocator.cpp

Lines changed: 73 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@
3434
#include <sched.h>
3535
#include <time.h>
3636

37+
#define MAX_HISTOGRAM_PRINT_SIZE 32U
38+
39+
extern bool __memprof_histogram;
40+
3741
namespace __memprof {
3842
namespace {
3943
using ::llvm::memprof::MemInfoBlock;
@@ -68,6 +72,14 @@ void Print(const MemInfoBlock &M, const u64 id, bool print_terse) {
6872
"cpu: %u, num same dealloc_cpu: %u\n",
6973
M.NumMigratedCpu, M.NumLifetimeOverlaps, M.NumSameAllocCpu,
7074
M.NumSameDeallocCpu);
75+
Printf("AccessCountHistogram[%u]: ", M.AccessHistogramSize);
76+
uint32_t PrintSize = M.AccessHistogramSize > MAX_HISTOGRAM_PRINT_SIZE
77+
? MAX_HISTOGRAM_PRINT_SIZE
78+
: M.AccessHistogramSize;
79+
for (size_t i = 0; i < PrintSize; ++i) {
80+
Printf("%llu ", ((uint64_t *)M.AccessHistogram)[i]);
81+
}
82+
Printf("\n");
7183
}
7284
}
7385
} // namespace
@@ -216,15 +228,34 @@ u64 GetShadowCount(uptr p, u32 size) {
216228
return count;
217229
}
218230

231+
// Accumulates the access count from the shadow for the given pointer and size.
232+
// See memprof_mapping.h for an overview on histogram counters.
233+
u64 GetShadowCountHistogram(uptr p, u32 size) {
234+
u8 *shadow = (u8 *)HISTOGRAM_MEM_TO_SHADOW(p);
235+
u8 *shadow_end = (u8 *)HISTOGRAM_MEM_TO_SHADOW(p + size);
236+
u64 count = 0;
237+
for (; shadow <= shadow_end; shadow++)
238+
count += *shadow;
239+
return count;
240+
}
241+
219242
// Clears the shadow counters (when memory is allocated).
220243
void ClearShadow(uptr addr, uptr size) {
221244
CHECK(AddrIsAlignedByGranularity(addr));
222245
CHECK(AddrIsInMem(addr));
223246
CHECK(AddrIsAlignedByGranularity(addr + size));
224247
CHECK(AddrIsInMem(addr + size - SHADOW_GRANULARITY));
225248
CHECK(REAL(memset));
226-
uptr shadow_beg = MEM_TO_SHADOW(addr);
227-
uptr shadow_end = MEM_TO_SHADOW(addr + size - SHADOW_GRANULARITY) + 1;
249+
uptr shadow_beg;
250+
uptr shadow_end;
251+
if (__memprof_histogram) {
252+
shadow_beg = HISTOGRAM_MEM_TO_SHADOW(addr);
253+
shadow_end = HISTOGRAM_MEM_TO_SHADOW(addr + size);
254+
} else {
255+
shadow_beg = MEM_TO_SHADOW(addr);
256+
shadow_end = MEM_TO_SHADOW(addr + size - SHADOW_GRANULARITY) + 1;
257+
}
258+
228259
if (shadow_end - shadow_beg < common_flags()->clear_shadow_mmap_threshold) {
229260
REAL(memset)((void *)shadow_beg, 0, shadow_end - shadow_beg);
230261
} else {
@@ -279,6 +310,44 @@ struct Allocator {
279310
Print(Value->mib, Key, bool(Arg));
280311
}
281312

313+
// See memprof_mapping.h for an overview on histogram counters.
314+
static MemInfoBlock CreateNewMIB(uptr p, MemprofChunk *m, u64 user_size) {
315+
if (__memprof_histogram) {
316+
return CreateNewMIBWithHistogram(p, m, user_size);
317+
} else {
318+
return CreateNewMIBWithoutHistogram(p, m, user_size);
319+
}
320+
}
321+
322+
static MemInfoBlock CreateNewMIBWithHistogram(uptr p, MemprofChunk *m,
323+
u64 user_size) {
324+
325+
u64 c = GetShadowCountHistogram(p, user_size);
326+
long curtime = GetTimestamp();
327+
uint32_t HistogramSize =
328+
RoundUpTo(user_size, HISTOGRAM_GRANULARITY) / HISTOGRAM_GRANULARITY;
329+
uintptr_t Histogram =
330+
(uintptr_t)InternalAlloc(HistogramSize * sizeof(uint64_t));
331+
memset((void *)Histogram, 0, HistogramSize * sizeof(uint64_t));
332+
for (size_t i = 0; i < HistogramSize; ++i) {
333+
u8 Counter =
334+
*((u8 *)HISTOGRAM_MEM_TO_SHADOW(p + HISTOGRAM_GRANULARITY * i));
335+
((uint64_t *)Histogram)[i] = (uint64_t)Counter;
336+
}
337+
MemInfoBlock newMIB(user_size, c, m->timestamp_ms, curtime, m->cpu_id,
338+
GetCpuId(), Histogram, HistogramSize);
339+
return newMIB;
340+
}
341+
342+
static MemInfoBlock CreateNewMIBWithoutHistogram(uptr p, MemprofChunk *m,
343+
u64 user_size) {
344+
u64 c = GetShadowCount(p, user_size);
345+
long curtime = GetTimestamp();
346+
MemInfoBlock newMIB(user_size, c, m->timestamp_ms, curtime, m->cpu_id,
347+
GetCpuId(), 0, 0);
348+
return newMIB;
349+
}
350+
282351
void FinishAndWrite() {
283352
if (print_text && common_flags()->print_module_map)
284353
DumpProcessMap();
@@ -319,10 +388,7 @@ struct Allocator {
319388
if (!m)
320389
return;
321390
uptr user_beg = ((uptr)m) + kChunkHeaderSize;
322-
u64 c = GetShadowCount(user_beg, user_requested_size);
323-
long curtime = GetTimestamp();
324-
MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime,
325-
m->cpu_id, GetCpuId());
391+
MemInfoBlock newMIB = CreateNewMIB(user_beg, m, user_requested_size);
326392
InsertOrMerge(m->alloc_context_id, newMIB, A->MIBMap);
327393
},
328394
this);
@@ -451,11 +517,7 @@ struct Allocator {
451517
atomic_exchange(&m->user_requested_size, 0, memory_order_acquire);
452518
if (memprof_inited && atomic_load_relaxed(&constructed) &&
453519
!atomic_load_relaxed(&destructing)) {
454-
u64 c = GetShadowCount(p, user_requested_size);
455-
long curtime = GetTimestamp();
456-
457-
MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime,
458-
m->cpu_id, GetCpuId());
520+
MemInfoBlock newMIB = this->CreateNewMIB(p, m, user_requested_size);
459521
InsertOrMerge(m->alloc_context_id, newMIB, MIBMap);
460522
}
461523

compiler-rt/lib/memprof/memprof_flags.inc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,4 +38,4 @@ MEMPROF_FLAG(bool, allocator_frees_and_returns_null_on_realloc_zero, true,
3838
MEMPROF_FLAG(bool, print_text, false,
3939
"If set, prints the heap profile in text format. Else use the raw binary serialization format.")
4040
MEMPROF_FLAG(bool, print_terse, false,
41-
"If set, prints memory profile in a terse format. Only applicable if print_text = true.")
41+
"If set, prints memory profile in a terse format. Only applicable if print_text = true.")

compiler-rt/lib/memprof/memprof_mapping.h

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ static const u64 kDefaultShadowScale = 3;
2222

2323
#define SHADOW_GRANULARITY (1ULL << SHADOW_SCALE)
2424
#define MEMPROF_ALIGNMENT 32
25-
2625
namespace __memprof {
2726

2827
extern uptr kHighMemEnd; // Initialized in __memprof_init.
@@ -37,6 +36,34 @@ extern uptr kHighMemEnd; // Initialized in __memprof_init.
3736
#define MEM_TO_SHADOW(mem) \
3837
((((mem) & SHADOW_MASK) >> SHADOW_SCALE) + (SHADOW_OFFSET))
3938

39+
// Histogram shadow memory is laid out differently from the standard configuration:
40+
41+
// 8 bytes
42+
// +---+---+---+ +---+---+---+ +---+---+---+
43+
// Memory | a | | b | | c |
44+
// +---+---+---+ +---+---+---+ +---+---+---+
45+
46+
// +---+ +---+ +---+
47+
// Shadow | a | | b | | c |
48+
// +---+ +---+ +---+
49+
// 1 byte
50+
//
51+
// Where we have a 1 byte counter for each 8 bytes. HISTOGRAM_MEM_TO_SHADOW
52+
// translates a memory address to the address of its corresponding shadow
53+
// counter. The same data is still provided in MIB whether
54+
// histograms are used or not. Total access counts per allocation are
55+
// computed by summing up all individual 1 byte counters. This can incur an
56+
// accuracy penalty.
57+
58+
#define HISTOGRAM_GRANULARITY 8U
59+
60+
#define HISTOGRAM_MAX_COUNTER 255U
61+
62+
// Mask that rounds an address down to a multiple of HISTOGRAM_GRANULARITY.
// HISTOGRAM_GRANULARITY is an `unsigned int` literal, so the mask is widened
// with 1ULL before complementing; a 32-bit mask would be zero-extended when
// and-ed with a 64-bit address and would clear the address's upper 32 bits.
#define HISTOGRAM_SHADOW_MASK (~(HISTOGRAM_GRANULARITY - 1ULL))
63+
64+
#define HISTOGRAM_MEM_TO_SHADOW(mem) \
65+
((((mem) & HISTOGRAM_SHADOW_MASK) >> SHADOW_SCALE) + (SHADOW_OFFSET))
66+
4067
#define SHADOW_ENTRY_SIZE (MEM_GRANULARITY >> SHADOW_SCALE)
4168

4269
#define kLowMemBeg 0
@@ -108,6 +135,14 @@ inline void RecordAccess(uptr a) {
108135
(*shadow_address)++;
109136
}
110137

138+
inline void RecordAccessHistogram(uptr a) {
139+
CHECK_EQ(SHADOW_ENTRY_SIZE, 8);
140+
u8 *shadow_address = (u8 *)HISTOGRAM_MEM_TO_SHADOW(a);
141+
if (*shadow_address < HISTOGRAM_MAX_COUNTER) {
142+
(*shadow_address)++;
143+
}
144+
}
145+
111146
} // namespace __memprof
112147

113148
#endif // MEMPROF_MAPPING_H

compiler-rt/lib/memprof/memprof_mibmap.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,18 @@ void InsertOrMerge(const uptr Id, const MemInfoBlock &Block, MIBMapTy &Map) {
3030
} else {
3131
LockedMemInfoBlock *lmib = *h;
3232
SpinMutexLock lock(&lmib->mutex);
33+
uintptr_t ShorterHistogram;
34+
if (Block.AccessHistogramSize > lmib->mib.AccessHistogramSize)
35+
ShorterHistogram = lmib->mib.AccessHistogram;
36+
else
37+
ShorterHistogram = Block.AccessHistogram;
38+
3339
lmib->mib.Merge(Block);
40+
// The larger histogram is kept and the shorter histogram is discarded after
41+
// adding the counters to the larger histogram. Free only the shorter
42+
// histogram.
43+
if (Block.AccessHistogramSize > 0 || lmib->mib.AccessHistogramSize > 0)
44+
InternalFree((void *)ShorterHistogram);
3445
}
3546
}
3647

compiler-rt/lib/memprof/memprof_rawprofile.cpp

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -146,24 +146,38 @@ void SerializeStackToBuffer(const Vector<u64> &StackIds,
146146
// ---------- MIB Entry 0
147147
// Alloc Count
148148
// ...
149+
// ---- AccessHistogram Entry 0
150+
// ...
151+
// ---- AccessHistogram Entry AccessHistogramSize - 1
149152
// ---------- MIB Entry 1
150153
// Alloc Count
151154
// ...
155+
// ---- AccessHistogram Entry 0
156+
// ...
157+
// ---- AccessHistogram Entry AccessHistogramSize - 1
152158
// ----------
153159
void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const Vector<u64> &StackIds,
154160
const u64 ExpectedNumBytes, char *&Buffer) {
155161
char *Ptr = Buffer;
156162
const u64 NumEntries = StackIds.Size();
157163
Ptr = WriteBytes(NumEntries, Ptr);
158-
159164
for (u64 i = 0; i < NumEntries; i++) {
160165
const u64 Key = StackIds[i];
161166
MIBMapTy::Handle h(&MIBMap, Key, /*remove=*/true, /*create=*/false);
162167
CHECK(h.exists());
163168
Ptr = WriteBytes(Key, Ptr);
169+
// FIXME: We unnecessarily serialize the AccessHistogram pointer. Adding a
170+
// serialization schema will fix this issue. See also FIXME in
171+
// deserialization.
164172
Ptr = WriteBytes((*h)->mib, Ptr);
173+
for (u64 j = 0; j < (*h)->mib.AccessHistogramSize; ++j) {
174+
u64 HistogramEntry = ((u64 *)((*h)->mib.AccessHistogram))[j];
175+
Ptr = WriteBytes(HistogramEntry, Ptr);
176+
}
177+
if ((*h)->mib.AccessHistogramSize > 0) {
178+
InternalFree((void *)((*h)->mib.AccessHistogram));
179+
}
165180
}
166-
167181
CHECK(ExpectedNumBytes >= static_cast<u64>(Ptr - Buffer) &&
168182
"Expected num bytes != actual bytes written");
169183
}
@@ -192,7 +206,15 @@ void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const Vector<u64> &StackIds,
192206
// ---------- MIB Entry
193207
// Alloc Count
194208
// ...
195-
// ----------
209+
// ---- AccessHistogram Entry 0
210+
// ...
211+
// ---- AccessHistogram Entry AccessHistogramSize - 1
212+
// ---------- MIB Entry 1
213+
// Alloc Count
214+
// ...
215+
// ---- AccessHistogram Entry 0
216+
// ...
217+
// ---- AccessHistogram Entry AccessHistogramSize - 1
196218
// Optional Padding Bytes
197219
// ---------- Stack Info
198220
// Num Entries
@@ -218,13 +240,26 @@ u64 SerializeToRawProfile(MIBMapTy &MIBMap, ArrayRef<LoadedModule> Modules,
218240
const u64 NumMIBInfoBytes = RoundUpTo(
219241
sizeof(u64) + StackIds.Size() * (sizeof(u64) + sizeof(MemInfoBlock)), 8);
220242

243+
// Compute the total number of AccessHistogram entries.
244+
u64 TotalAccessHistogramEntries = 0;
245+
MIBMap.ForEach(
246+
[](const uptr Key, UNUSED LockedMemInfoBlock *const &MIB, void *Arg) {
247+
u64 *TotalAccessHistogramEntries = (u64 *)Arg;
248+
*TotalAccessHistogramEntries += MIB->mib.AccessHistogramSize;
249+
},
250+
reinterpret_cast<void *>(&TotalAccessHistogramEntries));
251+
const u64 NumHistogramBytes =
252+
RoundUpTo(TotalAccessHistogramEntries * sizeof(uint64_t), 8);
253+
221254
const u64 NumStackBytes = RoundUpTo(StackSizeBytes(StackIds), 8);
222255

223256
// Ensure that the profile is 8b aligned. We allow for some optional padding
224257
// at the end so that any subsequent profile serialized to the same file does
225258
// not incur unaligned accesses.
226-
const u64 TotalSizeBytes = RoundUpTo(
227-
sizeof(Header) + NumSegmentBytes + NumStackBytes + NumMIBInfoBytes, 8);
259+
const u64 TotalSizeBytes =
260+
RoundUpTo(sizeof(Header) + NumSegmentBytes + NumStackBytes +
261+
NumMIBInfoBytes + NumHistogramBytes,
262+
8);
228263

229264
// Allocate the memory for the entire buffer incl. info blocks.
230265
Buffer = (char *)InternalAlloc(TotalSizeBytes);
@@ -235,14 +270,16 @@ u64 SerializeToRawProfile(MIBMapTy &MIBMap, ArrayRef<LoadedModule> Modules,
235270
static_cast<u64>(TotalSizeBytes),
236271
sizeof(Header),
237272
sizeof(Header) + NumSegmentBytes,
238-
sizeof(Header) + NumSegmentBytes + NumMIBInfoBytes};
273+
sizeof(Header) + NumSegmentBytes + NumMIBInfoBytes +
274+
NumHistogramBytes};
239275
Ptr = WriteBytes(header, Ptr);
240276

241277
SerializeSegmentsToBuffer(Modules, NumSegmentBytes, Ptr);
242278
Ptr += NumSegmentBytes;
243279

244-
SerializeMIBInfoToBuffer(MIBMap, StackIds, NumMIBInfoBytes, Ptr);
245-
Ptr += NumMIBInfoBytes;
280+
SerializeMIBInfoToBuffer(MIBMap, StackIds,
281+
NumMIBInfoBytes + NumHistogramBytes, Ptr);
282+
Ptr += NumMIBInfoBytes + NumHistogramBytes;
246283

247284
SerializeStackToBuffer(StackIds, NumStackBytes, Ptr);
248285

0 commit comments

Comments
 (0)