Skip to content

Commit dd2113d

Browse files
[Memprof] Adds the option to collect AccessCountHistograms for memprof.
Adds the compile-time flag -mllvm -memprof-histogram and the runtime flag histogram=true|false to turn histogram collection on and off. The -memprof-histogram flag relies on -memprof-use-callbacks=true to work. In histogram mode, updates the shadow mapping logic from one 8-byte counter per 64 bytes to one 1-byte counter per 8 bytes, capped at 255. Only this granularity is supported as of now. Updates the RawMemprofReader and the serialization of MemoryInfoBlocks to binary format, including a change of the raw binary format from version 3 to version 4. The current MemprofReader remains compatible and can still read memprofraw version 3, but version 3 can no longer be produced. Adds a test case, memprofV3, to make sure RawMemprofReader remains backward compatible. Updates the creation of MemoryInfoBlocks with and without histograms. When two MemoryInfoBlocks are merged, AccessCounts are summed up and the shorter histogram is removed. Adds a memprof_histogram test case. This is the initial commit for adding AccessCountHistograms, covering the pipeline up to the RawProfile for memprof.
1 parent 4d20f49 commit dd2113d

37 files changed

+1241
-38
lines changed

compiler-rt/include/profile/MIBEntryDef.inc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,3 +51,5 @@ MIBEntryDef(MaxAccessDensity = 22, MaxAccessDensity, uint32_t)
5151
MIBEntryDef(TotalLifetimeAccessDensity = 23, TotalLifetimeAccessDensity, uint64_t)
5252
MIBEntryDef(MinLifetimeAccessDensity = 24, MinLifetimeAccessDensity, uint32_t)
5353
MIBEntryDef(MaxLifetimeAccessDensity = 25, MaxLifetimeAccessDensity, uint32_t)
54+
MIBEntryDef(AccessHistogramSize = 26, AccessHistogramSize, uint32_t)
55+
MIBEntryDef(AccessHistogram = 27, AccessHistogram, uintptr_t)

compiler-rt/include/profile/MemProfData.inc

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,11 @@
3333
(uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129)
3434

3535
// The version number of the raw binary format.
36-
#define MEMPROF_RAW_VERSION 3ULL
36+
#define MEMPROF_RAW_VERSION 4ULL
37+
38+
// Currently supported versions.
39+
#define MEMPROF_RAW_SUPPORTED_VERSIONS \
40+
{ 3ULL, 4ULL }
3741

3842
#define MEMPROF_BUILDID_MAX_SIZE 32ULL
3943

@@ -119,7 +123,8 @@ MemInfoBlock() {
119123
}
120124

121125
MemInfoBlock(uint32_t Size, uint64_t AccessCount, uint32_t AllocTs,
122-
uint32_t DeallocTs, uint32_t AllocCpu, uint32_t DeallocCpu)
126+
uint32_t DeallocTs, uint32_t AllocCpu, uint32_t DeallocCpu,
127+
uintptr_t Histogram, uint32_t HistogramSize)
123128
: MemInfoBlock() {
124129
AllocCount = 1U;
125130
TotalAccessCount = AccessCount;
@@ -149,6 +154,8 @@ MemInfoBlock(uint32_t Size, uint64_t AccessCount, uint32_t AllocTs,
149154
AllocCpuId = AllocCpu;
150155
DeallocCpuId = DeallocCpu;
151156
NumMigratedCpu = AllocCpuId != DeallocCpuId;
157+
AccessHistogramSize = HistogramSize;
158+
AccessHistogram = Histogram;
152159
}
153160

154161
void Merge(const MemInfoBlock &newMIB) {
@@ -194,6 +201,24 @@ void Merge(const MemInfoBlock &newMIB) {
194201
NumSameDeallocCpu += DeallocCpuId == newMIB.DeallocCpuId;
195202
AllocCpuId = newMIB.AllocCpuId;
196203
DeallocCpuId = newMIB.DeallocCpuId;
204+
205+
// For merging histograms, we always keep the longer histogram, and add
206+
// values of shorter histogram to larger one.
207+
uintptr_t ShorterHistogram;
208+
uint32_t ShorterHistogramSize;
209+
if (newMIB.AccessHistogramSize > AccessHistogramSize) {
210+
ShorterHistogram = AccessHistogram;
211+
ShorterHistogramSize = AccessHistogramSize;
212+
// Swap histogram of current to larger histogram
213+
AccessHistogram = newMIB.AccessHistogram;
214+
AccessHistogramSize = newMIB.AccessHistogramSize;
215+
} else {
216+
ShorterHistogram = newMIB.AccessHistogram;
217+
ShorterHistogramSize = newMIB.AccessHistogramSize;
218+
}
219+
for (size_t i = 0; i < ShorterHistogramSize; ++i) {
220+
((uint64_t *)AccessHistogram)[i] += ((uint64_t *)ShorterHistogram)[i];
221+
}
197222
}
198223

199224
#ifdef _MSC_VER

compiler-rt/lib/memprof/memprof_allocator.cpp

Lines changed: 80 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@
3434
#include <sched.h>
3535
#include <time.h>
3636

37+
#define MAX_HISTOGRAM_PRINT_SIZE 32U
38+
3739
namespace __memprof {
3840
namespace {
3941
using ::llvm::memprof::MemInfoBlock;
@@ -68,6 +70,14 @@ void Print(const MemInfoBlock &M, const u64 id, bool print_terse) {
6870
"cpu: %u, num same dealloc_cpu: %u\n",
6971
M.NumMigratedCpu, M.NumLifetimeOverlaps, M.NumSameAllocCpu,
7072
M.NumSameDeallocCpu);
73+
Printf("AcccessCountHistogram[%u]: ", M.AccessHistogramSize);
74+
uint32_t PrintSize = M.AccessHistogramSize > MAX_HISTOGRAM_PRINT_SIZE
75+
? MAX_HISTOGRAM_PRINT_SIZE
76+
: M.AccessHistogramSize;
77+
for (size_t i = 0; i < PrintSize; ++i) {
78+
Printf("%llu ", ((uint64_t *)M.AccessHistogram)[i]);
79+
}
80+
Printf("\n");
7181
}
7282
}
7383
} // namespace
@@ -216,6 +226,32 @@ u64 GetShadowCount(uptr p, u32 size) {
216226
return count;
217227
}
218228

229+
// Accumulates the access count from the shadow for the given pointer and size.
230+
u64 GetShadowCountHistogram(uptr p, u32 size) {
231+
u8 *shadow = (u8 *)HISTOGRAM_MEM_TO_SHADOW(p);
232+
u8 *shadow_end = (u8 *)HISTOGRAM_MEM_TO_SHADOW(p + size);
233+
u64 count = 0;
234+
for (; shadow <= shadow_end; shadow++)
235+
count += *shadow;
236+
return count;
237+
}
238+
239+
// If we use the normal approach in clearCountersWithoutHistogram, the histogram
240+
// will clear too much data and may overwrite shadow counters that are in use.
241+
void clearCountersHistogram(uptr addr, uptr size) {
242+
u8 *shadow_8 = (u8 *)HISTOGRAM_MEM_TO_SHADOW(addr);
243+
u8 *shadow_end_8 = (u8 *)HISTOGRAM_MEM_TO_SHADOW(addr + size);
244+
for (; shadow_8 < shadow_end_8; shadow_8++) {
245+
*shadow_8 = 0;
246+
}
247+
}
248+
249+
void clearCountersWithoutHistogram(uptr addr, uptr size) {
250+
uptr shadow_beg = MEM_TO_SHADOW(addr);
251+
uptr shadow_end = MEM_TO_SHADOW(addr + size - SHADOW_GRANULARITY) + 1;
252+
REAL(memset)((void *)shadow_beg, 0, shadow_end - shadow_beg);
253+
}
254+
219255
// Clears the shadow counters (when memory is allocated).
220256
void ClearShadow(uptr addr, uptr size) {
221257
CHECK(AddrIsAlignedByGranularity(addr));
@@ -226,7 +262,11 @@ void ClearShadow(uptr addr, uptr size) {
226262
uptr shadow_beg = MEM_TO_SHADOW(addr);
227263
uptr shadow_end = MEM_TO_SHADOW(addr + size - SHADOW_GRANULARITY) + 1;
228264
if (shadow_end - shadow_beg < common_flags()->clear_shadow_mmap_threshold) {
229-
REAL(memset)((void *)shadow_beg, 0, shadow_end - shadow_beg);
265+
if (flags()->histogram) {
266+
clearCountersHistogram(addr, size);
267+
} else {
268+
clearCountersWithoutHistogram(addr, size);
269+
}
230270
} else {
231271
uptr page_size = GetPageSizeCached();
232272
uptr page_beg = RoundUpTo(shadow_beg, page_size);
@@ -279,6 +319,43 @@ struct Allocator {
279319
Print(Value->mib, Key, bool(Arg));
280320
}
281321

322+
static MemInfoBlock CreateNewMIB(uptr p, MemprofChunk *m, u64 user_size) {
323+
if (flags()->histogram) {
324+
return CreateNewMIBWithHistogram(p, m, user_size);
325+
} else {
326+
return CreateNewMIBWithoutHistogram(p, m, user_size);
327+
}
328+
}
329+
330+
static MemInfoBlock CreateNewMIBWithHistogram(uptr p, MemprofChunk *m,
331+
u64 user_size) {
332+
333+
u64 c = GetShadowCountHistogram(p, user_size);
334+
long curtime = GetTimestamp();
335+
uint32_t HistogramSize =
336+
RoundUpTo(user_size, HISTOGRAM_GRANULARITY) / HISTOGRAM_GRANULARITY;
337+
uintptr_t Histogram =
338+
(uintptr_t)InternalAlloc(HistogramSize * sizeof(uint64_t));
339+
memset((void *)Histogram, 0, HistogramSize * sizeof(uint64_t));
340+
for (size_t i = 0; i < HistogramSize; ++i) {
341+
u8 Counter =
342+
*((u8 *)HISTOGRAM_MEM_TO_SHADOW(p + HISTOGRAM_GRANULARITY * i));
343+
((uint64_t *)Histogram)[i] = (uint64_t)Counter;
344+
}
345+
MemInfoBlock newMIB(user_size, c, m->timestamp_ms, curtime, m->cpu_id,
346+
GetCpuId(), Histogram, HistogramSize);
347+
return newMIB;
348+
}
349+
350+
static MemInfoBlock CreateNewMIBWithoutHistogram(uptr p, MemprofChunk *m,
351+
u64 user_size) {
352+
u64 c = GetShadowCount(p, user_size);
353+
long curtime = GetTimestamp();
354+
MemInfoBlock newMIB(user_size, c, m->timestamp_ms, curtime, m->cpu_id,
355+
GetCpuId(), 0, 0);
356+
return newMIB;
357+
}
358+
282359
void FinishAndWrite() {
283360
if (print_text && common_flags()->print_module_map)
284361
DumpProcessMap();
@@ -319,10 +396,7 @@ struct Allocator {
319396
if (!m)
320397
return;
321398
uptr user_beg = ((uptr)m) + kChunkHeaderSize;
322-
u64 c = GetShadowCount(user_beg, user_requested_size);
323-
long curtime = GetTimestamp();
324-
MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime,
325-
m->cpu_id, GetCpuId());
399+
MemInfoBlock newMIB = CreateNewMIB(user_beg, m, user_requested_size);
326400
InsertOrMerge(m->alloc_context_id, newMIB, A->MIBMap);
327401
},
328402
this);
@@ -451,11 +525,7 @@ struct Allocator {
451525
atomic_exchange(&m->user_requested_size, 0, memory_order_acquire);
452526
if (memprof_inited && atomic_load_relaxed(&constructed) &&
453527
!atomic_load_relaxed(&destructing)) {
454-
u64 c = GetShadowCount(p, user_requested_size);
455-
long curtime = GetTimestamp();
456-
457-
MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime,
458-
m->cpu_id, GetCpuId());
528+
MemInfoBlock newMIB = this->CreateNewMIB(p, m, user_requested_size);
459529
InsertOrMerge(m->alloc_context_id, newMIB, MIBMap);
460530
}
461531

compiler-rt/lib/memprof/memprof_flags.inc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,6 @@ MEMPROF_FLAG(bool, print_text, false,
3939
"If set, prints the heap profile in text format. Else use the raw binary serialization format.")
4040
MEMPROF_FLAG(bool, print_terse, false,
4141
"If set, prints memory profile in a terse format. Only applicable if print_text = true.")
42+
MEMPROF_FLAG(bool, histogram, false,
43+
"If set, collects a histogram in memory info blocks alongside one "
44+
"large counter.")

compiler-rt/lib/memprof/memprof_mapping.h

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ static const u64 kDefaultShadowScale = 3;
2323
#define SHADOW_GRANULARITY (1ULL << SHADOW_SCALE)
2424
#define MEMPROF_ALIGNMENT 32
2525

26+
#define HISTOGRAM_GRANULARITY 8
27+
#define HISTOGRAM_MAX_COUNTER 255U
28+
2629
namespace __memprof {
2730

2831
extern uptr kHighMemEnd; // Initialized in __memprof_init.
@@ -37,6 +40,11 @@ extern uptr kHighMemEnd; // Initialized in __memprof_init.
3740
#define MEM_TO_SHADOW(mem) \
3841
((((mem) & SHADOW_MASK) >> SHADOW_SCALE) + (SHADOW_OFFSET))
3942

43+
#define HISTOGRAM_SHADOW_MASK ~(HISTOGRAM_GRANULARITY - 1)
44+
45+
#define HISTOGRAM_MEM_TO_SHADOW(mem) \
46+
((((mem) & HISTOGRAM_SHADOW_MASK) >> SHADOW_SCALE) + (SHADOW_OFFSET))
47+
4048
#define SHADOW_ENTRY_SIZE (MEM_GRANULARITY >> SHADOW_SCALE)
4149

4250
#define kLowMemBeg 0
@@ -104,8 +112,15 @@ inline bool AddrIsAlignedByGranularity(uptr a) {
104112
inline void RecordAccess(uptr a) {
105113
// If we use a different shadow size then the type below needs adjustment.
106114
CHECK_EQ(SHADOW_ENTRY_SIZE, 8);
107-
u64 *shadow_address = (u64 *)MEM_TO_SHADOW(a);
108-
(*shadow_address)++;
115+
if (flags()->histogram) {
116+
u8 *shadow_address = (u8 *)HISTOGRAM_MEM_TO_SHADOW(a);
117+
if (*shadow_address < HISTOGRAM_MAX_COUNTER) {
118+
(*shadow_address)++;
119+
}
120+
} else {
121+
u64 *shadow_address = (u64 *)MEM_TO_SHADOW(a);
122+
(*shadow_address)++;
123+
}
109124
}
110125

111126
} // namespace __memprof

compiler-rt/lib/memprof/memprof_mibmap.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,16 @@ void InsertOrMerge(const uptr Id, const MemInfoBlock &Block, MIBMapTy &Map) {
3030
} else {
3131
LockedMemInfoBlock *lmib = *h;
3232
SpinMutexLock lock(&lmib->mutex);
33+
uintptr_t ShorterHistogram;
34+
if (Block.AccessHistogramSize > lmib->mib.AccessHistogramSize)
35+
ShorterHistogram = lmib->mib.AccessHistogram;
36+
else
37+
ShorterHistogram = Block.AccessHistogram;
38+
3339
lmib->mib.Merge(Block);
40+
// Free only the shorter Histogram
41+
if (Block.AccessHistogramSize > 0 || lmib->mib.AccessHistogramSize > 0)
42+
InternalFree((void *)ShorterHistogram);
3443
}
3544
}
3645

compiler-rt/lib/memprof/memprof_rawprofile.cpp

Lines changed: 42 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -146,24 +146,35 @@ void SerializeStackToBuffer(const Vector<u64> &StackIds,
146146
// ---------- MIB Entry 0
147147
// Alloc Count
148148
// ...
149+
// ---- AccessHistogram Entry 0
150+
// ...
151+
// ---- AccessHistogram Entry AccessHistogramSize - 1
149152
// ---------- MIB Entry 1
150153
// Alloc Count
151154
// ...
155+
// ---- AccessHistogram Entry 0
156+
// ...
157+
// ---- AccessHistogram Entry AccessHistogramSize - 1
152158
// ----------
153159
void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const Vector<u64> &StackIds,
154160
const u64 ExpectedNumBytes, char *&Buffer) {
155161
char *Ptr = Buffer;
156162
const u64 NumEntries = StackIds.Size();
157163
Ptr = WriteBytes(NumEntries, Ptr);
158-
159164
for (u64 i = 0; i < NumEntries; i++) {
160165
const u64 Key = StackIds[i];
161166
MIBMapTy::Handle h(&MIBMap, Key, /*remove=*/true, /*create=*/false);
162167
CHECK(h.exists());
163168
Ptr = WriteBytes(Key, Ptr);
164169
Ptr = WriteBytes((*h)->mib, Ptr);
170+
for (u64 j = 0; j < (*h)->mib.AccessHistogramSize; ++j) {
171+
u64 HistogramEntry = ((u64 *)((*h)->mib.AccessHistogram))[j];
172+
Ptr = WriteBytes(HistogramEntry, Ptr);
173+
}
174+
if ((*h)->mib.AccessHistogramSize > 0) {
175+
InternalFree((void *)((*h)->mib.AccessHistogram));
176+
}
165177
}
166-
167178
CHECK(ExpectedNumBytes >= static_cast<u64>(Ptr - Buffer) &&
168179
"Expected num bytes != actual bytes written");
169180
}
@@ -192,7 +203,15 @@ void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const Vector<u64> &StackIds,
192203
// ---------- MIB Entry
193204
// Alloc Count
194205
// ...
195-
// ----------
206+
// ---- AccessHistogram Entry 0
207+
// ...
208+
// ---- AccessHistogram Entry AccessHistogramSize - 1
209+
// ---------- MIB Entry 1
210+
// Alloc Count
211+
// ...
212+
// ---- AccessHistogram Entry 0
213+
// ...
214+
// ---- AccessHistogram Entry AccessHistogramSize - 1
196215
// Optional Padding Bytes
197216
// ---------- Stack Info
198217
// Num Entries
@@ -218,13 +237,26 @@ u64 SerializeToRawProfile(MIBMapTy &MIBMap, ArrayRef<LoadedModule> Modules,
218237
const u64 NumMIBInfoBytes = RoundUpTo(
219238
sizeof(u64) + StackIds.Size() * (sizeof(u64) + sizeof(MemInfoBlock)), 8);
220239

240+
// Get Number of AccessHistogram entries in total
241+
u64 TotalAccessHistogramEntries = 0;
242+
MIBMap.ForEach(
243+
[](const uptr Key, UNUSED LockedMemInfoBlock *const &MIB, void *Arg) {
244+
u64 *TotalAccessHistogramEntries = (u64 *)Arg;
245+
*TotalAccessHistogramEntries += MIB->mib.AccessHistogramSize;
246+
},
247+
reinterpret_cast<void *>(&TotalAccessHistogramEntries));
248+
const u64 NumHistogramBytes =
249+
RoundUpTo(TotalAccessHistogramEntries * sizeof(uint64_t), 8);
250+
221251
const u64 NumStackBytes = RoundUpTo(StackSizeBytes(StackIds), 8);
222252

223253
// Ensure that the profile is 8b aligned. We allow for some optional padding
224254
// at the end so that any subsequent profile serialized to the same file does
225255
// not incur unaligned accesses.
226-
const u64 TotalSizeBytes = RoundUpTo(
227-
sizeof(Header) + NumSegmentBytes + NumStackBytes + NumMIBInfoBytes, 8);
256+
const u64 TotalSizeBytes =
257+
RoundUpTo(sizeof(Header) + NumSegmentBytes + NumStackBytes +
258+
NumMIBInfoBytes + NumHistogramBytes,
259+
8);
228260

229261
// Allocate the memory for the entire buffer incl. info blocks.
230262
Buffer = (char *)InternalAlloc(TotalSizeBytes);
@@ -235,14 +267,16 @@ u64 SerializeToRawProfile(MIBMapTy &MIBMap, ArrayRef<LoadedModule> Modules,
235267
static_cast<u64>(TotalSizeBytes),
236268
sizeof(Header),
237269
sizeof(Header) + NumSegmentBytes,
238-
sizeof(Header) + NumSegmentBytes + NumMIBInfoBytes};
270+
sizeof(Header) + NumSegmentBytes + NumMIBInfoBytes +
271+
NumHistogramBytes};
239272
Ptr = WriteBytes(header, Ptr);
240273

241274
SerializeSegmentsToBuffer(Modules, NumSegmentBytes, Ptr);
242275
Ptr += NumSegmentBytes;
243276

244-
SerializeMIBInfoToBuffer(MIBMap, StackIds, NumMIBInfoBytes, Ptr);
245-
Ptr += NumMIBInfoBytes;
277+
SerializeMIBInfoToBuffer(MIBMap, StackIds,
278+
NumMIBInfoBytes + NumHistogramBytes, Ptr);
279+
Ptr += NumMIBInfoBytes + NumHistogramBytes;
246280

247281
SerializeStackToBuffer(StackIds, NumStackBytes, Ptr);
248282

llvm/include/llvm/ProfileData/MIBEntryDef.inc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,3 +51,5 @@ MIBEntryDef(MaxAccessDensity = 22, MaxAccessDensity, uint32_t)
5151
MIBEntryDef(TotalLifetimeAccessDensity = 23, TotalLifetimeAccessDensity, uint64_t)
5252
MIBEntryDef(MinLifetimeAccessDensity = 24, MinLifetimeAccessDensity, uint32_t)
5353
MIBEntryDef(MaxLifetimeAccessDensity = 25, MaxLifetimeAccessDensity, uint32_t)
54+
MIBEntryDef(AccessHistogramSize = 26, AccessHistogramSize, uint32_t)
55+
MIBEntryDef(AccessHistogram = 27, AccessHistogram, uintptr_t)

llvm/include/llvm/ProfileData/MemProf.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,13 @@ struct PortableMemInfoBlock {
123123
OS << " " << #Name << ": " << Name << "\n";
124124
#include "llvm/ProfileData/MIBEntryDef.inc"
125125
#undef MIBEntryDef
126+
if (AccessHistogramSize > 0) {
127+
OS << " " << "AccessHistogramValues" << ":";
128+
for (uint32_t I = 0; I < AccessHistogramSize; ++I) {
129+
OS << " -" << ((uint64_t *)AccessHistogram)[I];
130+
}
131+
OS << "\n";
132+
}
126133
}
127134

128135
// Return the schema, only for unit tests.

0 commit comments

Comments
 (0)