-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[Memprof] Adds the option to collect AccessCountHistograms for memprof. #94264
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,6 +34,10 @@ | |
#include <sched.h> | ||
#include <time.h> | ||
|
||
#define MAX_HISTOGRAM_PRINT_SIZE 32U | ||
|
||
extern bool __memprof_histogram; | ||
|
||
namespace __memprof { | ||
namespace { | ||
using ::llvm::memprof::MemInfoBlock; | ||
|
@@ -68,6 +72,14 @@ void Print(const MemInfoBlock &M, const u64 id, bool print_terse) { | |
"cpu: %u, num same dealloc_cpu: %u\n", | ||
M.NumMigratedCpu, M.NumLifetimeOverlaps, M.NumSameAllocCpu, | ||
M.NumSameDeallocCpu); | ||
Printf("AccessCountHistogram[%u]: ", M.AccessHistogramSize); | ||
uint32_t PrintSize = M.AccessHistogramSize > MAX_HISTOGRAM_PRINT_SIZE | ||
? MAX_HISTOGRAM_PRINT_SIZE | ||
: M.AccessHistogramSize; | ||
for (size_t i = 0; i < PrintSize; ++i) { | ||
Printf("%llu ", ((uint64_t *)M.AccessHistogram)[i]); | ||
} | ||
Printf("\n"); | ||
} | ||
} | ||
} // namespace | ||
|
@@ -216,15 +228,34 @@ u64 GetShadowCount(uptr p, u32 size) { | |
return count; | ||
} | ||
|
||
// Sums the one-byte histogram shadow counters for the memory range that
// starts at `p` and spans `size` bytes, and returns the total.
// See memprof_mapping.h for an overview on histogram counters.
// NOTE(review): the walk includes the shadow byte that `p + size` itself maps
// to (inclusive upper bound) -- confirm this is intended.
u64 GetShadowCountHistogram(uptr p, u32 size) {
  u8 *cursor = (u8 *)HISTOGRAM_MEM_TO_SHADOW(p);
  u8 *const last = (u8 *)HISTOGRAM_MEM_TO_SHADOW(p + size);
  u64 total = 0;
  while (cursor <= last) {
    total += *cursor;
    ++cursor;
  }
  return total;
}
|
||
// Clears the shadow counters (when memory is allocated). | ||
void ClearShadow(uptr addr, uptr size) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Looking at this function more, there are a lot of uses of the MEM_TO_SHADOW computed values, which is not the right mapping function to use with the smaller granularity of the histogram case. I think you probably need to version the calls to MEM_TO_SHADOW here, then all of the rest of the code can work as is? I.e. you wouldn't need the 2 versions of […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This function uses the MEM_TO_SHADOW computed pointers "kind of", but in reality it just rounds these pointers up to the nearest page size. So in effect, full pages should be cleared no matter if it is with histogram or without. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I guess it works, because the shadow granularities are less than the page size, and because they are both scaling by the same 8 to 1 scale, and also I guess because the clear_shadow_mmap_threshold is an optimization and doesn't need to be exact. However, it still feels a little wonky to me (and also means that we have to do extra mapping operations here and again in […]).
I.e. set shadow_beg/end based on whether we are doing the histogramming or not, leave the rest as-is. Would that work? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, I agree, this is nicer. Changed. |
||
CHECK(AddrIsAlignedByGranularity(addr)); | ||
CHECK(AddrIsInMem(addr)); | ||
CHECK(AddrIsAlignedByGranularity(addr + size)); | ||
CHECK(AddrIsInMem(addr + size - SHADOW_GRANULARITY)); | ||
CHECK(REAL(memset)); | ||
uptr shadow_beg = MEM_TO_SHADOW(addr); | ||
uptr shadow_end = MEM_TO_SHADOW(addr + size - SHADOW_GRANULARITY) + 1; | ||
uptr shadow_beg; | ||
uptr shadow_end; | ||
if (__memprof_histogram) { | ||
shadow_beg = HISTOGRAM_MEM_TO_SHADOW(addr); | ||
shadow_end = HISTOGRAM_MEM_TO_SHADOW(addr + size); | ||
} else { | ||
shadow_beg = MEM_TO_SHADOW(addr); | ||
shadow_end = MEM_TO_SHADOW(addr + size - SHADOW_GRANULARITY) + 1; | ||
} | ||
|
||
if (shadow_end - shadow_beg < common_flags()->clear_shadow_mmap_threshold) { | ||
REAL(memset)((void *)shadow_beg, 0, shadow_end - shadow_beg); | ||
} else { | ||
|
@@ -279,6 +310,44 @@ struct Allocator { | |
Print(Value->mib, Key, bool(Arg)); | ||
} | ||
|
||
// Builds the MemInfoBlock for chunk `m`, whose user memory starts at `p` and
// has `user_size` bytes. Dispatches on the global __memprof_histogram flag to
// the histogram or non-histogram builder.
// See memprof_mapping.h for an overview on histogram counters.
static MemInfoBlock CreateNewMIB(uptr p, MemprofChunk *m, u64 user_size) {
  return __memprof_histogram ? CreateNewMIBWithHistogram(p, m, user_size)
                             : CreateNewMIBWithoutHistogram(p, m, user_size);
}
|
||
// Builds a MemInfoBlock that carries an access histogram for the allocation
// of `user_size` bytes starting at `p`, described by chunk header `m`.
//
// One u8 shadow counter is read per HISTOGRAM_GRANULARITY bytes of user
// memory and widened into a uint64_t histogram entry. The histogram buffer is
// obtained from InternalAlloc and its address is stored in the returned
// block; this function does not free it.
static MemInfoBlock CreateNewMIBWithHistogram(uptr p, MemprofChunk *m,
                                              u64 user_size) {
  u64 c = GetShadowCountHistogram(p, user_size);
  long curtime = GetTimestamp();
  // Number of histogram entries: user_size rounded up to whole granules.
  uint32_t HistogramSize =
      RoundUpTo(user_size, HISTOGRAM_GRANULARITY) / HISTOGRAM_GRANULARITY;
  uintptr_t Histogram =
      (uintptr_t)InternalAlloc(HistogramSize * sizeof(uint64_t));
  // No zero-fill is needed: the loop below stores to every one of the
  // HistogramSize entries before the buffer is handed to the MemInfoBlock.
  for (size_t i = 0; i < HistogramSize; ++i) {
    u8 Counter =
        *((u8 *)HISTOGRAM_MEM_TO_SHADOW(p + HISTOGRAM_GRANULARITY * i));
    ((uint64_t *)Histogram)[i] = (uint64_t)Counter;
  }
  MemInfoBlock newMIB(user_size, c, m->timestamp_ms, curtime, m->cpu_id,
                      GetCpuId(), Histogram, HistogramSize);
  return newMIB;
}
|
||
// Builds a MemInfoBlock with no histogram attached: the access count is the
// value returned by GetShadowCount for [p, p + user_size), and the two
// trailing histogram constructor arguments are both passed as 0.
static MemInfoBlock CreateNewMIBWithoutHistogram(uptr p, MemprofChunk *m,
                                                 u64 user_size) {
  const u64 access_count = GetShadowCount(p, user_size);
  const long now = GetTimestamp();
  return MemInfoBlock(user_size, access_count, m->timestamp_ms, now,
                      m->cpu_id, GetCpuId(), 0, 0);
}
|
||
void FinishAndWrite() { | ||
if (print_text && common_flags()->print_module_map) | ||
DumpProcessMap(); | ||
|
@@ -319,10 +388,7 @@ struct Allocator { | |
if (!m) | ||
return; | ||
uptr user_beg = ((uptr)m) + kChunkHeaderSize; | ||
u64 c = GetShadowCount(user_beg, user_requested_size); | ||
long curtime = GetTimestamp(); | ||
MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime, | ||
m->cpu_id, GetCpuId()); | ||
MemInfoBlock newMIB = CreateNewMIB(user_beg, m, user_requested_size); | ||
InsertOrMerge(m->alloc_context_id, newMIB, A->MIBMap); | ||
}, | ||
this); | ||
|
@@ -451,11 +517,7 @@ struct Allocator { | |
atomic_exchange(&m->user_requested_size, 0, memory_order_acquire); | ||
if (memprof_inited && atomic_load_relaxed(&constructed) && | ||
!atomic_load_relaxed(&destructing)) { | ||
u64 c = GetShadowCount(p, user_requested_size); | ||
long curtime = GetTimestamp(); | ||
|
||
MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime, | ||
m->cpu_id, GetCpuId()); | ||
MemInfoBlock newMIB = this->CreateNewMIB(p, m, user_requested_size); | ||
InsertOrMerge(m->alloc_context_id, newMIB, MIBMap); | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -146,24 +146,38 @@ void SerializeStackToBuffer(const Vector<u64> &StackIds, | |
// ---------- MIB Entry 0 | ||
// Alloc Count | ||
// ... | ||
// ---- AccessHistogram Entry 0 | ||
// ... | ||
// ---- AccessHistogram Entry AccessHistogramSize - 1 | ||
// ---------- MIB Entry 1 | ||
// Alloc Count | ||
// ... | ||
// ---- AccessHistogram Entry 0 | ||
// ... | ||
// ---- AccessHistogram Entry AccessHistogramSize - 1 | ||
// ---------- | ||
void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const Vector<u64> &StackIds, | ||
const u64 ExpectedNumBytes, char *&Buffer) { | ||
char *Ptr = Buffer; | ||
const u64 NumEntries = StackIds.Size(); | ||
Ptr = WriteBytes(NumEntries, Ptr); | ||
|
||
for (u64 i = 0; i < NumEntries; i++) { | ||
const u64 Key = StackIds[i]; | ||
MIBMapTy::Handle h(&MIBMap, Key, /*remove=*/true, /*create=*/false); | ||
CHECK(h.exists()); | ||
Ptr = WriteBytes(Key, Ptr); | ||
// FIXME: We unnecessarily serialize the AccessHistogram pointer. Adding a | ||
// serialization schema will fix this issue. See also FIXME in | ||
// deserialization. | ||
Ptr = WriteBytes((*h)->mib, Ptr); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we end up serializing out the AccessHistogram pointer unnecessarily? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, if we change to a schema-based serialization (similar to the PortableMIB), it would fix it. Alternatively, we can write down each field separately. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Added a FIXME here as well to mention this. |
||
for (u64 j = 0; j < (*h)->mib.AccessHistogramSize; ++j) { | ||
u64 HistogramEntry = ((u64 *)((*h)->mib.AccessHistogram))[j]; | ||
Ptr = WriteBytes(HistogramEntry, Ptr); | ||
} | ||
if ((*h)->mib.AccessHistogramSize > 0) { | ||
InternalFree((void *)((*h)->mib.AccessHistogram)); | ||
} | ||
} | ||
|
||
CHECK(ExpectedNumBytes >= static_cast<u64>(Ptr - Buffer) && | ||
"Expected num bytes != actual bytes written"); | ||
} | ||
|
@@ -192,7 +206,15 @@ void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const Vector<u64> &StackIds, | |
// ---------- MIB Entry | ||
// Alloc Count | ||
// ... | ||
// ---------- | ||
// ---- AccessHistogram Entry 0 | ||
// ... | ||
// ---- AccessHistogram Entry AccessHistogramSize - 1 | ||
// ---------- MIB Entry 1 | ||
// Alloc Count | ||
// ... | ||
// ---- AccessHistogram Entry 0 | ||
// ... | ||
// ---- AccessHistogram Entry AccessHistogramSize - 1 | ||
// Optional Padding Bytes | ||
// ---------- Stack Info | ||
// Num Entries | ||
|
@@ -218,13 +240,26 @@ u64 SerializeToRawProfile(MIBMapTy &MIBMap, ArrayRef<LoadedModule> Modules, | |
const u64 NumMIBInfoBytes = RoundUpTo( | ||
sizeof(u64) + StackIds.Size() * (sizeof(u64) + sizeof(MemInfoBlock)), 8); | ||
|
||
// Get Number of AccessHistogram entries in total | ||
u64 TotalAccessHistogramEntries = 0; | ||
MIBMap.ForEach( | ||
[](const uptr Key, UNUSED LockedMemInfoBlock *const &MIB, void *Arg) { | ||
u64 *TotalAccessHistogramEntries = (u64 *)Arg; | ||
*TotalAccessHistogramEntries += MIB->mib.AccessHistogramSize; | ||
}, | ||
reinterpret_cast<void *>(&TotalAccessHistogramEntries)); | ||
const u64 NumHistogramBytes = | ||
RoundUpTo(TotalAccessHistogramEntries * sizeof(uint64_t), 8); | ||
|
||
const u64 NumStackBytes = RoundUpTo(StackSizeBytes(StackIds), 8); | ||
|
||
// Ensure that the profile is 8b aligned. We allow for some optional padding | ||
// at the end so that any subsequent profile serialized to the same file does | ||
// not incur unaligned accesses. | ||
const u64 TotalSizeBytes = RoundUpTo( | ||
sizeof(Header) + NumSegmentBytes + NumStackBytes + NumMIBInfoBytes, 8); | ||
const u64 TotalSizeBytes = | ||
RoundUpTo(sizeof(Header) + NumSegmentBytes + NumStackBytes + | ||
NumMIBInfoBytes + NumHistogramBytes, | ||
8); | ||
|
||
// Allocate the memory for the entire buffer incl. info blocks. | ||
Buffer = (char *)InternalAlloc(TotalSizeBytes); | ||
|
@@ -235,14 +270,16 @@ u64 SerializeToRawProfile(MIBMapTy &MIBMap, ArrayRef<LoadedModule> Modules, | |
static_cast<u64>(TotalSizeBytes), | ||
sizeof(Header), | ||
sizeof(Header) + NumSegmentBytes, | ||
sizeof(Header) + NumSegmentBytes + NumMIBInfoBytes}; | ||
sizeof(Header) + NumSegmentBytes + NumMIBInfoBytes + | ||
NumHistogramBytes}; | ||
Ptr = WriteBytes(header, Ptr); | ||
|
||
SerializeSegmentsToBuffer(Modules, NumSegmentBytes, Ptr); | ||
Ptr += NumSegmentBytes; | ||
|
||
SerializeMIBInfoToBuffer(MIBMap, StackIds, NumMIBInfoBytes, Ptr); | ||
Ptr += NumMIBInfoBytes; | ||
SerializeMIBInfoToBuffer(MIBMap, StackIds, | ||
NumMIBInfoBytes + NumHistogramBytes, Ptr); | ||
Ptr += NumMIBInfoBytes + NumHistogramBytes; | ||
|
||
SerializeStackToBuffer(StackIds, NumStackBytes, Ptr); | ||
|
||
|
Uh oh!
There was an error while loading. Please reload this page.