Skip to content

Commit d10b449

Browse files
committed
[scudo] Add partial chunk heuristic to retrieval algorithm.
Previously the secondary cache retrieval algorithm would not allow retrievals of memory chunks where the number of unused bytes would be greater than `MaxUnusedCachePages * PageSize` bytes. This meant that even if a memory chunk satisfied the requirements of the optimal-fit algorithm, it might not be returned. This remains true if memory tagging is enabled. However, if memory tagging is disabled, a new heuristic has been put in place: if a memory chunk is a non-optimal fit, the cache retrieval algorithm will attempt to release the excess memory to force a cache hit while keeping RSS down. Specifically, the retrieval algorithm will release excess memory as long as the amount of memory to be released is less than or equal to 16 KB; if the amount of memory to be released exceeds 16 KB, the retrieval algorithm will not consider that cached memory chunk valid for retrieval.
1 parent 0abb779 commit d10b449

File tree

2 files changed

+90
-30
lines changed

2 files changed

+90
-30
lines changed

compiler-rt/lib/scudo/standalone/secondary.h

Lines changed: 86 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ namespace {
7272
struct CachedBlock {
7373
static constexpr u16 CacheIndexMax = UINT16_MAX;
7474
static constexpr u16 InvalidEntry = CacheIndexMax;
75+
static constexpr uptr ReleaseMemoryUpperBound = 1 << 14;
7576

7677
uptr CommitBase = 0;
7778
uptr CommitSize = 0;
@@ -90,8 +91,9 @@ struct CachedBlock {
9091
template <typename Config> class MapAllocatorNoCache {
9192
public:
9293
void init(UNUSED s32 ReleaseToOsInterval) {}
93-
CachedBlock retrieve(UNUSED uptr Size, UNUSED uptr Alignment,
94-
UNUSED uptr HeadersSize, UNUSED uptr &EntryHeaderPos) {
94+
CachedBlock retrieve(UNUSED uptr MaxAllowedFragmentedBytes, UNUSED uptr Size,
95+
UNUSED uptr Alignment, UNUSED uptr HeadersSize,
96+
UNUSED uptr &EntryHeaderPos) {
9597
return {};
9698
}
9799
void store(UNUSED Options Options, UNUSED uptr CommitBase,
@@ -334,61 +336,110 @@ class MapAllocatorCache {
334336
}
335337
}
336338

337-
CachedBlock retrieve(uptr Size, uptr Alignment, uptr HeadersSize,
338-
uptr &EntryHeaderPos) EXCLUDES(Mutex) {
339+
CachedBlock retrieve(uptr MaxAllowedFragmentedBytes, uptr Size,
340+
uptr Alignment, uptr HeadersSize, uptr &EntryHeaderPos)
341+
EXCLUDES(Mutex) {
339342
const uptr PageSize = getPageSizeCached();
340343
// 10% of the requested size proved to be the optimal choice for
341344
// retrieving cached blocks after testing several options.
342345
constexpr u32 FragmentedBytesDivisor = 10;
343-
bool Found = false;
346+
bool FoundOptimalFit = false;
344347
CachedBlock Entry;
345348
EntryHeaderPos = 0;
346349
{
347350
ScopedLock L(Mutex);
348351
CallsToRetrieve++;
349352
if (EntriesCount == 0)
350353
return {};
351-
u32 OptimalFitIndex = 0;
354+
u16 OptimalFitIndex = CachedBlock::InvalidEntry;
352355
uptr MinDiff = UINTPTR_MAX;
353-
for (u32 I = LRUHead; I != CachedBlock::InvalidEntry;
356+
357+
// Since allocation sizes don't always match cached memory chunk sizes
358+
// we allow some memory to be unused (called fragmented bytes). The
359+
// amount of unused bytes is exactly EntryHeaderPos - CommitBase.
360+
//
361+
// CommitBase CommitBase + CommitSize
362+
// V V
363+
// +---+------------+-----------------+---+
364+
// | | | | |
365+
// +---+------------+-----------------+---+
366+
// ^ ^ ^
367+
// Guard EntryHeaderPos Guard-page-end
368+
// page-begin
369+
//
370+
// [EntryHeaderPos, CommitBase + CommitSize) contains the user data as
371+
// well as the header metadata. If EntryHeaderPos - CommitBase exceeds
372+
// MaxAllowedFragmentedBytes, the cached memory chunk is not considered
373+
// valid for retrieval.
374+
for (u16 I = LRUHead; I != CachedBlock::InvalidEntry;
354375
I = Entries[I].Next) {
355376
const uptr CommitBase = Entries[I].CommitBase;
356377
const uptr CommitSize = Entries[I].CommitSize;
357378
const uptr AllocPos =
358379
roundDown(CommitBase + CommitSize - Size, Alignment);
359380
const uptr HeaderPos = AllocPos - HeadersSize;
360-
if (HeaderPos > CommitBase + CommitSize)
361-
continue;
362-
if (HeaderPos < CommitBase ||
363-
AllocPos > CommitBase + PageSize * MaxUnusedCachePages) {
381+
if (HeaderPos > CommitBase + CommitSize || HeaderPos < CommitBase)
364382
continue;
365-
}
366-
Found = true;
383+
367384
const uptr Diff = HeaderPos - CommitBase;
368-
// immediately use a cached block if it's size is close enough to the
369-
// requested size.
370-
const uptr MaxAllowedFragmentedBytes =
371-
(CommitBase + CommitSize - HeaderPos) / FragmentedBytesDivisor;
372-
if (Diff <= MaxAllowedFragmentedBytes) {
373-
OptimalFitIndex = I;
374-
EntryHeaderPos = HeaderPos;
375-
break;
376-
}
377-
// keep track of the smallest cached block
378-
// that is greater than (AllocSize + HeaderSize)
379-
if (Diff > MinDiff)
385+
386+
if (Diff > MaxAllowedFragmentedBytes || Diff >= MinDiff)
380387
continue;
381-
OptimalFitIndex = I;
388+
382389
MinDiff = Diff;
390+
OptimalFitIndex = I;
383391
EntryHeaderPos = HeaderPos;
392+
393+
const uptr OptimalFitThesholdBytes =
394+
(CommitBase + CommitSize - HeaderPos) / FragmentedBytesDivisor;
395+
if (Diff <= OptimalFitThesholdBytes) {
396+
FoundOptimalFit = true;
397+
break;
398+
}
384399
}
385-
if (Found) {
400+
if (OptimalFitIndex != CachedBlock::InvalidEntry) {
386401
Entry = Entries[OptimalFitIndex];
387402
remove(OptimalFitIndex);
388403
SuccessfulRetrieves++;
389404
}
390405
}
391406

407+
// The difference between the retrieved memory chunk and the request
408+
// size is at most MaxAllowedFragmentedBytes
409+
//
410+
// / MaxAllowedFragmentedBytes \
411+
// +--------------------------+-----------+
412+
// | | |
413+
// +--------------------------+-----------+
414+
// \ Bytes to be released / ^
415+
// |
416+
// (may or may not have committed)
417+
//
418+
// The maximum number of bytes released to the OS is capped by
419+
// ReleaseMemoryUpperBound
420+
//
421+
// * ReleaseMemoryUpperBound default is currently 16 KB
422+
// - We arrived at this value after noticing that mapping
423+
// in larger memory regions performs better than releasing
424+
// memory and forcing a cache hit. According to the data,
425+
// it suggests that beyond 16KB, the release execution time is
426+
// longer than the map execution time. In this way, the default
427+
// is dependent on the platform.
428+
//
429+
// TODO : Consider making ReleaseMemoryUpperBound configurable since
430+
// the release to OS API can vary across systems.
431+
if (!FoundOptimalFit && Entry.Time != 0) {
432+
const uptr FragmentedBytes = EntryHeaderPos - Entry.CommitBase;
433+
const uptr MaxUnusedCacheBytes = MaxUnusedCachePages * PageSize;
434+
if (FragmentedBytes > MaxUnusedCacheBytes) {
435+
uptr BytesToRelease =
436+
roundUp(Min<uptr>(CachedBlock::ReleaseMemoryUpperBound,
437+
FragmentedBytes - MaxUnusedCacheBytes),
438+
PageSize);
439+
Entry.MemMap.releaseAndZeroPagesToOS(Entry.CommitBase, BytesToRelease);
440+
}
441+
}
442+
392443
return Entry;
393444
}
394445

@@ -659,8 +710,15 @@ MapAllocator<Config>::tryAllocateFromCache(const Options &Options, uptr Size,
659710
FillContentsMode FillContents) {
660711
CachedBlock Entry;
661712
uptr EntryHeaderPos;
713+
uptr MaxAllowedFragmentedBytes;
714+
const uptr PageSize = getPageSizeCached();
715+
716+
MaxAllowedFragmentedBytes = MaxUnusedCachePages * PageSize;
717+
if (LIKELY(!useMemoryTagging<Config>(Options)))
718+
MaxAllowedFragmentedBytes += CachedBlock::ReleaseMemoryUpperBound;
662719

663-
Entry = Cache.retrieve(Size, Alignment, getHeadersSize(), EntryHeaderPos);
720+
Entry = Cache.retrieve(MaxAllowedFragmentedBytes, Size, Alignment,
721+
getHeadersSize(), EntryHeaderPos);
664722
if (!Entry.isValid())
665723
return nullptr;
666724

compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -327,7 +327,8 @@ TEST_F(MapAllocatorCacheTest, CacheOrder) {
327327
for (scudo::uptr I = CacheConfig::getEntriesArraySize(); I > 0; I--) {
328328
scudo::uptr EntryHeaderPos;
329329
scudo::CachedBlock Entry =
330-
Cache->retrieve(TestAllocSize, PageSize, 0, EntryHeaderPos);
330+
Cache->retrieve(scudo::MaxUnusedCachePages * PageSize, TestAllocSize,
331+
PageSize, 0, EntryHeaderPos);
331332
EXPECT_EQ(Entry.MemMap.getBase(), MemMaps[I - 1].getBase());
332333
}
333334

@@ -351,7 +352,8 @@ TEST_F(MapAllocatorCacheTest, MemoryLeakTest) {
351352
for (scudo::uptr I = CacheConfig::getDefaultMaxEntriesCount(); I > 0; I--) {
352353
scudo::uptr EntryHeaderPos;
353354
RetrievedEntries.push_back(
354-
Cache->retrieve(TestAllocSize, PageSize, 0, EntryHeaderPos));
355+
Cache->retrieve(scudo::MaxUnusedCachePages * PageSize, TestAllocSize,
356+
PageSize, 0, EntryHeaderPos));
355357
EXPECT_EQ(MemMaps[I].getBase(), RetrievedEntries.back().MemMap.getBase());
356358
}
357359

0 commit comments

Comments
 (0)